/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"


MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

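/*
 * RLC save/restore register list for Cape Verde.  Entries come in pairs:
 * the first word packs a GRBM_GFX_INDEX-style selector in the high 16 bits
 * (0x8000/0x8040 appear to address the two shader engines, 0x9c00 a
 * broadcast) with the register dword offset in the low 16 bits; the second
 * word is the default value.  The lone 0x3 marker and the terminating zero
 * follow the layout the RLC firmware expects.  The selector interpretation
 * is our reading of the values, not a documented contract.
 */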
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

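/*
 * "Golden" register tables: triples of { register offset, and-mask, value }
 * consumed by radeon_program_register_sequence(), which read-modify-writes
 * each register (a mask of 0xffffffff replaces the register outright).
 */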
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

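/*
 * Medium/coarse grain clock gating (MGCG/CGCG) defaults, in the same
 * { reg, mask, value } triple format as the golden register tables above.
 */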
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

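/*
 * Cape Verde power-gating init sequence, again { reg, mask, value }
 * triples.  The 0x3538/0x353c and 0x3500/0x3504 pairs look like
 * index/data style writes that load the PG state machine; that reading
 * is inferred from the access pattern rather than documented.
 */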
static const u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

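/**
 * si_init_golden_registers - program the "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the per-ASIC golden, RLC, clock-gating and (on Verde)
 * power-gating register sequences for the detected SI variant.
 */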
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

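/*
 * Clock values here follow the driver's usual 10 kHz units, so
 * PCIE_BUS_CLK is 100 MHz and TCLK works out to 10 MHz.
 */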
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

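	/* bit 9 appears to flag an out-of-range reading; clamp to 255 C */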
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

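/*
 * Per-ASIC MC "io debug" register defaults: pairs of
 * { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA } values programmed
 * before the MC ucode is loaded (see si_mc_load_microcode()).
 */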
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/**
 * si_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (SI).
 * Returns 0 on success, error on failure.
 */
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, blackout = 0;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->datasize / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
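		/*
		 * Note: running is known to be zero on this path, so the
		 * blackout save below (and the restore at the end of this
		 * block) never actually executes.
		 */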
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

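/**
 * si_init_microcode - fetch the required microcode images
 *
 * @rdev: radeon_device pointer
 *
 * Requests each ucode image for the detected SI variant, trying the
 * new lowercase firmware names first and falling back to the legacy
 * uppercase BE images, validating sizes or headers along the way.
 * Returns 0 on success, negative error code on failure.
 */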
1636 static int si_init_microcode(struct radeon_device *rdev)
1637 {
1638 	const char *chip_name;
1639 	const char *new_chip_name;
1640 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1641 	size_t smc_req_size, mc2_req_size;
1642 	char fw_name[30];
1643 	int err;
1644 	int new_fw = 0;
1645 
1646 	DRM_DEBUG("\n");
1647 
1648 	switch (rdev->family) {
1649 	case CHIP_TAHITI:
1650 		chip_name = "TAHITI";
1651 		new_chip_name = "tahiti";
1652 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1653 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1654 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1655 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1656 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1657 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1658 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1659 		break;
1660 	case CHIP_PITCAIRN:
1661 		chip_name = "PITCAIRN";
1662 		new_chip_name = "pitcairn";
1663 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1664 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1665 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1666 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1667 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1668 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1669 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1670 		break;
1671 	case CHIP_VERDE:
1672 		chip_name = "VERDE";
1673 		new_chip_name = "verde";
1674 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1675 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1676 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1677 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1678 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1679 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1680 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1681 		break;
1682 	case CHIP_OLAND:
1683 		chip_name = "OLAND";
1684 		new_chip_name = "oland";
1685 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1686 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1687 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1688 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1689 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1690 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1691 		break;
1692 	case CHIP_HAINAN:
1693 		chip_name = "HAINAN";
1694 		new_chip_name = "hainan";
1695 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1696 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1697 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1698 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1699 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1700 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1701 		break;
1702 	default: BUG();
1703 	}
1704 
1705 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1706 
1707 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1708 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1709 	if (err) {
1710 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1711 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1712 		if (err)
1713 			goto out;
1714 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1715 			printk(KERN_ERR
1716 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1717 			       rdev->pfp_fw->datasize, fw_name);
1718 			err = -EINVAL;
1719 			goto out;
1720 		}
1721 	} else {
1722 		err = radeon_ucode_validate(rdev->pfp_fw);
1723 		if (err) {
1724 			printk(KERN_ERR
1725 			       "si_cp: validation failed for firmware \"%s\"\n",
1726 			       fw_name);
1727 			goto out;
1728 		} else {
1729 			new_fw++;
1730 		}
1731 	}
1732 
1733 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
1734 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1735 	if (err) {
1736 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1737 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1738 		if (err)
1739 			goto out;
1740 		if (rdev->me_fw->datasize != me_req_size) {
1741 			printk(KERN_ERR
1742 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1743 			       rdev->me_fw->datasize, fw_name);
1744 			err = -EINVAL;
			goto out;
1745 		}
1746 	} else {
1747 		err = radeon_ucode_validate(rdev->me_fw);
1748 		if (err) {
1749 			printk(KERN_ERR
1750 			       "si_cp: validation failed for firmware \"%s\"\n",
1751 			       fw_name);
1752 			goto out;
1753 		} else {
1754 			new_fw++;
1755 		}
1756 	}
1757 
1758 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
1759 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1760 	if (err) {
1761 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1762 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1763 		if (err)
1764 			goto out;
1765 		if (rdev->ce_fw->datasize != ce_req_size) {
1766 			printk(KERN_ERR
1767 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1768 			       rdev->ce_fw->datasize, fw_name);
1769 			err = -EINVAL;
			goto out;
1770 		}
1771 	} else {
1772 		err = radeon_ucode_validate(rdev->ce_fw);
1773 		if (err) {
1774 			printk(KERN_ERR
1775 			       "si_cp: validation failed for firmware \"%s\"\n",
1776 			       fw_name);
1777 			goto out;
1778 		} else {
1779 			new_fw++;
1780 		}
1781 	}
1782 
1783 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
1784 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1785 	if (err) {
1786 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1787 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1788 		if (err)
1789 			goto out;
1790 		if (rdev->rlc_fw->datasize != rlc_req_size) {
1791 			printk(KERN_ERR
1792 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1793 			       rdev->rlc_fw->datasize, fw_name);
1794 			err = -EINVAL;
			goto out;
1795 		}
1796 	} else {
1797 		err = radeon_ucode_validate(rdev->rlc_fw);
1798 		if (err) {
1799 			printk(KERN_ERR
1800 			       "si_cp: validation failed for firmware \"%s\"\n",
1801 			       fw_name);
1802 			goto out;
1803 		} else {
1804 			new_fw++;
1805 		}
1806 	}
1807 
1808 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
1809 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1810 	if (err) {
1811 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1812 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1813 		if (err) {
1814 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1815 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1816 			if (err)
1817 				goto out;
1818 		}
1819 		if ((rdev->mc_fw->datasize != mc_req_size) &&
1820 		    (rdev->mc_fw->datasize != mc2_req_size)) {
1821 			printk(KERN_ERR
1822 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1823 			       rdev->mc_fw->datasize, fw_name);
1824 			err = -EINVAL;
			goto out;
1825 		}
1826 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1827 	} else {
1828 		err = radeon_ucode_validate(rdev->mc_fw);
1829 		if (err) {
1830 			printk(KERN_ERR
1831 			       "si_cp: validation failed for firmware \"%s\"\n",
1832 			       fw_name);
1833 			goto out;
1834 		} else {
1835 			new_fw++;
1836 		}
1837 	}
1838 
1839 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
1840 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1841 	if (err) {
1842 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1843 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1844 		if (err) {
1845 			printk(KERN_ERR
1846 			       "smc: error loading firmware \"%s\"\n",
1847 			       fw_name);
1848 			release_firmware(rdev->smc_fw);
1849 			rdev->smc_fw = NULL;
1850 			err = 0;
1851 		} else if (rdev->smc_fw->datasize != smc_req_size) {
1852 			printk(KERN_ERR
1853 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1854 			       rdev->smc_fw->datasize, fw_name);
1855 			err = -EINVAL;
1856 		}
1857 	} else {
1858 		err = radeon_ucode_validate(rdev->smc_fw);
1859 		if (err) {
1860 			printk(KERN_ERR
1861 			       "si_cp: validation failed for firmware \"%s\"\n",
1862 			       fw_name);
1863 			goto out;
1864 		} else {
1865 			new_fw++;
1866 		}
1867 	}
1868 
1869 	if (new_fw == 0) {
1870 		rdev->new_fw = false;
1871 	} else if (new_fw < 6) {
1872 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1873 		err = -EINVAL;
1874 	} else {
1875 		rdev->new_fw = true;
1876 	}
1877 out:
1878 	if (err) {
1879 		if (err != -EINVAL)
1880 			printk(KERN_ERR
1881 			       "si_cp: Failed to load firmware \"%s\"\n",
1882 			       fw_name);
1883 		release_firmware(rdev->pfp_fw);
1884 		rdev->pfp_fw = NULL;
1885 		release_firmware(rdev->me_fw);
1886 		rdev->me_fw = NULL;
1887 		release_firmware(rdev->ce_fw);
1888 		rdev->ce_fw = NULL;
1889 		release_firmware(rdev->rlc_fw);
1890 		rdev->rlc_fw = NULL;
1891 		release_firmware(rdev->mc_fw);
1892 		rdev->mc_fw = NULL;
1893 		release_firmware(rdev->smc_fw);
1894 		rdev->smc_fw = NULL;
1895 	}
1896 	return err;
1897 }
1898 
1899 /**
1900  * si_fini_microcode - drop the firmware image references
1901  *
1902  * @rdev: radeon_device pointer
1903  *
1904  * Drop the pfp, me, rlc, mc, ce and smc firmware image references.
1905  * Called at driver shutdown.
1906  */
1907 static void si_fini_microcode(struct radeon_device *rdev)
1908 {
1909 	release_firmware(rdev->pfp_fw);
1910 	rdev->pfp_fw = NULL;
1911 	release_firmware(rdev->me_fw);
1912 	rdev->me_fw = NULL;
1913 	release_firmware(rdev->rlc_fw);
1914 	rdev->rlc_fw = NULL;
1915 	release_firmware(rdev->mc_fw);
1916 	rdev->mc_fw = NULL;
1917 	release_firmware(rdev->smc_fw);
1918 	rdev->smc_fw = NULL;
1919 	release_firmware(rdev->ce_fw);
1920 	rdev->ce_fw = NULL;
1921 }
1922 
1923 /* watermark setup */
1924 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1925 				   struct radeon_crtc *radeon_crtc,
1926 				   struct drm_display_mode *mode,
1927 				   struct drm_display_mode *other_mode)
1928 {
1929 	u32 tmp, buffer_alloc, i;
1930 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1931 	/*
1932 	 * Line Buffer Setup
1933 	 * There are 3 line buffers, each one shared by 2 display controllers.
1934 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1935 	 * the display controllers.  The partitioning is done via one of four
1936 	 * preset allocations specified in bits 21:20 (only two are used here):
1937 	 *  0 - half lb
1938 	 *  2 - whole lb, other crtc must be disabled
1939 	 */
1940 	/* This can get tricky if we have two large displays on a paired group
1941 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1942 	 * non-linked crtcs for maximum line buffer allocation.
1943 	 */
1944 	if (radeon_crtc->base.enabled && mode) {
1945 		if (other_mode) {
1946 			tmp = 0; /* 1/2 */
1947 			buffer_alloc = 1;
1948 		} else {
1949 			tmp = 2; /* whole */
1950 			buffer_alloc = 2;
1951 		}
1952 	} else {
1953 		tmp = 0;
1954 		buffer_alloc = 0;
1955 	}
1956 
1957 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1958 	       DC_LB_MEMORY_CONFIG(tmp));
1959 
1960 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1961 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1962 	for (i = 0; i < rdev->usec_timeout; i++) {
1963 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1964 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1965 			break;
1966 		udelay(1);
1967 	}
1968 
1969 	if (radeon_crtc->base.enabled && mode) {
1970 		switch (tmp) {
1971 		case 0:
1972 		default:
1973 			return 4096 * 2;
1974 		case 2:
1975 			return 8192 * 2;
1976 		}
1977 	}
1978 
1979 	/* controller not enabled, so no lb used */
1980 	return 0;
1981 }
1982 
1983 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1984 {
1985 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1986 
1987 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1988 	case 0:
1989 	default:
1990 		return 1;
1991 	case 1:
1992 		return 2;
1993 	case 2:
1994 		return 4;
1995 	case 3:
1996 		return 8;
1997 	case 4:
1998 		return 3;
1999 	case 5:
2000 		return 6;
2001 	case 6:
2002 		return 10;
2003 	case 7:
2004 		return 12;
2005 	case 8:
2006 		return 16;
2007 	}
2008 }
2009 
2010 struct dce6_wm_params {
2011 	u32 dram_channels; /* number of dram channels */
2012 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2013 	u32 sclk;          /* engine clock in kHz */
2014 	u32 disp_clk;      /* display clock in kHz */
2015 	u32 src_width;     /* viewport width */
2016 	u32 active_time;   /* active display time in ns */
2017 	u32 blank_time;    /* blank time in ns */
2018 	bool interlaced;   /* mode is interlaced */
2019 	fixed20_12 vsc;    /* vertical scale ratio */
2020 	u32 num_heads;     /* number of active crtcs */
2021 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2022 	u32 lb_size;       /* line buffer allocated to pipe */
2023 	u32 vtaps;         /* vertical scaler taps */
2024 };
2025 
2026 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2027 {
2028 	/* Calculate raw DRAM Bandwidth */
2029 	fixed20_12 dram_efficiency; /* 0.7 */
2030 	fixed20_12 yclk, dram_channels, bandwidth;
2031 	fixed20_12 a;
2032 
2033 	a.full = dfixed_const(1000);
2034 	yclk.full = dfixed_const(wm->yclk);
2035 	yclk.full = dfixed_div(yclk, a);
2036 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2037 	a.full = dfixed_const(10);
2038 	dram_efficiency.full = dfixed_const(7);
2039 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2040 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2041 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2042 
2043 	return dfixed_trunc(bandwidth);
2044 }
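/*
 * Illustrative example for dce6_dram_bandwidth() above (made-up numbers):
 * wm->dram_channels = 2 and wm->yclk = 1000000 (1 GHz effective) give
 * 2 channels * 4 bytes * 1000 MHz * 0.7 efficiency ~= 5600 MB/s.
 */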
2045 
2046 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2047 {
2048 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2049 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2050 	fixed20_12 yclk, dram_channels, bandwidth;
2051 	fixed20_12 a;
2052 
2053 	a.full = dfixed_const(1000);
2054 	yclk.full = dfixed_const(wm->yclk);
2055 	yclk.full = dfixed_div(yclk, a);
2056 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2057 	a.full = dfixed_const(10);
2058 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
2059 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2060 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2061 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2062 
2063 	return dfixed_trunc(bandwidth);
2064 }
2065 
2066 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2067 {
2068 	/* Calculate the display Data return Bandwidth */
2069 	fixed20_12 return_efficiency; /* 0.8 */
2070 	fixed20_12 sclk, bandwidth;
2071 	fixed20_12 a;
2072 
2073 	a.full = dfixed_const(1000);
2074 	sclk.full = dfixed_const(wm->sclk);
2075 	sclk.full = dfixed_div(sclk, a);
2076 	a.full = dfixed_const(10);
2077 	return_efficiency.full = dfixed_const(8);
2078 	return_efficiency.full = dfixed_div(return_efficiency, a);
2079 	a.full = dfixed_const(32);
2080 	bandwidth.full = dfixed_mul(a, sclk);
2081 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2082 
2083 	return dfixed_trunc(bandwidth);
2084 }
2085 
2086 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2087 {
2088 	return 32;
2089 }
2090 
2091 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2092 {
2093 	/* Calculate the DMIF Request Bandwidth */
2094 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2095 	fixed20_12 disp_clk, sclk, bandwidth;
2096 	fixed20_12 a, b1, b2;
2097 	u32 min_bandwidth;
2098 
2099 	a.full = dfixed_const(1000);
2100 	disp_clk.full = dfixed_const(wm->disp_clk);
2101 	disp_clk.full = dfixed_div(disp_clk, a);
2102 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2103 	b1.full = dfixed_mul(a, disp_clk);
2104 
2105 	a.full = dfixed_const(1000);
2106 	sclk.full = dfixed_const(wm->sclk);
2107 	sclk.full = dfixed_div(sclk, a);
2108 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2109 	b2.full = dfixed_mul(a, sclk);
2110 
2111 	a.full = dfixed_const(10);
2112 	disp_clk_request_efficiency.full = dfixed_const(8);
2113 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2114 
2115 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2116 
2117 	a.full = dfixed_const(min_bandwidth);
2118 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2119 
2120 	return dfixed_trunc(bandwidth);
2121 }
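/*
 * The above is effectively min(16 * disp_clk, 32 * sclk) * 0.8: half a
 * 32-byte request per display clock vs. one full request per engine clock,
 * derated by the 0.8 request efficiency.  E.g. (made-up numbers)
 * disp_clk = 300 MHz and sclk = 800 MHz yield min(4800, 25600) * 0.8
 * = 3840 MB/s.
 */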
2122 
2123 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2124 {
2125 	/* Calculate the available bandwidth. The display can use this much temporarily, but not on average. */
2126 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2127 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2128 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2129 
2130 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2131 }
2132 
2133 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2134 {
2135 	/* Calculate the display mode Average Bandwidth.
2136 	 * DisplayMode should contain the source and destination dimensions,
2137 	 * timing, etc.
2138 	 */
2139 	fixed20_12 bpp;
2140 	fixed20_12 line_time;
2141 	fixed20_12 src_width;
2142 	fixed20_12 bandwidth;
2143 	fixed20_12 a;
2144 
2145 	a.full = dfixed_const(1000);
2146 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2147 	line_time.full = dfixed_div(line_time, a);
2148 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2149 	src_width.full = dfixed_const(wm->src_width);
2150 	bandwidth.full = dfixed_mul(src_width, bpp);
2151 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2152 	bandwidth.full = dfixed_div(bandwidth, line_time);
2153 
2154 	return dfixed_trunc(bandwidth);
2155 }
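/*
 * I.e. average bandwidth = src_width * bytes_per_pixel * vsc / line_time,
 * with line_time converted from ns to us so the result comes out in MB/s.
 * E.g. (made-up numbers) a 1920-wide 32bpp source with vsc = 1 and a
 * ~14.8 us line time averages roughly 1920 * 4 / 14.8 ~= 519 MB/s.
 */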
2156 
2157 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2158 {
2159 	/* First calculate the latency in ns */
2160 	u32 mc_latency = 2000; /* 2000 ns. */
2161 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2162 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2163 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2164 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2165 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2166 		(wm->num_heads * cursor_line_pair_return_time);
2167 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2168 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2169 	u32 tmp, dmif_size = 12288;
2170 	fixed20_12 a, b, c;
2171 
2172 	if (wm->num_heads == 0)
2173 		return 0;
2174 
2175 	a.full = dfixed_const(2);
2176 	b.full = dfixed_const(1);
2177 	if ((wm->vsc.full > a.full) ||
2178 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2179 	    (wm->vtaps >= 5) ||
2180 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2181 		max_src_lines_per_dst_line = 4;
2182 	else
2183 		max_src_lines_per_dst_line = 2;
2184 
2185 	a.full = dfixed_const(available_bandwidth);
2186 	b.full = dfixed_const(wm->num_heads);
2187 	a.full = dfixed_div(a, b);
2188 
2189 	b.full = dfixed_const(mc_latency + 512);
2190 	c.full = dfixed_const(wm->disp_clk);
2191 	b.full = dfixed_div(b, c);
2192 
2193 	c.full = dfixed_const(dmif_size);
2194 	b.full = dfixed_div(c, b);
2195 
2196 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2197 
2198 	b.full = dfixed_const(1000);
2199 	c.full = dfixed_const(wm->disp_clk);
2200 	b.full = dfixed_div(c, b);
2201 	c.full = dfixed_const(wm->bytes_per_pixel);
2202 	b.full = dfixed_mul(b, c);
2203 
2204 	lb_fill_bw = min(tmp, dfixed_trunc(b));
2205 
2206 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2207 	b.full = dfixed_const(1000);
2208 	c.full = dfixed_const(lb_fill_bw);
2209 	b.full = dfixed_div(c, b);
2210 	a.full = dfixed_div(a, b);
2211 	line_fill_time = dfixed_trunc(a);
2212 
2213 	if (line_fill_time < wm->active_time)
2214 		return latency;
2215 	else
2216 		return latency + (line_fill_time - wm->active_time);
2217 
2218 }
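/*
 * The value returned above is the worst-case latency (in ns) the display
 * pipe must tolerate: memory latency, the data-return time of the other
 * heads and the dc pipe latency, extended by any shortfall of the line
 * buffer fill rate versus the active display time.
 */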
2219 
2220 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2221 {
2222 	if (dce6_average_bandwidth(wm) <=
2223 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2224 		return true;
2225 	else
2226 		return false;
2227 }
2228 
2229 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2230 {
2231 	if (dce6_average_bandwidth(wm) <=
2232 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2233 		return true;
2234 	else
2235 		return false;
2236 }
2237 
2238 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2239 {
2240 	u32 lb_partitions = wm->lb_size / wm->src_width;
2241 	u32 line_time = wm->active_time + wm->blank_time;
2242 	u32 latency_tolerant_lines;
2243 	u32 latency_hiding;
2244 	fixed20_12 a;
2245 
2246 	a.full = dfixed_const(1);
2247 	if (wm->vsc.full > a.full)
2248 		latency_tolerant_lines = 1;
2249 	else {
2250 		if (lb_partitions <= (wm->vtaps + 1))
2251 			latency_tolerant_lines = 1;
2252 		else
2253 			latency_tolerant_lines = 2;
2254 	}
2255 
2256 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2257 
2258 	if (dce6_latency_watermark(wm) <= latency_hiding)
2259 		return true;
2260 	else
2261 		return false;
2262 }
2263 
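/*
 * Compute and program the per-crtc display watermarks: watermark A is
 * derived for the high (performance) clocks and watermark B for the low
 * clocks, and each latency is converted into a priority mark expressed as
 * the number of 16-pixel groups fetched during that latency.  The results
 * are also cached in the radeon_crtc for later use by DPM.
 */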
2264 static void dce6_program_watermarks(struct radeon_device *rdev,
2265 					 struct radeon_crtc *radeon_crtc,
2266 					 u32 lb_size, u32 num_heads)
2267 {
2268 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2269 	struct dce6_wm_params wm_low, wm_high;
2270 	u32 dram_channels;
2271 	u32 pixel_period;
2272 	u32 line_time = 0;
2273 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2274 	u32 priority_a_mark = 0, priority_b_mark = 0;
2275 	u32 priority_a_cnt = PRIORITY_OFF;
2276 	u32 priority_b_cnt = PRIORITY_OFF;
2277 	u32 tmp, arb_control3;
2278 	fixed20_12 a, b, c;
2279 
2280 	if (radeon_crtc->base.enabled && num_heads && mode) {
2281 		pixel_period = 1000000 / (u32)mode->clock;
2282 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2283 		priority_a_cnt = 0;
2284 		priority_b_cnt = 0;
2285 
2286 		if (rdev->family == CHIP_ARUBA)
2287 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2288 		else
2289 			dram_channels = si_get_number_of_dram_channels(rdev);
2290 
2291 		/* watermark for high clocks */
2292 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2293 			wm_high.yclk =
2294 				radeon_dpm_get_mclk(rdev, false) * 10;
2295 			wm_high.sclk =
2296 				radeon_dpm_get_sclk(rdev, false) * 10;
2297 		} else {
2298 			wm_high.yclk = rdev->pm.current_mclk * 10;
2299 			wm_high.sclk = rdev->pm.current_sclk * 10;
2300 		}
2301 
2302 		wm_high.disp_clk = mode->clock;
2303 		wm_high.src_width = mode->crtc_hdisplay;
2304 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2305 		wm_high.blank_time = line_time - wm_high.active_time;
2306 		wm_high.interlaced = false;
2307 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2308 			wm_high.interlaced = true;
2309 		wm_high.vsc = radeon_crtc->vsc;
2310 		wm_high.vtaps = 1;
2311 		if (radeon_crtc->rmx_type != RMX_OFF)
2312 			wm_high.vtaps = 2;
2313 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2314 		wm_high.lb_size = lb_size;
2315 		wm_high.dram_channels = dram_channels;
2316 		wm_high.num_heads = num_heads;
2317 
2318 		/* watermark for low clocks */
2319 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2320 			wm_low.yclk =
2321 				radeon_dpm_get_mclk(rdev, true) * 10;
2322 			wm_low.sclk =
2323 				radeon_dpm_get_sclk(rdev, true) * 10;
2324 		} else {
2325 			wm_low.yclk = rdev->pm.current_mclk * 10;
2326 			wm_low.sclk = rdev->pm.current_sclk * 10;
2327 		}
2328 
2329 		wm_low.disp_clk = mode->clock;
2330 		wm_low.src_width = mode->crtc_hdisplay;
2331 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2332 		wm_low.blank_time = line_time - wm_low.active_time;
2333 		wm_low.interlaced = false;
2334 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2335 			wm_low.interlaced = true;
2336 		wm_low.vsc = radeon_crtc->vsc;
2337 		wm_low.vtaps = 1;
2338 		if (radeon_crtc->rmx_type != RMX_OFF)
2339 			wm_low.vtaps = 2;
2340 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2341 		wm_low.lb_size = lb_size;
2342 		wm_low.dram_channels = dram_channels;
2343 		wm_low.num_heads = num_heads;
2344 
2345 		/* set for high clocks */
2346 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2347 		/* set for low clocks */
2348 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2349 
2350 		/* possibly force display priority to high */
2351 		/* should really do this at mode validation time... */
2352 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2353 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2354 		    !dce6_check_latency_hiding(&wm_high) ||
2355 		    (rdev->disp_priority == 2)) {
2356 			DRM_DEBUG_KMS("force priority to high\n");
2357 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2358 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2359 		}
2360 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2361 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2362 		    !dce6_check_latency_hiding(&wm_low) ||
2363 		    (rdev->disp_priority == 2)) {
2364 			DRM_DEBUG_KMS("force priority to high\n");
2365 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2366 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2367 		}
2368 
2369 		a.full = dfixed_const(1000);
2370 		b.full = dfixed_const(mode->clock);
2371 		b.full = dfixed_div(b, a);
2372 		c.full = dfixed_const(latency_watermark_a);
2373 		c.full = dfixed_mul(c, b);
2374 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2375 		c.full = dfixed_div(c, a);
2376 		a.full = dfixed_const(16);
2377 		c.full = dfixed_div(c, a);
2378 		priority_a_mark = dfixed_trunc(c);
2379 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2380 
2381 		a.full = dfixed_const(1000);
2382 		b.full = dfixed_const(mode->clock);
2383 		b.full = dfixed_div(b, a);
2384 		c.full = dfixed_const(latency_watermark_b);
2385 		c.full = dfixed_mul(c, b);
2386 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2387 		c.full = dfixed_div(c, a);
2388 		a.full = dfixed_const(16);
2389 		c.full = dfixed_div(c, a);
2390 		priority_b_mark = dfixed_trunc(c);
2391 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2392 
2393 		/* Save the number of lines by which the line buffer leads the scanout */
2394 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2395 	}
2396 
2397 	/* select wm A */
2398 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2399 	tmp = arb_control3;
2400 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2401 	tmp |= LATENCY_WATERMARK_MASK(1);
2402 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2403 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2404 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2405 		LATENCY_HIGH_WATERMARK(line_time)));
2406 	/* select wm B */
2407 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2408 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2409 	tmp |= LATENCY_WATERMARK_MASK(2);
2410 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2411 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2412 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2413 		LATENCY_HIGH_WATERMARK(line_time)));
2414 	/* restore original selection */
2415 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2416 
2417 	/* write the priority marks */
2418 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2419 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2420 
2421 	/* save values for DPM */
2422 	radeon_crtc->line_time = line_time;
2423 	radeon_crtc->wm_high = latency_watermark_a;
2424 	radeon_crtc->wm_low = latency_watermark_b;
2425 }
2426 
2427 void dce6_bandwidth_update(struct radeon_device *rdev)
2428 {
2429 	struct drm_display_mode *mode0 = NULL;
2430 	struct drm_display_mode *mode1 = NULL;
2431 	u32 num_heads = 0, lb_size;
2432 	int i;
2433 
2434 	if (!rdev->mode_info.mode_config_initialized)
2435 		return;
2436 
2437 	radeon_update_display_priority(rdev);
2438 
2439 	for (i = 0; i < rdev->num_crtc; i++) {
2440 		if (rdev->mode_info.crtcs[i]->base.enabled)
2441 			num_heads++;
2442 	}
2443 	for (i = 0; i < rdev->num_crtc; i += 2) {
2444 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2445 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2446 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2447 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2448 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2449 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2450 	}
2451 }
2452 
2453 /*
2454  * Core functions
2455  */
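/*
 * Each of the 32 GB_TILE_MODE registers programmed below packs one tiling
 * layout (array mode, pipe config, tile split and bank geometry) selected
 * by its index.  The values are mirrored into tile_mode_array so the table
 * can also be queried from userspace.
 */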
2456 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2457 {
2458 	const u32 num_tile_mode_states = 32;
2459 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2460 
2461 	switch (rdev->config.si.mem_row_size_in_kb) {
2462 	case 1:
2463 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2464 		break;
2465 	case 2:
2466 	default:
2467 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2468 		break;
2469 	case 4:
2470 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2471 		break;
2472 	}
2473 
2474 	if ((rdev->family == CHIP_TAHITI) ||
2475 	    (rdev->family == CHIP_PITCAIRN)) {
2476 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2477 			switch (reg_offset) {
2478 			case 0:  /* non-AA compressed depth or any compressed stencil */
2479 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2481 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2482 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2483 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2484 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2487 				break;
2488 			case 1:  /* 2xAA/4xAA compressed depth only */
2489 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2490 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2491 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2492 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2493 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2494 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2496 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2497 				break;
2498 			case 2:  /* 8xAA compressed depth only */
2499 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2501 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2502 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2503 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2504 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2506 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2507 				break;
2508 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2509 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2511 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2512 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2513 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2514 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2516 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2517 				break;
2518 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2519 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2520 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2521 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2522 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2523 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2524 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2526 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2527 				break;
2528 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2529 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2532 						 TILE_SPLIT(split_equal_to_row_size) |
2533 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2534 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2536 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2537 				break;
2538 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2539 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2541 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2542 						 TILE_SPLIT(split_equal_to_row_size) |
2543 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2544 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2547 				break;
2548 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2549 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2552 						 TILE_SPLIT(split_equal_to_row_size) |
2553 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2554 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2556 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2557 				break;
2558 			case 8:  /* 1D and 1D Array Surfaces */
2559 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2560 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2561 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2562 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2563 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2564 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2566 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2567 				break;
2568 			case 9:  /* Displayable maps. */
2569 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2570 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2571 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2573 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2574 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2576 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2577 				break;
2578 			case 10:  /* Display 8bpp. */
2579 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2581 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2583 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2584 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2586 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2587 				break;
2588 			case 11:  /* Display 16bpp. */
2589 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2591 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2592 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2593 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2594 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2596 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2597 				break;
2598 			case 12:  /* Display 32bpp. */
2599 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2601 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2602 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2603 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2604 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2607 				break;
2608 			case 13:  /* Thin. */
2609 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2610 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2611 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2612 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2613 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2614 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2616 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2617 				break;
2618 			case 14:  /* Thin 8 bpp. */
2619 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2621 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2622 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2623 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2624 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2627 				break;
2628 			case 15:  /* Thin 16 bpp. */
2629 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2630 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2631 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2632 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2633 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2634 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2636 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2637 				break;
2638 			case 16:  /* Thin 32 bpp. */
2639 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2641 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2642 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2643 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2644 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2647 				break;
2648 			case 17:  /* Thin 64 bpp. */
2649 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2650 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2651 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2652 						 TILE_SPLIT(split_equal_to_row_size) |
2653 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2654 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2657 				break;
2658 			case 21:  /* 8 bpp PRT. */
2659 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2661 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2663 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2664 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2665 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2666 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2667 				break;
2668 			case 22:  /* 16 bpp PRT */
2669 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2671 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2672 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2673 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2674 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2676 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2677 				break;
2678 			case 23:  /* 32 bpp PRT */
2679 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2680 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2681 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2682 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2683 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2684 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2686 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2687 				break;
2688 			case 24:  /* 64 bpp PRT */
2689 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2691 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2692 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2693 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2694 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2695 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2696 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2697 				break;
2698 			case 25:  /* 128 bpp PRT */
2699 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2701 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2702 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2703 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2704 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2705 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2706 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2707 				break;
2708 			default:
2709 				gb_tile_moden = 0;
2710 				break;
2711 			}
2712 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2713 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2714 		}
2715 	} else if ((rdev->family == CHIP_VERDE) ||
2716 		   (rdev->family == CHIP_OLAND) ||
2717 		   (rdev->family == CHIP_HAINAN)) {
2718 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2719 			switch (reg_offset) {
2720 			case 0:  /* non-AA compressed depth or any compressed stencil */
2721 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2723 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2725 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2726 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2729 				break;
2730 			case 1:  /* 2xAA/4xAA compressed depth only */
2731 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2733 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2734 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2736 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2739 				break;
2740 			case 2:  /* 8xAA compressed depth only */
2741 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2743 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2745 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2746 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2748 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2749 				break;
2750 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2751 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2752 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2753 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2755 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2756 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2759 				break;
2760 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2761 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2762 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2765 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2766 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2768 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2769 				break;
2770 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2771 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2772 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2773 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 						 TILE_SPLIT(split_equal_to_row_size) |
2775 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2776 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2778 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2779 				break;
2780 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2781 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2783 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784 						 TILE_SPLIT(split_equal_to_row_size) |
2785 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2786 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2789 				break;
2790 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2791 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2793 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2794 						 TILE_SPLIT(split_equal_to_row_size) |
2795 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2796 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2797 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2798 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2799 				break;
2800 			case 8:  /* 1D and 1D Array Surfaces */
2801 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2802 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2803 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2805 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2806 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2808 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2809 				break;
2810 			case 9:  /* Displayable maps. */
2811 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2815 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2816 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2818 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2819 				break;
2820 			case 10:  /* Display 8bpp. */
2821 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2823 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2824 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2825 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2826 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2829 				break;
2830 			case 11:  /* Display 16bpp. */
2831 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2832 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2833 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2834 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2835 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2836 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2839 				break;
2840 			case 12:  /* Display 32bpp. */
2841 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2843 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2844 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2845 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2846 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2849 				break;
2850 			case 13:  /* Thin. */
2851 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2853 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2854 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2855 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2856 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2858 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2859 				break;
2860 			case 14:  /* Thin 8 bpp. */
2861 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2862 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2863 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2864 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2865 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2866 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2869 				break;
2870 			case 15:  /* Thin 16 bpp. */
2871 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2873 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2874 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2875 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2876 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2878 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2879 				break;
2880 			case 16:  /* Thin 32 bpp. */
2881 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2883 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2884 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2885 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2886 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2889 				break;
2890 			case 17:  /* Thin 64 bpp. */
2891 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2893 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2894 						 TILE_SPLIT(split_equal_to_row_size) |
2895 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2896 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2898 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2899 				break;
2900 			case 21:  /* 8 bpp PRT. */
2901 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2903 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2904 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2905 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2906 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2907 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2908 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2909 				break;
2910 			case 22:  /* 16 bpp PRT */
2911 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2913 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2914 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2915 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2916 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2918 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2919 				break;
2920 			case 23:  /* 32 bpp PRT */
2921 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2922 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2923 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2924 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2925 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2926 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2928 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2929 				break;
2930 			case 24:  /* 64 bpp PRT */
2931 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2933 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2934 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2935 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2936 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2939 				break;
2940 			case 25:  /* 128 bpp PRT */
2941 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2943 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2944 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2945 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2946 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2948 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2949 				break;
2950 			default:
2951 				gb_tile_moden = 0;
2952 				break;
2953 			}
2954 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2955 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2956 		}
2957 	} else
2958 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2959 }
2960 
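/*
 * GRBM_GFX_INDEX selects which shader engine (SE) / shader array (SH)
 * instance subsequent indexed register accesses target; passing 0xffffffff
 * for either index requests broadcast writes at that level instead.
 */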
2961 static void si_select_se_sh(struct radeon_device *rdev,
2962 			    u32 se_num, u32 sh_num)
2963 {
2964 	u32 data = INSTANCE_BROADCAST_WRITES;
2965 
2966 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2967 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2968 	else if (se_num == 0xffffffff)
2969 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2970 	else if (sh_num == 0xffffffff)
2971 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2972 	else
2973 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2974 	WREG32(GRBM_GFX_INDEX, data);
2975 }
2976 
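/* Build a mask with the low bit_width bits set, i.e. (1 << bit_width) - 1. */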
2977 static u32 si_create_bitmask(u32 bit_width)
2978 {
2979 	u32 i, mask = 0;
2980 
2981 	for (i = 0; i < bit_width; i++) {
2982 		mask <<= 1;
2983 		mask |= 1;
2984 	}
2985 	return mask;
2986 }
2987 
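/*
 * Combine the fuse-disabled (CC_GC_SHADER_ARRAY_CONFIG) and
 * driver-disabled (GC_USER_SHADER_ARRAY_CONFIG) CU masks, then invert to
 * get the bitmap of active CUs within the first cu_per_sh slots.
 */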
2988 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2989 {
2990 	u32 data, mask;
2991 
2992 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2993 	if (data & 1)
2994 		data &= INACTIVE_CUS_MASK;
2995 	else
2996 		data = 0;
2997 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2998 
2999 	data >>= INACTIVE_CUS_SHIFT;
3000 
3001 	mask = si_create_bitmask(cu_per_sh);
3002 
3003 	return ~data & mask;
3004 }
3005 
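/*
 * For every SE/SH pair, clear the SPI_STATIC_THREAD_MGMT_3 enable bit of
 * the first active CU found, which presumably reserves that CU from the
 * static thread group the register gates.
 */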
3006 static void si_setup_spi(struct radeon_device *rdev,
3007 			 u32 se_num, u32 sh_per_se,
3008 			 u32 cu_per_sh)
3009 {
3010 	int i, j, k;
3011 	u32 data, mask, active_cu;
3012 
3013 	for (i = 0; i < se_num; i++) {
3014 		for (j = 0; j < sh_per_se; j++) {
3015 			si_select_se_sh(rdev, i, j);
3016 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3017 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3018 
3019 			mask = 1;
3020 			for (k = 0; k < 16; k++, mask <<= 1) {
3021 				/* scan for the first active CU in this SH */
3022 				if (active_cu & mask) {
3023 					data &= ~mask;
3024 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3025 					break;
3026 				}
3027 			}
3028 		}
3029 	}
3030 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3031 }
3032 
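/*
 * As with the CU mask above, merge the fuse- and driver-disabled RB masks
 * and return the disabled render backends belonging to one shader array.
 */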
3033 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3034 			      u32 max_rb_num_per_se,
3035 			      u32 sh_per_se)
3036 {
3037 	u32 data, mask;
3038 
3039 	data = RREG32(CC_RB_BACKEND_DISABLE);
3040 	if (data & 1)
3041 		data &= BACKEND_DISABLE_MASK;
3042 	else
3043 		data = 0;
3044 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3045 
3046 	data >>= BACKEND_DISABLE_SHIFT;
3047 
3048 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3049 
3050 	return data & mask;
3051 }
3052 
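/*
 * Work out which render backends (RBs) survived harvesting, publish the
 * resulting enable mask for userspace, and program PA_SC_RASTER_CONFIG
 * with a 2-bit RB_MAP field per shader array so rasterization only
 * targets enabled backends.
 */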
3053 static void si_setup_rb(struct radeon_device *rdev,
3054 			u32 se_num, u32 sh_per_se,
3055 			u32 max_rb_num_per_se)
3056 {
3057 	int i, j;
3058 	u32 data, mask;
3059 	u32 disabled_rbs = 0;
3060 	u32 enabled_rbs = 0;
3061 
3062 	for (i = 0; i < se_num; i++) {
3063 		for (j = 0; j < sh_per_se; j++) {
3064 			si_select_se_sh(rdev, i, j);
3065 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3066 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3067 		}
3068 	}
3069 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3070 
3071 	mask = 1;
3072 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3073 		if (!(disabled_rbs & mask))
3074 			enabled_rbs |= mask;
3075 		mask <<= 1;
3076 	}
3077 
3078 	rdev->config.si.backend_enable_mask = enabled_rbs;
3079 
3080 	for (i = 0; i < se_num; i++) {
3081 		si_select_se_sh(rdev, i, 0xffffffff);
3082 		data = 0;
3083 		for (j = 0; j < sh_per_se; j++) {
3084 			switch (enabled_rbs & 3) {
3085 			case 1:
3086 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3087 				break;
3088 			case 2:
3089 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3090 				break;
3091 			case 3:
3092 			default:
3093 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3094 				break;
3095 			}
3096 			enabled_rbs >>= 2;
3097 		}
3098 		WREG32(PA_SC_RASTER_CONFIG, data);
3099 	}
3100 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3101 }
3102 
3103 static void si_gpu_init(struct radeon_device *rdev)
3104 {
3105 	u32 gb_addr_config = 0;
3106 	u32 mc_shared_chmap, mc_arb_ramcfg;
3107 	u32 sx_debug_1;
3108 	u32 hdp_host_path_cntl;
3109 	u32 tmp;
3110 	int i, j;
3111 
3112 	switch (rdev->family) {
3113 	case CHIP_TAHITI:
3114 		rdev->config.si.max_shader_engines = 2;
3115 		rdev->config.si.max_tile_pipes = 12;
3116 		rdev->config.si.max_cu_per_sh = 8;
3117 		rdev->config.si.max_sh_per_se = 2;
3118 		rdev->config.si.max_backends_per_se = 4;
3119 		rdev->config.si.max_texture_channel_caches = 12;
3120 		rdev->config.si.max_gprs = 256;
3121 		rdev->config.si.max_gs_threads = 32;
3122 		rdev->config.si.max_hw_contexts = 8;
3123 
3124 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3125 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3126 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3127 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3128 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3129 		break;
3130 	case CHIP_PITCAIRN:
3131 		rdev->config.si.max_shader_engines = 2;
3132 		rdev->config.si.max_tile_pipes = 8;
3133 		rdev->config.si.max_cu_per_sh = 5;
3134 		rdev->config.si.max_sh_per_se = 2;
3135 		rdev->config.si.max_backends_per_se = 4;
3136 		rdev->config.si.max_texture_channel_caches = 8;
3137 		rdev->config.si.max_gprs = 256;
3138 		rdev->config.si.max_gs_threads = 32;
3139 		rdev->config.si.max_hw_contexts = 8;
3140 
3141 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3142 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3143 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3144 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3145 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3146 		break;
3147 	case CHIP_VERDE:
3148 	default:
3149 		rdev->config.si.max_shader_engines = 1;
3150 		rdev->config.si.max_tile_pipes = 4;
3151 		rdev->config.si.max_cu_per_sh = 5;
3152 		rdev->config.si.max_sh_per_se = 2;
3153 		rdev->config.si.max_backends_per_se = 4;
3154 		rdev->config.si.max_texture_channel_caches = 4;
3155 		rdev->config.si.max_gprs = 256;
3156 		rdev->config.si.max_gs_threads = 32;
3157 		rdev->config.si.max_hw_contexts = 8;
3158 
3159 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3160 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3161 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3162 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3163 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3164 		break;
3165 	case CHIP_OLAND:
3166 		rdev->config.si.max_shader_engines = 1;
3167 		rdev->config.si.max_tile_pipes = 4;
3168 		rdev->config.si.max_cu_per_sh = 6;
3169 		rdev->config.si.max_sh_per_se = 1;
3170 		rdev->config.si.max_backends_per_se = 2;
3171 		rdev->config.si.max_texture_channel_caches = 4;
3172 		rdev->config.si.max_gprs = 256;
3173 		rdev->config.si.max_gs_threads = 16;
3174 		rdev->config.si.max_hw_contexts = 8;
3175 
3176 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3177 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3178 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3179 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3180 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3181 		break;
3182 	case CHIP_HAINAN:
3183 		rdev->config.si.max_shader_engines = 1;
3184 		rdev->config.si.max_tile_pipes = 4;
3185 		rdev->config.si.max_cu_per_sh = 5;
3186 		rdev->config.si.max_sh_per_se = 1;
3187 		rdev->config.si.max_backends_per_se = 1;
3188 		rdev->config.si.max_texture_channel_caches = 2;
3189 		rdev->config.si.max_gprs = 256;
3190 		rdev->config.si.max_gs_threads = 16;
3191 		rdev->config.si.max_hw_contexts = 8;
3192 
3193 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3194 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3195 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3196 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3197 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3198 		break;
3199 	}
3200 
3201 	/* Initialize HDP */
3202 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3203 		WREG32((0x2c14 + j), 0x00000000);
3204 		WREG32((0x2c18 + j), 0x00000000);
3205 		WREG32((0x2c1c + j), 0x00000000);
3206 		WREG32((0x2c20 + j), 0x00000000);
3207 		WREG32((0x2c24 + j), 0x00000000);
3208 	}
3209 
3210 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3211 	WREG32(SRBM_INT_CNTL, 1);
3212 	WREG32(SRBM_INT_ACK, 1);
3213 
3214 	evergreen_fix_pci_max_read_req_size(rdev);
3215 
3216 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3217 
3218 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3219 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3220 
3221 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3222 	rdev->config.si.mem_max_burst_length_bytes = 256;
3223 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3224 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3225 	if (rdev->config.si.mem_row_size_in_kb > 4)
3226 		rdev->config.si.mem_row_size_in_kb = 4;
3227 	/* XXX use MC settings? */
3228 	rdev->config.si.shader_engine_tile_size = 32;
3229 	rdev->config.si.num_gpus = 1;
3230 	rdev->config.si.multi_gpu_tile_size = 64;
3231 
3232 	/* fix up row size */
3233 	gb_addr_config &= ~ROW_SIZE_MASK;
3234 	switch (rdev->config.si.mem_row_size_in_kb) {
3235 	case 1:
3236 	default:
3237 		gb_addr_config |= ROW_SIZE(0);
3238 		break;
3239 	case 2:
3240 		gb_addr_config |= ROW_SIZE(1);
3241 		break;
3242 	case 4:
3243 		gb_addr_config |= ROW_SIZE(2);
3244 		break;
3245 	}
3246 
3247 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3248 	 * not have bank info, so create a custom tiling dword.
3249 	 * bits 3:0   num_pipes
3250 	 * bits 7:4   num_banks
3251 	 * bits 11:8  group_size
3252 	 * bits 15:12 row_size
3253 	 */
3254 	rdev->config.si.tile_config = 0;
3255 	switch (rdev->config.si.num_tile_pipes) {
3256 	case 1:
3257 		rdev->config.si.tile_config |= (0 << 0);
3258 		break;
3259 	case 2:
3260 		rdev->config.si.tile_config |= (1 << 0);
3261 		break;
3262 	case 4:
3263 		rdev->config.si.tile_config |= (2 << 0);
3264 		break;
3265 	case 8:
3266 	default:
3267 		/* XXX what about 12? */
3268 		rdev->config.si.tile_config |= (3 << 0);
3269 		break;
3270 	}
3271 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3272 	case 0: /* four banks */
3273 		rdev->config.si.tile_config |= 0 << 4;
3274 		break;
3275 	case 1: /* eight banks */
3276 		rdev->config.si.tile_config |= 1 << 4;
3277 		break;
3278 	case 2: /* sixteen banks */
3279 	default:
3280 		rdev->config.si.tile_config |= 2 << 4;
3281 		break;
3282 	}
3283 	rdev->config.si.tile_config |=
3284 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3285 	rdev->config.si.tile_config |=
3286 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3287 
3288 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3289 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3290 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3291 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3292 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3293 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3294 	if (rdev->has_uvd) {
3295 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3296 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3297 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3298 	}
3299 
3300 	si_tiling_mode_table_init(rdev);
3301 
3302 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3303 		    rdev->config.si.max_sh_per_se,
3304 		    rdev->config.si.max_backends_per_se);
3305 
3306 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3307 		     rdev->config.si.max_sh_per_se,
3308 		     rdev->config.si.max_cu_per_sh);
3309 
3310 	rdev->config.si.active_cus = 0;
3311 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3312 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3313 			rdev->config.si.active_cus +=
3314 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3315 		}
3316 	}
3317 
3318 	/* set HW defaults for 3D engine */
3319 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3320 				     ROQ_IB2_START(0x2b)));
3321 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3322 
3323 	sx_debug_1 = RREG32(SX_DEBUG_1);
3324 	WREG32(SX_DEBUG_1, sx_debug_1);
3325 
3326 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3327 
3328 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3329 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3330 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3331 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3332 
3333 	WREG32(VGT_NUM_INSTANCES, 1);
3334 
3335 	WREG32(CP_PERFMON_CNTL, 0);
3336 
3337 	WREG32(SQ_CONFIG, 0);
3338 
3339 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3340 					  FORCE_EOV_MAX_REZ_CNT(255)));
3341 
3342 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3343 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3344 
3345 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3346 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3347 
3348 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3349 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3350 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3351 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3352 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3353 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3354 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3355 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3356 
3357 	tmp = RREG32(HDP_MISC_CNTL);
3358 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3359 	WREG32(HDP_MISC_CNTL, tmp);
3360 
3361 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3362 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3363 
3364 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3365 
3366 	udelay(50);
3367 }
3368 
3369 /*
3370  * GPU scratch registers helper functions.
3371  */
3372 static void si_scratch_init(struct radeon_device *rdev)
3373 {
3374 	int i;
3375 
3376 	rdev->scratch.num_reg = 7;
3377 	rdev->scratch.reg_base = SCRATCH_REG0;
3378 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3379 		rdev->scratch.free[i] = true;
3380 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3381 	}
3382 }
3383 
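/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flush the read caches over the GART, then use an EVENT_WRITE_EOP
 * packet to flush the caches, write the fence sequence number and
 * trigger an interrupt.
 */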
3384 void si_fence_ring_emit(struct radeon_device *rdev,
3385 			struct radeon_fence *fence)
3386 {
3387 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3388 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3389 
3390 	/* flush read cache over gart */
3391 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3392 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3393 	radeon_ring_write(ring, 0);
3394 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3395 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3396 			  PACKET3_TC_ACTION_ENA |
3397 			  PACKET3_SH_KCACHE_ACTION_ENA |
3398 			  PACKET3_SH_ICACHE_ACTION_ENA);
3399 	radeon_ring_write(ring, 0xFFFFFFFF);
3400 	radeon_ring_write(ring, 0);
3401 	radeon_ring_write(ring, 10); /* poll interval */
3402 	/* EVENT_WRITE_EOP - flush caches, send int */
3403 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3404 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3405 	radeon_ring_write(ring, lower_32_bits(addr));
3406 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3407 	radeon_ring_write(ring, fence->seq);
3408 	radeon_ring_write(ring, 0);
3409 }
3410 
3411 /*
3412  * Indirect buffer (IB) handling
3413  */
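/**
 * si_ring_ib_execute - emit an IB on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Schedule an indirect buffer for execution, emitting a switch buffer
 * packet before const IBs and flushing the read caches for the IB's
 * VM id after regular IBs.
 */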
3414 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3415 {
3416 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3417 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3418 	u32 header;
3419 
3420 	if (ib->is_const_ib) {
3421 		/* set switch buffer packet before const IB */
3422 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3423 		radeon_ring_write(ring, 0);
3424 
3425 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3426 	} else {
3427 		u32 next_rptr;
3428 		if (ring->rptr_save_reg) {
3429 			next_rptr = ring->wptr + 3 + 4 + 8;
3430 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3431 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3432 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3433 			radeon_ring_write(ring, next_rptr);
3434 		} else if (rdev->wb.enabled) {
3435 			next_rptr = ring->wptr + 5 + 4 + 8;
3436 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3437 			radeon_ring_write(ring, (1 << 8));
3438 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3439 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3440 			radeon_ring_write(ring, next_rptr);
3441 		}
3442 
3443 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3444 	}
3445 
3446 	radeon_ring_write(ring, header);
3447 	radeon_ring_write(ring,
3448 #ifdef __BIG_ENDIAN
3449 			  (2 << 0) |
3450 #endif
3451 			  (ib->gpu_addr & 0xFFFFFFFC));
3452 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3453 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3454 
3455 	if (!ib->is_const_ib) {
3456 		/* flush read cache over gart for this vmid */
3457 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3458 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3459 		radeon_ring_write(ring, vm_id);
3460 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3461 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3462 				  PACKET3_TC_ACTION_ENA |
3463 				  PACKET3_SH_KCACHE_ACTION_ENA |
3464 				  PACKET3_SH_ICACHE_ACTION_ENA);
3465 		radeon_ring_write(ring, 0xFFFFFFFF);
3466 		radeon_ring_write(ring, 0);
3467 		radeon_ring_write(ring, 10); /* poll interval */
3468 	}
3469 }
3470 
3471 /*
3472  * CP.
3473  */
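/**
 * si_cp_enable - enable/disable the CP micro engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Unhalt or halt the ME, PFP and CE; when disabling, the gfx and both
 * compute rings are marked not ready.
 */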
3474 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3475 {
3476 	if (enable) {
3477 		WREG32(CP_ME_CNTL, 0);
3478 	} else {
3479 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3480 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3481 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3482 		WREG32(SCRATCH_UMSK, 0);
3483 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3484 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3485 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3486 	}
3487 	udelay(50);
3488 }
3489 
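/**
 * si_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP and load the PFP, CE and ME microcode into their ucode
 * RAMs, handling both the header-based (new) and raw (legacy)
 * firmware layouts.
 * Returns 0 on success, -EINVAL if any firmware is missing.
 */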
3490 static int si_cp_load_microcode(struct radeon_device *rdev)
3491 {
3492 	int i;
3493 
3494 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3495 		return -EINVAL;
3496 
3497 	si_cp_enable(rdev, false);
3498 
3499 	if (rdev->new_fw) {
3500 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3501 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3502 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3503 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3504 		const struct gfx_firmware_header_v1_0 *me_hdr =
3505 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3506 		const __le32 *fw_data;
3507 		u32 fw_size;
3508 
3509 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3510 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3511 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3512 
3513 		/* PFP */
3514 		fw_data = (const __le32 *)
3515 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3516 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3517 		WREG32(CP_PFP_UCODE_ADDR, 0);
3518 		for (i = 0; i < fw_size; i++)
3519 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3520 		WREG32(CP_PFP_UCODE_ADDR, 0);
3521 
3522 		/* CE */
3523 		fw_data = (const __le32 *)
3524 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3525 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3526 		WREG32(CP_CE_UCODE_ADDR, 0);
3527 		for (i = 0; i < fw_size; i++)
3528 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3529 		WREG32(CP_CE_UCODE_ADDR, 0);
3530 
3531 		/* ME */
3532 		fw_data = (const __le32 *)
3533 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3534 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3535 		WREG32(CP_ME_RAM_WADDR, 0);
3536 		for (i = 0; i < fw_size; i++)
3537 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3538 		WREG32(CP_ME_RAM_WADDR, 0);
3539 	} else {
3540 		const __be32 *fw_data;
3541 
3542 		/* PFP */
3543 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3544 		WREG32(CP_PFP_UCODE_ADDR, 0);
3545 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3546 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3547 		WREG32(CP_PFP_UCODE_ADDR, 0);
3548 
3549 		/* CE */
3550 		fw_data = (const __be32 *)rdev->ce_fw->data;
3551 		WREG32(CP_CE_UCODE_ADDR, 0);
3552 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3553 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3554 		WREG32(CP_CE_UCODE_ADDR, 0);
3555 
3556 		/* ME */
3557 		fw_data = (const __be32 *)rdev->me_fw->data;
3558 		WREG32(CP_ME_RAM_WADDR, 0);
3559 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3560 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3561 		WREG32(CP_ME_RAM_WADDR, 0);
3562 	}
3563 
3564 	WREG32(CP_PFP_UCODE_ADDR, 0);
3565 	WREG32(CP_CE_UCODE_ADDR, 0);
3566 	WREG32(CP_ME_RAM_WADDR, 0);
3567 	WREG32(CP_ME_RAM_RADDR, 0);
3568 	return 0;
3569 }
3570 
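/**
 * si_cp_start - initialize the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Emit ME_INITIALIZE, set up the CE partitions, load the default
 * clear state on the gfx ring and clear the compute context state on
 * both compute rings.
 * Returns 0 on success, negative error code on failure.
 */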
3571 static int si_cp_start(struct radeon_device *rdev)
3572 {
3573 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3574 	int r, i;
3575 
3576 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3577 	if (r) {
3578 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3579 		return r;
3580 	}
3581 	/* init the CP */
3582 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3583 	radeon_ring_write(ring, 0x1);
3584 	radeon_ring_write(ring, 0x0);
3585 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3586 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3587 	radeon_ring_write(ring, 0);
3588 	radeon_ring_write(ring, 0);
3589 
3590 	/* init the CE partitions */
3591 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3592 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3593 	radeon_ring_write(ring, 0xc000);
3594 	radeon_ring_write(ring, 0xe000);
3595 	radeon_ring_unlock_commit(rdev, ring, false);
3596 
3597 	si_cp_enable(rdev, true);
3598 
3599 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3600 	if (r) {
3601 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3602 		return r;
3603 	}
3604 
3605 	/* setup clear context state */
3606 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3607 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3608 
3609 	for (i = 0; i < si_default_size; i++)
3610 		radeon_ring_write(ring, si_default_state[i]);
3611 
3612 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3613 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3614 
3615 	/* set clear context state */
3616 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3617 	radeon_ring_write(ring, 0);
3618 
3619 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3620 	radeon_ring_write(ring, 0x00000316);
3621 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3622 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3623 
3624 	radeon_ring_unlock_commit(rdev, ring, false);
3625 
3626 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3627 		ring = &rdev->ring[i];
3628 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3629 
3630 		/* clear the compute context state */
3631 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3632 		radeon_ring_write(ring, 0);
3633 
3634 		radeon_ring_unlock_commit(rdev, ring, false);
3635 	}
3636 
3637 	return 0;
3638 }
3639 
3640 static void si_cp_fini(struct radeon_device *rdev)
3641 {
3642 	struct radeon_ring *ring;
3643 	si_cp_enable(rdev, false);
3644 
3645 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3646 	radeon_ring_fini(rdev, ring);
3647 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3648 
3649 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3650 	radeon_ring_fini(rdev, ring);
3651 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3652 
3653 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3654 	radeon_ring_fini(rdev, ring);
3655 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3656 }
3657 
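/**
 * si_cp_resume - set up and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Program the size, read/write pointers and writeback addresses of
 * the gfx ring and both compute rings, start them via si_cp_start()
 * and run the ring tests.
 * Returns 0 on success, negative error code on failure.
 */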
3658 static int si_cp_resume(struct radeon_device *rdev)
3659 {
3660 	struct radeon_ring *ring;
3661 	u32 tmp;
3662 	u32 rb_bufsz;
3663 	int r;
3664 
3665 	si_enable_gui_idle_interrupt(rdev, false);
3666 
3667 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3668 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3669 
3670 	/* Set the write pointer delay */
3671 	WREG32(CP_RB_WPTR_DELAY, 0);
3672 
3673 	WREG32(CP_DEBUG, 0);
3674 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3675 
3676 	/* ring 0 - compute and gfx */
3677 	/* Set ring buffer size */
3678 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3679 	rb_bufsz = order_base_2(ring->ring_size / 8);
3680 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3681 #ifdef __BIG_ENDIAN
3682 	tmp |= BUF_SWAP_32BIT;
3683 #endif
3684 	WREG32(CP_RB0_CNTL, tmp);
3685 
3686 	/* Initialize the ring buffer's read and write pointers */
3687 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3688 	ring->wptr = 0;
3689 	WREG32(CP_RB0_WPTR, ring->wptr);
3690 
3691 	/* set the wb address whether it's enabled or not */
3692 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3693 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3694 
3695 	if (rdev->wb.enabled) {
3696 		WREG32(SCRATCH_UMSK, 0xff);
3697 	} else {
3698 		tmp |= RB_NO_UPDATE;
3699 		WREG32(SCRATCH_UMSK, 0);
3700 	}
3701 
3702 	mdelay(1);
3703 	WREG32(CP_RB0_CNTL, tmp);
3704 
3705 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3706 
3707 	/* ring1  - compute only */
3708 	/* Set ring buffer size */
3709 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3710 	rb_bufsz = order_base_2(ring->ring_size / 8);
3711 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3712 #ifdef __BIG_ENDIAN
3713 	tmp |= BUF_SWAP_32BIT;
3714 #endif
3715 	WREG32(CP_RB1_CNTL, tmp);
3716 
3717 	/* Initialize the ring buffer's read and write pointers */
3718 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3719 	ring->wptr = 0;
3720 	WREG32(CP_RB1_WPTR, ring->wptr);
3721 
3722 	/* set the wb address whether it's enabled or not */
3723 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3724 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3725 
3726 	mdelay(1);
3727 	WREG32(CP_RB1_CNTL, tmp);
3728 
3729 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3730 
3731 	/* ring2 - compute only */
3732 	/* Set ring buffer size */
3733 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3734 	rb_bufsz = order_base_2(ring->ring_size / 8);
3735 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3736 #ifdef __BIG_ENDIAN
3737 	tmp |= BUF_SWAP_32BIT;
3738 #endif
3739 	WREG32(CP_RB2_CNTL, tmp);
3740 
3741 	/* Initialize the ring buffer's read and write pointers */
3742 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3743 	ring->wptr = 0;
3744 	WREG32(CP_RB2_WPTR, ring->wptr);
3745 
3746 	/* set the wb address whether it's enabled or not */
3747 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3748 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3749 
3750 	mdelay(1);
3751 	WREG32(CP_RB2_CNTL, tmp);
3752 
3753 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3754 
3755 	/* start the rings */
3756 	si_cp_start(rdev);
3757 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3758 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3759 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3760 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3761 	if (r) {
3762 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3763 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3764 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3765 		return r;
3766 	}
3767 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3768 	if (r) {
3769 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3770 	}
3771 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3772 	if (r) {
3773 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3774 	}
3775 
3776 	si_enable_gui_idle_interrupt(rdev, true);
3777 
3778 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3779 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3780 
3781 	return 0;
3782 }
3783 
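/**
 * si_gpu_check_soft_reset - determine which blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Check the GRBM, SRBM, DMA, display and VM L2 status registers and
 * build a mask of RADEON_RESET_* flags for the blocks that appear
 * hung.  MC busy is treated as busy rather than hung and is cleared
 * from the mask.
 */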
3784 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3785 {
3786 	u32 reset_mask = 0;
3787 	u32 tmp;
3788 
3789 	/* GRBM_STATUS */
3790 	tmp = RREG32(GRBM_STATUS);
3791 	if (tmp & (PA_BUSY | SC_BUSY |
3792 		   BCI_BUSY | SX_BUSY |
3793 		   TA_BUSY | VGT_BUSY |
3794 		   DB_BUSY | CB_BUSY |
3795 		   GDS_BUSY | SPI_BUSY |
3796 		   IA_BUSY | IA_BUSY_NO_DMA))
3797 		reset_mask |= RADEON_RESET_GFX;
3798 
3799 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3800 		   CP_BUSY | CP_COHERENCY_BUSY))
3801 		reset_mask |= RADEON_RESET_CP;
3802 
3803 	if (tmp & GRBM_EE_BUSY)
3804 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3805 
3806 	/* GRBM_STATUS2 */
3807 	tmp = RREG32(GRBM_STATUS2);
3808 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3809 		reset_mask |= RADEON_RESET_RLC;
3810 
3811 	/* DMA_STATUS_REG 0 */
3812 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3813 	if (!(tmp & DMA_IDLE))
3814 		reset_mask |= RADEON_RESET_DMA;
3815 
3816 	/* DMA_STATUS_REG 1 */
3817 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3818 	if (!(tmp & DMA_IDLE))
3819 		reset_mask |= RADEON_RESET_DMA1;
3820 
3821 	/* SRBM_STATUS2 */
3822 	tmp = RREG32(SRBM_STATUS2);
3823 	if (tmp & DMA_BUSY)
3824 		reset_mask |= RADEON_RESET_DMA;
3825 
3826 	if (tmp & DMA1_BUSY)
3827 		reset_mask |= RADEON_RESET_DMA1;
3828 
3829 	/* SRBM_STATUS */
3830 	tmp = RREG32(SRBM_STATUS);
3831 
3832 	if (tmp & IH_BUSY)
3833 		reset_mask |= RADEON_RESET_IH;
3834 
3835 	if (tmp & SEM_BUSY)
3836 		reset_mask |= RADEON_RESET_SEM;
3837 
3838 	if (tmp & GRBM_RQ_PENDING)
3839 		reset_mask |= RADEON_RESET_GRBM;
3840 
3841 	if (tmp & VMC_BUSY)
3842 		reset_mask |= RADEON_RESET_VMC;
3843 
3844 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3845 		   MCC_BUSY | MCD_BUSY))
3846 		reset_mask |= RADEON_RESET_MC;
3847 
3848 	if (evergreen_is_display_hung(rdev))
3849 		reset_mask |= RADEON_RESET_DISPLAY;
3850 
3851 	/* VM_L2_STATUS */
3852 	tmp = RREG32(VM_L2_STATUS);
3853 	if (tmp & L2_BUSY)
3854 		reset_mask |= RADEON_RESET_VMC;
3855 
3856 	/* Skip MC reset as it's most likely not hung, just busy */
3857 	if (reset_mask & RADEON_RESET_MC) {
3858 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3859 		reset_mask &= ~RADEON_RESET_MC;
3860 	}
3861 
3862 	return reset_mask;
3863 }
3864 
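/**
 * si_gpu_soft_reset - soft reset the requested GPU blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of blocks to reset
 *
 * Stop the RLC, halt the CP, disable the requested DMA engines, then
 * pulse the corresponding GRBM/SRBM soft reset bits with the MC
 * stopped.
 */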
3865 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3866 {
3867 	struct evergreen_mc_save save;
3868 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3869 	u32 tmp;
3870 
3871 	if (reset_mask == 0)
3872 		return;
3873 
3874 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3875 
3876 	evergreen_print_gpu_status_regs(rdev);
3877 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3878 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3879 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3880 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3881 
3882 	/* disable PG/CG */
3883 	si_fini_pg(rdev);
3884 	si_fini_cg(rdev);
3885 
3886 	/* stop the rlc */
3887 	si_rlc_stop(rdev);
3888 
3889 	/* Disable CP parsing/prefetching */
3890 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3891 
3892 	if (reset_mask & RADEON_RESET_DMA) {
3893 		/* dma0 */
3894 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3895 		tmp &= ~DMA_RB_ENABLE;
3896 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3897 	}
3898 	if (reset_mask & RADEON_RESET_DMA1) {
3899 		/* dma1 */
3900 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3901 		tmp &= ~DMA_RB_ENABLE;
3902 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3903 	}
3904 
3905 	udelay(50);
3906 
3907 	evergreen_mc_stop(rdev, &save);
3908 	if (evergreen_mc_wait_for_idle(rdev)) {
3909 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3910 	}
3911 
3912 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3913 		grbm_soft_reset = SOFT_RESET_CB |
3914 			SOFT_RESET_DB |
3915 			SOFT_RESET_GDS |
3916 			SOFT_RESET_PA |
3917 			SOFT_RESET_SC |
3918 			SOFT_RESET_BCI |
3919 			SOFT_RESET_SPI |
3920 			SOFT_RESET_SX |
3921 			SOFT_RESET_TC |
3922 			SOFT_RESET_TA |
3923 			SOFT_RESET_VGT |
3924 			SOFT_RESET_IA;
3925 	}
3926 
3927 	if (reset_mask & RADEON_RESET_CP) {
3928 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3929 
3930 		srbm_soft_reset |= SOFT_RESET_GRBM;
3931 	}
3932 
3933 	if (reset_mask & RADEON_RESET_DMA)
3934 		srbm_soft_reset |= SOFT_RESET_DMA;
3935 
3936 	if (reset_mask & RADEON_RESET_DMA1)
3937 		srbm_soft_reset |= SOFT_RESET_DMA1;
3938 
3939 	if (reset_mask & RADEON_RESET_DISPLAY)
3940 		srbm_soft_reset |= SOFT_RESET_DC;
3941 
3942 	if (reset_mask & RADEON_RESET_RLC)
3943 		grbm_soft_reset |= SOFT_RESET_RLC;
3944 
3945 	if (reset_mask & RADEON_RESET_SEM)
3946 		srbm_soft_reset |= SOFT_RESET_SEM;
3947 
3948 	if (reset_mask & RADEON_RESET_IH)
3949 		srbm_soft_reset |= SOFT_RESET_IH;
3950 
3951 	if (reset_mask & RADEON_RESET_GRBM)
3952 		srbm_soft_reset |= SOFT_RESET_GRBM;
3953 
3954 	if (reset_mask & RADEON_RESET_VMC)
3955 		srbm_soft_reset |= SOFT_RESET_VMC;
3956 
3957 	if (reset_mask & RADEON_RESET_MC)
3958 		srbm_soft_reset |= SOFT_RESET_MC;
3959 
3960 	if (grbm_soft_reset) {
3961 		tmp = RREG32(GRBM_SOFT_RESET);
3962 		tmp |= grbm_soft_reset;
3963 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3964 		WREG32(GRBM_SOFT_RESET, tmp);
3965 		tmp = RREG32(GRBM_SOFT_RESET);
3966 
3967 		udelay(50);
3968 
3969 		tmp &= ~grbm_soft_reset;
3970 		WREG32(GRBM_SOFT_RESET, tmp);
3971 		tmp = RREG32(GRBM_SOFT_RESET);
3972 	}
3973 
3974 	if (srbm_soft_reset) {
3975 		tmp = RREG32(SRBM_SOFT_RESET);
3976 		tmp |= srbm_soft_reset;
3977 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3978 		WREG32(SRBM_SOFT_RESET, tmp);
3979 		tmp = RREG32(SRBM_SOFT_RESET);
3980 
3981 		udelay(50);
3982 
3983 		tmp &= ~srbm_soft_reset;
3984 		WREG32(SRBM_SOFT_RESET, tmp);
3985 		tmp = RREG32(SRBM_SOFT_RESET);
3986 	}
3987 
3988 	/* Wait a little for things to settle down */
3989 	udelay(50);
3990 
3991 	evergreen_mc_resume(rdev, &save);
3992 	udelay(50);
3993 
3994 	evergreen_print_gpu_status_regs(rdev);
3995 }
3996 
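/**
 * si_set_clk_bypass_mode - switch the engine/memory clocks to bypass
 *
 * @rdev: radeon_device pointer
 *
 * Put the SPLL into bypass, wait for the clock switch to complete and
 * deselect the MPLL as the mclk source, in preparation for a pci
 * config reset.
 */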
3997 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3998 {
3999 	u32 tmp, i;
4000 
4001 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4002 	tmp |= SPLL_BYPASS_EN;
4003 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4004 
4005 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4006 	tmp |= SPLL_CTLREQ_CHG;
4007 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4008 
4009 	for (i = 0; i < rdev->usec_timeout; i++) {
4010 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
4011 			break;
4012 		udelay(1);
4013 	}
4014 
4015 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4016 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4017 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4018 
4019 	tmp = RREG32(MPLL_CNTL_MODE);
4020 	tmp &= ~MPLL_MCLK_SEL;
4021 	WREG32(MPLL_CNTL_MODE, tmp);
4022 }
4023 
4024 static void si_spll_powerdown(struct radeon_device *rdev)
4025 {
4026 	u32 tmp;
4027 
4028 	tmp = RREG32(SPLL_CNTL_MODE);
4029 	tmp |= SPLL_SW_DIR_CONTROL;
4030 	WREG32(SPLL_CNTL_MODE, tmp);
4031 
4032 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4033 	tmp |= SPLL_RESET;
4034 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4035 
4036 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4037 	tmp |= SPLL_SLEEP;
4038 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4039 
4040 	tmp = RREG32(SPLL_CNTL_MODE);
4041 	tmp &= ~SPLL_SW_DIR_CONTROL;
4042 	WREG32(SPLL_CNTL_MODE, tmp);
4043 }
4044 
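/**
 * si_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the CP, DMA engines and RLC, stop memory access, switch the
 * clocks to bypass and power down the SPLL, then reset the chip
 * through pci config space and wait for it to come back.
 */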
4045 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4046 {
4047 	struct evergreen_mc_save save;
4048 	u32 tmp, i;
4049 
4050 	dev_info(rdev->dev, "GPU pci config reset\n");
4051 
4052 	/* disable dpm? */
4053 
4054 	/* disable cg/pg */
4055 	si_fini_pg(rdev);
4056 	si_fini_cg(rdev);
4057 
4058 	/* Disable CP parsing/prefetching */
4059 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4060 	/* dma0 */
4061 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4062 	tmp &= ~DMA_RB_ENABLE;
4063 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4064 	/* dma1 */
4065 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4066 	tmp &= ~DMA_RB_ENABLE;
4067 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4068 	/* XXX other engines? */
4069 
4070 	/* halt the rlc, disable cp internal ints */
4071 	si_rlc_stop(rdev);
4072 
4073 	udelay(50);
4074 
4075 	/* disable mem access */
4076 	evergreen_mc_stop(rdev, &save);
4077 	if (evergreen_mc_wait_for_idle(rdev)) {
4078 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4079 	}
4080 
4081 	/* set mclk/sclk to bypass */
4082 	si_set_clk_bypass_mode(rdev);
4083 	/* powerdown spll */
4084 	si_spll_powerdown(rdev);
4085 	/* disable BM */
4086 	pci_clear_master(rdev->pdev);
4087 	/* reset */
4088 	radeon_pci_config_reset(rdev);
4089 	/* wait for asic to come out of reset */
4090 	for (i = 0; i < rdev->usec_timeout; i++) {
4091 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4092 			break;
4093 		udelay(1);
4094 	}
4095 }
4096 
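/**
 * si_asic_reset - attempt to reset the asic
 *
 * @rdev: radeon_device pointer
 *
 * Try a soft reset of the hung blocks first; if some blocks are still
 * hung and hard resets are allowed, fall back to a pci config reset.
 * Returns 0.
 */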
4097 int si_asic_reset(struct radeon_device *rdev)
4098 {
4099 	u32 reset_mask;
4100 
4101 	reset_mask = si_gpu_check_soft_reset(rdev);
4102 
4103 	if (reset_mask)
4104 		r600_set_bios_scratch_engine_hung(rdev, true);
4105 
4106 	/* try soft reset */
4107 	si_gpu_soft_reset(rdev, reset_mask);
4108 
4109 	reset_mask = si_gpu_check_soft_reset(rdev);
4110 
4111 	/* try pci config reset */
4112 	if (reset_mask && radeon_hard_reset)
4113 		si_gpu_pci_config_reset(rdev);
4114 
4115 	reset_mask = si_gpu_check_soft_reset(rdev);
4116 
4117 	if (!reset_mask)
4118 		r600_set_bios_scratch_engine_hung(rdev, false);
4119 
4120 	return 0;
4121 }
4122 
4123 /**
4124  * si_gfx_is_lockup - Check if the GFX engine is locked up
4125  *
4126  * @rdev: radeon_device pointer
4127  * @ring: radeon_ring structure holding ring information
4128  *
4129  * Check if the GFX engine is locked up.
4130  * Returns true if the engine appears to be locked up, false if not.
4131  */
4132 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4133 {
4134 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4135 
4136 	if (!(reset_mask & (RADEON_RESET_GFX |
4137 			    RADEON_RESET_COMPUTE |
4138 			    RADEON_RESET_CP))) {
4139 		radeon_ring_lockup_update(rdev, ring);
4140 		return false;
4141 	}
4142 	return radeon_ring_test_lockup(rdev, ring);
4143 }
4144 
4145 /* MC */
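/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Set up the system and VRAM apertures with the MC stopped, lock out
 * VGA access to memory, and disable the VGA renderer once the driver
 * owns VRAM.
 */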
4146 static void si_mc_program(struct radeon_device *rdev)
4147 {
4148 	struct evergreen_mc_save save;
4149 	u32 tmp;
4150 	int i, j;
4151 
4152 	/* Initialize HDP */
4153 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4154 		WREG32((0x2c14 + j), 0x00000000);
4155 		WREG32((0x2c18 + j), 0x00000000);
4156 		WREG32((0x2c1c + j), 0x00000000);
4157 		WREG32((0x2c20 + j), 0x00000000);
4158 		WREG32((0x2c24 + j), 0x00000000);
4159 	}
4160 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4161 
4162 	evergreen_mc_stop(rdev, &save);
4163 	if (radeon_mc_wait_for_idle(rdev)) {
4164 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4165 	}
4166 	if (!ASIC_IS_NODCE(rdev))
4167 		/* Lock out access through the VGA aperture */
4168 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4169 	/* Update configuration */
4170 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4171 	       rdev->mc.vram_start >> 12);
4172 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4173 	       rdev->mc.vram_end >> 12);
4174 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4175 	       rdev->vram_scratch.gpu_addr >> 12);
4176 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4177 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4178 	WREG32(MC_VM_FB_LOCATION, tmp);
4179 	/* XXX double check these! */
4180 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4181 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4182 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4183 	WREG32(MC_VM_AGP_BASE, 0);
4184 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4185 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4186 	if (radeon_mc_wait_for_idle(rdev)) {
4187 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4188 	}
4189 	evergreen_mc_resume(rdev, &save);
4190 	if (!ASIC_IS_NODCE(rdev)) {
4191 		/* we need to own VRAM, so turn off the VGA renderer here
4192 		 * to stop it from overwriting our objects */
4193 		rv515_vga_render_disable(rdev);
4194 	}
4195 }
4196 
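/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure to fill in
 *
 * Cap VRAM so that at least 1024M is left for the GTT, then place
 * VRAM at offset 0 and the GTT after it.
 */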
4197 void si_vram_gtt_location(struct radeon_device *rdev,
4198 			  struct radeon_mc *mc)
4199 {
4200 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4201 		/* leave room for at least 1024M GTT */
4202 		dev_warn(rdev->dev, "limiting VRAM\n");
4203 		mc->real_vram_size = 0xFFC0000000ULL;
4204 		mc->mc_vram_size = 0xFFC0000000ULL;
4205 	}
4206 	radeon_vram_location(rdev, &rdev->mc, 0);
4207 	rdev->mc.gtt_base_align = 0;
4208 	radeon_gtt_location(rdev, mc);
4209 }
4210 
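/**
 * si_mc_init - initialize the memory controller parameters
 *
 * @rdev: radeon_device pointer
 *
 * Work out the memory channel size and count, read the VRAM size from
 * CONFIG_MEMSIZE (masking off garbage in the upper 16 bits) and place
 * VRAM and GTT in the GPU address space.
 * Returns 0 for success.
 */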
4211 static int si_mc_init(struct radeon_device *rdev)
4212 {
4213 	u32 tmp;
4214 	int chansize, numchan;
4215 
4216 	/* Get VRAM information */
4217 	rdev->mc.vram_is_ddr = true;
4218 	tmp = RREG32(MC_ARB_RAMCFG);
4219 	if (tmp & CHANSIZE_OVERRIDE) {
4220 		chansize = 16;
4221 	} else if (tmp & CHANSIZE_MASK) {
4222 		chansize = 64;
4223 	} else {
4224 		chansize = 32;
4225 	}
4226 	tmp = RREG32(MC_SHARED_CHMAP);
4227 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4228 	case 0:
4229 	default:
4230 		numchan = 1;
4231 		break;
4232 	case 1:
4233 		numchan = 2;
4234 		break;
4235 	case 2:
4236 		numchan = 4;
4237 		break;
4238 	case 3:
4239 		numchan = 8;
4240 		break;
4241 	case 4:
4242 		numchan = 3;
4243 		break;
4244 	case 5:
4245 		numchan = 6;
4246 		break;
4247 	case 6:
4248 		numchan = 10;
4249 		break;
4250 	case 7:
4251 		numchan = 12;
4252 		break;
4253 	case 8:
4254 		numchan = 16;
4255 		break;
4256 	}
4257 	rdev->mc.vram_width = numchan * chansize;
4258 	/* Could the aperture size report 0? */
4259 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4260 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4261 	/* size in MB on si */
4262 	tmp = RREG32(CONFIG_MEMSIZE);
4263 	/* some boards may have garbage in the upper 16 bits */
4264 	if (tmp & 0xffff0000) {
4265 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4266 		if (tmp & 0xffff)
4267 			tmp &= 0xffff;
4268 	}
4269 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4270 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4271 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4272 	si_vram_gtt_location(rdev, &rdev->mc);
4273 	radeon_update_bandwidth_info(rdev);
4274 
4275 	return 0;
4276 }
4277 
4278 /*
4279  * GART
4280  */
4281 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4282 {
4283 	/* flush hdp cache */
4284 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4285 
4286 	/* bits 0-15 are the VM contexts0-15 */
4287 	WREG32(VM_INVALIDATE_REQUEST, 1);
4288 }
4289 
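/**
 * si_pcie_gart_enable - set up the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART table in VRAM, program the TLB and L2 cache control,
 * point VM context0 at the GTT, restore the saved page table base
 * addresses for contexts 1-15 and flush the TLB.
 * Returns 0 for success, negative error code for failure.
 */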
4290 static int si_pcie_gart_enable(struct radeon_device *rdev)
4291 {
4292 	int r, i;
4293 
4294 	if (rdev->gart.robj == NULL) {
4295 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4296 		return -EINVAL;
4297 	}
4298 	r = radeon_gart_table_vram_pin(rdev);
4299 	if (r)
4300 		return r;
4301 	/* Setup TLB control */
4302 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4303 	       (0xA << 7) |
4304 	       ENABLE_L1_TLB |
4305 	       ENABLE_L1_FRAGMENT_PROCESSING |
4306 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4307 	       ENABLE_ADVANCED_DRIVER_MODEL |
4308 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4309 	/* Setup L2 cache */
4310 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4311 	       ENABLE_L2_FRAGMENT_PROCESSING |
4312 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4313 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4314 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4315 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4316 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4317 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4318 	       BANK_SELECT(4) |
4319 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4320 	/* setup context0 */
4321 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4322 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4323 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4324 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4325 			(u32)(rdev->dummy_page.addr >> 12));
4326 	WREG32(VM_CONTEXT0_CNTL2, 0);
4327 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4328 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4329 
4330 	WREG32(0x15D4, 0);
4331 	WREG32(0x15D8, 0);
4332 	WREG32(0x15DC, 0);
4333 
4334 	/* empty context1-15 */
4335 	/* set vm size, must be a multiple of 4 */
4336 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4337 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4338 	/* Assign the pt base to something valid for now; the pts used for
4339 	 * the VMs are determined by the application and setup and assigned
4340 	 * on the fly in the vm part of radeon_gart.c
4341 	 */
4342 	for (i = 1; i < 16; i++) {
4343 		if (i < 8)
4344 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4345 			       rdev->vm_manager.saved_table_addr[i]);
4346 		else
4347 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4348 			       rdev->vm_manager.saved_table_addr[i]);
4349 	}
4350 
4351 	/* enable context1-15 */
4352 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4353 	       (u32)(rdev->dummy_page.addr >> 12));
4354 	WREG32(VM_CONTEXT1_CNTL2, 4);
4355 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4356 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4357 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4358 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4359 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4360 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4361 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4362 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4363 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4364 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4365 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4366 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4367 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4368 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4369 
4370 	si_pcie_gart_tlb_flush(rdev);
4371 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4372 		 (unsigned)(rdev->mc.gtt_size >> 20),
4373 		 (unsigned long long)rdev->gart.table_addr);
4374 	rdev->gart.ready = true;
4375 	return 0;
4376 }
4377 
4378 static void si_pcie_gart_disable(struct radeon_device *rdev)
4379 {
4380 	unsigned i;
4381 
4382 	for (i = 1; i < 16; ++i) {
4383 		uint32_t reg;
4384 		if (i < 8)
4385 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4386 		else
4387 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4388 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4389 	}
4390 
4391 	/* Disable all tables */
4392 	WREG32(VM_CONTEXT0_CNTL, 0);
4393 	WREG32(VM_CONTEXT1_CNTL, 0);
4394 	/* Setup TLB control */
4395 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4396 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4397 	/* Setup L2 cache */
4398 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4399 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4400 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4401 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4402 	WREG32(VM_L2_CNTL2, 0);
4403 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4404 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4405 	radeon_gart_table_vram_unpin(rdev);
4406 }
4407 
4408 static void si_pcie_gart_fini(struct radeon_device *rdev)
4409 {
4410 	si_pcie_gart_disable(rdev);
4411 	radeon_gart_table_vram_free(rdev);
4412 	radeon_gart_fini(rdev);
4413 }
4414 
4415 /* vm parser */
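/**
 * si_vm_reg_valid - check whether a register may be written from a VM IB
 *
 * @reg: register offset
 *
 * Context registers (0x28000 and up) are always allowed; config
 * registers must be on the whitelist below.
 * Returns true if the register is safe to write, false otherwise.
 */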
4416 static bool si_vm_reg_valid(u32 reg)
4417 {
4418 	/* context regs are fine */
4419 	if (reg >= 0x28000)
4420 		return true;
4421 
4422 	/* check config regs */
4423 	switch (reg) {
4424 	case GRBM_GFX_INDEX:
4425 	case CP_STRMOUT_CNTL:
4426 	case VGT_VTX_VECT_EJECT_REG:
4427 	case VGT_CACHE_INVALIDATION:
4428 	case VGT_ESGS_RING_SIZE:
4429 	case VGT_GSVS_RING_SIZE:
4430 	case VGT_GS_VERTEX_REUSE:
4431 	case VGT_PRIMITIVE_TYPE:
4432 	case VGT_INDEX_TYPE:
4433 	case VGT_NUM_INDICES:
4434 	case VGT_NUM_INSTANCES:
4435 	case VGT_TF_RING_SIZE:
4436 	case VGT_HS_OFFCHIP_PARAM:
4437 	case VGT_TF_MEMORY_BASE:
4438 	case PA_CL_ENHANCE:
4439 	case PA_SU_LINE_STIPPLE_VALUE:
4440 	case PA_SC_LINE_STIPPLE_STATE:
4441 	case PA_SC_ENHANCE:
4442 	case SQC_CACHES:
4443 	case SPI_STATIC_THREAD_MGMT_1:
4444 	case SPI_STATIC_THREAD_MGMT_2:
4445 	case SPI_STATIC_THREAD_MGMT_3:
4446 	case SPI_PS_MAX_WAVE_ID:
4447 	case SPI_CONFIG_CNTL:
4448 	case SPI_CONFIG_CNTL_1:
4449 	case TA_CNTL_AUX:
4450 		return true;
4451 	default:
4452 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4453 		return false;
4454 	}
4455 }
4456 
4457 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4458 				  u32 *ib, struct radeon_cs_packet *pkt)
4459 {
4460 	switch (pkt->opcode) {
4461 	case PACKET3_NOP:
4462 	case PACKET3_SET_BASE:
4463 	case PACKET3_SET_CE_DE_COUNTERS:
4464 	case PACKET3_LOAD_CONST_RAM:
4465 	case PACKET3_WRITE_CONST_RAM:
4466 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4467 	case PACKET3_DUMP_CONST_RAM:
4468 	case PACKET3_INCREMENT_CE_COUNTER:
4469 	case PACKET3_WAIT_ON_DE_COUNTER:
4470 	case PACKET3_CE_WRITE:
4471 		break;
4472 	default:
4473 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4474 		return -EINVAL;
4475 	}
4476 	return 0;
4477 }
4478 
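/**
 * si_vm_packet3_cp_dma_check - validate a CP_DMA packet
 *
 * @ib: IB dwords
 * @idx: index of the dword following the packet header
 *
 * When the CP_DMA source or destination address space is a register,
 * validate every register the transfer touches against the register
 * whitelist.
 * Returns 0 on success, -EINVAL on an invalid register.
 */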
4479 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4480 {
4481 	u32 start_reg, reg, i;
4482 	u32 command = ib[idx + 4];
4483 	u32 info = ib[idx + 1];
4484 	u32 idx_value = ib[idx];
4485 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4486 		/* src address space is register */
4487 		if (((info & 0x60000000) >> 29) == 0) {
4488 			start_reg = idx_value << 2;
4489 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4490 				reg = start_reg;
4491 				if (!si_vm_reg_valid(reg)) {
4492 					DRM_ERROR("CP DMA Bad SRC register\n");
4493 					return -EINVAL;
4494 				}
4495 			} else {
4496 				for (i = 0; i < (command & 0x1fffff); i++) {
4497 					reg = start_reg + (4 * i);
4498 					if (!si_vm_reg_valid(reg)) {
4499 						DRM_ERROR("CP DMA Bad SRC register\n");
4500 						return -EINVAL;
4501 					}
4502 				}
4503 			}
4504 		}
4505 	}
4506 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4507 		/* dst address space is register */
4508 		if (((info & 0x00300000) >> 20) == 0) {
4509 			start_reg = ib[idx + 2];
4510 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4511 				reg = start_reg;
4512 				if (!si_vm_reg_valid(reg)) {
4513 					DRM_ERROR("CP DMA Bad DST register\n");
4514 					return -EINVAL;
4515 				}
4516 			} else {
4517 				for (i = 0; i < (command & 0x1fffff); i++) {
4518 					reg = start_reg + (4 * i);
4519 					if (!si_vm_reg_valid(reg)) {
4520 						DRM_ERROR("CP DMA Bad DST register\n");
4521 						return -EINVAL;
4522 					}
4523 				}
4524 			}
4525 		}
4526 	}
4527 	return 0;
4528 }
4529 
4530 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4531 				   u32 *ib, struct radeon_cs_packet *pkt)
4532 {
4533 	int r;
4534 	u32 idx = pkt->idx + 1;
4535 	u32 idx_value = ib[idx];
4536 	u32 start_reg, end_reg, reg, i;
4537 
4538 	switch (pkt->opcode) {
4539 	case PACKET3_NOP:
4540 	case PACKET3_SET_BASE:
4541 	case PACKET3_CLEAR_STATE:
4542 	case PACKET3_INDEX_BUFFER_SIZE:
4543 	case PACKET3_DISPATCH_DIRECT:
4544 	case PACKET3_DISPATCH_INDIRECT:
4545 	case PACKET3_ALLOC_GDS:
4546 	case PACKET3_WRITE_GDS_RAM:
4547 	case PACKET3_ATOMIC_GDS:
4548 	case PACKET3_ATOMIC:
4549 	case PACKET3_OCCLUSION_QUERY:
4550 	case PACKET3_SET_PREDICATION:
4551 	case PACKET3_COND_EXEC:
4552 	case PACKET3_PRED_EXEC:
4553 	case PACKET3_DRAW_INDIRECT:
4554 	case PACKET3_DRAW_INDEX_INDIRECT:
4555 	case PACKET3_INDEX_BASE:
4556 	case PACKET3_DRAW_INDEX_2:
4557 	case PACKET3_CONTEXT_CONTROL:
4558 	case PACKET3_INDEX_TYPE:
4559 	case PACKET3_DRAW_INDIRECT_MULTI:
4560 	case PACKET3_DRAW_INDEX_AUTO:
4561 	case PACKET3_DRAW_INDEX_IMMD:
4562 	case PACKET3_NUM_INSTANCES:
4563 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4564 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4565 	case PACKET3_DRAW_INDEX_OFFSET_2:
4566 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4567 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4568 	case PACKET3_MPEG_INDEX:
4569 	case PACKET3_WAIT_REG_MEM:
4570 	case PACKET3_MEM_WRITE:
4571 	case PACKET3_PFP_SYNC_ME:
4572 	case PACKET3_SURFACE_SYNC:
4573 	case PACKET3_EVENT_WRITE:
4574 	case PACKET3_EVENT_WRITE_EOP:
4575 	case PACKET3_EVENT_WRITE_EOS:
4576 	case PACKET3_SET_CONTEXT_REG:
4577 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4578 	case PACKET3_SET_SH_REG:
4579 	case PACKET3_SET_SH_REG_OFFSET:
4580 	case PACKET3_INCREMENT_DE_COUNTER:
4581 	case PACKET3_WAIT_ON_CE_COUNTER:
4582 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4583 	case PACKET3_ME_WRITE:
4584 		break;
4585 	case PACKET3_COPY_DATA:
4586 		if ((idx_value & 0xf00) == 0) {
4587 			reg = ib[idx + 3] * 4;
4588 			if (!si_vm_reg_valid(reg))
4589 				return -EINVAL;
4590 		}
4591 		break;
4592 	case PACKET3_WRITE_DATA:
4593 		if ((idx_value & 0xf00) == 0) {
4594 			start_reg = ib[idx + 1] * 4;
4595 			if (idx_value & 0x10000) {
4596 				if (!si_vm_reg_valid(start_reg))
4597 					return -EINVAL;
4598 			} else {
4599 				for (i = 0; i < (pkt->count - 2); i++) {
4600 					reg = start_reg + (4 * i);
4601 					if (!si_vm_reg_valid(reg))
4602 						return -EINVAL;
4603 				}
4604 			}
4605 		}
4606 		break;
4607 	case PACKET3_COND_WRITE:
4608 		if (idx_value & 0x100) {
4609 			reg = ib[idx + 5] * 4;
4610 			if (!si_vm_reg_valid(reg))
4611 				return -EINVAL;
4612 		}
4613 		break;
4614 	case PACKET3_COPY_DW:
4615 		if (idx_value & 0x2) {
4616 			reg = ib[idx + 3] * 4;
4617 			if (!si_vm_reg_valid(reg))
4618 				return -EINVAL;
4619 		}
4620 		break;
4621 	case PACKET3_SET_CONFIG_REG:
4622 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4623 		end_reg = 4 * pkt->count + start_reg - 4;
4624 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4625 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4626 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4627 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4628 			return -EINVAL;
4629 		}
4630 		for (i = 0; i < pkt->count; i++) {
4631 			reg = start_reg + (4 * i);
4632 			if (!si_vm_reg_valid(reg))
4633 				return -EINVAL;
4634 		}
4635 		break;
4636 	case PACKET3_CP_DMA:
4637 		r = si_vm_packet3_cp_dma_check(ib, idx);
4638 		if (r)
4639 			return r;
4640 		break;
4641 	default:
4642 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4643 		return -EINVAL;
4644 	}
4645 	return 0;
4646 }
4647 
4648 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4649 				       u32 *ib, struct radeon_cs_packet *pkt)
4650 {
4651 	int r;
4652 	u32 idx = pkt->idx + 1;
4653 	u32 idx_value = ib[idx];
4654 	u32 start_reg, reg, i;
4655 
4656 	switch (pkt->opcode) {
4657 	case PACKET3_NOP:
4658 	case PACKET3_SET_BASE:
4659 	case PACKET3_CLEAR_STATE:
4660 	case PACKET3_DISPATCH_DIRECT:
4661 	case PACKET3_DISPATCH_INDIRECT:
4662 	case PACKET3_ALLOC_GDS:
4663 	case PACKET3_WRITE_GDS_RAM:
4664 	case PACKET3_ATOMIC_GDS:
4665 	case PACKET3_ATOMIC:
4666 	case PACKET3_OCCLUSION_QUERY:
4667 	case PACKET3_SET_PREDICATION:
4668 	case PACKET3_COND_EXEC:
4669 	case PACKET3_PRED_EXEC:
4670 	case PACKET3_CONTEXT_CONTROL:
4671 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4672 	case PACKET3_WAIT_REG_MEM:
4673 	case PACKET3_MEM_WRITE:
4674 	case PACKET3_PFP_SYNC_ME:
4675 	case PACKET3_SURFACE_SYNC:
4676 	case PACKET3_EVENT_WRITE:
4677 	case PACKET3_EVENT_WRITE_EOP:
4678 	case PACKET3_EVENT_WRITE_EOS:
4679 	case PACKET3_SET_CONTEXT_REG:
4680 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4681 	case PACKET3_SET_SH_REG:
4682 	case PACKET3_SET_SH_REG_OFFSET:
4683 	case PACKET3_INCREMENT_DE_COUNTER:
4684 	case PACKET3_WAIT_ON_CE_COUNTER:
4685 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4686 	case PACKET3_ME_WRITE:
4687 		break;
4688 	case PACKET3_COPY_DATA:
4689 		if ((idx_value & 0xf00) == 0) {
4690 			reg = ib[idx + 3] * 4;
4691 			if (!si_vm_reg_valid(reg))
4692 				return -EINVAL;
4693 		}
4694 		break;
4695 	case PACKET3_WRITE_DATA:
4696 		if ((idx_value & 0xf00) == 0) {
4697 			start_reg = ib[idx + 1] * 4;
4698 			if (idx_value & 0x10000) {
4699 				if (!si_vm_reg_valid(start_reg))
4700 					return -EINVAL;
4701 			} else {
4702 				for (i = 0; i < (pkt->count - 2); i++) {
4703 					reg = start_reg + (4 * i);
4704 					if (!si_vm_reg_valid(reg))
4705 						return -EINVAL;
4706 				}
4707 			}
4708 		}
4709 		break;
4710 	case PACKET3_COND_WRITE:
4711 		if (idx_value & 0x100) {
4712 			reg = ib[idx + 5] * 4;
4713 			if (!si_vm_reg_valid(reg))
4714 				return -EINVAL;
4715 		}
4716 		break;
4717 	case PACKET3_COPY_DW:
4718 		if (idx_value & 0x2) {
4719 			reg = ib[idx + 3] * 4;
4720 			if (!si_vm_reg_valid(reg))
4721 				return -EINVAL;
4722 		}
4723 		break;
4724 	case PACKET3_CP_DMA:
4725 		r = si_vm_packet3_cp_dma_check(ib, idx);
4726 		if (r)
4727 			return r;
4728 		break;
4729 	default:
4730 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4731 		return -EINVAL;
4732 	}
4733 	return 0;
4734 }
4735 
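/**
 * si_ib_parse - validate the packets of a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer to check
 *
 * Walk the IB and validate each type-3 packet against the checker for
 * the ring it was submitted to; type-0 packets are rejected outright.
 * On error, the whole IB is dumped with the offending dword marked.
 * Returns 0 on success, -EINVAL on an invalid packet.
 */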
4736 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4737 {
4738 	int ret = 0;
4739 	u32 idx = 0, i;
4740 	struct radeon_cs_packet pkt;
4741 
4742 	do {
4743 		pkt.idx = idx;
4744 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4745 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4746 		pkt.one_reg_wr = 0;
4747 		switch (pkt.type) {
4748 		case RADEON_PACKET_TYPE0:
4749 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4750 			ret = -EINVAL;
4751 			break;
4752 		case RADEON_PACKET_TYPE2:
4753 			idx += 1;
4754 			break;
4755 		case RADEON_PACKET_TYPE3:
4756 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4757 			if (ib->is_const_ib)
4758 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4759 			else {
4760 				switch (ib->ring) {
4761 				case RADEON_RING_TYPE_GFX_INDEX:
4762 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4763 					break;
4764 				case CAYMAN_RING_TYPE_CP1_INDEX:
4765 				case CAYMAN_RING_TYPE_CP2_INDEX:
4766 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4767 					break;
4768 				default:
4769 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4770 					ret = -EINVAL;
4771 					break;
4772 				}
4773 			}
4774 			idx += pkt.count + 2;
4775 			break;
4776 		default:
4777 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4778 			ret = -EINVAL;
4779 			break;
4780 		}
4781 		if (ret) {
4782 			for (i = 0; i < ib->length_dw; i++) {
4783 				if (i == idx)
4784 					printk("\t0x%08x <---\n", ib->ptr[i]);
4785 				else
4786 					printk("\t0x%08x\n", ib->ptr[i]);
4787 			}
4788 			break;
4789 		}
4790 	} while (idx < ib->length_dw);
4791 
4792 	return ret;
4793 }
4794 
4795 /*
4796  * vm
4797  */
4798 int si_vm_init(struct radeon_device *rdev)
4799 {
4800 	/* number of VMs */
4801 	rdev->vm_manager.nvm = 16;
4802 	/* base offset of vram pages */
4803 	rdev->vm_manager.vram_base_offset = 0;
4804 
4805 	return 0;
4806 }
4807 
4808 void si_vm_fini(struct radeon_device *rdev)
4809 {
4810 }
4811 
4812 /**
4813  * si_vm_decode_fault - print human readable fault info
4814  *
4815  * @rdev: radeon_device pointer
4816  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4817  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4818  *
4819  * Print human readable fault information (SI).
4820  */
4821 static void si_vm_decode_fault(struct radeon_device *rdev,
4822 			       u32 status, u32 addr)
4823 {
4824 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4825 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4826 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4827 	char *block;
4828 
4829 	if (rdev->family == CHIP_TAHITI) {
4830 		switch (mc_id) {
4831 		case 160:
4832 		case 144:
4833 		case 96:
4834 		case 80:
4835 		case 224:
4836 		case 208:
4837 		case 32:
4838 		case 16:
4839 			block = "CB";
4840 			break;
4841 		case 161:
4842 		case 145:
4843 		case 97:
4844 		case 81:
4845 		case 225:
4846 		case 209:
4847 		case 33:
4848 		case 17:
4849 			block = "CB_FMASK";
4850 			break;
4851 		case 162:
4852 		case 146:
4853 		case 98:
4854 		case 82:
4855 		case 226:
4856 		case 210:
4857 		case 34:
4858 		case 18:
4859 			block = "CB_CMASK";
4860 			break;
4861 		case 163:
4862 		case 147:
4863 		case 99:
4864 		case 83:
4865 		case 227:
4866 		case 211:
4867 		case 35:
4868 		case 19:
4869 			block = "CB_IMMED";
4870 			break;
4871 		case 164:
4872 		case 148:
4873 		case 100:
4874 		case 84:
4875 		case 228:
4876 		case 212:
4877 		case 36:
4878 		case 20:
4879 			block = "DB";
4880 			break;
4881 		case 165:
4882 		case 149:
4883 		case 101:
4884 		case 85:
4885 		case 229:
4886 		case 213:
4887 		case 37:
4888 		case 21:
4889 			block = "DB_HTILE";
4890 			break;
4891 		case 167:
4892 		case 151:
4893 		case 103:
4894 		case 87:
4895 		case 231:
4896 		case 215:
4897 		case 39:
4898 		case 23:
4899 			block = "DB_STEN";
4900 			break;
4901 		case 72:
4902 		case 68:
4903 		case 64:
4904 		case 8:
4905 		case 4:
4906 		case 0:
4907 		case 136:
4908 		case 132:
4909 		case 128:
4910 		case 200:
4911 		case 196:
4912 		case 192:
4913 			block = "TC";
4914 			break;
4915 		case 112:
4916 		case 48:
4917 			block = "CP";
4918 			break;
4919 		case 49:
4920 		case 177:
4921 		case 50:
4922 		case 178:
4923 			block = "SH";
4924 			break;
4925 		case 53:
4926 		case 190:
4927 			block = "VGT";
4928 			break;
4929 		case 117:
4930 			block = "IH";
4931 			break;
4932 		case 51:
4933 		case 115:
4934 			block = "RLC";
4935 			break;
4936 		case 119:
4937 		case 183:
4938 			block = "DMA0";
4939 			break;
4940 		case 61:
4941 			block = "DMA1";
4942 			break;
4943 		case 248:
4944 		case 120:
4945 			block = "HDP";
4946 			break;
4947 		default:
4948 			block = "unknown";
4949 			break;
4950 		}
4951 	} else {
4952 		switch (mc_id) {
4953 		case 32:
4954 		case 16:
4955 		case 96:
4956 		case 80:
4957 		case 160:
4958 		case 144:
4959 		case 224:
4960 		case 208:
4961 			block = "CB";
4962 			break;
4963 		case 33:
4964 		case 17:
4965 		case 97:
4966 		case 81:
4967 		case 161:
4968 		case 145:
4969 		case 225:
4970 		case 209:
4971 			block = "CB_FMASK";
4972 			break;
4973 		case 34:
4974 		case 18:
4975 		case 98:
4976 		case 82:
4977 		case 162:
4978 		case 146:
4979 		case 226:
4980 		case 210:
4981 			block = "CB_CMASK";
4982 			break;
4983 		case 35:
4984 		case 19:
4985 		case 99:
4986 		case 83:
4987 		case 163:
4988 		case 147:
4989 		case 227:
4990 		case 211:
4991 			block = "CB_IMMED";
4992 			break;
4993 		case 36:
4994 		case 20:
4995 		case 100:
4996 		case 84:
4997 		case 164:
4998 		case 148:
4999 		case 228:
5000 		case 212:
5001 			block = "DB";
5002 			break;
5003 		case 37:
5004 		case 21:
5005 		case 101:
5006 		case 85:
5007 		case 165:
5008 		case 149:
5009 		case 229:
5010 		case 213:
5011 			block = "DB_HTILE";
5012 			break;
5013 		case 39:
5014 		case 23:
5015 		case 103:
5016 		case 87:
5017 		case 167:
5018 		case 151:
5019 		case 231:
5020 		case 215:
5021 			block = "DB_STEN";
5022 			break;
5023 		case 72:
5024 		case 68:
5025 		case 8:
5026 		case 4:
5027 		case 136:
5028 		case 132:
5029 		case 200:
5030 		case 196:
5031 			block = "TC";
5032 			break;
5033 		case 112:
5034 		case 48:
5035 			block = "CP";
5036 			break;
5037 		case 49:
5038 		case 177:
5039 		case 50:
5040 		case 178:
5041 			block = "SH";
5042 			break;
5043 		case 53:
5044 			block = "VGT";
5045 			break;
5046 		case 117:
5047 			block = "IH";
5048 			break;
5049 		case 51:
5050 		case 115:
5051 			block = "RLC";
5052 			break;
5053 		case 119:
5054 		case 183:
5055 			block = "DMA0";
5056 			break;
5057 		case 61:
5058 			block = "DMA1";
5059 			break;
5060 		case 248:
5061 		case 120:
5062 			block = "HDP";
5063 			break;
5064 		default:
5065 			block = "unknown";
5066 			break;
5067 		}
5068 	}
5069 
5070 	printk("VM fault (0x%02x, vmid %u) at page %u, %s from %s (%u)\n",
5071 	       protections, vmid, addr,
5072 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5073 	       block, mc_id);
5074 }
5075 
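/**
 * si_vm_flush - flush the TLB for a VM using the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context to flush
 * @pd_addr: GPU address of the page directory
 *
 * Update the page table base for the VM, flush the HDP cache, and
 * request a TLB invalidate for the context, then make the ME wait for
 * the invalidate and sync the PFP before continuing (SI).
 */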
5076 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5077 		 unsigned vm_id, uint64_t pd_addr)
5078 {
5079 	/* write new base address */
5080 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5081 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5082 				 WRITE_DATA_DST_SEL(0)));
5083 
5084 	if (vm_id < 8) {
5085 		radeon_ring_write(ring,
5086 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5087 	} else {
5088 		radeon_ring_write(ring,
5089 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5090 	}
5091 	radeon_ring_write(ring, 0);
5092 	radeon_ring_write(ring, pd_addr >> 12);
5093 
5094 	/* flush hdp cache */
5095 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5096 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5097 				 WRITE_DATA_DST_SEL(0)));
5098 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5099 	radeon_ring_write(ring, 0);
5100 	radeon_ring_write(ring, 0x1);
5101 
5102 	/* bits 0-15 are the VM contexts 0-15 */
5103 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5104 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5105 				 WRITE_DATA_DST_SEL(0)));
5106 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5107 	radeon_ring_write(ring, 0);
5108 	radeon_ring_write(ring, 1 << vm_id);
5109 
5110 	/* wait for the invalidate to complete */
5111 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5112 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5113 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5114 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5115 	radeon_ring_write(ring, 0);
5116 	radeon_ring_write(ring, 0); /* ref */
5117 	radeon_ring_write(ring, 0); /* mask */
5118 	radeon_ring_write(ring, 0x20); /* poll interval */
5119 
5120 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5121 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5122 	radeon_ring_write(ring, 0x0);
5123 }
5124 
5125 /*
5126  *  Power and clock gating
5127  */
5128 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5129 {
5130 	int i;
5131 
5132 	for (i = 0; i < rdev->usec_timeout; i++) {
5133 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5134 			break;
5135 		udelay(1);
5136 	}
5137 
5138 	for (i = 0; i < rdev->usec_timeout; i++) {
5139 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5140 			break;
5141 		udelay(1);
5142 	}
5143 }
5144 
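/*
 * Toggle the context busy/empty interrupts on ring 0.  When disabling,
 * read back a gfx register and then wait until the RLC reports only
 * GFX_CLOCK_STATUS and GFX_POWER_STATUS before returning.
 */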
5145 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5146 					 bool enable)
5147 {
5148 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5149 	u32 mask;
5150 	int i;
5151 
5152 	if (enable)
5153 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5154 	else
5155 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5156 	WREG32(CP_INT_CNTL_RING0, tmp);
5157 
5158 	if (!enable) {
5159 		/* read a gfx register */
5160 		tmp = RREG32(DB_DEPTH_INFO);
5161 
5162 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5163 		for (i = 0; i < rdev->usec_timeout; i++) {
5164 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5165 				break;
5166 			udelay(1);
5167 		}
5168 	}
5169 }
5170 
5171 static void si_set_uvd_dcm(struct radeon_device *rdev,
5172 			   bool sw_mode)
5173 {
5174 	u32 tmp, tmp2;
5175 
5176 	tmp = RREG32(UVD_CGC_CTRL);
5177 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5178 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5179 
5180 	if (sw_mode) {
5181 		tmp &= ~0x7ffff800;
5182 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5183 	} else {
5184 		tmp |= 0x7ffff800;
5185 		tmp2 = 0;
5186 	}
5187 
5188 	WREG32(UVD_CGC_CTRL, tmp);
5189 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5190 }
5191 
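/*
 * UVD dynamic clocking is left under hardware control here (hw_mode is
 * hard-coded); the sw_mode path of si_set_uvd_dcm() is currently unused.
 */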
5192 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5193 {
5194 	bool hw_mode = true;
5195 
5196 	if (hw_mode) {
5197 		si_set_uvd_dcm(rdev, false);
5198 	} else {
5199 		u32 tmp = RREG32(UVD_CGC_CTRL);
5200 		tmp &= ~DCM;
5201 		WREG32(UVD_CGC_CTRL, tmp);
5202 	}
5203 }
5204 
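/*
 * Disable the RLC and wait for the serdes writes to drain.  Returns the
 * previous RLC_CNTL value so the caller can restore it afterwards via
 * si_update_rlc().
 */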
5205 static u32 si_halt_rlc(struct radeon_device *rdev)
5206 {
5207 	u32 data, orig;
5208 
5209 	orig = data = RREG32(RLC_CNTL);
5210 
5211 	if (data & RLC_ENABLE) {
5212 		data &= ~RLC_ENABLE;
5213 		WREG32(RLC_CNTL, data);
5214 
5215 		si_wait_for_rlc_serdes(rdev);
5216 	}
5217 
5218 	return orig;
5219 }
5220 
5221 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5222 {
5223 	u32 tmp;
5224 
5225 	tmp = RREG32(RLC_CNTL);
5226 	if (tmp != rlc)
5227 		WREG32(RLC_CNTL, rlc);
5228 }
5229 
5230 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5231 {
5232 	u32 data, orig;
5233 
5234 	orig = data = RREG32(DMA_PG);
5235 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5236 		data |= PG_CNTL_ENABLE;
5237 	else
5238 		data &= ~PG_CNTL_ENABLE;
5239 	if (orig != data)
5240 		WREG32(DMA_PG, data);
5241 }
5242 
5243 static void si_init_dma_pg(struct radeon_device *rdev)
5244 {
5245 	u32 tmp;
5246 
5247 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5248 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5249 
5250 	for (tmp = 0; tmp < 5; tmp++)
5251 		WREG32(DMA_PGFSM_WRITE, 0);
5252 }
5253 
5254 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5255 			       bool enable)
5256 {
5257 	u32 tmp;
5258 
5259 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5260 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5261 		WREG32(RLC_TTOP_D, tmp);
5262 
5263 		tmp = RREG32(RLC_PG_CNTL);
5264 		tmp |= GFX_PG_ENABLE;
5265 		WREG32(RLC_PG_CNTL, tmp);
5266 
5267 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5268 		tmp |= AUTO_PG_EN;
5269 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5270 	} else {
5271 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5272 		tmp &= ~AUTO_PG_EN;
5273 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5274 
5275 		tmp = RREG32(DB_RENDER_CONTROL);
5276 	}
5277 }
5278 
5279 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5280 {
5281 	u32 tmp;
5282 
5283 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5284 
5285 	tmp = RREG32(RLC_PG_CNTL);
5286 	tmp |= GFX_PG_SRC;
5287 	WREG32(RLC_PG_CNTL, tmp);
5288 
5289 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5290 
5291 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5292 
5293 	tmp &= ~GRBM_REG_SGIT_MASK;
5294 	tmp |= GRBM_REG_SGIT(0x700);
5295 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5296 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5297 }
5298 
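/*
 * Both shader array config registers carry the disabled-CU bits in their
 * upper 16 bits; merge the fuse (CC_) and user (GC_USER_) values, then
 * invert and clamp to max_cu_per_sh to get the bitmap of active CUs.
 */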
5299 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5300 {
5301 	u32 mask = 0, tmp, tmp1;
5302 	int i;
5303 
5304 	si_select_se_sh(rdev, se, sh);
5305 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5306 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5307 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5308 
5309 	tmp &= 0xffff0000;
5310 
5311 	tmp |= tmp1;
5312 	tmp >>= 16;
5313 
5314 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5315 		mask <<= 1;
5316 		mask |= 1;
5317 	}
5318 
5319 	return (~tmp) & mask;
5320 }
5321 
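/*
 * Build the "always on" CU mask, keeping at most the first two active CUs
 * of each SH powered, and program the total number of active CUs into
 * RLC_MAX_PG_CU.
 */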
5322 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5323 {
5324 	u32 i, j, k, active_cu_number = 0;
5325 	u32 mask, counter, cu_bitmap;
5326 	u32 tmp = 0;
5327 
5328 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5329 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5330 			mask = 1;
5331 			cu_bitmap = 0;
5332 			counter  = 0;
5333 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5334 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5335 					if (counter < 2)
5336 						cu_bitmap |= mask;
5337 					counter++;
5338 				}
5339 				mask <<= 1;
5340 			}
5341 
5342 			active_cu_number += counter;
5343 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5344 		}
5345 	}
5346 
5347 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5348 
5349 	tmp = RREG32(RLC_MAX_PG_CU);
5350 	tmp &= ~MAX_PU_CU_MASK;
5351 	tmp |= MAX_PU_CU(active_cu_number);
5352 	WREG32(RLC_MAX_PG_CU, tmp);
5353 }
5354 
5355 static void si_enable_cgcg(struct radeon_device *rdev,
5356 			   bool enable)
5357 {
5358 	u32 data, orig, tmp;
5359 
5360 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5361 
5362 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5363 		si_enable_gui_idle_interrupt(rdev, true);
5364 
5365 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5366 
5367 		tmp = si_halt_rlc(rdev);
5368 
5369 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5370 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5371 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5372 
5373 		si_wait_for_rlc_serdes(rdev);
5374 
5375 		si_update_rlc(rdev, tmp);
5376 
5377 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5378 
5379 		data |= CGCG_EN | CGLS_EN;
5380 	} else {
5381 		si_enable_gui_idle_interrupt(rdev, false);
5382 
5383 		RREG32(CB_CGTT_SCLK_CTRL);
5384 		RREG32(CB_CGTT_SCLK_CTRL);
5385 		RREG32(CB_CGTT_SCLK_CTRL);
5386 		RREG32(CB_CGTT_SCLK_CTRL);
5387 
5388 		data &= ~(CGCG_EN | CGLS_EN);
5389 	}
5390 
5391 	if (orig != data)
5392 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5393 }
5394 
5395 static void si_enable_mgcg(struct radeon_device *rdev,
5396 			   bool enable)
5397 {
5398 	u32 data, orig, tmp = 0;
5399 
5400 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5401 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5402 		data = 0x96940200;
5403 		if (orig != data)
5404 			WREG32(CGTS_SM_CTRL_REG, data);
5405 
5406 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5407 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5408 			data |= CP_MEM_LS_EN;
5409 			if (orig != data)
5410 				WREG32(CP_MEM_SLP_CNTL, data);
5411 		}
5412 
5413 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5414 		data &= 0xffffffc0;
5415 		if (orig != data)
5416 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5417 
5418 		tmp = si_halt_rlc(rdev);
5419 
5420 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5421 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5422 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5423 
5424 		si_update_rlc(rdev, tmp);
5425 	} else {
5426 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5427 		data |= 0x00000003;
5428 		if (orig != data)
5429 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5430 
5431 		data = RREG32(CP_MEM_SLP_CNTL);
5432 		if (data & CP_MEM_LS_EN) {
5433 			data &= ~CP_MEM_LS_EN;
5434 			WREG32(CP_MEM_SLP_CNTL, data);
5435 		}
5436 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5437 		data |= LS_OVERRIDE | OVERRIDE;
5438 		if (orig != data)
5439 			WREG32(CGTS_SM_CTRL_REG, data);
5440 
5441 		tmp = si_halt_rlc(rdev);
5442 
5443 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5444 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5445 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5446 
5447 		si_update_rlc(rdev, tmp);
5448 	}
5449 }
5450 
5451 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5452 			       bool enable)
5453 {
5454 	u32 orig, data, tmp;
5455 
5456 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5457 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5458 		tmp |= 0x3fff;
5459 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5460 
5461 		orig = data = RREG32(UVD_CGC_CTRL);
5462 		data |= DCM;
5463 		if (orig != data)
5464 			WREG32(UVD_CGC_CTRL, data);
5465 
5466 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5467 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5468 	} else {
5469 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5470 		tmp &= ~0x3fff;
5471 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5472 
5473 		orig = data = RREG32(UVD_CGC_CTRL);
5474 		data &= ~DCM;
5475 		if (orig != data)
5476 			WREG32(UVD_CGC_CTRL, data);
5477 
5478 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5479 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5480 	}
5481 }
5482 
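/*
 * MC hub/ATC/VM clock gating registers; each carries the MC_LS_ENABLE and
 * MC_CG_ENABLE bits toggled by si_enable_mc_ls() and si_enable_mc_mgcg().
 */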
5483 static const u32 mc_cg_registers[] =
5484 {
5485 	MC_HUB_MISC_HUB_CG,
5486 	MC_HUB_MISC_SIP_CG,
5487 	MC_HUB_MISC_VM_CG,
5488 	MC_XPB_CLK_GAT,
5489 	ATC_MISC_CG,
5490 	MC_CITF_MISC_WR_CG,
5491 	MC_CITF_MISC_RD_CG,
5492 	MC_CITF_MISC_VM_CG,
5493 	VM_L2_CG,
5494 };
5495 
5496 static void si_enable_mc_ls(struct radeon_device *rdev,
5497 			    bool enable)
5498 {
5499 	int i;
5500 	u32 orig, data;
5501 
5502 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5503 		orig = data = RREG32(mc_cg_registers[i]);
5504 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5505 			data |= MC_LS_ENABLE;
5506 		else
5507 			data &= ~MC_LS_ENABLE;
5508 		if (data != orig)
5509 			WREG32(mc_cg_registers[i], data);
5510 	}
5511 }
5512 
5513 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5514 			       bool enable)
5515 {
5516 	int i;
5517 	u32 orig, data;
5518 
5519 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5520 		orig = data = RREG32(mc_cg_registers[i]);
5521 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5522 			data |= MC_CG_ENABLE;
5523 		else
5524 			data &= ~MC_CG_ENABLE;
5525 		if (data != orig)
5526 			WREG32(mc_cg_registers[i], data);
5527 	}
5528 }
5529 
5530 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5531 			       bool enable)
5532 {
5533 	u32 orig, data, offset;
5534 	int i;
5535 
5536 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5537 		for (i = 0; i < 2; i++) {
5538 			if (i == 0)
5539 				offset = DMA0_REGISTER_OFFSET;
5540 			else
5541 				offset = DMA1_REGISTER_OFFSET;
5542 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5543 			data &= ~MEM_POWER_OVERRIDE;
5544 			if (data != orig)
5545 				WREG32(DMA_POWER_CNTL + offset, data);
5546 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5547 		}
5548 	} else {
5549 		for (i = 0; i < 2; i++) {
5550 			if (i == 0)
5551 				offset = DMA0_REGISTER_OFFSET;
5552 			else
5553 				offset = DMA1_REGISTER_OFFSET;
5554 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5555 			data |= MEM_POWER_OVERRIDE;
5556 			if (data != orig)
5557 				WREG32(DMA_POWER_CNTL + offset, data);
5558 
5559 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5560 			data = 0xff000000;
5561 			if (data != orig)
5562 				WREG32(DMA_CLK_CTRL + offset, data);
5563 		}
5564 	}
5565 }
5566 
5567 static void si_enable_bif_mgls(struct radeon_device *rdev,
5568 			       bool enable)
5569 {
5570 	u32 orig, data;
5571 
5572 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5573 
5574 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5575 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5576 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5577 	else
5578 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5579 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5580 
5581 	if (orig != data)
5582 		WREG32_PCIE(PCIE_CNTL2, data);
5583 }
5584 
5585 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5586 			       bool enable)
5587 {
5588 	u32 orig, data;
5589 
5590 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5591 
5592 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5593 		data &= ~CLOCK_GATING_DIS;
5594 	else
5595 		data |= CLOCK_GATING_DIS;
5596 
5597 	if (orig != data)
5598 		WREG32(HDP_HOST_PATH_CNTL, data);
5599 }
5600 
5601 static void si_enable_hdp_ls(struct radeon_device *rdev,
5602 			     bool enable)
5603 {
5604 	u32 orig, data;
5605 
5606 	orig = data = RREG32(HDP_MEM_POWER_LS);
5607 
5608 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5609 		data |= HDP_LS_ENABLE;
5610 	else
5611 		data &= ~HDP_LS_ENABLE;
5612 
5613 	if (orig != data)
5614 		WREG32(HDP_MEM_POWER_LS, data);
5615 }
5616 
5617 static void si_update_cg(struct radeon_device *rdev,
5618 			 u32 block, bool enable)
5619 {
5620 	if (block & RADEON_CG_BLOCK_GFX) {
5621 		si_enable_gui_idle_interrupt(rdev, false);
5622 		/* order matters! */
5623 		if (enable) {
5624 			si_enable_mgcg(rdev, true);
5625 			si_enable_cgcg(rdev, true);
5626 		} else {
5627 			si_enable_cgcg(rdev, false);
5628 			si_enable_mgcg(rdev, false);
5629 		}
5630 		si_enable_gui_idle_interrupt(rdev, true);
5631 	}
5632 
5633 	if (block & RADEON_CG_BLOCK_MC) {
5634 		si_enable_mc_mgcg(rdev, enable);
5635 		si_enable_mc_ls(rdev, enable);
5636 	}
5637 
5638 	if (block & RADEON_CG_BLOCK_SDMA) {
5639 		si_enable_dma_mgcg(rdev, enable);
5640 	}
5641 
5642 	if (block & RADEON_CG_BLOCK_BIF) {
5643 		si_enable_bif_mgls(rdev, enable);
5644 	}
5645 
5646 	if (block & RADEON_CG_BLOCK_UVD) {
5647 		if (rdev->has_uvd) {
5648 			si_enable_uvd_mgcg(rdev, enable);
5649 		}
5650 	}
5651 
5652 	if (block & RADEON_CG_BLOCK_HDP) {
5653 		si_enable_hdp_mgcg(rdev, enable);
5654 		si_enable_hdp_ls(rdev, enable);
5655 	}
5656 }
5657 
5658 static void si_init_cg(struct radeon_device *rdev)
5659 {
5660 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5661 			    RADEON_CG_BLOCK_MC |
5662 			    RADEON_CG_BLOCK_SDMA |
5663 			    RADEON_CG_BLOCK_BIF |
5664 			    RADEON_CG_BLOCK_HDP), true);
5665 	if (rdev->has_uvd) {
5666 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5667 		si_init_uvd_internal_cg(rdev);
5668 	}
5669 }
5670 
5671 static void si_fini_cg(struct radeon_device *rdev)
5672 {
5673 	if (rdev->has_uvd) {
5674 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5675 	}
5676 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5677 			    RADEON_CG_BLOCK_MC |
5678 			    RADEON_CG_BLOCK_SDMA |
5679 			    RADEON_CG_BLOCK_BIF |
5680 			    RADEON_CG_BLOCK_HDP), false);
5681 }
5682 
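/*
 * Clear state buffer (CSB) helpers.  si_get_csb_size() returns the number
 * of dwords si_get_csb_buffer() emits: preamble begin (2) + context
 * control (3) + one SET_CONTEXT_REG packet per extent (2 + reg_count) +
 * PA_SC_RASTER_CONFIG (3) + preamble end (2) + CLEAR_STATE (2), so the
 * two functions must stay in sync.
 */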
5683 u32 si_get_csb_size(struct radeon_device *rdev)
5684 {
5685 	u32 count = 0;
5686 	const struct cs_section_def *sect = NULL;
5687 	const struct cs_extent_def *ext = NULL;
5688 
5689 	if (rdev->rlc.cs_data == NULL)
5690 		return 0;
5691 
5692 	/* begin clear state */
5693 	count += 2;
5694 	/* context control state */
5695 	count += 3;
5696 
5697 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5698 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5699 			if (sect->id == SECT_CONTEXT)
5700 				count += 2 + ext->reg_count;
5701 			else
5702 				return 0;
5703 		}
5704 	}
5705 	/* pa_sc_raster_config */
5706 	count += 3;
5707 	/* end clear state */
5708 	count += 2;
5709 	/* clear state */
5710 	count += 2;
5711 
5712 	return count;
5713 }
5714 
5715 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5716 {
5717 	u32 count = 0, i;
5718 	const struct cs_section_def *sect = NULL;
5719 	const struct cs_extent_def *ext = NULL;
5720 
5721 	if (rdev->rlc.cs_data == NULL)
5722 		return;
5723 	if (buffer == NULL)
5724 		return;
5725 
5726 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5727 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5728 
5729 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5730 	buffer[count++] = cpu_to_le32(0x80000000);
5731 	buffer[count++] = cpu_to_le32(0x80000000);
5732 
5733 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5734 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5735 			if (sect->id == SECT_CONTEXT) {
5736 				buffer[count++] =
5737 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5738 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5739 				for (i = 0; i < ext->reg_count; i++)
5740 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5741 			} else {
5742 				return;
5743 			}
5744 		}
5745 	}
5746 
5747 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5748 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5749 	switch (rdev->family) {
5750 	case CHIP_TAHITI:
5751 	case CHIP_PITCAIRN:
5752 		buffer[count++] = cpu_to_le32(0x2a00126a);
5753 		break;
5754 	case CHIP_VERDE:
5755 		buffer[count++] = cpu_to_le32(0x0000124a);
5756 		break;
5757 	case CHIP_OLAND:
5758 		buffer[count++] = cpu_to_le32(0x00000082);
5759 		break;
5760 	case CHIP_HAINAN:
5761 		buffer[count++] = cpu_to_le32(0x00000000);
5762 		break;
5763 	default:
5764 		buffer[count++] = cpu_to_le32(0x00000000);
5765 		break;
5766 	}
5767 
5768 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5769 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5770 
5771 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5772 	buffer[count++] = cpu_to_le32(0);
5773 }
5774 
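/*
 * Enable whatever powergating the ASIC supports.  Even without GFX PG the
 * RLC still needs the save/restore and clear state buffer addresses.
 */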
5775 static void si_init_pg(struct radeon_device *rdev)
5776 {
5777 	if (rdev->pg_flags) {
5778 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5779 			si_init_dma_pg(rdev);
5780 		}
5781 		si_init_ao_cu_mask(rdev);
5782 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5783 			si_init_gfx_cgpg(rdev);
5784 		} else {
5785 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5786 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5787 		}
5788 		si_enable_dma_pg(rdev, true);
5789 		si_enable_gfx_cgpg(rdev, true);
5790 	} else {
5791 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5792 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5793 	}
5794 }
5795 
5796 static void si_fini_pg(struct radeon_device *rdev)
5797 {
5798 	if (rdev->pg_flags) {
5799 		si_enable_dma_pg(rdev, false);
5800 		si_enable_gfx_cgpg(rdev, false);
5801 	}
5802 }
5803 
5804 /*
5805  * RLC
5806  */
5807 void si_rlc_reset(struct radeon_device *rdev)
5808 {
5809 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5810 
5811 	tmp |= SOFT_RESET_RLC;
5812 	WREG32(GRBM_SOFT_RESET, tmp);
5813 	udelay(50);
5814 	tmp &= ~SOFT_RESET_RLC;
5815 	WREG32(GRBM_SOFT_RESET, tmp);
5816 	udelay(50);
5817 }
5818 
5819 static void si_rlc_stop(struct radeon_device *rdev)
5820 {
5821 	WREG32(RLC_CNTL, 0);
5822 
5823 	si_enable_gui_idle_interrupt(rdev, false);
5824 
5825 	si_wait_for_rlc_serdes(rdev);
5826 }
5827 
5828 static void si_rlc_start(struct radeon_device *rdev)
5829 {
5830 	WREG32(RLC_CNTL, RLC_ENABLE);
5831 
5832 	si_enable_gui_idle_interrupt(rdev, true);
5833 
5834 	udelay(50);
5835 }
5836 
5837 static bool si_lbpw_supported(struct radeon_device *rdev)
5838 {
5839 	u32 tmp;
5840 
5841 	/* Enable LBPW only for DDR3 */
5842 	tmp = RREG32(MC_SEQ_MISC0);
5843 	return (tmp & 0xF0000000) == 0xB0000000;
5846 }
5847 
5848 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5849 {
5850 	u32 tmp;
5851 
5852 	tmp = RREG32(RLC_LB_CNTL);
5853 	if (enable)
5854 		tmp |= LOAD_BALANCE_ENABLE;
5855 	else
5856 		tmp &= ~LOAD_BALANCE_ENABLE;
5857 	WREG32(RLC_LB_CNTL, tmp);
5858 
5859 	if (!enable) {
5860 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5861 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5862 	}
5863 }
5864 
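/*
 * Load the RLC microcode and restart the RLC.  New-style firmware carries
 * a header followed by a little-endian payload; legacy firmware is a raw
 * big-endian blob of SI_RLC_UCODE_SIZE dwords.
 */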
5865 static int si_rlc_resume(struct radeon_device *rdev)
5866 {
5867 	u32 i;
5868 
5869 	if (!rdev->rlc_fw)
5870 		return -EINVAL;
5871 
5872 	si_rlc_stop(rdev);
5873 
5874 	si_rlc_reset(rdev);
5875 
5876 	si_init_pg(rdev);
5877 
5878 	si_init_cg(rdev);
5879 
5880 	WREG32(RLC_RL_BASE, 0);
5881 	WREG32(RLC_RL_SIZE, 0);
5882 	WREG32(RLC_LB_CNTL, 0);
5883 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5884 	WREG32(RLC_LB_CNTR_INIT, 0);
5885 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5886 
5887 	WREG32(RLC_MC_CNTL, 0);
5888 	WREG32(RLC_UCODE_CNTL, 0);
5889 
5890 	if (rdev->new_fw) {
5891 		const struct rlc_firmware_header_v1_0 *hdr =
5892 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5893 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5894 		const __le32 *fw_data = (const __le32 *)
5895 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5896 
5897 		radeon_ucode_print_rlc_hdr(&hdr->header);
5898 
5899 		for (i = 0; i < fw_size; i++) {
5900 			WREG32(RLC_UCODE_ADDR, i);
5901 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5902 		}
5903 	} else {
5904 		const __be32 *fw_data =
5905 			(const __be32 *)rdev->rlc_fw->data;
5906 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5907 			WREG32(RLC_UCODE_ADDR, i);
5908 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5909 		}
5910 	}
5911 	WREG32(RLC_UCODE_ADDR, 0);
5912 
5913 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5914 
5915 	si_rlc_start(rdev);
5916 
5917 	return 0;
5918 }
5919 
5920 static void si_enable_interrupts(struct radeon_device *rdev)
5921 {
5922 	u32 ih_cntl = RREG32(IH_CNTL);
5923 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5924 
5925 	ih_cntl |= ENABLE_INTR;
5926 	ih_rb_cntl |= IH_RB_ENABLE;
5927 	WREG32(IH_CNTL, ih_cntl);
5928 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5929 	rdev->ih.enabled = true;
5930 }
5931 
5932 static void si_disable_interrupts(struct radeon_device *rdev)
5933 {
5934 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5935 	u32 ih_cntl = RREG32(IH_CNTL);
5936 
5937 	ih_rb_cntl &= ~IH_RB_ENABLE;
5938 	ih_cntl &= ~ENABLE_INTR;
5939 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5940 	WREG32(IH_CNTL, ih_cntl);
5941 	/* set rptr, wptr to 0 */
5942 	WREG32(IH_RB_RPTR, 0);
5943 	WREG32(IH_RB_WPTR, 0);
5944 	rdev->ih.enabled = false;
5945 	rdev->ih.rptr = 0;
5946 }
5947 
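/*
 * Mask every interrupt source (CP rings, DMA engines, GRBM/SRBM, CRTC
 * vblank/pageflip and hotplug), keeping only the context busy/empty bits
 * of CP_INT_CNTL_RING0 and the HPD polarity bits.
 */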
5948 static void si_disable_interrupt_state(struct radeon_device *rdev)
5949 {
5950 	u32 tmp;
5951 
5952 	tmp = RREG32(CP_INT_CNTL_RING0) &
5953 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5954 	WREG32(CP_INT_CNTL_RING0, tmp);
5955 	WREG32(CP_INT_CNTL_RING1, 0);
5956 	WREG32(CP_INT_CNTL_RING2, 0);
5957 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5958 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5959 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5960 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5961 	WREG32(GRBM_INT_CNTL, 0);
5962 	WREG32(SRBM_INT_CNTL, 0);
5963 	if (rdev->num_crtc >= 2) {
5964 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5965 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5966 	}
5967 	if (rdev->num_crtc >= 4) {
5968 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5969 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5970 	}
5971 	if (rdev->num_crtc >= 6) {
5972 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5973 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5974 	}
5975 
5976 	if (rdev->num_crtc >= 2) {
5977 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5978 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5979 	}
5980 	if (rdev->num_crtc >= 4) {
5981 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5982 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5983 	}
5984 	if (rdev->num_crtc >= 6) {
5985 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5986 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5987 	}
5988 
5989 	if (!ASIC_IS_NODCE(rdev)) {
5990 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5991 
5992 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5993 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5994 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5995 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5996 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5997 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5998 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5999 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6000 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6001 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6002 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6003 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6004 	}
6005 }
6006 
6007 static int si_irq_init(struct radeon_device *rdev)
6008 {
6009 	int ret = 0;
6010 	int rb_bufsz;
6011 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6012 
6013 	/* allocate ring */
6014 	ret = r600_ih_ring_alloc(rdev);
6015 	if (ret)
6016 		return ret;
6017 
6018 	/* disable irqs */
6019 	si_disable_interrupts(rdev);
6020 
6021 	/* init rlc */
6022 	ret = si_rlc_resume(rdev);
6023 	if (ret) {
6024 		r600_ih_ring_fini(rdev);
6025 		return ret;
6026 	}
6027 
6028 	/* setup interrupt control */
6029 	/* set dummy read address to ring address */
6030 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6031 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6032 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6033 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6034 	 */
6035 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6036 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6037 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6038 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6039 
6040 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6041 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6042 
6043 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6044 		      IH_WPTR_OVERFLOW_CLEAR |
6045 		      (rb_bufsz << 1));
6046 
6047 	if (rdev->wb.enabled)
6048 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6049 
6050 	/* set the writeback address whether it's enabled or not */
6051 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6052 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6053 
6054 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6055 
6056 	/* set rptr, wptr to 0 */
6057 	WREG32(IH_RB_RPTR, 0);
6058 	WREG32(IH_RB_WPTR, 0);
6059 
6060 	/* Default settings for IH_CNTL (disabled at first) */
6061 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6062 	/* RPTR_REARM only works if msi's are enabled */
6063 	if (rdev->msi_enabled)
6064 		ih_cntl |= RPTR_REARM;
6065 	WREG32(IH_CNTL, ih_cntl);
6066 
6067 	/* force the active interrupt state to all disabled */
6068 	si_disable_interrupt_state(rdev);
6069 
6070 	pci_set_master(rdev->pdev);
6071 
6072 	/* enable irqs */
6073 	si_enable_interrupts(rdev);
6074 
6075 	return ret;
6076 }
6077 
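/*
 * Program the interrupt enables from the rdev->irq state: build the CP,
 * DMA, vblank, hotplug and thermal masks first, then write them back in
 * one pass and finish with a posting read of SRBM_STATUS.
 */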
6078 int si_irq_set(struct radeon_device *rdev)
6079 {
6080 	u32 cp_int_cntl;
6081 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6082 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6083 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6084 	u32 grbm_int_cntl = 0;
6085 	u32 dma_cntl, dma_cntl1;
6086 	u32 thermal_int = 0;
6087 
6088 	if (!rdev->irq.installed) {
6089 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6090 		return -EINVAL;
6091 	}
6092 	/* don't enable anything if the ih is disabled */
6093 	if (!rdev->ih.enabled) {
6094 		si_disable_interrupts(rdev);
6095 		/* force the active interrupt state to all disabled */
6096 		si_disable_interrupt_state(rdev);
6097 		return 0;
6098 	}
6099 
6100 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6101 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6102 
6103 	if (!ASIC_IS_NODCE(rdev)) {
6104 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6105 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6106 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6107 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6108 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6109 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6110 	}
6111 
6112 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6113 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6114 
6115 	thermal_int = RREG32(CG_THERMAL_INT) &
6116 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6117 
6118 	/* enable CP interrupts on all rings */
6119 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6120 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6121 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6122 	}
6123 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6124 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6125 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6126 	}
6127 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6128 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6129 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6130 	}
6131 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6132 		DRM_DEBUG("si_irq_set: sw int dma\n");
6133 		dma_cntl |= TRAP_ENABLE;
6134 	}
6135 
6136 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6137 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6138 		dma_cntl1 |= TRAP_ENABLE;
6139 	}
6140 	if (rdev->irq.crtc_vblank_int[0] ||
6141 	    atomic_read(&rdev->irq.pflip[0])) {
6142 		DRM_DEBUG("si_irq_set: vblank 0\n");
6143 		crtc1 |= VBLANK_INT_MASK;
6144 	}
6145 	if (rdev->irq.crtc_vblank_int[1] ||
6146 	    atomic_read(&rdev->irq.pflip[1])) {
6147 		DRM_DEBUG("si_irq_set: vblank 1\n");
6148 		crtc2 |= VBLANK_INT_MASK;
6149 	}
6150 	if (rdev->irq.crtc_vblank_int[2] ||
6151 	    atomic_read(&rdev->irq.pflip[2])) {
6152 		DRM_DEBUG("si_irq_set: vblank 2\n");
6153 		crtc3 |= VBLANK_INT_MASK;
6154 	}
6155 	if (rdev->irq.crtc_vblank_int[3] ||
6156 	    atomic_read(&rdev->irq.pflip[3])) {
6157 		DRM_DEBUG("si_irq_set: vblank 3\n");
6158 		crtc4 |= VBLANK_INT_MASK;
6159 	}
6160 	if (rdev->irq.crtc_vblank_int[4] ||
6161 	    atomic_read(&rdev->irq.pflip[4])) {
6162 		DRM_DEBUG("si_irq_set: vblank 4\n");
6163 		crtc5 |= VBLANK_INT_MASK;
6164 	}
6165 	if (rdev->irq.crtc_vblank_int[5] ||
6166 	    atomic_read(&rdev->irq.pflip[5])) {
6167 		DRM_DEBUG("si_irq_set: vblank 5\n");
6168 		crtc6 |= VBLANK_INT_MASK;
6169 	}
6170 	if (rdev->irq.hpd[0]) {
6171 		DRM_DEBUG("si_irq_set: hpd 1\n");
6172 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6173 	}
6174 	if (rdev->irq.hpd[1]) {
6175 		DRM_DEBUG("si_irq_set: hpd 2\n");
6176 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6177 	}
6178 	if (rdev->irq.hpd[2]) {
6179 		DRM_DEBUG("si_irq_set: hpd 3\n");
6180 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6181 	}
6182 	if (rdev->irq.hpd[3]) {
6183 		DRM_DEBUG("si_irq_set: hpd 4\n");
6184 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6185 	}
6186 	if (rdev->irq.hpd[4]) {
6187 		DRM_DEBUG("si_irq_set: hpd 5\n");
6188 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6189 	}
6190 	if (rdev->irq.hpd[5]) {
6191 		DRM_DEBUG("si_irq_set: hpd 6\n");
6192 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6193 	}
6194 
6195 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6196 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6197 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6198 
6199 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6200 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6201 
6202 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6203 
6204 	if (rdev->irq.dpm_thermal) {
6205 		DRM_DEBUG("dpm thermal\n");
6206 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6207 	}
6208 
6209 	if (rdev->num_crtc >= 2) {
6210 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6211 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6212 	}
6213 	if (rdev->num_crtc >= 4) {
6214 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6215 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6216 	}
6217 	if (rdev->num_crtc >= 6) {
6218 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6219 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6220 	}
6221 
6222 	if (rdev->num_crtc >= 2) {
6223 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6224 		       GRPH_PFLIP_INT_MASK);
6225 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6226 		       GRPH_PFLIP_INT_MASK);
6227 	}
6228 	if (rdev->num_crtc >= 4) {
6229 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6230 		       GRPH_PFLIP_INT_MASK);
6231 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6232 		       GRPH_PFLIP_INT_MASK);
6233 	}
6234 	if (rdev->num_crtc >= 6) {
6235 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6236 		       GRPH_PFLIP_INT_MASK);
6237 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6238 		       GRPH_PFLIP_INT_MASK);
6239 	}
6240 
6241 	if (!ASIC_IS_NODCE(rdev)) {
6242 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6243 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6244 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6245 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6246 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6247 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6248 	}
6249 
6250 	WREG32(CG_THERMAL_INT, thermal_int);
6251 
6252 	/* posting read */
6253 	RREG32(SRBM_STATUS);
6254 
6255 	return 0;
6256 }
6257 
6258 static inline void si_irq_ack(struct radeon_device *rdev)
6259 {
6260 	u32 tmp;
6261 
6262 	if (ASIC_IS_NODCE(rdev))
6263 		return;
6264 
6265 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6266 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6267 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6268 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6269 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6270 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6271 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6272 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6273 	if (rdev->num_crtc >= 4) {
6274 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6275 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6276 	}
6277 	if (rdev->num_crtc >= 6) {
6278 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6279 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6280 	}
6281 
6282 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6283 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6284 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6285 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6286 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6287 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6288 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6289 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6290 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6291 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6292 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6293 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6294 
6295 	if (rdev->num_crtc >= 4) {
6296 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6297 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6298 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6299 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6300 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6301 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6302 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6303 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6304 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6305 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6306 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6307 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6308 	}
6309 
6310 	if (rdev->num_crtc >= 6) {
6311 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6312 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6313 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6314 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6315 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6316 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6317 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6318 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6319 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6320 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6321 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6322 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6323 	}
6324 
6325 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6326 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6327 		tmp |= DC_HPDx_INT_ACK;
6328 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6329 	}
6330 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6331 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6332 		tmp |= DC_HPDx_INT_ACK;
6333 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6334 	}
6335 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6336 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6337 		tmp |= DC_HPDx_INT_ACK;
6338 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6339 	}
6340 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6341 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6342 		tmp |= DC_HPDx_INT_ACK;
6343 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6344 	}
6345 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6346 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6347 		tmp |= DC_HPDx_INT_ACK;
6348 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6349 	}
6350 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6351 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6352 		tmp |= DC_HPDx_INT_ACK;
6353 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6354 	}
6355 
6356 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6357 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6358 		tmp |= DC_HPDx_RX_INT_ACK;
6359 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6360 	}
6361 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6362 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6363 		tmp |= DC_HPDx_RX_INT_ACK;
6364 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6365 	}
6366 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6367 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6368 		tmp |= DC_HPDx_RX_INT_ACK;
6369 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6370 	}
6371 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6372 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6373 		tmp |= DC_HPDx_RX_INT_ACK;
6374 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6375 	}
6376 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6377 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6378 		tmp |= DC_HPDx_RX_INT_ACK;
6379 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6380 	}
6381 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6382 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6383 		tmp |= DC_HPDx_RX_INT_ACK;
6384 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6385 	}
6386 }
6387 
6388 static void si_irq_disable(struct radeon_device *rdev)
6389 {
6390 	si_disable_interrupts(rdev);
6391 	/* Wait and acknowledge irq */
6392 	mdelay(1);
6393 	si_irq_ack(rdev);
6394 	si_disable_interrupt_state(rdev);
6395 }
6396 
6397 static void si_irq_suspend(struct radeon_device *rdev)
6398 {
6399 	si_irq_disable(rdev);
6400 	si_rlc_stop(rdev);
6401 }
6402 
6403 static void si_irq_fini(struct radeon_device *rdev)
6404 {
6405 	si_irq_suspend(rdev);
6406 	r600_ih_ring_fini(rdev);
6407 }
6408 
6409 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6410 {
6411 	u32 wptr, tmp;
6412 
6413 	if (rdev->wb.enabled)
6414 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6415 	else
6416 		wptr = RREG32(IH_RB_WPTR);
6417 
6418 	if (wptr & RB_OVERFLOW) {
6419 		wptr &= ~RB_OVERFLOW;
6420 		/* When a ring buffer overflow happens, start parsing interrupts
6421 		 * from the last vector that was not overwritten (wptr + 16).
6422 		 * Hopefully this allows us to catch up.
6423 		 */
6424 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6425 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6426 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6427 		tmp = RREG32(IH_RB_CNTL);
6428 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6429 		WREG32(IH_RB_CNTL, tmp);
6430 	}
6431 	return (wptr & rdev->ih.ptr_mask);
6432 }
6433 
6434 /*        SI IV Ring
6435  * Each IV ring entry is 128 bits:
6436  * [7:0]    - interrupt source id
6437  * [31:8]   - reserved
6438  * [59:32]  - interrupt source data
6439  * [63:60]  - reserved
6440  * [71:64]  - RINGID
6441  * [79:72]  - VMID
6442  * [127:80] - reserved
6443  */
6444 irqreturn_t si_irq_process(struct radeon_device *rdev)
6445 {
6446 	u32 wptr;
6447 	u32 rptr;
6448 	u32 src_id, src_data, ring_id;
6449 	u32 ring_index;
6450 	bool queue_hotplug = false;
6451 	bool queue_dp = false;
6452 	bool queue_thermal = false;
6453 	u32 status, addr;
6454 
6455 	if (!rdev->ih.enabled || rdev->shutdown)
6456 		return IRQ_NONE;
6457 
6458 	wptr = si_get_ih_wptr(rdev);
6459 
6460 restart_ih:
6461 	/* is somebody else already processing irqs? */
6462 	if (atomic_xchg(&rdev->ih.lock, 1))
6463 		return IRQ_NONE;
6464 
6465 	rptr = rdev->ih.rptr;
6466 	DRM_DEBUG_VBLANK("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6467 
6468 	/* Order reading of wptr vs. reading of IH ring data */
6469 	rmb();
6470 
6471 	/* display interrupts */
6472 	si_irq_ack(rdev);
6473 
6474 	while (rptr != wptr) {
6475 		/* wptr/rptr are in bytes! */
6476 		ring_index = rptr / 4;
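		/* decode the IV entry fields described above: 8-bit source
		 * id, 28-bit source data and 8-bit ring id */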
6477 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6478 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6479 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6480 
6481 		switch (src_id) {
6482 		case 1: /* D1 vblank/vline */
6483 			switch (src_data) {
6484 			case 0: /* D1 vblank */
6485 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6486 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6487 
6488 				if (rdev->irq.crtc_vblank_int[0]) {
6489 					drm_handle_vblank(rdev->ddev, 0);
6490 					rdev->pm.vblank_sync = true;
6491 					wake_up(&rdev->irq.vblank_queue);
6492 				}
6493 				if (atomic_read(&rdev->irq.pflip[0]))
6494 					radeon_crtc_handle_vblank(rdev, 0);
6495 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6496 				DRM_DEBUG_VBLANK("IH: D1 vblank\n");
6497 
6498 				break;
6499 			case 1: /* D1 vline */
6500 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6501 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6502 
6503 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6504 				DRM_DEBUG_VBLANK("IH: D1 vline\n");
6505 
6506 				break;
6507 			default:
6508 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6509 				break;
6510 			}
6511 			break;
6512 		case 2: /* D2 vblank/vline */
6513 			switch (src_data) {
6514 			case 0: /* D2 vblank */
6515 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6516 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6517 
6518 				if (rdev->irq.crtc_vblank_int[1]) {
6519 					drm_handle_vblank(rdev->ddev, 1);
6520 					rdev->pm.vblank_sync = true;
6521 					wake_up(&rdev->irq.vblank_queue);
6522 				}
6523 				if (atomic_read(&rdev->irq.pflip[1]))
6524 					radeon_crtc_handle_vblank(rdev, 1);
6525 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6526 				DRM_DEBUG_VBLANK("IH: D2 vblank\n");
6527 
6528 				break;
6529 			case 1: /* D2 vline */
6530 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6531 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6532 
6533 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6534 				DRM_DEBUG_VBLANK("IH: D2 vline\n");
6535 
6536 				break;
6537 			default:
6538 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6539 				break;
6540 			}
6541 			break;
6542 		case 3: /* D3 vblank/vline */
6543 			switch (src_data) {
6544 			case 0: /* D3 vblank */
6545 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6546 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6547 
6548 				if (rdev->irq.crtc_vblank_int[2]) {
6549 					drm_handle_vblank(rdev->ddev, 2);
6550 					rdev->pm.vblank_sync = true;
6551 					wake_up(&rdev->irq.vblank_queue);
6552 				}
6553 				if (atomic_read(&rdev->irq.pflip[2]))
6554 					radeon_crtc_handle_vblank(rdev, 2);
6555 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6556 				DRM_DEBUG_VBLANK("IH: D3 vblank\n");
6557 
6558 				break;
6559 			case 1: /* D3 vline */
6560 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6561 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6562 
6563 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6564 				DRM_DEBUG_VBLANK("IH: D3 vline\n");
6565 
6566 				break;
6567 			default:
6568 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6569 				break;
6570 			}
6571 			break;
6572 		case 4: /* D4 vblank/vline */
6573 			switch (src_data) {
6574 			case 0: /* D4 vblank */
6575 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6576 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6577 
6578 				if (rdev->irq.crtc_vblank_int[3]) {
6579 					drm_handle_vblank(rdev->ddev, 3);
6580 					rdev->pm.vblank_sync = true;
6581 					wake_up(&rdev->irq.vblank_queue);
6582 				}
6583 				if (atomic_read(&rdev->irq.pflip[3]))
6584 					radeon_crtc_handle_vblank(rdev, 3);
6585 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6586 				DRM_DEBUG_VBLANK("IH: D4 vblank\n");
6587 
6588 				break;
6589 			case 1: /* D4 vline */
6590 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6591 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6592 
6593 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6594 				DRM_DEBUG_VBLANK("IH: D4 vline\n");
6595 
6596 				break;
6597 			default:
6598 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6599 				break;
6600 			}
6601 			break;
6602 		case 5: /* D5 vblank/vline */
6603 			switch (src_data) {
6604 			case 0: /* D5 vblank */
6605 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6606 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6607 
6608 				if (rdev->irq.crtc_vblank_int[4]) {
6609 					drm_handle_vblank(rdev->ddev, 4);
6610 					rdev->pm.vblank_sync = true;
6611 					wake_up(&rdev->irq.vblank_queue);
6612 				}
6613 				if (atomic_read(&rdev->irq.pflip[4]))
6614 					radeon_crtc_handle_vblank(rdev, 4);
6615 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6616 				DRM_DEBUG_VBLANK("IH: D5 vblank\n");
6617 
6618 				break;
6619 			case 1: /* D5 vline */
6620 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6621 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6622 
6623 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6624 				DRM_DEBUG_VBLANK("IH: D5 vline\n");
6625 
6626 				break;
6627 			default:
6628 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6629 				break;
6630 			}
6631 			break;
6632 		case 6: /* D6 vblank/vline */
6633 			switch (src_data) {
6634 			case 0: /* D6 vblank */
6635 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6636 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6637 
6638 				if (rdev->irq.crtc_vblank_int[5]) {
6639 					drm_handle_vblank(rdev->ddev, 5);
6640 					rdev->pm.vblank_sync = true;
6641 					wake_up(&rdev->irq.vblank_queue);
6642 				}
6643 				if (atomic_read(&rdev->irq.pflip[5]))
6644 					radeon_crtc_handle_vblank(rdev, 5);
6645 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6646 				DRM_DEBUG_VBLANK("IH: D6 vblank\n");
6647 
6648 				break;
6649 			case 1: /* D6 vline */
6650 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6651 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6652 
6653 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6654 				DRM_DEBUG_VBLANK("IH: D6 vline\n");
6655 
6656 				break;
6657 			default:
6658 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6659 				break;
6660 			}
6661 			break;
6662 		case 8: /* D1 page flip */
6663 		case 10: /* D2 page flip */
6664 		case 12: /* D3 page flip */
6665 		case 14: /* D4 page flip */
6666 		case 16: /* D5 page flip */
6667 		case 18: /* D6 page flip */
6668 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6669 			if (radeon_use_pflipirq > 0)
6670 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6671 			break;
6672 		case 42: /* HPD hotplug */
6673 			switch (src_data) {
6674 			case 0:
6675 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6676 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6677 
6678 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6679 				queue_hotplug = true;
6680 				DRM_DEBUG("IH: HPD1\n");
6681 
6682 				break;
6683 			case 1:
6684 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6685 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6686 
6687 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6688 				queue_hotplug = true;
6689 				DRM_DEBUG("IH: HPD2\n");
6690 
6691 				break;
6692 			case 2:
6693 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6694 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6695 
6696 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6697 				queue_hotplug = true;
6698 				DRM_DEBUG("IH: HPD3\n");
6699 
6700 				break;
6701 			case 3:
6702 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6703 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6704 
6705 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6706 				queue_hotplug = true;
6707 				DRM_DEBUG("IH: HPD4\n");
6708 
6709 				break;
6710 			case 4:
6711 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6712 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6713 
6714 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6715 				queue_hotplug = true;
6716 				DRM_DEBUG("IH: HPD5\n");
6717 
6718 				break;
6719 			case 5:
6720 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6721 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6722 
6723 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6724 				queue_hotplug = true;
6725 				DRM_DEBUG("IH: HPD6\n");
6726 
6727 				break;
6728 			case 6:
6729 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6730 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6731 
6732 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6733 				queue_dp = true;
6734 				DRM_DEBUG("IH: HPD_RX 1\n");
6735 
6736 				break;
6737 			case 7:
6738 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6739 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6740 
6741 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6742 				queue_dp = true;
6743 				DRM_DEBUG("IH: HPD_RX 2\n");
6744 
6745 				break;
6746 			case 8:
6747 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6748 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6749 
6750 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6751 				queue_dp = true;
6752 				DRM_DEBUG("IH: HPD_RX 3\n");
6753 
6754 				break;
6755 			case 9:
6756 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6757 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6758 
6759 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6760 				queue_dp = true;
6761 				DRM_DEBUG("IH: HPD_RX 4\n");
6762 
6763 				break;
6764 			case 10:
6765 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6766 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6767 
6768 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6769 				queue_dp = true;
6770 				DRM_DEBUG("IH: HPD_RX 5\n");
6771 
6772 				break;
6773 			case 11:
6774 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6775 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6776 
6777 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6778 				queue_dp = true;
6779 				DRM_DEBUG("IH: HPD_RX 6\n");
6780 
6781 				break;
6782 			default:
6783 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6784 				break;
6785 			}
6786 			break;
		case 96: /* SRBM read error */
6788 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6789 			WREG32(SRBM_INT_ACK, 0x1);
6790 			break;
6791 		case 124: /* UVD */
6792 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6793 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6794 			break;
		case 146:
		case 147: /* VM protection fault */
6797 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6798 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6799 			/* reset addr and status */
6800 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6801 			if (addr == 0x0 && status == 0x0)
6802 				break;
6803 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6804 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6805 				addr);
6806 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6807 				status);
6808 			si_vm_decode_fault(rdev, status, addr);
6809 			break;
6810 		case 176: /* RINGID0 CP_INT */
6811 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6812 			break;
6813 		case 177: /* RINGID1 CP_INT */
6814 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6815 			break;
6816 		case 178: /* RINGID2 CP_INT */
6817 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6818 			break;
6819 		case 181: /* CP EOP event */
6820 			DRM_DEBUG("IH: CP EOP\n");
6821 			switch (ring_id) {
6822 			case 0:
6823 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6824 				break;
6825 			case 1:
6826 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6827 				break;
6828 			case 2:
6829 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6830 				break;
6831 			}
6832 			break;
6833 		case 224: /* DMA trap event */
6834 			DRM_DEBUG("IH: DMA trap\n");
6835 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6836 			break;
6837 		case 230: /* thermal low to high */
6838 			DRM_DEBUG("IH: thermal low to high\n");
6839 			rdev->pm.dpm.thermal.high_to_low = false;
6840 			queue_thermal = true;
6841 			break;
6842 		case 231: /* thermal high to low */
6843 			DRM_DEBUG("IH: thermal high to low\n");
6844 			rdev->pm.dpm.thermal.high_to_low = true;
6845 			queue_thermal = true;
6846 			break;
6847 		case 233: /* GUI IDLE */
6848 			DRM_DEBUG("IH: GUI idle\n");
6849 			break;
		case 244: /* DMA1 trap event */
6851 			DRM_DEBUG("IH: DMA1 trap\n");
6852 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6853 			break;
6854 		default:
6855 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6856 			break;
6857 		}
6858 
		/* wptr/rptr are in bytes; each IH ring entry is 128 bits (16 bytes) */
6860 		rptr += 16;
6861 		rptr &= rdev->ih.ptr_mask;
6862 		WREG32(IH_RB_RPTR, rptr);
6863 	}
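	/* kick off any deferred work queued while draining the IH ring */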
6864 	if (queue_dp)
6865 		schedule_work(&rdev->dp_work);
6866 	if (queue_hotplug)
6867 		schedule_delayed_work(&rdev->hotplug_work, 0);
6868 	if (queue_thermal && rdev->pm.dpm_enabled)
6869 		schedule_work(&rdev->pm.dpm.thermal.work);
6870 	rdev->ih.rptr = rptr;
6871 	atomic_set(&rdev->ih.lock, 0);
6872 
6873 	/* make sure wptr hasn't changed while processing */
6874 	wptr = si_get_ih_wptr(rdev);
6875 	if (wptr != rptr)
6876 		goto restart_ih;
6877 
6878 	return IRQ_HANDLED;
6879 }
6880 
6881 /*
6882  * startup/shutdown callbacks
6883  */
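/**
 * si_startup - program the hardware to a working state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the MC and GART, loads the microcode, brings up the CP,
 * DMA, UVD and VCE rings and the interrupt controller, then starts
 * the IB pool, VM manager and audio (SI).
 * Used by both si_init() and si_resume().
 * Returns 0 on success, negative error code on failure.
 */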
6884 static int si_startup(struct radeon_device *rdev)
6885 {
6886 	struct radeon_ring *ring;
6887 	int r;
6888 
6889 	/* enable pcie gen2/3 link */
6890 	si_pcie_gen3_enable(rdev);
6891 	/* enable aspm */
6892 	si_program_aspm(rdev);
6893 
6894 	/* scratch needs to be initialized before MC */
6895 	r = r600_vram_scratch_init(rdev);
6896 	if (r)
6897 		return r;
6898 
6899 	si_mc_program(rdev);
6900 
6901 	if (!rdev->pm.dpm_enabled) {
6902 		r = si_mc_load_microcode(rdev);
6903 		if (r) {
6904 			DRM_ERROR("Failed to load MC firmware!\n");
6905 			return r;
6906 		}
6907 	}
6908 
6909 	r = si_pcie_gart_enable(rdev);
6910 	if (r)
6911 		return r;
6912 	si_gpu_init(rdev);
6913 
6914 	/* allocate rlc buffers */
6915 	if (rdev->family == CHIP_VERDE) {
6916 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6917 		rdev->rlc.reg_list_size =
6918 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6919 	}
6920 	rdev->rlc.cs_data = si_cs_data;
6921 	r = sumo_rlc_init(rdev);
6922 	if (r) {
6923 		DRM_ERROR("Failed to init rlc BOs!\n");
6924 		return r;
6925 	}
6926 
6927 	/* allocate wb buffer */
6928 	r = radeon_wb_init(rdev);
6929 	if (r)
6930 		return r;
6931 
6932 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6933 	if (r) {
6934 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6935 		return r;
6936 	}
6937 
6938 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6939 	if (r) {
6940 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6941 		return r;
6942 	}
6943 
6944 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6945 	if (r) {
6946 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6947 		return r;
6948 	}
6949 
6950 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6951 	if (r) {
6952 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6953 		return r;
6954 	}
6955 
6956 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6957 	if (r) {
6958 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6959 		return r;
6960 	}
6961 
6962 	if (rdev->has_uvd) {
6963 		r = uvd_v2_2_resume(rdev);
6964 		if (!r) {
6965 			r = radeon_fence_driver_start_ring(rdev,
6966 							   R600_RING_TYPE_UVD_INDEX);
6967 			if (r)
6968 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6969 		}
6970 		if (r)
6971 			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6972 	}
6973 
6974 	r = radeon_vce_resume(rdev);
6975 	if (!r) {
6976 		r = vce_v1_0_resume(rdev);
6977 		if (!r)
6978 			r = radeon_fence_driver_start_ring(rdev,
6979 							   TN_RING_TYPE_VCE1_INDEX);
6980 		if (!r)
6981 			r = radeon_fence_driver_start_ring(rdev,
6982 							   TN_RING_TYPE_VCE2_INDEX);
6983 	}
6984 	if (r) {
6985 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
6986 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6987 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6988 	}
6989 
6990 	/* Enable IRQ */
6991 	if (!rdev->irq.installed) {
6992 		r = radeon_irq_kms_init(rdev);
6993 		if (r)
6994 			return r;
6995 	}
6996 
6997 	r = si_irq_init(rdev);
6998 	if (r) {
6999 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7000 		radeon_irq_kms_fini(rdev);
7001 		return r;
7002 	}
7003 	si_irq_set(rdev);
7004 
7005 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7006 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7007 			     RADEON_CP_PACKET2);
7008 	if (r)
7009 		return r;
7010 
7011 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7012 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7013 			     RADEON_CP_PACKET2);
7014 	if (r)
7015 		return r;
7016 
7017 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7018 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7019 			     RADEON_CP_PACKET2);
7020 	if (r)
7021 		return r;
7022 
7023 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7024 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7025 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7026 	if (r)
7027 		return r;
7028 
7029 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7030 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7031 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7032 	if (r)
7033 		return r;
7034 
7035 	r = si_cp_load_microcode(rdev);
7036 	if (r)
7037 		return r;
7038 	r = si_cp_resume(rdev);
7039 	if (r)
7040 		return r;
7041 
7042 	r = cayman_dma_resume(rdev);
7043 	if (r)
7044 		return r;
7045 
7046 	if (rdev->has_uvd) {
7047 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7048 		if (ring->ring_size) {
7049 			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7050 					     RADEON_CP_PACKET2);
7051 			if (!r)
7052 				r = uvd_v1_0_init(rdev);
7053 			if (r)
7054 				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7055 		}
7056 	}
7057 
	r = -ENOENT; /* stays -ENOENT when no VCE rings were set up */
7059 
7060 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7061 	if (ring->ring_size)
7062 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7063 				     VCE_CMD_NO_OP);
7064 
7065 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7066 	if (ring->ring_size)
7067 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7068 				     VCE_CMD_NO_OP);
7069 
7070 	if (!r)
7071 		r = vce_v1_0_init(rdev);
7072 	else if (r != -ENOENT)
7073 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
7074 
7075 	r = radeon_ib_pool_init(rdev);
7076 	if (r) {
7077 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7078 		return r;
7079 	}
7080 
7081 	r = radeon_vm_manager_init(rdev);
7082 	if (r) {
7083 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7084 		return r;
7085 	}
7086 
7087 	r = radeon_audio_init(rdev);
7088 	if (r)
7089 		return r;
7090 
7091 	return 0;
7092 }
7093 
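/**
 * si_resume - resume the ASIC after suspend
 *
 * @rdev: radeon_device pointer
 *
 * Re-posts the card via the ATOM BIOS, restores the golden registers
 * and restarts the hardware through si_startup() (SI).
 * Returns 0 on success, negative error code on failure.
 */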
7094 int si_resume(struct radeon_device *rdev)
7095 {
7096 	int r;
7097 
	/* Do not reset the GPU before posting: unlike on r500 hardware,
	 * posting on rv770 and newer hardware performs the tasks needed
	 * to bring the GPU back into a good state.
	 */
7102 	/* post card */
7103 	atom_asic_init(rdev->mode_info.atom_context);
7104 
7105 	/* init golden registers */
7106 	si_init_golden_registers(rdev);
7107 
7108 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7109 		radeon_pm_resume(rdev);
7110 
7111 	rdev->accel_working = true;
7112 	r = si_startup(rdev);
7113 	if (r) {
7114 		DRM_ERROR("si startup failed on resume\n");
7115 		rdev->accel_working = false;
7116 		return r;
7117 	}
7118 
	return r;
}
7122 
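/**
 * si_suspend - quiesce the ASIC for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops the CP and DMA engines, suspends UVD/VCE when present, tears
 * down PG/CG state and disables interrupts, writeback and the GART
 * (SI).
 * Always returns 0.
 */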
7123 int si_suspend(struct radeon_device *rdev)
7124 {
7125 	radeon_pm_suspend(rdev);
7126 	radeon_audio_fini(rdev);
7127 	radeon_vm_manager_fini(rdev);
7128 	si_cp_enable(rdev, false);
7129 	cayman_dma_stop(rdev);
7130 	if (rdev->has_uvd) {
7131 		uvd_v1_0_fini(rdev);
7132 		radeon_uvd_suspend(rdev);
7133 		radeon_vce_suspend(rdev);
7134 	}
7135 	si_fini_pg(rdev);
7136 	si_fini_cg(rdev);
7137 	si_irq_suspend(rdev);
7138 	radeon_wb_disable(rdev);
7139 	si_pcie_gart_disable(rdev);
7140 	return 0;
7141 }
7142 
/* The plan is to move initialization into this function and to use
 * helper functions so that radeon_device_init does little more than
 * call ASIC-specific functions. This should also allow us to remove
 * a bunch of callbacks such as vram_info.
 */
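/**
 * si_init - asic specific driver and hw initialization
 *
 * @rdev: radeon_device pointer
 *
 * Reads and, if necessary, posts the BIOS, initializes the memory
 * controller, fence driver, rings, GART and IH ring, then performs
 * the first hardware startup via si_startup() (SI).
 * Returns 0 on success, negative error code on failure.
 */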
7149 int si_init(struct radeon_device *rdev)
7150 {
7151 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7152 	int r;
7153 
7154 	/* Read BIOS */
7155 	if (!radeon_get_bios(rdev)) {
7156 		if (ASIC_IS_AVIVO(rdev))
7157 			return -EINVAL;
7158 	}
7159 	/* Must be an ATOMBIOS */
7160 	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
7162 		return -EINVAL;
7163 	}
7164 	r = radeon_atombios_init(rdev);
7165 	if (r)
7166 		return r;
7167 
7168 	/* Post card if necessary */
7169 	if (!radeon_card_posted(rdev)) {
7170 		if (!rdev->bios) {
7171 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7172 			return -EINVAL;
7173 		}
7174 		DRM_INFO("GPU not posted. posting now...\n");
7175 		atom_asic_init(rdev->mode_info.atom_context);
7176 	}
7177 	/* init golden registers */
7178 	si_init_golden_registers(rdev);
7179 	/* Initialize scratch registers */
7180 	si_scratch_init(rdev);
7181 	/* Initialize surface registers */
7182 	radeon_surface_init(rdev);
7183 	/* Initialize clocks */
7184 	radeon_get_clock_info(rdev->ddev);
7185 
7186 	/* Fence driver */
7187 	r = radeon_fence_driver_init(rdev);
7188 	if (r)
7189 		return r;
7190 
7191 	/* initialize memory controller */
7192 	r = si_mc_init(rdev);
7193 	if (r)
7194 		return r;
7195 	/* Memory manager */
7196 	r = radeon_bo_init(rdev);
7197 	if (r)
7198 		return r;
7199 
7200 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7201 	    !rdev->rlc_fw || !rdev->mc_fw) {
7202 		r = si_init_microcode(rdev);
7203 		if (r) {
7204 			DRM_ERROR("Failed to load firmware!\n");
7205 			return r;
7206 		}
7207 	}
7208 
7209 	/* Initialize power management */
7210 	radeon_pm_init(rdev);
7211 
7212 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7213 	ring->ring_obj = NULL;
7214 	r600_ring_init(rdev, ring, 1024 * 1024);
7215 
7216 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7217 	ring->ring_obj = NULL;
7218 	r600_ring_init(rdev, ring, 1024 * 1024);
7219 
7220 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7221 	ring->ring_obj = NULL;
7222 	r600_ring_init(rdev, ring, 1024 * 1024);
7223 
7224 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7225 	ring->ring_obj = NULL;
7226 	r600_ring_init(rdev, ring, 64 * 1024);
7227 
7228 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7229 	ring->ring_obj = NULL;
7230 	r600_ring_init(rdev, ring, 64 * 1024);
7231 
7232 	if (rdev->has_uvd) {
7233 		r = radeon_uvd_init(rdev);
7234 		if (!r) {
7235 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7236 			ring->ring_obj = NULL;
7237 			r600_ring_init(rdev, ring, 4096);
7238 		}
7239 	}
7240 
7241 	r = radeon_vce_init(rdev);
7242 	if (!r) {
7243 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7244 		ring->ring_obj = NULL;
7245 		r600_ring_init(rdev, ring, 4096);
7246 
7247 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7248 		ring->ring_obj = NULL;
7249 		r600_ring_init(rdev, ring, 4096);
7250 	}
7251 
7252 	rdev->ih.ring_obj = NULL;
7253 	r600_ih_ring_init(rdev, 64 * 1024);
7254 
7255 	r = r600_pcie_gart_init(rdev);
7256 	if (r)
7257 		return r;
7258 
7259 	rdev->accel_working = true;
7260 	r = si_startup(rdev);
7261 	if (r) {
7262 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7263 		si_cp_fini(rdev);
7264 		cayman_dma_fini(rdev);
7265 		si_irq_fini(rdev);
7266 		sumo_rlc_fini(rdev);
7267 		radeon_wb_fini(rdev);
7268 		radeon_ib_pool_fini(rdev);
7269 		radeon_vm_manager_fini(rdev);
7270 		radeon_irq_kms_fini(rdev);
7271 		si_pcie_gart_fini(rdev);
7272 		rdev->accel_working = false;
7273 	}
7274 
7275 	/* Don't start up if the MC ucode is missing.
7276 	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
7278 	 */
7279 	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for SI+.\n");
7281 		return -EINVAL;
7282 	}
7283 
7284 	return 0;
7285 }
7286 
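/**
 * si_fini - asic specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the state set up by si_init(): power management, CP and
 * DMA engines, interrupts, UVD/VCE when present, GART, memory
 * management and the BIOS copy (SI).
 */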
7287 void si_fini(struct radeon_device *rdev)
7288 {
7289 	radeon_pm_fini(rdev);
7290 	si_cp_fini(rdev);
7291 	cayman_dma_fini(rdev);
7292 	si_fini_pg(rdev);
7293 	si_fini_cg(rdev);
7294 	si_irq_fini(rdev);
7295 	sumo_rlc_fini(rdev);
7296 	radeon_wb_fini(rdev);
7297 	radeon_vm_manager_fini(rdev);
7298 	radeon_ib_pool_fini(rdev);
7299 	radeon_irq_kms_fini(rdev);
7300 	if (rdev->has_uvd) {
7301 		uvd_v1_0_fini(rdev);
7302 		radeon_uvd_fini(rdev);
7303 		radeon_vce_fini(rdev);
7304 	}
7305 	si_pcie_gart_fini(rdev);
7306 	r600_vram_scratch_fini(rdev);
7307 	radeon_gem_fini(rdev);
7308 	radeon_fence_driver_fini(rdev);
7309 	radeon_bo_fini(rdev);
7310 	radeon_atombios_fini(rdev);
7311 	si_fini_microcode(rdev);
7312 	kfree(rdev->bios);
7313 	rdev->bios = NULL;
7314 }
7315 
7316 /**
7317  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7318  *
7319  * @rdev: radeon_device pointer
7320  *
7321  * Fetches a GPU clock counter snapshot (SI).
7322  * Returns the 64 bit clock counter snapshot.
7323  */
7324 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7325 {
7326 	uint64_t clock;
7327 
7328 	mutex_lock(&rdev->gpu_clock_mutex);
7329 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7330 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7331 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7332 	mutex_unlock(&rdev->gpu_clock_mutex);
7333 	return clock;
7334 }
7335 
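/**
 * si_set_uvd_clocks - adjust the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK, or 0 to leave the UPLL in bypass
 * @dclk: wanted DCLK, or 0 to leave the UPLL in bypass
 *
 * Puts the UPLL into bypass, reprograms the dividers and switches
 * the PLL back in once it has settled (SI).
 * Returns 0 on success, negative error code on failure.
 */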
7336 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7337 {
7338 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7339 	int r;
7340 
7341 	/* bypass vclk and dclk with bclk */
7342 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7343 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7344 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7345 
7346 	/* put PLL in bypass mode */
7347 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7348 
7349 	if (!vclk || !dclk) {
7350 		/* keep the Bypass mode */
7351 		return 0;
7352 	}
7353 
7354 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7355 					  16384, 0x03FFFFFF, 0, 128, 5,
7356 					  &fb_div, &vclk_div, &dclk_div);
7357 	if (r)
7358 		return r;
7359 
7360 	/* set RESET_ANTI_MUX to 0 */
7361 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7362 
7363 	/* set VCO_MODE to 1 */
7364 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7365 
7366 	/* disable sleep mode */
7367 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7368 
7369 	/* deassert UPLL_RESET */
7370 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7371 
7372 	mdelay(1);
7373 
7374 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7375 	if (r)
7376 		return r;
7377 
7378 	/* assert UPLL_RESET again */
7379 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7380 
7381 	/* disable spread spectrum. */
7382 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7383 
7384 	/* set feedback divider */
7385 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7386 
7387 	/* set ref divider to 0 */
7388 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7389 
7390 	if (fb_div < 307200)
7391 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7392 	else
7393 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7394 
7395 	/* set PDIV_A and PDIV_B */
7396 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7397 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7398 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7399 
7400 	/* give the PLL some time to settle */
7401 	mdelay(15);
7402 
7403 	/* deassert PLL_RESET */
7404 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7405 
7406 	mdelay(15);
7407 
7408 	/* switch from bypass mode to normal mode */
7409 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7410 
7411 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7412 	if (r)
7413 		return r;
7414 
7415 	/* switch VCLK and DCLK selection */
7416 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7417 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7418 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7419 
7420 	mdelay(100);
7421 
7422 	return 0;
7423 }
7424 
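/**
 * si_pcie_gen3_enable - raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Checks the speeds supported by the root port, redoes gen3
 * equalization when needed and requests the highest supported link
 * rate (SI). Disabled with radeon.pcie_gen2=0.
 */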
7425 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7426 {
7427 	struct pci_dev *root = rdev->pdev->bus->self;
7428 	int bridge_pos, gpu_pos;
7429 	u32 speed_cntl, mask, current_data_rate;
7430 	int ret, i;
7431 	u16 tmp16;
7432 
7433 #if 0
7434 	if (pci_is_root_bus(rdev->pdev->bus))
7435 		return;
7436 #endif
7437 
7438 	if (radeon_pcie_gen2 == 0)
7439 		return;
7440 
7441 	if (rdev->flags & RADEON_IS_IGP)
7442 		return;
7443 
7444 	if (!(rdev->flags & RADEON_IS_PCIE))
7445 		return;
7446 
7447 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7448 	if (ret != 0)
7449 		return;
7450 
7451 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7452 		return;
7453 
7454 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7455 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7456 		LC_CURRENT_DATA_RATE_SHIFT;
7457 	if (mask & DRM_PCIE_SPEED_80) {
7458 		if (current_data_rate == 2) {
7459 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7460 			return;
7461 		}
7462 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7463 	} else if (mask & DRM_PCIE_SPEED_50) {
7464 		if (current_data_rate == 1) {
7465 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7466 			return;
7467 		}
7468 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7469 	}
7470 
7471 	bridge_pos = pci_pcie_cap(root);
7472 	if (!bridge_pos)
7473 		return;
7474 
7475 	gpu_pos = pci_pcie_cap(rdev->pdev);
7476 	if (!gpu_pos)
7477 		return;
7478 
7479 	if (mask & DRM_PCIE_SPEED_80) {
7480 		/* re-try equalization if gen3 is not already enabled */
7481 		if (current_data_rate != 2) {
7482 			u16 bridge_cfg, gpu_cfg;
7483 			u16 bridge_cfg2, gpu_cfg2;
7484 			u32 max_lw, current_lw, tmp;
7485 
7486 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7487 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7488 
7489 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7490 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7491 
7492 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7493 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7494 
7495 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7496 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7497 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7498 
7499 			if (current_lw < max_lw) {
7500 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7501 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7502 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7503 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7504 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7505 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7506 				}
7507 			}
7508 
7509 			for (i = 0; i < 10; i++) {
7510 				/* check status */
7511 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7512 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7513 					break;
7514 
7515 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7516 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7517 
7518 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7519 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7520 
7521 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7522 				tmp |= LC_SET_QUIESCE;
7523 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7524 
7525 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7526 				tmp |= LC_REDO_EQ;
7527 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7528 
7529 				mdelay(100);
7530 
7531 				/* linkctl */
7532 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7533 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7534 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7535 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7536 
7537 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7538 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7539 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7540 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7541 
7542 				/* linkctl2 */
7543 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7544 				tmp16 &= ~((1 << 4) | (7 << 9));
7545 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7546 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7547 
7548 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7549 				tmp16 &= ~((1 << 4) | (7 << 9));
7550 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7551 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7552 
7553 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7554 				tmp &= ~LC_SET_QUIESCE;
7555 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7556 			}
7557 		}
7558 	}
7559 
7560 	/* set the link speed */
7561 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7562 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7563 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7564 
7565 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf; /* clear target link speed */
7567 	if (mask & DRM_PCIE_SPEED_80)
7568 		tmp16 |= 3; /* gen3 */
7569 	else if (mask & DRM_PCIE_SPEED_50)
7570 		tmp16 |= 2; /* gen2 */
7571 	else
7572 		tmp16 |= 1; /* gen1 */
7573 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7574 
7575 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7576 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7577 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7578 
7579 	for (i = 0; i < rdev->usec_timeout; i++) {
7580 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7581 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7582 			break;
7583 		udelay(1);
7584 	}
7585 }
7586 
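/**
 * si_program_aspm - program PCIE ASPM behaviour
 *
 * @rdev: radeon_device pointer
 *
 * Configures the L0s/L1 inactivity timers, PLL power-down in L1 and
 * the related clock selections (SI). Disabled with radeon.aspm=0.
 */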
7587 static void si_program_aspm(struct radeon_device *rdev)
7588 {
7589 	u32 data, orig;
7590 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7591 #if 0
7592 	bool disable_clkreq = false;
7593 #endif
7594 
7595 	if (radeon_aspm == 0)
7596 		return;
7597 
7598 	if (!(rdev->flags & RADEON_IS_PCIE))
7599 		return;
7600 
7601 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7602 	data &= ~LC_XMIT_N_FTS_MASK;
7603 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7604 	if (orig != data)
7605 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7606 
7607 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7608 	data |= LC_GO_TO_RECOVERY;
7609 	if (orig != data)
7610 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7611 
7612 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7613 	data |= P_IGNORE_EDB_ERR;
7614 	if (orig != data)
7615 		WREG32_PCIE(PCIE_P_CNTL, data);
7616 
7617 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7618 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7619 	data |= LC_PMI_TO_L1_DIS;
7620 	if (!disable_l0s)
7621 		data |= LC_L0S_INACTIVITY(7);
7622 
7623 	if (!disable_l1) {
7624 		data |= LC_L1_INACTIVITY(7);
7625 		data &= ~LC_PMI_TO_L1_DIS;
7626 		if (orig != data)
7627 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7628 
7629 		if (!disable_plloff_in_l1) {
7630 			bool clk_req_support;
7631 
7632 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7633 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7634 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7635 			if (orig != data)
7636 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7637 
7638 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7639 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7640 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7641 			if (orig != data)
7642 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7643 
7644 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7645 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7646 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7647 			if (orig != data)
7648 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7649 
7650 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7651 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7652 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7653 			if (orig != data)
7654 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7655 
7656 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7657 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7658 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7659 				if (orig != data)
7660 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7661 
7662 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7663 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7664 				if (orig != data)
7665 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7666 
7667 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7668 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7669 				if (orig != data)
7670 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7671 
7672 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7673 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7674 				if (orig != data)
7675 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7676 
7677 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7678 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7679 				if (orig != data)
7680 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7681 
7682 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7683 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7684 				if (orig != data)
7685 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7686 
7687 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7688 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7689 				if (orig != data)
7690 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7691 
7692 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7693 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7694 				if (orig != data)
7695 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7696 			}
7697 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7698 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7699 			data |= LC_DYN_LANES_PWR_STATE(3);
7700 			if (orig != data)
7701 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7702 
7703 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7704 			data &= ~LS2_EXIT_TIME_MASK;
7705 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7706 				data |= LS2_EXIT_TIME(5);
7707 			if (orig != data)
7708 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7709 
7710 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7711 			data &= ~LS2_EXIT_TIME_MASK;
7712 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7713 				data |= LS2_EXIT_TIME(5);
7714 			if (orig != data)
7715 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7716 
7717 #ifdef zMN_TODO
7718 			if (!disable_clkreq &&
7719 			    !pci_is_root_bus(rdev->pdev->bus)) {
7720 				struct pci_dev *root = rdev->pdev->bus->self;
7721 				u32 lnkcap;
7722 
7723 				clk_req_support = false;
7724 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7725 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7726 					clk_req_support = true;
7727 			} else {
7728 				clk_req_support = false;
7729 			}
7730 #else
7731 			clk_req_support = false;
7732 #endif
7733 
7734 			if (clk_req_support) {
7735 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7736 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7737 				if (orig != data)
7738 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7739 
7740 				orig = data = RREG32(THM_CLK_CNTL);
7741 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7742 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7743 				if (orig != data)
7744 					WREG32(THM_CLK_CNTL, data);
7745 
7746 				orig = data = RREG32(MISC_CLK_CNTL);
7747 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7748 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7749 				if (orig != data)
7750 					WREG32(MISC_CLK_CNTL, data);
7751 
7752 				orig = data = RREG32(CG_CLKPIN_CNTL);
7753 				data &= ~BCLK_AS_XCLK;
7754 				if (orig != data)
7755 					WREG32(CG_CLKPIN_CNTL, data);
7756 
7757 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7758 				data &= ~FORCE_BIF_REFCLK_EN;
7759 				if (orig != data)
7760 					WREG32(CG_CLKPIN_CNTL_2, data);
7761 
7762 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7763 				data &= ~MPLL_CLKOUT_SEL_MASK;
7764 				data |= MPLL_CLKOUT_SEL(4);
7765 				if (orig != data)
7766 					WREG32(MPLL_BYPASSCLK_SEL, data);
7767 
7768 				orig = data = RREG32(SPLL_CNTL_MODE);
7769 				data &= ~SPLL_REFCLK_SEL_MASK;
7770 				if (orig != data)
7771 					WREG32(SPLL_CNTL_MODE, data);
7772 			}
7773 		}
7774 	} else {
7775 		if (orig != data)
7776 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7777 	}
7778 
7779 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7780 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7781 	if (orig != data)
7782 		WREG32_PCIE(PCIE_CNTL2, data);
7783 
7784 	if (!disable_l0s) {
7785 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7787 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7788 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7789 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7790 				data &= ~LC_L0S_INACTIVITY_MASK;
7791 				if (orig != data)
7792 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7793 			}
7794 		}
7795 	}
7796 }
7797 
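/**
 * si_vce_send_vcepll_ctlreq - commit VCEPLL settings to the hardware
 *
 * @rdev: radeon_device pointer
 *
 * Pulses VCEPLL_CTLREQ and waits for the CTLACK and CTLACK2 bits to
 * assert (SI).
 * Returns 0 on success, -ETIMEDOUT if the PLL does not acknowledge.
 */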
7798 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7799 {
	unsigned i;

	/* make sure VCEPLL_CTLREQ is deasserted */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting VCE clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
7827 }
7828 
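/**
 * si_set_vce_clocks - adjust the VCE clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: wanted EVCLK, or 0 to put the VCEPLL to sleep in bypass
 * @ecclk: wanted ECCLK, or 0 to put the VCEPLL to sleep in bypass
 *
 * Puts the VCEPLL into bypass, reprograms the dividers and switches
 * the PLL back in once it has settled (SI).
 * Returns 0 on success, negative error code on failure.
 */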
7829 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7830 {
7831 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7832 	int r;
7833 
7834 	/* bypass evclk and ecclk with bclk */
7835 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7836 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7837 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7838 
7839 	/* put PLL in bypass mode */
7840 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7841 		     ~VCEPLL_BYPASS_EN_MASK);
7842 
7843 	if (!evclk || !ecclk) {
7844 		/* keep the Bypass mode, put PLL to sleep */
7845 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7846 			     ~VCEPLL_SLEEP_MASK);
7847 		return 0;
7848 	}
7849 
7850 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7851 					  16384, 0x03FFFFFF, 0, 128, 5,
7852 					  &fb_div, &evclk_div, &ecclk_div);
7853 	if (r)
7854 		return r;
7855 
7856 	/* set RESET_ANTI_MUX to 0 */
7857 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7858 
7859 	/* set VCO_MODE to 1 */
7860 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7861 		     ~VCEPLL_VCO_MODE_MASK);
7862 
7863 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7864 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7865 		     ~VCEPLL_SLEEP_MASK);
7866 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7867 
7868 	/* deassert VCEPLL_RESET */
7869 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7870 
7871 	mdelay(1);
7872 
7873 	r = si_vce_send_vcepll_ctlreq(rdev);
7874 	if (r)
7875 		return r;
7876 
7877 	/* assert VCEPLL_RESET again */
7878 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7879 
7880 	/* disable spread spectrum. */
7881 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7882 
7883 	/* set feedback divider */
7884 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7885 
7886 	/* set ref divider to 0 */
7887 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7888 
7889 	/* set PDIV_A and PDIV_B */
7890 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7891 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7892 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7893 
7894 	/* give the PLL some time to settle */
7895 	mdelay(15);
7896 
7897 	/* deassert PLL_RESET */
7898 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7899 
7900 	mdelay(15);
7901 
7902 	/* switch from bypass mode to normal mode */
7903 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7904 
7905 	r = si_vce_send_vcepll_ctlreq(rdev);
7906 	if (r)
7907 		return r;
7908 
	/* switch EVCLK and ECCLK selection */
7910 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7911 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7912 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7913 
7914 	mdelay(100);
7915 
7916 	return 0;
7917 }
7918