/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

#define PCI_EXP_LNKCTL PCIER_LINKCTRL /* 16 */
#define PCI_EXP_LNKCTL2 48
#define PCI_EXP_LNKCTL_HAWD PCIEM_LNKCTL_HAWD /* 0x0200 */
#define PCI_EXP_DEVSTA PCIER_DEVSTS /* 10 */
#define PCI_EXP_DEVSTA_TRPND 0x0020
#define PCI_EXP_LNKCAP_CLKPM 0x00040000

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
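/*
 * Worked example of the decode above: a raw CTF_TEMP field of 0x062
 * (98) yields 98 * 1000 = 98000 millidegrees (98 C); any raw value with
 * bit 9 set (>= 0x200) is clamped to the 255000 maximum.
 */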
/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}
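/*
 * A minimal usage sketch for the accessors above (assuming the usual
 * radeon wrappers, which route RREG32_PCIE_PORT()/WREG32_PCIE_PORT()
 * here via rdev->pciep_rreg/pciep_wreg): the INDEX/DATA pair serializes
 * the access, and the dummy readbacks post each write before the next
 * cycle is started.
 *
 *	u32 v = RREG32_PCIE_PORT(reg);
 *	WREG32_PCIE_PORT(reg, v | some_bit);
 */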
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xae00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc778 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc77c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc780 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc784 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc788 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc78c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xae00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
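/*
 * The "golden" tables below are consumed as {offset, and_mask, or_mask}
 * triples by radeon_program_register_sequence() (see radeon_device.c):
 * for each triple the bits set in and_mask are cleared from the current
 * register value and replaced with or_mask, and the result is written
 * back; an and_mask of 0xffffffff writes or_mask directly.  For example,
 * { 0xc770, 0xffffffff, 0x00000800 } simply stores 0x800 in 0xc770.
 */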
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
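/*
 * A minimal usage sketch for the doorbell helpers above, assuming the
 * usual radeon.h convenience macros that wrap them (RDOORBELL32() and
 * WDOORBELL32()); the exact ring field name is this driver's convention
 * and is shown here only for illustration.  Ringing a compute ring's
 * doorbell after bumping its write pointer looks like:
 *
 *	WDOORBELL32(ring->doorbell_offset, ring->wptr);
 */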
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active registers instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
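/*
 * cik_srbm_select() is not synchronized internally; a representative
 * sketch of the pattern callers use (taking rdev->srbm_mutex around the
 * select/program/deselect sequence, with instance 0 restored afterwards):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program the instanced registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */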
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/*
		 * Note: this branch is only entered when the MC sequencer
		 * is not already running, so the blackout save/restore
		 * below can never trigger; it is kept as-is from the
		 * common MC load sequence.
		 */
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
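/*
 * A representative call-site sketch (the startup path invokes this on
 * dGPUs before the memory controller is programmed; a failure means GDDR
 * training never completes):
 *
 *	r = ci_mc_load_microcode(rdev);
 *	if (r) {
 *		DRM_ERROR("Failed to load MC firmware!\n");
 *		return r;
 *	}
 */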
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default:
		BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->datasize != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->datasize, fw_name);
		err = -EINVAL;
		goto out;
	}

	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->datasize != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->datasize, fw_name);
		err = -EINVAL;
	}

	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->datasize != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->datasize, fw_name);
		err = -EINVAL;
	}

	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->datasize != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->datasize, fw_name);
		err = -EINVAL;
	}

	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->datasize != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->datasize, fw_name);
		err = -EINVAL;
	}

	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->datasize != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->datasize, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->datasize != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->datasize, fw_name);
			err = -EINVAL;
		}

		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->datasize != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->datasize, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
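/*
 * A decoding sketch for one GB_TILE_MODE entry, using the pack macros
 * from cikd.h.  For example, index 0 of the 8-pipe table programmed
 * below packs:
 *
 *	ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *	MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
 *	PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
 *	TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)
 *
 * i.e. a 2D macro-tiled depth surface with a 64B tile split on an
 * 8-pipe configuration.  Userspace only selects a table index per
 * surface; the hardware applies the packed parameters.
 */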
*/ 1752 1753 if (num_pipe_configs == 8) { 1754 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1755 switch (reg_offset) { 1756 case 0: 1757 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1758 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1759 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1760 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 1761 break; 1762 case 1: 1763 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1764 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1765 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1766 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 1767 break; 1768 case 2: 1769 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1770 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1771 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1772 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 1773 break; 1774 case 3: 1775 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1776 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1777 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1778 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 1779 break; 1780 case 4: 1781 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1782 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1783 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1784 TILE_SPLIT(split_equal_to_row_size)); 1785 break; 1786 case 5: 1787 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1788 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1789 break; 1790 case 6: 1791 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1792 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1793 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1794 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 1795 break; 1796 case 7: 1797 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1798 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1799 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1800 TILE_SPLIT(split_equal_to_row_size)); 1801 break; 1802 case 8: 1803 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 1804 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 1805 break; 1806 case 9: 1807 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1808 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 1809 break; 1810 case 10: 1811 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1812 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1813 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1814 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1815 break; 1816 case 11: 1817 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1818 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1819 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1820 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1821 break; 1822 case 12: 1823 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1824 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1825 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1826 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1827 break; 1828 case 13: 1829 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1830 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 1831 break; 1832 case 14: 1833 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1834 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1835 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1837 break; 1838 case 16: 1839 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1840 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1841 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1842 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1843 break; 1844 case 17: 1845 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1846 
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1944 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1945 NUM_BANKS(ADDR_SURF_16_BANK)); 1946 break; 1947 case 12: 1948 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1949 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1950 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1951 NUM_BANKS(ADDR_SURF_8_BANK)); 1952 break; 1953 case 13: 1954 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1955 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1956 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1957 NUM_BANKS(ADDR_SURF_4_BANK)); 1958 break; 1959 case 14: 1960 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1961 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1962 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1963 NUM_BANKS(ADDR_SURF_2_BANK)); 1964 break; 1965 default: 1966 gb_tile_moden = 0; 1967 break; 1968 } 1969 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1970 } 1971 } else if (num_pipe_configs == 4) { 1972 if (num_rbs == 4) { 1973 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1974 switch (reg_offset) { 1975 case 0: 1976 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1977 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1978 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1979 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 1980 break; 1981 case 1: 1982 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1983 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1984 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1985 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 1986 break; 1987 case 2: 1988 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1989 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1990 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1991 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 1992 break; 1993 case 3: 1994 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1995 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1996 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1997 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 1998 break; 1999 case 4: 2000 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2001 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2002 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2003 TILE_SPLIT(split_equal_to_row_size)); 2004 break; 2005 case 5: 2006 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2007 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2008 break; 2009 case 6: 2010 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2011 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2012 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2014 break; 2015 case 7: 2016 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2017 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2018 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2019 TILE_SPLIT(split_equal_to_row_size)); 2020 break; 2021 case 8: 2022 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2023 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2024 break; 2025 case 9: 2026 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2027 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2028 break; 2029 case 10: 2030 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2031 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2032 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2033 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2034 break; 2035 case 11: 2036 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2037 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2038 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2040 break; 2041 case 12: 2042 gb_tile_moden = 
(ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2043 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2044 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2046 break; 2047 case 13: 2048 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2049 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2050 break; 2051 case 14: 2052 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2053 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2054 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2056 break; 2057 case 16: 2058 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2059 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2060 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2061 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2062 break; 2063 case 17: 2064 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2065 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2066 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2068 break; 2069 case 27: 2070 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2071 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2072 break; 2073 case 28: 2074 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2075 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2076 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2078 break; 2079 case 29: 2080 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2081 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2082 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2084 break; 2085 case 30: 2086 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2087 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2088 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2090 break; 2091 default: 2092 gb_tile_moden = 0; 2093 break; 2094 } 2095 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2096 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2097 } 2098 } else if (num_rbs < 4) { 2099 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2100 switch (reg_offset) { 2101 case 0: 2102 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2103 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2104 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2105 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2106 break; 2107 case 1: 2108 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2109 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2110 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2111 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2112 break; 2113 case 2: 2114 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2115 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2116 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2117 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2118 break; 2119 case 3: 2120 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2121 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2122 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2123 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2124 break; 2125 case 4: 2126 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2127 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2128 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2129 TILE_SPLIT(split_equal_to_row_size)); 2130 break; 2131 case 5: 2132 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2133 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2134 break; 2135 case 6: 2136 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2137 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2138 
PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2139 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2140 break; 2141 case 7: 2142 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2143 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2144 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2145 TILE_SPLIT(split_equal_to_row_size)); 2146 break; 2147 case 8: 2148 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2149 PIPE_CONFIG(ADDR_SURF_P4_8x16)); 2150 break; 2151 case 9: 2152 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2153 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2154 break; 2155 case 10: 2156 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2157 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2158 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2159 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2160 break; 2161 case 11: 2162 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2163 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2164 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2165 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2166 break; 2167 case 12: 2168 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2169 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2170 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2172 break; 2173 case 13: 2174 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2175 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2176 break; 2177 case 14: 2178 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2179 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2180 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2182 break; 2183 case 16: 2184 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2185 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2186 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2187 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2188 break; 2189 case 17: 2190 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2191 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2192 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2194 break; 2195 case 27: 2196 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2197 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2198 break; 2199 case 28: 2200 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2201 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2202 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2204 break; 2205 case 29: 2206 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2207 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2208 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2210 break; 2211 case 30: 2212 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2213 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2214 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2216 break; 2217 default: 2218 gb_tile_moden = 0; 2219 break; 2220 } 2221 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2222 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2223 } 2224 } 2225 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2226 switch (reg_offset) { 2227 case 0: 2228 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2229 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2230 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2231 NUM_BANKS(ADDR_SURF_16_BANK)); 2232 break; 2233 case 1: 2234 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2236 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2237 NUM_BANKS(ADDR_SURF_16_BANK)); 2238 break; 2239 case 2: 2240 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2243 NUM_BANKS(ADDR_SURF_16_BANK)); 2244 break; 2245 case 3: 2246 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2249 NUM_BANKS(ADDR_SURF_16_BANK)); 2250 break; 2251 case 4: 2252 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2255 NUM_BANKS(ADDR_SURF_16_BANK)); 2256 break; 2257 case 5: 2258 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2261 NUM_BANKS(ADDR_SURF_8_BANK)); 2262 break; 2263 case 6: 2264 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2267 NUM_BANKS(ADDR_SURF_4_BANK)); 2268 break; 2269 case 8: 2270 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2273 NUM_BANKS(ADDR_SURF_16_BANK)); 2274 break; 2275 case 9: 2276 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2279 NUM_BANKS(ADDR_SURF_16_BANK)); 2280 break; 2281 case 10: 2282 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2285 NUM_BANKS(ADDR_SURF_16_BANK)); 2286 break; 2287 case 11: 2288 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2291 NUM_BANKS(ADDR_SURF_16_BANK)); 2292 break; 2293 case 12: 2294 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2295 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2296 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2297 NUM_BANKS(ADDR_SURF_16_BANK)); 2298 break; 2299 case 13: 2300 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2303 NUM_BANKS(ADDR_SURF_8_BANK)); 2304 break; 2305 case 14: 2306 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2307 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2308 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2309 NUM_BANKS(ADDR_SURF_4_BANK)); 2310 break; 2311 default: 2312 gb_tile_moden = 0; 2313 break; 2314 } 2315 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2316 } 2317 } else if (num_pipe_configs == 2) { 2318 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2319 switch (reg_offset) { 2320 case 0: 2321 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2322 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2323 PIPE_CONFIG(ADDR_SURF_P2) | 2324 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2325 break; 2326 case 1: 2327 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2328 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2329 PIPE_CONFIG(ADDR_SURF_P2) | 2330 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2331 break; 2332 case 2: 2333 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2334 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2335 PIPE_CONFIG(ADDR_SURF_P2) | 2336 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2337 break; 2338 case 3: 2339 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2340 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2341 PIPE_CONFIG(ADDR_SURF_P2) | 2342 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2343 break; 2344 case 4: 2345 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2346 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2347 PIPE_CONFIG(ADDR_SURF_P2) | 2348 TILE_SPLIT(split_equal_to_row_size)); 2349 break; 2350 case 5: 2351 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2352 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2353 break; 2354 case 6: 2355 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2356 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2357 PIPE_CONFIG(ADDR_SURF_P2) | 2358 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2359 break; 2360 case 7: 2361 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2362 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2363 PIPE_CONFIG(ADDR_SURF_P2) | 2364 TILE_SPLIT(split_equal_to_row_size)); 2365 break; 2366 case 8: 2367 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); 2368 break; 2369 case 9: 2370 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2372 break; 2373 case 10: 2374 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2375 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2376 PIPE_CONFIG(ADDR_SURF_P2) | 2377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2378 break; 2379 case 11: 2380 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2381 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2382 PIPE_CONFIG(ADDR_SURF_P2) | 2383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2384 break; 2385 case 12: 2386 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2387 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2388 PIPE_CONFIG(ADDR_SURF_P2) | 2389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2390 break; 2391 case 13: 2392 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2393 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2394 break; 2395 case 14: 2396 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2397 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2398 PIPE_CONFIG(ADDR_SURF_P2) | 2399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2400 break; 2401 case 16: 2402 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2403 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2404 PIPE_CONFIG(ADDR_SURF_P2) | 2405 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2406 break; 2407 case 17: 2408 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2409 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2410 PIPE_CONFIG(ADDR_SURF_P2) | 2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2412 break; 2413 case 27: 2414 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2415 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2416 break; 2417 case 28: 2418 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2419 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2420 PIPE_CONFIG(ADDR_SURF_P2) | 2421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2422 break; 2423 case 29: 2424 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2425 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2426 PIPE_CONFIG(ADDR_SURF_P2) | 2427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2428 break; 2429 case 30: 2430 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2431 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2432 PIPE_CONFIG(ADDR_SURF_P2) | 2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 
2434 break; 2435 default: 2436 gb_tile_moden = 0; 2437 break; 2438 } 2439 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2440 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2441 } 2442 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2443 switch (reg_offset) { 2444 case 0: 2445 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2448 NUM_BANKS(ADDR_SURF_16_BANK)); 2449 break; 2450 case 1: 2451 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2454 NUM_BANKS(ADDR_SURF_16_BANK)); 2455 break; 2456 case 2: 2457 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2460 NUM_BANKS(ADDR_SURF_16_BANK)); 2461 break; 2462 case 3: 2463 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2466 NUM_BANKS(ADDR_SURF_16_BANK)); 2467 break; 2468 case 4: 2469 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2472 NUM_BANKS(ADDR_SURF_16_BANK)); 2473 break; 2474 case 5: 2475 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2478 NUM_BANKS(ADDR_SURF_16_BANK)); 2479 break; 2480 case 6: 2481 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2484 NUM_BANKS(ADDR_SURF_8_BANK)); 2485 break; 2486 case 8: 2487 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2490 NUM_BANKS(ADDR_SURF_16_BANK)); 2491 break; 2492 case 9: 2493 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2496 NUM_BANKS(ADDR_SURF_16_BANK)); 2497 break; 2498 case 10: 2499 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2502 NUM_BANKS(ADDR_SURF_16_BANK)); 2503 break; 2504 case 11: 2505 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2508 NUM_BANKS(ADDR_SURF_16_BANK)); 2509 break; 2510 case 12: 2511 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2514 NUM_BANKS(ADDR_SURF_16_BANK)); 2515 break; 2516 case 13: 2517 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2520 NUM_BANKS(ADDR_SURF_16_BANK)); 2521 break; 2522 case 14: 2523 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2526 NUM_BANKS(ADDR_SURF_8_BANK)); 2527 break; 2528 default: 2529 gb_tile_moden = 0; 2530 break; 2531 } 2532 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2533 } 2534 } else 2535 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs); 2536 } 
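
/*
 * Tile mode slots not listed in the tables above (15, 18-26 and 31,
 * plus macrotile slots 7 and 15) fall through to the default case and
 * are programmed as 0; they appear to be unused on these asics.
 */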

/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH. 0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}

/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}

/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
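 *
 * Walks every SE/SH pair to collect the fuse-disabled RB bits via
 * cik_get_rb_disabled(), derives the enabled-RB mask from them, and
 * programs PA_SC_RASTER_CONFIG for each SE so rasterization only
 * targets working backends.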
2624 */ 2625 static void cik_setup_rb(struct radeon_device *rdev, 2626 u32 se_num, u32 sh_per_se, 2627 u32 max_rb_num) 2628 { 2629 int i, j; 2630 u32 data, mask; 2631 u32 disabled_rbs = 0; 2632 u32 enabled_rbs = 0; 2633 2634 for (i = 0; i < se_num; i++) { 2635 for (j = 0; j < sh_per_se; j++) { 2636 cik_select_se_sh(rdev, i, j); 2637 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se); 2638 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); 2639 } 2640 } 2641 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 2642 2643 mask = 1; 2644 for (i = 0; i < max_rb_num; i++) { 2645 if (!(disabled_rbs & mask)) 2646 enabled_rbs |= mask; 2647 mask <<= 1; 2648 } 2649 2650 for (i = 0; i < se_num; i++) { 2651 cik_select_se_sh(rdev, i, 0xffffffff); 2652 data = 0; 2653 for (j = 0; j < sh_per_se; j++) { 2654 switch (enabled_rbs & 3) { 2655 case 1: 2656 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); 2657 break; 2658 case 2: 2659 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); 2660 break; 2661 case 3: 2662 default: 2663 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); 2664 break; 2665 } 2666 enabled_rbs >>= 2; 2667 } 2668 WREG32(PA_SC_RASTER_CONFIG, data); 2669 } 2670 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 2671 } 2672 2673 /** 2674 * cik_gpu_init - setup the 3D engine 2675 * 2676 * @rdev: radeon_device pointer 2677 * 2678 * Configures the 3D engine and tiling configuration 2679 * registers so that the 3D engine is usable. 2680 */ 2681 static void cik_gpu_init(struct radeon_device *rdev) 2682 { 2683 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG); 2684 u32 mc_shared_chmap, mc_arb_ramcfg; 2685 u32 hdp_host_path_cntl; 2686 u32 tmp; 2687 int i, j; 2688 2689 switch (rdev->family) { 2690 case CHIP_BONAIRE: 2691 rdev->config.cik.max_shader_engines = 2; 2692 rdev->config.cik.max_tile_pipes = 4; 2693 rdev->config.cik.max_cu_per_sh = 7; 2694 rdev->config.cik.max_sh_per_se = 1; 2695 rdev->config.cik.max_backends_per_se = 2; 2696 rdev->config.cik.max_texture_channel_caches = 4; 2697 rdev->config.cik.max_gprs = 256; 2698 rdev->config.cik.max_gs_threads = 32; 2699 rdev->config.cik.max_hw_contexts = 8; 2700 2701 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2702 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2703 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2704 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2705 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2706 break; 2707 case CHIP_KAVERI: 2708 rdev->config.cik.max_shader_engines = 1; 2709 rdev->config.cik.max_tile_pipes = 4; 2710 if ((rdev->ddev->pci_device == 0x1304) || 2711 (rdev->ddev->pci_device == 0x1305) || 2712 (rdev->ddev->pci_device == 0x130C) || 2713 (rdev->ddev->pci_device == 0x130F) || 2714 (rdev->ddev->pci_device == 0x1310) || 2715 (rdev->ddev->pci_device == 0x1311) || 2716 (rdev->ddev->pci_device == 0x131C)) { 2717 rdev->config.cik.max_cu_per_sh = 8; 2718 rdev->config.cik.max_backends_per_se = 2; 2719 } else if ((rdev->ddev->pci_device == 0x1309) || 2720 (rdev->ddev->pci_device == 0x130A) || 2721 (rdev->ddev->pci_device == 0x130D) || 2722 (rdev->ddev->pci_device == 0x1313) || 2723 (rdev->ddev->pci_device == 0x131D)) { 2724 rdev->config.cik.max_cu_per_sh = 6; 2725 rdev->config.cik.max_backends_per_se = 2; 2726 } else if ((rdev->ddev->pci_device == 0x1306) || 2727 (rdev->ddev->pci_device == 0x1307) || 2728 (rdev->ddev->pci_device == 0x130B) || 2729 (rdev->ddev->pci_device == 0x130E) || 2730 (rdev->ddev->pci_device == 0x1315) || 2731 (rdev->ddev->pci_device == 
0x131B)) { 2732 rdev->config.cik.max_cu_per_sh = 4; 2733 rdev->config.cik.max_backends_per_se = 1; 2734 } else { 2735 rdev->config.cik.max_cu_per_sh = 3; 2736 rdev->config.cik.max_backends_per_se = 1; 2737 } 2738 rdev->config.cik.max_sh_per_se = 1; 2739 rdev->config.cik.max_texture_channel_caches = 4; 2740 rdev->config.cik.max_gprs = 256; 2741 rdev->config.cik.max_gs_threads = 16; 2742 rdev->config.cik.max_hw_contexts = 8; 2743 2744 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2745 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2746 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2747 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2748 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2749 break; 2750 case CHIP_KABINI: 2751 default: 2752 rdev->config.cik.max_shader_engines = 1; 2753 rdev->config.cik.max_tile_pipes = 2; 2754 rdev->config.cik.max_cu_per_sh = 2; 2755 rdev->config.cik.max_sh_per_se = 1; 2756 rdev->config.cik.max_backends_per_se = 1; 2757 rdev->config.cik.max_texture_channel_caches = 2; 2758 rdev->config.cik.max_gprs = 256; 2759 rdev->config.cik.max_gs_threads = 16; 2760 rdev->config.cik.max_hw_contexts = 8; 2761 2762 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 2763 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 2764 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 2765 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 2766 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 2767 break; 2768 } 2769 2770 /* Initialize HDP */ 2771 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 2772 WREG32((0x2c14 + j), 0x00000000); 2773 WREG32((0x2c18 + j), 0x00000000); 2774 WREG32((0x2c1c + j), 0x00000000); 2775 WREG32((0x2c20 + j), 0x00000000); 2776 WREG32((0x2c24 + j), 0x00000000); 2777 } 2778 2779 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); 2780 2781 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); 2782 2783 mc_shared_chmap = RREG32(MC_SHARED_CHMAP); 2784 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 2785 2786 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes; 2787 rdev->config.cik.mem_max_burst_length_bytes = 256; 2788 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; 2789 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 2790 if (rdev->config.cik.mem_row_size_in_kb > 4) 2791 rdev->config.cik.mem_row_size_in_kb = 4; 2792 /* XXX use MC settings? */ 2793 rdev->config.cik.shader_engine_tile_size = 32; 2794 rdev->config.cik.num_gpus = 1; 2795 rdev->config.cik.multi_gpu_tile_size = 64; 2796 2797 /* fix up row size */ 2798 gb_addr_config &= ~ROW_SIZE_MASK; 2799 switch (rdev->config.cik.mem_row_size_in_kb) { 2800 case 1: 2801 default: 2802 gb_addr_config |= ROW_SIZE(0); 2803 break; 2804 case 2: 2805 gb_addr_config |= ROW_SIZE(1); 2806 break; 2807 case 4: 2808 gb_addr_config |= ROW_SIZE(2); 2809 break; 2810 } 2811 2812 /* setup tiling info dword. gb_addr_config is not adequate since it does 2813 * not have bank info, so create a custom tiling dword. 2814 * bits 3:0 num_pipes 2815 * bits 7:4 num_banks 2816 * bits 11:8 group_size 2817 * bits 15:12 row_size 2818 */ 2819 rdev->config.cik.tile_config = 0; 2820 switch (rdev->config.cik.num_tile_pipes) { 2821 case 1: 2822 rdev->config.cik.tile_config |= (0 << 0); 2823 break; 2824 case 2: 2825 rdev->config.cik.tile_config |= (1 << 0); 2826 break; 2827 case 4: 2828 rdev->config.cik.tile_config |= (2 << 0); 2829 break; 2830 case 8: 2831 default: 2832 /* XXX what about 12? 
 */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}

/*
 * GPU scratch registers helpers function.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+). On newer asics,
 * memory buffers are used for fences rather than scratch regs.
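 * With num_reg = 7 and reg_base = SCRATCH_REG0 as set below, scratch
 * register i ends up at register offset SCRATCH_REG0 + i * 4.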
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
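 * DATA_SEL(1) selects a 32-bit fence value write and INT_SEL(2) has
 * the CP raise an interrupt once the write is confirmed (see the
 * EVENT_WRITE_EOP packet built below).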
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}

/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring. IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring. This function schedules the IB
 * on the gfx ring for execution by the GPU.
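 * The emitted packet carries the IB GPU address, the IB length in
 * dwords in the low bits of the control word and, for VM IBs, the
 * VM id in bits 31:24 of that same word.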
3099 */ 3100 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 3101 { 3102 struct radeon_ring *ring = &rdev->ring[ib->ring]; 3103 u32 header, control = INDIRECT_BUFFER_VALID; 3104 3105 if (ib->is_const_ib) { 3106 /* set switch buffer packet before const IB */ 3107 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 3108 radeon_ring_write(ring, 0); 3109 3110 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 3111 } else { 3112 u32 next_rptr; 3113 if (ring->rptr_save_reg) { 3114 next_rptr = ring->wptr + 3 + 4; 3115 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 3116 radeon_ring_write(ring, ((ring->rptr_save_reg - 3117 PACKET3_SET_UCONFIG_REG_START) >> 2)); 3118 radeon_ring_write(ring, next_rptr); 3119 } else if (rdev->wb.enabled) { 3120 next_rptr = ring->wptr + 5 + 4; 3121 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3122 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1)); 3123 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 3124 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 3125 radeon_ring_write(ring, next_rptr); 3126 } 3127 3128 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 3129 } 3130 3131 control |= ib->length_dw | 3132 (ib->vm ? (ib->vm->id << 24) : 0); 3133 3134 radeon_ring_write(ring, header); 3135 radeon_ring_write(ring, 3136 #ifdef __BIG_ENDIAN 3137 (2 << 0) | 3138 #endif 3139 (ib->gpu_addr & 0xFFFFFFFC)); 3140 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 3141 radeon_ring_write(ring, control); 3142 } 3143 3144 /** 3145 * cik_ib_test - basic gfx ring IB test 3146 * 3147 * @rdev: radeon_device pointer 3148 * @ring: radeon_ring structure holding ring information 3149 * 3150 * Allocate an IB and execute it on the gfx ring (CIK). 3151 * Provides a basic gfx ring test to verify that IBs are working. 3152 * Returns 0 on success, error on failure. 
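 * The test IB just writes 0xDEADBEEF to a scratch register with a
 * SET_UCONFIG_REG packet and then polls the register until the value
 * shows up or rdev->usec_timeout expires.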
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs. The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	udelay(50);
}

/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
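 * Each image is streamed one dword at a time through its
 * UCODE_ADDR/UCODE_DATA (or ME_RAM_WADDR/ME_RAM_DATA) register pair,
 * with the write address reset to 0 before and after the upload.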
3259 */ 3260 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) 3261 { 3262 const __be32 *fw_data; 3263 int i; 3264 3265 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw) 3266 return -EINVAL; 3267 3268 cik_cp_gfx_enable(rdev, false); 3269 3270 /* PFP */ 3271 fw_data = (const __be32 *)rdev->pfp_fw->data; 3272 WREG32(CP_PFP_UCODE_ADDR, 0); 3273 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++) 3274 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); 3275 WREG32(CP_PFP_UCODE_ADDR, 0); 3276 3277 /* CE */ 3278 fw_data = (const __be32 *)rdev->ce_fw->data; 3279 WREG32(CP_CE_UCODE_ADDR, 0); 3280 for (i = 0; i < CIK_CE_UCODE_SIZE; i++) 3281 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++)); 3282 WREG32(CP_CE_UCODE_ADDR, 0); 3283 3284 /* ME */ 3285 fw_data = (const __be32 *)rdev->me_fw->data; 3286 WREG32(CP_ME_RAM_WADDR, 0); 3287 for (i = 0; i < CIK_ME_UCODE_SIZE; i++) 3288 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); 3289 WREG32(CP_ME_RAM_WADDR, 0); 3290 3291 WREG32(CP_PFP_UCODE_ADDR, 0); 3292 WREG32(CP_CE_UCODE_ADDR, 0); 3293 WREG32(CP_ME_RAM_WADDR, 0); 3294 WREG32(CP_ME_RAM_RADDR, 0); 3295 return 0; 3296 } 3297 3298 /** 3299 * cik_cp_gfx_start - start the gfx ring 3300 * 3301 * @rdev: radeon_device pointer 3302 * 3303 * Enables the ring and loads the clear state context and other 3304 * packets required to init the ring. 3305 * Returns 0 for success, error for failure. 3306 */ 3307 static int cik_cp_gfx_start(struct radeon_device *rdev) 3308 { 3309 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 3310 int r, i; 3311 3312 /* init the CP */ 3313 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1); 3314 WREG32(CP_ENDIAN_SWAP, 0); 3315 WREG32(CP_DEVICE_ID, 1); 3316 3317 cik_cp_gfx_enable(rdev, true); 3318 3319 r = radeon_ring_lock(rdev, ring, cik_default_size + 17); 3320 if (r) { 3321 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); 3322 return r; 3323 } 3324 3325 /* init the CE partitions. CE only used for gfx on CIK */ 3326 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3327 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3328 radeon_ring_write(ring, 0xc000); 3329 radeon_ring_write(ring, 0xc000); 3330 3331 /* setup clear context state */ 3332 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3333 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3334 3335 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3336 radeon_ring_write(ring, 0x80000000); 3337 radeon_ring_write(ring, 0x80000000); 3338 3339 for (i = 0; i < cik_default_size; i++) 3340 radeon_ring_write(ring, cik_default_state[i]); 3341 3342 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3343 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3344 3345 /* set clear context state */ 3346 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3347 radeon_ring_write(ring, 0); 3348 3349 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 3350 radeon_ring_write(ring, 0x00000316); 3351 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 3352 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ 3353 3354 radeon_ring_unlock_commit(rdev, ring); 3355 3356 return 0; 3357 } 3358 3359 /** 3360 * cik_cp_gfx_fini - stop the gfx ring 3361 * 3362 * @rdev: radeon_device pointer 3363 * 3364 * Stop the gfx ring and tear down the driver ring 3365 * info. 
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}

/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}

u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
			      struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
	} else {
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}

u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
			      struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
	} else {
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}

void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
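	/* publish the new wptr through the writeback page, then ring
	 * the doorbell so the CP notices the update
	 */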
rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr); 3488 WDOORBELL32(ring->doorbell_offset, ring->wptr); 3489 } 3490 3491 /** 3492 * cik_cp_compute_enable - enable/disable the compute CP MEs 3493 * 3494 * @rdev: radeon_device pointer 3495 * @enable: enable or disable the MEs 3496 * 3497 * Halts or unhalts the compute MEs. 3498 */ 3499 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) 3500 { 3501 if (enable) 3502 WREG32(CP_MEC_CNTL, 0); 3503 else 3504 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); 3505 udelay(50); 3506 } 3507 3508 /** 3509 * cik_cp_compute_load_microcode - load the compute CP ME ucode 3510 * 3511 * @rdev: radeon_device pointer 3512 * 3513 * Loads the compute MEC1&2 ucode. 3514 * Returns 0 for success, -EINVAL if the ucode is not available. 3515 */ 3516 static int cik_cp_compute_load_microcode(struct radeon_device *rdev) 3517 { 3518 const __be32 *fw_data; 3519 int i; 3520 3521 if (!rdev->mec_fw) 3522 return -EINVAL; 3523 3524 cik_cp_compute_enable(rdev, false); 3525 3526 /* MEC1 */ 3527 fw_data = (const __be32 *)rdev->mec_fw->data; 3528 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 3529 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 3530 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++)); 3531 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 3532 3533 if (rdev->family == CHIP_KAVERI) { 3534 /* MEC2 */ 3535 fw_data = (const __be32 *)rdev->mec_fw->data; 3536 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 3537 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 3538 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++)); 3539 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 3540 } 3541 3542 return 0; 3543 } 3544 3545 /** 3546 * cik_cp_compute_start - start the compute queues 3547 * 3548 * @rdev: radeon_device pointer 3549 * 3550 * Enable the compute queues. 3551 * Returns 0 for success, error for failure. 3552 */ 3553 static int cik_cp_compute_start(struct radeon_device *rdev) 3554 { 3555 cik_cp_compute_enable(rdev, true); 3556 3557 return 0; 3558 } 3559 3560 /** 3561 * cik_cp_compute_fini - stop the compute queues 3562 * 3563 * @rdev: radeon_device pointer 3564 * 3565 * Stop the compute queues and tear down the driver queue 3566 * info. 
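 * Unpins and frees the per-ring MQD BOs that were allocated when the
 * compute queues were brought up.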
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}

static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.
Not sure if this is required or not */ 3660 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2); 3661 3662 radeon_bo_kunmap(rdev->mec.hpd_eop_obj); 3663 radeon_bo_unreserve(rdev->mec.hpd_eop_obj); 3664 3665 return 0; 3666 } 3667 3668 struct hqd_registers 3669 { 3670 u32 cp_mqd_base_addr; 3671 u32 cp_mqd_base_addr_hi; 3672 u32 cp_hqd_active; 3673 u32 cp_hqd_vmid; 3674 u32 cp_hqd_persistent_state; 3675 u32 cp_hqd_pipe_priority; 3676 u32 cp_hqd_queue_priority; 3677 u32 cp_hqd_quantum; 3678 u32 cp_hqd_pq_base; 3679 u32 cp_hqd_pq_base_hi; 3680 u32 cp_hqd_pq_rptr; 3681 u32 cp_hqd_pq_rptr_report_addr; 3682 u32 cp_hqd_pq_rptr_report_addr_hi; 3683 u32 cp_hqd_pq_wptr_poll_addr; 3684 u32 cp_hqd_pq_wptr_poll_addr_hi; 3685 u32 cp_hqd_pq_doorbell_control; 3686 u32 cp_hqd_pq_wptr; 3687 u32 cp_hqd_pq_control; 3688 u32 cp_hqd_ib_base_addr; 3689 u32 cp_hqd_ib_base_addr_hi; 3690 u32 cp_hqd_ib_rptr; 3691 u32 cp_hqd_ib_control; 3692 u32 cp_hqd_iq_timer; 3693 u32 cp_hqd_iq_rptr; 3694 u32 cp_hqd_dequeue_request; 3695 u32 cp_hqd_dma_offload; 3696 u32 cp_hqd_sema_cmd; 3697 u32 cp_hqd_msg_type; 3698 u32 cp_hqd_atomic0_preop_lo; 3699 u32 cp_hqd_atomic0_preop_hi; 3700 u32 cp_hqd_atomic1_preop_lo; 3701 u32 cp_hqd_atomic1_preop_hi; 3702 u32 cp_hqd_hq_scheduler0; 3703 u32 cp_hqd_hq_scheduler1; 3704 u32 cp_mqd_control; 3705 }; 3706 3707 struct bonaire_mqd 3708 { 3709 u32 header; 3710 u32 dispatch_initiator; 3711 u32 dimensions[3]; 3712 u32 start_idx[3]; 3713 u32 num_threads[3]; 3714 u32 pipeline_stat_enable; 3715 u32 perf_counter_enable; 3716 u32 pgm[2]; 3717 u32 tba[2]; 3718 u32 tma[2]; 3719 u32 pgm_rsrc[2]; 3720 u32 vmid; 3721 u32 resource_limits; 3722 u32 static_thread_mgmt01[2]; 3723 u32 tmp_ring_size; 3724 u32 static_thread_mgmt23[2]; 3725 u32 restart[3]; 3726 u32 thread_trace_enable; 3727 u32 reserved1; 3728 u32 user_data[16]; 3729 u32 vgtcs_invoke_count[2]; 3730 struct hqd_registers queue_state; 3731 u32 dequeue_cntr; 3732 u32 interrupt_queue[64]; 3733 }; 3734 3735 /** 3736 * cik_cp_compute_resume - setup the compute queue registers 3737 * 3738 * @rdev: radeon_device pointer 3739 * 3740 * Program the compute queues and test them to make sure they 3741 * are working. 3742 * Returns 0 for success, error for failure. 3743 */ 3744 static int cik_cp_compute_resume(struct radeon_device *rdev) 3745 { 3746 int r, i, idx; 3747 u32 tmp; 3748 bool use_doorbell = true; 3749 u64 hqd_gpu_addr; 3750 u64 mqd_gpu_addr; 3751 u64 eop_gpu_addr; 3752 u64 wb_gpu_addr; 3753 u32 *buf; 3754 struct bonaire_mqd *mqd; 3755 3756 r = cik_cp_compute_start(rdev); 3757 if (r) 3758 return r; 3759 3760 /* fix up chicken bits */ 3761 tmp = RREG32(CP_CPF_DEBUG); 3762 tmp |= (1 << 23); 3763 WREG32(CP_CPF_DEBUG, tmp); 3764 3765 /* init the pipes */ 3766 spin_lock(&rdev->srbm_mutex); 3767 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) { 3768 int me = (i < 4) ? 1 : 2; 3769 int pipe = (i < 4) ? 
i : (i - 4); 3770 3771 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); 3772 3773 cik_srbm_select(rdev, me, pipe, 0, 0); 3774 3775 /* write the EOP addr */ 3776 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); 3777 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); 3778 3779 /* set the VMID assigned */ 3780 WREG32(CP_HPD_EOP_VMID, 0); 3781 3782 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3783 tmp = RREG32(CP_HPD_EOP_CONTROL); 3784 tmp &= ~EOP_SIZE_MASK; 3785 tmp |= order_base_2(MEC_HPD_SIZE / 8); 3786 WREG32(CP_HPD_EOP_CONTROL, tmp); 3787 } 3788 cik_srbm_select(rdev, 0, 0, 0, 0); 3789 spin_unlock(&rdev->srbm_mutex); 3790 3791 /* init the queues. Just two for now. */ 3792 for (i = 0; i < 2; i++) { 3793 if (i == 0) 3794 idx = CAYMAN_RING_TYPE_CP1_INDEX; 3795 else 3796 idx = CAYMAN_RING_TYPE_CP2_INDEX; 3797 3798 if (rdev->ring[idx].mqd_obj == NULL) { 3799 r = radeon_bo_create(rdev, 3800 sizeof(struct bonaire_mqd), 3801 PAGE_SIZE, true, 3802 RADEON_GEM_DOMAIN_GTT, NULL, 3803 &rdev->ring[idx].mqd_obj); 3804 if (r) { 3805 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); 3806 return r; 3807 } 3808 } 3809 3810 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 3811 if (unlikely(r != 0)) { 3812 cik_cp_compute_fini(rdev); 3813 return r; 3814 } 3815 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT, 3816 &mqd_gpu_addr); 3817 if (r) { 3818 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r); 3819 cik_cp_compute_fini(rdev); 3820 return r; 3821 } 3822 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf); 3823 if (r) { 3824 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r); 3825 cik_cp_compute_fini(rdev); 3826 return r; 3827 } 3828 3829 /* doorbell offset */ 3830 rdev->ring[idx].doorbell_offset = 3831 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0; 3832 3833 /* init the mqd struct */ 3834 memset(buf, 0, sizeof(struct bonaire_mqd)); 3835 3836 mqd = (struct bonaire_mqd *)buf; 3837 mqd->header = 0xC0310800; 3838 mqd->static_thread_mgmt01[0] = 0xffffffff; 3839 mqd->static_thread_mgmt01[1] = 0xffffffff; 3840 mqd->static_thread_mgmt23[0] = 0xffffffff; 3841 mqd->static_thread_mgmt23[1] = 0xffffffff; 3842 3843 spin_lock(&rdev->srbm_mutex); 3844 cik_srbm_select(rdev, rdev->ring[idx].me, 3845 rdev->ring[idx].pipe, 3846 rdev->ring[idx].queue, 0); 3847 3848 /* disable wptr polling */ 3849 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); 3850 tmp &= ~WPTR_POLL_EN; 3851 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); 3852 3853 /* enable doorbell? 
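With doorbells the host kicks a queue by writing the new
		 * wptr through a mapped doorbell page rather than having
		 * the CP poll a wptr in memory; use_doorbell is hard-coded
		 * true above, and the compute ring's set_wptr callback then
		 * rings the queue with roughly
		 * WDOORBELL32(ring->doorbell_offset, wptr).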
		 */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			int j;

			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* use a local counter here; reusing i would clobber
			 * the ring index of the enclosing queue loop */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff; 3933 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR, 3934 mqd->queue_state.cp_hqd_pq_rptr_report_addr); 3935 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3936 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); 3937 3938 /* enable the doorbell if requested */ 3939 if (use_doorbell) { 3940 mqd->queue_state.cp_hqd_pq_doorbell_control = 3941 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 3942 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; 3943 mqd->queue_state.cp_hqd_pq_doorbell_control |= 3944 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4); 3945 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 3946 mqd->queue_state.cp_hqd_pq_doorbell_control &= 3947 ~(DOORBELL_SOURCE | DOORBELL_HIT); 3948 3949 } else { 3950 mqd->queue_state.cp_hqd_pq_doorbell_control = 0; 3951 } 3952 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 3953 mqd->queue_state.cp_hqd_pq_doorbell_control); 3954 3955 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3956 rdev->ring[idx].wptr = 0; 3957 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; 3958 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 3959 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); 3960 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; 3961 3962 /* set the vmid for the queue */ 3963 mqd->queue_state.cp_hqd_vmid = 0; 3964 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); 3965 3966 /* activate the queue */ 3967 mqd->queue_state.cp_hqd_active = 1; 3968 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); 3969 3970 cik_srbm_select(rdev, 0, 0, 0, 0); 3971 spin_unlock(&rdev->srbm_mutex); 3972 3973 radeon_bo_kunmap(rdev->ring[idx].mqd_obj); 3974 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 3975 3976 rdev->ring[idx].ready = true; 3977 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]); 3978 if (r) 3979 rdev->ring[idx].ready = false; 3980 } 3981 3982 return 0; 3983 } 3984 3985 static void cik_cp_enable(struct radeon_device *rdev, bool enable) 3986 { 3987 cik_cp_gfx_enable(rdev, enable); 3988 cik_cp_compute_enable(rdev, enable); 3989 } 3990 3991 static int cik_cp_load_microcode(struct radeon_device *rdev) 3992 { 3993 int r; 3994 3995 r = cik_cp_gfx_load_microcode(rdev); 3996 if (r) 3997 return r; 3998 r = cik_cp_compute_load_microcode(rdev); 3999 if (r) 4000 return r; 4001 4002 return 0; 4003 } 4004 4005 static void cik_cp_fini(struct radeon_device *rdev) 4006 { 4007 cik_cp_gfx_fini(rdev); 4008 cik_cp_compute_fini(rdev); 4009 } 4010 4011 static int cik_cp_resume(struct radeon_device *rdev) 4012 { 4013 int r; 4014 4015 cik_enable_gui_idle_interrupt(rdev, false); 4016 4017 r = cik_cp_load_microcode(rdev); 4018 if (r) 4019 return r; 4020 4021 r = cik_cp_gfx_resume(rdev); 4022 if (r) 4023 return r; 4024 r = cik_cp_compute_resume(rdev); 4025 if (r) 4026 return r; 4027 4028 cik_enable_gui_idle_interrupt(rdev, true); 4029 4030 return 0; 4031 } 4032 4033 static void cik_print_gpu_status_regs(struct radeon_device *rdev) 4034 { 4035 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", 4036 RREG32(GRBM_STATUS)); 4037 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", 4038 RREG32(GRBM_STATUS2)); 4039 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", 4040 RREG32(GRBM_STATUS_SE0)); 4041 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", 4042 RREG32(GRBM_STATUS_SE1)); 4043 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n", 4044 RREG32(GRBM_STATUS_SE2)); 4045 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n", 4046 RREG32(GRBM_STATUS_SE3)); 4047 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", 4048 RREG32(SRBM_STATUS)); 4049 dev_info(rdev->dev, " 
SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
		RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
		RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
		RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
		RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
		RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
		RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}

/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}

/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
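 * The mask is normally the value returned by cik_gpu_check_soft_reset();
 * e.g. RADEON_RESET_GFX selects SOFT_RESET_CP | SOFT_RESET_GFX, which is
 * then pulsed through GRBM_SOFT_RESET below.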
4161 */ 4162 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask) 4163 { 4164 struct evergreen_mc_save save; 4165 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 4166 u32 tmp; 4167 4168 if (reset_mask == 0) 4169 return; 4170 4171 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask); 4172 4173 cik_print_gpu_status_regs(rdev); 4174 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", 4175 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR)); 4176 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 4177 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); 4178 4179 /* disable CG/PG */ 4180 cik_fini_pg(rdev); 4181 cik_fini_cg(rdev); 4182 4183 /* stop the rlc */ 4184 cik_rlc_stop(rdev); 4185 4186 /* Disable GFX parsing/prefetching */ 4187 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT); 4188 4189 /* Disable MEC parsing/prefetching */ 4190 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT); 4191 4192 if (reset_mask & RADEON_RESET_DMA) { 4193 /* sdma0 */ 4194 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET); 4195 tmp |= SDMA_HALT; 4196 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp); 4197 } 4198 if (reset_mask & RADEON_RESET_DMA1) { 4199 /* sdma1 */ 4200 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET); 4201 tmp |= SDMA_HALT; 4202 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp); 4203 } 4204 4205 evergreen_mc_stop(rdev, &save); 4206 if (evergreen_mc_wait_for_idle(rdev)) { 4207 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 4208 } 4209 4210 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) 4211 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX; 4212 4213 if (reset_mask & RADEON_RESET_CP) { 4214 grbm_soft_reset |= SOFT_RESET_CP; 4215 4216 srbm_soft_reset |= SOFT_RESET_GRBM; 4217 } 4218 4219 if (reset_mask & RADEON_RESET_DMA) 4220 srbm_soft_reset |= SOFT_RESET_SDMA; 4221 4222 if (reset_mask & RADEON_RESET_DMA1) 4223 srbm_soft_reset |= SOFT_RESET_SDMA1; 4224 4225 if (reset_mask & RADEON_RESET_DISPLAY) 4226 srbm_soft_reset |= SOFT_RESET_DC; 4227 4228 if (reset_mask & RADEON_RESET_RLC) 4229 grbm_soft_reset |= SOFT_RESET_RLC; 4230 4231 if (reset_mask & RADEON_RESET_SEM) 4232 srbm_soft_reset |= SOFT_RESET_SEM; 4233 4234 if (reset_mask & RADEON_RESET_IH) 4235 srbm_soft_reset |= SOFT_RESET_IH; 4236 4237 if (reset_mask & RADEON_RESET_GRBM) 4238 srbm_soft_reset |= SOFT_RESET_GRBM; 4239 4240 if (reset_mask & RADEON_RESET_VMC) 4241 srbm_soft_reset |= SOFT_RESET_VMC; 4242 4243 if (!(rdev->flags & RADEON_IS_IGP)) { 4244 if (reset_mask & RADEON_RESET_MC) 4245 srbm_soft_reset |= SOFT_RESET_MC; 4246 } 4247 4248 if (grbm_soft_reset) { 4249 tmp = RREG32(GRBM_SOFT_RESET); 4250 tmp |= grbm_soft_reset; 4251 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4252 WREG32(GRBM_SOFT_RESET, tmp); 4253 tmp = RREG32(GRBM_SOFT_RESET); 4254 4255 udelay(50); 4256 4257 tmp &= ~grbm_soft_reset; 4258 WREG32(GRBM_SOFT_RESET, tmp); 4259 tmp = RREG32(GRBM_SOFT_RESET); 4260 } 4261 4262 if (srbm_soft_reset) { 4263 tmp = RREG32(SRBM_SOFT_RESET); 4264 tmp |= srbm_soft_reset; 4265 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 4266 WREG32(SRBM_SOFT_RESET, tmp); 4267 tmp = RREG32(SRBM_SOFT_RESET); 4268 4269 udelay(50); 4270 4271 tmp &= ~srbm_soft_reset; 4272 WREG32(SRBM_SOFT_RESET, tmp); 4273 tmp = RREG32(SRBM_SOFT_RESET); 4274 } 4275 4276 /* Wait a little for things to settle down */ 4277 udelay(50); 4278 4279 evergreen_mc_resume(rdev, &save); 4280 udelay(50); 4281 4282 cik_print_gpu_status_regs(rdev); 4283 } 4284 4285 /** 4286 * 
cik_asic_reset - soft reset GPU 4287 * 4288 * @rdev: radeon_device pointer 4289 * 4290 * Look up which blocks are hung and attempt 4291 * to reset them. 4292 * Returns 0 for success. 4293 */ 4294 int cik_asic_reset(struct radeon_device *rdev) 4295 { 4296 u32 reset_mask; 4297 4298 reset_mask = cik_gpu_check_soft_reset(rdev); 4299 4300 if (reset_mask) 4301 r600_set_bios_scratch_engine_hung(rdev, true); 4302 4303 cik_gpu_soft_reset(rdev, reset_mask); 4304 4305 reset_mask = cik_gpu_check_soft_reset(rdev); 4306 4307 if (!reset_mask) 4308 r600_set_bios_scratch_engine_hung(rdev, false); 4309 4310 return 0; 4311 } 4312 4313 /** 4314 * cik_gfx_is_lockup - check if the 3D engine is locked up 4315 * 4316 * @rdev: radeon_device pointer 4317 * @ring: radeon_ring structure holding ring information 4318 * 4319 * Check if the 3D engine is locked up (CIK). 4320 * Returns true if the engine is locked, false if not. 4321 */ 4322 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 4323 { 4324 u32 reset_mask = cik_gpu_check_soft_reset(rdev); 4325 4326 if (!(reset_mask & (RADEON_RESET_GFX | 4327 RADEON_RESET_COMPUTE | 4328 RADEON_RESET_CP))) { 4329 radeon_ring_lockup_update(ring); 4330 return false; 4331 } 4332 /* force CP activities */ 4333 radeon_ring_force_activity(rdev, ring); 4334 return radeon_ring_test_lockup(rdev, ring); 4335 } 4336 4337 /* MC */ 4338 /** 4339 * cik_mc_program - program the GPU memory controller 4340 * 4341 * @rdev: radeon_device pointer 4342 * 4343 * Set the location of vram, gart, and AGP in the GPU's 4344 * physical address space (CIK). 4345 */ 4346 static void cik_mc_program(struct radeon_device *rdev) 4347 { 4348 struct evergreen_mc_save save; 4349 u32 tmp; 4350 int i, j; 4351 4352 /* Initialize HDP */ 4353 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 4354 WREG32((0x2c14 + j), 0x00000000); 4355 WREG32((0x2c18 + j), 0x00000000); 4356 WREG32((0x2c1c + j), 0x00000000); 4357 WREG32((0x2c20 + j), 0x00000000); 4358 WREG32((0x2c24 + j), 0x00000000); 4359 } 4360 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); 4361 4362 evergreen_mc_stop(rdev, &save); 4363 if (radeon_mc_wait_for_idle(rdev)) { 4364 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 4365 } 4366 /* Lockout access through VGA aperture*/ 4367 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); 4368 /* Update configuration */ 4369 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 4370 rdev->mc.vram_start >> 12); 4371 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 4372 rdev->mc.vram_end >> 12); 4373 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 4374 rdev->vram_scratch.gpu_addr >> 12); 4375 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; 4376 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); 4377 WREG32(MC_VM_FB_LOCATION, tmp); 4378 /* XXX double check these! 
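HDP_NONSURFACE_BASE is programmed in 256-byte units (hence
	 * the vram_start >> 8 below), and the MC_VM_AGP_* registers are
	 * simply parked at safe defaults since CIK parts have no AGP.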
	 */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}

/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM information */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0? */
	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
	/* size in MB */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}

/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
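 *
 * VMID0 uses a single-level page table (PAGE_TABLE_DEPTH(0)) spanning
 * the whole GART range, so the table base, start and end addresses are
 * all programmed in 4 KiB page units (the >> 12 shifts below).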
4493 */ 4494 static int cik_pcie_gart_enable(struct radeon_device *rdev) 4495 { 4496 int r, i; 4497 4498 if (rdev->gart.robj == NULL) { 4499 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); 4500 return -EINVAL; 4501 } 4502 r = radeon_gart_table_vram_pin(rdev); 4503 if (r) 4504 return r; 4505 radeon_gart_restore(rdev); 4506 /* Setup TLB control */ 4507 WREG32(MC_VM_MX_L1_TLB_CNTL, 4508 (0xA << 7) | 4509 ENABLE_L1_TLB | 4510 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 4511 ENABLE_ADVANCED_DRIVER_MODEL | 4512 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 4513 /* Setup L2 cache */ 4514 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | 4515 ENABLE_L2_FRAGMENT_PROCESSING | 4516 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 4517 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 4518 EFFECTIVE_L2_QUEUE_SIZE(7) | 4519 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 4520 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); 4521 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 4522 L2_CACHE_BIGK_FRAGMENT_SIZE(6)); 4523 /* setup context0 */ 4524 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 4525 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); 4526 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); 4527 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 4528 (u32)(rdev->dummy_page.addr >> 12)); 4529 WREG32(VM_CONTEXT0_CNTL2, 0); 4530 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | 4531 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT)); 4532 4533 WREG32(0x15D4, 0); 4534 WREG32(0x15D8, 0); 4535 WREG32(0x15DC, 0); 4536 4537 /* empty context1-15 */ 4538 /* FIXME start with 4G, once using 2 level pt switch to full 4539 * vm size space 4540 */ 4541 /* set vm size, must be a multiple of 4 */ 4542 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); 4543 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn); 4544 for (i = 1; i < 16; i++) { 4545 if (i < 8) 4546 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), 4547 rdev->gart.table_addr >> 12); 4548 else 4549 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2), 4550 rdev->gart.table_addr >> 12); 4551 } 4552 4553 /* enable context1-15 */ 4554 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 4555 (u32)(rdev->dummy_page.addr >> 12)); 4556 WREG32(VM_CONTEXT1_CNTL2, 4); 4557 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | 4558 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 4559 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | 4560 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 4561 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT | 4562 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT | 4563 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT | 4564 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT | 4565 VALID_PROTECTION_FAULT_ENABLE_DEFAULT | 4566 READ_PROTECTION_FAULT_ENABLE_INTERRUPT | 4567 READ_PROTECTION_FAULT_ENABLE_DEFAULT | 4568 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT | 4569 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT); 4570 4571 /* TC cache setup ??? 
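All load/store/atomic policies are simply left at 0 (the
	 * hardware default) until better values are known.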
*/ 4572 WREG32(TC_CFG_L1_LOAD_POLICY0, 0); 4573 WREG32(TC_CFG_L1_LOAD_POLICY1, 0); 4574 WREG32(TC_CFG_L1_STORE_POLICY, 0); 4575 4576 WREG32(TC_CFG_L2_LOAD_POLICY0, 0); 4577 WREG32(TC_CFG_L2_LOAD_POLICY1, 0); 4578 WREG32(TC_CFG_L2_STORE_POLICY0, 0); 4579 WREG32(TC_CFG_L2_STORE_POLICY1, 0); 4580 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0); 4581 4582 WREG32(TC_CFG_L1_VOLATILE, 0); 4583 WREG32(TC_CFG_L2_VOLATILE, 0); 4584 4585 if (rdev->family == CHIP_KAVERI) { 4586 u32 tmp = RREG32(CHUB_CONTROL); 4587 tmp &= ~BYPASS_VM; 4588 WREG32(CHUB_CONTROL, tmp); 4589 } 4590 4591 /* XXX SH_MEM regs */ 4592 /* where to put LDS, scratch, GPUVM in FSA64 space */ 4593 spin_lock(&rdev->srbm_mutex); 4594 for (i = 0; i < 16; i++) { 4595 cik_srbm_select(rdev, 0, 0, 0, i); 4596 /* CP and shaders */ 4597 WREG32(SH_MEM_CONFIG, 0); 4598 WREG32(SH_MEM_APE1_BASE, 1); 4599 WREG32(SH_MEM_APE1_LIMIT, 0); 4600 WREG32(SH_MEM_BASES, 0); 4601 /* SDMA GFX */ 4602 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0); 4603 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0); 4604 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0); 4605 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0); 4606 /* XXX SDMA RLC - todo */ 4607 } 4608 cik_srbm_select(rdev, 0, 0, 0, 0); 4609 spin_unlock(&rdev->srbm_mutex); 4610 4611 cik_pcie_gart_tlb_flush(rdev); 4612 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 4613 (unsigned)(rdev->mc.gtt_size >> 20), 4614 (unsigned long long)rdev->gart.table_addr); 4615 rdev->gart.ready = true; 4616 return 0; 4617 } 4618 4619 /** 4620 * cik_pcie_gart_disable - gart disable 4621 * 4622 * @rdev: radeon_device pointer 4623 * 4624 * This disables all VM page table (CIK). 4625 */ 4626 static void cik_pcie_gart_disable(struct radeon_device *rdev) 4627 { 4628 /* Disable all tables */ 4629 WREG32(VM_CONTEXT0_CNTL, 0); 4630 WREG32(VM_CONTEXT1_CNTL, 0); 4631 /* Setup TLB control */ 4632 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS | 4633 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 4634 /* Setup L2 cache */ 4635 WREG32(VM_L2_CNTL, 4636 ENABLE_L2_FRAGMENT_PROCESSING | 4637 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 4638 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 4639 EFFECTIVE_L2_QUEUE_SIZE(7) | 4640 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 4641 WREG32(VM_L2_CNTL2, 0); 4642 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 4643 L2_CACHE_BIGK_FRAGMENT_SIZE(6)); 4644 radeon_gart_table_vram_unpin(rdev); 4645 } 4646 4647 /** 4648 * cik_pcie_gart_fini - vm fini callback 4649 * 4650 * @rdev: radeon_device pointer 4651 * 4652 * Tears down the driver GART/VM setup (CIK). 4653 */ 4654 static void cik_pcie_gart_fini(struct radeon_device *rdev) 4655 { 4656 cik_pcie_gart_disable(rdev); 4657 radeon_gart_table_vram_free(rdev); 4658 radeon_gart_fini(rdev); 4659 } 4660 4661 /* vm parser */ 4662 /** 4663 * cik_ib_parse - vm ib_parse callback 4664 * 4665 * @rdev: radeon_device pointer 4666 * @ib: indirect buffer pointer 4667 * 4668 * CIK uses hw IB checking so this is a nop (CIK). 4669 */ 4670 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) 4671 { 4672 return 0; 4673 } 4674 4675 /* 4676 * vm 4677 * VMID 0 is the physical GPU addresses as used by the kernel. 4678 * VMIDs 1-15 are used for userspace clients and are handled 4679 * by the radeon vm/hsa code. 4680 */ 4681 /** 4682 * cik_vm_init - cik vm init callback 4683 * 4684 * @rdev: radeon_device pointer 4685 * 4686 * Inits cik specific vm parameters (number of VMs, base of vram for 4687 * VMIDs 1-15) (CIK). 4688 * Returns 0 for success. 
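 *
 * On IGPs the framebuffer lives in carved-out system memory, so the
 * VM base offset is read back from MC_VM_FB_OFFSET (programmed in
 * 4 MiB units, hence the << 22); on dGPUs it is simply 0.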
4689 */ 4690 int cik_vm_init(struct radeon_device *rdev) 4691 { 4692 /* number of VMs */ 4693 rdev->vm_manager.nvm = 16; 4694 /* base offset of vram pages */ 4695 if (rdev->flags & RADEON_IS_IGP) { 4696 u64 tmp = RREG32(MC_VM_FB_OFFSET); 4697 tmp <<= 22; 4698 rdev->vm_manager.vram_base_offset = tmp; 4699 } else 4700 rdev->vm_manager.vram_base_offset = 0; 4701 4702 return 0; 4703 } 4704 4705 /** 4706 * cik_vm_fini - cik vm fini callback 4707 * 4708 * @rdev: radeon_device pointer 4709 * 4710 * Tear down any asic specific VM setup (CIK). 4711 */ 4712 void cik_vm_fini(struct radeon_device *rdev) 4713 { 4714 } 4715 4716 /** 4717 * cik_vm_decode_fault - print human readable fault info 4718 * 4719 * @rdev: radeon_device pointer 4720 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value 4721 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value 4722 * 4723 * Print human readable fault information (CIK). 4724 */ 4725 static void cik_vm_decode_fault(struct radeon_device *rdev, 4726 u32 status, u32 addr, u32 mc_client) 4727 { 4728 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; 4729 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT; 4730 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT; 4731 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, 4732 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; 4733 4734 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", 4735 protections, vmid, addr, 4736 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read", 4737 block, mc_client, mc_id); 4738 } 4739 4740 /** 4741 * cik_vm_flush - cik vm flush using the CP 4742 * 4743 * @rdev: radeon_device pointer 4744 * 4745 * Update the page table base and flush the VM TLB 4746 * using the CP (CIK). 
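 *
 * Everything is emitted as WRITE_DATA packets: the page table base for
 * this VMID, the SH_MEM_* defaults (selected via SRBM_GFX_CNTL), an HDP
 * flush, and a write of 1 << vm->id to VM_INVALIDATE_REQUEST; on the
 * gfx ring a PFP_SYNC_ME follows since compute has no PFP.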
4747 */ 4748 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) 4749 { 4750 struct radeon_ring *ring = &rdev->ring[ridx]; 4751 4752 if (vm == NULL) 4753 return; 4754 4755 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4756 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4757 WRITE_DATA_DST_SEL(0))); 4758 if (vm->id < 8) { 4759 radeon_ring_write(ring, 4760 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); 4761 } else { 4762 radeon_ring_write(ring, 4763 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); 4764 } 4765 radeon_ring_write(ring, 0); 4766 radeon_ring_write(ring, vm->pd_gpu_addr >> 12); 4767 4768 /* update SH_MEM_* regs */ 4769 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4770 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4771 WRITE_DATA_DST_SEL(0))); 4772 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 4773 radeon_ring_write(ring, 0); 4774 radeon_ring_write(ring, VMID(vm->id)); 4775 4776 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); 4777 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4778 WRITE_DATA_DST_SEL(0))); 4779 radeon_ring_write(ring, SH_MEM_BASES >> 2); 4780 radeon_ring_write(ring, 0); 4781 4782 radeon_ring_write(ring, 0); /* SH_MEM_BASES */ 4783 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */ 4784 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */ 4785 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */ 4786 4787 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4788 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4789 WRITE_DATA_DST_SEL(0))); 4790 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 4791 radeon_ring_write(ring, 0); 4792 radeon_ring_write(ring, VMID(0)); 4793 4794 /* HDP flush */ 4795 /* We should be using the WAIT_REG_MEM packet here like in 4796 * cik_fence_ring_emit(), but it causes the CP to hang in this 4797 * context... 4798 */ 4799 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4800 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4801 WRITE_DATA_DST_SEL(0))); 4802 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); 4803 radeon_ring_write(ring, 0); 4804 radeon_ring_write(ring, 0); 4805 4806 /* bits 0-15 are the VM contexts0-15 */ 4807 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4808 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4809 WRITE_DATA_DST_SEL(0))); 4810 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); 4811 radeon_ring_write(ring, 0); 4812 radeon_ring_write(ring, 1 << vm->id); 4813 4814 /* compute doesn't have PFP */ 4815 if (ridx == RADEON_RING_TYPE_GFX_INDEX) { 4816 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4817 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4818 radeon_ring_write(ring, 0x0); 4819 } 4820 } 4821 4822 /** 4823 * cik_vm_set_page - update the page tables using sDMA 4824 * 4825 * @rdev: radeon_device pointer 4826 * @ib: indirect buffer to fill with commands 4827 * @pe: addr of the page entry 4828 * @addr: dst addr to write into pe 4829 * @count: number of page entries to update 4830 * @incr: increase next addr by incr bytes 4831 * @flags: access flags 4832 * 4833 * Update the page tables using CP or sDMA (CIK). 
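 *
 * On the CP path, ndw = 2 + count * 2 is capped at 0x3FFE, so each
 * WRITE_DATA packet carries at most (0x3FFE - 2) / 2 = 8190 page table
 * entries; larger updates are split across multiple packets.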
4834 */ 4835 void cik_vm_set_page(struct radeon_device *rdev, 4836 struct radeon_ib *ib, 4837 uint64_t pe, 4838 uint64_t addr, unsigned count, 4839 uint32_t incr, uint32_t flags) 4840 { 4841 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); 4842 uint64_t value; 4843 unsigned ndw; 4844 4845 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { 4846 /* CP */ 4847 while (count) { 4848 ndw = 2 + count * 2; 4849 if (ndw > 0x3FFE) 4850 ndw = 0x3FFE; 4851 4852 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); 4853 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | 4854 WRITE_DATA_DST_SEL(1)); 4855 ib->ptr[ib->length_dw++] = pe; 4856 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 4857 for (; ndw > 2; ndw -= 2, --count, pe += 8) { 4858 if (flags & RADEON_VM_PAGE_SYSTEM) { 4859 value = radeon_vm_map_gart(rdev, addr); 4860 value &= 0xFFFFFFFFFFFFF000ULL; 4861 } else if (flags & RADEON_VM_PAGE_VALID) { 4862 value = addr; 4863 } else { 4864 value = 0; 4865 } 4866 addr += incr; 4867 value |= r600_flags; 4868 ib->ptr[ib->length_dw++] = value; 4869 ib->ptr[ib->length_dw++] = upper_32_bits(value); 4870 } 4871 } 4872 } else { 4873 /* DMA */ 4874 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags); 4875 } 4876 } 4877 4878 /* 4879 * RLC 4880 * The RLC is a multi-purpose microengine that handles a 4881 * variety of functions, the most important of which is 4882 * the interrupt controller. 4883 */ 4884 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, 4885 bool enable) 4886 { 4887 u32 tmp = RREG32(CP_INT_CNTL_RING0); 4888 4889 if (enable) 4890 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 4891 else 4892 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 4893 WREG32(CP_INT_CNTL_RING0, tmp); 4894 } 4895 4896 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable) 4897 { 4898 u32 tmp; 4899 4900 tmp = RREG32(RLC_LB_CNTL); 4901 if (enable) 4902 tmp |= LOAD_BALANCE_ENABLE; 4903 else 4904 tmp &= ~LOAD_BALANCE_ENABLE; 4905 WREG32(RLC_LB_CNTL, tmp); 4906 } 4907 4908 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) 4909 { 4910 u32 i, j, k; 4911 u32 mask; 4912 4913 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 4914 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 4915 cik_select_se_sh(rdev, i, j); 4916 for (k = 0; k < rdev->usec_timeout; k++) { 4917 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0) 4918 break; 4919 udelay(1); 4920 } 4921 } 4922 } 4923 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 4924 4925 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; 4926 for (k = 0; k < rdev->usec_timeout; k++) { 4927 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 4928 break; 4929 udelay(1); 4930 } 4931 } 4932 4933 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc) 4934 { 4935 u32 tmp; 4936 4937 tmp = RREG32(RLC_CNTL); 4938 if (tmp != rlc) 4939 WREG32(RLC_CNTL, rlc); 4940 } 4941 4942 static u32 cik_halt_rlc(struct radeon_device *rdev) 4943 { 4944 u32 data, orig; 4945 4946 orig = data = RREG32(RLC_CNTL); 4947 4948 if (data & RLC_ENABLE) { 4949 u32 i; 4950 4951 data &= ~RLC_ENABLE; 4952 WREG32(RLC_CNTL, data); 4953 4954 for (i = 0; i < rdev->usec_timeout; i++) { 4955 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0) 4956 break; 4957 udelay(1); 4958 } 4959 4960 cik_wait_for_rlc_serdes(rdev); 4961 } 4962 4963 return orig; 4964 } 4965 4966 void cik_enter_rlc_safe_mode(struct radeon_device *rdev) 4967 { 4968 u32 tmp, i, mask; 4969 4970 tmp = REQ | 
MESSAGE(MSG_ENTER_RLC_SAFE_MODE); 4971 WREG32(RLC_GPR_REG2, tmp); 4972 4973 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS; 4974 for (i = 0; i < rdev->usec_timeout; i++) { 4975 if ((RREG32(RLC_GPM_STAT) & mask) == mask) 4976 break; 4977 udelay(1); 4978 } 4979 4980 for (i = 0; i < rdev->usec_timeout; i++) { 4981 if ((RREG32(RLC_GPR_REG2) & REQ) == 0) 4982 break; 4983 udelay(1); 4984 } 4985 } 4986 4987 void cik_exit_rlc_safe_mode(struct radeon_device *rdev) 4988 { 4989 u32 tmp; 4990 4991 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE); 4992 WREG32(RLC_GPR_REG2, tmp); 4993 } 4994 4995 /** 4996 * cik_rlc_stop - stop the RLC ME 4997 * 4998 * @rdev: radeon_device pointer 4999 * 5000 * Halt the RLC ME (MicroEngine) (CIK). 5001 */ 5002 static void cik_rlc_stop(struct radeon_device *rdev) 5003 { 5004 WREG32(RLC_CNTL, 0); 5005 5006 cik_enable_gui_idle_interrupt(rdev, false); 5007 5008 cik_wait_for_rlc_serdes(rdev); 5009 } 5010 5011 /** 5012 * cik_rlc_start - start the RLC ME 5013 * 5014 * @rdev: radeon_device pointer 5015 * 5016 * Unhalt the RLC ME (MicroEngine) (CIK). 5017 */ 5018 static void cik_rlc_start(struct radeon_device *rdev) 5019 { 5020 WREG32(RLC_CNTL, RLC_ENABLE); 5021 5022 cik_enable_gui_idle_interrupt(rdev, true); 5023 5024 udelay(50); 5025 } 5026 5027 /** 5028 * cik_rlc_resume - setup the RLC hw 5029 * 5030 * @rdev: radeon_device pointer 5031 * 5032 * Initialize the RLC registers, load the ucode, 5033 * and start the RLC (CIK). 5034 * Returns 0 for success, -EINVAL if the ucode is not available. 5035 */ 5036 static int cik_rlc_resume(struct radeon_device *rdev) 5037 { 5038 u32 i, size, tmp; 5039 const __be32 *fw_data; 5040 5041 if (!rdev->rlc_fw) 5042 return -EINVAL; 5043 5044 switch (rdev->family) { 5045 case CHIP_BONAIRE: 5046 default: 5047 size = BONAIRE_RLC_UCODE_SIZE; 5048 break; 5049 case CHIP_KAVERI: 5050 size = KV_RLC_UCODE_SIZE; 5051 break; 5052 case CHIP_KABINI: 5053 size = KB_RLC_UCODE_SIZE; 5054 break; 5055 } 5056 5057 cik_rlc_stop(rdev); 5058 5059 /* disable CG */ 5060 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; 5061 WREG32(RLC_CGCG_CGLS_CTRL, tmp); 5062 5063 si_rlc_reset(rdev); 5064 5065 cik_init_pg(rdev); 5066 5067 cik_init_cg(rdev); 5068 5069 WREG32(RLC_LB_CNTR_INIT, 0); 5070 WREG32(RLC_LB_CNTR_MAX, 0x00008000); 5071 5072 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5073 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); 5074 WREG32(RLC_LB_PARAMS, 0x00600408); 5075 WREG32(RLC_LB_CNTL, 0x80000004); 5076 5077 WREG32(RLC_MC_CNTL, 0); 5078 WREG32(RLC_UCODE_CNTL, 0); 5079 5080 fw_data = (const __be32 *)rdev->rlc_fw->data; 5081 WREG32(RLC_GPM_UCODE_ADDR, 0); 5082 for (i = 0; i < size; i++) 5083 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++)); 5084 WREG32(RLC_GPM_UCODE_ADDR, 0); 5085 5086 /* XXX - find out what chips support lbpw */ 5087 cik_enable_lbpw(rdev, false); 5088 5089 if (rdev->family == CHIP_BONAIRE) 5090 WREG32(RLC_DRIVER_DMA_STATUS, 0); 5091 5092 cik_rlc_start(rdev); 5093 5094 return 0; 5095 } 5096 5097 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) 5098 { 5099 u32 data, orig, tmp, tmp2; 5100 5101 orig = data = RREG32(RLC_CGCG_CGLS_CTRL); 5102 5103 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { 5104 cik_enable_gui_idle_interrupt(rdev, true); 5105 5106 tmp = cik_halt_rlc(rdev); 5107 5108 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5109 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5110 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5111 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE; 5112 WREG32(RLC_SERDES_WR_CTRL, 
tmp2); 5113 5114 cik_update_rlc(rdev, tmp); 5115 5116 data |= CGCG_EN | CGLS_EN; 5117 } else { 5118 cik_enable_gui_idle_interrupt(rdev, false); 5119 5120 RREG32(CB_CGTT_SCLK_CTRL); 5121 RREG32(CB_CGTT_SCLK_CTRL); 5122 RREG32(CB_CGTT_SCLK_CTRL); 5123 RREG32(CB_CGTT_SCLK_CTRL); 5124 5125 data &= ~(CGCG_EN | CGLS_EN); 5126 } 5127 5128 if (orig != data) 5129 WREG32(RLC_CGCG_CGLS_CTRL, data); 5130 5131 } 5132 5133 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) 5134 { 5135 u32 data, orig, tmp = 0; 5136 5137 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) { 5138 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) { 5139 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) { 5140 orig = data = RREG32(CP_MEM_SLP_CNTL); 5141 data |= CP_MEM_LS_EN; 5142 if (orig != data) 5143 WREG32(CP_MEM_SLP_CNTL, data); 5144 } 5145 } 5146 5147 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 5148 data &= 0xfffffffd; 5149 if (orig != data) 5150 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 5151 5152 tmp = cik_halt_rlc(rdev); 5153 5154 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5155 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5156 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5157 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0; 5158 WREG32(RLC_SERDES_WR_CTRL, data); 5159 5160 cik_update_rlc(rdev, tmp); 5161 5162 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) { 5163 orig = data = RREG32(CGTS_SM_CTRL_REG); 5164 data &= ~SM_MODE_MASK; 5165 data |= SM_MODE(0x2); 5166 data |= SM_MODE_ENABLE; 5167 data &= ~CGTS_OVERRIDE; 5168 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) && 5169 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS)) 5170 data &= ~CGTS_LS_OVERRIDE; 5171 data &= ~ON_MONITOR_ADD_MASK; 5172 data |= ON_MONITOR_ADD_EN; 5173 data |= ON_MONITOR_ADD(0x96); 5174 if (orig != data) 5175 WREG32(CGTS_SM_CTRL_REG, data); 5176 } 5177 } else { 5178 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 5179 data |= 0x00000002; 5180 if (orig != data) 5181 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 5182 5183 data = RREG32(RLC_MEM_SLP_CNTL); 5184 if (data & RLC_MEM_LS_EN) { 5185 data &= ~RLC_MEM_LS_EN; 5186 WREG32(RLC_MEM_SLP_CNTL, data); 5187 } 5188 5189 data = RREG32(CP_MEM_SLP_CNTL); 5190 if (data & CP_MEM_LS_EN) { 5191 data &= ~CP_MEM_LS_EN; 5192 WREG32(CP_MEM_SLP_CNTL, data); 5193 } 5194 5195 orig = data = RREG32(CGTS_SM_CTRL_REG); 5196 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE; 5197 if (orig != data) 5198 WREG32(CGTS_SM_CTRL_REG, data); 5199 5200 tmp = cik_halt_rlc(rdev); 5201 5202 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5203 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5204 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5205 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; 5206 WREG32(RLC_SERDES_WR_CTRL, data); 5207 5208 cik_update_rlc(rdev, tmp); 5209 } 5210 } 5211 5212 static const u32 mc_cg_registers[] = 5213 { 5214 MC_HUB_MISC_HUB_CG, 5215 MC_HUB_MISC_SIP_CG, 5216 MC_HUB_MISC_VM_CG, 5217 MC_XPB_CLK_GAT, 5218 ATC_MISC_CG, 5219 MC_CITF_MISC_WR_CG, 5220 MC_CITF_MISC_RD_CG, 5221 MC_CITF_MISC_VM_CG, 5222 VM_L2_CG, 5223 }; 5224 5225 static void cik_enable_mc_ls(struct radeon_device *rdev, 5226 bool enable) 5227 { 5228 int i; 5229 u32 orig, data; 5230 5231 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 5232 orig = data = RREG32(mc_cg_registers[i]); 5233 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS)) 5234 data |= MC_LS_ENABLE; 5235 else 5236 data &= ~MC_LS_ENABLE; 5237 if (data != orig) 5238 WREG32(mc_cg_registers[i], data); 5239 } 5240 } 5241 5242 static void 
cik_enable_mc_mgcg(struct radeon_device *rdev, 5243 bool enable) 5244 { 5245 int i; 5246 u32 orig, data; 5247 5248 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 5249 orig = data = RREG32(mc_cg_registers[i]); 5250 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG)) 5251 data |= MC_CG_ENABLE; 5252 else 5253 data &= ~MC_CG_ENABLE; 5254 if (data != orig) 5255 WREG32(mc_cg_registers[i], data); 5256 } 5257 } 5258 5259 static void cik_enable_sdma_mgcg(struct radeon_device *rdev, 5260 bool enable) 5261 { 5262 u32 orig, data; 5263 5264 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) { 5265 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100); 5266 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100); 5267 } else { 5268 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET); 5269 data |= 0xff000000; 5270 if (data != orig) 5271 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data); 5272 5273 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET); 5274 data |= 0xff000000; 5275 if (data != orig) 5276 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data); 5277 } 5278 } 5279 5280 static void cik_enable_sdma_mgls(struct radeon_device *rdev, 5281 bool enable) 5282 { 5283 u32 orig, data; 5284 5285 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) { 5286 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 5287 data |= 0x100; 5288 if (orig != data) 5289 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); 5290 5291 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 5292 data |= 0x100; 5293 if (orig != data) 5294 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 5295 } else { 5296 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 5297 data &= ~0x100; 5298 if (orig != data) 5299 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); 5300 5301 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 5302 data &= ~0x100; 5303 if (orig != data) 5304 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 5305 } 5306 } 5307 5308 static void cik_enable_uvd_mgcg(struct radeon_device *rdev, 5309 bool enable) 5310 { 5311 u32 orig, data; 5312 5313 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) { 5314 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 5315 data = 0xfff; 5316 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 5317 5318 orig = data = RREG32(UVD_CGC_CTRL); 5319 data |= DCM; 5320 if (orig != data) 5321 WREG32(UVD_CGC_CTRL, data); 5322 } else { 5323 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 5324 data &= ~0xfff; 5325 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 5326 5327 orig = data = RREG32(UVD_CGC_CTRL); 5328 data &= ~DCM; 5329 if (orig != data) 5330 WREG32(UVD_CGC_CTRL, data); 5331 } 5332 } 5333 5334 static void cik_enable_bif_mgls(struct radeon_device *rdev, 5335 bool enable) 5336 { 5337 u32 orig, data; 5338 5339 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); 5340 5341 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS)) 5342 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | 5343 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN; 5344 else 5345 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN | 5346 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN); 5347 5348 if (orig != data) 5349 WREG32_PCIE_PORT(PCIE_CNTL2, data); 5350 } 5351 5352 static void cik_enable_hdp_mgcg(struct radeon_device *rdev, 5353 bool enable) 5354 { 5355 u32 orig, data; 5356 5357 orig = data = RREG32(HDP_HOST_PATH_CNTL); 5358 5359 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG)) 5360 data &= ~CLOCK_GATING_DIS; 5361 else 5362 data |= CLOCK_GATING_DIS; 
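	/* note the inverted sense: HDP MGCG is controlled via a disable
	 * bit, so enabling clock gating means clearing CLOCK_GATING_DIS */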
5363 5364 if (orig != data) 5365 WREG32(HDP_HOST_PATH_CNTL, data); 5366 } 5367 5368 static void cik_enable_hdp_ls(struct radeon_device *rdev, 5369 bool enable) 5370 { 5371 u32 orig, data; 5372 5373 orig = data = RREG32(HDP_MEM_POWER_LS); 5374 5375 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS)) 5376 data |= HDP_LS_ENABLE; 5377 else 5378 data &= ~HDP_LS_ENABLE; 5379 5380 if (orig != data) 5381 WREG32(HDP_MEM_POWER_LS, data); 5382 } 5383 5384 void cik_update_cg(struct radeon_device *rdev, 5385 u32 block, bool enable) 5386 { 5387 5388 if (block & RADEON_CG_BLOCK_GFX) { 5389 cik_enable_gui_idle_interrupt(rdev, false); 5390 /* order matters! */ 5391 if (enable) { 5392 cik_enable_mgcg(rdev, true); 5393 cik_enable_cgcg(rdev, true); 5394 } else { 5395 cik_enable_cgcg(rdev, false); 5396 cik_enable_mgcg(rdev, false); 5397 } 5398 cik_enable_gui_idle_interrupt(rdev, true); 5399 } 5400 5401 if (block & RADEON_CG_BLOCK_MC) { 5402 if (!(rdev->flags & RADEON_IS_IGP)) { 5403 cik_enable_mc_mgcg(rdev, enable); 5404 cik_enable_mc_ls(rdev, enable); 5405 } 5406 } 5407 5408 if (block & RADEON_CG_BLOCK_SDMA) { 5409 cik_enable_sdma_mgcg(rdev, enable); 5410 cik_enable_sdma_mgls(rdev, enable); 5411 } 5412 5413 if (block & RADEON_CG_BLOCK_BIF) { 5414 cik_enable_bif_mgls(rdev, enable); 5415 } 5416 5417 if (block & RADEON_CG_BLOCK_UVD) { 5418 if (rdev->has_uvd) 5419 cik_enable_uvd_mgcg(rdev, enable); 5420 } 5421 5422 if (block & RADEON_CG_BLOCK_HDP) { 5423 cik_enable_hdp_mgcg(rdev, enable); 5424 cik_enable_hdp_ls(rdev, enable); 5425 } 5426 } 5427 5428 static void cik_init_cg(struct radeon_device *rdev) 5429 { 5430 5431 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true); 5432 5433 if (rdev->has_uvd) 5434 si_init_uvd_internal_cg(rdev); 5435 5436 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 5437 RADEON_CG_BLOCK_SDMA | 5438 RADEON_CG_BLOCK_BIF | 5439 RADEON_CG_BLOCK_UVD | 5440 RADEON_CG_BLOCK_HDP), true); 5441 } 5442 5443 static void cik_fini_cg(struct radeon_device *rdev) 5444 { 5445 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 5446 RADEON_CG_BLOCK_SDMA | 5447 RADEON_CG_BLOCK_BIF | 5448 RADEON_CG_BLOCK_UVD | 5449 RADEON_CG_BLOCK_HDP), false); 5450 5451 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); 5452 } 5453 5454 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev, 5455 bool enable) 5456 { 5457 u32 data, orig; 5458 5459 orig = data = RREG32(RLC_PG_CNTL); 5460 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 5461 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 5462 else 5463 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 5464 if (orig != data) 5465 WREG32(RLC_PG_CNTL, data); 5466 } 5467 5468 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev, 5469 bool enable) 5470 { 5471 u32 data, orig; 5472 5473 orig = data = RREG32(RLC_PG_CNTL); 5474 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 5475 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 5476 else 5477 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 5478 if (orig != data) 5479 WREG32(RLC_PG_CNTL, data); 5480 } 5481 5482 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable) 5483 { 5484 u32 data, orig; 5485 5486 orig = data = RREG32(RLC_PG_CNTL); 5487 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP)) 5488 data &= ~DISABLE_CP_PG; 5489 else 5490 data |= DISABLE_CP_PG; 5491 if (orig != data) 5492 WREG32(RLC_PG_CNTL, data); 5493 } 5494 5495 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable) 5496 { 5497 u32 data, orig; 5498 5499 orig = data = RREG32(RLC_PG_CNTL); 5500 if (enable && (rdev->pg_flags & 
RADEON_PG_SUPPORT_GDS)) 5501 data &= ~DISABLE_GDS_PG; 5502 else 5503 data |= DISABLE_GDS_PG; 5504 if (orig != data) 5505 WREG32(RLC_PG_CNTL, data); 5506 } 5507 5508 #define CP_ME_TABLE_SIZE 96 5509 #define CP_ME_TABLE_OFFSET 2048 5510 #define CP_MEC_TABLE_OFFSET 4096 5511 5512 void cik_init_cp_pg_table(struct radeon_device *rdev) 5513 { 5514 const __be32 *fw_data; 5515 volatile u32 *dst_ptr; 5516 int me, i, max_me = 4; 5517 u32 bo_offset = 0; 5518 u32 table_offset; 5519 5520 if (rdev->family == CHIP_KAVERI) 5521 max_me = 5; 5522 5523 if (rdev->rlc.cp_table_ptr == NULL) 5524 return; 5525 5526 /* write the cp table buffer */ 5527 dst_ptr = rdev->rlc.cp_table_ptr; 5528 for (me = 0; me < max_me; me++) { 5529 if (me == 0) { 5530 fw_data = (const __be32 *)rdev->ce_fw->data; 5531 table_offset = CP_ME_TABLE_OFFSET; 5532 } else if (me == 1) { 5533 fw_data = (const __be32 *)rdev->pfp_fw->data; 5534 table_offset = CP_ME_TABLE_OFFSET; 5535 } else if (me == 2) { 5536 fw_data = (const __be32 *)rdev->me_fw->data; 5537 table_offset = CP_ME_TABLE_OFFSET; 5538 } else { 5539 fw_data = (const __be32 *)rdev->mec_fw->data; 5540 table_offset = CP_MEC_TABLE_OFFSET; 5541 } 5542 5543 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) { 5544 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]); 5545 } 5546 bo_offset += CP_ME_TABLE_SIZE; 5547 } 5548 } 5549 5550 static void cik_enable_gfx_cgpg(struct radeon_device *rdev, 5551 bool enable) 5552 { 5553 u32 data, orig; 5554 5555 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) { 5556 orig = data = RREG32(RLC_PG_CNTL); 5557 data |= GFX_PG_ENABLE; 5558 if (orig != data) 5559 WREG32(RLC_PG_CNTL, data); 5560 5561 orig = data = RREG32(RLC_AUTO_PG_CTRL); 5562 data |= AUTO_PG_EN; 5563 if (orig != data) 5564 WREG32(RLC_AUTO_PG_CTRL, data); 5565 } else { 5566 orig = data = RREG32(RLC_PG_CNTL); 5567 data &= ~GFX_PG_ENABLE; 5568 if (orig != data) 5569 WREG32(RLC_PG_CNTL, data); 5570 5571 orig = data = RREG32(RLC_AUTO_PG_CTRL); 5572 data &= ~AUTO_PG_EN; 5573 if (orig != data) 5574 WREG32(RLC_AUTO_PG_CTRL, data); 5575 5576 data = RREG32(DB_RENDER_CONTROL); 5577 } 5578 } 5579 5580 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) 5581 { 5582 u32 mask = 0, tmp, tmp1; 5583 int i; 5584 5585 cik_select_se_sh(rdev, se, sh); 5586 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); 5587 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); 5588 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5589 5590 tmp &= 0xffff0000; 5591 5592 tmp |= tmp1; 5593 tmp >>= 16; 5594 5595 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) { 5596 mask <<= 1; 5597 mask |= 1; 5598 } 5599 5600 return (~tmp) & mask; 5601 } 5602 5603 static void cik_init_ao_cu_mask(struct radeon_device *rdev) 5604 { 5605 u32 i, j, k, active_cu_number = 0; 5606 u32 mask, counter, cu_bitmap; 5607 u32 tmp = 0; 5608 5609 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 5610 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 5611 mask = 1; 5612 cu_bitmap = 0; 5613 counter = 0; 5614 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) { 5615 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) { 5616 if (counter < 2) 5617 cu_bitmap |= mask; 5618 counter ++; 5619 } 5620 mask <<= 1; 5621 } 5622 5623 active_cu_number += counter; 5624 tmp |= (cu_bitmap << (i * 16 + j * 8)); 5625 } 5626 } 5627 5628 WREG32(RLC_PG_AO_CU_MASK, tmp); 5629 5630 tmp = RREG32(RLC_MAX_PG_CU); 5631 tmp &= ~MAX_PU_CU_MASK; 5632 tmp |= MAX_PU_CU(active_cu_number); 5633 WREG32(RLC_MAX_PG_CU, tmp); 5634 } 5635 5636 static void 
cik_enable_gfx_static_mgpg(struct radeon_device *rdev, 5637 bool enable) 5638 { 5639 u32 data, orig; 5640 5641 orig = data = RREG32(RLC_PG_CNTL); 5642 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG)) 5643 data |= STATIC_PER_CU_PG_ENABLE; 5644 else 5645 data &= ~STATIC_PER_CU_PG_ENABLE; 5646 if (orig != data) 5647 WREG32(RLC_PG_CNTL, data); 5648 } 5649 5650 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev, 5651 bool enable) 5652 { 5653 u32 data, orig; 5654 5655 orig = data = RREG32(RLC_PG_CNTL); 5656 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG)) 5657 data |= DYN_PER_CU_PG_ENABLE; 5658 else 5659 data &= ~DYN_PER_CU_PG_ENABLE; 5660 if (orig != data) 5661 WREG32(RLC_PG_CNTL, data); 5662 } 5663 5664 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 5665 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D 5666 5667 static void cik_init_gfx_cgpg(struct radeon_device *rdev) 5668 { 5669 u32 data, orig; 5670 u32 i; 5671 5672 if (rdev->rlc.cs_data) { 5673 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 5674 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr)); 5675 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr)); 5676 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size); 5677 } else { 5678 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 5679 for (i = 0; i < 3; i++) 5680 WREG32(RLC_GPM_SCRATCH_DATA, 0); 5681 } 5682 if (rdev->rlc.reg_list) { 5683 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET); 5684 for (i = 0; i < rdev->rlc.reg_list_size; i++) 5685 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]); 5686 } 5687 5688 orig = data = RREG32(RLC_PG_CNTL); 5689 data |= GFX_PG_SRC; 5690 if (orig != data) 5691 WREG32(RLC_PG_CNTL, data); 5692 5693 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); 5694 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8); 5695 5696 data = RREG32(CP_RB_WPTR_POLL_CNTL); 5697 data &= ~IDLE_POLL_COUNT_MASK; 5698 data |= IDLE_POLL_COUNT(0x60); 5699 WREG32(CP_RB_WPTR_POLL_CNTL, data); 5700 5701 data = 0x10101010; 5702 WREG32(RLC_PG_DELAY, data); 5703 5704 data = RREG32(RLC_PG_DELAY_2); 5705 data &= ~0xff; 5706 data |= 0x3; 5707 WREG32(RLC_PG_DELAY_2, data); 5708 5709 data = RREG32(RLC_AUTO_PG_CTRL); 5710 data &= ~GRBM_REG_SGIT_MASK; 5711 data |= GRBM_REG_SGIT(0x700); 5712 WREG32(RLC_AUTO_PG_CTRL, data); 5713 5714 } 5715 5716 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) 5717 { 5718 cik_enable_gfx_cgpg(rdev, enable); 5719 cik_enable_gfx_static_mgpg(rdev, enable); 5720 cik_enable_gfx_dynamic_mgpg(rdev, enable); 5721 } 5722 5723 u32 cik_get_csb_size(struct radeon_device *rdev) 5724 { 5725 u32 count = 0; 5726 const struct cs_section_def *sect = NULL; 5727 const struct cs_extent_def *ext = NULL; 5728 5729 if (rdev->rlc.cs_data == NULL) 5730 return 0; 5731 5732 /* begin clear state */ 5733 count += 2; 5734 /* context control state */ 5735 count += 3; 5736 5737 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 5738 for (ext = sect->section; ext->extent != NULL; ++ext) { 5739 if (sect->id == SECT_CONTEXT) 5740 count += 2 + ext->reg_count; 5741 else 5742 return 0; 5743 } 5744 } 5745 /* pa_sc_raster_config/pa_sc_raster_config1 */ 5746 count += 4; 5747 /* end clear state */ 5748 count += 2; 5749 /* clear state */ 5750 count += 2; 5751 5752 return count; 5753 } 5754 5755 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) 5756 { 5757 u32 count = 0, i; 5758 
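	/* the dword layout written below must stay in sync with
	 * cik_get_csb_size(): preamble begin, context control, one
	 * SET_CONTEXT_REG run per SECT_CONTEXT extent, the
	 * pa_sc_raster_config pair, preamble end, and the final
	 * CLEAR_STATE packet */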
const struct cs_section_def *sect = NULL; 5759 const struct cs_extent_def *ext = NULL; 5760 5761 if (rdev->rlc.cs_data == NULL) 5762 return; 5763 if (buffer == NULL) 5764 return; 5765 5766 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); 5767 buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE; 5768 5769 buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1); 5770 buffer[count++] = 0x80000000; 5771 buffer[count++] = 0x80000000; 5772 5773 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 5774 for (ext = sect->section; ext->extent != NULL; ++ext) { 5775 if (sect->id == SECT_CONTEXT) { 5776 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count); 5777 buffer[count++] = ext->reg_index - 0xa000; 5778 for (i = 0; i < ext->reg_count; i++) 5779 buffer[count++] = ext->extent[i]; 5780 } else { 5781 return; 5782 } 5783 } 5784 } 5785 5786 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 5787 buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START; 5788 switch (rdev->family) { 5789 case CHIP_BONAIRE: 5790 buffer[count++] = 0x16000012; 5791 buffer[count++] = 0x00000000; 5792 break; 5793 case CHIP_KAVERI: 5794 buffer[count++] = 0x00000000; /* XXX */ 5795 buffer[count++] = 0x00000000; 5796 break; 5797 case CHIP_KABINI: 5798 buffer[count++] = 0x00000000; /* XXX */ 5799 buffer[count++] = 0x00000000; 5800 break; 5801 default: 5802 buffer[count++] = 0x00000000; 5803 buffer[count++] = 0x00000000; 5804 break; 5805 } 5806 5807 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0); 5808 buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE; 5809 5810 buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0); 5811 buffer[count++] = 0; 5812 } 5813 5814 static void cik_init_pg(struct radeon_device *rdev) 5815 { 5816 if (rdev->pg_flags) { 5817 cik_enable_sck_slowdown_on_pu(rdev, true); 5818 cik_enable_sck_slowdown_on_pd(rdev, true); 5819 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 5820 cik_init_gfx_cgpg(rdev); 5821 cik_enable_cp_pg(rdev, true); 5822 cik_enable_gds_pg(rdev, true); 5823 } 5824 cik_init_ao_cu_mask(rdev); 5825 cik_update_gfx_pg(rdev, true); 5826 } 5827 } 5828 5829 static void cik_fini_pg(struct radeon_device *rdev) 5830 { 5831 if (rdev->pg_flags) { 5832 cik_update_gfx_pg(rdev, false); 5833 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 5834 cik_enable_cp_pg(rdev, false); 5835 cik_enable_gds_pg(rdev, false); 5836 } 5837 } 5838 } 5839 5840 /* 5841 * Interrupts 5842 * Starting with r6xx, interrupts are handled via a ring buffer. 5843 * Ring buffers are areas of GPU accessible memory that the GPU 5844 * writes interrupt vectors into and the host reads vectors out of. 5845 * There is a rptr (read pointer) that determines where the 5846 * host is currently reading, and a wptr (write pointer) 5847 * which determines where the GPU has written. When the 5848 * pointers are equal, the ring is idle. When the GPU 5849 * writes vectors to the ring buffer, it increments the 5850 * wptr. When there is an interrupt, the host then starts 5851 * fetching commands and processing them until the pointers are 5852 * equal again at which point it updates the rptr. 5853 */ 5854 5855 /** 5856 * cik_enable_interrupts - Enable the interrupt ring buffer 5857 * 5858 * @rdev: radeon_device pointer 5859 * 5860 * Enable the interrupt ring buffer (CIK). 
5861 */ 5862 static void cik_enable_interrupts(struct radeon_device *rdev) 5863 { 5864 u32 ih_cntl = RREG32(IH_CNTL); 5865 u32 ih_rb_cntl = RREG32(IH_RB_CNTL); 5866 5867 ih_cntl |= ENABLE_INTR; 5868 ih_rb_cntl |= IH_RB_ENABLE; 5869 WREG32(IH_CNTL, ih_cntl); 5870 WREG32(IH_RB_CNTL, ih_rb_cntl); 5871 rdev->ih.enabled = true; 5872 } 5873 5874 /** 5875 * cik_disable_interrupts - Disable the interrupt ring buffer 5876 * 5877 * @rdev: radeon_device pointer 5878 * 5879 * Disable the interrupt ring buffer (CIK). 5880 */ 5881 static void cik_disable_interrupts(struct radeon_device *rdev) 5882 { 5883 u32 ih_rb_cntl = RREG32(IH_RB_CNTL); 5884 u32 ih_cntl = RREG32(IH_CNTL); 5885 5886 ih_rb_cntl &= ~IH_RB_ENABLE; 5887 ih_cntl &= ~ENABLE_INTR; 5888 WREG32(IH_RB_CNTL, ih_rb_cntl); 5889 WREG32(IH_CNTL, ih_cntl); 5890 /* set rptr, wptr to 0 */ 5891 WREG32(IH_RB_RPTR, 0); 5892 WREG32(IH_RB_WPTR, 0); 5893 rdev->ih.enabled = false; 5894 rdev->ih.rptr = 0; 5895 } 5896 5897 /** 5898 * cik_disable_interrupt_state - Disable all interrupt sources 5899 * 5900 * @rdev: radeon_device pointer 5901 * 5902 * Clear all interrupt enable bits used by the driver (CIK). 5903 */ 5904 static void cik_disable_interrupt_state(struct radeon_device *rdev) 5905 { 5906 u32 tmp; 5907 5908 /* gfx ring */ 5909 tmp = RREG32(CP_INT_CNTL_RING0) & 5910 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 5911 WREG32(CP_INT_CNTL_RING0, tmp); 5912 /* sdma */ 5913 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; 5914 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp); 5915 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; 5916 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp); 5917 /* compute queues */ 5918 WREG32(CP_ME1_PIPE0_INT_CNTL, 0); 5919 WREG32(CP_ME1_PIPE1_INT_CNTL, 0); 5920 WREG32(CP_ME1_PIPE2_INT_CNTL, 0); 5921 WREG32(CP_ME1_PIPE3_INT_CNTL, 0); 5922 WREG32(CP_ME2_PIPE0_INT_CNTL, 0); 5923 WREG32(CP_ME2_PIPE1_INT_CNTL, 0); 5924 WREG32(CP_ME2_PIPE2_INT_CNTL, 0); 5925 WREG32(CP_ME2_PIPE3_INT_CNTL, 0); 5926 /* grbm */ 5927 WREG32(GRBM_INT_CNTL, 0); 5928 /* vline/vblank, etc. 
 */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}

/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, set up the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address.
same on older asics */ 5993 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); 5994 interrupt_cntl = RREG32(INTERRUPT_CNTL); 5995 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi 5996 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN 5997 */ 5998 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; 5999 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */ 6000 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; 6001 WREG32(INTERRUPT_CNTL, interrupt_cntl); 6002 6003 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); 6004 rb_bufsz = order_base_2(rdev->ih.ring_size / 4); 6005 6006 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | 6007 IH_WPTR_OVERFLOW_CLEAR | 6008 (rb_bufsz << 1)); 6009 6010 if (rdev->wb.enabled) 6011 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE; 6012 6013 /* set the writeback address whether it's enabled or not */ 6014 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC); 6015 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF); 6016 6017 WREG32(IH_RB_CNTL, ih_rb_cntl); 6018 6019 /* set rptr, wptr to 0 */ 6020 WREG32(IH_RB_RPTR, 0); 6021 WREG32(IH_RB_WPTR, 0); 6022 6023 /* Default settings for IH_CNTL (disabled at first) */ 6024 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0); 6025 /* RPTR_REARM only works if msi's are enabled */ 6026 if (rdev->msi_enabled) 6027 ih_cntl |= RPTR_REARM; 6028 WREG32(IH_CNTL, ih_cntl); 6029 6030 /* force the active interrupt state to all disabled */ 6031 cik_disable_interrupt_state(rdev); 6032 6033 pci_enable_busmaster(rdev->dev); 6034 6035 /* enable irqs */ 6036 cik_enable_interrupts(rdev); 6037 6038 return ret; 6039 } 6040 6041 /** 6042 * cik_irq_set - enable/disable interrupt sources 6043 * 6044 * @rdev: radeon_device pointer 6045 * 6046 * Enable interrupt sources on the GPU (vblanks, hpd, 6047 * etc.) (CIK). 6048 * Returns 0 for success, errors for failure. 
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	if (rdev->flags & RADEON_IS_IGP)
		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
			~(THERM_INTH_MASK | THERM_INTL_MASK);
	else
		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		if (rdev->flags & RADEON_IS_IGP)
			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
		else
			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}
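
	/*
	 * Everything above only computes the new enable bits
	 * read-modify-write style in local variables; nothing has touched
	 * the hardware yet.  The writes below commit the whole interrupt
	 * configuration in one batch.
	 */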
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	if (rdev->flags & RADEON_IS_IGP)
		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
	else
		WREG32_SMC(CG_THERMAL_INT, thermal_int);

	return 0;
}

/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
 * generated and do not require an explicit ack.
 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}

/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}

/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}

/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
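
/*
 * A note on the IH pointer arithmetic used below: the ring size is a
 * power of two, so byte offsets wrap with a simple AND against
 * ih.ptr_mask (ring_size - 1 in bytes, as set up by r600_ih_ring_init()).
 * For example, with the 64 KiB ring allocated in cik_init(), ptr_mask is
 * 0xffff and advancing the last 16-byte entry by one more entry,
 * (0xfff0 + 16) & 0xffff, wraps back to offset 0.
 */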

/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing
		 * interrupts from the last not-overwritten vector (wptr
		 * + 16).  Hopefully this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}

/* CIK IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID - ME0 0=3D
 *                    - ME1&2 compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt handler (CIK).  Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
 */
irqreturn_t cik_irq_process(struct radeon_device *rdev)
{
	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u8 me_id, pipe_id, queue_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_reset = false;
	u32 addr, status, mc_client;
	bool queue_thermal = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = cik_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	cik_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes!
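		 *
		 * Each pass around this loop consumes one 16 byte IV
		 * entry: ring_index converts the byte offset into a dword
		 * index, and dwords 0, 1 and 2 of the entry carry src_id,
		 * src_data and ring_id, per the IV ring layout described
		 * above.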
*/ 6541 ring_index = rptr / 4; 6542 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 6543 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 6544 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 6545 6546 switch (src_id) { 6547 case 1: /* D1 vblank/vline */ 6548 switch (src_data) { 6549 case 0: /* D1 vblank */ 6550 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { 6551 if (rdev->irq.crtc_vblank_int[0]) { 6552 drm_handle_vblank(rdev->ddev, 0); 6553 rdev->pm.vblank_sync = true; 6554 wake_up(&rdev->irq.vblank_queue); 6555 } 6556 if (atomic_read(&rdev->irq.pflip[0])) 6557 radeon_crtc_handle_flip(rdev, 0); 6558 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; 6559 DRM_DEBUG("IH: D1 vblank\n"); 6560 } 6561 break; 6562 case 1: /* D1 vline */ 6563 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { 6564 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; 6565 DRM_DEBUG("IH: D1 vline\n"); 6566 } 6567 break; 6568 default: 6569 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6570 break; 6571 } 6572 break; 6573 case 2: /* D2 vblank/vline */ 6574 switch (src_data) { 6575 case 0: /* D2 vblank */ 6576 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { 6577 if (rdev->irq.crtc_vblank_int[1]) { 6578 drm_handle_vblank(rdev->ddev, 1); 6579 rdev->pm.vblank_sync = true; 6580 wake_up(&rdev->irq.vblank_queue); 6581 } 6582 if (atomic_read(&rdev->irq.pflip[1])) 6583 radeon_crtc_handle_flip(rdev, 1); 6584 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; 6585 DRM_DEBUG("IH: D2 vblank\n"); 6586 } 6587 break; 6588 case 1: /* D2 vline */ 6589 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { 6590 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; 6591 DRM_DEBUG("IH: D2 vline\n"); 6592 } 6593 break; 6594 default: 6595 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6596 break; 6597 } 6598 break; 6599 case 3: /* D3 vblank/vline */ 6600 switch (src_data) { 6601 case 0: /* D3 vblank */ 6602 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { 6603 if (rdev->irq.crtc_vblank_int[2]) { 6604 drm_handle_vblank(rdev->ddev, 2); 6605 rdev->pm.vblank_sync = true; 6606 wake_up(&rdev->irq.vblank_queue); 6607 } 6608 if (atomic_read(&rdev->irq.pflip[2])) 6609 radeon_crtc_handle_flip(rdev, 2); 6610 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; 6611 DRM_DEBUG("IH: D3 vblank\n"); 6612 } 6613 break; 6614 case 1: /* D3 vline */ 6615 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { 6616 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; 6617 DRM_DEBUG("IH: D3 vline\n"); 6618 } 6619 break; 6620 default: 6621 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6622 break; 6623 } 6624 break; 6625 case 4: /* D4 vblank/vline */ 6626 switch (src_data) { 6627 case 0: /* D4 vblank */ 6628 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { 6629 if (rdev->irq.crtc_vblank_int[3]) { 6630 drm_handle_vblank(rdev->ddev, 3); 6631 rdev->pm.vblank_sync = true; 6632 wake_up(&rdev->irq.vblank_queue); 6633 } 6634 if (atomic_read(&rdev->irq.pflip[3])) 6635 radeon_crtc_handle_flip(rdev, 3); 6636 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; 6637 DRM_DEBUG("IH: D4 vblank\n"); 6638 } 6639 break; 6640 case 1: /* D4 vline */ 6641 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { 6642 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; 6643 
DRM_DEBUG("IH: D4 vline\n"); 6644 } 6645 break; 6646 default: 6647 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6648 break; 6649 } 6650 break; 6651 case 5: /* D5 vblank/vline */ 6652 switch (src_data) { 6653 case 0: /* D5 vblank */ 6654 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { 6655 if (rdev->irq.crtc_vblank_int[4]) { 6656 drm_handle_vblank(rdev->ddev, 4); 6657 rdev->pm.vblank_sync = true; 6658 wake_up(&rdev->irq.vblank_queue); 6659 } 6660 if (atomic_read(&rdev->irq.pflip[4])) 6661 radeon_crtc_handle_flip(rdev, 4); 6662 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; 6663 DRM_DEBUG("IH: D5 vblank\n"); 6664 } 6665 break; 6666 case 1: /* D5 vline */ 6667 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { 6668 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; 6669 DRM_DEBUG("IH: D5 vline\n"); 6670 } 6671 break; 6672 default: 6673 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6674 break; 6675 } 6676 break; 6677 case 6: /* D6 vblank/vline */ 6678 switch (src_data) { 6679 case 0: /* D6 vblank */ 6680 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { 6681 if (rdev->irq.crtc_vblank_int[5]) { 6682 drm_handle_vblank(rdev->ddev, 5); 6683 rdev->pm.vblank_sync = true; 6684 wake_up(&rdev->irq.vblank_queue); 6685 } 6686 if (atomic_read(&rdev->irq.pflip[5])) 6687 radeon_crtc_handle_flip(rdev, 5); 6688 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; 6689 DRM_DEBUG("IH: D6 vblank\n"); 6690 } 6691 break; 6692 case 1: /* D6 vline */ 6693 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { 6694 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; 6695 DRM_DEBUG("IH: D6 vline\n"); 6696 } 6697 break; 6698 default: 6699 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6700 break; 6701 } 6702 break; 6703 case 42: /* HPD hotplug */ 6704 switch (src_data) { 6705 case 0: 6706 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 6707 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; 6708 queue_hotplug = true; 6709 DRM_DEBUG("IH: HPD1\n"); 6710 } 6711 break; 6712 case 1: 6713 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 6714 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; 6715 queue_hotplug = true; 6716 DRM_DEBUG("IH: HPD2\n"); 6717 } 6718 break; 6719 case 2: 6720 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 6721 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; 6722 queue_hotplug = true; 6723 DRM_DEBUG("IH: HPD3\n"); 6724 } 6725 break; 6726 case 3: 6727 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 6728 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; 6729 queue_hotplug = true; 6730 DRM_DEBUG("IH: HPD4\n"); 6731 } 6732 break; 6733 case 4: 6734 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 6735 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; 6736 queue_hotplug = true; 6737 DRM_DEBUG("IH: HPD5\n"); 6738 } 6739 break; 6740 case 5: 6741 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 6742 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; 6743 queue_hotplug = true; 6744 DRM_DEBUG("IH: HPD6\n"); 6745 } 6746 break; 6747 default: 6748 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6749 break; 6750 } 6751 break; 6752 case 124: /* UVD */ 6753 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); 6754 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); 
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			cik_vm_decode_fault(rdev, status, addr, mc_client);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* GFX RB CP_INT */
		case 177: /* GFX IB CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
			case 2:
				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 184: /* CP Privileged reg access */
			DRM_ERROR("Illegal register access in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 185: /* CP Privileged inst */
			DRM_ERROR("Illegal instruction in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 224: /* SDMA trap event */
			/* XXX check the bitfield order!
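			 *
			 * Going by the decode below, the SDMA RINGID is
			 * taken to be [1:0] = instance (0 = SDMA0, 1 =
			 * SDMA1) and [3:2] = queue, matching the
			 * INSTANCE_ID/QUEUE_ID layout in the IV ring
			 * description above.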
*/ 6841 me_id = (ring_id & 0x3) >> 0; 6842 queue_id = (ring_id & 0xc) >> 2; 6843 DRM_DEBUG("IH: SDMA trap\n"); 6844 switch (me_id) { 6845 case 0: 6846 switch (queue_id) { 6847 case 0: 6848 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); 6849 break; 6850 case 1: 6851 /* XXX compute */ 6852 break; 6853 case 2: 6854 /* XXX compute */ 6855 break; 6856 } 6857 break; 6858 case 1: 6859 switch (queue_id) { 6860 case 0: 6861 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 6862 break; 6863 case 1: 6864 /* XXX compute */ 6865 break; 6866 case 2: 6867 /* XXX compute */ 6868 break; 6869 } 6870 break; 6871 } 6872 break; 6873 case 230: /* thermal low to high */ 6874 DRM_DEBUG("IH: thermal low to high\n"); 6875 rdev->pm.dpm.thermal.high_to_low = false; 6876 queue_thermal = true; 6877 break; 6878 case 231: /* thermal high to low */ 6879 DRM_DEBUG("IH: thermal high to low\n"); 6880 rdev->pm.dpm.thermal.high_to_low = true; 6881 queue_thermal = true; 6882 break; 6883 case 233: /* GUI IDLE */ 6884 DRM_DEBUG("IH: GUI idle\n"); 6885 break; 6886 case 241: /* SDMA Privileged inst */ 6887 case 247: /* SDMA Privileged inst */ 6888 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 6889 /* XXX check the bitfield order! */ 6890 me_id = (ring_id & 0x3) >> 0; 6891 queue_id = (ring_id & 0xc) >> 2; 6892 switch (me_id) { 6893 case 0: 6894 switch (queue_id) { 6895 case 0: 6896 queue_reset = true; 6897 break; 6898 case 1: 6899 /* XXX compute */ 6900 queue_reset = true; 6901 break; 6902 case 2: 6903 /* XXX compute */ 6904 queue_reset = true; 6905 break; 6906 } 6907 break; 6908 case 1: 6909 switch (queue_id) { 6910 case 0: 6911 queue_reset = true; 6912 break; 6913 case 1: 6914 /* XXX compute */ 6915 queue_reset = true; 6916 break; 6917 case 2: 6918 /* XXX compute */ 6919 queue_reset = true; 6920 break; 6921 } 6922 break; 6923 } 6924 break; 6925 default: 6926 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 6927 break; 6928 } 6929 6930 /* wptr/rptr are in bytes! */ 6931 rptr += 16; 6932 rptr &= rdev->ih.ptr_mask; 6933 } 6934 if (queue_hotplug) 6935 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work); 6936 if (queue_reset) 6937 taskqueue_enqueue(rdev->tq, &rdev->reset_work); 6938 if (queue_thermal) 6939 taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work); 6940 rdev->ih.rptr = rptr; 6941 WREG32(IH_RB_RPTR, rdev->ih.rptr); 6942 atomic_set(&rdev->ih.lock, 0); 6943 6944 /* make sure wptr hasn't changed while processing */ 6945 wptr = cik_get_ih_wptr(rdev); 6946 if (wptr != rptr) 6947 goto restart_ih; 6948 6949 return IRQ_HANDLED; 6950 } 6951 6952 /* 6953 * startup/shutdown callbacks 6954 */ 6955 /** 6956 * cik_startup - program the asic to a functional state 6957 * 6958 * @rdev: radeon_device pointer 6959 * 6960 * Programs the asic to a functional state (CIK). 6961 * Called by cik_init() and cik_resume(). 6962 * Returns 0 for success, error for failure. 
6963 */ 6964 static int cik_startup(struct radeon_device *rdev) 6965 { 6966 struct radeon_ring *ring; 6967 int r; 6968 6969 /* enable pcie gen2/3 link */ 6970 cik_pcie_gen3_enable(rdev); 6971 /* enable aspm */ 6972 cik_program_aspm(rdev); 6973 6974 /* scratch needs to be initialized before MC */ 6975 r = r600_vram_scratch_init(rdev); 6976 if (r) 6977 return r; 6978 6979 cik_mc_program(rdev); 6980 6981 if (rdev->flags & RADEON_IS_IGP) { 6982 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 6983 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) { 6984 r = cik_init_microcode(rdev); 6985 if (r) { 6986 DRM_ERROR("Failed to load firmware!\n"); 6987 return r; 6988 } 6989 } 6990 } else { 6991 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 6992 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw || 6993 !rdev->mc_fw) { 6994 r = cik_init_microcode(rdev); 6995 if (r) { 6996 DRM_ERROR("Failed to load firmware!\n"); 6997 return r; 6998 } 6999 } 7000 7001 r = ci_mc_load_microcode(rdev); 7002 if (r) { 7003 DRM_ERROR("Failed to load MC firmware!\n"); 7004 return r; 7005 } 7006 } 7007 7008 r = cik_pcie_gart_enable(rdev); 7009 if (r) 7010 return r; 7011 cik_gpu_init(rdev); 7012 7013 /* allocate rlc buffers */ 7014 if (rdev->flags & RADEON_IS_IGP) { 7015 if (rdev->family == CHIP_KAVERI) { 7016 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list; 7017 rdev->rlc.reg_list_size = 7018 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list); 7019 } else { 7020 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list; 7021 rdev->rlc.reg_list_size = 7022 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list); 7023 } 7024 } 7025 rdev->rlc.cs_data = ci_cs_data; 7026 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; 7027 r = sumo_rlc_init(rdev); 7028 if (r) { 7029 DRM_ERROR("Failed to init rlc BOs!\n"); 7030 return r; 7031 } 7032 7033 /* allocate wb buffer */ 7034 r = radeon_wb_init(rdev); 7035 if (r) 7036 return r; 7037 7038 /* allocate mec buffers */ 7039 r = cik_mec_init(rdev); 7040 if (r) { 7041 DRM_ERROR("Failed to init MEC BOs!\n"); 7042 return r; 7043 } 7044 7045 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); 7046 if (r) { 7047 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7048 return r; 7049 } 7050 7051 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 7052 if (r) { 7053 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7054 return r; 7055 } 7056 7057 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 7058 if (r) { 7059 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7060 return r; 7061 } 7062 7063 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); 7064 if (r) { 7065 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7066 return r; 7067 } 7068 7069 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 7070 if (r) { 7071 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7072 return r; 7073 } 7074 7075 r = radeon_uvd_resume(rdev); 7076 if (!r) { 7077 r = uvd_v4_2_resume(rdev); 7078 if (!r) { 7079 r = radeon_fence_driver_start_ring(rdev, 7080 R600_RING_TYPE_UVD_INDEX); 7081 if (r) 7082 dev_err(rdev->dev, "UVD fences init error (%d).\n", r); 7083 } 7084 } 7085 if (r) 7086 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; 7087 7088 /* Enable IRQ */ 7089 if (!rdev->irq.installed) { 7090 r = radeon_irq_kms_init(rdev); 7091 if (r) 7092 return r; 7093 } 7094 7095 r = cik_irq_init(rdev); 7096 if (r) { 7097 DRM_ERROR("radeon: IH 
init failed (%d).\n", r); 7098 radeon_irq_kms_fini(rdev); 7099 return r; 7100 } 7101 cik_irq_set(rdev); 7102 7103 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 7104 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 7105 CP_RB0_RPTR, CP_RB0_WPTR, 7106 RADEON_CP_PACKET2); 7107 if (r) 7108 return r; 7109 7110 /* set up the compute queues */ 7111 /* type-2 packets are deprecated on MEC, use type-3 instead */ 7112 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 7113 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, 7114 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 7115 PACKET3(PACKET3_NOP, 0x3FFF)); 7116 if (r) 7117 return r; 7118 ring->me = 1; /* first MEC */ 7119 ring->pipe = 0; /* first pipe */ 7120 ring->queue = 0; /* first queue */ 7121 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET; 7122 7123 /* type-2 packets are deprecated on MEC, use type-3 instead */ 7124 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 7125 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, 7126 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 7127 PACKET3(PACKET3_NOP, 0x3FFF)); 7128 if (r) 7129 return r; 7130 /* dGPU only have 1 MEC */ 7131 ring->me = 1; /* first MEC */ 7132 ring->pipe = 0; /* first pipe */ 7133 ring->queue = 1; /* second queue */ 7134 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET; 7135 7136 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 7137 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, 7138 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET, 7139 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET, 7140 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 7141 if (r) 7142 return r; 7143 7144 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 7145 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, 7146 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET, 7147 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET, 7148 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 7149 if (r) 7150 return r; 7151 7152 r = cik_cp_resume(rdev); 7153 if (r) 7154 return r; 7155 7156 r = cik_sdma_resume(rdev); 7157 if (r) 7158 return r; 7159 7160 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 7161 if (ring->ring_size) { 7162 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 7163 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 7164 RADEON_CP_PACKET2); 7165 if (!r) 7166 r = uvd_v1_0_init(rdev); 7167 if (r) 7168 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); 7169 } 7170 7171 r = radeon_ib_pool_init(rdev); 7172 if (r) { 7173 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 7174 return r; 7175 } 7176 7177 r = radeon_vm_manager_init(rdev); 7178 if (r) { 7179 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); 7180 return r; 7181 } 7182 7183 r = dce6_audio_init(rdev); 7184 if (r) 7185 return r; 7186 7187 return 0; 7188 } 7189 7190 /** 7191 * cik_resume - resume the asic to a functional state 7192 * 7193 * @rdev: radeon_device pointer 7194 * 7195 * Programs the asic to a functional state (CIK). 7196 * Called at resume. 7197 * Returns 0 for success, error for failure. 
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}

/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}

/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call asic specific functions.  This should also allow us
 * to remove a bunch of callbacks like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Set up asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
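
/*
 * For reference: cik_init() above sizes the rings at 1 MiB for the GFX
 * ring and each of the two compute rings, 256 KiB per SDMA ring, 4 KiB
 * for UVD and 64 KiB for the IH ring; cik_startup() then binds the fence
 * drivers, writeback slots and rptr/wptr registers to those buffers.
 */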

/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Set up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB.  Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}

/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the number of dram channels
 */
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}

struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};
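
/*
 * Illustrative example only (all numbers made up): a single 1080p head
 * scanning out a 4 bytes-per-pixel surface on a two-channel board might
 * be described as
 *
 *	struct dce8_wm_params wm = {
 *		.dram_channels = 2,
 *		.yclk = 800000,         800 MHz per pin, in kHz
 *		.sclk = 300000,
 *		.disp_clk = 600000,
 *		.src_width = 1920,
 *		.bytes_per_pixel = 4,
 *		.num_heads = 1,
 *	};
 *
 * The dce8_*_bandwidth() helpers below turn such a description into MB/s
 * figures; e.g. dce8_dram_bandwidth() would report
 * 0.7 * (2 channels * 4 bytes) * 800 MHz = 4480 MBytes/s for this setup.
 */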

/**
 * dce8_dram_bandwidth - get the dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the raw dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth in MBytes/s
 */
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
 *
 * @wm: watermark calculation data
 *
 * Calculate the dram bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth for display in MBytes/s
 */
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth.  Display can use this temporarily but not on average.

/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}

/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
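
/*
 * Worked example for the average bandwidth math (assumed mode, for
 * illustration only): a 1920-pixel-wide source at 4 bytes/pixel with
 * vsc = 1.0 and a 12000 ns line time needs 1920 * 4 / 12 = 640 MBytes/s
 * of average bandwidth per head (line_time is converted from ns to us
 * by the divide-by-1000 above, so the result comes out in MBytes/s).
 */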

/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
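
/*
 * Illustrative latency numbers for the watermark above (assumed, not
 * measured): with available_bandwidth = 5600 MBytes/s, a worst-case
 * 4 KB chunk takes 512 * 8 * 1000 / 5600 = 731 ns to return and a
 * 512-byte cursor line pair about 91 ns; both terms are then scaled
 * by the number of active heads contending for the memory controller.
 */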

/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}
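
/*
 * Example of the latency hiding budget (assumed timings): with no
 * downscaling (vsc <= 1.0), enough line buffer for more than vtaps + 1
 * partitions, a 12000 ns line time and 1500 ns of blank time, up to
 * 2 * 12000 + 1500 = 25500 ns of memory latency can be hidden before
 * the display underflows.
 */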

/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
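
/*
 * Note on the timing math above (illustrative, assumed mode): for a
 * 1920x1080@60 mode with mode->clock = 148500 kHz, pixel_period is
 * 1000000 / 148500 = 6 ns (truncated) and line_time = crtc_htotal
 * (2200) * 6 = 13200 ns, which is programmed as the high watermark
 * for both wm A and wm B via LATENCY_HIGH_WATERMARK().
 */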

/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}

/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	spin_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	spin_unlock(&rdev->gpu_clock_mutex);
	return clock;
}

static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}
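
/*
 * Shim for the PCIe code below: the Linux driver expects struct pci_dev
 * pointers, while this port only has a device_t.  Wrapping the device_t
 * in a stack-local struct pci_dev lets the unmodified
 * pci_read/write_config_word() call sites work.  Note that only the
 * .dev member is initialized; no other field of the returned struct
 * may be relied upon.
 */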
static struct pci_dev dev_to_pcidev(device_t dev)
{
	struct pci_dev pdev;
	pdev.dev = dev;
	return pdev;
}

static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
#if 0
	struct pci_dev *root = rdev->pdev->bus->self;
#else
	device_t root = device_get_parent(rdev->dev);
#endif
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;
	struct pci_dev root_pdev = dev_to_pcidev(root);
	struct pci_dev pdev = dev_to_pcidev(rdev->dev);

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_get_pciecap_ptr(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}
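
	/*
	 * Background for the LNKCTL2 write below (PCIe spec values): the
	 * low 4 bits of Link Control 2 hold the target link speed, where
	 * 1 = 2.5 GT/s (gen1), 2 = 5.0 GT/s (gen2) and 3 = 8.0 GT/s (gen3).
	 * Bit 4 and bits 9-11, preserved by the retry loop above, cover
	 * the compliance and transmit-margin control fields.
	 */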
	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
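
/*
 * The ASPM programming below follows one read-modify-write pattern
 * throughout: read a PCIe port or SMC register, adjust the relevant
 * fields, and write it back only when the value actually changed, to
 * avoid redundant register cycles.
 */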
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}