/*	$NetBSD: amdgpu_uvd.c,v 1.4 2019/01/01 08:07:47 maya Exp $	*/

/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_uvd.c,v 1.4 2019/01/01 08:07:47 maya Exp $");

#include <linux/firmware.h>
#include <linux/module.h>
#include <asm/byteorder.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_uvd.h"
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin"
#define FIRMWARE_KABINI		"radeon/kabini_uvd.bin"
#define FIRMWARE_KAVERI		"radeon/kaveri_uvd.bin"
#define FIRMWARE_HAWAII		"radeon/hawaii_uvd.bin"
#define FIRMWARE_MULLINS	"radeon/mullins_uvd.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_uvd.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_uvd.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_uvd.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_uvd.bin"

/**
 * amdgpu_uvd_cs_ctx - Command submission parser context
 *
 * Used for emulating virtual memory support on UVD 4.2.
 */
struct amdgpu_uvd_cs_ctx {
	struct amdgpu_cs_parser *parser;
	unsigned reg, count;
	unsigned data0, data1;
	unsigned idx;
	unsigned ib_idx;

	/* does the IB have a msg command */
	bool has_msg_cmd;

	/* minimum buffer sizes */
	unsigned *buf_sizes;
};

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);

static void amdgpu_uvd_note_usage(struct amdgpu_device *adev);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);

int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned version_major, version_minor, family_id;
	int i, r;

	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->uvd.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->uvd.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->uvd.fw);
		adev->uvd.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
	DRM_INFO("Found UVD firmware Version: %x.%x Family ID: %x\n",
		 version_major, version_minor, family_id);

	adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
				(family_id << 8));

	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
		 + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE;
	r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &adev->uvd.vcpu_bo);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
	if (r) {
		amdgpu_bo_unref(&adev->uvd.vcpu_bo);
		dev_err(adev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = amdgpu_bo_pin(adev->uvd.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
			  &adev->uvd.gpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
		amdgpu_bo_unref(&adev->uvd.vcpu_bo);
		dev_err(adev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

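	/* map the VCPU BO for CPU access; amdgpu_uvd_resume() copies the
	 * firmware image into it through this mapping */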
	r = amdgpu_bo_kmap(adev->uvd.vcpu_bo, &adev->uvd.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	amdgpu_bo_unreserve(adev->uvd.vcpu_bo);

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
		atomic_set(&adev->uvd.handles[i], 0);
		adev->uvd.filp[i] = NULL;
	}

	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
	if (!amdgpu_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
		adev->uvd.address_64_bit = true;

	return 0;
}

int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->uvd.vcpu_bo == NULL)
		return 0;

	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
	if (!r) {
		amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
		amdgpu_bo_unpin(adev->uvd.vcpu_bo);
		amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
	}

	amdgpu_bo_unref(&adev->uvd.vcpu_bo);

	amdgpu_ring_fini(&adev->uvd.ring);

	release_firmware(adev->uvd.fw);

	return 0;
}

int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->uvd.ring;
	int i, r;

	if (adev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
		if (handle != 0) {
			struct fence *fence;

			amdgpu_uvd_note_usage(adev);

			r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			fence_wait(fence, false);
			fence_put(fence);

			adev->uvd.filp[i] = NULL;
			atomic_set(&adev->uvd.handles[i], 0);
		}
	}

	return 0;
}

int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	const struct common_firmware_header *hdr;
	unsigned offset;

	if (adev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
	memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
		(adev->uvd.fw->size) - offset);

	cancel_delayed_work_sync(&adev->uvd.idle_work);

	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
	size -= le32_to_cpu(hdr->ucode_size_bytes);
	ptr = adev->uvd.cpu_addr;
	ptr = (char *)ptr + le32_to_cpu(hdr->ucode_size_bytes);

	memset(ptr, 0, size);

	return 0;
}

void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->uvd.ring;
	int i, r;

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
		if (handle != 0 && adev->uvd.filp[i] == filp) {
			struct fence *fence;

			amdgpu_uvd_note_usage(adev);

			r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			fence_wait(fence, false);
			fence_put(fence);

			adev->uvd.filp[i] = NULL;
			atomic_set(&adev->uvd.handles[i], 0);
		}
	}
}

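/**
 * amdgpu_uvd_force_into_uvd_segment - restrict BO placement to the UVD segment
 *
 * @rbo: buffer object to restrict
 *
 * Limit all placements to the first 256MB, the range UVD can address
 * without 64-bit address support.
 */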
static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *rbo)
{
	int i;
	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}
}

/**
 * amdgpu_uvd_cs_pass1 - first parsing round
 *
 * @ctx: UVD parser context
 *
 * Make sure UVD message and feedback buffers are in VRAM and
 * nobody is violating a 256MB boundary.
 */
static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint32_t cmd, lo, hi;
	uint64_t addr;
	int r = 0;

	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);

	mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo);
	if (mapping == NULL) {
		DRM_ERROR("Can't find BO for addr 0x%08"PRIx64"\n", addr);
		return -EINVAL;
	}

	if (!ctx->parser->adev->uvd.address_64_bit) {
		/* check if it's a message or feedback command */
		cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
		if (cmd == 0x0 || cmd == 0x3) {
			/* yes, force it into VRAM */
			uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
			amdgpu_ttm_placement_from_domain(bo, domain);
		}
		amdgpu_uvd_force_into_uvd_segment(bo);

		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	}

	return r;
}

/**
 * amdgpu_uvd_cs_msg_decode - handle UVD decode message
 *
 * @msg: pointer to message structure
 * @buf_sizes: returned buffer sizes
 *
 * Peek into the decode message and calculate the necessary buffer sizes.
 */
static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];
	unsigned level = msg[57];

	unsigned width_in_mb = width / 16;
#ifdef __NetBSD__		/* XXX ALIGN means something else */
	unsigned height_in_mb = round_up(height / 16, 2);
#else
	unsigned height_in_mb = ALIGN(height / 16, 2);
#endif
	unsigned fs_in_mb = width_in_mb * height_in_mb;

	unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
	unsigned min_ctx_size = 0;

	image_size = width * height;
	image_size += image_size / 2;
#ifdef __NetBSD__		/* XXX ALIGN means something else */
	image_size = round_up(image_size, 1024);
#else
	image_size = ALIGN(image_size, 1024);
#endif

	switch (stream_type) {
	case 0: /* H264 */
	case 7: /* H264 Perf */
		switch(level) {
		case 30:
			num_dpb_buffer = 8100 / fs_in_mb;
			break;
		case 31:
			num_dpb_buffer = 18000 / fs_in_mb;
			break;
		case 32:
			num_dpb_buffer = 20480 / fs_in_mb;
			break;
		case 41:
			num_dpb_buffer = 32768 / fs_in_mb;
			break;
		case 42:
			num_dpb_buffer = 34816 / fs_in_mb;
			break;
		case 50:
			num_dpb_buffer = 110400 / fs_in_mb;
			break;
		case 51:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		default:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		}
		num_dpb_buffer++;
		if (num_dpb_buffer > 17)
			num_dpb_buffer = 17;

		/* reference picture buffer */
		min_dpb_size = image_size * num_dpb_buffer;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
#ifdef __NetBSD__		/* XXX ALIGN means something else */
		min_dpb_size += round_up(tmp * 7 * 16, 64);
#else
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
#endif
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
#ifdef __NetBSD__		/* XXX ALIGN means something else */
		min_dpb_size += round_up(width_in_mb * height_in_mb * 32, 64);
#else
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
#endif
		break;

	case 16: /* H265 */
#ifdef __NetBSD__		/* XXX ALIGN means something else */
		image_size = (round_up(width, 16) * round_up(height, 16) * 3) / 2;
		image_size = round_up(image_size, 256);
#else
		image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
		image_size = ALIGN(image_size, 256);
#endif

		num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2;
		min_dpb_size = image_size * num_dpb_buffer;
		min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16)
					   * 16 * num_dpb_buffer + 52 * 1024;
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	buf_sizes[0x4] = min_ctx_size;
	return 0;
}

/**
 * amdgpu_uvd_cs_msg - handle UVD message
 *
 * @ctx: UVD parser context
 * @bo: buffer object containing the message
 * @offset: offset into the buffer object
 *
 * Peek into the UVD message and extract the session id.
 * Make sure that we don't open up too many sessions.
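 *
 * Returns 0 on success, a negative error code otherwise.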
 */
static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
			     struct amdgpu_bo *bo, unsigned offset)
{
	struct amdgpu_device *adev = ctx->parser->adev;
	int32_t *msg, msg_type, handle;
	void *ptr;
	long r;
	int i;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
						MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
		return r;
	}

	msg = (void *)((char *)ptr + offset);

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		amdgpu_bo_kunmap(bo);

		/* try to alloc a new handle */
		for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
				adev->uvd.filp[i] = ctx->parser->filp;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, calc buffer sizes */
		r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes);
		amdgpu_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
				if (adev->uvd.filp[i] != ctx->parser->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
			atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
		amdgpu_bo_kunmap(bo);
		return 0;

	default:
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
		return -EINVAL;
	}
	BUG();
	return -EINVAL;
}

/**
 * amdgpu_uvd_cs_pass2 - second parsing round
 *
 * @ctx: UVD parser context
 *
 * Patch buffer addresses, make sure buffer sizes are correct.
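 * Also require a message command before any other command and, when 64-bit
 * addressing is not available, keep buffers inside the 256MB UVD segment.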
 */
static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	struct amdgpu_ib *ib;
	uint32_t cmd, lo, hi;
	uint64_t start, end;
	uint64_t addr;
	int r;

	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);

	mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo);
	if (mapping == NULL)
		return -EINVAL;

	start = amdgpu_bo_gpu_offset(bo);

	end = (mapping->it.last + 1 - mapping->it.start);
	end = end * AMDGPU_GPU_PAGE_SIZE + start;

	addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
	start += addr;

	ib = &ctx->parser->ibs[ctx->ib_idx];
	ib->ptr[ctx->data0] = start & 0xFFFFFFFF;
	ib->ptr[ctx->data1] = start >> 32;

	cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
	if (cmd < 0x4) {
		if ((end - start) < ctx->buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start),
				  ctx->buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd == 0x206) {
		if ((end - start) < ctx->buf_sizes[4]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start),
				  ctx->buf_sizes[4]);
			return -EINVAL;
		}
	} else if ((cmd != 0x100) && (cmd != 0x204)) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if (!ctx->parser->adev->uvd.address_64_bit) {
		if ((start >> 28) != ((end - 1) >> 28)) {
			DRM_ERROR("reloc %"PRIX64"-%"PRIX64" crossing 256MB boundary!\n",
				  start, end);
			return -EINVAL;
		}

		if ((cmd == 0 || cmd == 0x3) &&
		    (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) {
			DRM_ERROR("msg/fb buffer %"PRIX64"-%"PRIX64" out of 256MB segment!\n",
				  start, end);
			return -EINVAL;
		}
	}

	if (cmd == 0) {
		ctx->has_msg_cmd = true;
		r = amdgpu_uvd_cs_msg(ctx, bo, addr);
		if (r)
			return r;
	} else if (!ctx->has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

/**
 * amdgpu_uvd_cs_reg - parse register writes
 *
 * @ctx: UVD parser context
 * @cb: callback function
 *
 * Parse the register writes, call cb on each complete command.
 */
static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
			     int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
	struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
	int i, r;

	ctx->idx++;
	for (i = 0; i <= ctx->count; ++i) {
		unsigned reg = ctx->reg + i;

		if (ctx->idx >= ib->length_dw) {
			DRM_ERROR("Register command after end of CS!\n");
			return -EINVAL;
		}

		switch (reg) {
		case mmUVD_GPCOM_VCPU_DATA0:
			ctx->data0 = ctx->idx;
			break;
		case mmUVD_GPCOM_VCPU_DATA1:
			ctx->data1 = ctx->idx;
			break;
		case mmUVD_GPCOM_VCPU_CMD:
			r = cb(ctx);
			if (r)
				return r;
			break;
		case mmUVD_ENGINE_CNTL:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n", reg);
			return -EINVAL;
		}
		ctx->idx++;
	}
	return 0;
}

/**
 * amdgpu_uvd_cs_packets - parse UVD packets
 *
 * @ctx: UVD parser context
 * @cb: callback function
 *
 * Parse the command stream packets.
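 * Only type 0 (register write) and type 2 packets are accepted;
 * anything else is rejected.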
 */
static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
				 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
	struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
	int r;

	for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
		uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
		unsigned type = CP_PACKET_GET_TYPE(cmd);
		switch (type) {
		case PACKET_TYPE0:
			ctx->reg = CP_PACKET0_GET_REG(cmd);
			ctx->count = CP_PACKET_GET_COUNT(cmd);
			r = amdgpu_uvd_cs_reg(ctx, cb);
			if (r)
				return r;
			break;
		case PACKET_TYPE2:
			++ctx->idx;
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", type);
			return -EINVAL;
		}
	}
	return 0;
}

/**
 * amdgpu_uvd_ring_parse_cs - UVD command submission parser
 *
 * @parser: Command submission parser context
 *
 * Parse the command stream, patch in addresses as necessary.
 */
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
{
	struct amdgpu_uvd_cs_ctx ctx = {};
	unsigned buf_sizes[] = {
		[0x00000000]	=	2048,
		[0x00000001]	=	0xFFFFFFFF,
		[0x00000002]	=	0xFFFFFFFF,
		[0x00000003]	=	2048,
		[0x00000004]	=	0xFFFFFFFF,
	};
	struct amdgpu_ib *ib = &parser->ibs[ib_idx];
	int r;

	if (ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  ib->length_dw);
		return -EINVAL;
	}

	ctx.parser = parser;
	ctx.buf_sizes = buf_sizes;
	ctx.ib_idx = ib_idx;

	/* first round, make sure the buffers are actually in the UVD segment */
	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
	if (r)
		return r;

	/* second round, patch buffer addresses into the command stream */
	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
	if (r)
		return r;

	if (!ctx.has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	amdgpu_uvd_note_usage(ctx.parser->adev);

	return 0;
}

static int amdgpu_uvd_free_job(
	struct amdgpu_job *job)
{
	amdgpu_ib_free(job->adev, job->ibs);
	kfree(job->ibs);
	return 0;
}

static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
			       struct amdgpu_bo *bo,
			       struct fence **fence)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct amdgpu_ib *ib = NULL;
	struct fence *f = NULL;
	struct amdgpu_device *adev = ring->adev;
	uint64_t addr;
	int i, r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
	if (r)
		return r;

	if (!bo->adev->uvd.address_64_bit) {
		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
		amdgpu_uvd_force_into_uvd_segment(bo);
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto err;
	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!ib) {
		r = -ENOMEM;
		goto err;
	}
	r = amdgpu_ib_get(ring, NULL, 64, ib);
	if (r)
		goto err1;

	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; ++i)
		ib->ptr[i] = PACKET2(0);
	ib->length_dw = 16;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
						 &amdgpu_uvd_free_job,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r)
		goto err2;

	ttm_eu_fence_buffer_objects(&ticket, &head, f);

	if (fence)
		*fence = fence_get(f);
	amdgpu_bo_unref(&bo);
	fence_put(f);
	if (amdgpu_enable_scheduler)
		return 0;

	amdgpu_ib_free(ring->adev, ib);
	kfree(ib);
	return 0;
err2:
	amdgpu_ib_free(ring->adev, ib);
err1:
	kfree(ib);
err:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}

/* multiple fence commands without any stream commands in between can
   crash the vcpu so just try to emit a dummy create/destroy msg to
   avoid this */
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
			      struct fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &bo);
	if (r)
		return r;

	r = amdgpu_bo_reserve(bo, false);
	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&msg);
	if (r) {
		amdgpu_bo_unreserve(bo);
		amdgpu_bo_unref(&bo);
		return r;
	}

	/* stitch together a UVD create msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000000);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(0x00000000);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000000);
	msg[7] = cpu_to_le32(0x00000780);
	msg[8] = cpu_to_le32(0x00000440);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x01b37000);
	for (i = 11; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);

	return amdgpu_uvd_send_msg(ring, bo, fence);
}

int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
			       struct fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &bo);
	if (r)
		return r;

	r = amdgpu_bo_reserve(bo, false);
	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&msg);
	if (r) {
		amdgpu_bo_unreserve(bo);
		amdgpu_bo_unref(&bo);
		return r;
	}

	/* stitch together a UVD destroy msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000002);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	for (i = 4; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);

	return amdgpu_uvd_send_msg(ring, bo, fence);
}

static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, uvd.idle_work.work);
	unsigned i, fences, handles = 0;

	fences = amdgpu_fence_count_emitted(&adev->uvd.ring);

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
		if (atomic_read(&adev->uvd.handles[i]))
			++handles;

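	/* power UVD down only when no fences are outstanding and no
	 * sessions are open; otherwise check again after the idle timeout */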
	if (fences == 0 && handles == 0) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_uvd(adev, false);
		} else {
			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
		}
	} else {
		schedule_delayed_work(&adev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

static void amdgpu_uvd_note_usage(struct amdgpu_device *adev)
{
	bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&adev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_uvd(adev, true);
		} else {
			amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
		}
	}
}