/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>

#include "radeon.h"
#include "radeon_ucode.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeon/R600_uvd.bin"
#define FIRMWARE_RS780		"radeon/RS780_uvd.bin"
#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
#define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE_LEGACY	"radeon/BONAIRE_uvd.bin"
#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin"
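
/*
 * The lowercase bonaire_uvd.bin is the newer firmware with a common
 * header that radeon_uvd_init() validates; the uppercase names are the
 * older legacy images, which are loaded without header validation.
 */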

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);

int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name = NULL, *legacy_fw_name = NULL;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		legacy_fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		legacy_fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		legacy_fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		legacy_fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		legacy_fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		legacy_fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		legacy_fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		legacy_fw_name = FIRMWARE_BONAIRE_LEGACY;
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	rdev->uvd.fw_header_present = false;
	rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES;
	if (fw_name) {
		/* Let's try to load the newer firmware first */
		r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				fw_name);
		} else {
			struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data;
			unsigned version_major, version_minor, family_id;

			r = radeon_ucode_validate(rdev->uvd_fw);
			if (r)
				return r;

			rdev->uvd.fw_header_present = true;

			family_id = (__force u32)(hdr->ucode_version) & 0xff;
			version_major = (le32_to_cpu((__force __le32)(hdr->ucode_version))
					 >> 24) & 0xff;
			version_minor = (le32_to_cpu((__force __le32)(hdr->ucode_version))
					 >> 8) & 0xff;
			DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
				 version_major, version_minor, family_id);

			/*
			 * Limit the number of UVD handles depending on
			 * microcode major and minor versions.
			 */
			if ((version_major >= 0x01) && (version_minor >= 0x37))
				rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES;
		}
	}

	/*
	 * In case there is only legacy firmware, or we encounter an error
	 * while loading the new firmware, we fall back to loading the legacy
	 * firmware now.
	 */
	if (!fw_name || r) {
		r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				legacy_fw_name);
			return r;
		}
	}

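	/*
	 * Size a single VCPU BO to hold the firmware image (page aligned,
	 * plus 8 bytes), the UVD stack and heap, and one session area per
	 * supported handle.
	 */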
	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
		  RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}

void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}

int radeon_uvd_suspend(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}

	return 0;
}

int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	void *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->size;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->size;

	memset_io((void __iomem *)ptr, 0, size);

	return 0;
}

void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}

	/* If it must be in VRAM it must be in the first segment as well */
	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
		return;

	/* abort if we already have more than one placement */
	if (rbo->placement.num_placement > 1)
		return;

	/* add another 256MB segment */
	rbo->placements[1] = rbo->placements[0];
	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placement.num_placement++;
	rbo->placement.num_busy_placement++;
}

void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;
	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}

static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);
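
	/*
	 * Worked example with hypothetical numbers: a 1920x1088 stream
	 * gives width_in_mb = 120, height_in_mb = 68 and image_size =
	 * 1920*1088 + 1920*1088/2 = 3133440 bytes (4:2:0, already a
	 * multiple of 1024), so e.g. the H264 reference picture buffer
	 * below must hold 17 such images.
	 */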

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}

static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
				     unsigned stream_type)
{
	switch (stream_type) {
	case 0: /* H264 */
	case 1: /* VC1 */
		/* always supported */
		return 0;

	case 3: /* MPEG2 */
	case 4: /* MPEG4 */
		/* only since UVD 3 */
		if (p->rdev->family >= CHIP_PALM)
			return 0;

		fallthrough;
	default:
		DRM_ERROR("UVD codec not supported by hardware %d!\n",
			  stream_type);
		return -EINVAL;
	}
}

static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	unsigned img_size = 0;
	void *ptr;
	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		radeon_bo_kunmap(bo);
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];

		r = radeon_uvd_validate_codec(p, msg[4]);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* try to alloc a new handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
				p->rdev->uvd.filp[i] = p->filp;
				p->rdev->uvd.img_size[i] = img_size;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, validate codec and calc buffer sizes */
		r = radeon_uvd_validate_codec(p, msg[4]);
		if (!r)
			r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				if (p->rdev->uvd.filp[i] != p->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;

	default:
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
	}

	radeon_bo_kunmap(bo);
	return -EINVAL;
}

static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_bo_list *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = p->chunk_relocs;
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = &p->relocs[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

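	/*
	 * UVD can only address buffers inside a 256MB window (see
	 * radeon_uvd_force_into_uvd_segment()), so reject relocations
	 * whose buffers straddle a 256MB boundary.
	 */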
	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %llX-%llX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+ ? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %llX-%llX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
		case UVD_NO_OP:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}

int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command? */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000] = 2048,
		[0x00000001] = 32 * 1024 * 1024,
		[0x00000002] = 2048 * 1152 * 3,
		[0x00000003] = 2048,
	};
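
	/*
	 * The index corresponds to the UVD command decoded in
	 * radeon_uvd_cs_reloc(): 0 is the message buffer, 1 the DPB and 2
	 * the decoding target (both overwritten by the decode message),
	 * and 3 appears to be the feedback buffer.
	 */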

	if (p->chunk_ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  p->chunk_ib->length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}


	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunk_ib->length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, uint64_t addr,
			       struct radeon_fence **fence)
{
	struct radeon_ib ib;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		return r;

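	/*
	 * Build a minimal 16-dword IB: the 64-bit message address goes
	 * into GPCOM_VCPU_DATA0/1, a command value of 0 (the message
	 * buffer command, matching the decoding in radeon_uvd_cs_reloc())
	 * into GPCOM_VCPU_CMD, and the remainder is padded with NO_OPs.
	 */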
	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib.ptr[i] = PACKET0(UVD_NO_OP, 0);
		ib.ptr[i+1] = 0;
	}
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * Multiple fence commands without any stream commands in between can
 * crash the vcpu so just try to emit a dummy create/destroy msg to
 * avoid this.
 */
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together an UVD create msg */
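	/*
	 * The field layout mirrors what radeon_uvd_cs_msg() reads back:
	 * msg[1] is the message type (0 = create), msg[2] the handle, and
	 * msg[7]/msg[8] the width/height (0x780 x 0x440, i.e. 1920x1088).
	 */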
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel(0x0, (void __iomem *)&msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	writel(0x0, &msg[4]);
	writel(0x0, &msg[5]);
	writel(0x0, &msg[6]);
	writel((__force u32)cpu_to_le32(0x00000780), &msg[7]);
	writel((__force u32)cpu_to_le32(0x00000440), &msg[8]);
	writel(0x0, &msg[9]);
	writel((__force u32)cpu_to_le32(0x01b37000), &msg[10]);
	for (i = 11; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together an UVD destroy msg */
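	/* msg[1] = 2 selects the destroy message type handled in
	 * radeon_uvd_cs_msg() */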
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel((__force u32)cpu_to_le32(0x00000002), &msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	for (i = 4; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}

static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

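	/*
	 * If no UVD fences are outstanding the block is idle: power it
	 * down via DPM when available, otherwise drop the UVD clocks to
	 * zero. If work is still in flight, re-arm the idle timer.
	 */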
	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
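
	/*
	 * set_clocks is true only when the idle work was not already
	 * pending and got freshly queued, i.e. this is the first UVD use
	 * after an idle period.
	 */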

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;
		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}

static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal to the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}
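
/*
 * Worked example with hypothetical numbers: vco_freq = 122400,
 * target_freq = 53300, pd_min = 2, pd_even = 2. The initial divider is
 * 122400 / 53300 = 2, but 122400 / 2 = 61200 exceeds the target, so it
 * is bumped to 3; 3 is above pd_even and odd, so it becomes 4, giving
 * an effective frequency of 122400 / 4 = 30600 <= 53300.
 */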

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for UVD's UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range ? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup ? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}