/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>

#include "radeon.h"
#include "radeon_ucode.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeon/R600_uvd.bin"
#define FIRMWARE_RS780		"radeon/RS780_uvd.bin"
#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
#define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE_LEGACY	"radeon/BONAIRE_uvd.bin"
#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);

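/**
 * radeon_uvd_init - UVD one time initialization
 *
 * @rdev: radeon_device pointer
 *
 * Pick and load the UVD firmware for the chip family, allocate, pin and
 * map the VCPU buffer object and clear the session handle bookkeeping.
 */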
int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name = NULL, *legacy_fw_name = NULL;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		legacy_fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		legacy_fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		legacy_fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		legacy_fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		legacy_fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		legacy_fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		legacy_fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		legacy_fw_name = FIRMWARE_BONAIRE_LEGACY;
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	rdev->uvd.fw_header_present = false;
	rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES;
	if (fw_name) {
		/* Let's try to load the newer firmware first */
		r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				fw_name);
		} else {
			struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data;
			unsigned version_major, version_minor, family_id;

			r = radeon_ucode_validate(rdev->uvd_fw);
			if (r)
				return r;

			rdev->uvd.fw_header_present = true;

			family_id = (__force u32)(hdr->ucode_version) & 0xff;
			version_major = (le32_to_cpu((__force __le32)(hdr->ucode_version))
							 >> 24) & 0xff;
			version_minor = (le32_to_cpu((__force __le32)(hdr->ucode_version))
							 >> 8) & 0xff;
			DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
				 version_major, version_minor, family_id);

			/*
			 * Limit the number of UVD handles depending on
			 * microcode major and minor versions.
			 */
			if ((version_major >= 0x01) && (version_minor >= 0x37))
				rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES;
		}
	}

	/*
	 * In case there is only legacy firmware, or we encounter an error
	 * while loading the new firmware, we fall back to loading the legacy
	 * firmware now.
	 */
	if (!fw_name || r) {
		r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				legacy_fw_name);
			return r;
		}
	}

	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
		  RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}

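/**
 * radeon_uvd_fini - UVD teardown
 *
 * @rdev: radeon_device pointer
 *
 * Unmap, unpin and free the VCPU buffer object, tear down the UVD ring
 * and release the firmware image again.
 */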
void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}

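/**
 * radeon_uvd_suspend - close all open UVD streams
 *
 * @rdev: radeon_device pointer
 *
 * Send a destroy message for every open handle and wait for the
 * resulting fences, so the block is idle before suspending.
 */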
int radeon_uvd_suspend(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}

	return 0;
}

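/**
 * radeon_uvd_resume - re-upload the UVD firmware
 *
 * @rdev: radeon_device pointer
 *
 * Copy the firmware image back into the VCPU buffer object and clear the
 * remaining stack/heap/session area behind it.
 */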
int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	void *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->size;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->size;

	memset_io((void __iomem *)ptr, 0, size);

	return 0;
}

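/**
 * radeon_uvd_force_into_uvd_segment - restrict a BO to the UVD segments
 *
 * @rbo: buffer object to place
 * @allowed_domains: domains the caller allows for this BO
 *
 * UVD buffers must not cross a 256MB boundary, so limit every placement
 * to the first 256MB segment and, unless VRAM is mandatory, offer the
 * second 256MB segment as an alternative.
 */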
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}

	/* If it must be in VRAM it must be in the first segment as well */
	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
		return;

	/* abort if we already have more than one placement */
	if (rbo->placement.num_placement > 1)
		return;

	/* add another 256MB segment */
	rbo->placements[1] = rbo->placements[0];
	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placement.num_placement++;
	rbo->placement.num_busy_placement++;
}

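/**
 * radeon_uvd_free_handles - free all handles owned by a file
 *
 * @rdev: radeon_device pointer
 * @filp: drm file the handles belong to
 *
 * Destroy every stream that was opened by @filp, e.g. when the file
 * descriptor is closed.
 */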
void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}

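/*
 * Decode a UVD "decode" message and derive the minimum buffer sizes
 * (decoded picture buffer and image size) from the codec, the frame
 * dimensions and the pitch given in the message.
 */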
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}

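/*
 * Check that this hardware generation supports the requested stream
 * type: H264 and VC1 always work, MPEG2/MPEG4 only on UVD 3 and later.
 */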
static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
				     unsigned stream_type)
{
	switch (stream_type) {
	case 0: /* H264 */
	case 1: /* VC1 */
		/* always supported */
		return 0;

	case 3: /* MPEG2 */
	case 4: /* MPEG4 */
		/* only since UVD 3 */
		if (p->rdev->family >= CHIP_PALM)
			return 0;

		fallthrough;
	default:
		DRM_ERROR("UVD codec not supported by hardware %d!\n",
			  stream_type);
		return -EINVAL;
	}
}

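/*
 * Parse and validate a UVD message: create messages allocate a session
 * handle, decode messages are checked against the codec and the minimum
 * buffer sizes, and destroy messages free the handle again.
 */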
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	unsigned img_size = 0;
	void *ptr;
	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		radeon_bo_kunmap(bo);
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];

		r = radeon_uvd_validate_codec(p, msg[4]);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* try to alloc a new handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
				p->rdev->uvd.filp[i] = p->filp;
				p->rdev->uvd.img_size[i] = img_size;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, validate codec and calc buffer sizes */
		r = radeon_uvd_validate_codec(p, msg[4]);
		if (!r)
			r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				if (p->rdev->uvd.filp[i] != p->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;

	default:
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
	}

	radeon_bo_kunmap(bo);
	return -EINVAL;
}

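/*
 * Patch a relocation into the command stream and check that the
 * referenced buffer is large enough and does not cross a 256MB segment
 * boundary.
 */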
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_bo_list *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = p->chunk_relocs;
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = &p->relocs[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %llX-%llX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+ ? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %llX-%llX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

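/*
 * Walk the registers written by a type-0 packet, remember where the
 * command buffer address gets patched in and kick off relocation
 * handling on UVD_GPCOM_VCPU_CMD.
 */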
static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
		case UVD_NO_OP:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}

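/**
 * radeon_uvd_cs_parse - parse and validate a UVD command stream
 *
 * @p: parser context
 *
 * Walk all packets in the IB, validate registers and relocations and
 * make sure exactly one message command is present.
 */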
int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000]	=	2048,
		[0x00000001]	=	32 * 1024 * 1024,
		[0x00000002]	=	2048 * 1152 * 3,
		[0x00000003]	=	2048,
	};

	if (p->chunk_ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  p->chunk_ib->length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk!\n");
		return -EINVAL;
	}

	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d!\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunk_ib->length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}

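/*
 * Build a small IB that writes the message buffer address into the VCPU
 * registers, pad it with NOPs and submit it on the UVD ring.
 */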
static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, uint64_t addr,
			       struct radeon_fence **fence)
{
	struct radeon_ib ib;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		return r;

	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib.ptr[i] = PACKET0(UVD_NO_OP, 0);
		ib.ptr[i+1] = 0;
	}
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * Multiple fence commands without any stream commands in between can
 * crash the vcpu, so just try to emit a dummy create/destroy msg to
 * avoid this.
 */
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD create msg */
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel(0x0, (void __iomem *)&msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	writel(0x0, &msg[4]);
	writel(0x0, &msg[5]);
	writel(0x0, &msg[6]);
	writel((__force u32)cpu_to_le32(0x00000780), &msg[7]);
	writel((__force u32)cpu_to_le32(0x00000440), &msg[8]);
	writel(0x0, &msg[9]);
	writel((__force u32)cpu_to_le32(0x01b37000), &msg[10]);
	for (i = 11; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

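/*
 * Emit a dummy destroy message for @handle, see the comment above
 * radeon_uvd_get_create_msg().
 */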
int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD destroy msg */
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel((__force u32)cpu_to_le32(0x00000002), &msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	for (i = 4; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}

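/*
 * Delayed work handler: once the UVD ring went idle, power the block
 * down (or drop the UVD clocks); otherwise re-arm the idle timer.
 */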
static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

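/**
 * radeon_uvd_note_usage - power up UVD before use
 *
 * @rdev: radeon_device pointer
 *
 * Cancel the pending idle work, bump the clocks or the DPM state if UVD
 * was idle, and re-arm the idle timeout.
 */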
void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;
		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}

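/*
 * Pick a post divider for the given VCO frequency that stays at or
 * below the target frequency and respects the minimum and even-divider
 * constraints.
 */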
static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal to the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for UVD's UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range ? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

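/**
 * radeon_uvd_send_upll_ctlreq - emit the UPLL control request
 *
 * @rdev: radeon_device pointer
 * @cg_upll_func_cntl: offset of the CG_UPLL_FUNC_CNTL register
 *
 * Toggle UPLL_CTLREQ and wait for the PLL to acknowledge the new
 * settings via CTLACK and CTLACK2.
 */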
int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}