/*	$NetBSD: amdgpu_uvd.c,v 1.9 2021/12/19 12:21:29 riastradh Exp $	*/

/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_uvd.c,v 1.9 2021/12/19 12:21:29 riastradh Exp $");

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_uvd.h"
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"

#include "amdgpu_ras.h"
#include <linux/nbsd-namespace.h>

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware versions for VI */
#define FW_1_65_10	((1 << 24) | (65 << 16) | (10 << 8))
#define FW_1_87_11	((1 << 24) | (87 << 16) | (11 << 8))
#define FW_1_87_12	((1 << 24) | (87 << 16) | (12 << 8))
#define FW_1_37_15	((1 << 24) | (37 << 16) | (15 << 8))

/* Polaris10/11 firmware version */
#define FW_1_66_16	((1 << 24) | (66 << 16) | (16 << 8))

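/*
 * These FW_x_y_z constants pack (major, minor, patch) into the top three
 * bytes of a 32-bit word, matching the layout of hdr->ucode_version parsed
 * in amdgpu_uvd_sw_init() below.  A minimal decoding sketch (helper names
 * here are illustrative only, not part of this driver):
 *
 *	unsigned major = (ver >> 24) & 0xff;	// FW_1_66_16 -> 1
 *	unsigned minor = (ver >> 16) & 0xff;	// FW_1_66_16 -> 66
 *	unsigned patch = (ver >>  8) & 0xff;	// FW_1_66_16 -> 16
 *
 * e.g. FW_1_66_16 evaluates to 0x01421000.
 */
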
/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"amdgpu/bonaire_uvd.bin"
#define FIRMWARE_KABINI	"amdgpu/kabini_uvd.bin"
#define FIRMWARE_KAVERI	"amdgpu/kaveri_uvd.bin"
#define FIRMWARE_HAWAII	"amdgpu/hawaii_uvd.bin"
#define FIRMWARE_MULLINS	"amdgpu/mullins_uvd.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_uvd.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_uvd.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_uvd.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_uvd.bin"
#define FIRMWARE_POLARIS10	"amdgpu/polaris10_uvd.bin"
#define FIRMWARE_POLARIS11	"amdgpu/polaris11_uvd.bin"
#define FIRMWARE_POLARIS12	"amdgpu/polaris12_uvd.bin"
#define FIRMWARE_VEGAM		"amdgpu/vegam_uvd.bin"

#define FIRMWARE_VEGA10		"amdgpu/vega10_uvd.bin"
#define FIRMWARE_VEGA12		"amdgpu/vega12_uvd.bin"
#define FIRMWARE_VEGA20		"amdgpu/vega20_uvd.bin"

/* These are common relative offsets for all asics, from uvd_7_0_offset.h. */
#define UVD_GPCOM_VCPU_CMD		0x03c3
#define UVD_GPCOM_VCPU_DATA0	0x03c4
#define UVD_GPCOM_VCPU_DATA1	0x03c5
#define UVD_NO_OP				0x03ff
#define UVD_BASE_SI				0x3800

/**
 * amdgpu_uvd_cs_ctx - Command submission parser context
 *
 * Used for emulating virtual memory support on UVD 4.2.
 */
struct amdgpu_uvd_cs_ctx {
	struct amdgpu_cs_parser *parser;
	unsigned reg, count;
	unsigned data0, data1;
	unsigned idx;
	unsigned ib_idx;

	/* does the IB have a msg command */
	bool has_msg_cmd;

	/* minimum buffer sizes */
	unsigned *buf_sizes;
};

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGAM);

MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);

static void amdgpu_uvd_idle_work_handler(struct work_struct *work);

int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned family_id;
	int i, j, r;

	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;
	case CHIP_POLARIS10:
		fw_name = FIRMWARE_POLARIS10;
		break;
	case CHIP_POLARIS11:
		fw_name = FIRMWARE_POLARIS11;
		break;
	case CHIP_POLARIS12:
		fw_name = FIRMWARE_POLARIS12;
		break;
	case CHIP_VEGA10:
		fw_name = FIRMWARE_VEGA10;
		break;
	case CHIP_VEGA12:
		fw_name = FIRMWARE_VEGA12;
		break;
	case CHIP_VEGAM:
		fw_name = FIRMWARE_VEGAM;
		break;
	case CHIP_VEGA20:
		fw_name = FIRMWARE_VEGA20;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->uvd.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->uvd.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->uvd.fw);
		adev->uvd.fw = NULL;
		return r;
	}

	/* Set the default number of UVD handles that the firmware can handle */
	adev->uvd.max_handles = AMDGPU_DEFAULT_UVD_HANDLES;

	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;

	if (adev->asic_type < CHIP_VEGA20) {
		unsigned version_major, version_minor;

		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
			version_major, version_minor, family_id);

		/*
		 * Limit the number of UVD handles depending on microcode major
		 * and minor versions. Firmware version 1.80 is the first to
		 * support 40 UVD instances, so all subsequent versions support
		 * them as well.
		 */
		if ((version_major > 0x01) ||
		    ((version_major == 0x01) && (version_minor >= 0x50)))
			adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;

		adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
					(family_id << 8));

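		/*
		 * fw_version thus uses the same packed layout as the FW_x_y_z
		 * macros above; e.g. major 1, minor 80 (0x50), family 0x0d
		 * packs to (1 << 24) | (0x50 << 16) | (0x0d << 8) ==
		 * 0x01500d00, which is what the FW_1_66_16 comparison below
		 * relies on.
		 */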
		if ((adev->asic_type == CHIP_POLARIS10 ||
		     adev->asic_type == CHIP_POLARIS11) &&
		    (adev->uvd.fw_version < FW_1_66_16))
			DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
				  version_major, version_minor);
	} else {
		unsigned int enc_major, enc_minor, dec_minor;

		dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
		enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
		DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n",
			enc_major, enc_minor, dec_minor, family_id);

		adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;

		adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version);
	}

	bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
		  +  AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
		if (adev->uvd.harvest_config & (1 << j))
			continue;
		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
					    &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
			return r;
		}
	}

	for (i = 0; i < adev->uvd.max_handles; ++i) {
		atomic_set(&adev->uvd.handles[i], 0);
		adev->uvd.filp[i] = NULL;
	}

	/* from UVD v5.0 on, HW addressing capacity increased to 64 bits */
	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
		adev->uvd.address_64_bit = true;

	switch (adev->asic_type) {
	case CHIP_TONGA:
		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
		break;
	case CHIP_CARRIZO:
		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11;
		break;
	case CHIP_FIJI:
		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12;
		break;
	case CHIP_STONEY:
		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15;
		break;
	default:
		adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10;
	}

	return 0;
}

int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
	int i, j;

	cancel_delayed_work_sync(&adev->uvd.idle_work);
	drm_sched_entity_destroy(&adev->uvd.entity);

	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
		if (adev->uvd.harvest_config & (1 << j))
			continue;
		kvfree(adev->uvd.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
				      &adev->uvd.inst[j].gpu_addr,
				      (void **)&adev->uvd.inst[j].cpu_addr);

		amdgpu_ring_fini(&adev->uvd.inst[j].ring);

		for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
	}
	release_firmware(adev->uvd.fw);

	return 0;
}

/**
 * amdgpu_uvd_entity_init - init entity
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	struct drm_gpu_scheduler *sched;
	int r;

	ring = &adev->uvd.inst[0].ring;
	sched = &ring->sched;
	r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
				  &sched, 1, NULL);
	if (r) {
		DRM_ERROR("Failed setting up UVD kernel entity.\n");
		return r;
	}

	return 0;
}

int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i, j;
	bool in_ras_intr = amdgpu_ras_intr_triggered();

	cancel_delayed_work_sync(&adev->uvd.idle_work);

	/* only valid for physical mode */
	if (adev->asic_type < CHIP_POLARIS10) {
		for (i = 0; i < adev->uvd.max_handles; ++i)
			if (atomic_read(&adev->uvd.handles[i]))
				break;

		if (i == adev->uvd.max_handles)
			return 0;
	}

	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
		if (adev->uvd.harvest_config & (1 << j))
			continue;
		if (adev->uvd.inst[j].vcpu_bo == NULL)
			continue;

		size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
		ptr = adev->uvd.inst[j].cpu_addr;

		adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->uvd.inst[j].saved_bo)
			return -ENOMEM;

		/* re-write 0 since err_event_athub will corrupt VCPU buffer */
		if (in_ras_intr)
			memset(adev->uvd.inst[j].saved_bo, 0, size);
		else
			memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
	}

	if (in_ras_intr)
		DRM_WARN("UVD VCPU state may be lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");

	return 0;
}

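/*
 * Resume is the inverse of the suspend path above: if suspend left a
 * saved_bo snapshot, copy it back into the VCPU BO; otherwise reload the
 * microcode image from adev->uvd.fw (unless the PSP loads it), zero the
 * remainder, and force fence completion so the fence sequence is sane.
 */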
int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
		if (adev->uvd.harvest_config & (1 << i))
			continue;
		if (adev->uvd.inst[i].vcpu_bo == NULL)
			return -EINVAL;

		size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
		ptr = adev->uvd.inst[i].cpu_addr;

		if (adev->uvd.inst[i].saved_bo != NULL) {
			memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
			kvfree(adev->uvd.inst[i].saved_bo);
			adev->uvd.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned offset;

			hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
				memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
					    le32_to_cpu(hdr->ucode_size_bytes));
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
			/* to restore uvd fence seq */
			amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
		}
	}
	return 0;
}

void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->uvd.inst[0].ring;
	int i, r;

	for (i = 0; i < adev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&adev->uvd.handles[i]);

		if (handle != 0 && adev->uvd.filp[i] == filp) {
			struct dma_fence *fence;

			r = amdgpu_uvd_get_destroy_msg(ring, handle, false,
						       &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD %d!\n", r);
				continue;
			}

			dma_fence_wait(fence, false);
			dma_fence_put(fence);

			adev->uvd.filp[i] = NULL;
			atomic_set(&adev->uvd.handles[i], 0);
		}
	}
}

static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
{
	int i;

	for (i = 0; i < abo->placement.num_placement; ++i) {
		abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}
}

static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
{
	uint32_t lo, hi;
	uint64_t addr;

	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);

	return addr;
}

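/*
 * For example (illustrative values only): with DATA0 = 0x12345000 and
 * DATA1 = 0x1 in the IB, the assembled 64-bit GPU address above is
 * 0x0000000112345000.
 */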
/**
 * amdgpu_uvd_cs_pass1 - first parsing round
 *
 * @ctx: UVD parser context
 *
 * Make sure UVD message and feedback buffers are in VRAM and
 * nobody is violating a 256MB boundary.
 */
static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
{
	struct ttm_operation_ctx tctx = { false, false };
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint32_t cmd;
	uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
	int r = 0;

	r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%08"PRIx64"\n", addr);
		return r;
	}

	if (!ctx->parser->adev->uvd.address_64_bit) {
		/* check if it's a message or feedback command */
		cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
		if (cmd == 0x0 || cmd == 0x3) {
			/* yes, force it into VRAM */
			uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;

			amdgpu_bo_placement_from_domain(bo, domain);
		}
		amdgpu_uvd_force_into_uvd_segment(bo);

		r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx);
	}

	return r;
}

/**
 * amdgpu_uvd_cs_msg_decode - handle UVD decode message
 *
 * @adev: amdgpu_device pointer
 * @msg: pointer to message structure
 * @buf_sizes: returned buffer sizes
 *
 * Peek into the decode message and calculate the necessary buffer sizes.
 */
static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
	unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];
	unsigned level = msg[57];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);
	unsigned fs_in_mb = width_in_mb * height_in_mb;

	unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
	unsigned min_ctx_size = ~0;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

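	/*
	 * Worked example (H264, level 41, 1920x1088): width_in_mb = 120,
	 * height_in_mb = 68, fs_in_mb = 8160, so num_dpb_buffer below is
	 * 32768 / 8160 + 1 = 5, and image_size is 1920 * 1088 * 3 / 2 =
	 * 3133440 bytes (already 1024-aligned).
	 */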
	switch (stream_type) {
	case 0: /* H264 */
		switch (level) {
		case 30:
			num_dpb_buffer = 8100 / fs_in_mb;
			break;
		case 31:
			num_dpb_buffer = 18000 / fs_in_mb;
			break;
		case 32:
			num_dpb_buffer = 20480 / fs_in_mb;
			break;
		case 41:
			num_dpb_buffer = 32768 / fs_in_mb;
			break;
		case 42:
			num_dpb_buffer = 34816 / fs_in_mb;
			break;
		case 50:
			num_dpb_buffer = 110400 / fs_in_mb;
			break;
		case 51:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		default:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		}
		num_dpb_buffer++;
		if (num_dpb_buffer > 17)
			num_dpb_buffer = 17;

		/* reference picture buffer */
		min_dpb_size = image_size * num_dpb_buffer;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	case 7: /* H264 Perf */
		switch (level) {
		case 30:
			num_dpb_buffer = 8100 / fs_in_mb;
			break;
		case 31:
			num_dpb_buffer = 18000 / fs_in_mb;
			break;
		case 32:
			num_dpb_buffer = 20480 / fs_in_mb;
			break;
		case 41:
			num_dpb_buffer = 32768 / fs_in_mb;
			break;
		case 42:
			num_dpb_buffer = 34816 / fs_in_mb;
			break;
		case 50:
			num_dpb_buffer = 110400 / fs_in_mb;
			break;
		case 51:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		default:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		}
		num_dpb_buffer++;
		if (num_dpb_buffer > 17)
			num_dpb_buffer = 17;

		/* reference picture buffer */
		min_dpb_size = image_size * num_dpb_buffer;

		if (!adev->uvd.use_ctx_buf) {
			/* macroblock context buffer */
			min_dpb_size +=
				width_in_mb * height_in_mb * num_dpb_buffer * 192;

			/* IT surface buffer */
			min_dpb_size += width_in_mb * height_in_mb * 32;
		} else {
			/* macroblock context buffer */
			min_ctx_size =
				width_in_mb * height_in_mb * num_dpb_buffer * 192;
		}
		break;

	case 8: /* MJPEG */
		min_dpb_size = 0;
		break;

	case 16: /* H265 */
		image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
		image_size = ALIGN(image_size, 256);

		num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2;
		min_dpb_size = image_size * num_dpb_buffer;
		min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16)
			* 16 * num_dpb_buffer + 52 * 1024;
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	buf_sizes[0x4] = min_ctx_size;
	/* store image width to adjust nb memory pstate */
	adev->uvd.decode_image_width = width;
	return 0;
}

/**
 * amdgpu_uvd_cs_msg - handle UVD message
 *
 * @ctx: UVD parser context
 * @bo: buffer object containing the message
 * @offset: offset into the buffer object
 *
 * Peek into the UVD message and extract the session id.
 * Make sure that we don't open up too many sessions.
 */
static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
			     struct amdgpu_bo *bo, unsigned offset)
{
	struct amdgpu_device *adev = ctx->parser->adev;
	int32_t *msg, msg_type, handle;
	void *ptr;
	long r;
	int i;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	r = amdgpu_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		amdgpu_bo_kunmap(bo);

		/* try to alloc a new handle */
		for (i = 0; i < adev->uvd.max_handles; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n",
					  handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
				adev->uvd.filp[i] = ctx->parser->filp;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -ENOSPC;

	case 1:
		/* it's a decode msg, calc buffer sizes */
		r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes);
		amdgpu_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < adev->uvd.max_handles; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
				if (adev->uvd.filp[i] != ctx->parser->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < adev->uvd.max_handles; ++i)
			atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
		amdgpu_bo_kunmap(bo);
		return 0;

	default:
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
		return -EINVAL;
	}
	BUG();
	return -EINVAL;
}

/**
 * amdgpu_uvd_cs_pass2 - second parsing round
 *
 * @ctx: UVD parser context
 *
 * Patch buffer addresses, make sure buffer sizes are correct.
 */
static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint32_t cmd;
	uint64_t start, end;
	uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
	int r;

	r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%08"PRIx64"\n", addr);
		return r;
	}

	start = amdgpu_bo_gpu_offset(bo);

	end = (mapping->last + 1 - mapping->start);
	end = end * AMDGPU_GPU_PAGE_SIZE + start;

	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
	start += addr;

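	/*
	 * Example with illustrative numbers: for a BO at GPU offset
	 * 0x40000000 backing a mapping whose first page is mapping->start,
	 * a command address 3 pages into the mapping yields
	 * addr = 3 * AMDGPU_GPU_PAGE_SIZE and start = 0x40000000 + addr,
	 * which is what gets patched into DATA0/DATA1 below.
	 */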
	amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
			    lower_32_bits(start));
	amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
			    upper_32_bits(start));

	cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
	if (cmd < 0x4) {
		if ((end - start) < ctx->buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start),
				  ctx->buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd == 0x206) {
		if ((end - start) < ctx->buf_sizes[4]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start),
				  ctx->buf_sizes[4]);
			return -EINVAL;
		}
	} else if ((cmd != 0x100) && (cmd != 0x204)) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if (!ctx->parser->adev->uvd.address_64_bit) {
		if ((start >> 28) != ((end - 1) >> 28)) {
			DRM_ERROR("reloc %"PRIX64"-%"PRIX64" crossing 256MB boundary!\n",
				  start, end);
			return -EINVAL;
		}

		if ((cmd == 0 || cmd == 0x3) &&
		    (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
			DRM_ERROR("msg/fb buffer %"PRIX64"-%"PRIX64" out of 256MB segment!\n",
				  start, end);
			return -EINVAL;
		}
	}

	if (cmd == 0) {
		ctx->has_msg_cmd = true;
		r = amdgpu_uvd_cs_msg(ctx, bo, addr);
		if (r)
			return r;
	} else if (!ctx->has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

/**
 * amdgpu_uvd_cs_reg - parse register writes
 *
 * @ctx: UVD parser context
 * @cb: callback function
 *
 * Parse the register writes, call cb on each complete command.
 */
static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
			     int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
	struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
	int i, r;

	ctx->idx++;
	for (i = 0; i <= ctx->count; ++i) {
		unsigned reg = ctx->reg + i;

		if (ctx->idx >= ib->length_dw) {
			DRM_ERROR("Register command after end of CS!\n");
			return -EINVAL;
		}

		switch (reg) {
		case mmUVD_GPCOM_VCPU_DATA0:
			ctx->data0 = ctx->idx;
			break;
		case mmUVD_GPCOM_VCPU_DATA1:
			ctx->data1 = ctx->idx;
			break;
		case mmUVD_GPCOM_VCPU_CMD:
			r = cb(ctx);
			if (r)
				return r;
			break;
		case mmUVD_ENGINE_CNTL:
		case mmUVD_NO_OP:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n", reg);
			return -EINVAL;
		}
		ctx->idx++;
	}
	return 0;
}

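/*
 * A type-0 packet in the UVD command stream is a header dword encoding a
 * base register and a count, followed by count + 1 data dwords written to
 * consecutive registers; amdgpu_uvd_cs_reg() above records where the
 * DATA0/DATA1 payloads sit so the passes can read and patch them, and
 * amdgpu_uvd_cs_packets() below walks the headers.  A stream thus looks
 * roughly like (illustrative):
 *
 *	PACKET0(mmUVD_GPCOM_VCPU_DATA0, 1)
 *	<addr low>  <addr high>
 *	PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)
 *	<command>
 */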
/**
 * amdgpu_uvd_cs_packets - parse UVD packets
 *
 * @ctx: UVD parser context
 * @cb: callback function
 *
 * Parse the command stream packets.
 */
static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
				 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
	struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
	int r;

	for (ctx->idx = 0; ctx->idx < ib->length_dw; ) {
		uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
		unsigned type = CP_PACKET_GET_TYPE(cmd);

		switch (type) {
		case PACKET_TYPE0:
			ctx->reg = CP_PACKET0_GET_REG(cmd);
			ctx->count = CP_PACKET_GET_COUNT(cmd);
			r = amdgpu_uvd_cs_reg(ctx, cb);
			if (r)
				return r;
			break;
		case PACKET_TYPE2:
			++ctx->idx;
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", type);
			return -EINVAL;
		}
	}
	return 0;
}

/**
 * amdgpu_uvd_ring_parse_cs - UVD command submission parser
 *
 * @parser: Command submission parser context
 *
 * Parse the command stream, patch in addresses as necessary.
 */
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
{
	struct amdgpu_uvd_cs_ctx ctx = {};
	unsigned buf_sizes[] = {
		[0x00000000]	=	2048,
		[0x00000001]	=	0xFFFFFFFF,
		[0x00000002]	=	0xFFFFFFFF,
		[0x00000003]	=	2048,
		[0x00000004]	=	0xFFFFFFFF,
	};
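	/*
	 * buf_sizes is indexed by command number: 0x0 message (fixed 2048
	 * bytes), 0x1 DPB, 0x2 decoding target, 0x3 feedback (fixed 2048
	 * bytes), 0x4 context buffer.  The 0xFFFFFFFF entries are "no limit
	 * yet"; amdgpu_uvd_cs_msg_decode() fills in the real minimums once
	 * the decode message has been parsed.
	 */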
	struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
	int r;

	parser->job->vm = NULL;
	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);

	if (ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  ib->length_dw);
		return -EINVAL;
	}

	ctx.parser = parser;
	ctx.buf_sizes = buf_sizes;
	ctx.ib_idx = ib_idx;

	/* first round only required on chips without UVD 64 bit address support */
	if (!parser->adev->uvd.address_64_bit) {
		/* first round, make sure the buffers are actually in the UVD segment */
		r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
		if (r)
			return r;
	}

	/* second round, patch buffer addresses into the command stream */
	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
	if (r)
		return r;

	if (!ctx.has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}

static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
			       bool direct, struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint32_t data[4];
	uint64_t addr;
	long r;
	int i;
	unsigned offset_idx = 0;
	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);

	if (!ring->adev->uvd.address_64_bit) {
		struct ttm_operation_ctx ctx = { true, false };

		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
		amdgpu_uvd_force_into_uvd_segment(bo);
		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		if (r)
			goto err;
	}

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto err;

	if (adev->asic_type >= CHIP_VEGA10) {
		offset_idx = 1 + ring->me;
		offset[1] = adev->reg_offset[UVD_HWIP][0][1];
		offset[2] = adev->reg_offset[UVD_HWIP][1][1];
	}

	data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
	data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = data[0];
	ib->ptr[1] = addr;
	ib->ptr[2] = data[1];
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = data[2];
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = data[3];
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

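	/*
	 * The resulting 16-dword IB is three single-register writes plus
	 * NO_OP padding:
	 *
	 *	[PACKET0(DATA0), addr_lo, PACKET0(DATA1), addr_hi,
	 *	 PACKET0(CMD), 0, PACKET0(NO_OP), 0, ... five NO_OP pairs]
	 *
	 * i.e. the message BO address in DATA0/DATA1 and a zero command,
	 * padded out to the 16-dword alignment the parser enforces.
	 */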
	if (direct) {
		r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
					      true, false,
					      msecs_to_jiffies(10));
		if (r == 0)
			r = -ETIMEDOUT;
		if (r < 0)
			goto err_free;

		r = amdgpu_job_submit_direct(job, ring, &f);
		if (r)
			goto err_free;
	} else {
		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
		if (r)
			goto err_free;

		r = amdgpu_job_submit(job, &adev->uvd.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
		if (r)
			goto err_free;
	}

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

/* multiple fence commands without any stream commands in between can
   crash the vcpu so just try to emit a dummy create/destroy msg to
   avoid this */
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
			      struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	/* stitch together a UVD create msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000000);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(0x00000000);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000000);
	msg[7] = cpu_to_le32(0x00000780);
	msg[8] = cpu_to_le32(0x00000440);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x01b37000);
	for (i = 11; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

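	/*
	 * The message word layout mirrors what amdgpu_uvd_cs_msg() parses:
	 * msg[1] is the message type (0 = create) and msg[2] the session
	 * handle.  The remaining constants appear to describe the dummy
	 * stream (e.g. 0x780/0x440 match a 1920x1088 frame), though their
	 * exact meaning is firmware-defined.
	 */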
	return amdgpu_uvd_send_msg(ring, bo, true, fence);
}

int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
			       bool direct, struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	/* stitch together a UVD destroy msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000002);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	for (i = 4; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
}

static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, uvd.idle_work.work);
	unsigned fences = 0, i, j;

	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
		if (adev->uvd.harvest_config & (1 << i))
			continue;
		fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
		for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
			fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
		}
	}

	if (fences == 0) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_uvd(adev, false);
		} else {
			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
			/* shutdown the UVD block */
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
							       AMD_PG_STATE_GATE);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
							       AMD_CG_STATE_GATE);
		}
	} else {
		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
	}
}

void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks;

	if (amdgpu_sriov_vf(adev))
		return;

	set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_uvd(adev, true);
		} else {
			amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
							       AMD_CG_STATE_UNGATE);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
							       AMD_PG_STATE_UNGATE);
		}
	}
}

void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev))
		schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
}

/**
 * amdgpu_uvd_ring_test_ib - test ib execution
 *
 * @ring: amdgpu_ring pointer
 * @timeout: how long to wait for completion
 *
 * Test if we can successfully execute an IB
 */
int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence;
	long r;

	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
	if (r)
		goto error;

	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);

error:
	return r;
}

/**
 * amdgpu_uvd_used_handles - returns used UVD handles
 *
 * @adev: amdgpu_device pointer
 *
 * Returns the number of UVD handles in use
 */
uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
{
	unsigned i;
	uint32_t used_handles = 0;

	for (i = 0; i < adev->uvd.max_handles; ++i) {
		/*
		 * Handles can be freed in any order, and not
		 * necessarily linear. So we need to count
		 * all non-zero handles.
		 */
		if (atomic_read(&adev->uvd.handles[i]))
			used_handles++;
	}

	return used_handles;
}