1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <linux/module.h>
29 #include <linux/dmi.h>
30 #include <linux/pci.h>
31 #include <linux/debugfs.h>
32 #include <drm/drm_drv.h>
33 
34 #include "amdgpu.h"
35 #include "amdgpu_pm.h"
36 #include "amdgpu_vcn.h"
37 #include "soc15d.h"
38 
39 /* Firmware Names */
40 #define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
41 #define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
42 #define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"
43 #define FIRMWARE_ARCTURUS	"amdgpu/arcturus_vcn.bin"
44 #define FIRMWARE_RENOIR		"amdgpu/renoir_vcn.bin"
45 #define FIRMWARE_GREEN_SARDINE	"amdgpu/green_sardine_vcn.bin"
46 #define FIRMWARE_NAVI10		"amdgpu/navi10_vcn.bin"
47 #define FIRMWARE_NAVI14		"amdgpu/navi14_vcn.bin"
48 #define FIRMWARE_NAVI12		"amdgpu/navi12_vcn.bin"
49 #define FIRMWARE_SIENNA_CICHLID	"amdgpu/sienna_cichlid_vcn.bin"
50 #define FIRMWARE_NAVY_FLOUNDER	"amdgpu/navy_flounder_vcn.bin"
51 #define FIRMWARE_VANGOGH	"amdgpu/vangogh_vcn.bin"
52 #define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
53 #define FIRMWARE_ALDEBARAN	"amdgpu/aldebaran_vcn.bin"
54 #define FIRMWARE_BEIGE_GOBY	"amdgpu/beige_goby_vcn.bin"
55 #define FIRMWARE_YELLOW_CARP	"amdgpu/yellow_carp_vcn.bin"
56 #define FIRMWARE_VCN_3_1_2	"amdgpu/vcn_3_1_2.bin"
57 #define FIRMWARE_VCN4_0_0	"amdgpu/vcn_4_0_0.bin"
58 #define FIRMWARE_VCN4_0_2	"amdgpu/vcn_4_0_2.bin"
59 #define FIRMWARE_VCN4_0_4	"amdgpu/vcn_4_0_4.bin"
60 
61 MODULE_FIRMWARE(FIRMWARE_RAVEN);
62 MODULE_FIRMWARE(FIRMWARE_PICASSO);
63 MODULE_FIRMWARE(FIRMWARE_RAVEN2);
64 MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
65 MODULE_FIRMWARE(FIRMWARE_RENOIR);
66 MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
67 MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
68 MODULE_FIRMWARE(FIRMWARE_NAVI10);
69 MODULE_FIRMWARE(FIRMWARE_NAVI14);
70 MODULE_FIRMWARE(FIRMWARE_NAVI12);
71 MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
72 MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
73 MODULE_FIRMWARE(FIRMWARE_VANGOGH);
74 MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
75 MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
76 MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
77 MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
78 MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
79 MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
80 MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
81 
82 static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
83 
84 int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
85 {
86 	unsigned long bo_size;
87 	const char *fw_name;
88 	const struct common_firmware_header *hdr;
89 	unsigned char fw_check;
90 	unsigned int fw_shared_size, log_offset;
91 	int i, r;
92 
93 	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
94 	rw_init(&adev->vcn.vcn_pg_lock, "vcnpg");
95 	rw_init(&adev->vcn.vcn1_jpeg1_workaround, "vcnwa");
96 	atomic_set(&adev->vcn.total_submission_cnt, 0);
97 	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
98 		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
99 
100 	switch (adev->ip_versions[UVD_HWIP][0]) {
101 	case IP_VERSION(1, 0, 0):
102 	case IP_VERSION(1, 0, 1):
103 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
104 			fw_name = FIRMWARE_RAVEN2;
105 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
106 			fw_name = FIRMWARE_PICASSO;
107 		else
108 			fw_name = FIRMWARE_RAVEN;
109 		break;
110 	case IP_VERSION(2, 5, 0):
111 		fw_name = FIRMWARE_ARCTURUS;
112 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
113 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
114 			adev->vcn.indirect_sram = true;
115 		break;
116 	case IP_VERSION(2, 2, 0):
117 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
118 			fw_name = FIRMWARE_RENOIR;
119 		else
120 			fw_name = FIRMWARE_GREEN_SARDINE;
121 
122 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
123 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
124 			adev->vcn.indirect_sram = true;
125 		break;
126 	case IP_VERSION(2, 6, 0):
127 		fw_name = FIRMWARE_ALDEBARAN;
128 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
129 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
130 			adev->vcn.indirect_sram = true;
131 		break;
132 	case IP_VERSION(2, 0, 0):
133 		fw_name = FIRMWARE_NAVI10;
134 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
135 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
136 			adev->vcn.indirect_sram = true;
137 		break;
138 	case IP_VERSION(2, 0, 2):
139 		if (adev->asic_type == CHIP_NAVI12)
140 			fw_name = FIRMWARE_NAVI12;
141 		else
142 			fw_name = FIRMWARE_NAVI14;
143 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
144 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
145 			adev->vcn.indirect_sram = true;
146 		break;
147 	case IP_VERSION(3, 0, 0):
148 	case IP_VERSION(3, 0, 64):
149 	case IP_VERSION(3, 0, 192):
150 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
151 			fw_name = FIRMWARE_SIENNA_CICHLID;
152 		else
153 			fw_name = FIRMWARE_NAVY_FLOUNDER;
154 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
155 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
156 			adev->vcn.indirect_sram = true;
157 		break;
158 	case IP_VERSION(3, 0, 2):
159 		fw_name = FIRMWARE_VANGOGH;
160 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
161 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
162 			adev->vcn.indirect_sram = true;
163 		break;
164 	case IP_VERSION(3, 0, 16):
165 		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
166 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
167 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
168 			adev->vcn.indirect_sram = true;
169 		break;
170 	case IP_VERSION(3, 0, 33):
171 		fw_name = FIRMWARE_BEIGE_GOBY;
172 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
173 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
174 			adev->vcn.indirect_sram = true;
175 		break;
176 	case IP_VERSION(3, 1, 1):
177 		fw_name = FIRMWARE_YELLOW_CARP;
178 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
179 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
180 			adev->vcn.indirect_sram = true;
181 		break;
182 	case IP_VERSION(3, 1, 2):
183 		fw_name = FIRMWARE_VCN_3_1_2;
184 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
185 		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
186 			adev->vcn.indirect_sram = true;
187 		break;
188 	case IP_VERSION(4, 0, 0):
189 		fw_name = FIRMWARE_VCN4_0_0;
190 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
191 			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
192 			adev->vcn.indirect_sram = true;
193 		break;
194 	case IP_VERSION(4, 0, 2):
195 		fw_name = FIRMWARE_VCN4_0_2;
196 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
197 			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
198 			adev->vcn.indirect_sram = true;
199 		break;
200 	case IP_VERSION(4, 0, 4):
201 		fw_name = FIRMWARE_VCN4_0_4;
202 		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
203 			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
204 			adev->vcn.indirect_sram = true;
205 		break;
206 	default:
207 		return -EINVAL;
208 	}
209 
210 	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
211 	if (r) {
212 		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
213 			fw_name);
214 		return r;
215 	}
216 
217 	r = amdgpu_ucode_validate(adev->vcn.fw);
218 	if (r) {
219 		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
220 			fw_name);
221 		release_firmware(adev->vcn.fw);
222 		adev->vcn.fw = NULL;
223 		return r;
224 	}
225 
226 	/*
227 	 * Some Steam Deck BIOS versions are incompatible with the
228 	 * indirect SRAM mode, leading to amdgpu being unable to get
229 	 * properly probed (and even potentially crashing the kernel).
230 	 * Hence, check for these versions here; note that this check is
231 	 * restricted to Vangogh (the Deck's APU).
232 	 */
233 	if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 2)) {
234 		const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
235 
236 		if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
237 		     !strncmp("F7A0114", bios_ver, 7))) {
238 			adev->vcn.indirect_sram = false;
239 			dev_info(adev->dev,
240 				"Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
241 		}
242 	}
243 
244 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
245 	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
246 
247 	/* Bits 20-23 hold the encode major version and are non-zero for the new
248 	 * naming convention. In the old convention this field was part of the
249 	 * version minor and DRM_DISABLED_FLAG. Since the latest version minor is
250 	 * 0x5B and DRM_DISABLED_FLAG is zero in the old convention, this field is
251 	 * always zero so far; these four bits tell which naming convention is used.
252 	 */
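	/*
	 * Illustration (hypothetical value, not taken from any real firmware):
	 * an ucode_version of 0x21103005 would decode under the new convention
	 * as vep = 0x2 (bits 28-31), dec_ver = 0x1 (bits 24-27), enc_major = 0x1
	 * (bits 20-23, non-zero, so new convention), enc_minor = 0x03 (bits
	 * 12-19) and fw_rev = 0x005 (bits 0-11), i.e. it would be reported as
	 * "ENC: 1.3 DEC: 1 VEP: 2 Revision: 5" below.
	 */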
253 	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
254 	if (fw_check) {
255 		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;
256 
257 		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
258 		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
259 		enc_major = fw_check;
260 		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
261 		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
262 		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
263 			enc_major, enc_minor, dec_ver, vep, fw_rev);
264 	} else {
265 		unsigned int version_major, version_minor, family_id;
266 
267 		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
268 		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
269 		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
270 		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
271 			version_major, version_minor, family_id);
272 	}
273 
274 	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
275 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
276 		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
277 
278 	if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) {
279 		fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
280 		log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
281 	} else {
282 		fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
283 		log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
284 	}
285 
286 	bo_size += fw_shared_size;
287 
288 	if (amdgpu_vcnfw_log)
289 		bo_size += AMDGPU_VCNFW_LOG_SIZE;
290 
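	/*
	 * Rough per-instance VCPU BO layout as set up below: the firmware image
	 * (only when not loaded through PSP), then the stack and context areas,
	 * then the fw_shared region, and finally the optional firmware log of
	 * AMDGPU_VCNFW_LOG_SIZE bytes at the very end of the BO.
	 */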
291 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
292 		if (adev->vcn.harvest_config & (1 << i))
293 			continue;
294 
295 		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
296 						AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
297 						&adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
298 		if (r) {
299 			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
300 			return r;
301 		}
302 
303 		adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
304 				bo_size - fw_shared_size;
305 		adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
306 				bo_size - fw_shared_size;
307 
308 		adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
309 
310 		if (amdgpu_vcnfw_log) {
311 			adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
312 			adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
313 			adev->vcn.inst[i].fw_shared.log_offset = log_offset;
314 		}
315 
316 		if (adev->vcn.indirect_sram) {
317 			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
318 					AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
319 					&adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
320 			if (r) {
321 				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
322 				return r;
323 			}
324 		}
325 	}
326 
327 	return 0;
328 }
329 
330 int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
331 {
332 	int i, j;
333 
334 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
335 		if (adev->vcn.harvest_config & (1 << j))
336 			continue;
337 
338 		if (adev->vcn.indirect_sram) {
339 			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
340 						  &adev->vcn.inst[j].dpg_sram_gpu_addr,
341 						  (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
342 		}
343 		kvfree(adev->vcn.inst[j].saved_bo);
344 
345 		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
346 					  &adev->vcn.inst[j].gpu_addr,
347 					  (void **)&adev->vcn.inst[j].cpu_addr);
348 
349 		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
350 
351 		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
352 			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
353 	}
354 
355 	release_firmware(adev->vcn.fw);
356 	mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
357 	mutex_destroy(&adev->vcn.vcn_pg_lock);
358 
359 	return 0;
360 }
361 
362 /* From VCN 4 and above, only the unified queue is used */
363 static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring)
364 {
365 	struct amdgpu_device *adev = ring->adev;
366 	bool ret = false;
367 
368 	if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0))
369 		ret = true;
370 
371 	return ret;
372 }
373 
374 bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
375 {
376 	bool ret = false;
377 	int vcn_config = adev->vcn.vcn_config[vcn_instance];
378 
379 	if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) {
380 		ret = true;
381 	} else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) {
382 		ret = true;
383 	} else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) {
384 		ret = true;
385 	}
386 
387 	return ret;
388 }
389 
390 int amdgpu_vcn_suspend(struct amdgpu_device *adev)
391 {
392 	unsigned size;
393 	void *ptr;
394 	int i, idx;
395 
396 	cancel_delayed_work_sync(&adev->vcn.idle_work);
397 
398 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
399 		if (adev->vcn.harvest_config & (1 << i))
400 			continue;
401 		if (adev->vcn.inst[i].vcpu_bo == NULL)
402 			return 0;
403 
404 		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
405 		ptr = adev->vcn.inst[i].cpu_addr;
406 
407 		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
408 		if (!adev->vcn.inst[i].saved_bo)
409 			return -ENOMEM;
410 
411 		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
412 			memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
413 			drm_dev_exit(idx);
414 		}
415 	}
416 	return 0;
417 }
418 
419 int amdgpu_vcn_resume(struct amdgpu_device *adev)
420 {
421 	unsigned size;
422 	void *ptr;
423 	int i, idx;
424 
425 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
426 		if (adev->vcn.harvest_config & (1 << i))
427 			continue;
428 		if (adev->vcn.inst[i].vcpu_bo == NULL)
429 			return -EINVAL;
430 
431 		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
432 		ptr = adev->vcn.inst[i].cpu_addr;
433 
434 		if (adev->vcn.inst[i].saved_bo != NULL) {
435 			if (drm_dev_enter(adev_to_drm(adev), &idx)) {
436 				memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
437 				drm_dev_exit(idx);
438 			}
439 			kvfree(adev->vcn.inst[i].saved_bo);
440 			adev->vcn.inst[i].saved_bo = NULL;
441 		} else {
442 			const struct common_firmware_header *hdr;
443 			unsigned offset;
444 
445 			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
446 			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
447 				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
448 				if (drm_dev_enter(adev_to_drm(adev), &idx)) {
449 					memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
450 						    le32_to_cpu(hdr->ucode_size_bytes));
451 					drm_dev_exit(idx);
452 				}
453 				size -= le32_to_cpu(hdr->ucode_size_bytes);
454 				ptr += le32_to_cpu(hdr->ucode_size_bytes);
455 			}
456 			memset_io(ptr, 0, size);
457 		}
458 	}
459 	return 0;
460 }
461 
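/*
 * Overview of the idle handler below: sum the fences still emitted on the
 * decode and encode rings of every non-harvested instance, update the DPG
 * pause state per instance accordingly, and once no fences and no pending
 * submissions remain, gate the VCN block and drop the VIDEO power profile;
 * otherwise the work is simply re-armed after another VCN_IDLE_TIMEOUT.
 */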
462 static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
463 {
464 	struct amdgpu_device *adev =
465 		container_of(work, struct amdgpu_device, vcn.idle_work.work);
466 	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
467 	unsigned int i, j;
468 	int r = 0;
469 
470 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
471 		if (adev->vcn.harvest_config & (1 << j))
472 			continue;
473 
474 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
475 			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
476 		}
477 
478 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
479 			struct dpg_pause_state new_state;
480 
481 			if (fence[j] ||
482 				unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
483 				new_state.fw_based = VCN_DPG_STATE__PAUSE;
484 			else
485 				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
486 
487 			adev->vcn.pause_dpg_mode(adev, j, &new_state);
488 		}
489 
490 		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
491 		fences += fence[j];
492 	}
493 
494 	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
495 		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
496 		       AMD_PG_STATE_GATE);
497 		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
498 				false);
499 		if (r)
500 			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
501 	} else {
502 		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
503 	}
504 }
505 
506 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
507 {
508 	struct amdgpu_device *adev = ring->adev;
509 	int r = 0;
510 
511 	atomic_inc(&adev->vcn.total_submission_cnt);
512 
513 	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
514 		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
515 				true);
516 		if (r)
517 			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
518 	}
519 
520 	mutex_lock(&adev->vcn.vcn_pg_lock);
521 	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
522 	       AMD_PG_STATE_UNGATE);
523 
524 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
525 		struct dpg_pause_state new_state;
526 
527 		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
528 			atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
529 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
530 		} else {
531 			unsigned int fences = 0;
532 			unsigned int i;
533 
534 			for (i = 0; i < adev->vcn.num_enc_rings; ++i)
535 				fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
536 
537 			if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
538 				new_state.fw_based = VCN_DPG_STATE__PAUSE;
539 			else
540 				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
541 		}
542 
543 		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
544 	}
545 	mutex_unlock(&adev->vcn.vcn_pg_lock);
546 }
547 
548 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
549 {
550 	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
551 		ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
552 		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
553 
554 	atomic_dec(&ring->adev->vcn.total_submission_cnt);
555 
556 	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
557 }
558 
559 int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
560 {
561 	struct amdgpu_device *adev = ring->adev;
562 	uint32_t tmp = 0;
563 	unsigned i;
564 	int r;
565 
566 	/* VCN in SRIOV does not support direct register read/write */
567 	if (amdgpu_sriov_vf(adev))
568 		return 0;
569 
570 	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
571 	r = amdgpu_ring_alloc(ring, 3);
572 	if (r)
573 		return r;
574 	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
575 	amdgpu_ring_write(ring, 0xDEADBEEF);
576 	amdgpu_ring_commit(ring);
577 	for (i = 0; i < adev->usec_timeout; i++) {
578 		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
579 		if (tmp == 0xDEADBEEF)
580 			break;
581 		udelay(1);
582 	}
583 
584 	if (i >= adev->usec_timeout)
585 		r = -ETIMEDOUT;
586 
587 	return r;
588 }
589 
590 int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
591 {
592 	struct amdgpu_device *adev = ring->adev;
593 	uint32_t rptr;
594 	unsigned int i;
595 	int r;
596 
597 	if (amdgpu_sriov_vf(adev))
598 		return 0;
599 
600 	r = amdgpu_ring_alloc(ring, 16);
601 	if (r)
602 		return r;
603 
604 	rptr = amdgpu_ring_get_rptr(ring);
605 
606 	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
607 	amdgpu_ring_commit(ring);
608 
609 	for (i = 0; i < adev->usec_timeout; i++) {
610 		if (amdgpu_ring_get_rptr(ring) != rptr)
611 			break;
612 		udelay(1);
613 	}
614 
615 	if (i >= adev->usec_timeout)
616 		r = -ETIMEDOUT;
617 
618 	return r;
619 }
620 
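/*
 * Helper overview: the GPU address of the message buffer in ib_msg is written
 * to the decoder's data0/data1 registers through PACKET0 writes, a 0 is
 * written to the cmd register, and the remaining slots are padded with NOPs
 * before the 16-dword IB is submitted directly; the message IB itself is
 * freed against the resulting fence.
 */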
621 static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
622 				   struct amdgpu_ib *ib_msg,
623 				   struct dma_fence **fence)
624 {
625 	struct amdgpu_device *adev = ring->adev;
626 	struct dma_fence *f = NULL;
627 	struct amdgpu_job *job;
628 	struct amdgpu_ib *ib;
629 	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
630 	int i, r;
631 
632 	r = amdgpu_job_alloc_with_ib(adev, 64,
633 					AMDGPU_IB_POOL_DIRECT, &job);
634 	if (r)
635 		goto err;
636 
637 	ib = &job->ibs[0];
638 	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
639 	ib->ptr[1] = addr;
640 	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
641 	ib->ptr[3] = addr >> 32;
642 	ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
643 	ib->ptr[5] = 0;
644 	for (i = 6; i < 16; i += 2) {
645 		ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
646 		ib->ptr[i+1] = 0;
647 	}
648 	ib->length_dw = 16;
649 
650 	r = amdgpu_job_submit_direct(job, ring, &f);
651 	if (r)
652 		goto err_free;
653 
654 	amdgpu_ib_free(adev, ib_msg, f);
655 
656 	if (fence)
657 		*fence = dma_fence_get(f);
658 	dma_fence_put(f);
659 
660 	return 0;
661 
662 err_free:
663 	amdgpu_job_free(job);
664 err:
665 	amdgpu_ib_free(adev, ib_msg, f);
666 	return r;
667 }
668 
669 static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
670 		struct amdgpu_ib *ib)
671 {
672 	struct amdgpu_device *adev = ring->adev;
673 	uint32_t *msg;
674 	int r, i;
675 
676 	memset(ib, 0, sizeof(*ib));
677 	r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
678 			AMDGPU_IB_POOL_DIRECT,
679 			ib);
680 	if (r)
681 		return r;
682 
683 	msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
684 	msg[0] = cpu_to_le32(0x00000028);
685 	msg[1] = cpu_to_le32(0x00000038);
686 	msg[2] = cpu_to_le32(0x00000001);
687 	msg[3] = cpu_to_le32(0x00000000);
688 	msg[4] = cpu_to_le32(handle);
689 	msg[5] = cpu_to_le32(0x00000000);
690 	msg[6] = cpu_to_le32(0x00000001);
691 	msg[7] = cpu_to_le32(0x00000028);
692 	msg[8] = cpu_to_le32(0x00000010);
693 	msg[9] = cpu_to_le32(0x00000000);
694 	msg[10] = cpu_to_le32(0x00000007);
695 	msg[11] = cpu_to_le32(0x00000000);
696 	msg[12] = cpu_to_le32(0x00000780);
697 	msg[13] = cpu_to_le32(0x00000440);
698 	for (i = 14; i < 1024; ++i)
699 		msg[i] = cpu_to_le32(0x0);
700 
701 	return 0;
702 }
703 
704 static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
705 					  struct amdgpu_ib *ib)
706 {
707 	struct amdgpu_device *adev = ring->adev;
708 	uint32_t *msg;
709 	int r, i;
710 
711 	memset(ib, 0, sizeof(*ib));
712 	r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
713 			AMDGPU_IB_POOL_DIRECT,
714 			ib);
715 	if (r)
716 		return r;
717 
718 	msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
719 	msg[0] = cpu_to_le32(0x00000028);
720 	msg[1] = cpu_to_le32(0x00000018);
721 	msg[2] = cpu_to_le32(0x00000000);
722 	msg[3] = cpu_to_le32(0x00000002);
723 	msg[4] = cpu_to_le32(handle);
724 	msg[5] = cpu_to_le32(0x00000000);
725 	for (i = 6; i < 1024; ++i)
726 		msg[i] = cpu_to_le32(0x0);
727 
728 	return 0;
729 }
730 
731 int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
732 {
733 	struct dma_fence *fence = NULL;
734 	struct amdgpu_ib ib;
735 	long r;
736 
737 	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
738 	if (r)
739 		goto error;
740 
741 	r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
742 	if (r)
743 		goto error;
744 	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
745 	if (r)
746 		goto error;
747 
748 	r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
749 	if (r)
750 		goto error;
751 
752 	r = dma_fence_wait_timeout(fence, false, timeout);
753 	if (r == 0)
754 		r = -ETIMEDOUT;
755 	else if (r > 0)
756 		r = 0;
757 
758 	dma_fence_put(fence);
759 error:
760 	return r;
761 }
762 
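/*
 * Note on the unified-queue IB framing built by the two helpers below: the
 * header consists of two four-dword packets, a checksum packet (0x00000010,
 * 0x30000002, checksum slot, packed size in dwords) followed by an engine
 * info packet (0x00000010, 0x30000001, 0x2 for encode or 0x3 for decode,
 * packed size in bytes). The checksum slot is filled in afterwards as the
 * plain sum of ib_pack_in_dw dwords starting two positions past the slot.
 */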
763 static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib,
764 						uint32_t ib_pack_in_dw, bool enc)
765 {
766 	uint32_t *ib_checksum;
767 
768 	ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */
769 	ib->ptr[ib->length_dw++] = 0x30000002;
770 	ib_checksum = &ib->ptr[ib->length_dw++];
771 	ib->ptr[ib->length_dw++] = ib_pack_in_dw;
772 
773 	ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */
774 	ib->ptr[ib->length_dw++] = 0x30000001;
775 	ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3;
776 	ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t);
777 
778 	return ib_checksum;
779 }
780 
781 static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum,
782 						uint32_t ib_pack_in_dw)
783 {
784 	uint32_t i;
785 	uint32_t checksum = 0;
786 
787 	for (i = 0; i < ib_pack_in_dw; i++)
788 		checksum += *(*ib_checksum + 2 + i);
789 
790 	**ib_checksum = checksum;
791 }
792 
793 static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
794 				      struct amdgpu_ib *ib_msg,
795 				      struct dma_fence **fence)
796 {
797 	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
798 	unsigned int ib_size_dw = 64;
799 	struct amdgpu_device *adev = ring->adev;
800 	struct dma_fence *f = NULL;
801 	struct amdgpu_job *job;
802 	struct amdgpu_ib *ib;
803 	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
804 	bool sq = amdgpu_vcn_using_unified_queue(ring);
805 	uint32_t *ib_checksum;
806 	uint32_t ib_pack_in_dw;
807 	int i, r;
808 
809 	if (sq)
810 		ib_size_dw += 8;
811 
812 	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
813 				AMDGPU_IB_POOL_DIRECT, &job);
814 	if (r)
815 		goto err;
816 
817 	ib = &job->ibs[0];
818 	ib->length_dw = 0;
819 
820 	/* single queue headers */
821 	if (sq) {
822 		ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
823 						+ 4 + 2; /* engine info + decoding ib in dw */
824 		ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
825 	}
826 
827 	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
828 	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
829 	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
830 	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
831 	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));
832 
833 	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
834 	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
835 	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);
836 
837 	for (i = ib->length_dw; i < ib_size_dw; ++i)
838 		ib->ptr[i] = 0x0;
839 
840 	if (sq)
841 		amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
842 
843 	r = amdgpu_job_submit_direct(job, ring, &f);
844 	if (r)
845 		goto err_free;
846 
847 	amdgpu_ib_free(adev, ib_msg, f);
848 
849 	if (fence)
850 		*fence = dma_fence_get(f);
851 	dma_fence_put(f);
852 
853 	return 0;
854 
855 err_free:
856 	amdgpu_job_free(job);
857 err:
858 	amdgpu_ib_free(adev, ib_msg, f);
859 	return r;
860 }
861 
862 int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
863 {
864 	struct dma_fence *fence = NULL;
865 	struct amdgpu_ib ib;
866 	long r;
867 
868 	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
869 	if (r)
870 		goto error;
871 
872 	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
873 	if (r)
874 		goto error;
875 	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
876 	if (r)
877 		goto error;
878 
879 	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
880 	if (r)
881 		goto error;
882 
883 	r = dma_fence_wait_timeout(fence, false, timeout);
884 	if (r == 0)
885 		r = -ETIMEDOUT;
886 	else if (r > 0)
887 		r = 0;
888 
889 	dma_fence_put(fence);
890 error:
891 	return r;
892 }
893 
894 int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
895 {
896 	struct amdgpu_device *adev = ring->adev;
897 	uint32_t rptr;
898 	unsigned i;
899 	int r;
900 
901 	if (amdgpu_sriov_vf(adev))
902 		return 0;
903 
904 	r = amdgpu_ring_alloc(ring, 16);
905 	if (r)
906 		return r;
907 
908 	rptr = amdgpu_ring_get_rptr(ring);
909 
910 	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
911 	amdgpu_ring_commit(ring);
912 
913 	for (i = 0; i < adev->usec_timeout; i++) {
914 		if (amdgpu_ring_get_rptr(ring) != rptr)
915 			break;
916 		udelay(1);
917 	}
918 
919 	if (i >= adev->usec_timeout)
920 		r = -ETIMEDOUT;
921 
922 	return r;
923 }
924 
925 static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
926 					 struct amdgpu_ib *ib_msg,
927 					 struct dma_fence **fence)
928 {
929 	unsigned int ib_size_dw = 16;
930 	struct amdgpu_job *job;
931 	struct amdgpu_ib *ib;
932 	struct dma_fence *f = NULL;
933 	uint32_t *ib_checksum = NULL;
934 	uint64_t addr;
935 	bool sq = amdgpu_vcn_using_unified_queue(ring);
936 	int i, r;
937 
938 	if (sq)
939 		ib_size_dw += 8;
940 
941 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
942 					AMDGPU_IB_POOL_DIRECT, &job);
943 	if (r)
944 		return r;
945 
946 	ib = &job->ibs[0];
947 	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
948 
949 	ib->length_dw = 0;
950 
951 	if (sq)
952 		ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
953 
954 	ib->ptr[ib->length_dw++] = 0x00000018;
955 	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
956 	ib->ptr[ib->length_dw++] = handle;
957 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
958 	ib->ptr[ib->length_dw++] = addr;
959 	ib->ptr[ib->length_dw++] = 0x0000000b;
960 
961 	ib->ptr[ib->length_dw++] = 0x00000014;
962 	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
963 	ib->ptr[ib->length_dw++] = 0x0000001c;
964 	ib->ptr[ib->length_dw++] = 0x00000000;
965 	ib->ptr[ib->length_dw++] = 0x00000000;
966 
967 	ib->ptr[ib->length_dw++] = 0x00000008;
968 	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
969 
970 	for (i = ib->length_dw; i < ib_size_dw; ++i)
971 		ib->ptr[i] = 0x0;
972 
973 	if (sq)
974 		amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
975 
976 	r = amdgpu_job_submit_direct(job, ring, &f);
977 	if (r)
978 		goto err;
979 
980 	if (fence)
981 		*fence = dma_fence_get(f);
982 	dma_fence_put(f);
983 
984 	return 0;
985 
986 err:
987 	amdgpu_job_free(job);
988 	return r;
989 }
990 
991 static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
992 					  struct amdgpu_ib *ib_msg,
993 					  struct dma_fence **fence)
994 {
995 	unsigned int ib_size_dw = 16;
996 	struct amdgpu_job *job;
997 	struct amdgpu_ib *ib;
998 	struct dma_fence *f = NULL;
999 	uint32_t *ib_checksum = NULL;
1000 	uint64_t addr;
1001 	bool sq = amdgpu_vcn_using_unified_queue(ring);
1002 	int i, r;
1003 
1004 	if (sq)
1005 		ib_size_dw += 8;
1006 
1007 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
1008 					AMDGPU_IB_POOL_DIRECT, &job);
1009 	if (r)
1010 		return r;
1011 
1012 	ib = &job->ibs[0];
1013 	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
1014 
1015 	ib->length_dw = 0;
1016 
1017 	if (sq)
1018 		ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
1019 
1020 	ib->ptr[ib->length_dw++] = 0x00000018;
1021 	ib->ptr[ib->length_dw++] = 0x00000001;
1022 	ib->ptr[ib->length_dw++] = handle;
1023 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1024 	ib->ptr[ib->length_dw++] = addr;
1025 	ib->ptr[ib->length_dw++] = 0x0000000b;
1026 
1027 	ib->ptr[ib->length_dw++] = 0x00000014;
1028 	ib->ptr[ib->length_dw++] = 0x00000002;
1029 	ib->ptr[ib->length_dw++] = 0x0000001c;
1030 	ib->ptr[ib->length_dw++] = 0x00000000;
1031 	ib->ptr[ib->length_dw++] = 0x00000000;
1032 
1033 	ib->ptr[ib->length_dw++] = 0x00000008;
1034 	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
1035 
1036 	for (i = ib->length_dw; i < ib_size_dw; ++i)
1037 		ib->ptr[i] = 0x0;
1038 
1039 	if (sq)
1040 		amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
1041 
1042 	r = amdgpu_job_submit_direct(job, ring, &f);
1043 	if (r)
1044 		goto err;
1045 
1046 	if (fence)
1047 		*fence = dma_fence_get(f);
1048 	dma_fence_put(f);
1049 
1050 	return 0;
1051 
1052 err:
1053 	amdgpu_job_free(job);
1054 	return r;
1055 }
1056 
1057 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1058 {
1059 	struct amdgpu_device *adev = ring->adev;
1060 	struct dma_fence *fence = NULL;
1061 	struct amdgpu_ib ib;
1062 	long r;
1063 
1064 	memset(&ib, 0, sizeof(ib));
1065 	r = amdgpu_ib_get(adev, NULL, (128 << 10) + AMDGPU_GPU_PAGE_SIZE,
1066 			AMDGPU_IB_POOL_DIRECT,
1067 			&ib);
1068 	if (r)
1069 		return r;
1070 
1071 	r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
1072 	if (r)
1073 		goto error;
1074 
1075 	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
1076 	if (r)
1077 		goto error;
1078 
1079 	r = dma_fence_wait_timeout(fence, false, timeout);
1080 	if (r == 0)
1081 		r = -ETIMEDOUT;
1082 	else if (r > 0)
1083 		r = 0;
1084 
1085 error:
1086 	amdgpu_ib_free(adev, &ib, fence);
1087 	dma_fence_put(fence);
1088 
1089 	return r;
1090 }
1091 
1092 int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1093 {
1094 	long r;
1095 
1096 	r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
1097 	if (r)
1098 		goto error;
1099 
1100 	r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
1101 
1102 error:
1103 	return r;
1104 }
1105 
1106 enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
1107 {
1108 	switch (ring) {
1109 	case 0:
1110 		return AMDGPU_RING_PRIO_0;
1111 	case 1:
1112 		return AMDGPU_RING_PRIO_1;
1113 	case 2:
1114 		return AMDGPU_RING_PRIO_2;
1115 	default:
1116 		return AMDGPU_RING_PRIO_0;
1117 	}
1118 }
1119 
1120 void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
1121 {
1122 	int i;
1123 	unsigned int idx;
1124 
1125 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1126 		const struct common_firmware_header *hdr;
1127 		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
1128 
1129 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
1130 			if (adev->vcn.harvest_config & (1 << i))
1131 				continue;
1132 			/* currently only 2 FW instances are supported */
1133 			if (i >= 2) {
1134 				dev_info(adev->dev, "More than 2 VCN FW instances!\n");
1135 				break;
1136 			}
1137 			idx = AMDGPU_UCODE_ID_VCN + i;
1138 			adev->firmware.ucode[idx].ucode_id = idx;
1139 			adev->firmware.ucode[idx].fw = adev->vcn.fw;
1140 			adev->firmware.fw_size +=
1141 				roundup2(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
1142 		}
1143 		dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
1144 	}
1145 }
1146 
1147 /*
1148  * debugfs for mapping vcn firmware log buffer.
1149  */
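/*
 * The log itself sits right after the fw_shared region and starts with a
 * struct amdgpu_vcn_fwlog header (header_size, buffer_size, rptr, wptr,
 * wrapped); the read handler below consumes data from rptr towards wptr and
 * wraps back to header_size once AMDGPU_VCNFW_LOG_SIZE is reached.
 */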
1150 #if defined(CONFIG_DEBUG_FS)
1151 static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
1152                                              size_t size, loff_t *pos)
1153 {
1154 	struct amdgpu_vcn_inst *vcn;
1155 	void *log_buf;
1156 	volatile struct amdgpu_vcn_fwlog *plog;
1157 	unsigned int read_pos, write_pos, available, i, read_bytes = 0;
1158 	unsigned int read_num[2] = {0};
1159 
1160 	vcn = file_inode(f)->i_private;
1161 	if (!vcn)
1162 		return -ENODEV;
1163 
1164 	if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log)
1165 		return -EFAULT;
1166 
1167 	log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
1168 
1169 	plog = (volatile struct amdgpu_vcn_fwlog *)log_buf;
1170 	read_pos = plog->rptr;
1171 	write_pos = plog->wptr;
1172 
1173 	if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE)
1174 		return -EFAULT;
1175 
1176 	if (!size || (read_pos == write_pos))
1177 		return 0;
1178 
1179 	if (write_pos > read_pos) {
1180 		available = write_pos - read_pos;
1181 		read_num[0] = min(size, (size_t)available);
1182 	} else {
1183 		read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
1184 		available = read_num[0] + write_pos - plog->header_size;
1185 		if (size > available)
1186 			read_num[1] = write_pos - plog->header_size;
1187 		else if (size > read_num[0])
1188 			read_num[1] = size - read_num[0];
1189 		else
1190 			read_num[0] = size;
1191 	}
1192 
1193 	for (i = 0; i < 2; i++) {
1194 		if (read_num[i]) {
1195 			if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
1196 				read_pos = plog->header_size;
1197 			if (read_num[i] == copy_to_user((buf + read_bytes),
1198 			                                (log_buf + read_pos), read_num[i]))
1199 				return -EFAULT;
1200 
1201 			read_bytes += read_num[i];
1202 			read_pos += read_num[i];
1203 		}
1204 	}
1205 
1206 	plog->rptr = read_pos;
1207 	*pos += read_bytes;
1208 	return read_bytes;
1209 }
1210 
1211 static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
1212 	.owner = THIS_MODULE,
1213 	.read = amdgpu_debugfs_vcn_fwlog_read,
1214 	.llseek = default_llseek
1215 };
1216 #endif
1217 
1218 void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
1219                                    struct amdgpu_vcn_inst *vcn)
1220 {
1221 #if defined(CONFIG_DEBUG_FS)
1222 	struct drm_minor *minor = adev_to_drm(adev)->primary;
1223 	struct dentry *root = minor->debugfs_root;
1224 	char name[32];
1225 
1226 	sprintf(name, "amdgpu_vcn_%d_fwlog", i);
1227 	debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn,
1228 				 &amdgpu_debugfs_vcnfwlog_fops,
1229 				 AMDGPU_VCNFW_LOG_SIZE);
1230 #endif
1231 }
1232 
1233 void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
1234 {
1235 #if defined(CONFIG_DEBUG_FS)
1236 	volatile uint32_t *flag = vcn->fw_shared.cpu_addr;
1237 	void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
1238 	uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
1239 	volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
1240 	volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
1241                                                          + vcn->fw_shared.log_offset;
1242 	*flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
1243 	fw_log->is_enabled = 1;
1244 	fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
1245 	fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32);
1246 	fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE);
1247 
1248 	log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog);
1249 	log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE;
1250 	log_buf->rptr = log_buf->header_size;
1251 	log_buf->wptr = log_buf->header_size;
1252 	log_buf->wrapped = 0;
1253 #endif
1254 }
1255 
1256 int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
1257 				struct amdgpu_irq_src *source,
1258 				struct amdgpu_iv_entry *entry)
1259 {
1260 	struct ras_common_if *ras_if = adev->vcn.ras_if;
1261 	struct ras_dispatch_if ih_data = {
1262 		.entry = entry,
1263 	};
1264 
1265 	if (!ras_if)
1266 		return 0;
1267 
1268 	ih_data.head = *ras_if;
1269 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
1270 
1271 	return 0;
1272 }
1273