xref: /openbsd-src/sys/dev/pci/drm/amd/amdgpu/vcn_v4_0.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 #include "amdgpu.h"
26 #include "amdgpu_vcn.h"
27 #include "amdgpu_pm.h"
28 #include "amdgpu_cs.h"
29 #include "soc15.h"
30 #include "soc15d.h"
31 #include "soc15_hw_ip.h"
32 #include "vcn_v2_0.h"
33 #include "mmsch_v4_0.h"
34 
35 #include "vcn/vcn_4_0_0_offset.h"
36 #include "vcn/vcn_4_0_0_sh_mask.h"
37 #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
38 
39 #include <drm/drm_drv.h>
40 
41 #define mmUVD_DPG_LMA_CTL							regUVD_DPG_LMA_CTL
42 #define mmUVD_DPG_LMA_CTL_BASE_IDX						regUVD_DPG_LMA_CTL_BASE_IDX
43 #define mmUVD_DPG_LMA_DATA							regUVD_DPG_LMA_DATA
44 #define mmUVD_DPG_LMA_DATA_BASE_IDX						regUVD_DPG_LMA_DATA_BASE_IDX
45 
46 #define VCN_VID_SOC_ADDRESS_2_0							0x1fb00
47 #define VCN1_VID_SOC_ADDRESS_3_0						0x48300
48 
49 #define VCN_HARVEST_MMSCH								0
50 
51 #define RDECODE_MSG_CREATE							0x00000000
52 #define RDECODE_MESSAGE_CREATE							0x00000001
53 
54 static int amdgpu_ih_clientid_vcns[] = {
55 	SOC15_IH_CLIENTID_VCN,
56 	SOC15_IH_CLIENTID_VCN1
57 };
58 
59 static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
60 static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
61 static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
62 static int vcn_v4_0_set_powergating_state(void *handle,
63         enum amd_powergating_state state);
64 static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
65         int inst_idx, struct dpg_pause_state *new_state);
66 static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
67 
68 /**
69  * vcn_v4_0_early_init - set function pointers
70  *
71  * @handle: amdgpu_device pointer
72  *
73  * Set ring and irq function pointers
74  */
75 static int vcn_v4_0_early_init(void *handle)
76 {
77 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
78 
79 	if (amdgpu_sriov_vf(adev))
80 		adev->vcn.harvest_config = VCN_HARVEST_MMSCH;
81 
82 	/* re-use enc ring as unified ring */
83 	adev->vcn.num_enc_rings = 1;
84 
85 	vcn_v4_0_set_unified_ring_funcs(adev);
86 	vcn_v4_0_set_irq_funcs(adev);
87 
88 	return 0;
89 }
90 
91 /**
92  * vcn_v4_0_sw_init - sw init for VCN block
93  *
94  * @handle: amdgpu_device pointer
95  *
96  * Load firmware and sw initialization
97  */
98 static int vcn_v4_0_sw_init(void *handle)
99 {
100 	struct amdgpu_ring *ring;
101 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
102 	int i, r;
103 
104 	r = amdgpu_vcn_sw_init(adev);
105 	if (r)
106 		return r;
107 
108 	amdgpu_vcn_setup_ucode(adev);
109 
110 	r = amdgpu_vcn_resume(adev);
111 	if (r)
112 		return r;
113 
114 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
115 		volatile struct amdgpu_vcn4_fw_shared *fw_shared;
116 
117 		if (adev->vcn.harvest_config & (1 << i))
118 			continue;
119 
120 		/* Init instance 0 sched_score to 1, so it's scheduled after other instances */
121 		if (i == 0)
122 			atomic_set(&adev->vcn.inst[i].sched_score, 1);
123 		else
124 			atomic_set(&adev->vcn.inst[i].sched_score, 0);
125 
126 		/* VCN UNIFIED TRAP */
127 		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
128 				VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
129 		if (r)
130 			return r;
131 
132 		ring = &adev->vcn.inst[i].ring_enc[0];
133 		ring->use_doorbell = true;
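		/*
		 * Doorbell layout, as computed below: under SR-IOV each instance
		 * takes a contiguous block of (num_enc_rings + 1) slots after the
		 * vcn_ring0_1 base; on bare metal each instance uses a block of
		 * 8 slots starting at offset 2.
		 */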
134 		if (amdgpu_sriov_vf(adev))
135 			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1;
136 		else
137 			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
138 
139 		snprintf(ring->name, sizeof(ring->name), "vcn_unified_%d", i);
140 
141 		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
142 						AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
143 		if (r)
144 			return r;
145 
146 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
147 		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
148 		fw_shared->sq.is_enabled = 1;
149 
150 		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
151 		fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
152 			AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
153 
154 		if (amdgpu_sriov_vf(adev))
155 			fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
156 
157 		if (amdgpu_vcnfw_log)
158 			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
159 	}
160 
161 	if (amdgpu_sriov_vf(adev)) {
162 		r = amdgpu_virt_alloc_mm_table(adev);
163 		if (r)
164 			return r;
165 	}
166 
167 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
168 		adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
169 
170 	return 0;
171 }
172 
173 /**
174  * vcn_v4_0_sw_fini - sw fini for VCN block
175  *
176  * @handle: amdgpu_device pointer
177  *
178  * VCN suspend and free up sw allocation
179  */
180 static int vcn_v4_0_sw_fini(void *handle)
181 {
182 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
183 	int i, r, idx;
184 
185 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
186 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
187 			volatile struct amdgpu_vcn4_fw_shared *fw_shared;
188 
189 			if (adev->vcn.harvest_config & (1 << i))
190 				continue;
191 
192 			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
193 			fw_shared->present_flag_0 = 0;
194 			fw_shared->sq.is_enabled = 0;
195 		}
196 
197 		drm_dev_exit(idx);
198 	}
199 
200 	if (amdgpu_sriov_vf(adev))
201 		amdgpu_virt_free_mm_table(adev);
202 
203 	r = amdgpu_vcn_suspend(adev);
204 	if (r)
205 		return r;
206 
207 	r = amdgpu_vcn_sw_fini(adev);
208 
209 	return r;
210 }
211 
212 /**
213  * vcn_v4_0_hw_init - start and test VCN block
214  *
215  * @handle: amdgpu_device pointer
216  *
217  * Initialize the hardware, boot up the VCPU and do some testing
218  */
219 static int vcn_v4_0_hw_init(void *handle)
220 {
221 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
222 	struct amdgpu_ring *ring;
223 	int i, r;
224 
225 	if (amdgpu_sriov_vf(adev)) {
226 		r = vcn_v4_0_start_sriov(adev);
227 		if (r)
228 			goto done;
229 
230 		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
231 			if (adev->vcn.harvest_config & (1 << i))
232 				continue;
233 
234 			ring = &adev->vcn.inst[i].ring_enc[0];
235 			if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
236 				ring->sched.ready = false;
237 				ring->no_scheduler = true;
238 				dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
239 			} else {
240 				ring->wptr = 0;
241 				ring->wptr_old = 0;
242 				vcn_v4_0_unified_ring_set_wptr(ring);
243 				ring->sched.ready = true;
244 			}
245 		}
246 	} else {
247 		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
248 			if (adev->vcn.harvest_config & (1 << i))
249 				continue;
250 
251 			ring = &adev->vcn.inst[i].ring_enc[0];
252 
253 			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
254 					((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);
255 
256 			r = amdgpu_ring_test_helper(ring);
257 			if (r)
258 				goto done;
259 
260 		}
261 	}
262 
263 done:
264 	if (!r)
265 		DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
266 			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
267 
268 	return r;
269 }
270 
271 /**
272  * vcn_v4_0_hw_fini - stop the hardware block
273  *
274  * @handle: amdgpu_device pointer
275  *
276  * Stop the VCN block, mark ring as not ready any more
277  */
278 static int vcn_v4_0_hw_fini(void *handle)
279 {
280 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
281 	int i;
282 
283 	cancel_delayed_work_sync(&adev->vcn.idle_work);
284 
285 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
286 		if (adev->vcn.harvest_config & (1 << i))
287 			continue;
288 		if (!amdgpu_sriov_vf(adev)) {
289 			if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
290 					(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
291 					 RREG32_SOC15(VCN, i, regUVD_STATUS))) {
292 				vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
293 			}
294 		}
295 
296 	}
297 
298 	return 0;
299 }
300 
301 /**
302  * vcn_v4_0_suspend - suspend VCN block
303  *
304  * @handle: amdgpu_device pointer
305  *
306  * HW fini and suspend VCN block
307  */
308 static int vcn_v4_0_suspend(void *handle)
309 {
310 	int r;
311 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
312 
313 	r = vcn_v4_0_hw_fini(adev);
314 	if (r)
315 		return r;
316 
317 	r = amdgpu_vcn_suspend(adev);
318 
319 	return r;
320 }
321 
322 /**
323  * vcn_v4_0_resume - resume VCN block
324  *
325  * @handle: amdgpu_device pointer
326  *
327  * Resume firmware and hw init VCN block
328  */
329 static int vcn_v4_0_resume(void *handle)
330 {
331 	int r;
332 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
333 
334 	r = amdgpu_vcn_resume(adev);
335 	if (r)
336 		return r;
337 
338 	r = vcn_v4_0_hw_init(adev);
339 
340 	return r;
341 }
342 
343 /**
344  * vcn_v4_0_mc_resume - memory controller programming
345  *
346  * @adev: amdgpu_device pointer
347  * @inst: instance number
348  *
349  * Let the VCN memory controller know its offsets
350  */
351 static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst)
352 {
353 	uint32_t offset, size;
354 	const struct common_firmware_header *hdr;
355 
356 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
357 	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
358 
359 	/* cache window 0: fw */
360 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
361 		WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
362 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
363 		WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
364 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
365 		WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0);
366 		offset = 0;
367 	} else {
368 		WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
369 			lower_32_bits(adev->vcn.inst[inst].gpu_addr));
370 		WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
371 			upper_32_bits(adev->vcn.inst[inst].gpu_addr));
372 		offset = size;
373 		WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
374 	}
375 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size);
376 
377 	/* cache window 1: stack */
378 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
379 		lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
380 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
381 		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
382 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0);
383 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
384 
385 	/* cache window 2: context */
386 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
387 		lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
388 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
389 		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
390 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0);
391 	WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
392 
393 	/* non-cache window */
394 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
395 		lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
396 	WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
397 		upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
398 	WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
399 	WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
400 		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
401 }
402 
403 /**
404  * vcn_v4_0_mc_resume_dpg_mode - memory controller programming for dpg mode
405  *
406  * @adev: amdgpu_device pointer
407  * @inst_idx: instance number index
408  * @indirect: indirectly write sram
409  *
410  * Let the VCN memory controller know its offsets with dpg mode
411  */
412 static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
413 {
414 	uint32_t offset, size;
415 	const struct common_firmware_header *hdr;
416 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
417 	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
418 
419 	/* cache window 0: fw */
420 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
421 		if (!indirect) {
422 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
423 				VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
424 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
425 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
426 				VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
427 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
428 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
429 				VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
430 		} else {
431 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
432 				VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
433 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
434 				VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
435 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
436 				VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
437 		}
438 		offset = 0;
439 	} else {
440 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
441 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
442 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
443 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
444 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
445 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
446 		offset = size;
447 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
448 			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
449 			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
450 	}
451 
452 	if (!indirect)
453 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
454 			VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
455 	else
456 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
457 			VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
458 
459 	/* cache window 1: stack */
460 	if (!indirect) {
461 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
462 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
463 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
464 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
465 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
466 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
467 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
468 			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
469 	} else {
470 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
471 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
472 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
473 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
474 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
475 			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
476 	}
477 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
478 			VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
479 
480 	/* cache window 2: context */
481 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
482 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
483 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
484 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
485 			VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
486 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
487 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
488 			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
489 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
490 			VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
491 
492 	/* non-cache window */
493 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
494 			VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
495 			lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
496 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
497 			VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
498 			upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
499 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
500 			VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
501 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
502 			VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
503 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
504 
505 	/* VCN global tiling registers */
506 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
507 		VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
508 }
509 
510 /**
511  * vcn_v4_0_disable_static_power_gating - disable VCN static power gating
512  *
513  * @adev: amdgpu_device pointer
514  * @inst: instance number
515  *
516  * Disable static power gating for VCN block
517  */
518 static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
519 {
520 	uint32_t data = 0;
521 
522 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
523 		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
524 			| 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
525 			| 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
526 			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
527 			| 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
528 			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
529 			| 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
530 			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
531 			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
532 			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
533 			| 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
534 			| 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
535 			| 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
536 			| 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
537 
538 		WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
539 		SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS,
540 			UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
541 	} else {
542 		uint32_t value;
543 
544 		value = (inst) ? 0x2200800 : 0;
545 		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
546 			| 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
547 			| 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
548 			| 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
549 			| 1 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
550 			| 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
551 			| 1 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
552 			| 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
553 			| 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
554 			| 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
555 			| 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
556 			| 1 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
557 			| 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
558 			| 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
559 
560 		WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
561 		SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, value, 0x3F3FFFFF);
562 	}
563 
564 	data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
565 	data &= ~0x103;
566 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
567 		data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
568 			UVD_POWER_STATUS__UVD_PG_EN_MASK;
569 
570 	WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
571 
572 	return;
573 }
574 
575 /**
576  * vcn_v4_0_enable_static_power_gating - enable VCN static power gating
577  *
578  * @adev: amdgpu_device pointer
579  * @inst: instance number
580  *
581  * Enable static power gating for VCN block
582  */
583 static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
584 {
585 	uint32_t data;
586 
587 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
588 		/* Before power off, this indicator has to be turned on */
589 		data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
590 		data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
591 		data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
592 		WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
593 
594 		data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
595 			| 2 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
596 			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
597 			| 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
598 			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
599 			| 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
600 			| 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
601 			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
602 			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
603 			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
604 			| 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
605 			| 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
606 			| 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
607 			| 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
608 		WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
609 
610 		data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
611 			| 2 << UVD_PGFSM_STATUS__UVDS_PWR_STATUS__SHIFT
612 			| 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
613 			| 2 << UVD_PGFSM_STATUS__UVDTC_PWR_STATUS__SHIFT
614 			| 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
615 			| 2 << UVD_PGFSM_STATUS__UVDTA_PWR_STATUS__SHIFT
616 			| 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
617 			| 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
618 			| 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
619 			| 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
620 			| 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
621 			| 2 << UVD_PGFSM_STATUS__UVDTB_PWR_STATUS__SHIFT
622 			| 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
623 			| 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
624 		SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
625 	}
626 
627 	return;
628 }
629 
630 /**
631  * vcn_v4_0_disable_clock_gating - disable VCN clock gating
632  *
633  * @adev: amdgpu_device pointer
634  * @inst: instance number
635  *
636  * Disable clock gating for VCN block
637  */
638 static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
639 {
640 	uint32_t data;
641 
642 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
643 		return;
644 
645 	/* VCN disable CGC */
646 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
647 	data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
648 	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
649 	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
650 	WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
651 
652 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_GATE);
653 	data &= ~(UVD_CGC_GATE__SYS_MASK
654 		| UVD_CGC_GATE__UDEC_MASK
655 		| UVD_CGC_GATE__MPEG2_MASK
656 		| UVD_CGC_GATE__REGS_MASK
657 		| UVD_CGC_GATE__RBC_MASK
658 		| UVD_CGC_GATE__LMI_MC_MASK
659 		| UVD_CGC_GATE__LMI_UMC_MASK
660 		| UVD_CGC_GATE__IDCT_MASK
661 		| UVD_CGC_GATE__MPRD_MASK
662 		| UVD_CGC_GATE__MPC_MASK
663 		| UVD_CGC_GATE__LBSI_MASK
664 		| UVD_CGC_GATE__LRBBM_MASK
665 		| UVD_CGC_GATE__UDEC_RE_MASK
666 		| UVD_CGC_GATE__UDEC_CM_MASK
667 		| UVD_CGC_GATE__UDEC_IT_MASK
668 		| UVD_CGC_GATE__UDEC_DB_MASK
669 		| UVD_CGC_GATE__UDEC_MP_MASK
670 		| UVD_CGC_GATE__WCB_MASK
671 		| UVD_CGC_GATE__VCPU_MASK
672 		| UVD_CGC_GATE__MMSCH_MASK);
673 
674 	WREG32_SOC15(VCN, inst, regUVD_CGC_GATE, data);
675 	SOC15_WAIT_ON_RREG(VCN, inst, regUVD_CGC_GATE, 0,  0xFFFFFFFF);
676 
677 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
678 	data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
679 		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
680 		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
681 		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
682 		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
683 		| UVD_CGC_CTRL__SYS_MODE_MASK
684 		| UVD_CGC_CTRL__UDEC_MODE_MASK
685 		| UVD_CGC_CTRL__MPEG2_MODE_MASK
686 		| UVD_CGC_CTRL__REGS_MODE_MASK
687 		| UVD_CGC_CTRL__RBC_MODE_MASK
688 		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
689 		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
690 		| UVD_CGC_CTRL__IDCT_MODE_MASK
691 		| UVD_CGC_CTRL__MPRD_MODE_MASK
692 		| UVD_CGC_CTRL__MPC_MODE_MASK
693 		| UVD_CGC_CTRL__LBSI_MODE_MASK
694 		| UVD_CGC_CTRL__LRBBM_MODE_MASK
695 		| UVD_CGC_CTRL__WCB_MODE_MASK
696 		| UVD_CGC_CTRL__VCPU_MODE_MASK
697 		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
698 	WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
699 
700 	data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE);
701 	data |= (UVD_SUVD_CGC_GATE__SRE_MASK
702 		| UVD_SUVD_CGC_GATE__SIT_MASK
703 		| UVD_SUVD_CGC_GATE__SMP_MASK
704 		| UVD_SUVD_CGC_GATE__SCM_MASK
705 		| UVD_SUVD_CGC_GATE__SDB_MASK
706 		| UVD_SUVD_CGC_GATE__SRE_H264_MASK
707 		| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
708 		| UVD_SUVD_CGC_GATE__SIT_H264_MASK
709 		| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
710 		| UVD_SUVD_CGC_GATE__SCM_H264_MASK
711 		| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
712 		| UVD_SUVD_CGC_GATE__SDB_H264_MASK
713 		| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
714 		| UVD_SUVD_CGC_GATE__SCLR_MASK
715 		| UVD_SUVD_CGC_GATE__UVD_SC_MASK
716 		| UVD_SUVD_CGC_GATE__ENT_MASK
717 		| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
718 		| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
719 		| UVD_SUVD_CGC_GATE__SITE_MASK
720 		| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
721 		| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
722 		| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
723 		| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
724 		| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
725 	WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE, data);
726 
727 	data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
728 	data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
729 		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
730 		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
731 		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
732 		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
733 		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
734 		| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
735 		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
736 		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
737 		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
738 	WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
739 }
740 
741 /**
742  * vcn_v4_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
743  *
744  * @adev: amdgpu_device pointer
745  * @sram_sel: sram select
746  * @inst_idx: instance number index
747  * @indirect: indirectly write sram
748  *
749  * Disable clock gating for VCN block with dpg mode
750  */
751 static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
752       int inst_idx, uint8_t indirect)
753 {
754 	uint32_t reg_data = 0;
755 
756 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
757 		return;
758 
759 	/* enable sw clock gating control */
760 	reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
761 	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
762 	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
763 	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
764 		 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
765 		 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
766 		 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
767 		 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
768 		 UVD_CGC_CTRL__SYS_MODE_MASK |
769 		 UVD_CGC_CTRL__UDEC_MODE_MASK |
770 		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
771 		 UVD_CGC_CTRL__REGS_MODE_MASK |
772 		 UVD_CGC_CTRL__RBC_MODE_MASK |
773 		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
774 		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
775 		 UVD_CGC_CTRL__IDCT_MODE_MASK |
776 		 UVD_CGC_CTRL__MPRD_MODE_MASK |
777 		 UVD_CGC_CTRL__MPC_MODE_MASK |
778 		 UVD_CGC_CTRL__LBSI_MODE_MASK |
779 		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
780 		 UVD_CGC_CTRL__WCB_MODE_MASK |
781 		 UVD_CGC_CTRL__VCPU_MODE_MASK);
782 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
783 		VCN, inst_idx, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
784 
785 	/* turn off clock gating */
786 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
787 		VCN, inst_idx, regUVD_CGC_GATE), 0, sram_sel, indirect);
788 
789 	/* turn on SUVD clock gating */
790 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
791 		VCN, inst_idx, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
792 
793 	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
794 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
795 		VCN, inst_idx, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
796 }
797 
798 /**
799  * vcn_v4_0_enable_clock_gating - enable VCN clock gating
800  *
801  * @adev: amdgpu_device pointer
802  * @inst: instance number
803  *
804  * Enable clock gating for VCN block
805  */
806 static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
807 {
808 	uint32_t data;
809 
810 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
811 		return;
812 
813 	/* enable VCN CGC */
814 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
815 	data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
816 	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
817 	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
818 	WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
819 
820 	data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
821 	data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
822 		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
823 		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
824 		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
825 		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
826 		| UVD_CGC_CTRL__SYS_MODE_MASK
827 		| UVD_CGC_CTRL__UDEC_MODE_MASK
828 		| UVD_CGC_CTRL__MPEG2_MODE_MASK
829 		| UVD_CGC_CTRL__REGS_MODE_MASK
830 		| UVD_CGC_CTRL__RBC_MODE_MASK
831 		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
832 		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
833 		| UVD_CGC_CTRL__IDCT_MODE_MASK
834 		| UVD_CGC_CTRL__MPRD_MODE_MASK
835 		| UVD_CGC_CTRL__MPC_MODE_MASK
836 		| UVD_CGC_CTRL__LBSI_MODE_MASK
837 		| UVD_CGC_CTRL__LRBBM_MODE_MASK
838 		| UVD_CGC_CTRL__WCB_MODE_MASK
839 		| UVD_CGC_CTRL__VCPU_MODE_MASK
840 		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
841 	WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
842 
843 	data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
844 	data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
845 		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
846 		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
847 		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
848 		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
849 		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
850 		| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
851 		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
852 		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
853 		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
854 	WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
855 
856 	return;
857 }
858 
859 /**
860  * vcn_v4_0_start_dpg_mode - VCN start with dpg mode
861  *
862  * @adev: amdgpu_device pointer
863  * @inst_idx: instance number index
864  * @indirect: indirectly write sram
865  *
866  * Start VCN block with dpg mode
867  */
868 static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
869 {
870 	volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
871 	struct amdgpu_ring *ring;
872 	uint32_t tmp;
873 
874 	/* disable register anti-hang mechanism */
875 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
876 		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
877 	/* enable dynamic power gating mode */
878 	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS);
879 	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
880 	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
881 	WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp);
882 
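	/*
	 * When 'indirect' is set, the DPG register writes below are staged in
	 * the per-instance dpg_sram buffer (starting at dpg_sram_cpu_addr) and
	 * only applied later when the buffer is handed to the PSP via
	 * psp_update_vcn_sram().
	 */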
883 	if (indirect)
884 		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
885 
886 	/* disable clock gating */
887 	vcn_v4_0_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
888 
889 	/* enable VCPU clock */
890 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
891 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
892 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
893 		VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
894 
895 	/* disable master interrupt */
896 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
897 		VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect);
898 
899 	/* setup regUVD_LMI_CTRL */
900 	tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
901 		UVD_LMI_CTRL__REQ_MODE_MASK |
902 		UVD_LMI_CTRL__CRC_RESET_MASK |
903 		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
904 		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
905 		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
906 		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
907 		0x00100000L);
908 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
909 		VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);
910 
911 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
912 		VCN, inst_idx, regUVD_MPC_CNTL),
913 		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
914 
915 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
916 		VCN, inst_idx, regUVD_MPC_SET_MUXA0),
917 		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
918 		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
919 		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
920 		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
921 
922 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
923 		VCN, inst_idx, regUVD_MPC_SET_MUXB0),
924 		 ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
925 		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
926 		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
927 		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
928 
929 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
930 		VCN, inst_idx, regUVD_MPC_SET_MUX),
931 		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
932 		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
933 		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
934 
935 	vcn_v4_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
936 
937 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
938 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
939 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
940 		VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
941 
942 	/* enable LMI MC and UMC channels */
943 	tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
944 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
945 		VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
946 
947 	/* enable master interrupt */
948 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
949 		VCN, inst_idx, regUVD_MASTINT_EN),
950 		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
951 
952 
953 	if (indirect)
954 		psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
955 			(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
956 				(uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
957 
958 	ring = &adev->vcn.inst[inst_idx].ring_enc[0];
959 
960 	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
961 	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
962 	WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
963 
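	/*
	 * Ring bring-up handshake: disable RB1 and flag FW_QUEUE_RING_RESET in
	 * the shared queue_mode, zero RPTR/WPTR, then re-enable RB1 and clear
	 * the reset/hold-off bits, presumably so the firmware picks up the
	 * fresh ring state.
	 */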
964 	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
965 	tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
966 	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
967 	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
968 	WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
969 	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
970 
971 	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
972 	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
973 	ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
974 
975 	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
976 	tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
977 	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
978 	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
979 
980 	WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
981 			ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
982 			VCN_RB1_DB_CTRL__EN_MASK);
983 
984 	return 0;
985 }
986 
987 
988 /**
989  * vcn_v4_0_start - VCN start
990  *
991  * @adev: amdgpu_device pointer
992  *
993  * Start VCN block
994  */
995 static int vcn_v4_0_start(struct amdgpu_device *adev)
996 {
997 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
998 	struct amdgpu_ring *ring;
999 	uint32_t tmp;
1000 	int i, j, k, r;
1001 
1002 	if (adev->pm.dpm_enabled)
1003 		amdgpu_dpm_enable_uvd(adev, true);
1004 
1005 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1006 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1007 
1008 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1009 			r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
1010 			continue;
1011 		}
1012 
1013 		/* disable VCN power gating */
1014 		vcn_v4_0_disable_static_power_gating(adev, i);
1015 
1016 		/* set VCN status busy */
1017 		tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
1018 		WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
1019 
1020 		/* SW clock gating */
1021 		vcn_v4_0_disable_clock_gating(adev, i);
1022 
1023 		/* enable VCPU clock */
1024 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
1025 				UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
1026 
1027 		/* disable master interrupt */
1028 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
1029 				~UVD_MASTINT_EN__VCPU_EN_MASK);
1030 
1031 		/* enable LMI MC and UMC channels */
1032 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
1033 				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1034 
1035 		tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
1036 		tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1037 		tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1038 		WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
1039 
1040 		/* setup regUVD_LMI_CTRL */
1041 		tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
1042 		WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
1043 				UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
1044 				UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
1045 				UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
1046 				UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
1047 
1048 		/* setup regUVD_MPC_CNTL */
1049 		tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
1050 		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
1051 		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
1052 		WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
1053 
1054 		/* setup UVD_MPC_SET_MUXA0 */
1055 		WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
1056 				((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
1057 				 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
1058 				 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
1059 				 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
1060 
1061 		/* setup UVD_MPC_SET_MUXB0 */
1062 		WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
1063 				((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
1064 				 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
1065 				 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
1066 				 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
1067 
1068 		/* setup UVD_MPC_SET_MUX */
1069 		WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
1070 				((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
1071 				 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
1072 				 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
1073 
1074 		vcn_v4_0_mc_resume(adev, i);
1075 
1076 		/* VCN global tiling registers */
1077 		WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
1078 				adev->gfx.config.gb_addr_config);
1079 
1080 		/* unblock VCPU register access */
1081 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
1082 				~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1083 
1084 		/* release VCPU reset to boot */
1085 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
1086 				~UVD_VCPU_CNTL__BLK_RST_MASK);
1087 
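		/*
		 * Wait for the VCPU to report boot: up to 10 attempts, each
		 * polling UVD_STATUS for the report bit (0x2) 100 times with
		 * 10 ms delays; on bare metal a failed attempt toggles BLK_RST
		 * to reset the VCPU before retrying.
		 */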
1088 		for (j = 0; j < 10; ++j) {
1089 			uint32_t status;
1090 
1091 			for (k = 0; k < 100; ++k) {
1092 				status = RREG32_SOC15(VCN, i, regUVD_STATUS);
1093 				if (status & 2)
1094 					break;
1095 				mdelay(10);
1096 				if (amdgpu_emu_mode == 1)
1097 					drm_msleep(1);
1098 			}
1099 
1100 			if (amdgpu_emu_mode == 1) {
1101 				r = -1;
1102 				if (status & 2) {
1103 					r = 0;
1104 					break;
1105 				}
1106 			} else {
1107 				r = 0;
1108 				if (status & 2)
1109 					break;
1110 
1111 				dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
1112 				WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
1113 							UVD_VCPU_CNTL__BLK_RST_MASK,
1114 							~UVD_VCPU_CNTL__BLK_RST_MASK);
1115 				mdelay(10);
1116 				WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
1117 						~UVD_VCPU_CNTL__BLK_RST_MASK);
1118 
1119 				mdelay(10);
1120 				r = -1;
1121 			}
1122 		}
1123 
1124 		if (r) {
1125 			dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
1126 			return r;
1127 		}
1128 
1129 		/* enable master interrupt */
1130 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
1131 				UVD_MASTINT_EN__VCPU_EN_MASK,
1132 				~UVD_MASTINT_EN__VCPU_EN_MASK);
1133 
1134 		/* clear the busy bit of VCN_STATUS */
1135 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
1136 				~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1137 
1138 		ring = &adev->vcn.inst[i].ring_enc[0];
1139 		WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
1140 				ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
1141 				VCN_RB1_DB_CTRL__EN_MASK);
1142 
1143 		WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
1144 		WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1145 		WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
1146 
1147 		tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
1148 		tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
1149 		WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
1150 		fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
1151 		WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
1152 		WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
1153 
1154 		tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
1155 		WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
1156 		ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
1157 
1158 		tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
1159 		tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
1160 		WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
1161 		fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
1162 	}
1163 
1164 	return 0;
1165 }
1166 
1167 static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
1168 {
1169 	int i;
1170 	struct amdgpu_ring *ring_enc;
1171 	uint64_t cache_addr;
1172 	uint64_t rb_enc_addr;
1173 	uint64_t ctx_addr;
1174 	uint32_t param, resp, expected;
1175 	uint32_t offset, cache_size;
1176 	uint32_t tmp, timeout;
1177 
1178 	struct amdgpu_mm_table *table = &adev->virt.mm_table;
1179 	uint32_t *table_loc;
1180 	uint32_t table_size;
1181 	uint32_t size, size_dw;
1182 	uint32_t init_status;
1183 	uint32_t enabled_vcn;
1184 
1185 	struct mmsch_v4_0_cmd_direct_write
1186 		direct_wt = { {0} };
1187 	struct mmsch_v4_0_cmd_direct_read_modify_write
1188 		direct_rd_mod_wt = { {0} };
1189 	struct mmsch_v4_0_cmd_end end = { {0} };
1190 	struct mmsch_v4_0_init_header header;
1191 
1192 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1193 	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
1194 
1195 	direct_wt.cmd_header.command_type =
1196 		MMSCH_COMMAND__DIRECT_REG_WRITE;
1197 	direct_rd_mod_wt.cmd_header.command_type =
1198 		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
1199 	end.cmd_header.command_type =
1200 		MMSCH_COMMAND__END;
1201 
1202 	header.version = MMSCH_VERSION;
1203 	header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2;
1204 	for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
1205 		header.inst[i].init_status = 0;
1206 		header.inst[i].table_offset = 0;
1207 		header.inst[i].table_size = 0;
1208 	}
1209 
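	/*
	 * Build the MMSCH descriptor: the init header is followed by one table
	 * of direct write / read-modify-write packets per active instance,
	 * which the MMSCH replays on behalf of this VF.
	 */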
1210 	table_loc = (uint32_t *)table->cpu_addr;
1211 	table_loc += header.total_size;
1212 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
1213 		if (adev->vcn.harvest_config & (1 << i))
1214 			continue;
1215 
1216 		table_size = 0;
1217 
1218 		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
1219 			regUVD_STATUS),
1220 			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
1221 
1222 		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
1223 
1224 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1225 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1226 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1227 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
1228 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1229 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1230 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
1231 			offset = 0;
1232 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1233 				regUVD_VCPU_CACHE_OFFSET0),
1234 				0);
1235 		} else {
1236 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1237 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1238 				lower_32_bits(adev->vcn.inst[i].gpu_addr));
1239 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1240 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1241 				upper_32_bits(adev->vcn.inst[i].gpu_addr));
1242 			offset = cache_size;
1243 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1244 				regUVD_VCPU_CACHE_OFFSET0),
1245 				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
1246 		}
1247 
1248 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1249 			regUVD_VCPU_CACHE_SIZE0),
1250 			cache_size);
1251 
1252 		cache_addr = adev->vcn.inst[i].gpu_addr + offset;
1253 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1254 			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
1255 			lower_32_bits(cache_addr));
1256 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1257 			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
1258 			upper_32_bits(cache_addr));
1259 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1260 			regUVD_VCPU_CACHE_OFFSET1),
1261 			0);
1262 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1263 			regUVD_VCPU_CACHE_SIZE1),
1264 			AMDGPU_VCN_STACK_SIZE);
1265 
1266 		cache_addr = adev->vcn.inst[i].gpu_addr + offset +
1267 			AMDGPU_VCN_STACK_SIZE;
1268 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1269 			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
1270 			lower_32_bits(cache_addr));
1271 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1272 			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
1273 			upper_32_bits(cache_addr));
1274 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1275 			regUVD_VCPU_CACHE_OFFSET2),
1276 			0);
1277 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1278 			regUVD_VCPU_CACHE_SIZE2),
1279 			AMDGPU_VCN_CONTEXT_SIZE);
1280 
1281 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1282 		rb_setup = &fw_shared->rb_setup;
1283 
1284 		ring_enc = &adev->vcn.inst[i].ring_enc[0];
1285 		ring_enc->wptr = 0;
1286 		rb_enc_addr = ring_enc->gpu_addr;
1287 
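		/*
		 * Under SR-IOV the ring buffer parameters go through the
		 * fw_shared rb_setup block (flagged with
		 * AMDGPU_VCN_VF_RB_SETUP_FLAG below) instead of direct register
		 * writes, leaving RB programming to MMSCH/firmware.
		 */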
1288 		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
1289 		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
1290 		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
1291 		rb_setup->rb_size = ring_enc->ring_size / 4;
1292 		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
1293 
1294 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1295 			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
1296 			lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
1297 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1298 			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
1299 			upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
1300 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1301 			regUVD_VCPU_NONCACHE_SIZE0),
1302 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
1303 
1304 		/* add end packet */
1305 		MMSCH_V4_0_INSERT_END();
1306 
1307 		/* refine header */
1308 		header.inst[i].init_status = 0;
1309 		header.inst[i].table_offset = header.total_size;
1310 		header.inst[i].table_size = table_size;
1311 		header.total_size += table_size;
1312 	}
1313 
1314 	/* Update init table header in memory */
1315 	size = sizeof(struct mmsch_v4_0_init_header);
1316 	table_loc = (uint32_t *)table->cpu_addr;
1317 	memcpy((void *)table_loc, &header, size);
1318 
1319 	/* message MMSCH (in VCN[0]) to initialize this client
1320 	 * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
1321 	 * of memory descriptor location
1322 	 */
1323 	ctx_addr = table->gpu_addr;
1324 	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
1325 	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
1326 
1327 	/* 2, update vmid of descriptor */
1328 	tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
1329 	tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
1330 	/* use domain0 for MM scheduler */
1331 	tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
1332 	WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
1333 
1334 	/* 3, notify mmsch about the size of this descriptor */
1335 	size = header.total_size;
1336 	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
1337 
1338 	/* 4, set resp to zero */
1339 	WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
1340 
1341 	/* 5, kick off the initialization and wait until
1342 	 * MMSCH_VF_MAILBOX_RESP becomes non-zero
1343 	 */
1344 	param = 0x00000001;
1345 	WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
1346 	tmp = 0;
1347 	timeout = 1000;
1348 	resp = 0;
1349 	expected = MMSCH_VF_MAILBOX_RESP__OK;
1350 	while (resp != expected) {
1351 		resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
1352 		if (resp != 0)
1353 			break;
1354 
1355 		udelay(10);
1356 		tmp = tmp + 10;
1357 		if (tmp >= timeout) {
1358 			DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
1359 				" waiting for regMMSCH_VF_MAILBOX_RESP "\
1360 				"(expected=0x%08x, readback=0x%08x)\n",
1361 				tmp, expected, resp);
1362 			return -EBUSY;
1363 		}
1364 	}
1365 	enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
1366 	init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status;
1367 	if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
1368 	&& init_status != MMSCH_VF_ENGINE_STATUS__PASS)
1369 		DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
1370 			"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
1371 
1372 	return 0;
1373 }
1374 
1375 /**
1376  * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
1377  *
1378  * @adev: amdgpu_device pointer
1379  * @inst_idx: instance number index
1380  *
1381  * Stop VCN block with dpg mode
1382  */
1383 static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
1384 {
1385 	uint32_t tmp;
1386 
1387 	/* Wait for power status to be 1 */
1388 	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
1389 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1390 
1391 	/* wait for read ptr to be equal to write ptr */
1392 	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
1393 	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1394 
1395 	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
1396 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1397 
1398 	/* disable dynamic power gating mode */
1399 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
1400 		~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1401 }
1402 
1403 /**
1404  * vcn_v4_0_stop - VCN stop
1405  *
1406  * @adev: amdgpu_device pointer
1407  *
1408  * Stop VCN block
1409  */
1410 static int vcn_v4_0_stop(struct amdgpu_device *adev)
1411 {
1412 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1413 	uint32_t tmp;
1414 	int i, r = 0;
1415 
1416 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1417 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1418 		fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
1419 
1420 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1421 			vcn_v4_0_stop_dpg_mode(adev, i);
1422 			continue;
1423 		}
1424 
1425 		/* wait for vcn idle */
1426 		r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
1427 		if (r)
1428 			return r;
1429 
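		/* wait for LMI read/write traffic to drain before stalling the UMC arbiter */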
1430 		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1431 			UVD_LMI_STATUS__READ_CLEAN_MASK |
1432 			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1433 			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1434 		r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
1435 		if (r)
1436 			return r;
1437 
1438 		/* disable LMI UMC channel */
1439 		tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
1440 		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1441 		WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
1442 		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
1443 			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1444 		r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
1445 		if (r)
1446 			return r;
1447 
1448 		/* block VCPU register access */
1449 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
1450 				UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
1451 				~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1452 
1453 		/* reset VCPU */
1454 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
1455 				UVD_VCPU_CNTL__BLK_RST_MASK,
1456 				~UVD_VCPU_CNTL__BLK_RST_MASK);
1457 
1458 		/* disable VCPU clock */
1459 		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
1460 				~(UVD_VCPU_CNTL__CLK_EN_MASK));
1461 
1462 		/* apply soft reset */
1463 		tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
1464 		tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1465 		WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
1466 		tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
1467 		tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1468 		WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
1469 
1470 		/* clear status */
1471 		WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
1472 
1473 		/* apply HW clock gating */
1474 		vcn_v4_0_enable_clock_gating(adev, i);
1475 
1476 		/* enable VCN power gating */
1477 		vcn_v4_0_enable_static_power_gating(adev, i);
1478 	}
1479 
1480 	if (adev->pm.dpm_enabled)
1481 		amdgpu_dpm_enable_uvd(adev, false);
1482 
1483 	return 0;
1484 }
1485 
1486 /**
1487  * vcn_v4_0_pause_dpg_mode - VCN pause with dpg mode
1488  *
1489  * @adev: amdgpu_device pointer
1490  * @inst_idx: instance number index
1491  * @new_state: pause state
1492  *
1493  * Pause dpg mode for VCN block
1494  */
1495 static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
1496       struct dpg_pause_state *new_state)
1497 {
1498 	uint32_t reg_data = 0;
1499 	int ret_code;
1500 
1501 	/* pause/unpause if state is changed */
1502 	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
1503 		DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d",
1504 			adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
1505 		reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) &
1506 			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1507 
1508 		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
1509 			ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
1510 				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1511 
1512 			if (!ret_code) {
1513 				/* pause DPG */
1514 				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1515 				WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
1516 
1517 				/* wait for ACK */
1518 				SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE,
1519 					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
1520 					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1521 
1522 				SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS,
1523 					UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1524 			}
1525 		} else {
1526 			/* unpause dpg, no need to wait */
1527 			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1528 			WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
1529 		}
1530 		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
1531 	}
1532 
1533 	return 0;
1534 }
1535 
1536 /**
1537  * vcn_v4_0_unified_ring_get_rptr - get unified read pointer
1538  *
1539  * @ring: amdgpu_ring pointer
1540  *
1541  * Returns the current hardware unified read pointer
1542  */
1543 static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
1544 {
1545 	struct amdgpu_device *adev = ring->adev;
1546 
1547 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1548 		DRM_ERROR("wrong ring id used in %s", __func__);
1549 
1550 	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
1551 }
1552 
1553 /**
1554  * vcn_v4_0_unified_ring_get_wptr - get unified write pointer
1555  *
1556  * @ring: amdgpu_ring pointer
1557  *
1558  * Returns the current hardware unified write pointer
1559  */
1560 static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
1561 {
1562 	struct amdgpu_device *adev = ring->adev;
1563 
1564 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1565 		DRM_ERROR("wrong ring id used in %s", __func__);
1566 
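	/*
	 * With a doorbell the write pointer lives in the CPU-visible
	 * shadow; otherwise read it back from the ring register.
	 */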
1567 	if (ring->use_doorbell)
1568 		return *ring->wptr_cpu_addr;
1569 	else
1570 		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
1571 }
1572 
1573 /**
1574  * vcn_v4_0_unified_ring_set_wptr - set enc write pointer
1575  *
1576  * @ring: amdgpu_ring pointer
1577  *
1578  * Commits the enc write pointer to the hardware
1579  */
1580 static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
1581 {
1582 	struct amdgpu_device *adev = ring->adev;
1583 
1584 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1585 		DRM_ERROR("wrong ring id used in %s", __func__);
1586 
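	/*
	 * Update the wptr shadow first, then ring the doorbell; without a
	 * doorbell fall back to writing the ring register directly.
	 */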
1587 	if (ring->use_doorbell) {
1588 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
1589 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1590 	} else {
1591 		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
1592 	}
1593 }
1594 
1595 static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
1596 				struct amdgpu_job *job)
1597 {
1598 	struct drm_gpu_scheduler **scheds;
1599 
1600 	/* The create msg must be in the first IB submitted */
1601 	if (atomic_read(&job->base.entity->fence_seq))
1602 		return -EINVAL;
1603 
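	/*
	 * Pin the entity to the first scheduler of the VCN ENC priority-0
	 * list, i.e. the first available VCN instance.
	 */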
1604 	scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
1605 		[AMDGPU_RING_PRIO_0].sched;
1606 	drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
1607 	return 0;
1608 }
1609 
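/*
 * Inspect a decode message referenced by an IB.  Layout assumed from the
 * checks below: msg[1] is the message size, msg[2] the number of buffer
 * descriptors, msg[3] the message type; descriptors start at msg[6] with a
 * stride of four dwords.  Jobs creating sessions for codecs other than
 * H264, HEVC and VP9 are restricted to the first VCN instance.
 */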
1610 static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
1611 			    uint64_t addr)
1612 {
1613 	struct ttm_operation_ctx ctx = { false, false };
1614 	struct amdgpu_bo_va_mapping *map;
1615 	uint32_t *msg, num_buffers;
1616 	struct amdgpu_bo *bo;
1617 	uint64_t start, end;
1618 	unsigned int i;
1619 	void *ptr;
1620 	int r;
1621 
1622 	addr &= AMDGPU_GMC_HOLE_MASK;
1623 	r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
1624 	if (r) {
1625 		DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
1626 		return r;
1627 	}
1628 
1629 	start = map->start * AMDGPU_GPU_PAGE_SIZE;
1630 	end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
1631 	if (addr & 0x7) {
1632 		DRM_ERROR("VCN messages must be 8 byte aligned!\n");
1633 		return -EINVAL;
1634 	}
1635 
1636 	bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
1637 	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
1638 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1639 	if (r) {
1640 		DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
1641 		return r;
1642 	}
1643 
1644 	r = amdgpu_bo_kmap(bo, &ptr);
1645 	if (r) {
1646 		DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
1647 		return r;
1648 	}
1649 
1650 	msg = ptr + addr - start;
1651 
1652 	/* Check length */
1653 	if (msg[1] > end - addr) {
1654 		r = -EINVAL;
1655 		goto out;
1656 	}
1657 
1658 	if (msg[3] != RDECODE_MSG_CREATE)
1659 		goto out;
1660 
1661 	num_buffers = msg[2];
1662 	for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
1663 		uint32_t offset, size, *create;
1664 
1665 		if (msg[0] != RDECODE_MESSAGE_CREATE)
1666 			continue;
1667 
1668 		offset = msg[1];
1669 		size = msg[2];
1670 
1671 		if (offset + size > end) {
1672 			r = -EINVAL;
1673 			goto out;
1674 		}
1675 
1676 		create = ptr + addr + offset - start;
1677 
1678 		/* H264, HEVC and VP9 can run on any instance */
1679 		if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
1680 			continue;
1681 
1682 		r = vcn_v4_0_limit_sched(p, job);
1683 		if (r)
1684 			goto out;
1685 	}
1686 
1687 out:
1688 	amdgpu_bo_kunmap(bo);
1689 	return r;
1690 }
1691 
1692 #define RADEON_VCN_ENGINE_TYPE_DECODE                                 (0x00000003)
1693 
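/*
 * Only instances other than 0 need their command streams checked: dword 6
 * of the unified queue IB header carries the engine type and the decode
 * buffer descriptor starts at dword 10.  When a message buffer address is
 * present, vcn_v4_0_dec_msg() decides whether the job must be limited to
 * the first instance.
 */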
1694 static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
1695 					   struct amdgpu_job *job,
1696 					   struct amdgpu_ib *ib)
1697 {
1698 	struct amdgpu_ring *ring = amdgpu_job_ring(job);
1699 	struct amdgpu_vcn_decode_buffer *decode_buffer;
1700 	uint64_t addr;
1701 	uint32_t val;
1702 
1703 	/* The first instance can decode anything */
1704 	if (!ring->me)
1705 		return 0;
1706 
1707 	/* unified queue ib header has 8 double words. */
1708 	if (ib->length_dw < 8)
1709 		return 0;
1710 
1711 	val = amdgpu_ib_get_value(ib, 6); /* RADEON_VCN_ENGINE_TYPE */
1712 	if (val != RADEON_VCN_ENGINE_TYPE_DECODE)
1713 		return 0;
1714 
1715 	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
1716 
1717 	if (!(decode_buffer->valid_buf_flag & 0x1))
1718 		return 0;
1719 
1720 	addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
1721 		decode_buffer->msg_buffer_address_lo;
1722 	return vcn_v4_0_dec_msg(p, job, addr);
1723 }
1724 
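/*
 * The unified (shared enc/dec) ring reuses the VCN 2.0 ENC emit helpers;
 * only the rptr/wptr handling and the in-place CS patching are v4.0
 * specific.
 */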
1725 static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
1726 	.type = AMDGPU_RING_TYPE_VCN_ENC,
1727 	.align_mask = 0x3f,
1728 	.nop = VCN_ENC_CMD_NO_OP,
1729 	.vmhub = AMDGPU_MMHUB_0,
1730 	.get_rptr = vcn_v4_0_unified_ring_get_rptr,
1731 	.get_wptr = vcn_v4_0_unified_ring_get_wptr,
1732 	.set_wptr = vcn_v4_0_unified_ring_set_wptr,
1733 	.patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,
1734 	.emit_frame_size =
1735 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1736 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1737 		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
1738 		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
1739 		1, /* vcn_v2_0_enc_ring_insert_end */
1740 	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
1741 	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
1742 	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
1743 	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
1744 	.test_ring = amdgpu_vcn_enc_ring_test_ring,
1745 	.test_ib = amdgpu_vcn_unified_ring_test_ib,
1746 	.insert_nop = amdgpu_ring_insert_nop,
1747 	.insert_end = vcn_v2_0_enc_ring_insert_end,
1748 	.pad_ib = amdgpu_ring_generic_pad_ib,
1749 	.begin_use = amdgpu_vcn_ring_begin_use,
1750 	.end_use = amdgpu_vcn_ring_end_use,
1751 	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
1752 	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
1753 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1754 };
1755 
1756 /**
1757  * vcn_v4_0_set_unified_ring_funcs - set unified ring functions
1758  *
1759  * @adev: amdgpu_device pointer
1760  *
1761  * Set unified ring functions
1762  */
1763 static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
1764 {
1765 	int i;
1766 
1767 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1768 		if (adev->vcn.harvest_config & (1 << i))
1769 			continue;
1770 
1771 		adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs;
1772 		adev->vcn.inst[i].ring_enc[0].me = i;
1773 
1774 		DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
1775 	}
1776 }
1777 
1778 /**
1779  * vcn_v4_0_is_idle - check VCN block is idle
1780  *
1781  * @handle: amdgpu_device pointer
1782  *
1783  * Check whether VCN block is idle
1784  */
1785 static bool vcn_v4_0_is_idle(void *handle)
1786 {
1787 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1788 	int i, ret = 1;
1789 
1790 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1791 		if (adev->vcn.harvest_config & (1 << i))
1792 			continue;
1793 
1794 		ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE);
1795 	}
1796 
1797 	return ret;
1798 }
1799 
1800 /**
1801  * vcn_v4_0_wait_for_idle - wait for VCN block idle
1802  *
1803  * @handle: amdgpu_device pointer
1804  *
1805  * Wait for VCN block idle
1806  */
1807 static int vcn_v4_0_wait_for_idle(void *handle)
1808 {
1809 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1810 	int i, ret = 0;
1811 
1812 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1813 		if (adev->vcn.harvest_config & (1 << i))
1814 			continue;
1815 
1816 		ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE,
1817 			UVD_STATUS__IDLE);
1818 		if (ret)
1819 			return ret;
1820 	}
1821 
1822 	return ret;
1823 }
1824 
1825 /**
1826  * vcn_v4_0_set_clockgating_state - set VCN block clockgating state
1827  *
1828  * @handle: amdgpu_device pointer
1829  * @state: clock gating state
1830  *
1831  * Set VCN block clockgating state
1832  */
1833 static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
1834 {
1835 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1836 	bool enable = (state == AMD_CG_STATE_GATE);
1837 	int i;
1838 
1839 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1840 		if (adev->vcn.harvest_config & (1 << i))
1841 			continue;
1842 
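		/* clock gating can only be enabled while the instance is idle */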
1843 		if (enable) {
1844 			if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
1845 				return -EBUSY;
1846 			vcn_v4_0_enable_clock_gating(adev, i);
1847 		} else {
1848 			vcn_v4_0_disable_clock_gating(adev, i);
1849 		}
1850 	}
1851 
1852 	return 0;
1853 }
1854 
1855 /**
1856  * vcn_v4_0_set_powergating_state - set VCN block powergating state
1857  *
1858  * @handle: amdgpu_device pointer
1859  * @state: power gating state
1860  *
1861  * Set VCN block powergating state
1862  */
1863 static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state)
1864 {
1865 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1866 	int ret;
1867 
1868 	/* For SRIOV, the guest should not control VCN power-gating;
1869 	 * MMSCH FW controls both power-gating and clock-gating, so the
1870 	 * guest should avoid touching CGC and PG.
1871 	 */
1872 	if (amdgpu_sriov_vf(adev)) {
1873 		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
1874 		return 0;
1875 	}
1876 
1877 	if (state == adev->vcn.cur_state)
1878 		return 0;
1879 
1880 	if (state == AMD_PG_STATE_GATE)
1881 		ret = vcn_v4_0_stop(adev);
1882 	else
1883 		ret = vcn_v4_0_start(adev);
1884 
1885 	if (!ret)
1886 		adev->vcn.cur_state = state;
1887 
1888 	return ret;
1889 }
1890 
1891 /**
1892  * vcn_v4_0_set_interrupt_state - set VCN block interrupt state
1893  *
1894  * @adev: amdgpu_device pointer
1895  * @source: interrupt sources
1896  * @type: interrupt types
1897  * @state: interrupt states
1898  *
1899  * Set VCN block interrupt state
1900  */
1901 static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
1902       unsigned type, enum amdgpu_interrupt_state state)
1903 {
1904 	return 0;
1905 }
1906 
1907 /**
1908  * vcn_v4_0_process_interrupt - process VCN block interrupt
1909  *
1910  * @adev: amdgpu_device pointer
1911  * @source: interrupt sources
1912  * @entry: interrupt entry from clients and sources
1913  *
1914  * Process VCN block interrupt
1915  */
1916 static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
1917       struct amdgpu_iv_entry *entry)
1918 {
1919 	uint32_t ip_instance;
1920 
1921 	switch (entry->client_id) {
1922 	case SOC15_IH_CLIENTID_VCN:
1923 		ip_instance = 0;
1924 		break;
1925 	case SOC15_IH_CLIENTID_VCN1:
1926 		ip_instance = 1;
1927 		break;
1928 	default:
1929 		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
1930 		return 0;
1931 	}
1932 
1933 	DRM_DEBUG("IH: VCN TRAP\n");
1934 
1935 	switch (entry->src_id) {
1936 	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
1937 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
1938 		break;
1939 	default:
1940 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1941 			  entry->src_id, entry->src_data[0]);
1942 		break;
1943 	}
1944 
1945 	return 0;
1946 }
1947 
1948 static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
1949 	.set = vcn_v4_0_set_interrupt_state,
1950 	.process = vcn_v4_0_process_interrupt,
1951 };
1952 
1953 /**
1954  * vcn_v4_0_set_irq_funcs - set VCN block interrupt irq functions
1955  *
1956  * @adev: amdgpu_device pointer
1957  *
1958  * Set VCN block interrupt irq functions
1959  */
1960 static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1961 {
1962 	int i;
1963 
1964 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1965 		if (adev->vcn.harvest_config & (1 << i))
1966 			continue;
1967 
1968 		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
1969 		adev->vcn.inst[i].irq.funcs = &vcn_v4_0_irq_funcs;
1970 	}
1971 }
1972 
1973 static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
1974 	.name = "vcn_v4_0",
1975 	.early_init = vcn_v4_0_early_init,
1976 	.late_init = NULL,
1977 	.sw_init = vcn_v4_0_sw_init,
1978 	.sw_fini = vcn_v4_0_sw_fini,
1979 	.hw_init = vcn_v4_0_hw_init,
1980 	.hw_fini = vcn_v4_0_hw_fini,
1981 	.suspend = vcn_v4_0_suspend,
1982 	.resume = vcn_v4_0_resume,
1983 	.is_idle = vcn_v4_0_is_idle,
1984 	.wait_for_idle = vcn_v4_0_wait_for_idle,
1985 	.check_soft_reset = NULL,
1986 	.pre_soft_reset = NULL,
1987 	.soft_reset = NULL,
1988 	.post_soft_reset = NULL,
1989 	.set_clockgating_state = vcn_v4_0_set_clockgating_state,
1990 	.set_powergating_state = vcn_v4_0_set_powergating_state,
1991 };
1992 
1993 const struct amdgpu_ip_block_version vcn_v4_0_ip_block =
1994 {
1995 	.type = AMD_IP_BLOCK_TYPE_VCN,
1996 	.major = 4,
1997 	.minor = 0,
1998 	.rev = 0,
1999 	.funcs = &vcn_v4_0_ip_funcs,
2000 };
2001