/*	$NetBSD: amdgpu_vce_v3_0.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_vce_v3_0.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $");

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "ivsrcid/ivsrcid_vislands30.h"


#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define mmGRBM_GFX_INDEX_DEFAULT	0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

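/*
 * GET_VCE_INSTANCE(i) builds a GRBM_GFX_INDEX value that routes the indexed
 * VCE register accesses below to instance i (instance number shifted into
 * the VCE_INSTANCE field, all pipes selected); writing
 * mmGRBM_GFX_INDEX_DEFAULT afterwards restores the default routing.
 */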
#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
					| GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 v;

	mutex_lock(&adev->grbm_idx_mutex);
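	/*
	 * Point GRBM_GFX_INDEX at the VCE instance that is actually
	 * present: instance 0 unless it has been harvested, in which
	 * case instance 1.
	 */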
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		v = RREG32(mmVCE_RB_RPTR);
	else if (ring->me == 1)
		v = RREG32(mmVCE_RB_RPTR2);
	else
		v = RREG32(mmVCE_RB_RPTR3);

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return v;
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 v;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		v = RREG32(mmVCE_RB_WPTR);
	else if (ring->me == 1)
		v = RREG32(mmVCE_RB_WPTR2);
	else
		v = RREG32(mmVCE_RB_WPTR3);

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return v;
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
	else
		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/*
	 * This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

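	/*
	 * Poll VCE_STATUS for the firmware-loaded flag for roughly a
	 * second; if the firmware still has not come up, soft-reset the
	 * ECPU and try again, giving up after ten attempts.
	 */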
	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
		mdelay(10);
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		/*
		 * Program the instance 0 register space when both instances
		 * (or only instance 0) are available; program the instance 1
		 * register space only when instance 1 alone is available.
		 */
		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
			ring = &adev->vce.ring[0];
			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

			ring = &adev->vce.ring[1];
			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

			ring = &adev->vce.ring[2];
			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
		}

		vce_v3_0_mc_resume(adev, idx);
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

		/* hold on ECPU */
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

		/* clear VCE STATUS */
		WREG32(mmVCE_STATUS, 0);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS	0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT		27
#define VCE_HARVEST_FUSE_MACRO__MASK		0x18000000

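/*
 * The two-bit fuse field above (bits 28:27) records which VCE instances
 * have been harvested; vce_v3_0_get_harvest_config() decodes it into
 * AMDGPU_VCE_HARVEST_* flags.
 */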
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY))
		return AMDGPU_VCE_HARVEST_VCE1;

	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		if ((adev->asic_type == CHIP_POLARIS10) ||
		    (adev->asic_type == CHIP_POLARIS11) ||
		    (adev->asic_type == CHIP_POLARIS12) ||
		    (adev->asic_type == CHIP_VEGAM))
			return AMDGPU_VCE_HARVEST_VCE1;

		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	adev->vce.num_rings = 3;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r, i;

	/* VCE */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	/* 52.8.3 required for 3 ring support */
	if (adev->vce.fw_version < FW_52_8_3)
		adev->vce.num_rings = 2;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		snprintf(ring->name, sizeof ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	vce_v3_0_override_vce_clock_gating(adev, true);

	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	vce_v3_0_stop(adev);
	return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v3_0_hw_init(adev);
}

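/**
 * vce_v3_0_mc_resume - program memory controller and VCPU cache
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Set up the clock gating and LMI defaults, point the VCPU cache BARs at
 * the VCE firmware in memory, and program the per-instance stack and data
 * offsets for the given instance.
 */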
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

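	/*
	 * Both instances share the firmware image; each instance then gets
	 * its own stack and data region, so instance 1 starts past
	 * instance 0's stack and data.
	 */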
	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L	/* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L	/* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L	/* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 *VCE_STATUS
	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 *|----+----+-----------+----+----+----+----------+---------+----|
	 *|bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

		if (!enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks.
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = 0;

	if (state == AMD_PG_STATE_GATE) {
		ret = vce_v3_0_stop(adev);
		if (ret)
			goto out;
	} else {
		ret = vce_v3_0_start(adev);
		if (ret)
			goto out;
	}

out:
	return ret;
}

static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	mutex_lock(&adev->pm.mutex);

	if (adev->flags & AMD_IS_APU)
		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
	else
		data = RREG32_SMC(ixCURRENT_PG_STATUS);

	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
		goto out;
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

	/* AMD_CG_SUPPORT_VCE_MGCG */
	data = RREG32(mmVCE_CLOCK_GATING_A);
	if (data & (0x04 << 4))
		*flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
	mutex_unlock(&adev->pm.mutex);
}

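/**
 * vce_v3_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve the vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Emit a VM-mode IB command: the vmid followed by the 64-bit IB address
 * and its length in dwords.
 */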
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
				  struct amdgpu_job *job,
				  struct amdgpu_ib *ib,
				  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

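/**
 * vce_v3_0_emit_vm_flush - flush the VM for this ring
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM instance to flush
 * @pd_addr: page directory address
 *
 * Update the page table base for @vmid and flush its TLB.
 */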
static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, pd_addr >> 12);

	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

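/**
 * vce_v3_0_emit_pipeline_sync - wait for pending work
 *
 * @ring: amdgpu_ring pointer
 *
 * Emit a WAIT_GE command so the ring stalls until the fence value at the
 * fence address reaches the current sync sequence number.
 */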
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
	.get_clockgating_state = vce_v3_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size =
		4 + /* vce_v3_0_emit_pipeline_sync */
		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	if (adev->asic_type >= CHIP_STONEY) {
		for (i = 0; i < adev->vce.num_rings; i++) {
			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
			adev->vce.ring[i].me = i;
		}
		DRM_INFO("VCE enabled in VM mode\n");
	} else {
		for (i = 0; i < adev->vce.num_rings; i++) {
			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
			adev->vce.ring[i].me = i;
		}
		DRM_INFO("VCE enabled in physical mode\n");
	}
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
};

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 1,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 4,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};