/*	$NetBSD: amdgpu_df_v3_6.c,v 1.5 2021/12/19 11:59:53 riastradh Exp $	*/

/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_df_v3_6.c,v 1.5 2021/12/19 11:59:53 riastradh Exp $");

#include "amdgpu.h"
#include "df_v3_6.h"

#include "df/df_3_6_default.h"
#include "df/df_3_6_offset.h"
#include "df/df_3_6_sh_mask.h"

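/*
 * Per-instance copies of the DF SMN registers used below are spaced
 * DF_3_6_SMN_REG_INST_DIST bytes apart; DF_3_6_INST_CNT is the number of
 * instances walked in df_v3_6_get_df_inst_id().  (Descriptive note inferred
 * from the register usage in this file, not from hardware documentation.)
 */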
#define DF_3_6_SMN_REG_INST_DIST	0x8
#define DF_3_6_INST_CNT			8

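/*
 * Map the DramBaseAddress0.IntLvNumChan encoding (as returned by
 * df_v3_6_get_fb_channel_number()) to a channel count; zero entries are
 * encodings this table does not recognize.
 */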
static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
				       16, 32, 0, 0, 0, 2, 4, 8};

#ifndef __NetBSD__ /* XXX amdgpu sysfs */

/* init df format attrs */
AMDGPU_PMU_ATTR(event, "config:0-7");
AMDGPU_PMU_ATTR(instance, "config:8-15");
AMDGPU_PMU_ATTR(umask, "config:16-23");

/* df format attributes */
static struct attribute *df_v3_6_format_attrs[] = {
	&pmu_attr_event.attr,
	&pmu_attr_instance.attr,
	&pmu_attr_umask.attr,
	NULL
};

/* df format attribute group */
static struct attribute_group df_v3_6_format_attr_group = {
	.name = "format",
	.attrs = df_v3_6_format_attrs,
};

/* df event attrs */
AMDGPU_PMU_ATTR(cake0_pcsout_txdata,
		"event=0x7,instance=0x46,umask=0x2");
AMDGPU_PMU_ATTR(cake1_pcsout_txdata,
		"event=0x7,instance=0x47,umask=0x2");
AMDGPU_PMU_ATTR(cake0_pcsout_txmeta,
		"event=0x7,instance=0x46,umask=0x4");
AMDGPU_PMU_ATTR(cake1_pcsout_txmeta,
		"event=0x7,instance=0x47,umask=0x4");
AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc,
		"event=0xb,instance=0x46,umask=0x4");
AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc,
		"event=0xb,instance=0x47,umask=0x4");
AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc,
		"event=0xb,instance=0x46,umask=0x8");
AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc,
		"event=0xb,instance=0x47,umask=0x8");

/* df event attributes */
static struct attribute *df_v3_6_event_attrs[] = {
	&pmu_attr_cake0_pcsout_txdata.attr,
	&pmu_attr_cake1_pcsout_txdata.attr,
	&pmu_attr_cake0_pcsout_txmeta.attr,
	&pmu_attr_cake1_pcsout_txmeta.attr,
	&pmu_attr_cake0_ftiinstat_reqalloc.attr,
	&pmu_attr_cake1_ftiinstat_reqalloc.attr,
	&pmu_attr_cake0_ftiinstat_rspalloc.attr,
	&pmu_attr_cake1_ftiinstat_rspalloc.attr,
	NULL
};

/* df event attribute group */
static struct attribute_group df_v3_6_event_attr_group = {
	.name = "events",
	.attrs = df_v3_6_event_attrs
};

/* df event attr groups */
const struct attribute_group *df_v3_6_attr_groups[] = {
		&df_v3_6_format_attr_group,
		&df_v3_6_event_attr_group,
		NULL
};

#endif /* __NetBSD__ */

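/*
 * Fabric Indirect Config Access (FICA) helpers.  The DF registers are not
 * memory-mapped directly here; instead the FICAA "Address3" register is
 * programmed through the NBIO PCIe index/data pair, and the corresponding
 * DataLo3/DataHi3 registers are then read or written the same way.  The
 * pcie_idx_lock keeps each index/data sequence atomic.
 */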
static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
				 uint32_t ficaa_val)
{
	unsigned long flags, address, data;
	uint32_t ficadl_val, ficadh_val;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
	WREG32(data, ficaa_val);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
	ficadl_val = RREG32(data);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
	ficadh_val = RREG32(data);

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
}

static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
			     uint32_t ficadl_val, uint32_t ficadh_val)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
	WREG32(data, ficaa_val);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
	WREG32(data, ficadl_val);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
	WREG32(data, ficadh_val);

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/*
 * df_v3_6_perfmon_rreg - read perfmon lo and hi
 *
 * Must be atomic: no MMIO method is provided, so the back-to-back reads of
 * lo and hi have to be done under one lock to preserve the DF finite state
 * machine.
 */
static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
				 uint32_t lo_addr, uint32_t *lo_val,
				 uint32_t hi_addr, uint32_t *hi_val)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, lo_addr);
	*lo_val = RREG32(data);
	WREG32(address, hi_addr);
	*hi_val = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/*
 * df_v3_6_perfmon_wreg - write to perfmon lo and hi
 *
 * Must be atomic: no MMIO method is provided, so no other access may be
 * interleaved with the data writes, in order to preserve the data fabric's
 * finite state machine.
 */
static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
				 uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, lo_addr);
	WREG32(data, lo_val);
	WREG32(address, hi_addr);
	WREG32(data, hi_val);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/* same as perfmon_wreg but return status on write value check */
static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
					   uint32_t lo_addr, uint32_t lo_val,
					   uint32_t hi_addr, uint32_t hi_val)
{
	unsigned long flags, address, data;
	uint32_t lo_val_rb, hi_val_rb;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, lo_addr);
	WREG32(data, lo_val);
	WREG32(address, hi_addr);
	WREG32(data, hi_val);

	WREG32(address, lo_addr);
	lo_val_rb = RREG32(data);
	WREG32(address, hi_addr);
	hi_val_rb = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
		return -EBUSY;

	return 0;
}


/*
 * retry arming counters every 100 usecs within 1 millisecond interval.
 * if retry fails after time out, return error.
 */
#define ARM_RETRY_USEC_TIMEOUT	1000
#define ARM_RETRY_USEC_INTERVAL	100
static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
					  uint32_t lo_addr, uint32_t lo_val,
					  uint32_t hi_addr, uint32_t hi_val)
{
	int countdown = ARM_RETRY_USEC_TIMEOUT;

	while (countdown) {

		if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
						     hi_addr, hi_val))
			break;

		countdown -= ARM_RETRY_USEC_INTERVAL;
		udelay(ARM_RETRY_USEC_INTERVAL);
	}

	return countdown > 0 ? 0 : -ETIME;
}

#ifndef __NetBSD__ /* XXX amdgpu sysfs */

/* get the number of df counters available */
static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	struct amdgpu_device *adev;
	struct drm_device *ddev;
	int i, count;

	ddev = dev_get_drvdata(dev);
	adev = ddev->dev_private;
	count = 0;

	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if (adev->df_perfmon_config_assign_mask[i] == 0)
			count++;
	}

	return snprintf(buf, PAGE_SIZE, "%i\n", count);
}

/* device attr for available perfmon counters */
static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);

#endif /* __NetBSD__ */

static void df_v3_6_query_hashes(struct amdgpu_device *adev)
{
	u32 tmp;

	adev->df.hash_status.hash_64k = false;
	adev->df.hash_status.hash_2m = false;
	adev->df.hash_status.hash_1g = false;

	if (adev->asic_type != CHIP_ARCTURUS)
		return;

	/* encoding for hash-enabled on Arcturus */
	if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
		tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
		adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,
						GlbHashIntlvCtl64K);
		adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,
						GlbHashIntlvCtl2M);
		adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,
						GlbHashIntlvCtl1G);
	}
}

/* init perfmons */
static void df_v3_6_sw_init(struct amdgpu_device *adev)
{
	int i, ret;

#ifdef __NetBSD__ /* XXX amdgpu sysfs */
	__USE(ret);
#else
	ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail);
	if (ret)
		DRM_ERROR("failed to create file for available df counters\n");
#endif

	for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
		adev->df_perfmon_config_assign_mask[i] = 0;

	df_v3_6_query_hashes(adev);
}

static void df_v3_6_sw_fini(struct amdgpu_device *adev)
{

#ifndef __NetBSD__ /* XXX amdgpu sysfs */
	device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
#endif

}

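/*
 * With CfgRegInstAccEn cleared, DF config register accesses are broadcast to
 * all instances instead of a single addressed one; restoring the register
 * default turns instance-addressed access back on.  (Inferred from the field
 * usage below.)
 */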
static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
					  bool enable)
{
	u32 tmp;

	if (enable) {
		tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
		tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
	} else
		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
			     mmFabricConfigAccessControl_DEFAULT);
}

static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
{
	u32 tmp;

	tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
	tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
	tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;

	return tmp;
}

static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
{
	int fb_channel_number;

	fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
	if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
		fb_channel_number = 0;

	return df_v3_6_channel_number[fb_channel_number];
}

static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						     bool enable)
{
	u32 tmp;

	if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
		/* Put DF on broadcast mode */
		adev->df.funcs->enable_broadcast_mode(adev, true);

		if (enable) {
			tmp = RREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater);
			tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
			tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
			WREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater, tmp);
		} else {
			tmp = RREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater);
			tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
			tmp |= DF_V3_6_MGCG_DISABLE;
			WREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater, tmp);
		}

		/* Exit broadcast mode */
		adev->df.funcs->enable_broadcast_mode(adev, false);
	}
}

static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
					  u32 *flags)
{
	u32 tmp;

	/* AMD_CG_SUPPORT_DF_MGCG */
	tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
	if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
		*flags |= AMD_CG_SUPPORT_DF_MGCG;
}

/* get assigned df perfmon ctr as int */
static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
				     uint64_t config)
{
	int i;

	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if ((config & 0x0FFFFFFUL) ==
			adev->df_perfmon_config_assign_mask[i])
			return i;
	}

	return -EINVAL;
}

/* get address based on counter assignment */
static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
				 uint64_t config,
				 int is_ctrl,
				 uint32_t *lo_base_addr,
				 uint32_t *hi_base_addr)
{
	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr < 0)
		return;

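	/*
	 * Counter slots 0..3 map onto the DF PerfMonCtl/PerfMonCtr register
	 * pairs 4..7; is_ctrl selects the control or the count register of
	 * the pair.
	 */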
	switch (target_cntr) {

	case 0:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
		break;
	case 1:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
		break;
	case 2:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
		break;
	case 3:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
		break;

	}

}

/* get read counter address */
static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
					  uint64_t config,
					  uint32_t *lo_base_addr,
					  uint32_t *hi_base_addr)
{
	df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr);
}

/* get control counter settings i.e. address and values to set */
static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
					 uint64_t config,
					 uint32_t *lo_base_addr,
					 uint32_t *hi_base_addr,
					 uint32_t *lo_val,
					 uint32_t *hi_val)
{

	uint32_t eventsel, instance, unitmask;
	uint32_t instance_10, instance_5432, instance_76;

	df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);

	if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
		DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x",
				*lo_base_addr, *hi_base_addr);
		return -ENXIO;
	}

	eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
	unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
	instance = DF_V3_6_GET_INSTANCE(config);

	instance_10 = instance & 0x3;
	instance_5432 = (instance >> 2) & 0xf;
	instance_76 = (instance >> 6) & 0x3;

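	/*
	 * Pack the PerfMonCtl pair (layout as used by this driver, inferred
	 * from the shifts below): lo carries the event select in bits 5:0,
	 * instance[1:0] in bits 7:6, the unit mask in bits 11:8 and the
	 * enable bit at bit 22; hi carries instance[5:2] in bits 3:0 and
	 * instance[7:6] in bits 30:29.
	 */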
	*lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22);
	*hi_val = (instance_76 << 29) | instance_5432;

	DRM_DEBUG_DRIVER("config=%"PRIx64" addr=%08x:%08x val=%08x:%08x",
			config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);

	return 0;
}

/* add df performance counters for read */
static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
				uint64_t config)
{
	int i, target_cntr;

	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr >= 0)
		return 0;

	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if (adev->df_perfmon_config_assign_mask[i] == 0U) {
			adev->df_perfmon_config_assign_mask[i] =
				config & 0x0FFFFFFUL;
			return 0;
		}
	}

	return -ENOSPC;
}

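/*
 * Bit 31 of a counter's df_perfmon_config_assign_mask entry marks a counter
 * whose arming failed in pmc_start; the arm is retried from pmc_get_count
 * before the counter is read.
 */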
#define DEFERRED_ARM_MASK	(1 << 31)
static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
				    uint64_t config, bool is_deferred)
{
	int target_cntr;

	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr < 0)
		return -EINVAL;

	if (is_deferred)
		adev->df_perfmon_config_assign_mask[target_cntr] |=
							DEFERRED_ARM_MASK;
	else
		adev->df_perfmon_config_assign_mask[target_cntr] &=
							~DEFERRED_ARM_MASK;

	return 0;
}

static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
				    uint64_t config)
{
	int target_cntr;

	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	/*
	 * We never get target_cntr < 0 since this function is only called in
	 * pmc_count for now, but we should check anyway.
	 */
	return (target_cntr >= 0 &&
			(adev->df_perfmon_config_assign_mask[target_cntr]
			& DEFERRED_ARM_MASK));

}

/* release performance counter */
static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
				     uint64_t config)
{
	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr >= 0)
		adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL;
}


static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
				       uint64_t config)
{
	uint32_t lo_base_addr = 0, hi_base_addr = 0;

	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

	if ((lo_base_addr == 0) || (hi_base_addr == 0))
		return;

	df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
}

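/*
 * pmc_start with is_enable set only reserves a counter slot for the config;
 * with is_enable clear it resets the counter and programs/arms the control
 * registers, deferring the arm for a later retry if the DF does not accept
 * the write.  pmc_stop mirrors this: it resets the counter and, when
 * is_disable is set, releases the slot.  (Summary of the code below.)
 */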
static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
			     int is_enable)
{
	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
	int err = 0, ret = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		if (is_enable)
			return df_v3_6_pmc_add_cntr(adev, config);

		df_v3_6_reset_perfmon_cntr(adev, config);

		ret = df_v3_6_pmc_get_ctrl_settings(adev,
					config,
					&lo_base_addr,
					&hi_base_addr,
					&lo_val,
					&hi_val);

		if (ret)
			return ret;

		err = df_v3_6_perfmon_arm_with_retry(adev,
						     lo_base_addr,
						     lo_val,
						     hi_base_addr,
						     hi_val);

		if (err)
			ret = df_v3_6_pmc_set_deferred(adev, config, true);

		break;
	default:
		break;
	}

	return ret;
}

static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
			    int is_disable)
{
	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
	int ret = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		ret = df_v3_6_pmc_get_ctrl_settings(adev,
			config,
			&lo_base_addr,
			&hi_base_addr,
			&lo_val,
			&hi_val);

		if (ret)
			return ret;

		df_v3_6_reset_perfmon_cntr(adev, config);

		if (is_disable)
			df_v3_6_pmc_release_cntr(adev, config);

		break;
	default:
		break;
	}

	return ret;
}

static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
				  uint64_t config,
				  uint64_t *count)
{
	uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0;
	*count = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

		if ((lo_base_addr == 0) || (hi_base_addr == 0))
			return;

		/* rearm the counter or throw away count value on failure */
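		/*
		 * Note that lo_val and hi_val are still zero at this point,
		 * so a deferred arm writes zeroes to the counter registers
		 * and merely verifies that the write stuck.
		 */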
		if (df_v3_6_pmc_is_deferred(adev, config)) {
			int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
							lo_base_addr, lo_val,
							hi_base_addr, hi_val);

			if (rearm_err)
				return;

			df_v3_6_pmc_set_deferred(adev, config, false);
		}

		df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
				hi_base_addr, &hi_val);

		*count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);

		if (*count >= DF_V3_6_PERFMON_OVERFLOW)
			*count = 0;

		DRM_DEBUG_DRIVER("config=%"PRIx64" addr=%08x:%08x val=%08x:%08x",
			config, lo_base_addr, hi_base_addr, lo_val, hi_val);

		break;
	default:
		break;
	}
}

static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev,
					   uint32_t df_inst)
{
	uint32_t base_addr_reg_val = 0;
	uint64_t base_addr = 0;

	base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 +
					df_inst * DF_3_6_SMN_REG_INST_DIST);

	if (REG_GET_FIELD(base_addr_reg_val,
			  DF_CS_UMC_AON0_DramBaseAddress0,
			  AddrRngVal) == 0) {
		DRM_WARN("address range not valid");
		return 0;
	}

	base_addr = REG_GET_FIELD(base_addr_reg_val,
				  DF_CS_UMC_AON0_DramBaseAddress0,
				  DramBaseAddr);

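	/* The DramBaseAddr field is in 256 MiB (1 << 28) granularity. */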
	return base_addr << 28;
}

static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev)
{
	uint32_t xgmi_node_id = 0;
	uint32_t df_inst_id = 0;

	/* Walk through DF dst nodes to find current XGMI node */
	for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) {

		xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 +
					   df_inst_id * DF_3_6_SMN_REG_INST_DIST);
		xgmi_node_id = REG_GET_FIELD(xgmi_node_id,
					     DF_CS_UMC_AON0_DramLimitAddress0,
					     DstFabricID);

		/* TODO: establish reason dest fabric id is offset by 7 */
		xgmi_node_id = xgmi_node_id >> 7;

		if (adev->gmc.xgmi.physical_node_id == xgmi_node_id)
			break;
	}

	if (df_inst_id == DF_3_6_INST_CNT) {
		DRM_WARN("cant match df dst id with gpu node");
		return 0;
	}

	return df_inst_id;
}

const struct amdgpu_df_funcs df_v3_6_funcs = {
	.sw_init = df_v3_6_sw_init,
	.sw_fini = df_v3_6_sw_fini,
	.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
	.get_fb_channel_number = df_v3_6_get_fb_channel_number,
	.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
	.update_medium_grain_clock_gating =
			df_v3_6_update_medium_grain_clock_gating,
	.get_clockgating_state = df_v3_6_get_clockgating_state,
	.pmc_start = df_v3_6_pmc_start,
	.pmc_stop = df_v3_6_pmc_stop,
	.pmc_get_count = df_v3_6_pmc_get_count,
	.get_fica = df_v3_6_get_fica,
	.set_fica = df_v3_6_set_fica,
	.get_dram_base_addr = df_v3_6_get_dram_base_addr,
	.get_df_inst_id = df_v3_6_get_df_inst_id
};