/*	$NetBSD: amdgpu_df_v3_6.c,v 1.5 2021/12/19 11:59:53 riastradh Exp $	*/

/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_df_v3_6.c,v 1.5 2021/12/19 11:59:53 riastradh Exp $");

#include "amdgpu.h"
#include "df_v3_6.h"

#include "df/df_3_6_default.h"
#include "df/df_3_6_offset.h"
#include "df/df_3_6_sh_mask.h"

#define DF_3_6_SMN_REG_INST_DIST        0x8
#define DF_3_6_INST_CNT                 8

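/*
 * Number of memory channels for each IntLvNumChan encoding read from
 * DF_CS_UMC_AON0_DramBaseAddress0 (see df_v3_6_get_fb_channel_number below).
 * Zero entries are encodings this table does not map to a channel count.
 */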
static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
				       16, 32, 0, 0, 0, 2, 4, 8};

#ifndef __NetBSD__		/* XXX amdgpu sysfs */

/* init df format attrs */
AMDGPU_PMU_ATTR(event,		"config:0-7");
AMDGPU_PMU_ATTR(instance,	"config:8-15");
AMDGPU_PMU_ATTR(umask,		"config:16-23");

/* df format attributes  */
static struct attribute *df_v3_6_format_attrs[] = {
	&pmu_attr_event.attr,
	&pmu_attr_instance.attr,
	&pmu_attr_umask.attr,
	NULL
};

/* df format attribute group */
static struct attribute_group df_v3_6_format_attr_group = {
	.name = "format",
	.attrs = df_v3_6_format_attrs,
};

/* df event attrs */
AMDGPU_PMU_ATTR(cake0_pcsout_txdata,
		      "event=0x7,instance=0x46,umask=0x2");
AMDGPU_PMU_ATTR(cake1_pcsout_txdata,
		      "event=0x7,instance=0x47,umask=0x2");
AMDGPU_PMU_ATTR(cake0_pcsout_txmeta,
		      "event=0x7,instance=0x46,umask=0x4");
AMDGPU_PMU_ATTR(cake1_pcsout_txmeta,
		      "event=0x7,instance=0x47,umask=0x4");
AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc,
		      "event=0xb,instance=0x46,umask=0x4");
AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc,
		      "event=0xb,instance=0x47,umask=0x4");
AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc,
		      "event=0xb,instance=0x46,umask=0x8");
AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc,
		      "event=0xb,instance=0x47,umask=0x8");

/* df event attributes  */
static struct attribute *df_v3_6_event_attrs[] = {
	&pmu_attr_cake0_pcsout_txdata.attr,
	&pmu_attr_cake1_pcsout_txdata.attr,
	&pmu_attr_cake0_pcsout_txmeta.attr,
	&pmu_attr_cake1_pcsout_txmeta.attr,
	&pmu_attr_cake0_ftiinstat_reqalloc.attr,
	&pmu_attr_cake1_ftiinstat_reqalloc.attr,
	&pmu_attr_cake0_ftiinstat_rspalloc.attr,
	&pmu_attr_cake1_ftiinstat_rspalloc.attr,
	NULL
};

/* df event attribute group */
static struct attribute_group df_v3_6_event_attr_group = {
	.name = "events",
	.attrs = df_v3_6_event_attrs
};

/* df event attr groups  */
const struct attribute_group *df_v3_6_attr_groups[] = {
		&df_v3_6_format_attr_group,
		&df_v3_6_event_attr_group,
		NULL
};

#endif	/* __NetBSD__ */

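/*
 * Fabric Indirect Config Access (FICA): indirect DF register accesses go
 * through the NBIO PCIe index/data pair.  The FICA address register is
 * selected first, then the lo and hi data registers are read or written,
 * all under pcie_idx_lock so the sequence is not interleaved.
 */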
static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
				 uint32_t ficaa_val)
{
	unsigned long flags, address, data;
	uint32_t ficadl_val, ficadh_val;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
	WREG32(data, ficaa_val);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
	ficadl_val = RREG32(data);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
	ficadh_val = RREG32(data);

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
}

static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
			     uint32_t ficadl_val, uint32_t ficadh_val)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
	WREG32(data, ficaa_val);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
	WREG32(data, ficadl_val);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
	WREG32(data, ficadh_val);

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/*
 * df_v3_6_perfmon_rreg - read perfmon lo and hi
 *
 * Must be atomic: no MMIO method is provided, so the lo and hi reads are
 * performed back to back under one lock to preserve the DF finite state
 * machine.
 */
static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
			    uint32_t lo_addr, uint32_t *lo_val,
			    uint32_t hi_addr, uint32_t *hi_val)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, lo_addr);
	*lo_val = RREG32(data);
	WREG32(address, hi_addr);
	*hi_val = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/*
 * df_v3_6_perfmon_wreg - write to perfmon lo and hi
 *
 * Must be atomic: no MMIO method is provided, so no reads may be interleaved
 * with the data writes, in order to preserve the data fabric's finite state
 * machine.
 */
static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
			    uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, lo_addr);
	WREG32(data, lo_val);
	WREG32(address, hi_addr);
	WREG32(data, hi_val);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/*
 * Same as perfmon_wreg, but reads the values back and returns -EBUSY if they
 * do not match what was written (i.e. the counter did not arm).
 */
static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
					  uint32_t lo_addr, uint32_t lo_val,
					  uint32_t hi_addr, uint32_t  hi_val)
{
	unsigned long flags, address, data;
	uint32_t lo_val_rb, hi_val_rb;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, lo_addr);
	WREG32(data, lo_val);
	WREG32(address, hi_addr);
	WREG32(data, hi_val);

	WREG32(address, lo_addr);
	lo_val_rb = RREG32(data);
	WREG32(address, hi_addr);
	hi_val_rb = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
		return -EBUSY;

	return 0;
}


/*
 * Retry arming the counters every 100 microseconds, up to a 1 millisecond
 * timeout.  If arming still fails when the timeout expires, return an error.
 */
#define ARM_RETRY_USEC_TIMEOUT	1000
#define ARM_RETRY_USEC_INTERVAL	100
static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
					  uint32_t lo_addr, uint32_t lo_val,
					  uint32_t hi_addr, uint32_t  hi_val)
{
	int countdown = ARM_RETRY_USEC_TIMEOUT;

	while (countdown) {

		if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
						     hi_addr, hi_val))
			break;

		countdown -= ARM_RETRY_USEC_INTERVAL;
		udelay(ARM_RETRY_USEC_INTERVAL);
	}

	return countdown > 0 ? 0 : -ETIME;
}

#ifndef __NetBSD__		/* XXX amdgpu sysfs */

/* get the number of df counters available */
static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct amdgpu_device *adev;
	struct drm_device *ddev;
	int i, count;

	ddev = dev_get_drvdata(dev);
	adev = ddev->dev_private;
	count = 0;

	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if (adev->df_perfmon_config_assign_mask[i] == 0)
			count++;
	}

	return snprintf(buf, PAGE_SIZE, "%i\n", count);
}

/* device attr for available perfmon counters */
static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);

#endif	/* __NetBSD__ */

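/*
 * On Arcturus, a FB channel-number encoding of 0xe indicates that DF address
 * hashing is enabled; in that case read DfGlobalCtrl to record which
 * interleave granularities (64K, 2M, 1G) have hashing turned on.
 */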
static void df_v3_6_query_hashes(struct amdgpu_device *adev)
{
	u32 tmp;

	adev->df.hash_status.hash_64k = false;
	adev->df.hash_status.hash_2m = false;
	adev->df.hash_status.hash_1g = false;

	if (adev->asic_type != CHIP_ARCTURUS)
		return;

	/* encoding for hash-enabled on Arcturus */
	if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
		tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
		adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,
						GlbHashIntlvCtl64K);
		adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,
						GlbHashIntlvCtl2M);
		adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,
						GlbHashIntlvCtl1G);
	}
}

/* init perfmons */
static void df_v3_6_sw_init(struct amdgpu_device *adev)
{
	int i, ret;

#ifdef __NetBSD__		/* XXX amdgpu sysfs */
	__USE(ret);
#else
	ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail);
	if (ret)
		DRM_ERROR("failed to create file for available df counters\n");
#endif

	for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
		adev->df_perfmon_config_assign_mask[i] = 0;

	df_v3_6_query_hashes(adev);
}

static void df_v3_6_sw_fini(struct amdgpu_device *adev)
{

#ifndef __NetBSD__		/* XXX amdgpu sysfs */
	device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
#endif

}

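/*
 * Broadcast mode clears CfgRegInstAccEn in FabricConfigAccessControl so that
 * DF register accesses are no longer targeted at a single instance; disabling
 * it restores the register's default value.
 */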
static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
					  bool enable)
{
	u32 tmp;

	if (enable) {
		tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
		tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
	} else
		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
			     mmFabricConfigAccessControl_DEFAULT);
}

static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
{
	u32 tmp;

	tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
	tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
	tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;

	return tmp;
}

static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
{
	int fb_channel_number;

	fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
	if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
		fb_channel_number = 0;

	return df_v3_6_channel_number[fb_channel_number];
}

static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						     bool enable)
{
	u32 tmp;

	if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
		/* Put DF on broadcast mode */
		adev->df.funcs->enable_broadcast_mode(adev, true);

		if (enable) {
			tmp = RREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater);
			tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
			tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
			WREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater, tmp);
		} else {
			tmp = RREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater);
			tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
			tmp |= DF_V3_6_MGCG_DISABLE;
			WREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater, tmp);
		}

		/* Exit broadcast mode */
		adev->df.funcs->enable_broadcast_mode(adev, false);
	}
}

static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
					  u32 *flags)
{
	u32 tmp;

	/* AMD_CG_SUPPORT_DF_MGCG */
	tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
	if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
		*flags |= AMD_CG_SUPPORT_DF_MGCG;
}

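/*
 * The 64-bit perfmon config word follows the PMU format attributes above:
 * bits 0-7 hold the event select, bits 8-15 the instance, and bits 16-23 the
 * unit mask.  Only the low 24 bits are recorded in
 * df_perfmon_config_assign_mask; bit 31 of that mask is reused as the
 * deferred-arm flag (see DEFERRED_ARM_MASK below).
 */
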
/* get assigned df perfmon ctr as int */
static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
				      uint64_t config)
{
	int i;

	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if ((config & 0x0FFFFFFUL) ==
					adev->df_perfmon_config_assign_mask[i])
			return i;
	}

	return -EINVAL;
}

/* get address based on counter assignment */
static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
				 uint64_t config,
				 int is_ctrl,
				 uint32_t *lo_base_addr,
				 uint32_t *hi_base_addr)
{
	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr < 0)
		return;

	switch (target_cntr) {

	case 0:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
		break;
	case 1:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
		break;
	case 2:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
		break;
	case 3:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
		break;

	}

}

/* get read counter address */
static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
					  uint64_t config,
					  uint32_t *lo_base_addr,
					  uint32_t *hi_base_addr)
{
	df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr);
}

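/*
 * The control value splits the 8-bit instance select across the two control
 * registers: instance bits [1:0] land in the low word at bits [7:6], bits
 * [5:2] in the high word at bits [3:0], and bits [7:6] in the high word at
 * bits [30:29].  The unit mask occupies low-word bits [11:8] and the event
 * select bits [5:0]; bit 22 of the low word is presumably the counter enable,
 * since writing these values is what arms the counter.
 */
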
/* get control counter settings, i.e. the addresses and values to set */
static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
					  uint64_t config,
					  uint32_t *lo_base_addr,
					  uint32_t *hi_base_addr,
					  uint32_t *lo_val,
					  uint32_t *hi_val)
{

	uint32_t eventsel, instance, unitmask;
	uint32_t instance_10, instance_5432, instance_76;

	df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);

	if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
		DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x",
				*lo_base_addr, *hi_base_addr);
		return -ENXIO;
	}

	eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
	unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
	instance = DF_V3_6_GET_INSTANCE(config);

	instance_10 = instance & 0x3;
	instance_5432 = (instance >> 2) & 0xf;
	instance_76 = (instance >> 6) & 0x3;

	*lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22);
	*hi_val = (instance_76 << 29) | instance_5432;

	DRM_DEBUG_DRIVER("config=%"PRIx64" addr=%08x:%08x val=%08x:%08x",
		config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);

	return 0;
}

507 
508 /* add df performance counters for read */
df_v3_6_pmc_add_cntr(struct amdgpu_device * adev,uint64_t config)509 static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
510 				   uint64_t config)
511 {
512 	int i, target_cntr;
513 
514 	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
515 
516 	if (target_cntr >= 0)
517 		return 0;
518 
519 	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
520 		if (adev->df_perfmon_config_assign_mask[i] == 0U) {
521 			adev->df_perfmon_config_assign_mask[i] =
522 							config & 0x0FFFFFFUL;
523 			return 0;
524 		}
525 	}
526 
527 	return -ENOSPC;
528 }
529 
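/*
 * Deferred arming: if arming a counter fails in df_v3_6_pmc_start, bit 31 of
 * its df_perfmon_config_assign_mask entry is set and the arm is retried from
 * df_v3_6_pmc_get_count before the next read.
 */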
#define DEFERRED_ARM_MASK	(1 << 31)
static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
				    uint64_t config, bool is_deferred)
{
	int target_cntr;

	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr < 0)
		return -EINVAL;

	if (is_deferred)
		adev->df_perfmon_config_assign_mask[target_cntr] |=
							DEFERRED_ARM_MASK;
	else
		adev->df_perfmon_config_assign_mask[target_cntr] &=
							~DEFERRED_ARM_MASK;

	return 0;
}

static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
				    uint64_t config)
{
	int target_cntr;

	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	/*
	 * We never get target_cntr < 0 since this function is only called
	 * from pmc_get_count for now, but check anyway.
	 */
	return (target_cntr >= 0 &&
			(adev->df_perfmon_config_assign_mask[target_cntr]
			& DEFERRED_ARM_MASK));

}

/* release performance counter */
static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
				     uint64_t config)
{
	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr >= 0)
		adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL;
}


static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
					 uint64_t config)
{
	uint32_t lo_base_addr = 0, hi_base_addr = 0;

	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

	if ((lo_base_addr == 0) || (hi_base_addr == 0))
		return;

	df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
}

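/*
 * On VEGA20, pmc_start with is_enable set only reserves a counter slot for
 * the config; a later call with is_enable clear resets the counter, computes
 * the control settings and arms it, marking the counter deferred if arming
 * fails so that pmc_get_count can retry.
 */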
static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
			     int is_enable)
{
	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
	int err = 0, ret = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		if (is_enable)
			return df_v3_6_pmc_add_cntr(adev, config);

		df_v3_6_reset_perfmon_cntr(adev, config);

		ret = df_v3_6_pmc_get_ctrl_settings(adev,
					config,
					&lo_base_addr,
					&hi_base_addr,
					&lo_val,
					&hi_val);

		if (ret)
			return ret;

		err = df_v3_6_perfmon_arm_with_retry(adev,
						     lo_base_addr,
						     lo_val,
						     hi_base_addr,
						     hi_val);

		if (err)
			ret = df_v3_6_pmc_set_deferred(adev, config, true);

		break;
	default:
		break;
	}

	return ret;
}

static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
			    int is_disable)
{
	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
	int ret = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		ret = df_v3_6_pmc_get_ctrl_settings(adev,
			config,
			&lo_base_addr,
			&hi_base_addr,
			&lo_val,
			&hi_val);

		if (ret)
			return ret;

		df_v3_6_reset_perfmon_cntr(adev, config);

		if (is_disable)
			df_v3_6_pmc_release_cntr(adev, config);

		break;
	default:
		break;
	}

	return ret;
}

static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
				  uint64_t config,
				  uint64_t *count)
{
	uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0;
	*count = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

		if ((lo_base_addr == 0) || (hi_base_addr == 0))
			return;

		/* rearm the counter or throw away count value on failure */
		if (df_v3_6_pmc_is_deferred(adev, config)) {
			int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
							lo_base_addr, lo_val,
							hi_base_addr, hi_val);

			if (rearm_err)
				return;

			df_v3_6_pmc_set_deferred(adev, config, false);
		}

		df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
				hi_base_addr, &hi_val);

		*count  = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);

		if (*count >= DF_V3_6_PERFMON_OVERFLOW)
			*count = 0;

		DRM_DEBUG_DRIVER("config=%"PRIx64" addr=%08x:%08x val=%08x:%08x",
			 config, lo_base_addr, hi_base_addr, lo_val, hi_val);

		break;
	default:
		break;
	}
}

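/*
 * Read the DRAM base address programmed into the given DF instance.  The
 * DramBaseAddr field is in units of 256MB, hence the shift by 28 bits.
 */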
static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev,
					   uint32_t df_inst)
{
	uint32_t base_addr_reg_val = 0;
	uint64_t base_addr = 0;

	base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 +
					df_inst * DF_3_6_SMN_REG_INST_DIST);

	if (REG_GET_FIELD(base_addr_reg_val,
			  DF_CS_UMC_AON0_DramBaseAddress0,
			  AddrRngVal) == 0) {
		DRM_WARN("address range not valid");
		return 0;
	}

	base_addr = REG_GET_FIELD(base_addr_reg_val,
				  DF_CS_UMC_AON0_DramBaseAddress0,
				  DramBaseAddr);

	return base_addr << 28;
}

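/*
 * Walk the DF DramLimitAddress0 registers of each instance and return the
 * instance whose destination fabric ID matches this GPU's XGMI physical node
 * id; 0 is returned if no instance matches.
 */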
static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev)
{
	uint32_t xgmi_node_id = 0;
	uint32_t df_inst_id = 0;

	/* Walk through DF dst nodes to find current XGMI node */
	for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) {

		xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 +
					   df_inst_id * DF_3_6_SMN_REG_INST_DIST);
		xgmi_node_id = REG_GET_FIELD(xgmi_node_id,
					     DF_CS_UMC_AON0_DramLimitAddress0,
					     DstFabricID);

		/* TODO: establish reason dest fabric id is offset by 7 */
		xgmi_node_id = xgmi_node_id >> 7;

		if (adev->gmc.xgmi.physical_node_id == xgmi_node_id)
			break;
	}

	if (df_inst_id == DF_3_6_INST_CNT) {
		DRM_WARN("can't match df dst id with gpu node");
		return 0;
	}

	return df_inst_id;
}

759 
760 const struct amdgpu_df_funcs df_v3_6_funcs = {
761 	.sw_init = df_v3_6_sw_init,
762 	.sw_fini = df_v3_6_sw_fini,
763 	.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
764 	.get_fb_channel_number = df_v3_6_get_fb_channel_number,
765 	.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
766 	.update_medium_grain_clock_gating =
767 			df_v3_6_update_medium_grain_clock_gating,
768 	.get_clockgating_state = df_v3_6_get_clockgating_state,
769 	.pmc_start = df_v3_6_pmc_start,
770 	.pmc_stop = df_v3_6_pmc_stop,
771 	.pmc_get_count = df_v3_6_pmc_get_count,
772 	.get_fica = df_v3_6_get_fica,
773 	.set_fica = df_v3_6_set_fica,
774 	.get_dram_base_addr = df_v3_6_get_dram_base_addr,
775 	.get_df_inst_id = df_v3_6_get_df_inst_id
776 };
777