xref: /netbsd-src/sys/external/bsd/drm2/dist/drm/amd/amdkfd/kfd_dbgdev.c (revision 41ec02673d281bbb3d38e6c78504ce6e30c228c1)
1 /*	$NetBSD: kfd_dbgdev.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $	*/
2 
3 /*
4  * Copyright 2014 Advanced Micro Devices, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25 
26 #include <sys/cdefs.h>
27 __KERNEL_RCSID(0, "$NetBSD: kfd_dbgdev.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $");
28 
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/log2.h>
32 #include <linux/sched.h>
33 #include <linux/slab.h>
34 #include <linux/mutex.h>
35 #include <linux/device.h>
36 
37 #include "kfd_pm4_headers.h"
38 #include "kfd_pm4_headers_diq.h"
39 #include "kfd_kernel_queue.h"
40 #include "kfd_priv.h"
41 #include "kfd_pm4_opcodes.h"
42 #include "cik_regs.h"
43 #include "kfd_dbgmgr.h"
44 #include "kfd_dbgdev.h"
45 #include "kfd_device_queue_manager.h"
46 
/*
 * Switch off all address-watch points on @dev by calling straight into
 * the kfd2kgd interface (used in no-DIQ mode, where register access is
 * direct rather than via the debug interface queue).
 */
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
51 
/*
 * dbgdev_diq_submit_ib - run an indirect buffer (IB) through the debug
 * interface queue (DIQ) and wait for the CP to finish it.
 *
 * @dbgdev:        debug device owning the DIQ kernel queue
 * @pasid:         PASID the IB executes under
 * @vmid0_address: GPU VA of the caller-built packet buffer (the IB)
 * @packet_buff:   CPU pointer to that buffer
 *                 (NOTE(review): not referenced in this function - the CP
 *                 fetches via @vmid0_address; kept for interface symmetry?
 *                 confirm against callers)
 * @size_in_bytes: size of the IB; must be non-zero
 *
 * Two packets are placed on the DIQ: an INDIRECT_BUFFER_PASID packet
 * pointing the CP at the IB, followed by a RELEASE_MEM packet that makes
 * the CP write a 64-bit sync value into freshly allocated GART memory
 * once the IB has been consumed.  The CPU then waits on that value.
 *
 * Returns 0 on success or a negative errno (packet-buffer acquisition,
 * GART allocation, or fence-wait failure).
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	/* Room for both the IB packet and the sync (release-mem) packet. */
	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq_acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("kq_acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	largep = (union ULARGE_INTEGER *) &vmid0_address;

	/* IB base address is 4-byte granular: low two bits are dropped. */
	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	/* IB size in dwords, capped to the 20-bit ib_size field. */
	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / 4) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocations ( gpu/cpu mapping),
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	/* Sync variable lives in GART so both CPU and CP can reach it. */
	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		kq_rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	/* Pre-set to "pending"; the CP overwrites with QUEUESTATE__ACTIVE. */
	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	/* Destination must be 4-byte aligned; hardware takes addr >> 2. */
	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq_submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
170 
dbgdev_register_nodiq(struct kfd_dbgdev * dbgdev)171 static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
172 {
173 	/*
174 	 * no action is needed in this case,
175 	 * just make sure diq will not be used
176 	 */
177 
178 	dbgdev->kq = NULL;
179 
180 	return 0;
181 }
182 
dbgdev_register_diq(struct kfd_dbgdev * dbgdev)183 static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
184 {
185 	struct queue_properties properties;
186 	unsigned int qid;
187 	struct kernel_queue *kq = NULL;
188 	int status;
189 
190 	properties.type = KFD_QUEUE_TYPE_DIQ;
191 
192 	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
193 				&properties, &qid, NULL);
194 
195 	if (status) {
196 		pr_err("Failed to create DIQ\n");
197 		return status;
198 	}
199 
200 	pr_debug("DIQ Created with queue id: %d\n", qid);
201 
202 	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
203 
204 	if (!kq) {
205 		pr_err("Error getting DIQ\n");
206 		pqm_destroy_queue(dbgdev->pqm, qid);
207 		return -EFAULT;
208 	}
209 
210 	dbgdev->kq = kq;
211 
212 	return status;
213 }
214 
dbgdev_unregister_nodiq(struct kfd_dbgdev * dbgdev)215 static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
216 {
217 	/* disable watch address */
218 	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
219 	return 0;
220 }
221 
dbgdev_unregister_diq(struct kfd_dbgdev * dbgdev)222 static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
223 {
224 	/* todo - disable address watch */
225 	int status;
226 
227 	status = pqm_destroy_queue(dbgdev->pqm,
228 			dbgdev->kq->queue->properties.queue_id);
229 	dbgdev->kq = NULL;
230 
231 	return status;
232 }
233 
dbgdev_address_watch_set_registers(const struct dbg_address_watch_info * adw_info,union TCP_WATCH_ADDR_H_BITS * addrHi,union TCP_WATCH_ADDR_L_BITS * addrLo,union TCP_WATCH_CNTL_BITS * cntl,unsigned int index,unsigned int vmid)234 static void dbgdev_address_watch_set_registers(
235 			const struct dbg_address_watch_info *adw_info,
236 			union TCP_WATCH_ADDR_H_BITS *addrHi,
237 			union TCP_WATCH_ADDR_L_BITS *addrLo,
238 			union TCP_WATCH_CNTL_BITS *cntl,
239 			unsigned int index, unsigned int vmid)
240 {
241 	union ULARGE_INTEGER addr;
242 
243 	addr.quad_part = 0;
244 	addrHi->u32All = 0;
245 	addrLo->u32All = 0;
246 	cntl->u32All = 0;
247 
248 	if (adw_info->watch_mask)
249 		cntl->bitfields.mask =
250 			(uint32_t) (adw_info->watch_mask[index] &
251 					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
252 	else
253 		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
254 
255 	addr.quad_part = (unsigned long long) adw_info->watch_address[index];
256 
257 	addrHi->bitfields.addr = addr.u.high_part &
258 					ADDRESS_WATCH_REG_ADDHIGH_MASK;
259 	addrLo->bitfields.addr =
260 			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
261 
262 	cntl->bitfields.mode = adw_info->watch_mode[index];
263 	cntl->bitfields.vmid = (uint32_t) vmid;
264 	/* for now assume it is an ATC address */
265 	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
266 
267 	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
268 	pr_debug("\t\t%20s %08x\n", "set reg add high :",
269 			addrHi->bitfields.addr);
270 	pr_debug("\t\t%20s %08x\n", "set reg add low :",
271 			addrLo->bitfields.addr);
272 }
273 
dbgdev_address_watch_nodiq(struct kfd_dbgdev * dbgdev,struct dbg_address_watch_info * adw_info)274 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
275 				      struct dbg_address_watch_info *adw_info)
276 {
277 	union TCP_WATCH_ADDR_H_BITS addrHi;
278 	union TCP_WATCH_ADDR_L_BITS addrLo;
279 	union TCP_WATCH_CNTL_BITS cntl;
280 	struct kfd_process_device *pdd;
281 	unsigned int i;
282 
283 	/* taking the vmid for that process on the safe way using pdd */
284 	pdd = kfd_get_process_device_data(dbgdev->dev,
285 					adw_info->process);
286 	if (!pdd) {
287 		pr_err("Failed to get pdd for wave control no DIQ\n");
288 		return -EFAULT;
289 	}
290 
291 	addrHi.u32All = 0;
292 	addrLo.u32All = 0;
293 	cntl.u32All = 0;
294 
295 	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
296 			(adw_info->num_watch_points == 0)) {
297 		pr_err("num_watch_points is invalid\n");
298 		return -EINVAL;
299 	}
300 
301 	if (!adw_info->watch_mode || !adw_info->watch_address) {
302 		pr_err("adw_info fields are not valid\n");
303 		return -EINVAL;
304 	}
305 
306 	for (i = 0; i < adw_info->num_watch_points; i++) {
307 		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
308 						&cntl, i, pdd->qpd.vmid);
309 
310 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
311 		pr_debug("\t\t%20s %08x\n", "register index :", i);
312 		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
313 		pr_debug("\t\t%20s %08x\n", "Address Low is :",
314 				addrLo.bitfields.addr);
315 		pr_debug("\t\t%20s %08x\n", "Address high is :",
316 				addrHi.bitfields.addr);
317 		pr_debug("\t\t%20s %08x\n", "Address high is :",
318 				addrHi.bitfields.addr);
319 		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
320 				cntl.bitfields.mask);
321 		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
322 				cntl.bitfields.mode);
323 		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
324 				cntl.bitfields.vmid);
325 		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
326 				cntl.bitfields.atc);
327 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
328 
329 		pdd->dev->kfd2kgd->address_watch_execute(
330 						dbgdev->dev->kgd,
331 						i,
332 						cntl.u32All,
333 						addrHi.u32All,
334 						addrLo.u32All);
335 	}
336 
337 	return 0;
338 }
339 
/*
 * dbgdev_address_watch_diq - program address-watch registers by
 * submitting SET_CONFIG_REG packets through the debug interface queue.
 *
 * For every watch point an indirect buffer holding four SET_CONFIG_REG
 * packets is built in GART memory and submitted via
 * dbgdev_diq_submit_ib():
 *   [0] CNTL with the valid bit still clear (VMID patched in by the CP)
 *   [1] ADDR_HI
 *   [2] ADDR_LO
 *   [3] CNTL again, now with the valid bit reflecting a non-zero address
 * Writing CNTL before and after the address registers avoids the watch
 * point triggering while half-programmed.
 *
 * Returns 0 on success, -EINVAL on malformed @adw_info, or the error
 * from GART allocation / IB submission.
 */
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				    struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a place holder */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	/* The IB itself lives in GART so the CP can fetch it. */
	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	/*
	 * Packets 0 and 3 get the VMID inserted by the CP at the CNTL
	 * register's VMID bit offset; 1 and 2 are plain register writes.
	 */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add     is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		/* Register offsets are per-watch-point; ask the kgd layer. */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		/* SET_CONFIG_REG offsets are relative to the config base. */
		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero*/
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}
489 
dbgdev_wave_control_set_registers(struct dbg_wave_control_info * wac_info,union SQ_CMD_BITS * in_reg_sq_cmd,union GRBM_GFX_INDEX_BITS * in_reg_gfx_index)490 static int dbgdev_wave_control_set_registers(
491 				struct dbg_wave_control_info *wac_info,
492 				union SQ_CMD_BITS *in_reg_sq_cmd,
493 				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
494 {
495 	int status = 0;
496 	union SQ_CMD_BITS reg_sq_cmd;
497 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
498 	struct HsaDbgWaveMsgAMDGen2 *pMsg;
499 
500 	reg_sq_cmd.u32All = 0;
501 	reg_gfx_index.u32All = 0;
502 	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
503 
504 	switch (wac_info->mode) {
505 	/* Send command to single wave */
506 	case HSA_DBG_WAVEMODE_SINGLE:
507 		/*
508 		 * Limit access to the process waves only,
509 		 * by setting vmid check
510 		 */
511 		reg_sq_cmd.bits.check_vmid = 1;
512 		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
513 		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
514 		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
515 
516 		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
517 		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
518 		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
519 
520 		break;
521 
522 	/* Send command to all waves with matching VMID */
523 	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
524 
525 		reg_gfx_index.bits.sh_broadcast_writes = 1;
526 		reg_gfx_index.bits.se_broadcast_writes = 1;
527 		reg_gfx_index.bits.instance_broadcast_writes = 1;
528 
529 		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
530 
531 		break;
532 
533 	/* Send command to all CU waves with matching VMID */
534 	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
535 
536 		reg_sq_cmd.bits.check_vmid = 1;
537 		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
538 
539 		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
540 		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
541 		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
542 
543 		break;
544 
545 	default:
546 		return -EINVAL;
547 	}
548 
549 	switch (wac_info->operand) {
550 	case HSA_DBG_WAVEOP_HALT:
551 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
552 		break;
553 
554 	case HSA_DBG_WAVEOP_RESUME:
555 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
556 		break;
557 
558 	case HSA_DBG_WAVEOP_KILL:
559 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
560 		break;
561 
562 	case HSA_DBG_WAVEOP_DEBUG:
563 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
564 		break;
565 
566 	case HSA_DBG_WAVEOP_TRAP:
567 		if (wac_info->trapId < MAX_TRAPID) {
568 			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
569 			reg_sq_cmd.bits.trap_id = wac_info->trapId;
570 		} else {
571 			status = -EINVAL;
572 		}
573 		break;
574 
575 	default:
576 		status = -EINVAL;
577 		break;
578 	}
579 
580 	if (status == 0) {
581 		*in_reg_sq_cmd = reg_sq_cmd;
582 		*in_reg_gfx_index = reg_gfx_index;
583 	}
584 
585 	return status;
586 }
587 
/*
 * dbgdev_wave_control_diq - issue a wave control command through the
 * debug interface queue.
 *
 * Builds an indirect buffer of three packets and submits it via
 * dbgdev_diq_submit_ib():
 *   [0] SET_UCONFIG_REG GRBM_GFX_INDEX - select the target SE/SH/CU
 *   [1] SET_CONFIG_REG  SQ_CMD        - the actual command, with the
 *       process VMID inserted by the CP (we cannot know it here)
 *   [2] SET_UCONFIG_REG GRBM_GFX_INDEX - restore full broadcast mode
 *
 * Returns 0 on success, or the error from register setup, GART
 * allocation, or IB submission.
 */
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{

	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: N/A\n");

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	/* The IB lives in GART memory so the CP can fetch it. */
	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	/* Packet 0: select the target via GRBM_GFX_INDEX (uconfig space). */
	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	/* Packet 1: SQ_CMD, with the CP patching in the process VMID. */
	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */

	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;


	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
				GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status)
		pr_err("Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
701 
/*
 * dbgdev_wave_control_nodiq - issue a wave control command directly
 * through the kfd2kgd interface (no DIQ).
 *
 * Unlike the DIQ path, the CP cannot insert the VMID for us here, so
 * the process VMID is looked up through its per-device data and patched
 * into SQ_CMD before execution.
 *
 * Returns -EFAULT if the process has no per-device data, the error from
 * register setup, or the result of wave_control_execute().
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
764 
/*
 * dbgdev_wave_reset_wavefronts - kill all wavefronts of process @p on
 * device @dev.
 *
 * Scans the KFD VMID range for the VMID currently mapped to the
 * process's PASID, builds a broadcast KILL command, patches in that
 * VMID, and executes it directly (no DIQ involved).
 *
 * Returns 0 on success, -EFAULT if no VMID maps to the PASID or the
 * process has no per-device data, or -EINVAL if register setup fails.
 */
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	struct dbg_wave_control_info wac_info;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;

	reg_sq_cmd.u32All = 0;
	/* NOTE(review): redundant - status is already initialized above. */
	status = 0;

	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
	wac_info.operand = HSA_DBG_WAVEOP_KILL;

	pr_debug("Killing all process wavefronts\n");

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		/* status here is the mapping-valid flag, not an errno. */
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->kgd, vmid, &queried_pasid);

		if (status && queried_pasid == p->pasid) {
			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
					vmid, p->pasid);
			break;
		}
	}

	/* Loop ran to completion without a match -> process not resident. */
	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
		return -EFAULT;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
			&reg_gfx_index);
	if (status != 0)
		return -EINVAL;

	/* for non DIQ we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = vmid;

	/* NOTE(review): return value of wave_control_execute is ignored
	 * here, unlike in dbgdev_wave_control_nodiq - confirm intended. */
	dev->kfd2kgd->wave_control_execute(dev->kgd,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All);

	return 0;
}
825 
kfd_dbgdev_init(struct kfd_dbgdev * pdbgdev,struct kfd_dev * pdev,enum DBGDEV_TYPE type)826 void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
827 			enum DBGDEV_TYPE type)
828 {
829 	pdbgdev->dev = pdev;
830 	pdbgdev->kq = NULL;
831 	pdbgdev->type = type;
832 	pdbgdev->pqm = NULL;
833 
834 	switch (type) {
835 	case DBGDEV_TYPE_NODIQ:
836 		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
837 		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
838 		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
839 		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
840 		break;
841 	case DBGDEV_TYPE_DIQ:
842 	default:
843 		pdbgdev->dbgdev_register = dbgdev_register_diq;
844 		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
845 		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
846 		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
847 		break;
848 	}
849 
850 }
851