1 /* $NetBSD: kfd_device_queue_manager.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $ */
2
3 /*
4 * Copyright 2014 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25
26 #include <sys/cdefs.h>
27 __KERNEL_RCSID(0, "$NetBSD: kfd_device_queue_manager.c,v 1.3 2021/12/18 23:44:59 riastradh Exp $");
28
29 #include <linux/ratelimit.h>
30 #include <linux/printk.h>
31 #include <linux/slab.h>
32 #include <linux/list.h>
33 #include <linux/types.h>
34 #include <linux/bitops.h>
35 #include <linux/sched.h>
36 #include "kfd_priv.h"
37 #include "kfd_device_queue_manager.h"
38 #include "kfd_mqd_manager.h"
39 #include "cik_regs.h"
40 #include "kfd_kernel_queue.h"
41 #include "amdgpu_amdkfd.h"
42
43 /* Size of the per-pipe EOP queue */
44 #define CIK_HPD_EOP_BYTES_LOG2 11
45 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
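/* i.e. 1 << 11 = 2048 bytes (2 KiB) of EOP space per pipe */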
46
47 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
48 unsigned int pasid, unsigned int vmid);
49
50 static int execute_queues_cpsch(struct device_queue_manager *dqm,
51 enum kfd_unmap_queues_filter filter,
52 uint32_t filter_param);
53 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
54 enum kfd_unmap_queues_filter filter,
55 uint32_t filter_param);
56
57 static int map_queues_cpsch(struct device_queue_manager *dqm);
58
59 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
60 struct queue *q);
61
62 static inline void deallocate_hqd(struct device_queue_manager *dqm,
63 struct queue *q);
64 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
65 static int allocate_sdma_queue(struct device_queue_manager *dqm,
66 struct queue *q);
67 static void kfd_process_hw_exception(struct work_struct *work);
68
69 static inline
70 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
71 {
72 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
73 return KFD_MQD_TYPE_SDMA;
74 return KFD_MQD_TYPE_CP;
75 }
76
77 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
78 {
79 int i;
80 int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
81 + pipe * dqm->dev->shared_resources.num_queue_per_pipe;
82
83 /* queue is available for KFD usage if bit is 1 */
84 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
85 if (test_bit(pipe_offset + i,
86 dqm->dev->shared_resources.queue_bitmap))
87 return true;
88 return false;
89 }
90
91 unsigned int get_queues_num(struct device_queue_manager *dqm)
92 {
93 return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
94 KGD_MAX_QUEUES);
95 }
96
97 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
98 {
99 return dqm->dev->shared_resources.num_queue_per_pipe;
100 }
101
102 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
103 {
104 return dqm->dev->shared_resources.num_pipe_per_mec;
105 }
106
107 static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
108 {
109 return dqm->dev->device_info->num_sdma_engines;
110 }
111
112 static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
113 {
114 return dqm->dev->device_info->num_xgmi_sdma_engines;
115 }
116
117 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
118 {
119 return dqm->dev->device_info->num_sdma_engines
120 * dqm->dev->device_info->num_sdma_queues_per_engine;
121 }
122
123 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
124 {
125 return dqm->dev->device_info->num_xgmi_sdma_engines
126 * dqm->dev->device_info->num_sdma_queues_per_engine;
127 }
128
129 void program_sh_mem_settings(struct device_queue_manager *dqm,
130 struct qcm_process_device *qpd)
131 {
132 return dqm->dev->kfd2kgd->program_sh_mem_settings(
133 dqm->dev->kgd, qpd->vmid,
134 qpd->sh_mem_config,
135 qpd->sh_mem_ape1_base,
136 qpd->sh_mem_ape1_limit,
137 qpd->sh_mem_bases);
138 }
139
140 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
141 {
142 struct kfd_dev *dev = qpd->dqm->dev;
143
144 if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
145 /* On pre-SOC15 chips we need to use the queue ID to
146 * preserve the user mode ABI.
147 */
148 q->doorbell_id = q->properties.queue_id;
149 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
150 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
151 /* For SDMA queues on SOC15 with 8-byte doorbell, use static
152 * doorbell assignments based on the engine and queue id.
153 * The doorbell index distance between RLC (2*i) and (2*i+1)
154 * for a SDMA engine is 512.
155 */
156 uint32_t *idx_offset =
157 dev->shared_resources.sdma_doorbell_idx;
158
159 q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
160 + (q->properties.sdma_queue_id & 1)
161 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
162 + (q->properties.sdma_queue_id >> 1);
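/*
 * For example, with KFD_QUEUE_DOORBELL_MIRROR_OFFSET of 512 (per the
 * comment above), sdma_queue_id 3 on engine e maps to doorbell
 * idx_offset[e] + 512 + 1: odd queues land 512 doorbells above the even
 * ones, and queues of the same parity occupy adjacent doorbell slots.
 */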
163 } else {
164 /* For CP queues on SOC15 reserve a free doorbell ID */
165 unsigned int found;
166
167 found = find_first_zero_bit(qpd->doorbell_bitmap,
168 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
169 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
170 pr_debug("No doorbells available");
171 return -EBUSY;
172 }
173 set_bit(found, qpd->doorbell_bitmap);
174 q->doorbell_id = found;
175 }
176
177 q->properties.doorbell_off =
178 kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
179 q->doorbell_id);
180
181 return 0;
182 }
183
184 static void deallocate_doorbell(struct qcm_process_device *qpd,
185 struct queue *q)
186 {
187 unsigned int old;
188 struct kfd_dev *dev = qpd->dqm->dev;
189
190 if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
191 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
192 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
193 return;
194
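/* For CP queues on SOC15, allocate_doorbell() reserved this bit; warn if it was already clear. */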
195 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
196 WARN_ON(!old);
197 }
198
199 static int allocate_vmid(struct device_queue_manager *dqm,
200 struct qcm_process_device *qpd,
201 struct queue *q)
202 {
203 int allocated_vmid = -1, i;
204
205 for (i = dqm->dev->vm_info.first_vmid_kfd;
206 i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
207 if (!dqm->vmid_pasid[i]) {
208 allocated_vmid = i;
209 break;
210 }
211 }
212
213 if (allocated_vmid < 0) {
214 pr_err("no more vmid to allocate\n");
215 return -ENOSPC;
216 }
217
218 pr_debug("vmid allocated: %d\n", allocated_vmid);
219
220 dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
221
222 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
223
224 qpd->vmid = allocated_vmid;
225 q->properties.vmid = allocated_vmid;
226
227 program_sh_mem_settings(dqm, qpd);
228
229 /* qpd->page_table_base is set earlier when register_process()
230 * is called, i.e. when the first queue is created.
231 */
232 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
233 qpd->vmid,
234 qpd->page_table_base);
235 /* invalidate the VM context after pasid and vmid mapping is set up */
236 kfd_flush_tlb(qpd_to_pdd(qpd));
237
238 if (dqm->dev->kfd2kgd->set_scratch_backing_va)
239 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
240 qpd->sh_hidden_private_base, qpd->vmid);
241
242 return 0;
243 }
244
245 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
246 struct qcm_process_device *qpd)
247 {
248 const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
249 int ret;
250
251 if (!qpd->ib_kaddr)
252 return -ENOMEM;
253
254 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
255 if (ret)
256 return ret;
257
258 return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
259 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
260 pmf->release_mem_size / sizeof(uint32_t));
261 }
262
263 static void deallocate_vmid(struct device_queue_manager *dqm,
264 struct qcm_process_device *qpd,
265 struct queue *q)
266 {
267 /* On GFX v7, CP doesn't flush TC at dequeue */
268 if (q->device->device_info->asic_family == CHIP_HAWAII)
269 if (flush_texture_cache_nocpsch(q->device, qpd))
270 pr_err("Failed to flush TC\n");
271
272 kfd_flush_tlb(qpd_to_pdd(qpd));
273
274 /* Release the vmid mapping */
275 set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
276 dqm->vmid_pasid[qpd->vmid] = 0;
277
278 qpd->vmid = 0;
279 q->properties.vmid = 0;
280 }
281
282 static int create_queue_nocpsch(struct device_queue_manager *dqm,
283 struct queue *q,
284 struct qcm_process_device *qpd)
285 {
286 struct mqd_manager *mqd_mgr;
287 int retval;
288
289 print_queue(q);
290
291 dqm_lock(dqm);
292
293 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
294 pr_warn("Can't create new usermode queue because %d queues were already created\n",
295 dqm->total_queue_count);
296 retval = -EPERM;
297 goto out_unlock;
298 }
299
300 if (list_empty(&qpd->queues_list)) {
301 retval = allocate_vmid(dqm, qpd, q);
302 if (retval)
303 goto out_unlock;
304 }
305 q->properties.vmid = qpd->vmid;
306 /*
307 * Eviction state logic: mark all queues as evicted, even ones
308 * not currently active. Restoring inactive queues later only
309 * updates the is_evicted flag but is a no-op otherwise.
310 */
311 q->properties.is_evicted = !!qpd->evicted;
312
313 q->properties.tba_addr = qpd->tba_addr;
314 q->properties.tma_addr = qpd->tma_addr;
315
316 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
317 q->properties.type)];
318 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
319 retval = allocate_hqd(dqm, q);
320 if (retval)
321 goto deallocate_vmid;
322 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
323 q->pipe, q->queue);
324 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
325 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
326 retval = allocate_sdma_queue(dqm, q);
327 if (retval)
328 goto deallocate_vmid;
329 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
330 }
331
332 retval = allocate_doorbell(qpd, q);
333 if (retval)
334 goto out_deallocate_hqd;
335
336 /* Temporarily release dqm lock to avoid a circular lock dependency */
337 dqm_unlock(dqm);
338 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
339 dqm_lock(dqm);
340
341 if (!q->mqd_mem_obj) {
342 retval = -ENOMEM;
343 goto out_deallocate_doorbell;
344 }
345 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
346 &q->gart_mqd_addr, &q->properties);
347 if (q->properties.is_active) {
348 if (!dqm->sched_running) {
349 WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
350 goto add_queue_to_list;
351 }
352
353 if (WARN(q->process->mm != current->mm,
354 "should only run in user thread"))
355 retval = -EFAULT;
356 else
357 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
358 q->queue, &q->properties, current->mm);
359 if (retval)
360 goto out_free_mqd;
361 }
362
363 add_queue_to_list:
364 list_add(&q->list, &qpd->queues_list);
365 qpd->queue_count++;
366 if (q->properties.is_active)
367 dqm->queue_count++;
368
369 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
370 dqm->sdma_queue_count++;
371 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
372 dqm->xgmi_sdma_queue_count++;
373
374 /*
375 * Unconditionally increment this counter, regardless of the queue's
376 * type or whether the queue is active.
377 */
378 dqm->total_queue_count++;
379 pr_debug("Total of %d queues are accountable so far\n",
380 dqm->total_queue_count);
381 goto out_unlock;
382
383 out_free_mqd:
384 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
385 out_deallocate_doorbell:
386 deallocate_doorbell(qpd, q);
387 out_deallocate_hqd:
388 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
389 deallocate_hqd(dqm, q);
390 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
391 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
392 deallocate_sdma_queue(dqm, q);
393 deallocate_vmid:
394 if (list_empty(&qpd->queues_list))
395 deallocate_vmid(dqm, qpd, q);
396 out_unlock:
397 dqm_unlock(dqm);
398 return retval;
399 }
400
401 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
402 {
403 bool set;
404 int pipe, bit, i;
405
406 set = false;
407
408 for (pipe = dqm->next_pipe_to_allocate, i = 0;
409 i < get_pipes_per_mec(dqm);
410 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
411
412 if (!is_pipe_enabled(dqm, 0, pipe))
413 continue;
414
415 if (dqm->allocated_queues[pipe] != 0) {
416 bit = ffs(dqm->allocated_queues[pipe]) - 1;
417 dqm->allocated_queues[pipe] &= ~(1 << bit);
418 q->pipe = pipe;
419 q->queue = bit;
420 set = true;
421 break;
422 }
423 }
424
425 if (!set)
426 return -EBUSY;
427
428 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
429 /* Round-robin (horizontal) HQD allocation: resume the search on the next pipe */
430 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
431
432 return 0;
433 }
434
435 static inline void deallocate_hqd(struct device_queue_manager *dqm,
436 struct queue *q)
437 {
438 dqm->allocated_queues[q->pipe] |= (1 << q->queue);
439 }
440
441 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
442 * to avoid unsynchronized access
443 */
444 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
445 struct qcm_process_device *qpd,
446 struct queue *q)
447 {
448 int retval;
449 struct mqd_manager *mqd_mgr;
450
451 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
452 q->properties.type)];
453
454 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
455 deallocate_hqd(dqm, q);
456 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
457 dqm->sdma_queue_count--;
458 deallocate_sdma_queue(dqm, q);
459 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
460 dqm->xgmi_sdma_queue_count--;
461 deallocate_sdma_queue(dqm, q);
462 } else {
463 pr_debug("q->properties.type %d is invalid\n",
464 q->properties.type);
465 return -EINVAL;
466 }
467 dqm->total_queue_count--;
468
469 deallocate_doorbell(qpd, q);
470
471 if (!dqm->sched_running) {
472 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
473 return 0;
474 }
475
476 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
477 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
478 KFD_UNMAP_LATENCY_MS,
479 q->pipe, q->queue);
480 if (retval == -ETIME)
481 qpd->reset_wavefronts = true;
482
483 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
484
485 list_del(&q->list);
486 if (list_empty(&qpd->queues_list)) {
487 if (qpd->reset_wavefronts) {
488 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
489 dqm->dev);
490 /* dbgdev_wave_reset_wavefronts has to be called before
491 * deallocate_vmid(), i.e. when vmid is still in use.
492 */
493 dbgdev_wave_reset_wavefronts(dqm->dev,
494 qpd->pqm->process);
495 qpd->reset_wavefronts = false;
496 }
497
498 deallocate_vmid(dqm, qpd, q);
499 }
500 qpd->queue_count--;
501 if (q->properties.is_active)
502 dqm->queue_count--;
503
504 return retval;
505 }
506
507 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
508 struct qcm_process_device *qpd,
509 struct queue *q)
510 {
511 int retval;
512
513 dqm_lock(dqm);
514 retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
515 dqm_unlock(dqm);
516
517 return retval;
518 }
519
520 static int update_queue(struct device_queue_manager *dqm, struct queue *q)
521 {
522 int retval = 0;
523 struct mqd_manager *mqd_mgr;
524 struct kfd_process_device *pdd;
525 bool prev_active = false;
526
527 dqm_lock(dqm);
528 pdd = kfd_get_process_device_data(q->device, q->process);
529 if (!pdd) {
530 retval = -ENODEV;
531 goto out_unlock;
532 }
533 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
534 q->properties.type)];
535
536 /* Save previous activity state for counters */
537 prev_active = q->properties.is_active;
538
539 /* Make sure the queue is unmapped before updating the MQD */
540 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
541 retval = unmap_queues_cpsch(dqm,
542 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
543 if (retval) {
544 pr_err("unmap queue failed\n");
545 goto out_unlock;
546 }
547 } else if (prev_active &&
548 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
549 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
550 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
551
552 if (!dqm->sched_running) {
553 WARN_ONCE(1, "Update non-HWS queue while stopped\n");
554 goto out_unlock;
555 }
556
557 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
558 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
559 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
560 if (retval) {
561 pr_err("destroy mqd failed\n");
562 goto out_unlock;
563 }
564 }
565
566 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
567
568 /*
569 * check active state vs. the previous state and modify
570 * counter accordingly. map_queues_cpsch uses the
571 * dqm->queue_count to determine whether a new runlist must be
572 * uploaded.
573 */
574 if (q->properties.is_active && !prev_active)
575 dqm->queue_count++;
576 else if (!q->properties.is_active && prev_active)
577 dqm->queue_count--;
578
579 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
580 retval = map_queues_cpsch(dqm);
581 else if (q->properties.is_active &&
582 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
583 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
584 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
585 if (WARN(q->process->mm != current->mm,
586 "should only run in user thread"))
587 retval = -EFAULT;
588 else
589 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
590 q->pipe, q->queue,
591 &q->properties, current->mm);
592 }
593
594 out_unlock:
595 dqm_unlock(dqm);
596 return retval;
597 }
598
599 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
600 struct qcm_process_device *qpd)
601 {
602 struct queue *q;
603 struct mqd_manager *mqd_mgr;
604 struct kfd_process_device *pdd;
605 int retval, ret = 0;
606
607 dqm_lock(dqm);
608 if (qpd->evicted++ > 0) /* already evicted, do nothing */
609 goto out;
610
611 pdd = qpd_to_pdd(qpd);
612 pr_info_ratelimited("Evicting PASID 0x%x queues\n",
613 pdd->process->pasid);
614
615 /* Mark all queues as evicted. Deactivate all active queues on
616 * the qpd.
617 */
618 list_for_each_entry(q, &qpd->queues_list, list) {
619 q->properties.is_evicted = true;
620 if (!q->properties.is_active)
621 continue;
622
623 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
624 q->properties.type)];
625 q->properties.is_active = false;
626 dqm->queue_count--;
627
628 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
629 continue;
630
631 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
632 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
633 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
634 if (retval && !ret)
635 /* Return the first error, but keep going to
636 * maintain a consistent eviction state
637 */
638 ret = retval;
639 }
640
641 out:
642 dqm_unlock(dqm);
643 return ret;
644 }
645
646 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
647 struct qcm_process_device *qpd)
648 {
649 struct queue *q;
650 struct kfd_process_device *pdd;
651 int retval = 0;
652
653 dqm_lock(dqm);
654 if (qpd->evicted++ > 0) /* already evicted, do nothing */
655 goto out;
656
657 pdd = qpd_to_pdd(qpd);
658 pr_info_ratelimited("Evicting PASID 0x%x queues\n",
659 pdd->process->pasid);
660
661 /* Mark all queues as evicted. Deactivate all active queues on
662 * the qpd.
663 */
664 list_for_each_entry(q, &qpd->queues_list, list) {
665 q->properties.is_evicted = true;
666 if (!q->properties.is_active)
667 continue;
668
669 q->properties.is_active = false;
670 dqm->queue_count--;
671 }
672 retval = execute_queues_cpsch(dqm,
673 qpd->is_debug ?
674 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
675 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
676
677 out:
678 dqm_unlock(dqm);
679 return retval;
680 }
681
682 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
683 struct qcm_process_device *qpd)
684 {
685 struct mm_struct *mm = NULL;
686 struct queue *q;
687 struct mqd_manager *mqd_mgr;
688 struct kfd_process_device *pdd;
689 uint64_t pd_base;
690 int retval, ret = 0;
691
692 pdd = qpd_to_pdd(qpd);
693 /* Retrieve PD base */
694 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
695
696 dqm_lock(dqm);
697 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
698 goto out;
699 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
700 qpd->evicted--;
701 goto out;
702 }
703
704 pr_info_ratelimited("Restoring PASID 0x%x queues\n",
705 pdd->process->pasid);
706
707 /* Update PD Base in QPD */
708 qpd->page_table_base = pd_base;
709 pr_debug("Updated PD address to 0x%llx\n", pd_base);
710
711 if (!list_empty(&qpd->queues_list)) {
712 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
713 dqm->dev->kgd,
714 qpd->vmid,
715 qpd->page_table_base);
716 kfd_flush_tlb(pdd);
717 }
718
719 /* Take a safe reference to the mm_struct, which may otherwise
720 * disappear even while the kfd_process is still referenced.
721 */
722 mm = get_task_mm(pdd->process->lead_thread);
723 if (!mm) {
724 ret = -EFAULT;
725 goto out;
726 }
727
728 /* Remove the eviction flags. Activate queues that are not
729 * inactive for other reasons.
730 */
731 list_for_each_entry(q, &qpd->queues_list, list) {
732 q->properties.is_evicted = false;
733 if (!QUEUE_IS_ACTIVE(q->properties))
734 continue;
735
736 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
737 q->properties.type)];
738 q->properties.is_active = true;
739 dqm->queue_count++;
740
741 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
742 continue;
743
744 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
745 q->queue, &q->properties, mm);
746 if (retval && !ret)
747 /* Return the first error, but keep going to
748 * maintain a consistent eviction state
749 */
750 ret = retval;
751 }
752 qpd->evicted = 0;
753 out:
754 if (mm)
755 mmput(mm);
756 dqm_unlock(dqm);
757 return ret;
758 }
759
760 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
761 struct qcm_process_device *qpd)
762 {
763 struct queue *q;
764 struct kfd_process_device *pdd;
765 uint64_t pd_base;
766 int retval = 0;
767
768 pdd = qpd_to_pdd(qpd);
769 /* Retrieve PD base */
770 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
771
772 dqm_lock(dqm);
773 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
774 goto out;
775 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
776 qpd->evicted--;
777 goto out;
778 }
779
780 pr_info_ratelimited("Restoring PASID 0x%x queues\n",
781 pdd->process->pasid);
782
783 /* Update PD Base in QPD */
784 qpd->page_table_base = pd_base;
785 pr_debug("Updated PD address to 0x%llx\n", pd_base);
786
787 /* activate all active queues on the qpd */
788 list_for_each_entry(q, &qpd->queues_list, list) {
789 q->properties.is_evicted = false;
790 if (!QUEUE_IS_ACTIVE(q->properties))
791 continue;
792
793 q->properties.is_active = true;
794 dqm->queue_count++;
795 }
796 retval = execute_queues_cpsch(dqm,
797 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
798 qpd->evicted = 0;
799 out:
800 dqm_unlock(dqm);
801 return retval;
802 }
803
804 static int register_process(struct device_queue_manager *dqm,
805 struct qcm_process_device *qpd)
806 {
807 struct device_process_node *n;
808 struct kfd_process_device *pdd;
809 uint64_t pd_base;
810 int retval;
811
812 n = kzalloc(sizeof(*n), GFP_KERNEL);
813 if (!n)
814 return -ENOMEM;
815
816 n->qpd = qpd;
817
818 pdd = qpd_to_pdd(qpd);
819 /* Retrieve PD base */
820 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
821
822 dqm_lock(dqm);
823 list_add(&n->list, &dqm->queues);
824
825 /* Update PD Base in QPD */
826 qpd->page_table_base = pd_base;
827 pr_debug("Updated PD address to 0x%llx\n", pd_base);
828
829 retval = dqm->asic_ops.update_qpd(dqm, qpd);
830
831 dqm->processes_count++;
832
833 dqm_unlock(dqm);
834
835 /* Outside the DQM lock because under the DQM lock we can't do
836 * reclaim or take other locks that others hold while reclaiming.
837 */
838 kfd_inc_compute_active(dqm->dev);
839
840 return retval;
841 }
842
843 static int unregister_process(struct device_queue_manager *dqm,
844 struct qcm_process_device *qpd)
845 {
846 int retval;
847 struct device_process_node *cur, *next;
848
849 pr_debug("qpd->queues_list is %s\n",
850 list_empty(&qpd->queues_list) ? "empty" : "not empty");
851
852 retval = 0;
853 dqm_lock(dqm);
854
855 list_for_each_entry_safe(cur, next, &dqm->queues, list) {
856 if (qpd == cur->qpd) {
857 list_del(&cur->list);
858 kfree(cur);
859 dqm->processes_count--;
860 goto out;
861 }
862 }
863 /* qpd not found in dqm list */
864 retval = 1;
865 out:
866 dqm_unlock(dqm);
867
868 /* Outside the DQM lock because under the DQM lock we can't do
869 * reclaim or take other locks that others hold while reclaiming.
870 */
871 if (!retval)
872 kfd_dec_compute_active(dqm->dev);
873
874 return retval;
875 }
876
877 static int
878 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
879 unsigned int vmid)
880 {
881 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
882 dqm->dev->kgd, pasid, vmid);
883 }
884
885 static void init_interrupts(struct device_queue_manager *dqm)
886 {
887 unsigned int i;
888
889 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
890 if (is_pipe_enabled(dqm, 0, i))
891 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
892 }
893
894 static int initialize_nocpsch(struct device_queue_manager *dqm)
895 {
896 int pipe, queue;
897
898 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
899
900 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
901 sizeof(unsigned int), GFP_KERNEL);
902 if (!dqm->allocated_queues)
903 return -ENOMEM;
904
905 mutex_init(&dqm->lock_hidden);
906 INIT_LIST_HEAD(&dqm->queues);
907 dqm->queue_count = dqm->next_pipe_to_allocate = 0;
908 dqm->sdma_queue_count = 0;
909 dqm->xgmi_sdma_queue_count = 0;
910
911 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
912 int pipe_offset = pipe * get_queues_per_pipe(dqm);
913
914 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
915 if (test_bit(pipe_offset + queue,
916 dqm->dev->shared_resources.queue_bitmap))
917 dqm->allocated_queues[pipe] |= 1 << queue;
918 }
919
920 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
921
922 dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
923 dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
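/*
 * e.g. 2 SDMA engines x 8 queues per engine gives 16 queues, so
 * sdma_bitmap = ~0ULL >> 48 = 0xFFFF: one free bit per allocatable queue.
 */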
924
925 return 0;
926 }
927
928 static void uninitialize(struct device_queue_manager *dqm)
929 {
930 int i;
931
932 WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
933
934 kfree(dqm->allocated_queues);
935 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
936 kfree(dqm->mqd_mgrs[i]);
937 mutex_destroy(&dqm->lock_hidden);
938 }
939
940 static int start_nocpsch(struct device_queue_manager *dqm)
941 {
942 pr_info("SW scheduler is used");
943 init_interrupts(dqm);
944
945 if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
946 return pm_init(&dqm->packets, dqm);
947 dqm->sched_running = true;
948
949 return 0;
950 }
951
952 static int stop_nocpsch(struct device_queue_manager *dqm)
953 {
954 if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
955 pm_uninit(&dqm->packets, false);
956 dqm->sched_running = false;
957
958 return 0;
959 }
960
961 static void pre_reset(struct device_queue_manager *dqm)
962 {
963 dqm_lock(dqm);
964 dqm->is_resetting = true;
965 dqm_unlock(dqm);
966 }
967
968 static int allocate_sdma_queue(struct device_queue_manager *dqm,
969 struct queue *q)
970 {
971 int bit;
972
973 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
974 if (dqm->sdma_bitmap == 0)
975 return -ENOMEM;
976 bit = __ffs64(dqm->sdma_bitmap);
977 dqm->sdma_bitmap &= ~(1ULL << bit);
978 q->sdma_id = bit;
979 q->properties.sdma_engine_id = q->sdma_id %
980 get_num_sdma_engines(dqm);
981 q->properties.sdma_queue_id = q->sdma_id /
982 get_num_sdma_engines(dqm);
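/* e.g. with 2 PCIe-optimized engines, sdma_id 5 becomes engine 1,
 * queue 2: queues are spread round-robin across the engines.
 */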
983 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
984 if (dqm->xgmi_sdma_bitmap == 0)
985 return -ENOMEM;
986 bit = __ffs64(dqm->xgmi_sdma_bitmap);
987 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
988 q->sdma_id = bit;
989 /* sdma_engine_id is sdma id including
990 * both PCIe-optimized SDMAs and XGMI-
991 * optimized SDMAs. The calculation below
992 * assumes the first N engines are always
993 * PCIe-optimized ones
994 */
995 q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
996 q->sdma_id % get_num_xgmi_sdma_engines(dqm);
997 q->properties.sdma_queue_id = q->sdma_id /
998 get_num_xgmi_sdma_engines(dqm);
999 }
1000
1001 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1002 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1003
1004 return 0;
1005 }
1006
1007 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1008 struct queue *q)
1009 {
1010 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1011 if (q->sdma_id >= get_num_sdma_queues(dqm))
1012 return;
1013 dqm->sdma_bitmap |= (1ULL << q->sdma_id);
1014 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1015 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1016 return;
1017 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
1018 }
1019 }
1020
1021 /*
1022 * Device Queue Manager implementation for cp scheduler
1023 */
1024
1025 static int set_sched_resources(struct device_queue_manager *dqm)
1026 {
1027 int i, mec;
1028 struct scheduling_resources res;
1029
1030 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
1031
1032 res.queue_mask = 0;
1033 for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1034 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1035 / dqm->dev->shared_resources.num_pipe_per_mec;
1036
1037 if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
1038 continue;
1039
1040 /* only acquire queues from the first MEC */
1041 if (mec > 0)
1042 continue;
1043
1044 /* This situation may be hit in the future if a new HW
1045 * generation exposes more than 64 queues. If so, the
1046 * definition of res.queue_mask needs updating
1047 */
1048 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1049 pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1050 break;
1051 }
1052
1053 res.queue_mask |= (1ull << i);
1054 }
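/* Each set bit i of queue_mask marks a queue slot on the first MEC that
 * amdgpu left to KFD, where i = pipe * num_queue_per_pipe + queue.
 */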
1055 res.gws_mask = ~0ull;
1056 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1057
1058 pr_debug("Scheduling resources:\n"
1059 "vmid mask: 0x%8X\n"
1060 "queue mask: 0x%8llX\n",
1061 res.vmid_mask, res.queue_mask);
1062
1063 return pm_send_set_resources(&dqm->packets, &res);
1064 }
1065
1066 static int initialize_cpsch(struct device_queue_manager *dqm)
1067 {
1068 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1069
1070 mutex_init(&dqm->lock_hidden);
1071 INIT_LIST_HEAD(&dqm->queues);
1072 dqm->queue_count = dqm->processes_count = 0;
1073 dqm->sdma_queue_count = 0;
1074 dqm->xgmi_sdma_queue_count = 0;
1075 dqm->active_runlist = false;
1076 dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
1077 dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
1078
1079 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1080
1081 return 0;
1082 }
1083
1084 static int start_cpsch(struct device_queue_manager *dqm)
1085 {
1086 int retval;
1087
1088 retval = 0;
1089
1090 retval = pm_init(&dqm->packets, dqm);
1091 if (retval)
1092 goto fail_packet_manager_init;
1093
1094 retval = set_sched_resources(dqm);
1095 if (retval)
1096 goto fail_set_sched_resources;
1097
1098 pr_debug("Allocating fence memory\n");
1099
1100 /* allocate fence memory on the gart */
1101 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1102 &dqm->fence_mem);
1103
1104 if (retval)
1105 goto fail_allocate_vidmem;
1106
1107 dqm->fence_addr = dqm->fence_mem->cpu_ptr;
1108 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1109
1110 init_interrupts(dqm);
1111
1112 dqm_lock(dqm);
1113 /* clear hang status when driver try to start the hw scheduler */
1114 dqm->is_hws_hang = false;
1115 dqm->is_resetting = false;
1116 dqm->sched_running = true;
1117 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1118 dqm_unlock(dqm);
1119
1120 return 0;
1121 fail_allocate_vidmem:
1122 fail_set_sched_resources:
1123 pm_uninit(&dqm->packets, false);
1124 fail_packet_manager_init:
1125 return retval;
1126 }
1127
1128 static int stop_cpsch(struct device_queue_manager *dqm)
1129 {
1130 bool hanging;
1131
1132 dqm_lock(dqm);
1133 if (!dqm->is_hws_hang)
1134 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1135 hanging = dqm->is_hws_hang || dqm->is_resetting;
1136 dqm->sched_running = false;
1137 dqm_unlock(dqm);
1138
1139 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1140 pm_uninit(&dqm->packets, hanging);
1141
1142 return 0;
1143 }
1144
1145 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1146 struct kernel_queue *kq,
1147 struct qcm_process_device *qpd)
1148 {
1149 dqm_lock(dqm);
1150 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1151 pr_warn("Can't create new kernel queue because %d queues were already created\n",
1152 dqm->total_queue_count);
1153 dqm_unlock(dqm);
1154 return -EPERM;
1155 }
1156
1157 /*
1158 * Unconditionally increment this counter, regardless of the queue's
1159 * type or whether the queue is active.
1160 */
1161 dqm->total_queue_count++;
1162 pr_debug("Total of %d queues are accountable so far\n",
1163 dqm->total_queue_count);
1164
1165 list_add(&kq->list, &qpd->priv_queue_list);
1166 dqm->queue_count++;
1167 qpd->is_debug = true;
1168 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1169 dqm_unlock(dqm);
1170
1171 return 0;
1172 }
1173
1174 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1175 struct kernel_queue *kq,
1176 struct qcm_process_device *qpd)
1177 {
1178 dqm_lock(dqm);
1179 list_del(&kq->list);
1180 dqm->queue_count--;
1181 qpd->is_debug = false;
1182 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1183 /*
1184 * Unconditionally decrement this counter, regardless of the queue's
1185 * type.
1186 */
1187 dqm->total_queue_count--;
1188 pr_debug("Total of %d queues are accountable so far\n",
1189 dqm->total_queue_count);
1190 dqm_unlock(dqm);
1191 }
1192
1193 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1194 struct qcm_process_device *qpd)
1195 {
1196 int retval;
1197 struct mqd_manager *mqd_mgr;
1198
1199 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1200 pr_warn("Can't create new usermode queue because %d queues were already created\n",
1201 dqm->total_queue_count);
1202 retval = -EPERM;
1203 goto out;
1204 }
1205
1206 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1207 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1208 dqm_lock(dqm);
1209 retval = allocate_sdma_queue(dqm, q);
1210 dqm_unlock(dqm);
1211 if (retval)
1212 goto out;
1213 }
1214
1215 retval = allocate_doorbell(qpd, q);
1216 if (retval)
1217 goto out_deallocate_sdma_queue;
1218
1219 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1220 q->properties.type)];
1221
1222 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1223 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1224 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1225 q->properties.tba_addr = qpd->tba_addr;
1226 q->properties.tma_addr = qpd->tma_addr;
1227 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1228 if (!q->mqd_mem_obj) {
1229 retval = -ENOMEM;
1230 goto out_deallocate_doorbell;
1231 }
1232
1233 dqm_lock(dqm);
1234 /*
1235 * Eviction state logic: mark all queues as evicted, even ones
1236 * not currently active. Restoring inactive queues later only
1237 * updates the is_evicted flag but is a no-op otherwise.
1238 */
1239 q->properties.is_evicted = !!qpd->evicted;
1240 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1241 &q->gart_mqd_addr, &q->properties);
1242
1243 list_add(&q->list, &qpd->queues_list);
1244 qpd->queue_count++;
1245
1246 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1247 dqm->sdma_queue_count++;
1248 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1249 dqm->xgmi_sdma_queue_count++;
1250
1251 if (q->properties.is_active) {
1252 dqm->queue_count++;
1253 retval = execute_queues_cpsch(dqm,
1254 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1255 }
1256
1257 /*
1258 * Unconditionally increment this counter, regardless of the queue's
1259 * type or whether the queue is active.
1260 */
1261 dqm->total_queue_count++;
1262
1263 pr_debug("Total of %d queues are accountable so far\n",
1264 dqm->total_queue_count);
1265
1266 dqm_unlock(dqm);
1267 return retval;
1268
1269 out_deallocate_doorbell:
1270 deallocate_doorbell(qpd, q);
1271 out_deallocate_sdma_queue:
1272 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1273 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1274 dqm_lock(dqm);
1275 deallocate_sdma_queue(dqm, q);
1276 dqm_unlock(dqm);
1277 }
1278 out:
1279 return retval;
1280 }
1281
1282 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
1283 unsigned int fence_value,
1284 unsigned int timeout_ms)
1285 {
1286 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
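/* Poll the fence value, yielding the CPU between checks, until it
 * matches or the timeout expires.
 */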
1287
1288 while (*fence_addr != fence_value) {
1289 if (time_after(jiffies, end_jiffies)) {
1290 pr_err("qcm fence wait loop timeout expired\n");
1291 /* In HWS case, this is used to halt the driver thread
1292 * in order not to mess up CP states before doing
1293 * scandumps for FW debugging.
1294 */
1295 while (halt_if_hws_hang)
1296 schedule();
1297
1298 return -ETIME;
1299 }
1300 schedule();
1301 }
1302
1303 return 0;
1304 }
1305
1306 static int unmap_sdma_queues(struct device_queue_manager *dqm)
1307 {
1308 int i, retval = 0;
1309
1310 for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
1311 dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
1312 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
1313 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
1314 if (retval)
1315 return retval;
1316 }
1317 return retval;
1318 }
1319
1320 /* dqm->lock mutex has to be locked before calling this function */
1321 static int map_queues_cpsch(struct device_queue_manager *dqm)
1322 {
1323 int retval;
1324
1325 if (!dqm->sched_running)
1326 return 0;
1327 if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
1328 return 0;
1329 if (dqm->active_runlist)
1330 return 0;
1331
1332 retval = pm_send_runlist(&dqm->packets, &dqm->queues);
1333 pr_debug("%s sent runlist\n", __func__);
1334 if (retval) {
1335 pr_err("failed to execute runlist\n");
1336 return retval;
1337 }
1338 dqm->active_runlist = true;
1339
1340 return retval;
1341 }
1342
1343 /* dqm->lock mutex has to be locked before calling this function */
1344 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1345 enum kfd_unmap_queues_filter filter,
1346 uint32_t filter_param)
1347 {
1348 int retval = 0;
1349
1350 if (!dqm->sched_running)
1351 return 0;
1352 if (dqm->is_hws_hang)
1353 return -EIO;
1354 if (!dqm->active_runlist)
1355 return retval;
1356
1357 pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
1358 dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
1359
1360 if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
1361 unmap_sdma_queues(dqm);
1362
1363 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
1364 filter, filter_param, false, 0);
1365 if (retval)
1366 return retval;
1367
1368 *dqm->fence_addr = KFD_FENCE_INIT;
1369 pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
1370 KFD_FENCE_COMPLETED);
1371 /* A non-zero return here means the fence never signalled, i.e. the wait timed out */
1372 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1373 queue_preemption_timeout_ms);
1374 if (retval) {
1375 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1376 dqm->is_hws_hang = true;
1377 /* It's possible we're detecting a HWS hang in the
1378 * middle of a GPU reset. No need to schedule another
1379 * reset in this case.
1380 */
1381 if (!dqm->is_resetting)
1382 schedule_work(&dqm->hw_exception_work);
1383 return retval;
1384 }
1385
1386 pm_release_ib(&dqm->packets);
1387 dqm->active_runlist = false;
1388
1389 return retval;
1390 }
1391
1392 /* dqm->lock mutex has to be locked before calling this function */
1393 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1394 enum kfd_unmap_queues_filter filter,
1395 uint32_t filter_param)
1396 {
1397 int retval;
1398
1399 if (dqm->is_hws_hang)
1400 return -EIO;
1401 retval = unmap_queues_cpsch(dqm, filter, filter_param);
1402 if (retval)
1403 return retval;
1404
1405 return map_queues_cpsch(dqm);
1406 }
1407
1408 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1409 struct qcm_process_device *qpd,
1410 struct queue *q)
1411 {
1412 int retval;
1413 struct mqd_manager *mqd_mgr;
1414
1415 retval = 0;
1416
1417 /* remove queue from list to prevent rescheduling after preemption */
1418 dqm_lock(dqm);
1419
1420 if (qpd->is_debug) {
1421 /*
1422 * error, currently we do not allow to destroy a queue
1423 * of a currently debugged process
1424 */
1425 retval = -EBUSY;
1426 goto failed_try_destroy_debugged_queue;
1427
1428 }
1429
1430 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1431 q->properties.type)];
1432
1433 deallocate_doorbell(qpd, q);
1434
1435 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1436 dqm->sdma_queue_count--;
1437 deallocate_sdma_queue(dqm, q);
1438 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1439 dqm->xgmi_sdma_queue_count--;
1440 deallocate_sdma_queue(dqm, q);
1441 }
1442
1443 list_del(&q->list);
1444 qpd->queue_count--;
1445 if (q->properties.is_active) {
1446 dqm->queue_count--;
1447 retval = execute_queues_cpsch(dqm,
1448 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1449 if (retval == -ETIME)
1450 qpd->reset_wavefronts = true;
1451 }
1452
1453 /*
1454 * Unconditionally decrement this counter, regardless of the queue's
1455 * type
1456 */
1457 dqm->total_queue_count--;
1458 pr_debug("Total of %d queues are accountable so far\n",
1459 dqm->total_queue_count);
1460
1461 dqm_unlock(dqm);
1462
1463 /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
1464 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1465
1466 return retval;
1467
1468 failed_try_destroy_debugged_queue:
1469
1470 dqm_unlock(dqm);
1471 return retval;
1472 }
1473
1474 /*
1475 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1476 * stay in user mode.
1477 */
1478 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1479 /* APE1 limit is inclusive and 64K aligned. */
1480 #define APE1_LIMIT_ALIGNMENT 0xFFFF
1481
1482 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1483 struct qcm_process_device *qpd,
1484 enum cache_policy default_policy,
1485 enum cache_policy alternate_policy,
1486 void __user *alternate_aperture_base,
1487 uint64_t alternate_aperture_size)
1488 {
1489 bool retval = true;
1490
1491 if (!dqm->asic_ops.set_cache_memory_policy)
1492 return retval;
1493
1494 dqm_lock(dqm);
1495
1496 if (alternate_aperture_size == 0) {
1497 /* base > limit disables APE1 */
1498 qpd->sh_mem_ape1_base = 1;
1499 qpd->sh_mem_ape1_limit = 0;
1500 } else {
1501 /*
1502 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1503 * SH_MEM_APE1_BASE[31:0], 0x0000 }
1504 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1505 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1506 * Verify that the base and size parameters can be
1507 * represented in this format and convert them.
1508 * Additionally restrict APE1 to user-mode addresses.
1509 */
1510
1511 uint64_t base = (uintptr_t)alternate_aperture_base;
1512 uint64_t limit = base + alternate_aperture_size - 1;
1513
1514 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1515 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1516 retval = false;
1517 goto out;
1518 }
1519
1520 qpd->sh_mem_ape1_base = base >> 16;
1521 qpd->sh_mem_ape1_limit = limit >> 16;
1522 }
1523
1524 retval = dqm->asic_ops.set_cache_memory_policy(
1525 dqm,
1526 qpd,
1527 default_policy,
1528 alternate_policy,
1529 alternate_aperture_base,
1530 alternate_aperture_size);
1531
1532 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1533 program_sh_mem_settings(dqm, qpd);
1534
1535 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1536 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1537 qpd->sh_mem_ape1_limit);
1538
1539 out:
1540 dqm_unlock(dqm);
1541 return retval;
1542 }
1543
1544 static int set_trap_handler(struct device_queue_manager *dqm,
1545 struct qcm_process_device *qpd,
1546 uint64_t tba_addr,
1547 uint64_t tma_addr)
1548 {
1549 uint64_t *tma;
1550
1551 if (dqm->dev->cwsr_enabled) {
1552 /* Jump from CWSR trap handler to user trap */
1553 tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1554 tma[0] = tba_addr;
1555 tma[1] = tma_addr;
1556 } else {
1557 qpd->tba_addr = tba_addr;
1558 qpd->tma_addr = tma_addr;
1559 }
1560
1561 return 0;
1562 }
1563
1564 static int process_termination_nocpsch(struct device_queue_manager *dqm,
1565 struct qcm_process_device *qpd)
1566 {
1567 struct queue *q, *next;
1568 struct device_process_node *cur, *next_dpn;
1569 int retval = 0;
1570 bool found = false;
1571
1572 dqm_lock(dqm);
1573
1574 /* Clear all user mode queues */
1575 list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1576 int ret;
1577
1578 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
1579 if (ret)
1580 retval = ret;
1581 }
1582
1583 /* Unregister process */
1584 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1585 if (qpd == cur->qpd) {
1586 list_del(&cur->list);
1587 kfree(cur);
1588 dqm->processes_count--;
1589 found = true;
1590 break;
1591 }
1592 }
1593
1594 dqm_unlock(dqm);
1595
1596 /* Outside the DQM lock because under the DQM lock we can't do
1597 * reclaim or take other locks that others hold while reclaiming.
1598 */
1599 if (found)
1600 kfd_dec_compute_active(dqm->dev);
1601
1602 return retval;
1603 }
1604
1605 static int get_wave_state(struct device_queue_manager *dqm,
1606 struct queue *q,
1607 void __user *ctl_stack,
1608 u32 *ctl_stack_used_size,
1609 u32 *save_area_used_size)
1610 {
1611 struct mqd_manager *mqd_mgr;
1612 int r;
1613
1614 dqm_lock(dqm);
1615
1616 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
1617 q->properties.is_active || !q->device->cwsr_enabled) {
1618 r = -EINVAL;
1619 goto dqm_unlock;
1620 }
1621
1622 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
1623
1624 if (!mqd_mgr->get_wave_state) {
1625 r = -EINVAL;
1626 goto dqm_unlock;
1627 }
1628
1629 r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
1630 ctl_stack_used_size, save_area_used_size);
1631
1632 dqm_unlock:
1633 dqm_unlock(dqm);
1634 return r;
1635 }
1636
1637 static int process_termination_cpsch(struct device_queue_manager *dqm,
1638 struct qcm_process_device *qpd)
1639 {
1640 int retval;
1641 struct queue *q, *next;
1642 struct kernel_queue *kq, *kq_next;
1643 struct mqd_manager *mqd_mgr;
1644 struct device_process_node *cur, *next_dpn;
1645 enum kfd_unmap_queues_filter filter =
1646 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1647 bool found = false;
1648
1649 retval = 0;
1650
1651 dqm_lock(dqm);
1652
1653 /* Clean all kernel queues */
1654 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1655 list_del(&kq->list);
1656 dqm->queue_count--;
1657 qpd->is_debug = false;
1658 dqm->total_queue_count--;
1659 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1660 }
1661
1662 /* Clear all user mode queues */
1663 list_for_each_entry(q, &qpd->queues_list, list) {
1664 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1665 dqm->sdma_queue_count--;
1666 deallocate_sdma_queue(dqm, q);
1667 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1668 dqm->xgmi_sdma_queue_count--;
1669 deallocate_sdma_queue(dqm, q);
1670 }
1671
1672 if (q->properties.is_active)
1673 dqm->queue_count--;
1674
1675 dqm->total_queue_count--;
1676 }
1677
1678 /* Unregister process */
1679 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1680 if (qpd == cur->qpd) {
1681 list_del(&cur->list);
1682 kfree(cur);
1683 dqm->processes_count--;
1684 found = true;
1685 break;
1686 }
1687 }
1688
1689 retval = execute_queues_cpsch(dqm, filter, 0);
1690 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
1691 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
1692 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1693 qpd->reset_wavefronts = false;
1694 }
1695
1696 dqm_unlock(dqm);
1697
1698 /* Outside the DQM lock because under the DQM lock we can't do
1699 * reclaim or take other locks that others hold while reclaiming.
1700 */
1701 if (found)
1702 kfd_dec_compute_active(dqm->dev);
1703
1704 /* Lastly, free mqd resources.
1705 * Do free_mqd() after dqm_unlock to avoid circular locking.
1706 */
1707 list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1708 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1709 q->properties.type)];
1710 list_del(&q->list);
1711 qpd->queue_count--;
1712 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1713 }
1714
1715 return retval;
1716 }
1717
1718 static int init_mqd_managers(struct device_queue_manager *dqm)
1719 {
1720 int i, j;
1721 struct mqd_manager *mqd_mgr;
1722
1723 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
1724 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
1725 if (!mqd_mgr) {
1726 pr_err("mqd manager [%d] initialization failed\n", i);
1727 goto out_free;
1728 }
1729 dqm->mqd_mgrs[i] = mqd_mgr;
1730 }
1731
1732 return 0;
1733
1734 out_free:
1735 for (j = 0; j < i; j++) {
1736 kfree(dqm->mqd_mgrs[j]);
1737 dqm->mqd_mgrs[j] = NULL;
1738 }
1739
1740 return -ENOMEM;
1741 }
1742
1743 /* Allocate one hiq mqd (HWS) and all SDMA mqds in one contiguous trunk */
1744 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
1745 {
1746 int retval;
1747 struct kfd_dev *dev = dqm->dev;
1748 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
1749 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
1750 (dev->device_info->num_sdma_engines +
1751 dev->device_info->num_xgmi_sdma_engines) *
1752 dev->device_info->num_sdma_queues_per_engine +
1753 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
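/* The trunk is sized for the HIQ MQD plus one SDMA MQD for every queue
 * on every SDMA engine (PCIe-optimized and XGMI-optimized), all in a
 * single GTT allocation.
 */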
1754
1755 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
1756 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
1757 (void *)&(mem_obj->cpu_ptr), true);
1758
1759 return retval;
1760 }
1761
1762 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1763 {
1764 struct device_queue_manager *dqm;
1765
1766 pr_debug("Loading device queue manager\n");
1767
1768 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
1769 if (!dqm)
1770 return NULL;
1771
1772 switch (dev->device_info->asic_family) {
1773 /* HWS is not available on Hawaii. */
1774 case CHIP_HAWAII:
1775 /* HWS depends on CWSR for timely dequeue. CWSR is not
1776 * available on Tonga.
1777 *
1778 * FIXME: This argument also applies to Kaveri.
1779 */
1780 case CHIP_TONGA:
1781 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1782 break;
1783 default:
1784 dqm->sched_policy = sched_policy;
1785 break;
1786 }
1787
1788 dqm->dev = dev;
1789 switch (dqm->sched_policy) {
1790 case KFD_SCHED_POLICY_HWS:
1791 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1792 /* initialize dqm for cp scheduling */
1793 dqm->ops.create_queue = create_queue_cpsch;
1794 dqm->ops.initialize = initialize_cpsch;
1795 dqm->ops.start = start_cpsch;
1796 dqm->ops.stop = stop_cpsch;
1797 dqm->ops.pre_reset = pre_reset;
1798 dqm->ops.destroy_queue = destroy_queue_cpsch;
1799 dqm->ops.update_queue = update_queue;
1800 dqm->ops.register_process = register_process;
1801 dqm->ops.unregister_process = unregister_process;
1802 dqm->ops.uninitialize = uninitialize;
1803 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1804 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1805 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1806 dqm->ops.set_trap_handler = set_trap_handler;
1807 dqm->ops.process_termination = process_termination_cpsch;
1808 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1809 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1810 dqm->ops.get_wave_state = get_wave_state;
1811 break;
1812 case KFD_SCHED_POLICY_NO_HWS:
1813 /* initialize dqm for no cp scheduling */
1814 dqm->ops.start = start_nocpsch;
1815 dqm->ops.stop = stop_nocpsch;
1816 dqm->ops.pre_reset = pre_reset;
1817 dqm->ops.create_queue = create_queue_nocpsch;
1818 dqm->ops.destroy_queue = destroy_queue_nocpsch;
1819 dqm->ops.update_queue = update_queue;
1820 dqm->ops.register_process = register_process;
1821 dqm->ops.unregister_process = unregister_process;
1822 dqm->ops.initialize = initialize_nocpsch;
1823 dqm->ops.uninitialize = uninitialize;
1824 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1825 dqm->ops.set_trap_handler = set_trap_handler;
1826 dqm->ops.process_termination = process_termination_nocpsch;
1827 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1828 dqm->ops.restore_process_queues =
1829 restore_process_queues_nocpsch;
1830 dqm->ops.get_wave_state = get_wave_state;
1831 break;
1832 default:
1833 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
1834 goto out_free;
1835 }
1836
1837 switch (dev->device_info->asic_family) {
1838 case CHIP_CARRIZO:
1839 device_queue_manager_init_vi(&dqm->asic_ops);
1840 break;
1841
1842 case CHIP_KAVERI:
1843 device_queue_manager_init_cik(&dqm->asic_ops);
1844 break;
1845
1846 case CHIP_HAWAII:
1847 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1848 break;
1849
1850 case CHIP_TONGA:
1851 case CHIP_FIJI:
1852 case CHIP_POLARIS10:
1853 case CHIP_POLARIS11:
1854 case CHIP_POLARIS12:
1855 case CHIP_VEGAM:
1856 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1857 break;
1858
1859 case CHIP_VEGA10:
1860 case CHIP_VEGA12:
1861 case CHIP_VEGA20:
1862 case CHIP_RAVEN:
1863 case CHIP_RENOIR:
1864 case CHIP_ARCTURUS:
1865 device_queue_manager_init_v9(&dqm->asic_ops);
1866 break;
1867 case CHIP_NAVI10:
1868 case CHIP_NAVI12:
1869 case CHIP_NAVI14:
1870 device_queue_manager_init_v10_navi10(&dqm->asic_ops);
1871 break;
1872 default:
1873 WARN(1, "Unexpected ASIC family %u",
1874 dev->device_info->asic_family);
1875 goto out_free;
1876 }
1877
1878 if (init_mqd_managers(dqm))
1879 goto out_free;
1880
1881 if (allocate_hiq_sdma_mqd(dqm)) {
1882 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
1883 goto out_free;
1884 }
1885
1886 if (!dqm->ops.initialize(dqm))
1887 return dqm;
1888
1889 out_free:
1890 kfree(dqm);
1891 return NULL;
1892 }
1893
1894 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
1895 struct kfd_mem_obj *mqd)
1896 {
1897 WARN(!mqd, "No hiq sdma mqd trunk to free");
1898
1899 amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
1900 }
1901
1902 void device_queue_manager_uninit(struct device_queue_manager *dqm)
1903 {
1904 dqm->ops.uninitialize(dqm);
1905 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
1906 kfree(dqm);
1907 }
1908
1909 int kfd_process_vm_fault(struct device_queue_manager *dqm,
1910 unsigned int pasid)
1911 {
1912 struct kfd_process_device *pdd;
1913 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1914 int ret = 0;
1915
1916 if (!p)
1917 return -EINVAL;
1918 pdd = kfd_get_process_device_data(dqm->dev, p);
1919 if (pdd)
1920 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
1921 kfd_unref_process(p);
1922
1923 return ret;
1924 }
1925
1926 static void kfd_process_hw_exception(struct work_struct *work)
1927 {
1928 struct device_queue_manager *dqm = container_of(work,
1929 struct device_queue_manager, hw_exception_work);
1930 amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
1931 }
1932
1933 #if defined(CONFIG_DEBUG_FS)
1934
1935 static void seq_reg_dump(struct seq_file *m,
1936 uint32_t (*dump)[2], uint32_t n_regs)
1937 {
1938 uint32_t i, count;
1939
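/* Registers at consecutive addresses are grouped on one line: an
 * address label followed by up to seven more values; a gap in the
 * address sequence starts a new labelled line.
 */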
1940 for (i = 0, count = 0; i < n_regs; i++) {
1941 if (count == 0 ||
1942 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
1943 seq_printf(m, "%s %08x: %08x",
1944 i ? "\n" : "",
1945 dump[i][0], dump[i][1]);
1946 count = 7;
1947 } else {
1948 seq_printf(m, " %08x", dump[i][1]);
1949 count--;
1950 }
1951 }
1952
1953 seq_puts(m, "\n");
1954 }
1955
1956 int dqm_debugfs_hqds(struct seq_file *m, void *data)
1957 {
1958 struct device_queue_manager *dqm = data;
1959 uint32_t (*dump)[2], n_regs;
1960 int pipe, queue;
1961 int r = 0;
1962
1963 if (!dqm->sched_running) {
1964 seq_printf(m, " Device is stopped\n");
1965
1966 return 0;
1967 }
1968
1969 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
1970 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
1971 &dump, &n_regs);
1972 if (!r) {
1973 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
1974 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
1975 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
1976 KFD_CIK_HIQ_QUEUE);
1977 seq_reg_dump(m, dump, n_regs);
1978
1979 kfree(dump);
1980 }
1981
1982 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1983 int pipe_offset = pipe * get_queues_per_pipe(dqm);
1984
1985 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
1986 if (!test_bit(pipe_offset + queue,
1987 dqm->dev->shared_resources.queue_bitmap))
1988 continue;
1989
1990 r = dqm->dev->kfd2kgd->hqd_dump(
1991 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
1992 if (r)
1993 break;
1994
1995 seq_printf(m, " CP Pipe %d, Queue %d\n",
1996 pipe, queue);
1997 seq_reg_dump(m, dump, n_regs);
1998
1999 kfree(dump);
2000 }
2001 }
2002
2003 for (pipe = 0; pipe < get_num_sdma_engines(dqm) +
2004 get_num_xgmi_sdma_engines(dqm); pipe++) {
2005 for (queue = 0;
2006 queue < dqm->dev->device_info->num_sdma_queues_per_engine;
2007 queue++) {
2008 r = dqm->dev->kfd2kgd->hqd_sdma_dump(
2009 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
2010 if (r)
2011 break;
2012
2013 seq_printf(m, " SDMA Engine %d, RLC %d\n",
2014 pipe, queue);
2015 seq_reg_dump(m, dump, n_regs);
2016
2017 kfree(dump);
2018 }
2019 }
2020
2021 return r;
2022 }
2023
2024 int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
2025 {
2026 int r = 0;
2027
2028 dqm_lock(dqm);
2029 dqm->active_runlist = true;
2030 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
2031 dqm_unlock(dqm);
2032
2033 return r;
2034 }
2035
2036 #endif
2037