xref: /openbsd-src/sys/dev/pci/drm/amd/amdkfd/kfd_process.c (revision e6965b26415c7cee31dd25fe09685ed74fa01108)
11bb76ff1Sjsg // SPDX-License-Identifier: GPL-2.0 OR MIT
2fb4d8502Sjsg /*
31bb76ff1Sjsg  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4fb4d8502Sjsg  *
5fb4d8502Sjsg  * Permission is hereby granted, free of charge, to any person obtaining a
6fb4d8502Sjsg  * copy of this software and associated documentation files (the "Software"),
7fb4d8502Sjsg  * to deal in the Software without restriction, including without limitation
8fb4d8502Sjsg  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9fb4d8502Sjsg  * and/or sell copies of the Software, and to permit persons to whom the
10fb4d8502Sjsg  * Software is furnished to do so, subject to the following conditions:
11fb4d8502Sjsg  *
12fb4d8502Sjsg  * The above copyright notice and this permission notice shall be included in
13fb4d8502Sjsg  * all copies or substantial portions of the Software.
14fb4d8502Sjsg  *
15fb4d8502Sjsg  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16fb4d8502Sjsg  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17fb4d8502Sjsg  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18fb4d8502Sjsg  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19fb4d8502Sjsg  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20fb4d8502Sjsg  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21fb4d8502Sjsg  * OTHER DEALINGS IN THE SOFTWARE.
22fb4d8502Sjsg  */
23fb4d8502Sjsg 
24fb4d8502Sjsg #include <linux/mutex.h>
25fb4d8502Sjsg #include <linux/log2.h>
26fb4d8502Sjsg #include <linux/sched.h>
27fb4d8502Sjsg #include <linux/sched/mm.h>
28fb4d8502Sjsg #include <linux/sched/task.h>
29ad8b1aafSjsg #include <linux/mmu_context.h>
30fb4d8502Sjsg #include <linux/slab.h>
31fb4d8502Sjsg #include <linux/notifier.h>
32fb4d8502Sjsg #include <linux/compat.h>
33fb4d8502Sjsg #include <linux/mman.h>
34fb4d8502Sjsg #include <linux/file.h>
35c349dbc7Sjsg #include <linux/pm_runtime.h>
36c349dbc7Sjsg #include "amdgpu_amdkfd.h"
37c349dbc7Sjsg #include "amdgpu.h"
38fb4d8502Sjsg 
39fb4d8502Sjsg struct mm_struct;
40fb4d8502Sjsg 
41fb4d8502Sjsg #include "kfd_priv.h"
42fb4d8502Sjsg #include "kfd_device_queue_manager.h"
435ca02815Sjsg #include "kfd_svm.h"
441bb76ff1Sjsg #include "kfd_smi_events.h"
45f005ef32Sjsg #include "kfd_debug.h"
46fb4d8502Sjsg 
47fb4d8502Sjsg /*
48fb4d8502Sjsg  * List of struct kfd_process (field kfd_process).
49fb4d8502Sjsg  * Unique/indexed by mm_struct*
50fb4d8502Sjsg  */
51fb4d8502Sjsg DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
52f005ef32Sjsg DEFINE_MUTEX(kfd_processes_mutex);
53fb4d8502Sjsg 
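/*
 * SRCU protects read-side lookups in kfd_processes_table; entries are
 * removed under kfd_processes_mutex and removal is followed by
 * synchronize_srcu().
 */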
54fb4d8502Sjsg DEFINE_SRCU(kfd_processes_srcu);
55fb4d8502Sjsg 
56fb4d8502Sjsg /* For process termination handling */
57fb4d8502Sjsg static struct workqueue_struct *kfd_process_wq;
58fb4d8502Sjsg 
59fb4d8502Sjsg /* Ordered, single-threaded workqueue for restoring evicted
60fb4d8502Sjsg  * processes. Restoring multiple processes concurrently under memory
61fb4d8502Sjsg  * pressure can lead to processes blocking each other from validating
62fb4d8502Sjsg  * their BOs and result in a live-lock situation where processes
63fb4d8502Sjsg  * remain evicted indefinitely.
64fb4d8502Sjsg  */
65fb4d8502Sjsg static struct workqueue_struct *kfd_restore_wq;
66fb4d8502Sjsg 
671bb76ff1Sjsg static struct kfd_process *find_process(const struct task_struct *thread,
681bb76ff1Sjsg 					bool ref);
69fb4d8502Sjsg static void kfd_process_ref_release(struct kref *ref);
70c349dbc7Sjsg static struct kfd_process *create_process(const struct task_struct *thread);
71fb4d8502Sjsg 
72fb4d8502Sjsg static void evict_process_worker(struct work_struct *work);
73fb4d8502Sjsg static void restore_process_worker(struct work_struct *work);
74fb4d8502Sjsg 
751bb76ff1Sjsg static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd);
761bb76ff1Sjsg 
77c349dbc7Sjsg struct kfd_procfs_tree {
78c349dbc7Sjsg 	struct kobject *kobj;
79c349dbc7Sjsg };
80c349dbc7Sjsg 
81c349dbc7Sjsg static struct kfd_procfs_tree procfs;
82c349dbc7Sjsg 
83ad8b1aafSjsg /*
84ad8b1aafSjsg  * Structure for SDMA activity tracking
85ad8b1aafSjsg  */
86ad8b1aafSjsg struct kfd_sdma_activity_handler_workarea {
87ad8b1aafSjsg 	struct work_struct sdma_activity_work;
88ad8b1aafSjsg 	struct kfd_process_device *pdd;
89ad8b1aafSjsg 	uint64_t sdma_activity_counter;
90ad8b1aafSjsg };
91ad8b1aafSjsg 
92ad8b1aafSjsg struct temp_sdma_queue_list {
93ad8b1aafSjsg 	uint64_t __user *rptr;
94ad8b1aafSjsg 	uint64_t sdma_val;
95ad8b1aafSjsg 	unsigned int queue_id;
96ad8b1aafSjsg 	struct list_head list;
97ad8b1aafSjsg };
98ad8b1aafSjsg 
99ad8b1aafSjsg static void kfd_sdma_activity_worker(struct work_struct *work)
100ad8b1aafSjsg {
101ad8b1aafSjsg 	struct kfd_sdma_activity_handler_workarea *workarea;
102ad8b1aafSjsg 	struct kfd_process_device *pdd;
103ad8b1aafSjsg 	uint64_t val;
104ad8b1aafSjsg 	struct mm_struct *mm;
105ad8b1aafSjsg 	struct queue *q;
106ad8b1aafSjsg 	struct qcm_process_device *qpd;
107ad8b1aafSjsg 	struct device_queue_manager *dqm;
108ad8b1aafSjsg 	int ret = 0;
109ad8b1aafSjsg 	struct temp_sdma_queue_list sdma_q_list;
110ad8b1aafSjsg 	struct temp_sdma_queue_list *sdma_q, *next;
111ad8b1aafSjsg 
112ad8b1aafSjsg 	workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
113ad8b1aafSjsg 				sdma_activity_work);
114ad8b1aafSjsg 
115ad8b1aafSjsg 	pdd = workarea->pdd;
116ad8b1aafSjsg 	if (!pdd)
117ad8b1aafSjsg 		return;
118ad8b1aafSjsg 	dqm = pdd->dev->dqm;
119ad8b1aafSjsg 	qpd = &pdd->qpd;
120ad8b1aafSjsg 	if (!dqm || !qpd)
121ad8b1aafSjsg 		return;
122ad8b1aafSjsg 	/*
123ad8b1aafSjsg 	 * Total SDMA activity is current SDMA activity + past SDMA activity.
124ad8b1aafSjsg 	 * The past SDMA count is stored in pdd.
125ad8b1aafSjsg 	 * To get the current activity counters for all active SDMA queues,
126ad8b1aafSjsg 	 * we loop over all SDMA queues and get their counts from user-space.
127ad8b1aafSjsg 	 *
128ad8b1aafSjsg 	 * We cannot call get_user() with dqm_lock held as it can cause
129ad8b1aafSjsg 	 * a circular lock dependency situation. To read the SDMA stats,
130ad8b1aafSjsg 	 * we need to do the following:
131ad8b1aafSjsg 	 *
132ad8b1aafSjsg 	 * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list,
133ad8b1aafSjsg 	 *    with dqm_lock/dqm_unlock().
134ad8b1aafSjsg 	 * 2. Call get_user() for each node in the temporary list without dqm_lock.
135ad8b1aafSjsg 	 *    Save the SDMA count for each node and also add the count to the total
136ad8b1aafSjsg 	 *    SDMA count.
137ad8b1aafSjsg 	 *    It's possible that, during this step, a few SDMA queue nodes were deleted
138ad8b1aafSjsg 	 *    from qpd->queues_list.
139ad8b1aafSjsg 	 * 3. Do a second pass over qpd->queues_list to check if any nodes were deleted.
140ad8b1aafSjsg 	 *    If a node was deleted, its SDMA count would be captured in the SDMA
141ad8b1aafSjsg 	 *    past activity counter. So subtract the SDMA counter stored in step 2
142ad8b1aafSjsg 	 *    for this node from the total SDMA count.
143ad8b1aafSjsg 	 */
144ad8b1aafSjsg 	INIT_LIST_HEAD(&sdma_q_list.list);
145ad8b1aafSjsg 
146ad8b1aafSjsg 	/*
147ad8b1aafSjsg 	 * Create the temp list of all SDMA queues
148ad8b1aafSjsg 	 */
149ad8b1aafSjsg 	dqm_lock(dqm);
150ad8b1aafSjsg 
151ad8b1aafSjsg 	list_for_each_entry(q, &qpd->queues_list, list) {
152ad8b1aafSjsg 		if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
153ad8b1aafSjsg 		    (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
154ad8b1aafSjsg 			continue;
155ad8b1aafSjsg 
156ad8b1aafSjsg 		sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL);
157ad8b1aafSjsg 		if (!sdma_q) {
158ad8b1aafSjsg 			dqm_unlock(dqm);
159ad8b1aafSjsg 			goto cleanup;
160ad8b1aafSjsg 		}
161ad8b1aafSjsg 
162ad8b1aafSjsg 		INIT_LIST_HEAD(&sdma_q->list);
163ad8b1aafSjsg 		sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
164ad8b1aafSjsg 		sdma_q->queue_id = q->properties.queue_id;
165ad8b1aafSjsg 		list_add_tail(&sdma_q->list, &sdma_q_list.list);
166ad8b1aafSjsg 	}
167ad8b1aafSjsg 
168ad8b1aafSjsg 	/*
169ad8b1aafSjsg 	 * If the temp list is empty, then no SDMA queue nodes were found in
170ad8b1aafSjsg 	 * qpd->queues_list. Return the past activity count as the total SDMA
171ad8b1aafSjsg 	 * count.
172ad8b1aafSjsg 	 */
173ad8b1aafSjsg 	if (list_empty(&sdma_q_list.list)) {
174ad8b1aafSjsg 		workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
175ad8b1aafSjsg 		dqm_unlock(dqm);
176ad8b1aafSjsg 		return;
177ad8b1aafSjsg 	}
178ad8b1aafSjsg 
179ad8b1aafSjsg 	dqm_unlock(dqm);
180ad8b1aafSjsg 
181ad8b1aafSjsg 	/*
182ad8b1aafSjsg 	 * Get the usage count for each SDMA queue in temp_list.
183ad8b1aafSjsg 	 */
184ad8b1aafSjsg 	mm = get_task_mm(pdd->process->lead_thread);
185ad8b1aafSjsg 	if (!mm)
186ad8b1aafSjsg 		goto cleanup;
187ad8b1aafSjsg 
188ad8b1aafSjsg 	kthread_use_mm(mm);
189ad8b1aafSjsg 
190ad8b1aafSjsg 	list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
191ad8b1aafSjsg 		val = 0;
192ad8b1aafSjsg 		ret = read_sdma_queue_counter(sdma_q->rptr, &val);
193ad8b1aafSjsg 		if (ret) {
194ad8b1aafSjsg 			pr_debug("Failed to read SDMA queue active counter for queue id: %d",
195ad8b1aafSjsg 				 sdma_q->queue_id);
196ad8b1aafSjsg 		} else {
197ad8b1aafSjsg 			sdma_q->sdma_val = val;
198ad8b1aafSjsg 			workarea->sdma_activity_counter += val;
199ad8b1aafSjsg 		}
200ad8b1aafSjsg 	}
201ad8b1aafSjsg 
202ad8b1aafSjsg 	kthread_unuse_mm(mm);
203ad8b1aafSjsg 	mmput(mm);
204ad8b1aafSjsg 
205ad8b1aafSjsg 	/*
206ad8b1aafSjsg 	 * Do a second iteration over qpd->queues_list to check if any SDMA
207ad8b1aafSjsg 	 * nodes were deleted while fetching the SDMA counters.
208ad8b1aafSjsg 	 */
209ad8b1aafSjsg 	dqm_lock(dqm);
210ad8b1aafSjsg 
211ad8b1aafSjsg 	workarea->sdma_activity_counter += pdd->sdma_past_activity_counter;
212ad8b1aafSjsg 
213ad8b1aafSjsg 	list_for_each_entry(q, &qpd->queues_list, list) {
214ad8b1aafSjsg 		if (list_empty(&sdma_q_list.list))
215ad8b1aafSjsg 			break;
216ad8b1aafSjsg 
217ad8b1aafSjsg 		if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
218ad8b1aafSjsg 		    (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
219ad8b1aafSjsg 			continue;
220ad8b1aafSjsg 
221ad8b1aafSjsg 		list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
222ad8b1aafSjsg 			if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
223ad8b1aafSjsg 			     (sdma_q->queue_id == q->properties.queue_id)) {
224ad8b1aafSjsg 				list_del(&sdma_q->list);
225ad8b1aafSjsg 				kfree(sdma_q);
226ad8b1aafSjsg 				break;
227ad8b1aafSjsg 			}
228ad8b1aafSjsg 		}
229ad8b1aafSjsg 	}
230ad8b1aafSjsg 
231ad8b1aafSjsg 	dqm_unlock(dqm);
232ad8b1aafSjsg 
233ad8b1aafSjsg 	/*
234ad8b1aafSjsg 	 * If the temp list is not empty, it implies some queues were deleted
235ad8b1aafSjsg 	 * from qpd->queues_list during the SDMA usage read. Subtract the SDMA
236ad8b1aafSjsg 	 * count for each node from the total SDMA count.
237ad8b1aafSjsg 	 */
238ad8b1aafSjsg 	list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
239ad8b1aafSjsg 		workarea->sdma_activity_counter -= sdma_q->sdma_val;
240ad8b1aafSjsg 		list_del(&sdma_q->list);
241ad8b1aafSjsg 		kfree(sdma_q);
242ad8b1aafSjsg 	}
243ad8b1aafSjsg 
244ad8b1aafSjsg 	return;
245ad8b1aafSjsg 
246ad8b1aafSjsg cleanup:
247ad8b1aafSjsg 	list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
248ad8b1aafSjsg 		list_del(&sdma_q->list);
249ad8b1aafSjsg 		kfree(sdma_q);
250ad8b1aafSjsg 	}
251ad8b1aafSjsg }
252ad8b1aafSjsg 
253ad8b1aafSjsg /**
2541bb76ff1Sjsg  * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
255ad8b1aafSjsg  * by the current process. Translates the acquired wave count into the number
256ad8b1aafSjsg  * of compute units that are occupied.
257ad8b1aafSjsg  *
2581bb76ff1Sjsg  * @attr: Handle of the attribute that allows reporting of wave count. The
259ad8b1aafSjsg  * attribute handle encapsulates the GPU device it is associated with, thereby
260ad8b1aafSjsg  * allowing collection of waves in flight, etc.
261ad8b1aafSjsg  * @buffer: Handle of the user-provided buffer updated with the wave count
262ad8b1aafSjsg  *
263ad8b1aafSjsg  * Return: Number of bytes written to user buffer or an error value
264ad8b1aafSjsg  */
265ad8b1aafSjsg static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
266ad8b1aafSjsg {
267ad8b1aafSjsg 	int cu_cnt;
268ad8b1aafSjsg 	int wave_cnt;
269ad8b1aafSjsg 	int max_waves_per_cu;
270f005ef32Sjsg 	struct kfd_node *dev = NULL;
271ad8b1aafSjsg 	struct kfd_process *proc = NULL;
272ad8b1aafSjsg 	struct kfd_process_device *pdd = NULL;
273ad8b1aafSjsg 
274ad8b1aafSjsg 	pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
275ad8b1aafSjsg 	dev = pdd->dev;
276ad8b1aafSjsg 	if (dev->kfd2kgd->get_cu_occupancy == NULL)
277ad8b1aafSjsg 		return -EINVAL;
278ad8b1aafSjsg 
279ad8b1aafSjsg 	cu_cnt = 0;
280ad8b1aafSjsg 	proc = pdd->process;
281ad8b1aafSjsg 	if (pdd->qpd.queue_count == 0) {
282ad8b1aafSjsg 		pr_debug("Gpu-Id: %d has no active queues for process %d\n",
283ad8b1aafSjsg 			 dev->id, proc->pasid);
284ad8b1aafSjsg 		return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
285ad8b1aafSjsg 	}
286ad8b1aafSjsg 
287ad8b1aafSjsg 	/* Collect wave count from the device if it supports it */
288ad8b1aafSjsg 	wave_cnt = 0;
289ad8b1aafSjsg 	max_waves_per_cu = 0;
2901bb76ff1Sjsg 	dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
291f005ef32Sjsg 			&max_waves_per_cu, 0);
292ad8b1aafSjsg 
293ad8b1aafSjsg 	/* Translate wave count to number of compute units */
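	/* Round up: a CU with any waves in flight counts as occupied, e.g.
	 * wave_cnt = 25 with max_waves_per_cu = 10 gives cu_cnt = 3.
	 */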
294ad8b1aafSjsg 	cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
295ad8b1aafSjsg 	return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
296ad8b1aafSjsg }
297ad8b1aafSjsg 
298c349dbc7Sjsg static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
299c349dbc7Sjsg 			       char *buffer)
300c349dbc7Sjsg {
301c349dbc7Sjsg 	if (strcmp(attr->name, "pasid") == 0) {
302c349dbc7Sjsg 		struct kfd_process *p = container_of(attr, struct kfd_process,
303c349dbc7Sjsg 						     attr_pasid);
304ad8b1aafSjsg 
305ad8b1aafSjsg 		return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
306ad8b1aafSjsg 	} else if (strncmp(attr->name, "vram_", 5) == 0) {
307ad8b1aafSjsg 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
308ad8b1aafSjsg 							      attr_vram);
309f7304f60Sjsg 		return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
310ad8b1aafSjsg 	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
311ad8b1aafSjsg 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
312ad8b1aafSjsg 							      attr_sdma);
313ad8b1aafSjsg 		struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
314ad8b1aafSjsg 
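		/*
		 * Read the counters from a worker: the worker temporarily
		 * adopts the process mm (kthread_use_mm) to fetch the
		 * user-space SDMA read pointers, and flush_work() below
		 * waits for it to finish before the result is reported.
		 */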
315*e6965b26Sjsg 		INIT_WORK_ONSTACK(&sdma_activity_work_handler.sdma_activity_work,
316ad8b1aafSjsg 				  kfd_sdma_activity_worker);
317ad8b1aafSjsg 
318ad8b1aafSjsg 		sdma_activity_work_handler.pdd = pdd;
319ad8b1aafSjsg 		sdma_activity_work_handler.sdma_activity_counter = 0;
320ad8b1aafSjsg 
321ad8b1aafSjsg 		schedule_work(&sdma_activity_work_handler.sdma_activity_work);
322ad8b1aafSjsg 
323ad8b1aafSjsg 		flush_work(&sdma_activity_work_handler.sdma_activity_work);
324*e6965b26Sjsg 		destroy_work_on_stack(&sdma_activity_work_handler.sdma_activity_work);
325ad8b1aafSjsg 
326ad8b1aafSjsg 		return snprintf(buffer, PAGE_SIZE, "%llu\n",
327ad8b1aafSjsg 				(sdma_activity_work_handler.sdma_activity_counter)/
328ad8b1aafSjsg 				 SDMA_ACTIVITY_DIVISOR);
329c349dbc7Sjsg 	} else {
330c349dbc7Sjsg 		pr_err("Invalid attribute");
331c349dbc7Sjsg 		return -EINVAL;
332c349dbc7Sjsg 	}
333c349dbc7Sjsg 
334ad8b1aafSjsg 	return 0;
335c349dbc7Sjsg }
336c349dbc7Sjsg 
337c349dbc7Sjsg static void kfd_procfs_kobj_release(struct kobject *kobj)
338c349dbc7Sjsg {
339c349dbc7Sjsg 	kfree(kobj);
340c349dbc7Sjsg }
341c349dbc7Sjsg 
342c349dbc7Sjsg static const struct sysfs_ops kfd_procfs_ops = {
343c349dbc7Sjsg 	.show = kfd_procfs_show,
344c349dbc7Sjsg };
345c349dbc7Sjsg 
346f005ef32Sjsg static const struct kobj_type procfs_type = {
347c349dbc7Sjsg 	.release = kfd_procfs_kobj_release,
348c349dbc7Sjsg 	.sysfs_ops = &kfd_procfs_ops,
349c349dbc7Sjsg };
350c349dbc7Sjsg 
351c349dbc7Sjsg void kfd_procfs_init(void)
352c349dbc7Sjsg {
353c349dbc7Sjsg 	int ret = 0;
354c349dbc7Sjsg 
355c349dbc7Sjsg 	procfs.kobj = kfd_alloc_struct(procfs.kobj);
356c349dbc7Sjsg 	if (!procfs.kobj)
357c349dbc7Sjsg 		return;
358c349dbc7Sjsg 
359c349dbc7Sjsg 	ret = kobject_init_and_add(procfs.kobj, &procfs_type,
360c349dbc7Sjsg 				   &kfd_device->kobj, "proc");
361c349dbc7Sjsg 	if (ret) {
362c349dbc7Sjsg 		pr_warn("Could not create procfs proc folder");
363c349dbc7Sjsg 		/* If we fail to create the procfs, clean up */
364c349dbc7Sjsg 		kfd_procfs_shutdown();
365c349dbc7Sjsg 	}
366c349dbc7Sjsg }
367c349dbc7Sjsg 
368c349dbc7Sjsg void kfd_procfs_shutdown(void)
369c349dbc7Sjsg {
370c349dbc7Sjsg 	if (procfs.kobj) {
371c349dbc7Sjsg 		kobject_del(procfs.kobj);
372c349dbc7Sjsg 		kobject_put(procfs.kobj);
373c349dbc7Sjsg 		procfs.kobj = NULL;
374c349dbc7Sjsg 	}
375c349dbc7Sjsg }
376c349dbc7Sjsg 
377c349dbc7Sjsg static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
378c349dbc7Sjsg 				     struct attribute *attr, char *buffer)
379c349dbc7Sjsg {
380c349dbc7Sjsg 	struct queue *q = container_of(kobj, struct queue, kobj);
381c349dbc7Sjsg 
382c349dbc7Sjsg 	if (!strcmp(attr->name, "size"))
383c349dbc7Sjsg 		return snprintf(buffer, PAGE_SIZE, "%llu",
384c349dbc7Sjsg 				q->properties.queue_size);
385c349dbc7Sjsg 	else if (!strcmp(attr->name, "type"))
386c349dbc7Sjsg 		return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
387c349dbc7Sjsg 	else if (!strcmp(attr->name, "gpuid"))
388c349dbc7Sjsg 		return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
389c349dbc7Sjsg 	else
390c349dbc7Sjsg 		pr_err("Invalid attribute");
391c349dbc7Sjsg 
392c349dbc7Sjsg 	return 0;
393c349dbc7Sjsg }
394c349dbc7Sjsg 
395ad8b1aafSjsg static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
396ad8b1aafSjsg 				     struct attribute *attr, char *buffer)
397ad8b1aafSjsg {
398ad8b1aafSjsg 	if (strcmp(attr->name, "evicted_ms") == 0) {
399ad8b1aafSjsg 		struct kfd_process_device *pdd = container_of(attr,
400ad8b1aafSjsg 				struct kfd_process_device,
401ad8b1aafSjsg 				attr_evict);
402ad8b1aafSjsg 		uint64_t evict_jiffies;
403ad8b1aafSjsg 
404ad8b1aafSjsg 		evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
405ad8b1aafSjsg 
406ad8b1aafSjsg 		return snprintf(buffer,
407ad8b1aafSjsg 				PAGE_SIZE,
408ad8b1aafSjsg 				"%llu\n",
409ad8b1aafSjsg 				jiffies64_to_msecs(evict_jiffies));
410ad8b1aafSjsg 
411ad8b1aafSjsg 	/* Sysfs handle that gets CU occupancy is per device */
412ad8b1aafSjsg 	} else if (strcmp(attr->name, "cu_occupancy") == 0) {
413ad8b1aafSjsg 		return kfd_get_cu_occupancy(attr, buffer);
414ad8b1aafSjsg 	} else {
415ad8b1aafSjsg 		pr_err("Invalid attribute");
416ad8b1aafSjsg 	}
417ad8b1aafSjsg 
418ad8b1aafSjsg 	return 0;
419ad8b1aafSjsg }
420ad8b1aafSjsg 
4215ca02815Sjsg static ssize_t kfd_sysfs_counters_show(struct kobject *kobj,
4225ca02815Sjsg 				       struct attribute *attr, char *buf)
4235ca02815Sjsg {
4245ca02815Sjsg 	struct kfd_process_device *pdd;
4255ca02815Sjsg 
4265ca02815Sjsg 	if (!strcmp(attr->name, "faults")) {
4275ca02815Sjsg 		pdd = container_of(attr, struct kfd_process_device,
4285ca02815Sjsg 				   attr_faults);
4295ca02815Sjsg 		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults));
4305ca02815Sjsg 	}
4315ca02815Sjsg 	if (!strcmp(attr->name, "page_in")) {
4325ca02815Sjsg 		pdd = container_of(attr, struct kfd_process_device,
4335ca02815Sjsg 				   attr_page_in);
4345ca02815Sjsg 		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in));
4355ca02815Sjsg 	}
4365ca02815Sjsg 	if (!strcmp(attr->name, "page_out")) {
4375ca02815Sjsg 		pdd = container_of(attr, struct kfd_process_device,
4385ca02815Sjsg 				   attr_page_out);
4395ca02815Sjsg 		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out));
4405ca02815Sjsg 	}
4415ca02815Sjsg 	return 0;
4425ca02815Sjsg }
4435ca02815Sjsg 
444c349dbc7Sjsg static struct attribute attr_queue_size = {
445c349dbc7Sjsg 	.name = "size",
446c349dbc7Sjsg 	.mode = KFD_SYSFS_FILE_MODE
447c349dbc7Sjsg };
448c349dbc7Sjsg 
449c349dbc7Sjsg static struct attribute attr_queue_type = {
450c349dbc7Sjsg 	.name = "type",
451c349dbc7Sjsg 	.mode = KFD_SYSFS_FILE_MODE
452c349dbc7Sjsg };
453c349dbc7Sjsg 
454c349dbc7Sjsg static struct attribute attr_queue_gpuid = {
455c349dbc7Sjsg 	.name = "gpuid",
456c349dbc7Sjsg 	.mode = KFD_SYSFS_FILE_MODE
457c349dbc7Sjsg };
458c349dbc7Sjsg 
459c349dbc7Sjsg static struct attribute *procfs_queue_attrs[] = {
460c349dbc7Sjsg 	&attr_queue_size,
461c349dbc7Sjsg 	&attr_queue_type,
462c349dbc7Sjsg 	&attr_queue_gpuid,
463c349dbc7Sjsg 	NULL
464c349dbc7Sjsg };
4651bb76ff1Sjsg ATTRIBUTE_GROUPS(procfs_queue);
466c349dbc7Sjsg 
467c349dbc7Sjsg static const struct sysfs_ops procfs_queue_ops = {
468c349dbc7Sjsg 	.show = kfd_procfs_queue_show,
469c349dbc7Sjsg };
470c349dbc7Sjsg 
471f005ef32Sjsg static const struct kobj_type procfs_queue_type = {
472c349dbc7Sjsg 	.sysfs_ops = &procfs_queue_ops,
4731bb76ff1Sjsg 	.default_groups = procfs_queue_groups,
474c349dbc7Sjsg };
475c349dbc7Sjsg 
476ad8b1aafSjsg static const struct sysfs_ops procfs_stats_ops = {
477ad8b1aafSjsg 	.show = kfd_procfs_stats_show,
478ad8b1aafSjsg };
479ad8b1aafSjsg 
480f005ef32Sjsg static const struct kobj_type procfs_stats_type = {
481ad8b1aafSjsg 	.sysfs_ops = &procfs_stats_ops,
4828f3bef5aSjsg 	.release = kfd_procfs_kobj_release,
483ad8b1aafSjsg };
484ad8b1aafSjsg 
4855ca02815Sjsg static const struct sysfs_ops sysfs_counters_ops = {
4865ca02815Sjsg 	.show = kfd_sysfs_counters_show,
4875ca02815Sjsg };
4885ca02815Sjsg 
489f005ef32Sjsg static const struct kobj_type sysfs_counters_type = {
4905ca02815Sjsg 	.sysfs_ops = &sysfs_counters_ops,
4915ca02815Sjsg 	.release = kfd_procfs_kobj_release,
4925ca02815Sjsg };
4935ca02815Sjsg 
494c349dbc7Sjsg int kfd_procfs_add_queue(struct queue *q)
495c349dbc7Sjsg {
496c349dbc7Sjsg 	struct kfd_process *proc;
497c349dbc7Sjsg 	int ret;
498c349dbc7Sjsg 
499c349dbc7Sjsg 	if (!q || !q->process)
500c349dbc7Sjsg 		return -EINVAL;
501c349dbc7Sjsg 	proc = q->process;
502c349dbc7Sjsg 
503c349dbc7Sjsg 	/* Create proc/<pid>/queues/<queue id> folder */
504c349dbc7Sjsg 	if (!proc->kobj_queues)
505c349dbc7Sjsg 		return -EFAULT;
506c349dbc7Sjsg 	ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
507c349dbc7Sjsg 			proc->kobj_queues, "%u", q->properties.queue_id);
508c349dbc7Sjsg 	if (ret < 0) {
509c349dbc7Sjsg 		pr_warn("Creating proc/<pid>/queues/%u failed",
510c349dbc7Sjsg 			q->properties.queue_id);
511c349dbc7Sjsg 		kobject_put(&q->kobj);
512c349dbc7Sjsg 		return ret;
513c349dbc7Sjsg 	}
514c349dbc7Sjsg 
515c349dbc7Sjsg 	return 0;
516c349dbc7Sjsg }
517c349dbc7Sjsg 
5185ca02815Sjsg static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr,
519ad8b1aafSjsg 				 char *name)
520ad8b1aafSjsg {
5215ca02815Sjsg 	int ret;
522ad8b1aafSjsg 
5235ca02815Sjsg 	if (!kobj || !attr || !name)
5245ca02815Sjsg 		return;
525ad8b1aafSjsg 
526ad8b1aafSjsg 	attr->name = name;
527ad8b1aafSjsg 	attr->mode = KFD_SYSFS_FILE_MODE;
528ad8b1aafSjsg 	sysfs_attr_init(attr);
529ad8b1aafSjsg 
5305ca02815Sjsg 	ret = sysfs_create_file(kobj, attr);
5315ca02815Sjsg 	if (ret)
5325ca02815Sjsg 		pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret);
533ad8b1aafSjsg }
534ad8b1aafSjsg 
5355ca02815Sjsg static void kfd_procfs_add_sysfs_stats(struct kfd_process *p)
536ad8b1aafSjsg {
5375ca02815Sjsg 	int ret;
5385ca02815Sjsg 	int i;
539ad8b1aafSjsg 	char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
540ad8b1aafSjsg 
5415ca02815Sjsg 	if (!p || !p->kobj)
5425ca02815Sjsg 		return;
543ad8b1aafSjsg 
544ad8b1aafSjsg 	/*
545ad8b1aafSjsg 	 * Create sysfs files for each GPU:
546ad8b1aafSjsg 	 * - proc/<pid>/stats_<gpuid>/
547ad8b1aafSjsg 	 * - proc/<pid>/stats_<gpuid>/evicted_ms
548ad8b1aafSjsg 	 * - proc/<pid>/stats_<gpuid>/cu_occupancy
549ad8b1aafSjsg 	 */
5505ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++) {
5515ca02815Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
552ad8b1aafSjsg 
553ad8b1aafSjsg 		snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
554ad8b1aafSjsg 				"stats_%u", pdd->dev->id);
5555ca02815Sjsg 		pdd->kobj_stats = kfd_alloc_struct(pdd->kobj_stats);
5565ca02815Sjsg 		if (!pdd->kobj_stats)
5575ca02815Sjsg 			return;
558ad8b1aafSjsg 
5595ca02815Sjsg 		ret = kobject_init_and_add(pdd->kobj_stats,
560ad8b1aafSjsg 					   &procfs_stats_type,
561ad8b1aafSjsg 					   p->kobj,
562ad8b1aafSjsg 					   stats_dir_filename);
563ad8b1aafSjsg 
564ad8b1aafSjsg 		if (ret) {
565ad8b1aafSjsg 			pr_warn("Creating KFD proc/stats_%s folder failed",
566ad8b1aafSjsg 				stats_dir_filename);
5675ca02815Sjsg 			kobject_put(pdd->kobj_stats);
5685ca02815Sjsg 			pdd->kobj_stats = NULL;
5695ca02815Sjsg 			return;
570ad8b1aafSjsg 		}
571ad8b1aafSjsg 
5725ca02815Sjsg 		kfd_sysfs_create_file(pdd->kobj_stats, &pdd->attr_evict,
5735ca02815Sjsg 				      "evicted_ms");
574ad8b1aafSjsg 		/* Add sysfs file to report compute unit occupancy */
5755ca02815Sjsg 		if (pdd->dev->kfd2kgd->get_cu_occupancy)
5765ca02815Sjsg 			kfd_sysfs_create_file(pdd->kobj_stats,
5775ca02815Sjsg 					      &pdd->attr_cu_occupancy,
5785ca02815Sjsg 					      "cu_occupancy");
579ad8b1aafSjsg 	}
580ad8b1aafSjsg }
581ad8b1aafSjsg 
5825ca02815Sjsg static void kfd_procfs_add_sysfs_counters(struct kfd_process *p)
583ad8b1aafSjsg {
584ad8b1aafSjsg 	int ret = 0;
5855ca02815Sjsg 	int i;
5865ca02815Sjsg 	char counters_dir_filename[MAX_SYSFS_FILENAME_LEN];
587ad8b1aafSjsg 
5885ca02815Sjsg 	if (!p || !p->kobj)
5895ca02815Sjsg 		return;
590ad8b1aafSjsg 
5915ca02815Sjsg 	/*
5925ca02815Sjsg 	 * Create sysfs files for each GPU which supports SVM
5935ca02815Sjsg 	 * - proc/<pid>/counters_<gpuid>/
5945ca02815Sjsg 	 * - proc/<pid>/counters_<gpuid>/faults
5955ca02815Sjsg 	 * - proc/<pid>/counters_<gpuid>/page_in
5965ca02815Sjsg 	 * - proc/<pid>/counters_<gpuid>/page_out
5975ca02815Sjsg 	 */
5985ca02815Sjsg 	for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
5995ca02815Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
6005ca02815Sjsg 		struct kobject *kobj_counters;
6015ca02815Sjsg 
6025ca02815Sjsg 		snprintf(counters_dir_filename, MAX_SYSFS_FILENAME_LEN,
6035ca02815Sjsg 			"counters_%u", pdd->dev->id);
6045ca02815Sjsg 		kobj_counters = kfd_alloc_struct(kobj_counters);
6055ca02815Sjsg 		if (!kobj_counters)
6065ca02815Sjsg 			return;
6075ca02815Sjsg 
6085ca02815Sjsg 		ret = kobject_init_and_add(kobj_counters, &sysfs_counters_type,
6095ca02815Sjsg 					   p->kobj, counters_dir_filename);
6105ca02815Sjsg 		if (ret) {
6115ca02815Sjsg 			pr_warn("Creating KFD proc/%s folder failed",
6125ca02815Sjsg 				counters_dir_filename);
6135ca02815Sjsg 			kobject_put(kobj_counters);
6145ca02815Sjsg 			return;
6155ca02815Sjsg 		}
6165ca02815Sjsg 
6175ca02815Sjsg 		pdd->kobj_counters = kobj_counters;
6185ca02815Sjsg 		kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults,
6195ca02815Sjsg 				      "faults");
6205ca02815Sjsg 		kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in,
6215ca02815Sjsg 				      "page_in");
6225ca02815Sjsg 		kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out,
6235ca02815Sjsg 				      "page_out");
6245ca02815Sjsg 	}
6255ca02815Sjsg }
6265ca02815Sjsg 
6275ca02815Sjsg static void kfd_procfs_add_sysfs_files(struct kfd_process *p)
6285ca02815Sjsg {
6295ca02815Sjsg 	int i;
6305ca02815Sjsg 
6315ca02815Sjsg 	if (!p || !p->kobj)
6325ca02815Sjsg 		return;
633ad8b1aafSjsg 
634ad8b1aafSjsg 	/*
635ad8b1aafSjsg 	 * Create sysfs files for each GPU:
636ad8b1aafSjsg 	 * - proc/<pid>/vram_<gpuid>
637ad8b1aafSjsg 	 * - proc/<pid>/sdma_<gpuid>
638ad8b1aafSjsg 	 */
6395ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++) {
6405ca02815Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
6415ca02815Sjsg 
642ad8b1aafSjsg 		snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
643ad8b1aafSjsg 			 pdd->dev->id);
6445ca02815Sjsg 		kfd_sysfs_create_file(p->kobj, &pdd->attr_vram,
6455ca02815Sjsg 				      pdd->vram_filename);
646ad8b1aafSjsg 
647ad8b1aafSjsg 		snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
648ad8b1aafSjsg 			 pdd->dev->id);
6495ca02815Sjsg 		kfd_sysfs_create_file(p->kobj, &pdd->attr_sdma,
6505ca02815Sjsg 					    pdd->sdma_filename);
651ad8b1aafSjsg 	}
652ad8b1aafSjsg }
653ad8b1aafSjsg 
654c349dbc7Sjsg void kfd_procfs_del_queue(struct queue *q)
655c349dbc7Sjsg {
656c349dbc7Sjsg 	if (!q)
657c349dbc7Sjsg 		return;
658c349dbc7Sjsg 
659c349dbc7Sjsg 	kobject_del(&q->kobj);
660c349dbc7Sjsg 	kobject_put(&q->kobj);
661c349dbc7Sjsg }
662fb4d8502Sjsg 
663fb4d8502Sjsg int kfd_process_create_wq(void)
664fb4d8502Sjsg {
665fb4d8502Sjsg 	if (!kfd_process_wq)
666fb4d8502Sjsg 		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
667fb4d8502Sjsg 	if (!kfd_restore_wq)
668fb4d8502Sjsg 		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
669fb4d8502Sjsg 
670fb4d8502Sjsg 	if (!kfd_process_wq || !kfd_restore_wq) {
671fb4d8502Sjsg 		kfd_process_destroy_wq();
672fb4d8502Sjsg 		return -ENOMEM;
673fb4d8502Sjsg 	}
674fb4d8502Sjsg 
675fb4d8502Sjsg 	return 0;
676fb4d8502Sjsg }
677fb4d8502Sjsg 
678fb4d8502Sjsg void kfd_process_destroy_wq(void)
679fb4d8502Sjsg {
680fb4d8502Sjsg 	if (kfd_process_wq) {
681fb4d8502Sjsg 		destroy_workqueue(kfd_process_wq);
682fb4d8502Sjsg 		kfd_process_wq = NULL;
683fb4d8502Sjsg 	}
684fb4d8502Sjsg 	if (kfd_restore_wq) {
685fb4d8502Sjsg 		destroy_workqueue(kfd_restore_wq);
686fb4d8502Sjsg 		kfd_restore_wq = NULL;
687fb4d8502Sjsg 	}
688fb4d8502Sjsg }
689fb4d8502Sjsg 
690fb4d8502Sjsg static void kfd_process_free_gpuvm(struct kgd_mem *mem,
691ec0ca080Sjsg 			struct kfd_process_device *pdd, void **kptr)
692fb4d8502Sjsg {
693f005ef32Sjsg 	struct kfd_node *dev = pdd->dev;
694fb4d8502Sjsg 
695ec0ca080Sjsg 	if (kptr && *kptr) {
6961bb76ff1Sjsg 		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
697ec0ca080Sjsg 		*kptr = NULL;
6981bb76ff1Sjsg 	}
6991bb76ff1Sjsg 
7001bb76ff1Sjsg 	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
7011bb76ff1Sjsg 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv,
7025ca02815Sjsg 					       NULL);
703fb4d8502Sjsg }
704fb4d8502Sjsg 
705fb4d8502Sjsg /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
706fb4d8502Sjsg  *	This function should only be called right after the process
707fb4d8502Sjsg  *	is created and when kfd_processes_mutex is still being held
708fb4d8502Sjsg  *	to avoid concurrency. Because of that exclusiveness, we do
709fb4d8502Sjsg  *	not need to take p->mutex.
710fb4d8502Sjsg  */
711fb4d8502Sjsg static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
712fb4d8502Sjsg 				   uint64_t gpu_va, uint32_t size,
7131bb76ff1Sjsg 				   uint32_t flags, struct kgd_mem **mem, void **kptr)
714fb4d8502Sjsg {
715f005ef32Sjsg 	struct kfd_node *kdev = pdd->dev;
716fb4d8502Sjsg 	int err;
717fb4d8502Sjsg 
7181bb76ff1Sjsg 	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
7191bb76ff1Sjsg 						 pdd->drm_priv, mem, NULL,
7201bb76ff1Sjsg 						 flags, false);
721fb4d8502Sjsg 	if (err)
722fb4d8502Sjsg 		goto err_alloc_mem;
723fb4d8502Sjsg 
7241bb76ff1Sjsg 	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem,
7251bb76ff1Sjsg 			pdd->drm_priv);
726fb4d8502Sjsg 	if (err)
727fb4d8502Sjsg 		goto err_map_mem;
728fb4d8502Sjsg 
7291bb76ff1Sjsg 	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true);
730fb4d8502Sjsg 	if (err) {
731fb4d8502Sjsg 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
732fb4d8502Sjsg 		goto sync_memory_failed;
733fb4d8502Sjsg 	}
734fb4d8502Sjsg 
735fb4d8502Sjsg 	if (kptr) {
7361bb76ff1Sjsg 		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(
7371bb76ff1Sjsg 				(struct kgd_mem *)*mem, kptr, NULL);
738fb4d8502Sjsg 		if (err) {
739fb4d8502Sjsg 			pr_debug("Map GTT BO to kernel failed\n");
7401bb76ff1Sjsg 			goto sync_memory_failed;
741fb4d8502Sjsg 		}
742fb4d8502Sjsg 	}
743fb4d8502Sjsg 
744fb4d8502Sjsg 	return err;
745fb4d8502Sjsg 
746fb4d8502Sjsg sync_memory_failed:
7471bb76ff1Sjsg 	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv);
748fb4d8502Sjsg 
749fb4d8502Sjsg err_map_mem:
7501bb76ff1Sjsg 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv,
7515ca02815Sjsg 					       NULL);
752fb4d8502Sjsg err_alloc_mem:
7531bb76ff1Sjsg 	*mem = NULL;
754fb4d8502Sjsg 	*kptr = NULL;
755fb4d8502Sjsg 	return err;
756fb4d8502Sjsg }
757fb4d8502Sjsg 
758fb4d8502Sjsg /* kfd_process_device_reserve_ib_mem - Reserve memory inside the
759fb4d8502Sjsg  *	process for IB usage. The memory reserved is for KFD to submit
760fb4d8502Sjsg  *	IBs to AMDGPU from the kernel. If the memory is reserved
761fb4d8502Sjsg  *	successfully, ib_kaddr will have the CPU/kernel
762fb4d8502Sjsg  *	address. Check ib_kaddr before accessing the memory.
763fb4d8502Sjsg  */
764fb4d8502Sjsg static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
765fb4d8502Sjsg {
766fb4d8502Sjsg 	struct qcm_process_device *qpd = &pdd->qpd;
767c349dbc7Sjsg 	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
768c349dbc7Sjsg 			KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
769c349dbc7Sjsg 			KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
770c349dbc7Sjsg 			KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
7711bb76ff1Sjsg 	struct kgd_mem *mem;
772fb4d8502Sjsg 	void *kaddr;
773fb4d8502Sjsg 	int ret;
774fb4d8502Sjsg 
775fb4d8502Sjsg 	if (qpd->ib_kaddr || !qpd->ib_base)
776fb4d8502Sjsg 		return 0;
777fb4d8502Sjsg 
778fb4d8502Sjsg 	/* ib_base is only set for dGPU */
779fb4d8502Sjsg 	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
7801bb76ff1Sjsg 				      &mem, &kaddr);
781fb4d8502Sjsg 	if (ret)
782fb4d8502Sjsg 		return ret;
783fb4d8502Sjsg 
7841bb76ff1Sjsg 	qpd->ib_mem = mem;
785fb4d8502Sjsg 	qpd->ib_kaddr = kaddr;
786fb4d8502Sjsg 
787fb4d8502Sjsg 	return 0;
788fb4d8502Sjsg }
789fb4d8502Sjsg 
7901bb76ff1Sjsg static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
7911bb76ff1Sjsg {
7921bb76ff1Sjsg 	struct qcm_process_device *qpd = &pdd->qpd;
7931bb76ff1Sjsg 
7941bb76ff1Sjsg 	if (!qpd->ib_kaddr || !qpd->ib_base)
7951bb76ff1Sjsg 		return;
7961bb76ff1Sjsg 
797ec0ca080Sjsg 	kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr);
7981bb76ff1Sjsg }
7991bb76ff1Sjsg 
800f005ef32Sjsg struct kfd_process *kfd_create_process(struct task_struct *thread)
801fb4d8502Sjsg {
802fb4d8502Sjsg 	struct kfd_process *process;
803c349dbc7Sjsg 	int ret;
804fb4d8502Sjsg 
805f005ef32Sjsg 	if (!(thread->mm && mmget_not_zero(thread->mm)))
806fb4d8502Sjsg 		return ERR_PTR(-EINVAL);
807fb4d8502Sjsg 
808fb4d8502Sjsg 	/* Only the pthreads threading model is supported. */
809f005ef32Sjsg 	if (thread->group_leader->mm != thread->mm) {
810f005ef32Sjsg 		mmput(thread->mm);
811fb4d8502Sjsg 		return ERR_PTR(-EINVAL);
812f005ef32Sjsg 	}
813fb4d8502Sjsg 
814fb4d8502Sjsg 	/*
815fb4d8502Sjsg 	 * take the kfd processes mutex before starting process creation
816fb4d8502Sjsg 	 * so there won't be a case where two threads of the same process
817fb4d8502Sjsg 	 * create two kfd_process structures
818fb4d8502Sjsg 	 */
819fb4d8502Sjsg 	mutex_lock(&kfd_processes_mutex);
820fb4d8502Sjsg 
821f005ef32Sjsg 	if (kfd_is_locked()) {
822f005ef32Sjsg 		pr_debug("KFD is locked! Cannot create process");
823daa6cf7dSjsg 		process = ERR_PTR(-EINVAL);
824daa6cf7dSjsg 		goto out;
825f005ef32Sjsg 	}
826f005ef32Sjsg 
827fb4d8502Sjsg 	/* A prior open of /dev/kfd could have already created the process. */
8281bb76ff1Sjsg 	process = find_process(thread, false);
829c349dbc7Sjsg 	if (process) {
830fb4d8502Sjsg 		pr_debug("Process already found\n");
831c349dbc7Sjsg 	} else {
8325ad48b22Sjsg 		/* If the process just called exec(3), it is possible that the
8335ad48b22Sjsg 		 * cleanup of the kfd_process (following the release of the mm
8345ad48b22Sjsg 		 * of the old process image) is still in the cleanup work queue.
8355ad48b22Sjsg 		 * Make sure to drain any job before trying to recreate any
8365ad48b22Sjsg 		 * resource for this process.
8375ad48b22Sjsg 		 */
8385ad48b22Sjsg 		flush_workqueue(kfd_process_wq);
8395ad48b22Sjsg 
840c349dbc7Sjsg 		process = create_process(thread);
841c349dbc7Sjsg 		if (IS_ERR(process))
842c349dbc7Sjsg 			goto out;
843fb4d8502Sjsg 
844c349dbc7Sjsg 		if (!procfs.kobj)
845c349dbc7Sjsg 			goto out;
846c349dbc7Sjsg 
847c349dbc7Sjsg 		process->kobj = kfd_alloc_struct(process->kobj);
848c349dbc7Sjsg 		if (!process->kobj) {
849c349dbc7Sjsg 			pr_warn("Creating procfs kobject failed");
850c349dbc7Sjsg 			goto out;
851c349dbc7Sjsg 		}
852c349dbc7Sjsg 		ret = kobject_init_and_add(process->kobj, &procfs_type,
853c349dbc7Sjsg 					   procfs.kobj, "%d",
854c349dbc7Sjsg 					   (int)process->lead_thread->pid);
855c349dbc7Sjsg 		if (ret) {
856c349dbc7Sjsg 			pr_warn("Creating procfs pid directory failed");
85760c25accSjsg 			kobject_put(process->kobj);
858c349dbc7Sjsg 			goto out;
859c349dbc7Sjsg 		}
860c349dbc7Sjsg 
8615ca02815Sjsg 		kfd_sysfs_create_file(process->kobj, &process->attr_pasid,
8625ca02815Sjsg 				      "pasid");
863c349dbc7Sjsg 
864c349dbc7Sjsg 		process->kobj_queues = kobject_create_and_add("queues",
865c349dbc7Sjsg 							process->kobj);
866c349dbc7Sjsg 		if (!process->kobj_queues)
867c349dbc7Sjsg 			pr_warn("Creating KFD proc/queues folder failed");
868ad8b1aafSjsg 
8695ca02815Sjsg 		kfd_procfs_add_sysfs_stats(process);
8705ca02815Sjsg 		kfd_procfs_add_sysfs_files(process);
8715ca02815Sjsg 		kfd_procfs_add_sysfs_counters(process);
872f005ef32Sjsg 
873f005ef32Sjsg 		init_waitqueue_head(&process->wait_irq_drain);
874c349dbc7Sjsg 	}
875c349dbc7Sjsg out:
876c349dbc7Sjsg 	if (!IS_ERR(process))
877c349dbc7Sjsg 		kref_get(&process->ref);
878fb4d8502Sjsg 	mutex_unlock(&kfd_processes_mutex);
879f005ef32Sjsg 	mmput(thread->mm);
880fb4d8502Sjsg 
881fb4d8502Sjsg 	return process;
882fb4d8502Sjsg }
883fb4d8502Sjsg 
884fb4d8502Sjsg struct kfd_process *kfd_get_process(const struct task_struct *thread)
885fb4d8502Sjsg {
886fb4d8502Sjsg 	struct kfd_process *process;
887fb4d8502Sjsg 
888fb4d8502Sjsg 	if (!thread->mm)
889fb4d8502Sjsg 		return ERR_PTR(-EINVAL);
890fb4d8502Sjsg 
891fb4d8502Sjsg 	/* Only the pthreads threading model is supported. */
892fb4d8502Sjsg 	if (thread->group_leader->mm != thread->mm)
893fb4d8502Sjsg 		return ERR_PTR(-EINVAL);
894fb4d8502Sjsg 
8951bb76ff1Sjsg 	process = find_process(thread, false);
896fb4d8502Sjsg 	if (!process)
897fb4d8502Sjsg 		return ERR_PTR(-EINVAL);
898fb4d8502Sjsg 
899fb4d8502Sjsg 	return process;
900fb4d8502Sjsg }
901fb4d8502Sjsg 
902fb4d8502Sjsg static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
903fb4d8502Sjsg {
904fb4d8502Sjsg 	struct kfd_process *process;
905fb4d8502Sjsg 
906fb4d8502Sjsg 	hash_for_each_possible_rcu(kfd_processes_table, process,
907fb4d8502Sjsg 					kfd_processes, (uintptr_t)mm)
908fb4d8502Sjsg 		if (process->mm == mm)
909fb4d8502Sjsg 			return process;
910fb4d8502Sjsg 
911fb4d8502Sjsg 	return NULL;
912fb4d8502Sjsg }
913fb4d8502Sjsg 
9141bb76ff1Sjsg static struct kfd_process *find_process(const struct task_struct *thread,
9151bb76ff1Sjsg 					bool ref)
916fb4d8502Sjsg {
917fb4d8502Sjsg 	struct kfd_process *p;
918fb4d8502Sjsg 	int idx;
919fb4d8502Sjsg 
920fb4d8502Sjsg 	idx = srcu_read_lock(&kfd_processes_srcu);
921fb4d8502Sjsg 	p = find_process_by_mm(thread->mm);
9221bb76ff1Sjsg 	if (p && ref)
9231bb76ff1Sjsg 		kref_get(&p->ref);
924fb4d8502Sjsg 	srcu_read_unlock(&kfd_processes_srcu, idx);
925fb4d8502Sjsg 
926fb4d8502Sjsg 	return p;
927fb4d8502Sjsg }
928fb4d8502Sjsg 
929fb4d8502Sjsg void kfd_unref_process(struct kfd_process *p)
930fb4d8502Sjsg {
931fb4d8502Sjsg 	kref_put(&p->ref, kfd_process_ref_release);
932fb4d8502Sjsg }
933fb4d8502Sjsg 
9341bb76ff1Sjsg /* This increments the process->ref counter. */
9351bb76ff1Sjsg struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
9361bb76ff1Sjsg {
9371bb76ff1Sjsg 	struct task_struct *task = NULL;
9381bb76ff1Sjsg 	struct kfd_process *p    = NULL;
9391bb76ff1Sjsg 
9401bb76ff1Sjsg 	if (!pid) {
9411bb76ff1Sjsg 		task = current;
9421bb76ff1Sjsg 		get_task_struct(task);
9431bb76ff1Sjsg 	} else {
9441bb76ff1Sjsg 		task = get_pid_task(pid, PIDTYPE_PID);
9451bb76ff1Sjsg 	}
9461bb76ff1Sjsg 
9471bb76ff1Sjsg 	if (task) {
9481bb76ff1Sjsg 		p = find_process(task, true);
9491bb76ff1Sjsg 		put_task_struct(task);
9501bb76ff1Sjsg 	}
9511bb76ff1Sjsg 
9521bb76ff1Sjsg 	return p;
9531bb76ff1Sjsg }
9545ca02815Sjsg 
955fb4d8502Sjsg static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
956fb4d8502Sjsg {
957fb4d8502Sjsg 	struct kfd_process *p = pdd->process;
958fb4d8502Sjsg 	void *mem;
959fb4d8502Sjsg 	int id;
9605ca02815Sjsg 	int i;
961fb4d8502Sjsg 
962fb4d8502Sjsg 	/*
963fb4d8502Sjsg 	 * Remove all handles from the idr and release the appropriate
964fb4d8502Sjsg 	 * local memory objects.
965fb4d8502Sjsg 	 */
966fb4d8502Sjsg 	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
967fb4d8502Sjsg 
9685ca02815Sjsg 		for (i = 0; i < p->n_pdds; i++) {
9695ca02815Sjsg 			struct kfd_process_device *peer_pdd = p->pdds[i];
9705ca02815Sjsg 
9715ca02815Sjsg 			if (!peer_pdd->drm_priv)
972fb4d8502Sjsg 				continue;
973c349dbc7Sjsg 			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
9741bb76ff1Sjsg 				peer_pdd->dev->adev, mem, peer_pdd->drm_priv);
975fb4d8502Sjsg 		}
976fb4d8502Sjsg 
9771bb76ff1Sjsg 		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem,
9785ca02815Sjsg 						       pdd->drm_priv, NULL);
979fb4d8502Sjsg 		kfd_process_device_remove_obj_handle(pdd, id);
980fb4d8502Sjsg 	}
981fb4d8502Sjsg }
982fb4d8502Sjsg 
9831bb76ff1Sjsg /*
9841bb76ff1Sjsg  * Just kunmap and unpin the signal BO here. It will be freed in
9851bb76ff1Sjsg  * kfd_process_free_outstanding_kfd_bos()
9861bb76ff1Sjsg  */
9871bb76ff1Sjsg static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
9881bb76ff1Sjsg {
9891bb76ff1Sjsg 	struct kfd_process_device *pdd;
990f005ef32Sjsg 	struct kfd_node *kdev;
9911bb76ff1Sjsg 	void *mem;
9921bb76ff1Sjsg 
9931bb76ff1Sjsg 	kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
9941bb76ff1Sjsg 	if (!kdev)
9951bb76ff1Sjsg 		return;
9961bb76ff1Sjsg 
9971bb76ff1Sjsg 	mutex_lock(&p->mutex);
9981bb76ff1Sjsg 
9991bb76ff1Sjsg 	pdd = kfd_get_process_device_data(kdev, p);
10001bb76ff1Sjsg 	if (!pdd)
10011bb76ff1Sjsg 		goto out;
10021bb76ff1Sjsg 
10031bb76ff1Sjsg 	mem = kfd_process_device_translate_handle(
10041bb76ff1Sjsg 		pdd, GET_IDR_HANDLE(p->signal_handle));
10051bb76ff1Sjsg 	if (!mem)
10061bb76ff1Sjsg 		goto out;
10071bb76ff1Sjsg 
10081bb76ff1Sjsg 	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
10091bb76ff1Sjsg 
10101bb76ff1Sjsg out:
10111bb76ff1Sjsg 	mutex_unlock(&p->mutex);
10121bb76ff1Sjsg }
10131bb76ff1Sjsg 
1014fb4d8502Sjsg static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
1015fb4d8502Sjsg {
10165ca02815Sjsg 	int i;
1017fb4d8502Sjsg 
10185ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++)
10195ca02815Sjsg 		kfd_process_device_free_bos(p->pdds[i]);
1020fb4d8502Sjsg }
1021fb4d8502Sjsg 
1022fb4d8502Sjsg static void kfd_process_destroy_pdds(struct kfd_process *p)
1023fb4d8502Sjsg {
10245ca02815Sjsg 	int i;
1025fb4d8502Sjsg 
10265ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++) {
10275ca02815Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
10285ca02815Sjsg 
1029c349dbc7Sjsg 		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
1030fb4d8502Sjsg 				pdd->dev->id, p->pasid);
1031fb4d8502Sjsg 
10321bb76ff1Sjsg 		kfd_process_device_destroy_cwsr_dgpu(pdd);
10331bb76ff1Sjsg 		kfd_process_device_destroy_ib_mem(pdd);
10341bb76ff1Sjsg 
1035c349dbc7Sjsg 		if (pdd->drm_file) {
1036c349dbc7Sjsg 			amdgpu_amdkfd_gpuvm_release_process_vm(
10371bb76ff1Sjsg 					pdd->dev->adev, pdd->drm_priv);
1038fb4d8502Sjsg 			fput(pdd->drm_file);
1039c349dbc7Sjsg 		}
1040fb4d8502Sjsg 
1041fb4d8502Sjsg 		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
1042fb4d8502Sjsg 			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
1043fb4d8502Sjsg 				get_order(KFD_CWSR_TBA_TMA_SIZE));
1044fb4d8502Sjsg 
1045fb4d8502Sjsg 		idr_destroy(&pdd->alloc_idr);
1046fb4d8502Sjsg 
1047f005ef32Sjsg 		kfd_free_process_doorbells(pdd->dev->kfd, pdd);
1048ad8b1aafSjsg 
1049f005ef32Sjsg 		if (pdd->dev->kfd->shared_resources.enable_mes)
10501bb76ff1Sjsg 			amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
1051ff6d5195Sjsg 						   &pdd->proc_ctx_bo);
1052c349dbc7Sjsg 		/*
1053c349dbc7Sjsg 		 * before destroying pdd, make sure to report availability
1054c349dbc7Sjsg 		 * for auto suspend
1055c349dbc7Sjsg 		 */
1056c349dbc7Sjsg 		if (pdd->runtime_inuse) {
1057f005ef32Sjsg 			pm_runtime_mark_last_busy(adev_to_drm(pdd->dev->adev)->dev);
1058f005ef32Sjsg 			pm_runtime_put_autosuspend(adev_to_drm(pdd->dev->adev)->dev);
1059c349dbc7Sjsg 			pdd->runtime_inuse = false;
1060c349dbc7Sjsg 		}
1061c349dbc7Sjsg 
1062fb4d8502Sjsg 		kfree(pdd);
10635ca02815Sjsg 		p->pdds[i] = NULL;
1064fb4d8502Sjsg 	}
10655ca02815Sjsg 	p->n_pdds = 0;
1066fb4d8502Sjsg }
1067fb4d8502Sjsg 
10685ca02815Sjsg static void kfd_process_remove_sysfs(struct kfd_process *p)
1069fb4d8502Sjsg {
1070ad8b1aafSjsg 	struct kfd_process_device *pdd;
10715ca02815Sjsg 	int i;
1072fb4d8502Sjsg 
10735ca02815Sjsg 	if (!p->kobj)
10745ca02815Sjsg 		return;
10755ca02815Sjsg 
1076c349dbc7Sjsg 	sysfs_remove_file(p->kobj, &p->attr_pasid);
1077c349dbc7Sjsg 	kobject_del(p->kobj_queues);
1078c349dbc7Sjsg 	kobject_put(p->kobj_queues);
1079c349dbc7Sjsg 	p->kobj_queues = NULL;
1080ad8b1aafSjsg 
10815ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++) {
10825ca02815Sjsg 		pdd = p->pdds[i];
10835ca02815Sjsg 
1084ad8b1aafSjsg 		sysfs_remove_file(p->kobj, &pdd->attr_vram);
1085ad8b1aafSjsg 		sysfs_remove_file(p->kobj, &pdd->attr_sdma);
10868f3bef5aSjsg 
10878f3bef5aSjsg 		sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
10888f3bef5aSjsg 		if (pdd->dev->kfd2kgd->get_cu_occupancy)
10898f3bef5aSjsg 			sysfs_remove_file(pdd->kobj_stats,
10908f3bef5aSjsg 					  &pdd->attr_cu_occupancy);
1091ad8b1aafSjsg 		kobject_del(pdd->kobj_stats);
1092ad8b1aafSjsg 		kobject_put(pdd->kobj_stats);
1093ad8b1aafSjsg 		pdd->kobj_stats = NULL;
1094ad8b1aafSjsg 	}
1095ad8b1aafSjsg 
10965ca02815Sjsg 	for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
10975ca02815Sjsg 		pdd = p->pdds[i];
10985ca02815Sjsg 
10995ca02815Sjsg 		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults);
11005ca02815Sjsg 		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in);
11015ca02815Sjsg 		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out);
11025ca02815Sjsg 		kobject_del(pdd->kobj_counters);
11035ca02815Sjsg 		kobject_put(pdd->kobj_counters);
11045ca02815Sjsg 		pdd->kobj_counters = NULL;
11055ca02815Sjsg 	}
11065ca02815Sjsg 
1107c349dbc7Sjsg 	kobject_del(p->kobj);
1108c349dbc7Sjsg 	kobject_put(p->kobj);
1109c349dbc7Sjsg 	p->kobj = NULL;
1110c349dbc7Sjsg }
1111c349dbc7Sjsg 
11125ca02815Sjsg /* No process locking is needed in this function, because the process
11135ca02815Sjsg  * is not findable any more. We must assume that no other thread is
11145ca02815Sjsg  * using it any more, otherwise we couldn't safely free the process
11155ca02815Sjsg  * structure in the end.
11165ca02815Sjsg  */
11175ca02815Sjsg static void kfd_process_wq_release(struct work_struct *work)
11185ca02815Sjsg {
11195ca02815Sjsg 	struct kfd_process *p = container_of(work, struct kfd_process,
11205ca02815Sjsg 					     release_work);
11211bb76ff1Sjsg 
11221bb76ff1Sjsg 	kfd_process_dequeue_from_all_devices(p);
11231bb76ff1Sjsg 	pqm_uninit(&p->pqm);
11241bb76ff1Sjsg 
11251bb76ff1Sjsg 	/* Signal the eviction fence after user mode queues are
11261bb76ff1Sjsg 	 * destroyed. This allows any BOs to be freed without
11271bb76ff1Sjsg 	 * triggering pointless evictions or waiting for fences.
11281bb76ff1Sjsg 	 */
11291bb76ff1Sjsg 	dma_fence_signal(p->ef);
11301bb76ff1Sjsg 
11315ca02815Sjsg 	kfd_process_remove_sysfs(p);
1132fb4d8502Sjsg 
11331bb76ff1Sjsg 	kfd_process_kunmap_signal_bo(p);
1134fb4d8502Sjsg 	kfd_process_free_outstanding_kfd_bos(p);
11355ca02815Sjsg 	svm_range_list_fini(p);
1136fb4d8502Sjsg 
1137fb4d8502Sjsg 	kfd_process_destroy_pdds(p);
1138fb4d8502Sjsg 	dma_fence_put(p->ef);
1139fb4d8502Sjsg 
1140fb4d8502Sjsg 	kfd_event_free_process(p);
1141fb4d8502Sjsg 
1142fb4d8502Sjsg 	kfd_pasid_free(p->pasid);
1143fb4d8502Sjsg 	mutex_destroy(&p->mutex);
1144fb4d8502Sjsg 
1145fb4d8502Sjsg 	put_task_struct(p->lead_thread);
1146fb4d8502Sjsg 
1147fb4d8502Sjsg 	kfree(p);
1148fb4d8502Sjsg }
1149fb4d8502Sjsg 
1150fb4d8502Sjsg static void kfd_process_ref_release(struct kref *ref)
1151fb4d8502Sjsg {
1152fb4d8502Sjsg 	struct kfd_process *p = container_of(ref, struct kfd_process, ref);
1153fb4d8502Sjsg 
1154fb4d8502Sjsg 	INIT_WORK(&p->release_work, kfd_process_wq_release);
1155fb4d8502Sjsg 	queue_work(kfd_process_wq, &p->release_work);
1156fb4d8502Sjsg }
1157fb4d8502Sjsg 
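/*
 * alloc_notifier callback: rather than allocating a new notifier, hand back
 * the mmu_notifier embedded in the kfd_process already created for this mm
 * (or -ESRCH if no such process exists).
 */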
11585ca02815Sjsg static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
11595ca02815Sjsg {
11605ca02815Sjsg 	int idx = srcu_read_lock(&kfd_processes_srcu);
11615ca02815Sjsg 	struct kfd_process *p = find_process_by_mm(mm);
11625ca02815Sjsg 
11635ca02815Sjsg 	srcu_read_unlock(&kfd_processes_srcu, idx);
11645ca02815Sjsg 
11655ca02815Sjsg 	return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
11665ca02815Sjsg }
11675ca02815Sjsg 
1168c349dbc7Sjsg static void kfd_process_free_notifier(struct mmu_notifier *mn)
1169fb4d8502Sjsg {
1170c349dbc7Sjsg 	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
1171fb4d8502Sjsg }
1172fb4d8502Sjsg 
11731a346658Sjsg static void kfd_process_notifier_release_internal(struct kfd_process *p)
11741a346658Sjsg {
1175f005ef32Sjsg 	int i;
1176f005ef32Sjsg 
11771a346658Sjsg 	cancel_delayed_work_sync(&p->eviction_work);
11781a346658Sjsg 	cancel_delayed_work_sync(&p->restore_work);
11791a346658Sjsg 
1180f005ef32Sjsg 	for (i = 0; i < p->n_pdds; i++) {
1181f005ef32Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
1182f005ef32Sjsg 
1183f005ef32Sjsg 		/* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */
1184f005ef32Sjsg 		if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup)
1185f005ef32Sjsg 			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
1186f005ef32Sjsg 	}
1187f005ef32Sjsg 
11881a346658Sjsg 	/* Indicate to other users that MM is no longer valid */
11891a346658Sjsg 	p->mm = NULL;
1190f005ef32Sjsg 	kfd_dbg_trap_disable(p);
1191f005ef32Sjsg 
1192f005ef32Sjsg 	if (atomic_read(&p->debugged_process_count) > 0) {
1193f005ef32Sjsg 		struct kfd_process *target;
1194f005ef32Sjsg 		unsigned int temp;
1195f005ef32Sjsg 		int idx = srcu_read_lock(&kfd_processes_srcu);
1196f005ef32Sjsg 
1197f005ef32Sjsg 		hash_for_each_rcu(kfd_processes_table, temp, target, kfd_processes) {
1198f005ef32Sjsg 			if (target->debugger_process && target->debugger_process == p) {
1199f005ef32Sjsg 				mutex_lock_nested(&target->mutex, 1);
1200f005ef32Sjsg 				kfd_dbg_trap_disable(target);
1201f005ef32Sjsg 				mutex_unlock(&target->mutex);
1202f005ef32Sjsg 				if (atomic_read(&p->debugged_process_count) == 0)
1203f005ef32Sjsg 					break;
1204f005ef32Sjsg 			}
1205f005ef32Sjsg 		}
1206f005ef32Sjsg 
1207f005ef32Sjsg 		srcu_read_unlock(&kfd_processes_srcu, idx);
1208f005ef32Sjsg 	}
12091a346658Sjsg 
12101a346658Sjsg 	mmu_notifier_put(&p->mmu_notifier);
12111a346658Sjsg }
12121a346658Sjsg 
1213fb4d8502Sjsg static void kfd_process_notifier_release(struct mmu_notifier *mn,
1214fb4d8502Sjsg 					struct mm_struct *mm)
1215fb4d8502Sjsg {
1216fb4d8502Sjsg 	struct kfd_process *p;
1217fb4d8502Sjsg 
1218fb4d8502Sjsg 	/*
1219fb4d8502Sjsg 	 * The kfd_process structure cannot be freed because the
1220fb4d8502Sjsg 	 * mmu_notifier SRCU is read-locked.
1221fb4d8502Sjsg 	 */
1222fb4d8502Sjsg 	p = container_of(mn, struct kfd_process, mmu_notifier);
1223fb4d8502Sjsg 	if (WARN_ON(p->mm != mm))
1224fb4d8502Sjsg 		return;
1225fb4d8502Sjsg 
1226fb4d8502Sjsg 	mutex_lock(&kfd_processes_mutex);
12271a346658Sjsg 	/*
12281a346658Sjsg 	 * Return early if the table is empty.
12291a346658Sjsg 	 *
12301a346658Sjsg 	 * This could potentially happen if this function is called concurrently
12311a346658Sjsg 	 * by the mmu_notifier and by kfd_cleanup_processes.
12321a346658Sjsg 	 *
12331a346658Sjsg 	 */
12341a346658Sjsg 	if (hash_empty(kfd_processes_table)) {
12351a346658Sjsg 		mutex_unlock(&kfd_processes_mutex);
12361a346658Sjsg 		return;
12371a346658Sjsg 	}
1238fb4d8502Sjsg 	hash_del_rcu(&p->kfd_processes);
1239fb4d8502Sjsg 	mutex_unlock(&kfd_processes_mutex);
1240fb4d8502Sjsg 	synchronize_srcu(&kfd_processes_srcu);
1241fb4d8502Sjsg 
12421a346658Sjsg 	kfd_process_notifier_release_internal(p);
1243fb4d8502Sjsg }
1244fb4d8502Sjsg 
1245fb4d8502Sjsg static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
1246fb4d8502Sjsg 	.release = kfd_process_notifier_release,
12475ca02815Sjsg 	.alloc_notifier = kfd_process_alloc_notifier,
1248c349dbc7Sjsg 	.free_notifier = kfd_process_free_notifier,
1249fb4d8502Sjsg };
1250fb4d8502Sjsg 
12511a346658Sjsg /*
12521a346658Sjsg  * This code handles the case when the driver is being unloaded before all
12531a346658Sjsg  * mm_structs are released.  We need to safely free the kfd_process and
12541a346658Sjsg  * avoid race conditions with mmu_notifier that might try to free them.
12551a346658Sjsg  *
12561a346658Sjsg  */
12571a346658Sjsg void kfd_cleanup_processes(void)
12581a346658Sjsg {
12591a346658Sjsg 	struct kfd_process *p;
12601a346658Sjsg 	struct hlist_node *p_temp;
12611a346658Sjsg 	unsigned int temp;
12621a346658Sjsg 	HLIST_HEAD(cleanup_list);
12631a346658Sjsg 
12641a346658Sjsg 	/*
12651a346658Sjsg 	 * Move all remaining kfd_process entries from the process table to a
12661a346658Sjsg 	 * temp list for processing. Once done, the callback from mmu_notifier
12671a346658Sjsg 	 * release will not see the kfd_process in the table and will return early,
12681a346658Sjsg 	 * avoiding double-free issues.
12691a346658Sjsg 	 */
12701a346658Sjsg 	mutex_lock(&kfd_processes_mutex);
12711a346658Sjsg 	hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
12721a346658Sjsg 		hash_del_rcu(&p->kfd_processes);
12731a346658Sjsg 		synchronize_srcu(&kfd_processes_srcu);
12741a346658Sjsg 		hlist_add_head(&p->kfd_processes, &cleanup_list);
12751a346658Sjsg 	}
12761a346658Sjsg 	mutex_unlock(&kfd_processes_mutex);
12771a346658Sjsg 
12781a346658Sjsg 	hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes)
12791a346658Sjsg 		kfd_process_notifier_release_internal(p);
12801a346658Sjsg 
12811a346658Sjsg 	/*
12821a346658Sjsg 	 * Ensures that all outstanding free_notifier callbacks get called, triggering
12831a346658Sjsg 	 * the release of the kfd_process struct.
12841a346658Sjsg 	 */
12851a346658Sjsg 	mmu_notifier_synchronize();
12861a346658Sjsg }
12871a346658Sjsg 
1288f005ef32Sjsg int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
1289fb4d8502Sjsg {
1290fb4d8502Sjsg 	unsigned long  offset;
12915ca02815Sjsg 	int i;
1292fb4d8502Sjsg 
1293f005ef32Sjsg 	if (p->has_cwsr)
1294f005ef32Sjsg 		return 0;
1295f005ef32Sjsg 
12965ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++) {
1297f005ef32Sjsg 		struct kfd_node *dev = p->pdds[i]->dev;
12985ca02815Sjsg 		struct qcm_process_device *qpd = &p->pdds[i]->qpd;
1299fb4d8502Sjsg 
1300f005ef32Sjsg 		if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
1301fb4d8502Sjsg 			continue;
1302fb4d8502Sjsg 
1303c349dbc7Sjsg 		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
1304fb4d8502Sjsg 		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
1305fb4d8502Sjsg 			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
1306fb4d8502Sjsg 			MAP_SHARED, offset);
1307fb4d8502Sjsg 
1308fb4d8502Sjsg 		if (IS_ERR_VALUE(qpd->tba_addr)) {
1309fb4d8502Sjsg 			int err = qpd->tba_addr;
1310fb4d8502Sjsg 
1311fb4d8502Sjsg 			pr_err("Failure to set tba address. error %d.\n", err);
1312fb4d8502Sjsg 			qpd->tba_addr = 0;
1313fb4d8502Sjsg 			qpd->cwsr_kaddr = NULL;
1314fb4d8502Sjsg 			return err;
1315fb4d8502Sjsg 		}
1316fb4d8502Sjsg 
1317f005ef32Sjsg 		memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
1318f005ef32Sjsg 
1319f005ef32Sjsg 		kfd_process_set_trap_debug_flag(qpd, p->debug_trap_enabled);
1320fb4d8502Sjsg 
1321fb4d8502Sjsg 		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1322fb4d8502Sjsg 		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1323fb4d8502Sjsg 			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1324fb4d8502Sjsg 	}
1325fb4d8502Sjsg 
1326f005ef32Sjsg 	p->has_cwsr = true;
1327f005ef32Sjsg 
1328fb4d8502Sjsg 	return 0;
1329fb4d8502Sjsg }
1330fb4d8502Sjsg 
1331fb4d8502Sjsg static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
1332fb4d8502Sjsg {
1333f005ef32Sjsg 	struct kfd_node *dev = pdd->dev;
1334fb4d8502Sjsg 	struct qcm_process_device *qpd = &pdd->qpd;
1335c349dbc7Sjsg 	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
1336c349dbc7Sjsg 			| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
1337c349dbc7Sjsg 			| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
13381bb76ff1Sjsg 	struct kgd_mem *mem;
1339fb4d8502Sjsg 	void *kaddr;
1340fb4d8502Sjsg 	int ret;
1341fb4d8502Sjsg 
1342f005ef32Sjsg 	if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
1343fb4d8502Sjsg 		return 0;
1344fb4d8502Sjsg 
1345fb4d8502Sjsg 	/* cwsr_base is only set for dGPU */
1346fb4d8502Sjsg 	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
13471bb76ff1Sjsg 				      KFD_CWSR_TBA_TMA_SIZE, flags, &mem, &kaddr);
1348fb4d8502Sjsg 	if (ret)
1349fb4d8502Sjsg 		return ret;
1350fb4d8502Sjsg 
13511bb76ff1Sjsg 	qpd->cwsr_mem = mem;
1352fb4d8502Sjsg 	qpd->cwsr_kaddr = kaddr;
1353fb4d8502Sjsg 	qpd->tba_addr = qpd->cwsr_base;
1354fb4d8502Sjsg 
1355f005ef32Sjsg 	memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
1356f005ef32Sjsg 
1357f005ef32Sjsg 	kfd_process_set_trap_debug_flag(&pdd->qpd,
1358f005ef32Sjsg 					pdd->process->debug_trap_enabled);
1359fb4d8502Sjsg 
1360fb4d8502Sjsg 	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1361fb4d8502Sjsg 	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1362fb4d8502Sjsg 		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1363fb4d8502Sjsg 
1364fb4d8502Sjsg 	return 0;
1365fb4d8502Sjsg }
1366fb4d8502Sjsg 
13671bb76ff1Sjsg static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
13681bb76ff1Sjsg {
1369f005ef32Sjsg 	struct kfd_node *dev = pdd->dev;
13701bb76ff1Sjsg 	struct qcm_process_device *qpd = &pdd->qpd;
13711bb76ff1Sjsg 
1372f005ef32Sjsg 	if (!dev->kfd->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
13731bb76ff1Sjsg 		return;
13741bb76ff1Sjsg 
1375ec0ca080Sjsg 	kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr);
13761bb76ff1Sjsg }
13771bb76ff1Sjsg 
13785ca02815Sjsg void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
13795ca02815Sjsg 				  uint64_t tba_addr,
13805ca02815Sjsg 				  uint64_t tma_addr)
13815ca02815Sjsg {
13825ca02815Sjsg 	if (qpd->cwsr_kaddr) {
13835ca02815Sjsg 		/* A KFD trap handler is bound: record it as the second-level
13845ca02815Sjsg 		 * TBA/TMA in the first-level TMA; the first-level trap handler jumps to it.
13855ca02815Sjsg 		 */
13865ca02815Sjsg 		uint64_t *tma =
13875ca02815Sjsg 			(uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
13885ca02815Sjsg 		tma[0] = tba_addr;
13895ca02815Sjsg 		tma[1] = tma_addr;
13905ca02815Sjsg 	} else {
13915ca02815Sjsg 		/* No trap handler bound, bind as first-level TBA/TMA. */
13925ca02815Sjsg 		qpd->tba_addr = tba_addr;
13935ca02815Sjsg 		qpd->tma_addr = tma_addr;
13945ca02815Sjsg 	}
13955ca02815Sjsg }
13965ca02815Sjsg 
13975ca02815Sjsg bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
13985ca02815Sjsg {
13995ca02815Sjsg 	int i;
14005ca02815Sjsg 
14015ca02815Sjsg 	/* On most GFXv9 GPUs, the retry mode in the SQ must match the
14025ca02815Sjsg 	 * boot time retry setting. Mixing processes with different
14035ca02815Sjsg 	 * XNACK/retry settings can hang the GPU.
14045ca02815Sjsg 	 *
14055ca02815Sjsg 	 * Different GPUs can have different noretry settings depending
14065ca02815Sjsg 	 * on HW bugs or limitations. We need to find at least one
14075ca02815Sjsg 	 * XNACK mode for this process that's compatible with all GPUs.
14085ca02815Sjsg 	 * Fortunately GPUs with retry enabled (noretry=0) can run code
14095ca02815Sjsg 	 * built for XNACK-off, though on GFXv9 such code may run more slowly.
14105ca02815Sjsg 	 *
14115ca02815Sjsg 	 * Therefore applications built for XNACK-off can always be
14125ca02815Sjsg 	 * supported and will be our fallback if any GPU does not
14135ca02815Sjsg 	 * support retry.
14145ca02815Sjsg 	 */
14155ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++) {
1416f005ef32Sjsg 		struct kfd_node *dev = p->pdds[i]->dev;
14175ca02815Sjsg 
14185ca02815Sjsg 		/* Only consider GFXv9 and higher GPUs. Older GPUs don't
14195ca02815Sjsg 		 * support the SVM APIs and don't need to be considered
14205ca02815Sjsg 		 * for the XNACK mode selection.
14215ca02815Sjsg 		 */
14221bb76ff1Sjsg 		if (!KFD_IS_SOC15(dev))
14235ca02815Sjsg 			continue;
14245ca02815Sjsg 		/* Aldebaran can always support XNACK because it supports
14255ca02815Sjsg 		 * per-process XNACK mode selection. But let the dev->noretry
14265ca02815Sjsg 		 * setting still influence the default XNACK mode.
14275ca02815Sjsg 		 */
1428f005ef32Sjsg 		if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev))
14295ca02815Sjsg 			continue;
14305ca02815Sjsg 
14315ca02815Sjsg 		/* GFXv10 and later GPUs do not support shader preemption
14325ca02815Sjsg 		 * during page faults. This can lead to poor QoS for queue
14335ca02815Sjsg 		 * management and memory-manager-related preemptions or
14345ca02815Sjsg 		 * even deadlocks.
14355ca02815Sjsg 		 */
14361bb76ff1Sjsg 		if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
14375ca02815Sjsg 			return false;
14385ca02815Sjsg 
1439f005ef32Sjsg 		if (dev->kfd->noretry)
14405ca02815Sjsg 			return false;
14415ca02815Sjsg 	}
14425ca02815Sjsg 
14435ca02815Sjsg 	return true;
14445ca02815Sjsg }
14455ca02815Sjsg 
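/* Publish the debug-trap-enabled flag in the third quadword of the
 * first-level TMA, right after the second-level TBA/TMA slots written by
 * kfd_process_set_trap_handler(); the trap handler is expected to read it
 * from there.
 */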
1446f005ef32Sjsg void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd,
1447f005ef32Sjsg 				     bool enabled)
1448f005ef32Sjsg {
1449f005ef32Sjsg 	if (qpd->cwsr_kaddr) {
1450f005ef32Sjsg 		uint64_t *tma =
1451f005ef32Sjsg 			(uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1452f005ef32Sjsg 		tma[2] = enabled;
1453f005ef32Sjsg 	}
1454f005ef32Sjsg }
1455f005ef32Sjsg 
1456c349dbc7Sjsg /*
1457c349dbc7Sjsg  * On return, the kfd_process is fully operational and will be freed when
1458c349dbc7Sjsg  * the mm is released.
1459c349dbc7Sjsg  */
1460c349dbc7Sjsg static struct kfd_process *create_process(const struct task_struct *thread)
1461fb4d8502Sjsg {
1462fb4d8502Sjsg 	struct kfd_process *process;
14635ca02815Sjsg 	struct mmu_notifier *mn;
1464fb4d8502Sjsg 	int err = -ENOMEM;
1465fb4d8502Sjsg 
1466fb4d8502Sjsg 	process = kzalloc(sizeof(*process), GFP_KERNEL);
1467fb4d8502Sjsg 	if (!process)
1468fb4d8502Sjsg 		goto err_alloc_process;
1469fb4d8502Sjsg 
1470c349dbc7Sjsg 	kref_init(&process->ref);
1471ad8b1aafSjsg 	mutex_init(&process->mutex);
1472c349dbc7Sjsg 	process->mm = thread->mm;
1473c349dbc7Sjsg 	process->lead_thread = thread->group_leader;
14745ca02815Sjsg 	process->n_pdds = 0;
14751bb76ff1Sjsg 	process->queues_paused = false;
1476c349dbc7Sjsg 	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
1477c349dbc7Sjsg 	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
1478c349dbc7Sjsg 	process->last_restore_timestamp = get_jiffies_64();
14791bb76ff1Sjsg 	err = kfd_event_init_process(process);
14801bb76ff1Sjsg 	if (err)
14811bb76ff1Sjsg 		goto err_event_init;
1482c349dbc7Sjsg 	process->is_32bit_user_mode = in_compat_syscall();
1483f005ef32Sjsg 	process->debug_trap_enabled = false;
1484f005ef32Sjsg 	process->debugger_process = NULL;
1485f005ef32Sjsg 	process->exception_enable_mask = 0;
1486f005ef32Sjsg 	atomic_set(&process->debugged_process_count, 0);
1487f005ef32Sjsg 	sema_init(&process->runtime_enable_sema, 0);
1488c349dbc7Sjsg 
1489fb4d8502Sjsg 	process->pasid = kfd_pasid_alloc();
14901bb76ff1Sjsg 	if (process->pasid == 0) {
14911bb76ff1Sjsg 		err = -ENOSPC;
1492fb4d8502Sjsg 		goto err_alloc_pasid;
14931bb76ff1Sjsg 	}
1494fb4d8502Sjsg 
1495fb4d8502Sjsg 	err = pqm_init(&process->pqm, process);
1496fb4d8502Sjsg 	if (err != 0)
1497fb4d8502Sjsg 		goto err_process_pqm_init;
1498fb4d8502Sjsg 
1499fb4d8502Sjsg 	/* Init process apertures */
1500fb4d8502Sjsg 	err = kfd_init_apertures(process);
1501fb4d8502Sjsg 	if (err != 0)
1502fb4d8502Sjsg 		goto err_init_apertures;
1503fb4d8502Sjsg 
15045ca02815Sjsg 	/* Check XNACK support after PDDs are created in kfd_init_apertures */
15055ca02815Sjsg 	process->xnack_enabled = kfd_process_xnack_mode(process, false);
1506c349dbc7Sjsg 
15075ca02815Sjsg 	err = svm_range_list_init(process);
15085ca02815Sjsg 	if (err)
15095ca02815Sjsg 		goto err_init_svm_range_list;
15105ca02815Sjsg 
15115ca02815Sjsg 	/* alloc_notifier needs to find the process in the hash table */
1512c349dbc7Sjsg 	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
1513c349dbc7Sjsg 			(uintptr_t)process->mm);
1514fb4d8502Sjsg 
15151bb76ff1Sjsg 	/* Avoid free_notifier starting kfd_process_wq_release if
15161bb76ff1Sjsg 	 * mmu_notifier_get fails because of a pending signal.
15171bb76ff1Sjsg 	 */
15181bb76ff1Sjsg 	kref_get(&process->ref);
15191bb76ff1Sjsg 
15205ca02815Sjsg 	/* MMU notifier registration must be the last call that can fail
15215ca02815Sjsg 	 * because after this point we cannot unwind the process creation.
15225ca02815Sjsg 	 * After this point, mmu_notifier_put will trigger the cleanup by
15235ca02815Sjsg 	 * dropping the last process reference in the free_notifier.
15245ca02815Sjsg 	 */
15255ca02815Sjsg 	mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
15265ca02815Sjsg 	if (IS_ERR(mn)) {
15275ca02815Sjsg 		err = PTR_ERR(mn);
15285ca02815Sjsg 		goto err_register_notifier;
15295ca02815Sjsg 	}
15305ca02815Sjsg 	BUG_ON(mn != &process->mmu_notifier);
15315ca02815Sjsg 
15321bb76ff1Sjsg 	kfd_unref_process(process);
15335ca02815Sjsg 	get_task_struct(process->lead_thread);
15345ca02815Sjsg 
1535f005ef32Sjsg 	INIT_WORK(&process->debug_event_workarea, debug_event_write_work_handler);
1536f005ef32Sjsg 
1537fb4d8502Sjsg 	return process;
1538fb4d8502Sjsg 
1539c349dbc7Sjsg err_register_notifier:
15405ca02815Sjsg 	hash_del_rcu(&process->kfd_processes);
15415ca02815Sjsg 	svm_range_list_fini(process);
15425ca02815Sjsg err_init_svm_range_list:
1543fb4d8502Sjsg 	kfd_process_free_outstanding_kfd_bos(process);
1544fb4d8502Sjsg 	kfd_process_destroy_pdds(process);
1545fb4d8502Sjsg err_init_apertures:
1546fb4d8502Sjsg 	pqm_uninit(&process->pqm);
1547fb4d8502Sjsg err_process_pqm_init:
1548fb4d8502Sjsg 	kfd_pasid_free(process->pasid);
1549fb4d8502Sjsg err_alloc_pasid:
15501bb76ff1Sjsg 	kfd_event_free_process(process);
15511bb76ff1Sjsg err_event_init:
1552c349dbc7Sjsg 	mutex_destroy(&process->mutex);
1553fb4d8502Sjsg 	kfree(process);
1554fb4d8502Sjsg err_alloc_process:
1555fb4d8502Sjsg 	return ERR_PTR(err);
1556fb4d8502Sjsg }
1557fb4d8502Sjsg 
1558f005ef32Sjsg struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
1559fb4d8502Sjsg 							struct kfd_process *p)
1560fb4d8502Sjsg {
15615ca02815Sjsg 	int i;
1562fb4d8502Sjsg 
15635ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++)
15645ca02815Sjsg 		if (p->pdds[i]->dev == dev)
15655ca02815Sjsg 			return p->pdds[i];
1566fb4d8502Sjsg 
1567fb4d8502Sjsg 	return NULL;
1568fb4d8502Sjsg }
1569fb4d8502Sjsg 
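/* Allocate and initialize the per-process, per-device data (pdd) for @dev and
 * append it to p->pdds[]. For MES-enabled devices this also allocates and
 * zeroes the process context BO.
 */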
1570f005ef32Sjsg struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
1571fb4d8502Sjsg 							struct kfd_process *p)
1572fb4d8502Sjsg {
1573fb4d8502Sjsg 	struct kfd_process_device *pdd = NULL;
15741bb76ff1Sjsg 	int retval = 0;
1575fb4d8502Sjsg 
15765ca02815Sjsg 	if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
15775ca02815Sjsg 		return NULL;
1578fb4d8502Sjsg 	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
1579fb4d8502Sjsg 	if (!pdd)
1580fb4d8502Sjsg 		return NULL;
1581fb4d8502Sjsg 
1582fb4d8502Sjsg 	pdd->dev = dev;
1583fb4d8502Sjsg 	INIT_LIST_HEAD(&pdd->qpd.queues_list);
1584fb4d8502Sjsg 	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
1585fb4d8502Sjsg 	pdd->qpd.dqm = dev->dqm;
1586fb4d8502Sjsg 	pdd->qpd.pqm = &p->pqm;
1587fb4d8502Sjsg 	pdd->qpd.evicted = 0;
1588ad8b1aafSjsg 	pdd->qpd.mapped_gws_queue = false;
1589fb4d8502Sjsg 	pdd->process = p;
1590fb4d8502Sjsg 	pdd->bound = PDD_UNBOUND;
1591fb4d8502Sjsg 	pdd->already_dequeued = false;
1592c349dbc7Sjsg 	pdd->runtime_inuse = false;
1593f7304f60Sjsg 	atomic64_set(&pdd->vram_usage, 0);
1594ad8b1aafSjsg 	pdd->sdma_past_activity_counter = 0;
15951bb76ff1Sjsg 	pdd->user_gpu_id = dev->id;
1596ad8b1aafSjsg 	atomic64_set(&pdd->evict_duration_counter, 0);
15971bb76ff1Sjsg 
1598f005ef32Sjsg 	if (dev->kfd->shared_resources.enable_mes) {
15991bb76ff1Sjsg 		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
16001bb76ff1Sjsg 						AMDGPU_MES_PROC_CTX_SIZE,
16011bb76ff1Sjsg 						&pdd->proc_ctx_bo,
16021bb76ff1Sjsg 						&pdd->proc_ctx_gpu_addr,
16031bb76ff1Sjsg 						&pdd->proc_ctx_cpu_ptr,
16041bb76ff1Sjsg 						false);
16051bb76ff1Sjsg 		if (retval) {
16061bb76ff1Sjsg 			pr_err("failed to allocate process context bo\n");
16071bb76ff1Sjsg 			goto err_free_pdd;
16081bb76ff1Sjsg 		}
16091bb76ff1Sjsg 		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
16101bb76ff1Sjsg 	}
16111bb76ff1Sjsg 
16125ca02815Sjsg 	p->pdds[p->n_pdds++] = pdd;
1613f005ef32Sjsg 	if (kfd_dbg_is_per_vmid_supported(pdd->dev))
1614f005ef32Sjsg 		pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
1615f005ef32Sjsg 							pdd->dev->adev,
1616f005ef32Sjsg 							false,
1617f005ef32Sjsg 							0);
1618fb4d8502Sjsg 
1619fb4d8502Sjsg 	/* Init idr used for memory handle translation */
1620fb4d8502Sjsg 	idr_init(&pdd->alloc_idr);
1621fb4d8502Sjsg 
1622fb4d8502Sjsg 	return pdd;
1623ad8b1aafSjsg 
1624ad8b1aafSjsg err_free_pdd:
1625ad8b1aafSjsg 	kfree(pdd);
1626ad8b1aafSjsg 	return NULL;
1627fb4d8502Sjsg }
1628fb4d8502Sjsg 
1629fb4d8502Sjsg /**
1630fb4d8502Sjsg  * kfd_process_device_init_vm - Initialize a VM for a process-device
1631fb4d8502Sjsg  *
1632fb4d8502Sjsg  * @pdd: The process-device
1633fb4d8502Sjsg  * @drm_file: Optional pointer to a DRM file descriptor
1634fb4d8502Sjsg  *
1635fb4d8502Sjsg  * If @drm_file is specified, it will be used to acquire the VM from
1636fb4d8502Sjsg  * that file descriptor. If successful, the @pdd takes ownership of
1637fb4d8502Sjsg  * the file descriptor.
1638fb4d8502Sjsg  *
1639fb4d8502Sjsg  * If @drm_file is NULL, a new VM is created.
1640fb4d8502Sjsg  *
1641fb4d8502Sjsg  * Returns 0 on success, -errno on failure.
1642fb4d8502Sjsg  */
1643fb4d8502Sjsg int kfd_process_device_init_vm(struct kfd_process_device *pdd,
1644fb4d8502Sjsg 			       struct file *drm_file)
1645fb4d8502Sjsg {
1646f005ef32Sjsg 	struct amdgpu_fpriv *drv_priv;
1647f005ef32Sjsg 	struct amdgpu_vm *avm;
1648fb4d8502Sjsg 	struct kfd_process *p;
1649f005ef32Sjsg 	struct kfd_node *dev;
1650fb4d8502Sjsg 	int ret;
1651fb4d8502Sjsg 
16525ca02815Sjsg 	if (!drm_file)
16535ca02815Sjsg 		return -EINVAL;
16545ca02815Sjsg 
16555ca02815Sjsg 	if (pdd->drm_priv)
16565ca02815Sjsg 		return -EBUSY;
1657fb4d8502Sjsg 
1658f005ef32Sjsg 	ret = amdgpu_file_to_fpriv(drm_file, &drv_priv);
1659f005ef32Sjsg 	if (ret)
1660f005ef32Sjsg 		return ret;
1661f005ef32Sjsg 	avm = &drv_priv->vm;
1662f005ef32Sjsg 
1663fb4d8502Sjsg 	p = pdd->process;
1664fb4d8502Sjsg 	dev = pdd->dev;
1665fb4d8502Sjsg 
1666f005ef32Sjsg 	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
1667c4a5d5d2Sjsg 						     &p->kgd_process_info,
1668c4a5d5d2Sjsg 						     &p->ef);
1669fb4d8502Sjsg 	if (ret) {
1670fb4d8502Sjsg 		pr_err("Failed to create process VM object\n");
1671fb4d8502Sjsg 		return ret;
1672fb4d8502Sjsg 	}
16735ca02815Sjsg 	pdd->drm_priv = drm_file->private_data;
16741bb76ff1Sjsg 	atomic64_set(&pdd->tlb_seq, 0);
1675c349dbc7Sjsg 
1676fb4d8502Sjsg 	ret = kfd_process_device_reserve_ib_mem(pdd);
1677fb4d8502Sjsg 	if (ret)
1678fb4d8502Sjsg 		goto err_reserve_ib_mem;
1679fb4d8502Sjsg 	ret = kfd_process_device_init_cwsr_dgpu(pdd);
1680fb4d8502Sjsg 	if (ret)
1681fb4d8502Sjsg 		goto err_init_cwsr;
1682fb4d8502Sjsg 
1683f005ef32Sjsg 	ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid);
1684c4a5d5d2Sjsg 	if (ret)
1685c4a5d5d2Sjsg 		goto err_set_pasid;
1686c4a5d5d2Sjsg 
1687fb4d8502Sjsg 	pdd->drm_file = drm_file;
1688fb4d8502Sjsg 
1689fb4d8502Sjsg 	return 0;
1690fb4d8502Sjsg 
1691c4a5d5d2Sjsg err_set_pasid:
1692c4a5d5d2Sjsg 	kfd_process_device_destroy_cwsr_dgpu(pdd);
1693fb4d8502Sjsg err_init_cwsr:
1694ec0ca080Sjsg 	kfd_process_device_destroy_ib_mem(pdd);
1695fb4d8502Sjsg err_reserve_ib_mem:
16965ca02815Sjsg 	pdd->drm_priv = NULL;
1697f005ef32Sjsg 	amdgpu_amdkfd_gpuvm_destroy_cb(dev->adev, avm);
1698fb4d8502Sjsg 
1699fb4d8502Sjsg 	return ret;
1700fb4d8502Sjsg }
1701fb4d8502Sjsg 
1702fb4d8502Sjsg /*
1703fb4d8502Sjsg  * Direct the IOMMU to bind the process (specifically the pasid->mm)
1704fb4d8502Sjsg  * to the device.
1705fb4d8502Sjsg  * Unbinding occurs when the process dies or the device is removed.
1706fb4d8502Sjsg  *
1707fb4d8502Sjsg  * Assumes that the process lock is held.
1708fb4d8502Sjsg  */
1709f005ef32Sjsg struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
1710fb4d8502Sjsg 							struct kfd_process *p)
1711fb4d8502Sjsg {
1712fb4d8502Sjsg 	struct kfd_process_device *pdd;
1713fb4d8502Sjsg 	int err;
1714fb4d8502Sjsg 
1715fb4d8502Sjsg 	pdd = kfd_get_process_device_data(dev, p);
1716fb4d8502Sjsg 	if (!pdd) {
1717fb4d8502Sjsg 		pr_err("Process device data doesn't exist\n");
1718fb4d8502Sjsg 		return ERR_PTR(-ENOMEM);
1719fb4d8502Sjsg 	}
1720fb4d8502Sjsg 
17215ca02815Sjsg 	if (!pdd->drm_priv)
17225ca02815Sjsg 		return ERR_PTR(-ENODEV);
17235ca02815Sjsg 
1724c349dbc7Sjsg 	/*
1725c349dbc7Sjsg 	 * Signal the runtime-pm system to auto-resume, and prevent
1726c349dbc7Sjsg 	 * further runtime suspend once the device pdd is created, until
1727c349dbc7Sjsg 	 * the pdd is destroyed.
1728c349dbc7Sjsg 	 */
1729c349dbc7Sjsg 	if (!pdd->runtime_inuse) {
1730f005ef32Sjsg 		err = pm_runtime_get_sync(adev_to_drm(dev->adev)->dev);
1731ad8b1aafSjsg 		if (err < 0) {
1732f005ef32Sjsg 			pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev);
1733c349dbc7Sjsg 			return ERR_PTR(err);
1734c349dbc7Sjsg 		}
1735ad8b1aafSjsg 	}
1736c349dbc7Sjsg 
1737c349dbc7Sjsg 	/*
1738c349dbc7Sjsg 	 * Make sure the runtime_usage counter is incremented just once
1739c349dbc7Sjsg 	 * per pdd.
1740c349dbc7Sjsg 	 */
1741c349dbc7Sjsg 	pdd->runtime_inuse = true;
1742fb4d8502Sjsg 
1743fb4d8502Sjsg 	return pdd;
1744fb4d8502Sjsg }
1745fb4d8502Sjsg 
1746fb4d8502Sjsg /* Create specific handle mapped to mem from process local memory idr
1747fb4d8502Sjsg  * Assumes that the process lock is held.
1748fb4d8502Sjsg  */
1749fb4d8502Sjsg int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
1750fb4d8502Sjsg 					void *mem)
1751fb4d8502Sjsg {
1752fb4d8502Sjsg 	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
1753fb4d8502Sjsg }
1754fb4d8502Sjsg 
1755fb4d8502Sjsg /* Translate specific handle from process local memory idr
1756fb4d8502Sjsg  * Assumes that the process lock is held.
1757fb4d8502Sjsg  */
1758fb4d8502Sjsg void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
1759fb4d8502Sjsg 					int handle)
1760fb4d8502Sjsg {
1761fb4d8502Sjsg 	if (handle < 0)
1762fb4d8502Sjsg 		return NULL;
1763fb4d8502Sjsg 
1764fb4d8502Sjsg 	return idr_find(&pdd->alloc_idr, handle);
1765fb4d8502Sjsg }
1766fb4d8502Sjsg 
1767fb4d8502Sjsg /* Remove specific handle from process local memory idr
1768fb4d8502Sjsg  * Assumes that the process lock is held.
1769fb4d8502Sjsg  */
1770fb4d8502Sjsg void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
1771fb4d8502Sjsg 					int handle)
1772fb4d8502Sjsg {
1773fb4d8502Sjsg 	if (handle >= 0)
1774fb4d8502Sjsg 		idr_remove(&pdd->alloc_idr, handle);
1775fb4d8502Sjsg }
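/* Typical usage sketch for the three helpers above: the allocation path
 * stores a buffer object pointer with kfd_process_device_create_obj_handle()
 * and hands the returned handle to user space; later ioctls translate it back
 * with kfd_process_device_translate_handle(); the free path drops it with
 * kfd_process_device_remove_obj_handle(). All of this assumes the process
 * lock is held, as noted above.
 */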
1776fb4d8502Sjsg 
1777fb4d8502Sjsg /* This increments the process->ref counter. */
1778ad8b1aafSjsg struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
1779fb4d8502Sjsg {
1780fb4d8502Sjsg 	struct kfd_process *p, *ret_p = NULL;
1781fb4d8502Sjsg 	unsigned int temp;
1782fb4d8502Sjsg 
1783fb4d8502Sjsg 	int idx = srcu_read_lock(&kfd_processes_srcu);
1784fb4d8502Sjsg 
1785fb4d8502Sjsg 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1786fb4d8502Sjsg 		if (p->pasid == pasid) {
1787fb4d8502Sjsg 			kref_get(&p->ref);
1788fb4d8502Sjsg 			ret_p = p;
1789fb4d8502Sjsg 			break;
1790fb4d8502Sjsg 		}
1791fb4d8502Sjsg 	}
1792fb4d8502Sjsg 
1793fb4d8502Sjsg 	srcu_read_unlock(&kfd_processes_srcu, idx);
1794fb4d8502Sjsg 
1795fb4d8502Sjsg 	return ret_p;
1796fb4d8502Sjsg }
1797fb4d8502Sjsg 
1798fb4d8502Sjsg /* This increments the process->ref counter. */
1799fb4d8502Sjsg struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
1800fb4d8502Sjsg {
1801fb4d8502Sjsg 	struct kfd_process *p;
1802fb4d8502Sjsg 
1803fb4d8502Sjsg 	int idx = srcu_read_lock(&kfd_processes_srcu);
1804fb4d8502Sjsg 
1805fb4d8502Sjsg 	p = find_process_by_mm(mm);
1806fb4d8502Sjsg 	if (p)
1807fb4d8502Sjsg 		kref_get(&p->ref);
1808fb4d8502Sjsg 
1809fb4d8502Sjsg 	srcu_read_unlock(&kfd_processes_srcu, idx);
1810fb4d8502Sjsg 
1811fb4d8502Sjsg 	return p;
1812fb4d8502Sjsg }
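/* Usage sketch for the two lookup helpers above: callers must drop the
 * reference they take, e.g.
 *
 *	p = kfd_lookup_process_by_pasid(pasid);
 *	if (p) {
 *		... use p ...
 *		kfd_unref_process(p);
 *	}
 */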
1813fb4d8502Sjsg 
1814ad8b1aafSjsg /* kfd_process_evict_queues - Evict all user queues of a process
1815fb4d8502Sjsg  *
1816fb4d8502Sjsg  * Eviction is reference-counted per process-device. This means multiple
1817fb4d8502Sjsg  * evictions from different sources can be nested safely.
1818fb4d8502Sjsg  */
18191bb76ff1Sjsg int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
1820fb4d8502Sjsg {
1821fb4d8502Sjsg 	int r = 0;
18225ca02815Sjsg 	int i;
1823fb4d8502Sjsg 	unsigned int n_evicted = 0;
1824fb4d8502Sjsg 
18255ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++) {
18265ca02815Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
18275ca02815Sjsg 
18281bb76ff1Sjsg 		kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
18291bb76ff1Sjsg 					     trigger);
18301bb76ff1Sjsg 
1831fb4d8502Sjsg 		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
1832fb4d8502Sjsg 							    &pdd->qpd);
18331bb76ff1Sjsg 		/* Eviction returns -EIO if HWS is hung or the ASIC is resetting. In
18341bb76ff1Sjsg 		 * this case we still want to mark all the queues as evicted, so that
18351bb76ff1Sjsg 		 * they are not added back later, since they were not actually saved.
18361bb76ff1Sjsg 		 */
18371bb76ff1Sjsg 		if (r && r != -EIO) {
1838fb4d8502Sjsg 			pr_err("Failed to evict process queues\n");
1839fb4d8502Sjsg 			goto fail;
1840fb4d8502Sjsg 		}
1841fb4d8502Sjsg 		n_evicted++;
1842fb4d8502Sjsg 	}
1843fb4d8502Sjsg 
1844fb4d8502Sjsg 	return r;
1845fb4d8502Sjsg 
1846fb4d8502Sjsg fail:
1847fb4d8502Sjsg 	/* To keep state consistent, roll back partial eviction by
1848fb4d8502Sjsg 	 * restoring queues
1849fb4d8502Sjsg 	 */
18505ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++) {
18515ca02815Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
18525ca02815Sjsg 
1853fb4d8502Sjsg 		if (n_evicted == 0)
1854fb4d8502Sjsg 			break;
18551bb76ff1Sjsg 
18561bb76ff1Sjsg 		kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
18571bb76ff1Sjsg 
1858fb4d8502Sjsg 		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1859fb4d8502Sjsg 							      &pdd->qpd))
1860fb4d8502Sjsg 			pr_err("Failed to restore queues\n");
1861fb4d8502Sjsg 
1862fb4d8502Sjsg 		n_evicted--;
1863fb4d8502Sjsg 	}
1864fb4d8502Sjsg 
1865fb4d8502Sjsg 	return r;
1866fb4d8502Sjsg }
1867fb4d8502Sjsg 
1868ad8b1aafSjsg /* kfd_process_restore_queues - Restore all user queues of a process */
1869fb4d8502Sjsg int kfd_process_restore_queues(struct kfd_process *p)
1870fb4d8502Sjsg {
1871fb4d8502Sjsg 	int r, ret = 0;
18725ca02815Sjsg 	int i;
1873fb4d8502Sjsg 
18745ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++) {
18755ca02815Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
18765ca02815Sjsg 
18771bb76ff1Sjsg 		kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
18781bb76ff1Sjsg 
1879fb4d8502Sjsg 		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1880fb4d8502Sjsg 							      &pdd->qpd);
1881fb4d8502Sjsg 		if (r) {
1882fb4d8502Sjsg 			pr_err("Failed to restore process queues\n");
1883fb4d8502Sjsg 			if (!ret)
1884fb4d8502Sjsg 				ret = r;
1885fb4d8502Sjsg 		}
1886fb4d8502Sjsg 	}
1887fb4d8502Sjsg 
1888fb4d8502Sjsg 	return ret;
1889fb4d8502Sjsg }
1890fb4d8502Sjsg 
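/* Map a user-visible GPU ID to its index in p->pdds[]; returns -EINVAL if the
 * process has no pdd with that ID.
 */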
18915ca02815Sjsg int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
18925ca02815Sjsg {
18935ca02815Sjsg 	int i;
18945ca02815Sjsg 
18955ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++)
18961bb76ff1Sjsg 		if (p->pdds[i] && gpu_id == p->pdds[i]->user_gpu_id)
18975ca02815Sjsg 			return i;
18985ca02815Sjsg 	return -EINVAL;
18995ca02815Sjsg }
19005ca02815Sjsg 
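/* Look up the user-visible GPU ID and pdd index for a given KFD node; returns
 * -EINVAL if the process has no pdd for that node.
 */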
19015ca02815Sjsg int
1902f005ef32Sjsg kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
19035ca02815Sjsg 			    uint32_t *gpuid, uint32_t *gpuidx)
19045ca02815Sjsg {
19055ca02815Sjsg 	int i;
19065ca02815Sjsg 
19075ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++)
1908f005ef32Sjsg 		if (p->pdds[i] && p->pdds[i]->dev == node) {
19091bb76ff1Sjsg 			*gpuid = p->pdds[i]->user_gpu_id;
19105ca02815Sjsg 			*gpuidx = i;
19115ca02815Sjsg 			return 0;
19125ca02815Sjsg 		}
19135ca02815Sjsg 	return -EINVAL;
19145ca02815Sjsg }
19155ca02815Sjsg 
1916fb4d8502Sjsg static void evict_process_worker(struct work_struct *work)
1917fb4d8502Sjsg {
1918fb4d8502Sjsg 	int ret;
1919fb4d8502Sjsg 	struct kfd_process *p;
1920fb4d8502Sjsg 	struct delayed_work *dwork;
1921fb4d8502Sjsg 
1922fb4d8502Sjsg 	dwork = to_delayed_work(work);
1923fb4d8502Sjsg 
1924fb4d8502Sjsg 	/* Process termination destroys this worker thread. So during the
1925fb4d8502Sjsg 	 * lifetime of this thread, kfd_process p will be valid.
1926fb4d8502Sjsg 	 */
1927fb4d8502Sjsg 	p = container_of(dwork, struct kfd_process, eviction_work);
1928fb4d8502Sjsg 	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
1929fb4d8502Sjsg 		  "Eviction fence mismatch\n");
1930fb4d8502Sjsg 
1931fb4d8502Sjsg 	/* A narrow window of overlap between the restore and evict work
1932fb4d8502Sjsg 	 * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
1933fb4d8502Sjsg 	 * unreserves KFD BOs, they can be evicted again, but restore still
1934fb4d8502Sjsg 	 * has a few more steps to finish. So let's wait for any previous
1935fb4d8502Sjsg 	 * restore work to complete.
1936fb4d8502Sjsg 	 */
1937fb4d8502Sjsg 	flush_delayed_work(&p->restore_work);
1938fb4d8502Sjsg 
1939c349dbc7Sjsg 	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
19401bb76ff1Sjsg 	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
1941fb4d8502Sjsg 	if (!ret) {
1942fb4d8502Sjsg 		dma_fence_signal(p->ef);
1943fb4d8502Sjsg 		dma_fence_put(p->ef);
1944fb4d8502Sjsg 		p->ef = NULL;
1945fb4d8502Sjsg 		queue_delayed_work(kfd_restore_wq, &p->restore_work,
1946fb4d8502Sjsg 				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
1947fb4d8502Sjsg 
1948c349dbc7Sjsg 		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
1949fb4d8502Sjsg 	} else
1950c349dbc7Sjsg 		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
1951fb4d8502Sjsg }
1952fb4d8502Sjsg 
1953fb4d8502Sjsg static void restore_process_worker(struct work_struct *work)
1954fb4d8502Sjsg {
1955fb4d8502Sjsg 	struct delayed_work *dwork;
1956fb4d8502Sjsg 	struct kfd_process *p;
1957fb4d8502Sjsg 	int ret = 0;
1958fb4d8502Sjsg 
1959fb4d8502Sjsg 	dwork = to_delayed_work(work);
1960fb4d8502Sjsg 
1961fb4d8502Sjsg 	/* Process termination destroys this worker thread. So during the
1962fb4d8502Sjsg 	 * lifetime of this thread, kfd_process p will be valid.
1963fb4d8502Sjsg 	 */
1964fb4d8502Sjsg 	p = container_of(dwork, struct kfd_process, restore_work);
1965c349dbc7Sjsg 	pr_debug("Started restoring pasid 0x%x\n", p->pasid);
1966fb4d8502Sjsg 
1967fb4d8502Sjsg 	/* Set last_restore_timestamp before the restoration succeeds.
1968fb4d8502Sjsg 	 * Otherwise it would have to be set by KGD (restore_process_bos)
1969fb4d8502Sjsg 	 * before KFD BOs are unreserved; if not, the process could be evicted
1970fb4d8502Sjsg 	 * again before the timestamp is set.
1971fb4d8502Sjsg 	 * If restore fails, the timestamp will be set again in the next
1972fb4d8502Sjsg 	 * attempt. This means the minimum GPU quantum would be
1973fb4d8502Sjsg 	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
1974fb4d8502Sjsg 	 * functions).
1975fb4d8502Sjsg 	 */
1976fb4d8502Sjsg 
1977fb4d8502Sjsg 	p->last_restore_timestamp = get_jiffies_64();
1978f005ef32Sjsg 	/* VMs may not have been acquired yet during debugging. */
1979f005ef32Sjsg 	if (p->kgd_process_info)
1980c349dbc7Sjsg 		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
1981fb4d8502Sjsg 							     &p->ef);
1982fb4d8502Sjsg 	if (ret) {
1983c349dbc7Sjsg 		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
1984fb4d8502Sjsg 			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
1985fb4d8502Sjsg 		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
1986fb4d8502Sjsg 				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
1987fb4d8502Sjsg 		WARN(!ret, "reschedule restore work failed\n");
1988fb4d8502Sjsg 		return;
1989fb4d8502Sjsg 	}
1990fb4d8502Sjsg 
1991fb4d8502Sjsg 	ret = kfd_process_restore_queues(p);
1992fb4d8502Sjsg 	if (!ret)
1993c349dbc7Sjsg 		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
1994fb4d8502Sjsg 	else
1995c349dbc7Sjsg 		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
1996fb4d8502Sjsg }
1997fb4d8502Sjsg 
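/* Evict the user queues of every known process and signal their eviction
 * fences, typically before the device is suspended or reset.
 */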
1998fb4d8502Sjsg void kfd_suspend_all_processes(void)
1999fb4d8502Sjsg {
2000fb4d8502Sjsg 	struct kfd_process *p;
2001fb4d8502Sjsg 	unsigned int temp;
2002fb4d8502Sjsg 	int idx = srcu_read_lock(&kfd_processes_srcu);
2003fb4d8502Sjsg 
2004ad8b1aafSjsg 	WARN(debug_evictions, "Evicting all processes");
2005fb4d8502Sjsg 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
2006fb4d8502Sjsg 		cancel_delayed_work_sync(&p->eviction_work);
2007f005ef32Sjsg 		flush_delayed_work(&p->restore_work);
2008fb4d8502Sjsg 
20091bb76ff1Sjsg 		if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
2010c349dbc7Sjsg 			pr_err("Failed to suspend process 0x%x\n", p->pasid);
2011fb4d8502Sjsg 		dma_fence_signal(p->ef);
2012fb4d8502Sjsg 		dma_fence_put(p->ef);
2013fb4d8502Sjsg 		p->ef = NULL;
2014fb4d8502Sjsg 	}
2015fb4d8502Sjsg 	srcu_read_unlock(&kfd_processes_srcu, idx);
2016fb4d8502Sjsg }
2017fb4d8502Sjsg 
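/* Schedule restore work for every known process, typically after the device
 * has been resumed.
 */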
2018fb4d8502Sjsg int kfd_resume_all_processes(void)
2019fb4d8502Sjsg {
2020fb4d8502Sjsg 	struct kfd_process *p;
2021fb4d8502Sjsg 	unsigned int temp;
2022fb4d8502Sjsg 	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
2023fb4d8502Sjsg 
2024fb4d8502Sjsg 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
2025fb4d8502Sjsg 		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
2026fb4d8502Sjsg 			pr_err("Restore process %d failed during resume\n",
2027fb4d8502Sjsg 			       p->pasid);
2028fb4d8502Sjsg 			ret = -EFAULT;
2029fb4d8502Sjsg 		}
2030fb4d8502Sjsg 	}
2031fb4d8502Sjsg 	srcu_read_unlock(&kfd_processes_srcu, idx);
2032fb4d8502Sjsg 	return ret;
2033fb4d8502Sjsg }
2034fb4d8502Sjsg 
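/* Back the CWSR TBA/TMA mapping with freshly allocated, zeroed kernel pages
 * and map them into the user VMA; presumably reached through the KFD mmap
 * handler for the KFD_MMAP_TYPE_RESERVED_MEM offset used by
 * kfd_process_init_cwsr_apu() above.
 */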
2035f005ef32Sjsg int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
2036fb4d8502Sjsg 			  struct vm_area_struct *vma)
2037fb4d8502Sjsg {
2038fb4d8502Sjsg 	struct kfd_process_device *pdd;
2039fb4d8502Sjsg 	struct qcm_process_device *qpd;
2040fb4d8502Sjsg 
2041fb4d8502Sjsg 	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
2042fb4d8502Sjsg 		pr_err("Incorrect CWSR mapping size.\n");
2043fb4d8502Sjsg 		return -EINVAL;
2044fb4d8502Sjsg 	}
2045fb4d8502Sjsg 
2046fb4d8502Sjsg 	pdd = kfd_get_process_device_data(dev, process);
2047fb4d8502Sjsg 	if (!pdd)
2048fb4d8502Sjsg 		return -EINVAL;
2049fb4d8502Sjsg 	qpd = &pdd->qpd;
2050fb4d8502Sjsg 
2051fb4d8502Sjsg 	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
2052fb4d8502Sjsg 					get_order(KFD_CWSR_TBA_TMA_SIZE));
2053fb4d8502Sjsg 	if (!qpd->cwsr_kaddr) {
2054fb4d8502Sjsg 		pr_err("Error allocating per process CWSR buffer.\n");
2055fb4d8502Sjsg 		return -ENOMEM;
2056fb4d8502Sjsg 	}
2057fb4d8502Sjsg 
2058f005ef32Sjsg 	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND
2059f005ef32Sjsg 		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
2060fb4d8502Sjsg 	/* Mapping pages to user process */
2061fb4d8502Sjsg 	return remap_pfn_range(vma, vma->vm_start,
2062fb4d8502Sjsg 			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
2063fb4d8502Sjsg 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
2064fb4d8502Sjsg }
2065fb4d8502Sjsg 
20665ca02815Sjsg void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
2067fb4d8502Sjsg {
20681bb76ff1Sjsg 	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
20691bb76ff1Sjsg 	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
2070f005ef32Sjsg 	struct kfd_node *dev = pdd->dev;
2071f005ef32Sjsg 	uint32_t xcc_mask = dev->xcc_mask;
2072f005ef32Sjsg 	int xcc = 0;
2073fb4d8502Sjsg 
20741bb76ff1Sjsg 	/*
20751bb76ff1Sjsg 	 * We may race and lose here, but that is extremely unlikely, and
20761bb76ff1Sjsg 	 * the worst that could happen is that we flush the changes into
20771bb76ff1Sjsg 	 * the TLB once more, which is harmless.
20781bb76ff1Sjsg 	 */
20791bb76ff1Sjsg 	if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
20801bb76ff1Sjsg 		return;
20811bb76ff1Sjsg 
2082fb4d8502Sjsg 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
2083fb4d8502Sjsg 		/* Nothing to flush until a VMID is assigned, which
2084fb4d8502Sjsg 		 * only happens when the first queue is created.
2085fb4d8502Sjsg 		 */
2086fb4d8502Sjsg 		if (pdd->qpd.vmid)
20871bb76ff1Sjsg 			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
2088c349dbc7Sjsg 							pdd->qpd.vmid);
2089fb4d8502Sjsg 	} else {
2090f005ef32Sjsg 		for_each_inst(xcc, xcc_mask)
2091f005ef32Sjsg 			amdgpu_amdkfd_flush_gpu_tlb_pasid(
2092f005ef32Sjsg 				dev->adev, pdd->process->pasid, type, xcc);
2093fb4d8502Sjsg 	}
2094fb4d8502Sjsg }
2095fb4d8502Sjsg 
2096f005ef32Sjsg /* Assumes the caller holds the process lock. */
2097f005ef32Sjsg int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
2098f005ef32Sjsg {
2099f005ef32Sjsg 	uint32_t irq_drain_fence[8];
2100f005ef32Sjsg 	uint8_t node_id = 0;
2101f005ef32Sjsg 	int r = 0;
2102f005ef32Sjsg 
2103f005ef32Sjsg 	if (!KFD_IS_SOC15(pdd->dev))
2104f005ef32Sjsg 		return 0;
2105f005ef32Sjsg 
2106f005ef32Sjsg 	pdd->process->irq_drain_is_open = true;
2107f005ef32Sjsg 
2108f005ef32Sjsg 	memset(irq_drain_fence, 0, sizeof(irq_drain_fence));
2109f005ef32Sjsg 	irq_drain_fence[0] = (KFD_IRQ_FENCE_SOURCEID << 8) |
2110f005ef32Sjsg 							KFD_IRQ_FENCE_CLIENTID;
2111f005ef32Sjsg 	irq_drain_fence[3] = pdd->process->pasid;
2112f005ef32Sjsg 
2113f005ef32Sjsg 	/*
2114f005ef32Sjsg 	 * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
2115f005ef32Sjsg 	 */
2116f005ef32Sjsg 	if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3)) {
2117f005ef32Sjsg 		node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
2118f005ef32Sjsg 		irq_drain_fence[3] |= node_id << 16;
2119f005ef32Sjsg 	}
2120f005ef32Sjsg 
2121f005ef32Sjsg 	/* Ensure stale IRQs already scheduled are handled as KFD interrupts, then send the drain fence. */
2122f005ef32Sjsg 	if (amdgpu_amdkfd_send_close_event_drain_irq(pdd->dev->adev,
2123f005ef32Sjsg 						     irq_drain_fence)) {
2124f005ef32Sjsg 		pdd->process->irq_drain_is_open = false;
2125f005ef32Sjsg 		return 0;
2126f005ef32Sjsg 	}
2127f005ef32Sjsg 
2128f005ef32Sjsg 	r = wait_event_interruptible(pdd->process->wait_irq_drain,
2129f005ef32Sjsg 				     !READ_ONCE(pdd->process->irq_drain_is_open));
2130f005ef32Sjsg 	if (r)
2131f005ef32Sjsg 		pdd->process->irq_drain_is_open = false;
2132f005ef32Sjsg 
2133f005ef32Sjsg 	return r;
2134f005ef32Sjsg }
2135f005ef32Sjsg 
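/* Close the interrupt-drain window for the process with this PASID and wake
 * any waiter in kfd_process_drain_interrupts(); presumably called when the
 * drain fence interrupt arrives.
 */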
2136f005ef32Sjsg void kfd_process_close_interrupt_drain(unsigned int pasid)
2137f005ef32Sjsg {
2138f005ef32Sjsg 	struct kfd_process *p;
2139f005ef32Sjsg 
2140f005ef32Sjsg 	p = kfd_lookup_process_by_pasid(pasid);
2141f005ef32Sjsg 
2142f005ef32Sjsg 	if (!p)
2143f005ef32Sjsg 		return;
2144f005ef32Sjsg 
2145f005ef32Sjsg 	WRITE_ONCE(p->irq_drain_is_open, false);
2146f005ef32Sjsg 	wake_up_all(&p->wait_irq_drain);
2147f005ef32Sjsg 	kfd_unref_process(p);
2148f005ef32Sjsg }
2149f005ef32Sjsg 
2150f005ef32Sjsg struct send_exception_work_handler_workarea {
2151f005ef32Sjsg 	struct work_struct work;
2152f005ef32Sjsg 	struct kfd_process *p;
2153f005ef32Sjsg 	unsigned int queue_id;
2154f005ef32Sjsg 	uint64_t error_reason;
2155f005ef32Sjsg };
2156f005ef32Sjsg 
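/* Worker that temporarily adopts the target process's mm so it can OR the
 * error reason into the err_payload field of the queue's context save area
 * header and then signal the associated event.
 */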
2157f005ef32Sjsg static void send_exception_work_handler(struct work_struct *work)
2158f005ef32Sjsg {
2159f005ef32Sjsg 	struct send_exception_work_handler_workarea *workarea;
2160f005ef32Sjsg 	struct kfd_process *p;
2161f005ef32Sjsg 	struct queue *q;
2162f005ef32Sjsg 	struct mm_struct *mm;
2163f005ef32Sjsg 	struct kfd_context_save_area_header __user *csa_header;
2164f005ef32Sjsg 	uint64_t __user *err_payload_ptr;
2165f005ef32Sjsg 	uint64_t cur_err;
2166f005ef32Sjsg 	uint32_t ev_id;
2167f005ef32Sjsg 
2168f005ef32Sjsg 	workarea = container_of(work,
2169f005ef32Sjsg 				struct send_exception_work_handler_workarea,
2170f005ef32Sjsg 				work);
2171f005ef32Sjsg 	p = workarea->p;
2172f005ef32Sjsg 
2173f005ef32Sjsg 	mm = get_task_mm(p->lead_thread);
2174f005ef32Sjsg 
2175f005ef32Sjsg 	if (!mm)
2176f005ef32Sjsg 		return;
2177f005ef32Sjsg 
2178f005ef32Sjsg 	kthread_use_mm(mm);
2179f005ef32Sjsg 
2180f005ef32Sjsg 	q = pqm_get_user_queue(&p->pqm, workarea->queue_id);
2181f005ef32Sjsg 
2182f005ef32Sjsg 	if (!q)
2183f005ef32Sjsg 		goto out;
2184f005ef32Sjsg 
2185f005ef32Sjsg 	csa_header = (void __user *)q->properties.ctx_save_restore_area_address;
2186f005ef32Sjsg 
2187f005ef32Sjsg 	get_user(err_payload_ptr, (uint64_t __user **)&csa_header->err_payload_addr);
2188f005ef32Sjsg 	get_user(cur_err, err_payload_ptr);
2189f005ef32Sjsg 	cur_err |= workarea->error_reason;
2190f005ef32Sjsg 	put_user(cur_err, err_payload_ptr);
2191f005ef32Sjsg 	get_user(ev_id, &csa_header->err_event_id);
2192f005ef32Sjsg 
2193f005ef32Sjsg 	kfd_set_event(p, ev_id);
2194f005ef32Sjsg 
2195f005ef32Sjsg out:
2196f005ef32Sjsg 	kthread_unuse_mm(mm);
2197f005ef32Sjsg 	mmput(mm);
2198f005ef32Sjsg }
2199f005ef32Sjsg 
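/* Run send_exception_work_handler() on the system workqueue and wait for it
 * to finish, so that the write to the user-space context save area happens
 * from a worker context that can adopt the target mm.
 */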
2200f005ef32Sjsg int kfd_send_exception_to_runtime(struct kfd_process *p,
2201f005ef32Sjsg 			unsigned int queue_id,
2202f005ef32Sjsg 			uint64_t error_reason)
2203f005ef32Sjsg {
2204f005ef32Sjsg 	struct send_exception_work_handler_workarea worker;
2205f005ef32Sjsg 
2206f005ef32Sjsg 	INIT_WORK_ONSTACK(&worker.work, send_exception_work_handler);
2207f005ef32Sjsg 
2208f005ef32Sjsg 	worker.p = p;
2209f005ef32Sjsg 	worker.queue_id = queue_id;
2210f005ef32Sjsg 	worker.error_reason = error_reason;
2211f005ef32Sjsg 
2212f005ef32Sjsg 	schedule_work(&worker.work);
2213f005ef32Sjsg 	flush_work(&worker.work);
2214f005ef32Sjsg 	destroy_work_on_stack(&worker.work);
2215f005ef32Sjsg 
2216f005ef32Sjsg 	return 0;
2217f005ef32Sjsg }
2218f005ef32Sjsg 
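/* Find the pdd whose user-visible GPU ID matches @gpu_id; returns NULL if
 * @gpu_id is 0 or no such pdd exists.
 */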
22191bb76ff1Sjsg struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
22201bb76ff1Sjsg {
22211bb76ff1Sjsg 	int i;
22221bb76ff1Sjsg 
22231bb76ff1Sjsg 	if (gpu_id) {
22241bb76ff1Sjsg 		for (i = 0; i < p->n_pdds; i++) {
22251bb76ff1Sjsg 			struct kfd_process_device *pdd = p->pdds[i];
22261bb76ff1Sjsg 
22271bb76ff1Sjsg 			if (pdd->user_gpu_id == gpu_id)
22281bb76ff1Sjsg 				return pdd;
22291bb76ff1Sjsg 		}
22301bb76ff1Sjsg 	}
22311bb76ff1Sjsg 	return NULL;
22321bb76ff1Sjsg }
22331bb76ff1Sjsg 
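/* Translate an actual device ID into the user-visible GPU ID recorded in the
 * matching pdd; returns 0 if @actual_gpu_id is 0 and -EINVAL if no pdd
 * matches.
 */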
22341bb76ff1Sjsg int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id)
22351bb76ff1Sjsg {
22361bb76ff1Sjsg 	int i;
22371bb76ff1Sjsg 
22381bb76ff1Sjsg 	if (!actual_gpu_id)
22391bb76ff1Sjsg 		return 0;
22401bb76ff1Sjsg 
22411bb76ff1Sjsg 	for (i = 0; i < p->n_pdds; i++) {
22421bb76ff1Sjsg 		struct kfd_process_device *pdd = p->pdds[i];
22431bb76ff1Sjsg 
22441bb76ff1Sjsg 		if (pdd->dev->id == actual_gpu_id)
22451bb76ff1Sjsg 			return pdd->user_gpu_id;
22461bb76ff1Sjsg 	}
22471bb76ff1Sjsg 	return -EINVAL;
22481bb76ff1Sjsg }
22491bb76ff1Sjsg 
2250fb4d8502Sjsg #if defined(CONFIG_DEBUG_FS)
2251fb4d8502Sjsg 
2252fb4d8502Sjsg int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
2253fb4d8502Sjsg {
2254fb4d8502Sjsg 	struct kfd_process *p;
2255fb4d8502Sjsg 	unsigned int temp;
2256fb4d8502Sjsg 	int r = 0;
2257fb4d8502Sjsg 
2258fb4d8502Sjsg 	int idx = srcu_read_lock(&kfd_processes_srcu);
2259fb4d8502Sjsg 
2260fb4d8502Sjsg 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
2261c349dbc7Sjsg 		seq_printf(m, "Process %d PASID 0x%x:\n",
2262fb4d8502Sjsg 			   p->lead_thread->tgid, p->pasid);
2263fb4d8502Sjsg 
2264fb4d8502Sjsg 		mutex_lock(&p->mutex);
2265fb4d8502Sjsg 		r = pqm_debugfs_mqds(m, &p->pqm);
2266fb4d8502Sjsg 		mutex_unlock(&p->mutex);
2267fb4d8502Sjsg 
2268fb4d8502Sjsg 		if (r)
2269fb4d8502Sjsg 			break;
2270fb4d8502Sjsg 	}
2271fb4d8502Sjsg 
2272fb4d8502Sjsg 	srcu_read_unlock(&kfd_processes_srcu, idx);
2273fb4d8502Sjsg 
2274fb4d8502Sjsg 	return r;
2275fb4d8502Sjsg }
2276fb4d8502Sjsg 
2277fb4d8502Sjsg #endif
2278