/*	$NetBSD: i915_gem_userptr.c,v 1.5 2021/12/19 12:32:15 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2012-2014 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_userptr.c,v 1.5 2021/12/19 12:32:15 riastradh Exp $");

#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"

#include <linux/nbsd-namespace.h>

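/*
 * Per-process bookkeeping: ties the process address space (a NetBSD
 * struct vmspace or a Linux struct mm_struct) to the device and to the
 * mmu notifier registered against it.  It is refcounted by the userptr
 * objects that share it, and is torn down from a worker so that the
 * final mmput/mmdrop never happens under struct_mutex.
 */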
struct i915_mm_struct {
#ifdef __NetBSD__
	struct vmspace *mm;
#else
	struct mm_struct *mm;
#endif
	struct drm_i915_private *i915;
	struct i915_mmu_notifier *mn;
	struct hlist_node node;
	struct kref kref;
	struct work_struct work;
};

#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

struct i915_mmu_notifier {
	spinlock_t lock;
	struct hlist_node node;
	struct mmu_notifier mn;
	struct rb_root_cached objects;
	struct i915_mm_struct *mm;
};

struct i915_mmu_object {
	struct i915_mmu_notifier *mn;
	struct drm_i915_gem_object *obj;
	struct interval_tree_node it;
};

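/* Insert the object's interval into the notifier's tree; caller holds mn->lock. */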
static void add_object(struct i915_mmu_object *mo)
{
	GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
	interval_tree_insert(&mo->it, &mo->mn->objects);
}

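/* Remove the interval again; a no-op if it was never added.  Caller holds mn->lock. */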
static void del_object(struct i915_mmu_object *mo)
{
	if (RB_EMPTY_NODE(&mo->it.rb))
		return;

	interval_tree_remove(&mo->it, &mo->mn->objects);
	RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
	struct i915_mmu_object *mo = obj->userptr.mmu_object;

	/*
	 * During mm_invalidate_range we need to cancel any userptr that
	 * overlaps the range being invalidated. Doing so requires the
	 * struct_mutex, and that risks recursion. In order to cause
	 * recursion, the user must alias the userptr address space with
	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
	 * to invalidate that mmapping, mm_invalidate_range is called with
	 * the userptr address *and* the struct_mutex held.  To prevent that
	 * we set a flag under the i915_mmu_notifier spinlock to indicate
	 * whether this object is valid.
	 */
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	if (value)
		add_object(mo);
	else
		del_object(mo);
	spin_unlock(&mo->mn->lock);
}

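/*
 * mmu notifier callback: for every userptr object whose interval overlaps
 * the range being invalidated, take a reference (unless the object is
 * already being destroyed), drop mn->lock, unbind the object from the GTT
 * and release its pages, then rescan the interval tree from the start.
 */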
static int
userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
				  const struct mmu_notifier_range *range)
{
	struct i915_mmu_notifier *mn =
		container_of(_mn, struct i915_mmu_notifier, mn);
	struct interval_tree_node *it;
	unsigned long end;
	int ret = 0;

	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
		return 0;

	/* interval ranges are inclusive, but invalidate range is exclusive */
	end = range->end - 1;

	spin_lock(&mn->lock);
	it = interval_tree_iter_first(&mn->objects, range->start, end);
	while (it) {
		struct drm_i915_gem_object *obj;

		if (!mmu_notifier_range_blockable(range)) {
			ret = -EAGAIN;
			break;
		}

		/*
		 * The mmu_object is released late when destroying the
		 * GEM object so it is entirely possible to gain a
		 * reference on an object in the process of being freed
		 * since our serialisation is via the spinlock and not
		 * the struct_mutex - and consequently use it after it
		 * is freed and then double free it. To prevent that
		 * use-after-free we only acquire a reference on the
		 * object if it is not in the process of being destroyed.
		 */
		obj = container_of(it, struct i915_mmu_object, it)->obj;
		if (!kref_get_unless_zero(&obj->base.refcount)) {
			it = interval_tree_iter_next(it, range->start, end);
			continue;
		}
		spin_unlock(&mn->lock);

		ret = i915_gem_object_unbind(obj,
					     I915_GEM_OBJECT_UNBIND_ACTIVE |
					     I915_GEM_OBJECT_UNBIND_BARRIER);
		if (ret == 0)
			ret = __i915_gem_object_put_pages(obj);
		i915_gem_object_put(obj);
		if (ret)
			return ret;

		spin_lock(&mn->lock);

		/*
		 * As we do not (yet) protect the mmu from concurrent insertion
		 * over this range, there is no guarantee that this search will
		 * terminate given a pathologic workload.
		 */
		it = interval_tree_iter_first(&mn->objects, range->start, end);
	}
	spin_unlock(&mn->lock);

	return ret;
}

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
	.invalidate_range_start = userptr_mn_invalidate_range_start,
};

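/* Allocate and initialise a notifier for one address space; not yet registered. */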
static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn;

	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
	if (mn == NULL)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&mn->lock);
	mn->mn.ops = &i915_gem_userptr_notifier;
	mn->objects = RB_ROOT_CACHED;
	mn->mm = mm;

	return mn;
}

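/* Detach the object from its notifier (if any) and free the tracking node. */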
static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
	struct i915_mmu_object *mo;

	mo = fetch_and_zero(&obj->userptr.mmu_object);
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	del_object(mo);
	spin_unlock(&mo->mn->lock);
	kfree(mo);
}

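/*
 * Return the notifier already installed for this address space, or create
 * and register a new one.  Registration is serialised by mmap_sem and the
 * device's mm_lock; if another thread wins the race, the freshly created
 * notifier is discarded and the winner's is returned.
 */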
static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn;
	int err = 0;

	mn = mm->mn;
	if (mn)
		return mn;

	mn = i915_mmu_notifier_create(mm);
	if (IS_ERR(mn))
		err = PTR_ERR(mn);

	down_write(&mm->mm->mmap_sem);
	mutex_lock(&mm->i915->mm_lock);
	if (mm->mn == NULL && !err) {
		/* Protected by mmap_sem (write-lock) */
		err = __mmu_notifier_register(&mn->mn, mm->mm);
		if (!err) {
			/* Protected by mm_lock */
			mm->mn = fetch_and_zero(&mn);
		}
	} else if (mm->mn) {
		/*
		 * Someone else raced and successfully installed the mmu
		 * notifier, so we can cancel our own errors.
		 */
		err = 0;
	}
	mutex_unlock(&mm->i915->mm_lock);
	up_write(&mm->mm->mmap_sem);

	if (mn && !IS_ERR(mn)) {
		spin_lock_destroy(&mn->lock);
		kfree(mn);
	}

	return err ? ERR_PTR(err) : mm->mn;
}

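/*
 * Hook a userptr object up to the mmu notifier for its address space and
 * record the interval it covers.  With I915_USERPTR_UNSYNCHRONIZED the
 * notifier is skipped entirely, which is restricted to CAP_SYS_ADMIN.
 */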
static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	struct i915_mmu_notifier *mn;
	struct i915_mmu_object *mo;

	if (flags & I915_USERPTR_UNSYNCHRONIZED)
		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

	if (WARN_ON(obj->userptr.mm == NULL))
		return -EINVAL;

	mn = i915_mmu_notifier_find(obj->userptr.mm);
	if (IS_ERR(mn))
		return PTR_ERR(mn);

	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
	if (!mo)
		return -ENOMEM;

	mo->mn = mn;
	mo->obj = obj;
	mo->it.start = obj->userptr.ptr;
	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
	RB_CLEAR_NODE(&mo->it.rb);

	obj->userptr.mmu_object = mo;
	return 0;
}

static void
#ifdef __NetBSD__
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct vmspace *mm)
#else
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
#endif
{
	if (mn == NULL)
		return;

	mmu_notifier_unregister(&mn->mn, mm);
	spin_lock_destroy(&mn->lock);
	kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
		return -ENODEV;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

static void
#ifdef __NetBSD__
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct vmspace *mm)
#else
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
#endif
{
}

#endif

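/* Look up the i915_mm_struct for an address space; caller holds dev_priv->mm_lock. */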
static struct i915_mm_struct *
#ifdef __NetBSD__
__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct vmspace *real)
#else
__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
#endif
{
	struct i915_mm_struct *mm;

	/* Protected by dev_priv->mm_lock */
	hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
		if (mm->mm == real)
			return mm;

	return NULL;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_mm_struct *mm;
	int ret = 0;

	/* During release of the GEM object we hold the struct_mutex. This
	 * precludes us from calling mmput() at that time as that may be
	 * the last reference and so call exit_mmap(). exit_mmap() will
	 * attempt to reap the vma, and if we were holding a GTT mmap
	 * would then call drm_gem_vm_close() and attempt to reacquire
	 * the struct mutex. So in order to avoid that recursion, we have
	 * to defer releasing the mm reference until after we drop the
	 * struct_mutex, i.e. we need to schedule a worker to do the clean
	 * up.
	 */
	mutex_lock(&dev_priv->mm_lock);
#ifdef __NetBSD__
	mm = __i915_mm_struct_find(dev_priv, curproc->p_vmspace);
#else
	mm = __i915_mm_struct_find(dev_priv, current->mm);
#endif
	if (mm == NULL) {
		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
		if (mm == NULL) {
			ret = -ENOMEM;
			goto out;
		}

		kref_init(&mm->kref);
		mm->i915 = to_i915(obj->base.dev);

#ifdef __NetBSD__
		mm->mm = curproc->p_vmspace;
#else
		mm->mm = current->mm;
#endif
		mmgrab(mm->mm);

		mm->mn = NULL;

		/* Protected by dev_priv->mm_lock */
		hash_add(dev_priv->mm_structs,
			 &mm->node, (unsigned long)mm->mm);
	} else
		kref_get(&mm->kref);

	obj->userptr.mm = mm;
out:
	mutex_unlock(&dev_priv->mm_lock);
	return ret;
}

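/* Deferred teardown: unregister the notifier and drop the mm reference from a worker. */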
static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
	i915_mmu_notifier_free(mm->mn, mm->mm);
	mmdrop(mm->mm);
	kfree(mm);
}

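/*
 * kref release callback, called with dev_priv->mm_lock held (and dropped
 * here): unhash the entry and punt the rest of the teardown to the worker.
 */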
static void
__i915_mm_struct_free(struct kref *kref)
{
	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

	/* Protected by dev_priv->mm_lock */
	hash_del(&mm->node);
	mutex_unlock(&mm->i915->mm_lock);

	INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
	queue_work(mm->i915->mm.userptr_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mm == NULL)
		return;

	kref_put_mutex(&obj->userptr.mm->kref,
		       __i915_mm_struct_free,
		       &to_i915(obj->base.dev)->mm_lock);
	obj->userptr.mm = NULL;
}

struct get_pages_work {
	struct work_struct work;
	struct drm_i915_gem_object *obj;
	struct task_struct *task;
};

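/*
 * Wrap the pinned pages in an sg_table and map it for the GPU.  If the DMA
 * mapping fails with large segments, retry with single-page segments before
 * giving up.
 */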
static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
			       struct page **pvec, unsigned long num_pages)
{
	unsigned int max_segment = i915_sg_segment_size();
	struct sg_table *st;
	unsigned int sg_page_sizes;
	int ret;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return ERR_PTR(-ENOMEM);

alloc_table:
	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
					  0, num_pages << PAGE_SHIFT,
					  max_segment,
					  GFP_KERNEL);
	if (ret) {
		kfree(st);
		return ERR_PTR(ret);
	}

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		sg_free_table(st);

		if (max_segment > PAGE_SIZE) {
			max_segment = PAGE_SIZE;
			goto alloc_table;
		}

		kfree(st);
		return ERR_PTR(ret);
	}

	sg_page_sizes = i915_sg_page_sizes(st->sgl);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return st;
}

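/*
 * Worker that pins the user pages with get_user_pages_remote() while only
 * holding mmap_sem, then hands the result (or the error) back to the object
 * via obj->userptr.work.  A superseded worker still runs, but its result is
 * ignored.
 */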
static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
	struct get_pages_work *work = container_of(_work, typeof(*work), work);
	struct drm_i915_gem_object *obj = work->obj;
	const unsigned long npages = obj->base.size >> PAGE_SHIFT;
	unsigned long pinned;
	struct page **pvec;
	int ret;

	ret = -ENOMEM;
	pinned = 0;

	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
	if (pvec != NULL) {
#ifdef __NetBSD__
		struct vmspace *mm = obj->userptr.mm->mm;
#else
		struct mm_struct *mm = obj->userptr.mm->mm;
#endif
		unsigned int flags = 0;
		int locked = 0;

		if (!i915_gem_object_is_readonly(obj))
			flags |= FOLL_WRITE;

		ret = -EFAULT;
		if (mmget_not_zero(mm)) {
			while (pinned < npages) {
				if (!locked) {
					down_read(&mm->mmap_sem);
					locked = 1;
				}
				ret = get_user_pages_remote
					(work->task, mm,
					 obj->userptr.ptr + pinned * PAGE_SIZE,
					 npages - pinned,
					 flags,
					 pvec + pinned, NULL, &locked);
				if (ret < 0)
					break;

				pinned += ret;
			}
			if (locked)
				up_read(&mm->mmap_sem);
			mmput(mm);
		}
	}

	mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
	if (obj->userptr.work == &work->work) {
		struct sg_table *pages = ERR_PTR(ret);

		if (pinned == npages) {
			pages = __i915_gem_userptr_alloc_pages(obj, pvec,
							       npages);
			if (!IS_ERR(pages)) {
				pinned = 0;
				pages = NULL;
			}
		}

		obj->userptr.work = ERR_CAST(pages);
		if (IS_ERR(pages))
			__i915_gem_userptr_set_active(obj, false);
	}
	mutex_unlock(&obj->mm.lock);

	release_pages(pvec, pinned);
	kvfree(pvec);

	i915_gem_object_put(obj);
	put_task_struct(work->task);
	kfree(work);
}

static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
	struct get_pages_work *work;

	/* Spawn a worker so that we can acquire the
	 * user pages without holding our mutex. Access
	 * to the user pages requires mmap_sem, and we have
	 * a strict lock ordering of mmap_sem, struct_mutex -
	 * we already hold struct_mutex here and so cannot
	 * call gup without encountering a lock inversion.
	 *
	 * Userspace will keep on repeating the operation
	 * (thanks to EAGAIN) until either we hit the fast
	 * path or the worker completes. If the worker is
	 * cancelled or superseded, the task is still run
	 * but the results ignored. (This leads to
	 * complications that we may have a stray object
	 * refcount that we need to be wary of when
	 * checking for existing objects during creation.)
	 * If the worker encounters an error, it reports
	 * that error back to this function through
	 * obj->userptr.work = ERR_PTR.
	 */
	work = kmalloc(sizeof(*work), GFP_KERNEL);
	if (work == NULL)
		return ERR_PTR(-ENOMEM);

	obj->userptr.work = &work->work;

	work->obj = i915_gem_object_get(obj);

	work->task = current;
	get_task_struct(work->task);

	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
	queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

	return ERR_PTR(-EAGAIN);
}

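/*
 * .get_pages hook: try the fast path (__get_user_pages_fast) when we are in
 * the owning process; otherwise, or if only some pages could be pinned,
 * defer to the worker and return -EAGAIN so the caller retries.
 */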
static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
	const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
#ifdef __NetBSD__
	struct vmspace *mm = obj->userptr.mm->mm;
#else
	struct mm_struct *mm = obj->userptr.mm->mm;
#endif
	struct page **pvec;
	struct sg_table *pages;
	bool active;
	int pinned;

	/* If userspace should engineer that these pages are replaced in
	 * the vma between us binding this page into the GTT and completion
	 * of rendering... Their loss. If they change the mapping of their
	 * pages they need to create a new bo to point to the new vma.
	 *
	 * However, that still leaves open the possibility of the vma
	 * being copied upon fork. Which falls under the same userspace
	 * synchronisation issue as a regular bo, except that this time
	 * the process may not be expecting that a particular piece of
	 * memory is tied to the GPU.
	 *
	 * Fortunately, we can hook into the mmu_notifier in order to
	 * discard the page references prior to anything nasty happening
	 * to the vma (discard or cloning) which should prevent the more
	 * egregious cases from causing harm.
	 */

	if (obj->userptr.work) {
		/* active flag should still be held for the pending work */
		if (IS_ERR(obj->userptr.work))
			return PTR_ERR(obj->userptr.work);
		else
			return -EAGAIN;
	}

	pvec = NULL;
	pinned = 0;

#ifdef __NetBSD__
	if (mm == curproc->p_vmspace)
#else
	if (mm == current->mm)
#endif
	{
		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
				      GFP_KERNEL |
				      __GFP_NORETRY |
				      __GFP_NOWARN);
		if (pvec) /* defer to worker if malloc fails */
			pinned = __get_user_pages_fast(obj->userptr.ptr,
						       num_pages,
						       !i915_gem_object_is_readonly(obj),
						       pvec);
	}

	active = false;
	if (pinned < 0) {
		pages = ERR_PTR(pinned);
		pinned = 0;
	} else if (pinned < num_pages) {
		pages = __i915_gem_userptr_get_pages_schedule(obj);
		active = pages == ERR_PTR(-EAGAIN);
	} else {
		pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
		active = !IS_ERR(pages);
	}
	if (active)
		__i915_gem_userptr_set_active(obj, true);

	if (IS_ERR(pages))
		release_pages(pvec, pinned);
	kvfree(pvec);

	return PTR_ERR_OR_ZERO(pages);
}

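/*
 * .put_pages hook: cancel any pending worker, unmap the pages from the GPU,
 * write back dirty state and drop the page references.
 */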
static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;

	/* Cancel any inflight work and force them to restart their gup */
	obj->userptr.work = NULL;
	__i915_gem_userptr_set_active(obj, false);
	if (!pages)
		return;

	__i915_gem_object_release_shmem(obj, pages, true);
	i915_gem_gtt_finish_pages(obj, pages);

	/*
	 * We always mark objects as dirty when they are used by the GPU,
	 * just in case. However, if we set the vma as being read-only we know
	 * that the object will never have been written to.
	 */
	if (i915_gem_object_is_readonly(obj))
		obj->mm.dirty = false;

	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty && trylock_page(page)) {
			/*
			 * As this may not be anonymous memory (e.g. shmem)
			 * but exist on a real mapping, we have to lock
			 * the page in order to dirty it -- holding
			 * the page reference is not sufficient to
			 * prevent the inode from being truncated.
			 * Play safe and take the lock.
			 *
			 * However...!
			 *
			 * The mmu-notifier can be invalidated for a
			 * migrate_page, that is already holding the lock
			 * on the page. Such a try_to_unmap() will result
			 * in us calling put_pages() and so recursively try
			 * to lock the page. We avoid that deadlock with
			 * a trylock_page() and in exchange we risk missing
			 * some page dirtying.
			 */
			set_page_dirty(page);
			unlock_page(page);
		}

		mark_page_accessed(page);
		put_page(page);
	}
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
	i915_gem_userptr_release__mmu_notifier(obj);
	i915_gem_userptr_release__mm_struct(obj);
}

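/*
 * Exporting a userptr object via dma-buf requires the mmu notifier, so set
 * it up now if the object was created unsynchronized.
 */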
static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mmu_object)
		return 0;

	return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE |
		 I915_GEM_OBJECT_NO_GGTT |
		 I915_GEM_OBJECT_ASYNC_CANCEL,
	.get_pages = i915_gem_userptr_get_pages,
	.put_pages = i915_gem_userptr_put_pages,
	.dmabuf_export = i915_gem_userptr_dmabuf_export,
	.release = i915_gem_userptr_release,
};

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 */
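/*
 * Illustrative userspace sketch (not part of this file): roughly how a
 * client would hand an anonymous, page-aligned buffer to the kernel via
 * DRM_IOCTL_I915_GEM_USERPTR, assuming the standard libdrm drmIoctl()
 * wrapper and the uapi struct drm_i915_gem_userptr from <drm/i915_drm.h>:
 *
 *	struct drm_i915_gem_userptr arg = {
 *		.user_ptr = (uintptr_t)ptr,	// page-aligned pointer
 *		.user_size = size,		// multiple of the page size
 *		.flags = 0,			// or I915_USERPTR_READ_ONLY
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) == 0)
 *		handle = arg.handle;		// GEM handle for execbuf
 */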
int
i915_gem_userptr_ioctl(struct drm_device *dev,
		       void *data,
		       struct drm_file *file)
{
	static struct lock_class_key lock_class;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_userptr *args = data;
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
		/* We cannot support coherent userptr objects on hw that has
		 * neither an LLC nor working snooping.
		 */
		return -ENODEV;
	}

	if (args->flags & ~(I915_USERPTR_READ_ONLY |
			    I915_USERPTR_UNSYNCHRONIZED))
		return -EINVAL;

	if (!args->user_size)
		return -EINVAL;

	if (offset_in_page(args->user_ptr | args->user_size))
		return -EINVAL;

	if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
		return -EFAULT;

	if (args->flags & I915_USERPTR_READ_ONLY) {
		/*
		 * On almost all of the older hw, we cannot tell the GPU that
		 * a page is readonly.
		 */
		if (!dev_priv->gt.vm->has_read_only)
			return -ENODEV;
	}

	obj = i915_gem_object_alloc();
	if (obj == NULL)
		return -ENOMEM;

	drm_gem_private_object_init(dev, &obj->base, args->user_size);
	i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	obj->userptr.ptr = args->user_ptr;
	if (args->flags & I915_USERPTR_READ_ONLY)
		i915_gem_object_set_readonly(obj);

	/* And keep a pointer to the current->mm for resolving the user pages
	 * at binding. This means that we need to hook into the mmu_notifier
	 * in order to detect if the mmu is destroyed.
	 */
	ret = i915_gem_userptr_init__mm_struct(obj);
	if (ret == 0)
		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
	if (ret == 0)
		ret = drm_gem_handle_create(file, &obj->base, &handle);

	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	args->handle = handle;
	return 0;
}

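/* Driver init/teardown for userptr support: mm_lock, the mm hash table and the acquire workqueue. */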
int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
	mutex_init(&dev_priv->mm_lock);
	hash_init(dev_priv->mm_structs);

	dev_priv->mm.userptr_wq =
		alloc_workqueue("i915-userptr-acquire",
				WQ_HIGHPRI | WQ_UNBOUND,
				0);
	if (!dev_priv->mm.userptr_wq)
		return -ENOMEM;

	return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
	destroy_workqueue(dev_priv->mm.userptr_wq);
	mutex_destroy(&dev_priv->mm_lock);
}