xref: /openbsd-src/sys/dev/pci/drm/ttm/ttm_bo_vm.c (revision f0cad5c99b2bb814c8d45ed0f44a97bdf676a560)
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/**************************************************************************
 *
 * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
 */

#define pr_fmt(fmt) "[TTM] " fmt

#include <drm/ttm/ttm_module.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/drm_vma_manager.h>
#include <linux/mm.h>
#include <linux/pfn_t.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/mem_encrypt.h>

#ifdef __linux__

static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
				struct vm_fault *vmf)
{
	vm_fault_t ret = 0;
	int err = 0;

	if (likely(!bo->moving))
		goto out_unlock;

	/*
	 * Quick non-stalling check for idle.
	 */
	if (dma_fence_is_signaled(bo->moving))
		goto out_clear;

	/*
	 * If possible, avoid waiting for GPU with mmap_lock
	 * held.  We only do this if the fault allows retry and this
	 * is the first attempt.
	 */
	if (fault_flag_allow_retry_first(vmf->flags)) {
		ret = VM_FAULT_RETRY;
		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
			goto out_unlock;

		ttm_bo_get(bo);
		mmap_read_unlock(vmf->vma->vm_mm);
		(void) dma_fence_wait(bo->moving, true);
		dma_resv_unlock(bo->base.resv);
		ttm_bo_put(bo);
		goto out_unlock;
	}

	/*
	 * Ordinary wait.
	 */
	err = dma_fence_wait(bo->moving, true);
	if (unlikely(err != 0)) {
		ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
			VM_FAULT_NOPAGE;
		goto out_unlock;
	}

out_clear:
	dma_fence_put(bo->moving);
	bo->moving = NULL;

out_unlock:
	return ret;
}

static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
				       unsigned long page_offset)
{
	struct ttm_bo_device *bdev = bo->bdev;

	if (bdev->driver->io_mem_pfn)
		return bdev->driver->io_mem_pfn(bo, page_offset);

	return (bo->mem.bus.offset >> PAGE_SHIFT) + page_offset;
}
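
/*
 * Illustrative sketch, not part of this file (kept under "#if 0" so it is
 * never built): a driver whose io-memory pfns are not a simple linear
 * function of bo->mem.bus.offset can supply the io_mem_pfn hook instead of
 * relying on the default above. All "example_*" names are hypothetical.
 */
#if 0
static unsigned long example_driver_io_mem_pfn(struct ttm_buffer_object *bo,
					       unsigned long page_offset)
{
	/* Hypothetical per-device aperture base, in bus-address terms. */
	unsigned long aperture_base_pfn =
	    example_aperture_base(bo->bdev) >> PAGE_SHIFT;

	return aperture_base_pfn + (bo->mem.bus.offset >> PAGE_SHIFT) +
	    page_offset;
}
#endif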

/**
 * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
 * @bo: The buffer object
 * @vmf: The fault structure handed to the callback
 *
 * vm callbacks like fault() and *_mkwrite() allow the mmap_lock to be dropped
 * during long waits, and after the wait the callback will be restarted. This
 * lets other threads using the same virtual memory space concurrently map()
 * and unmap() completely unrelated buffer objects. TTM buffer object
 * reservations sometimes wait for the GPU and should therefore be considered
 * long waits. This function reserves the buffer object interruptibly, taking
 * this into account. Starvation is avoided by the vm system not allowing too
 * many repeated restarts.
 * This function is intended to be used in customized fault() and _mkwrite()
 * handlers.
 *
 * Return:
 *    0 on success, with the bo reserved.
 *    VM_FAULT_RETRY if a blocking wait is needed.
 *    VM_FAULT_NOPAGE if a blocking wait was needed but retrying was not allowed.
 */
vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
			     struct vm_fault *vmf)
{
	/*
	 * Work around locking order reversal in fault / nopfn
	 * between mmap_lock and bo_reserve: Perform a trylock operation
	 * for reserve, and if it fails, retry the fault after waiting
	 * for the buffer to become unreserved.
	 */
	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
		/*
		 * If the fault allows retry and this is the first
		 * fault attempt, we try to release the mmap_lock
		 * before waiting
		 */
		if (fault_flag_allow_retry_first(vmf->flags)) {
			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
				ttm_bo_get(bo);
				mmap_read_unlock(vmf->vma->vm_mm);
				if (!dma_resv_lock_interruptible(bo->base.resv,
								 NULL))
					dma_resv_unlock(bo->base.resv);
				ttm_bo_put(bo);
			}

			return VM_FAULT_RETRY;
		}

		if (dma_resv_lock_interruptible(bo->base.resv, NULL))
			return VM_FAULT_NOPAGE;
	}

	return 0;
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);
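
/*
 * Illustrative sketch, not part of this file (kept under "#if 0" so it is
 * never built): the "customized fault() handler" mentioned above typically
 * brackets driver-specific work with ttm_bo_vm_reserve() and
 * ttm_bo_vm_fault_reserved(); the generic ttm_bo_vm_fault() below follows
 * the same pattern without the driver hook. The "example_*" name is
 * hypothetical.
 */
#if 0
static vm_fault_t example_driver_vm_fault(struct vm_fault *vmf)
{
	struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
	vm_fault_t ret;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;	/* RETRY or NOPAGE, bo not reserved */

	/* Driver-specific work (e.g. validation) goes here, bo reserved. */

	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
				       TTM_BO_VM_NUM_PREFAULT, 1);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;	/* mmap_lock and reservation already dropped */

	dma_resv_unlock(bo->base.resv);
	return ret;
}
#endif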

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
 * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
 * @vmf: Fault data
 * @bo: The buffer object
 * @page_offset: Page offset from bo start
 * @fault_page_size: The size of the fault in pages.
 * @pgprot: The page protections.
 * Performs additional checks on whether it's possible to insert a PUD- or
 * PMD-sized pfn and, if so, performs the insertion.
 *
 * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
 * a huge fault was not possible, or on insertion error.
 */
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
					struct ttm_buffer_object *bo,
					pgoff_t page_offset,
					pgoff_t fault_page_size,
					pgprot_t pgprot)
{
	pgoff_t i;
	vm_fault_t ret;
	unsigned long pfn;
	pfn_t pfnt;
	struct ttm_tt *ttm = bo->ttm;
	bool write = vmf->flags & FAULT_FLAG_WRITE;

	/* Fault should not cross bo boundary. */
	page_offset &= ~(fault_page_size - 1);
	if (page_offset + fault_page_size > bo->num_pages)
		goto out_fallback;

	if (bo->mem.bus.is_iomem)
		pfn = ttm_bo_io_mem_pfn(bo, page_offset);
	else
		pfn = page_to_pfn(ttm->pages[page_offset]);

	/* pfn must be fault_page_size aligned. */
	if ((pfn & (fault_page_size - 1)) != 0)
		goto out_fallback;

	/* Check that memory is contiguous. */
	if (!bo->mem.bus.is_iomem) {
		for (i = 1; i < fault_page_size; ++i) {
			if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
				goto out_fallback;
		}
	} else if (bo->bdev->driver->io_mem_pfn) {
		for (i = 1; i < fault_page_size; ++i) {
			if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
				goto out_fallback;
		}
	}

	pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
	if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
		ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
		ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
#endif
	else
		WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);

	if (ret != VM_FAULT_NOPAGE)
		goto out_fallback;

	return VM_FAULT_NOPAGE;
out_fallback:
	count_vm_event(THP_FAULT_FALLBACK);
	return VM_FAULT_FALLBACK;
}
#else
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
					struct ttm_buffer_object *bo,
					pgoff_t page_offset,
					pgoff_t fault_page_size,
					pgprot_t pgprot)
{
	return VM_FAULT_FALLBACK;
}
#endif

/**
 * ttm_bo_vm_fault_reserved - TTM fault helper
 * @vmf: The struct vm_fault given as argument to the fault callback
 * @prot: The page protection to be used for this memory area.
 * @num_prefault: Maximum number of prefault pages. The caller may want to
 * specify this based on madvise settings and the size of the GPU object
 * backed by the memory.
 * @fault_page_size: The size of the fault in pages.
 *
 * This function inserts one or more page table entries pointing to the
 * memory backing the buffer object, and then returns a return code
 * instructing the caller to retry the page access.
 *
 * Return:
 *   VM_FAULT_NOPAGE on success or pending signal
 *   VM_FAULT_SIGBUS on unspecified error
 *   VM_FAULT_OOM on out-of-memory
 *   VM_FAULT_RETRY if retryable wait
 */
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
				    pgprot_t prot,
				    pgoff_t num_prefault,
				    pgoff_t fault_page_size)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	struct ttm_bo_device *bdev = bo->bdev;
	unsigned long page_offset;
	unsigned long page_last;
	unsigned long pfn;
	struct ttm_tt *ttm = NULL;
	struct vm_page *page;
	int err;
	pgoff_t i;
	vm_fault_t ret = VM_FAULT_NOPAGE;
	unsigned long address = vmf->address;

	/*
	 * Refuse to fault imported pages. This should be handled
	 * (if at all) by redirecting mmap to the exporter.
	 */
	if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
		return VM_FAULT_SIGBUS;

	if (bdev->driver->fault_reserve_notify) {
		struct dma_fence *moving = dma_fence_get(bo->moving);

		err = bdev->driver->fault_reserve_notify(bo);
		switch (err) {
		case 0:
			break;
		case -EBUSY:
		case -ERESTARTSYS:
			dma_fence_put(moving);
			return VM_FAULT_NOPAGE;
		default:
			dma_fence_put(moving);
			return VM_FAULT_SIGBUS;
		}

		if (bo->moving != moving) {
			ttm_bo_move_to_lru_tail_unlocked(bo);
		}
		dma_fence_put(moving);
	}

	/*
	 * Wait for buffer data in transit, due to a pipelined
	 * move.
	 */
	ret = ttm_bo_vm_fault_idle(bo, vmf);
	if (unlikely(ret != 0))
		return ret;

	err = ttm_mem_io_reserve(bdev, &bo->mem);
	if (unlikely(err != 0))
		return VM_FAULT_SIGBUS;

	page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
		vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node);
	page_last = vma_pages(vma) + vma->vm_pgoff -
		drm_vma_node_start(&bo->base.vma_node);

	if (unlikely(page_offset >= bo->num_pages))
		return VM_FAULT_SIGBUS;

	prot = ttm_io_prot(bo->mem.placement, prot);
	if (!bo->mem.bus.is_iomem) {
		struct ttm_operation_ctx ctx = {
			.interruptible = false,
			.no_wait_gpu = false,
			.flags = TTM_OPT_FLAG_FORCE_ALLOC
		};

		ttm = bo->ttm;
		if (ttm_tt_populate(bdev, bo->ttm, &ctx))
			return VM_FAULT_OOM;
	} else {
		/* Iomem should not be marked encrypted */
		prot = pgprot_decrypted(prot);
	}

	/* We don't prefault on huge faults. Yet. */
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1)
		return ttm_bo_vm_insert_huge(vmf, bo, page_offset,
					     fault_page_size, prot);

	/*
	 * Speculatively prefault a number of pages. Only error on
	 * first page.
	 */
	for (i = 0; i < num_prefault; ++i) {
		if (bo->mem.bus.is_iomem) {
			pfn = ttm_bo_io_mem_pfn(bo, page_offset);
		} else {
			page = ttm->pages[page_offset];
			if (unlikely(!page && i == 0)) {
				return VM_FAULT_OOM;
			} else if (unlikely(!page)) {
				break;
			}
			page->index = drm_vma_node_start(&bo->base.vma_node) +
				page_offset;
			pfn = page_to_pfn(page);
		}

		/*
		 * Note that the value of @prot at this point may differ from
		 * the value of @vma->vm_page_prot in the caching- and
		 * encryption bits. This is because the exact location of the
		 * data may not be known at mmap() time and may also change
		 * at arbitrary times while the data is mmap'ed.
		 * See vmf_insert_mixed_prot() for a discussion.
		 */
		if (vma->vm_flags & VM_MIXEDMAP)
			ret = vmf_insert_mixed_prot(vma, address,
						    __pfn_to_pfn_t(pfn, PFN_DEV),
						    prot);
		else
			ret = vmf_insert_pfn_prot(vma, address, pfn, prot);

		/* Never error on prefaulted PTEs */
		if (unlikely((ret & VM_FAULT_ERROR))) {
			if (i == 0)
				return VM_FAULT_NOPAGE;
			else
				break;
		}

		address += PAGE_SIZE;
		if (unlikely(++page_offset >= page_last))
			break;
	}
	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);

vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	pgprot_t prot;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	vm_fault_t ret;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	prot = vma->vm_page_prot;
	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

	dma_resv_unlock(bo->base.resv);

	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault);
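
/*
 * Illustrative sketch, not part of this file (kept under "#if 0" so it is
 * never built): a driver that wants transparent huge page faults would add a
 * huge_fault() handler that maps the fault size onto @fault_page_size and
 * lets ttm_bo_vm_insert_huge() fall back to VM_FAULT_FALLBACK when a huge
 * mapping is not possible. The "example_*" name is hypothetical; the
 * enum page_entry_size values are assumed to match the kernel vintage of
 * this code.
 */
#if 0
static vm_fault_t example_driver_vm_huge_fault(struct vm_fault *vmf,
					       enum page_entry_size pe_size)
{
	struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
	pgoff_t fault_page_size;
	vm_fault_t ret;

	switch (pe_size) {
	case PE_SIZE_PMD:
		fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
		break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	case PE_SIZE_PUD:
		fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
		break;
#endif
	default:
		return VM_FAULT_FALLBACK;
	}

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	/* No prefault on huge faults; see ttm_bo_vm_fault_reserved(). */
	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, 1,
				       fault_page_size);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

	dma_resv_unlock(bo->base.resv);
	return ret;
}
#endif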

#else /* !__linux__ */

#define VM_FAULT_NOPAGE		1
#define VM_FAULT_SIGBUS		2
#define VM_FAULT_RETRY		3
#define VM_FAULT_OOM		4

static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
    struct uvm_faultinfo *ufi)
{
	vm_fault_t ret = 0;
	int err = 0;

	if (likely(!bo->moving))
		goto out_unlock;

	/*
	 * Quick non-stalling check for idle.
	 */
	if (dma_fence_is_signaled(bo->moving))
		goto out_clear;

#ifdef __linux__
	/*
	 * If possible, avoid waiting for GPU with mmap_lock
	 * held.  We only do this if the fault allows retry and this
	 * is the first attempt.
	 */
	if (fault_flag_allow_retry_first(vmf->flags)) {
		ret = VM_FAULT_RETRY;
		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
			goto out_unlock;

		ttm_bo_get(bo);
		mmap_read_unlock(vmf->vma->vm_mm);
		(void) dma_fence_wait(bo->moving, true);
		dma_resv_unlock(bo->base.resv);
		ttm_bo_put(bo);
		goto out_unlock;
	}
#endif

	/*
	 * Ordinary wait.
	 */
	err = dma_fence_wait(bo->moving, true);
	if (unlikely(err != 0)) {
		ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
			VM_FAULT_NOPAGE;
		goto out_unlock;
	}

out_clear:
	dma_fence_put(bo->moving);
	bo->moving = NULL;

out_unlock:
	return ret;
}

static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
				       unsigned long page_offset)
{
	struct ttm_bo_device *bdev = bo->bdev;

	if (bdev->driver->io_mem_pfn)
		return bdev->driver->io_mem_pfn(bo, page_offset);

	return (bo->mem.bus.offset >> PAGE_SHIFT) + page_offset;
}

/**
 * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
 * @bo: The buffer object
 *
 * vm callbacks like fault() and *_mkwrite() allow the mmap_lock to be dropped
 * during long waits, and after the wait the callback will be restarted. This
 * lets other threads using the same virtual memory space concurrently map()
 * and unmap() completely unrelated buffer objects. TTM buffer object
 * reservations sometimes wait for the GPU and should therefore be considered
 * long waits. This function reserves the buffer object interruptibly, taking
 * this into account. Starvation is avoided by the vm system not allowing too
 * many repeated restarts.
 * This function is intended to be used in customized fault() and _mkwrite()
 * handlers.
 *
 * Return:
 *    0 on success, with the bo reserved.
 *    VM_FAULT_RETRY if a blocking wait is needed.
 *    VM_FAULT_NOPAGE if a blocking wait was needed but retrying was not allowed.
 */
vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo)
{
	/*
	 * Work around locking order reversal in fault / nopfn
	 * between mmap_lock and bo_reserve: Perform a trylock operation
	 * for reserve, and if it fails, retry the fault after waiting
	 * for the buffer to become unreserved.
	 */
	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
#ifdef __linux__
		/*
		 * If the fault allows retry and this is the first
		 * fault attempt, we try to release the mmap_lock
		 * before waiting
		 */
		if (fault_flag_allow_retry_first(vmf->flags)) {
			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
				ttm_bo_get(bo);
				mmap_read_unlock(vmf->vma->vm_mm);
				if (!dma_resv_lock_interruptible(bo->base.resv,
								 NULL))
					dma_resv_unlock(bo->base.resv);
				ttm_bo_put(bo);
			}

			return VM_FAULT_RETRY;
		}
#endif

		if (dma_resv_lock_interruptible(bo->base.resv, NULL))
			return VM_FAULT_NOPAGE;
	}

	return 0;
}

vm_fault_t ttm_bo_vm_fault_reserved(struct uvm_faultinfo *ufi,
				    vaddr_t vaddr,
				    pgoff_t num_prefault,
				    pgoff_t fault_page_size)
{
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)uobj;
	struct ttm_bo_device *bdev = bo->bdev;
	unsigned long page_offset;
	unsigned long page_last;
	unsigned long pfn;
	struct ttm_tt *ttm = NULL;
	struct vm_page *page;
	bus_addr_t addr;
	paddr_t paddr;
	vm_prot_t prot;
	int pmap_flags;
	int err;
	pgoff_t i;
	vm_fault_t ret = VM_FAULT_NOPAGE;
	unsigned long address = (unsigned long)vaddr;

	/*
	 * Refuse to fault imported pages. This should be handled
	 * (if at all) by redirecting mmap to the exporter.
	 */
	if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
		return VM_FAULT_SIGBUS;

	if (bdev->driver->fault_reserve_notify) {
		struct dma_fence *moving = dma_fence_get(bo->moving);

		err = bdev->driver->fault_reserve_notify(bo);
		switch (err) {
		case 0:
			break;
		case -EBUSY:
		case -ERESTARTSYS:
			dma_fence_put(moving);
			return VM_FAULT_NOPAGE;
		default:
			dma_fence_put(moving);
			return VM_FAULT_SIGBUS;
		}

		if (bo->moving != moving) {
			ttm_bo_move_to_lru_tail_unlocked(bo);
		}
		dma_fence_put(moving);
	}

	/*
	 * Wait for buffer data in transit, due to a pipelined
	 * move.
	 */
	ret = ttm_bo_vm_fault_idle(bo, ufi);
	if (unlikely(ret != 0))
		return ret;
	ret = VM_FAULT_NOPAGE;

	err = ttm_mem_io_reserve(bdev, &bo->mem);
	if (unlikely(err != 0))
		return VM_FAULT_SIGBUS;

	page_offset = ((address - ufi->entry->start) >> PAGE_SHIFT) +
	    drm_vma_node_start(&bo->base.vma_node) - (ufi->entry->offset >> PAGE_SHIFT);
	page_last = ((ufi->entry->end - ufi->entry->start) >> PAGE_SHIFT) +
	    drm_vma_node_start(&bo->base.vma_node) - (ufi->entry->offset >> PAGE_SHIFT);

	if (unlikely(page_offset >= bo->num_pages))
		return VM_FAULT_SIGBUS;

	prot = ufi->entry->protection;
	pmap_flags = ttm_io_prot(bo->mem.placement, 0);
	if (!bo->mem.bus.is_iomem) {
		struct ttm_operation_ctx ctx = {
			.interruptible = false,
			.no_wait_gpu = false,
			.flags = TTM_OPT_FLAG_FORCE_ALLOC
		};

		ttm = bo->ttm;
		if (ttm_tt_populate(bdev, bo->ttm, &ctx))
			return VM_FAULT_OOM;
	}

#ifdef __linux__
	/* We don't prefault on huge faults. Yet. */
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1)
		return ttm_bo_vm_insert_huge(vmf, bo, page_offset,
					     fault_page_size, prot);
#endif

	/*
	 * Speculatively prefault a number of pages. Only error on
	 * first page.
	 */
	for (i = 0; i < num_prefault; ++i) {
		if (bo->mem.bus.is_iomem) {
			pfn = ttm_bo_io_mem_pfn(bo, page_offset);
			addr = pfn << PAGE_SHIFT;
			paddr = bus_space_mmap(bdev->memt, addr, 0, prot, 0);
		} else {
			page = ttm->pages[page_offset];
			if (unlikely(!page && i == 0)) {
				return VM_FAULT_OOM;
			} else if (unlikely(!page)) {
				break;
			}
			paddr = VM_PAGE_TO_PHYS(page);
		}

		err = pmap_enter(ufi->orig_map->pmap, address,
		    paddr | pmap_flags, prot, PMAP_CANFAIL | prot);

		/* Never error on prefaulted PTEs */
		if (unlikely(err)) {
			ret = VM_FAULT_OOM;
			if (i == 0)
				return VM_FAULT_NOPAGE;
			else
				break;
		}

		address += PAGE_SIZE;
		if (unlikely(++page_offset >= page_last))
			break;
	}
	pmap_update(ufi->orig_map->pmap);
	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);

int
ttm_bo_vm_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, vm_page_t *pps,
    int npages, int centeridx, vm_fault_t fault_type,
    vm_prot_t access_type, int flags)
{
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)uobj;
	vm_fault_t ret;

	ret = ttm_bo_vm_reserve(bo);
	if (ret) {
		switch (ret) {
		case VM_FAULT_NOPAGE:
			ret = VM_PAGER_OK;
			break;
		case VM_FAULT_RETRY:
			ret = VM_PAGER_REFAULT;
			break;
		default:
			ret = VM_PAGER_BAD;
			break;
		}

		uvmfault_unlockall(ufi, NULL, uobj);
		return ret;
	}

	ret = ttm_bo_vm_fault_reserved(ufi, vaddr, TTM_BO_VM_NUM_PREFAULT, 1);
	switch (ret) {
	case VM_FAULT_NOPAGE:
		ret = VM_PAGER_OK;
		break;
	case VM_FAULT_RETRY:
		ret = VM_PAGER_REFAULT;
		break;
	default:
		ret = VM_PAGER_BAD;
		break;
	}

	dma_resv_unlock(bo->base.resv);

	uvmfault_unlockall(ufi, NULL, uobj);
	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault);

#endif /* !__linux__ */

#ifdef notyet
void ttm_bo_vm_open(struct vm_area_struct *vma)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;

	WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);

	ttm_bo_get(bo);
}
EXPORT_SYMBOL(ttm_bo_vm_open);

void ttm_bo_vm_close(struct vm_area_struct *vma)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;

	ttm_bo_put(bo);
	vma->vm_private_data = NULL;
}
EXPORT_SYMBOL(ttm_bo_vm_close);

static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
				 unsigned long offset,
				 uint8_t *buf, int len, int write)
{
	unsigned long page = offset >> PAGE_SHIFT;
	unsigned long bytes_left = len;
	int ret;

	/* Copy a page at a time; that way no extra virtual address
	 * mapping is needed.
	 */
	offset -= page << PAGE_SHIFT;
	do {
		unsigned long bytes = min(bytes_left, PAGE_SIZE - offset);
		struct ttm_bo_kmap_obj map;
		void *ptr;
		bool is_iomem;

		ret = ttm_bo_kmap(bo, page, 1, &map);
		if (ret)
			return ret;

		ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
		WARN_ON_ONCE(is_iomem);
		if (write)
			memcpy(ptr, buf, bytes);
		else
			memcpy(buf, ptr, bytes);
		ttm_bo_kunmap(&map);

		page++;
		buf += bytes;
		bytes_left -= bytes;
		offset = 0;
	} while (bytes_left);

	return len;
}

int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
		     void *buf, int len, int write)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;
	unsigned long offset = (addr) - vma->vm_start +
		((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node))
		 << PAGE_SHIFT);
	int ret;

	if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->num_pages)
		return -EIO;

	ret = ttm_bo_reserve(bo, true, false, NULL);
	if (ret)
		return ret;

	switch (bo->mem.mem_type) {
	case TTM_PL_SYSTEM:
		if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
			ret = ttm_tt_swapin(bo->ttm);
			if (unlikely(ret != 0))
				return ret;
		}
		fallthrough;
	case TTM_PL_TT:
		ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write);
		break;
	default:
		if (bo->bdev->driver->access_memory)
			ret = bo->bdev->driver->access_memory(
				bo, offset, buf, len, write);
		else
			ret = -EIO;
	}

	ttm_bo_unreserve(bo);

	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_access);

static const struct vm_operations_struct ttm_bo_vm_ops = {
	.fault = ttm_bo_vm_fault,
	.open = ttm_bo_vm_open,
	.close = ttm_bo_vm_close,
	.access = ttm_bo_vm_access,
};
#endif

void
ttm_bo_vm_reference(struct uvm_object *uobj)
{
	struct ttm_buffer_object *bo =
	    (struct ttm_buffer_object *)uobj;

	ttm_bo_get(bo);
	uobj->uo_refs++;
}

void
ttm_bo_vm_detach(struct uvm_object *uobj)
{
	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)uobj;

	uobj->uo_refs--;
	ttm_bo_put(bo);
}

const struct uvm_pagerops ttm_bo_vm_ops = {
	.pgo_fault = ttm_bo_vm_fault,
	.pgo_reference = ttm_bo_vm_reference,
	.pgo_detach = ttm_bo_vm_detach
};

static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
						  unsigned long offset,
						  unsigned long pages)
{
	struct drm_vma_offset_node *node;
	struct ttm_buffer_object *bo = NULL;

	drm_vma_offset_lock_lookup(bdev->vma_manager);

	node = drm_vma_offset_lookup_locked(bdev->vma_manager, offset, pages);
	if (likely(node)) {
		bo = container_of(node, struct ttm_buffer_object,
				  base.vma_node);
		bo = ttm_bo_get_unless_zero(bo);
	}

	drm_vma_offset_unlock_lookup(bdev->vma_manager);

	if (!bo)
		pr_err("Could not find buffer object to map\n");

	return bo;
}

#ifdef notyet
static void ttm_bo_mmap_vma_setup(struct ttm_buffer_object *bo, struct vm_area_struct *vma)
{
	vma->vm_ops = &ttm_bo_vm_ops;

	/*
	 * Note: We're transferring the bo reference to
	 * vma->vm_private_data here.
	 */

	vma->vm_private_data = bo;

	/*
	 * We'd like to use VM_PFNMAP on shared mappings, where
	 * (vma->vm_flags & VM_SHARED) != 0, for performance reasons,
	 * but for some reason VM_PFNMAP + x86 PAT + write-combine is very
	 * bad for performance. Until that has been sorted out, use
	 * VM_MIXEDMAP on all mappings. See freedesktop.org bug #75719
	 */
	vma->vm_flags |= VM_MIXEDMAP;
	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
}
#endif

#ifdef __linux__
int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
		struct ttm_bo_device *bdev)
{
	struct ttm_bo_driver *driver;
	struct ttm_buffer_object *bo;
	int ret;

	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START))
		return -EINVAL;

	bo = ttm_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma));
	if (unlikely(!bo))
		return -EINVAL;

	driver = bo->bdev->driver;
	if (unlikely(!driver->verify_access)) {
		ret = -EPERM;
		goto out_unref;
	}
	ret = driver->verify_access(bo, filp);
	if (unlikely(ret != 0))
		goto out_unref;

	ttm_bo_mmap_vma_setup(bo, vma);
	return 0;
out_unref:
	ttm_bo_put(bo);
	return ret;
}
EXPORT_SYMBOL(ttm_bo_mmap);
#else
struct uvm_object *
ttm_bo_mmap(struct file *filp, voff_t off, vsize_t size,
	    struct ttm_bo_device *bdev)
{
	struct ttm_bo_driver *driver;
	struct ttm_buffer_object *bo;
	int ret;

	bo = ttm_bo_vm_lookup(bdev, off >> PAGE_SHIFT, size >> PAGE_SHIFT);
	if (unlikely(!bo))
		return NULL;

	driver = bo->bdev->driver;
	if (unlikely(!driver->verify_access)) {
		ret = -EPERM;
		goto out_unref;
	}
	ret = driver->verify_access(bo, filp);
	if (unlikely(ret != 0))
		goto out_unref;

	uvm_obj_init(&bo->base.uobj, &ttm_bo_vm_ops, 1);
	return &bo->base.uobj;
out_unref:
	ttm_bo_put(bo);
	return NULL;
}
#endif
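
/*
 * Illustrative sketch, not part of this file (kept under "#if 0" so it is
 * never built): on OpenBSD a driver's mmap path hands the byte offset and
 * size to ttm_bo_mmap() and returns the resulting uvm_object, which now
 * carries the bo reference, to the fault layer. The "example_*" name is
 * hypothetical; the offset guard mirrors the DRM_FILE_PAGE_OFFSET_START
 * check performed by the Linux variant above.
 */
#if 0
struct uvm_object *
example_driver_mmap(struct file *filp, struct ttm_bo_device *bdev,
    voff_t off, vsize_t size)
{
	/* TTM objects live above the DRM fake-offset start. */
	if ((off >> PAGE_SHIFT) < DRM_FILE_PAGE_OFFSET_START)
		return NULL;

	/* NULL if the lookup or the driver's verify_access() failed. */
	return ttm_bo_mmap(filp, off, size, bdev);
}
#endif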

#ifdef notyet
int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
{
	ttm_bo_get(bo);
	ttm_bo_mmap_vma_setup(bo, vma);
	return 0;
}
EXPORT_SYMBOL(ttm_bo_mmap_obj);
#endif