/*	$NetBSD: gen6_ppgtt.c,v 1.8 2021/12/19 12:27:32 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: gen6_ppgtt.c,v 1.8 2021/12/19 12:27:32 riastradh Exp $");

#include <linux/log2.h>

#include "gen6_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_gt.h"

#include <linux/nbsd-namespace.h>
/* Write pde (index) from the page directory @pd to the page table @pt */
static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
				  const unsigned int pde,
				  const struct i915_page_table *pt)
{
	/* Caller needs to make sure the write completes if necessary */
#ifdef __NetBSD__
	CTASSERT(sizeof(gen6_pte_t) == 4);
	bus_space_write_4(ppgtt->pd_bst, ppgtt->pd_bsh, pde*sizeof(gen6_pte_t),
	    GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID);
#else
	iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
		  ppgtt->pd_addr + pde);
#endif
}

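/*
 * Enable per-process GTT on gen7 (Ivybridge/Haswell): set the ECOCHK
 * PPGTT cacheability bits and turn on GFX_PPGTT_ENABLE in each engine's
 * ring-local GFX_MODE register.
 */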
void gen7_ppgtt_enable(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 ecochk;

	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
	if (IS_HASWELL(i915)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);

	for_each_engine(engine, gt, id) {
		/* GFX_MODE is per-ring on gen7+ */
		ENGINE_WRITE(engine,
			     RING_MODE_GEN7,
			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

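/*
 * Enable PPGTT on gen6 (Sandybridge), where GFX_MODE is a single global
 * register; PPGTT itself may be unavailable when VT-d is enabled.
 */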
void gen6_ppgtt_enable(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_rmw(uncore,
			 GAC_ECO_BITS,
			 0,
			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

	intel_uncore_rmw(uncore,
			 GAB_CTL,
			 0,
			 GAB_CTL_CONT_AFTER_PAGEFAULT);

	intel_uncore_rmw(uncore,
			 GAM_ECOCHK,
			 0,
			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
		intel_uncore_write(uncore,
				   GFX_MODE,
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */

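/*
 * Point the PTEs covering [start, start + length) back at the scratch
 * page.  The hardware caches PDEs in the context image, so page tables
 * cannot be removed on the fly; any table whose use count drops to zero
 * is instead flagged for reclamation on the next unbind.
 */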
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   u64 start, u64 length)
{
	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
	unsigned int pde = first_entry / GEN6_PTES;
	unsigned int pte = first_entry % GEN6_PTES;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

	while (num_entries) {
		struct i915_page_table * const pt =
			i915_pt_entry(ppgtt->base.pd, pde++);
		const unsigned int count = min(num_entries, GEN6_PTES - pte);
		gen6_pte_t *vaddr;

		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));

		num_entries -= count;

		GEM_BUG_ON(count > atomic_read(&pt->used));
		if (!atomic_sub_return(count, &pt->used))
			ppgtt->scan_for_unused_pt = true;

		/*
		 * Note that the hw doesn't support removing PDE on the fly
		 * (they are cached inside the context with no means to
		 * invalidate the cache), so we can only reset the PTE
		 * entries back to scratch.
		 */

		vaddr = kmap_atomic_px(pt);
		memset32(vaddr + pte, scratch_pte, count);
		kunmap_atomic(vaddr);

		pte = 0;
	}
}

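/*
 * Install PTEs for all the pages backing @vma.  On NetBSD the pages
 * arrive as a bus_dma segment list; otherwise they come from the
 * object's scatterlist.
 */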
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct i915_vma *vma,
				      enum i915_cache_level cache_level,
				      u32 flags)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory * const pd = ppgtt->pd;
	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
	unsigned int act_pt = first_entry / GEN6_PTES;
	unsigned int act_pte = first_entry % GEN6_PTES;
	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter = sgt_dma(vma);
	gen6_pte_t *vaddr;

	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);

	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
	do {
#ifdef __NetBSD__
		KASSERT(iter.seg < iter.map->dm_nsegs);
		KASSERT((iter.off & (PAGE_SIZE - 1)) == 0);
		const bus_dma_segment_t *seg = &iter.map->dm_segs[iter.seg];
		KASSERT((seg->ds_addr & (PAGE_SIZE - 1)) == 0);
		KASSERT((seg->ds_len & (PAGE_SIZE - 1)) == 0);
		KASSERT(iter.off <= seg->ds_len - PAGE_SIZE);
		vaddr[act_pte] = pte_encode |
		    GEN6_PTE_ADDR_ENCODE(seg->ds_addr + iter.off);
		iter.off += PAGE_SIZE;
		if (iter.off >= seg->ds_len) {
			GEM_BUG_ON(iter.off > seg->ds_len);
			iter.off = 0;
			if (++iter.seg >= iter.map->dm_nsegs) {
				GEM_BUG_ON(iter.seg > iter.map->dm_nsegs);
				break;
			}
		}
#else
		GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

		iter.dma += I915_GTT_PAGE_SIZE;
		if (iter.dma == iter.max) {
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg)
				break;

			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + iter.sg->length;
		}
#endif

		if (++act_pte == GEN6_PTES) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
			act_pte = 0;
		}
	} while (1);
	kunmap_atomic(vaddr);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

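/*
 * Write out the PDEs covering [start, end) to the page directory in the
 * GGTT, then post a readback and invalidate the GGTT TLBs so the
 * hardware observes the new entries.
 */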
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	unsigned int pde;

	start = round_down(start, SZ_64K);
	end = round_up(end, SZ_64K) - start;

	mutex_lock(&ppgtt->flush);

	gen6_for_each_pde(pt, pd, start, end, pde)
		gen6_write_pde(ppgtt, pde, pt);

	mb();
#ifdef __NetBSD__
	(void)bus_space_read_4(ppgtt->pd_bst, ppgtt->pd_bsh, 4*(pde - 1));
#else
	ioread32(ppgtt->pd_addr + pde - 1);
#endif
	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
	mb();

	mutex_unlock(&ppgtt->flush);
}

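/*
 * Make sure page tables exist for [start, start + length): replace
 * scratch PDEs with freshly allocated page tables (initialized to
 * scratch PTEs) and bump each table's use count, unwinding back to
 * scratch on allocation failure.
 */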
static int gen6_alloc_va_range(struct i915_address_space *vm,
			       u64 start, u64 length)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt, *alloc = NULL;
	intel_wakeref_t wakeref;
	u64 from = start;
	unsigned int pde;
	int ret = 0;

	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);

	spin_lock(&pd->lock);
	gen6_for_each_pde(pt, pd, start, length, pde) {
		const unsigned int count = gen6_pte_count(start, length);

		if (px_base(pt) == px_base(&vm->scratch[1])) {
			spin_unlock(&pd->lock);

			pt = fetch_and_zero(&alloc);
			if (!pt)
				pt = alloc_pt(vm);
			if (IS_ERR(pt)) {
				ret = PTR_ERR(pt);
				goto unwind_out;
			}

			fill32_px(pt, vm->scratch[0].encode);

			spin_lock(&pd->lock);
			if (pd->entry[pde] == &vm->scratch[1]) {
				pd->entry[pde] = pt;
			} else {
				alloc = pt;
				pt = pd->entry[pde];
			}
		}

		atomic_add(count, &pt->used);
	}
	spin_unlock(&pd->lock);

	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
		gen6_flush_pd(ppgtt, from, start);

	goto out;

unwind_out:
	gen6_ppgtt_clear_range(vm, from, start - from);
out:
	if (alloc)
		free_px(vm, alloc);
	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
	return ret;
}

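/*
 * Set up the scratch hierarchy: a scratch page whose read-only PTE
 * encoding fills a scratch page table, which in turn backs every PDE
 * until real page tables are allocated.
 */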
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
	struct i915_address_space * const vm = &ppgtt->base.vm;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	int ret;

	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
	if (ret)
		return ret;

	vm->scratch[0].encode =
		vm->pte_encode(px_dma(&vm->scratch[0]),
			       I915_CACHE_NONE, PTE_READ_ONLY);

	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
		cleanup_scratch_page(vm);
		return -ENOMEM;
	}

	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
	memset_p(pd->entry, &vm->scratch[1], I915_PDES);

	return 0;
}

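/* Free every page table that still differs from the shared scratch PT. */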
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	u32 pde;

	gen6_for_all_pdes(pt, pd, pde)
		if (px_base(pt) != scratch)
			free_px(&ppgtt->base.vm, pt);
}

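/* Tear down the address space: drop the PD vma, page tables, and scratch. */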
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

	__i915_vma_put(ppgtt->vma);

	gen6_ppgtt_free_pd(ppgtt);
	free_scratch(vm);

	mutex_destroy(&ppgtt->flush);
	mutex_destroy(&ppgtt->pin_mutex);
	spin_lock_destroy(&ppgtt->base.pd->lock);
	kfree(ppgtt->base.pd);
}

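/*
 * The PD vma is not backed by a GEM object, so there is no page list to
 * set up; stash an error pointer in vma->pages as a sentinel instead.
 */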
static int pd_vma_set_pages(struct i915_vma *vma)
{
	vma->pages = ERR_PTR(-ENODEV);
	return 0;
}

static void pd_vma_clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	vma->pages = NULL;
}

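/*
 * Bind the page directory into the GGTT: record where the PDEs now live
 * (on NetBSD, as a bus_space subregion of the GSM; otherwise as a
 * pointer into the ioremapped GTT) and write out PDEs for the whole
 * address space.
 */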
static int pd_vma_bind(struct i915_vma *vma,
		       enum i915_cache_level cache_level,
		       u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
	struct gen6_ppgtt *ppgtt = vma->private;
	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;

	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
#ifdef __NetBSD__
	{
		bus_size_t npgs = vma->size >> PAGE_SHIFT;
		bus_size_t gtt_nbytes = npgs * sizeof(gen6_pte_t);
		bus_size_t ggtt_offset_bytes =
		    (bus_size_t)ggtt_offset * sizeof(gen6_pte_t);
		int ret;

		KASSERTMSG(gtt_nbytes <= ggtt->gsmsz - ggtt_offset_bytes,
		    "oversize ppgtt size 0x%"PRIx64" bytes 0x%"PRIx64" pgs,"
		    " requiring 0x%"PRIx64" bytes of ptes at 0x%"PRIx64";"
		    " gsm has 0x%"PRIx64" bytes total"
		    " with only 0x%"PRIx64" for ptes",
		    (uint64_t)vma->size, (uint64_t)npgs,
		    (uint64_t)gtt_nbytes, (uint64_t)ggtt_offset_bytes,
		    (uint64_t)ggtt->gsmsz,
		    (uint64_t)(ggtt->gsmsz - ggtt_offset_bytes));
		ret = -bus_space_subregion(ggtt->gsmt, ggtt->gsmh,
		    ggtt_offset_bytes, gtt_nbytes, &ppgtt->pd_bsh);
		if (ret) {
			DRM_ERROR("Unable to subregion the GGTT: %d\n", ret);
			return ret;
		}
		ppgtt->pd_bst = ggtt->gsmt;
	}
#else
	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
#endif

	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
	return 0;
}

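/*
 * On unbind, reclaim any page tables that clear_range flagged as fully
 * unused: free them and point their PDEs back at the scratch PT.
 */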
static void pd_vma_unbind(struct i915_vma *vma)
{
	struct gen6_ppgtt *ppgtt = vma->private;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	unsigned int pde;

	if (!ppgtt->scan_for_unused_pt)
		return;

	/* Free all no longer used page tables */
	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
		if (px_base(pt) == scratch || atomic_read(&pt->used))
			continue;

		free_px(&ppgtt->base.vm, pt);
		pd->entry[pde] = scratch;
	}

	ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
	.set_pages = pd_vma_set_pages,
	.clear_pages = pd_vma_clear_pages,
	.bind_vma = pd_vma_bind,
	.unbind_vma = pd_vma_unbind,
};

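/*
 * Create a special vma for the page directory itself so it can be
 * pinned into the GGTT like any other allocation.  There is no GEM
 * object behind it, hence the hand-rolled initialization.
 */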
static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
{
	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
	struct i915_vma *vma;

	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(size > ggtt->vm.total);

	vma = i915_vma_alloc();
	if (!vma)
		return ERR_PTR(-ENOMEM);

	i915_active_init(&vma->active, NULL, NULL);

	kref_init(&vma->ref);
	mutex_init(&vma->pages_mutex);
	vma->vm = i915_vm_get(&ggtt->vm);
	vma->ops = &pd_vma_ops;
	vma->private = ppgtt;

	vma->size = size;
	vma->fence_size = size;
	atomic_set(&vma->flags, I915_VMA_GGTT);
	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */

	INIT_LIST_HEAD(&vma->obj_link);
	INIT_LIST_HEAD(&vma->closed_link);

	return vma;
}

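/*
 * Pin the page directory into the GGTT.  A private pin_count is kept on
 * top of the vma's own, since the aliasing ppgtt is pinned by every
 * active context.
 */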
int gen6_ppgtt_pin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
	int err;

	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));

	/*
	 * Work around the limited maximum vma->pin_count and the
	 * aliasing_ppgtt, which will be pinned into every active context.
	 * (When vma->pin_count becomes atomic, I expect we will naturally
	 * need a larger, unpacked, type and kill this redundancy.)
	 */
	if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
		return 0;

	if (mutex_lock_interruptible(&ppgtt->pin_mutex))
		return -EINTR;

	/*
	 * The PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	err = 0;
	if (!atomic_read(&ppgtt->pin_count))
		err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
	if (!err)
		atomic_inc(&ppgtt->pin_count);
	mutex_unlock(&ppgtt->pin_mutex);

	return err;
}

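/* Drop one pin; unpin the PD from the GGTT when the last pin is gone. */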
void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
	if (atomic_dec_and_test(&ppgtt->pin_count))
		i915_vma_unpin(ppgtt->vma);
}

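/*
 * Drop all pins at once.  Only a single GGTT pin is ever held no matter
 * how high pin_count goes, so one i915_vma_unpin() suffices.
 */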
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	if (!atomic_read(&ppgtt->pin_count))
		return;

	i915_vma_unpin(ppgtt->vma);
	atomic_set(&ppgtt->pin_count, 0);
}

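/*
 * Construct a gen6 ppgtt: a single page directory whose entries all
 * start out pointing at scratch, plus a vma through which the PD can be
 * pinned into the GGTT.
 */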
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
{
	struct i915_ggtt * const ggtt = gt->ggtt;
	struct gen6_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	mutex_init(&ppgtt->flush);
	mutex_init(&ppgtt->pin_mutex);

	ppgtt_init(&ppgtt->base, gt);
	ppgtt->base.vm.top = 1;

	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
	if (!ppgtt->base.pd) {
		err = -ENOMEM;
		goto err_free;
	}

	err = gen6_ppgtt_init_scratch(ppgtt);
	if (err)
		goto err_pd;

	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
	if (IS_ERR(ppgtt->vma)) {
		err = PTR_ERR(ppgtt->vma);
		goto err_scratch;
	}

	return &ppgtt->base;

err_scratch:
	free_scratch(&ppgtt->base.vm);
err_pd:
	spin_lock_destroy(&ppgtt->base.pd->lock);
	kfree(ppgtt->base.pd);
err_free:
	mutex_destroy(&ppgtt->pin_mutex);
	kfree(ppgtt);
	return ERR_PTR(err);
}