// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_lmem.h"

#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"

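/*
 * Encode a page-directory entry: the dma address of the next-level table
 * plus present/RW bits, with the PPAT cacheability chosen from the legacy
 * i915_cache_level.
 */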
static u64 gen8_pde_encode(const dma_addr_t addr,
			   const enum i915_cache_level level)
{
	u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE;
	else
		pde |= PPAT_UNCACHED;

	return pde;
}

static u64 gen8_pte_encode(dma_addr_t addr,
			   unsigned int pat_index,
			   u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~GEN8_PAGE_RW;

	/*
	 * For pre-gen12 platforms pat_index is the same as enum
	 * i915_cache_level, so the switch-case here is still valid.
	 * See translation table defined by LEGACY_CACHELEVEL.
	 */
	switch (pat_index) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

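/*
 * Gen12 PTEs no longer encode a cache level directly: the low bits of
 * pat_index are copied straight into the PTE's PAT field (bit 3 via the
 * MTL define), and PTE_LM marks pages backed by device local-memory.
 */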
static u64 gen12_pte_encode(dma_addr_t addr,
			    unsigned int pat_index,
			    u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~GEN8_PAGE_RW;

	if (flags & PTE_LM)
		pte |= GEN12_PPGTT_PTE_LM;

	if (pat_index & BIT(0))
		pte |= GEN12_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= GEN12_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2))
		pte |= GEN12_PPGTT_PTE_PAT2;

	if (pat_index & BIT(3))
		pte |= MTL_PPGTT_PTE_PAT3;

	return pte;
}

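/*
 * Under a paravirtualized vGPU, tell the host about the page-directory
 * roots of this PPGTT through the vgtif shared page: write the pdp dma
 * addresses, then post a create/destroy message via g2v_notify.
 */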
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
	enum vgt_g2v_type msg;
	int i;

	if (create)
		atomic_inc(px_used(ppgtt->pd)); /* never remove */
	else
		atomic_dec(px_used(ppgtt->pd));

	mutex_lock(&i915->vgpu.lock);

	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		const u64 daddr = px_dma(ppgtt->pd);

		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		msg = create ?
			VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].lo),
					   lower_32_bits(daddr));
			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].hi),
					   upper_32_bits(daddr));
		}

		msg = create ?
			VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
	}

	/* g2v_notify atomically (via hv trap) consumes the message packet. */
	intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);

	mutex_unlock(&i915->vgpu.lock);
}

/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)

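/*
 * For the range [start, end) at the given level, set *idx to the first
 * entry touched in this directory and return how many entries of this
 * directory the range spans, capped at the end of the directory.
 */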
static unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
	const int shift = gen8_pd_shift(lvl);
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	end += ~mask >> gen8_pd_shift(1);

	*idx = i915_pde_index(start, shift);
	if ((start ^ end) & mask)
		return GEN8_PDES - *idx;
	else
		return i915_pde_index(end, shift) - *idx;
}

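/*
 * True if [start, end) spans the entire range addressed by a single
 * level 'lvl' page table beginning at 'start', i.e. the whole subtree
 * under that directory entry may be torn down at once.
 */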
static bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	return (start ^ end) & mask && (start & ~mask) == 0;
}

static unsigned int gen8_pt_count(u64 start, u64 end)
{
	GEM_BUG_ON(start >= end);
	if ((start ^ end) >> gen8_pd_shift(1))
		return GEN8_PDES - (start & (GEN8_PDES - 1));
	else
		return end - start;
}

static unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
	unsigned int shift = __gen8_pte_shift(vm->top);

	return (vm->total + (1ull << shift) - 1) >> shift;
}

static struct i915_page_directory *
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);

	if (vm->top == 2)
		return ppgtt->pd;
	else
		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}

static struct i915_page_directory *
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
}

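/*
 * Recursively free a page-table tree: walk the populated entries of each
 * directory down to the leaves, then release the backing page of the
 * directory itself.
 */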
static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 int count, int lvl)
{
	if (lvl) {
		void **pde = pd->entry;

		do {
			if (!*pde)
				continue;

			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
		} while (pde++, --count);
	}

	free_px(vm, &pd->pt, lvl);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	if (intel_vgpu_active(vm->i915))
		gen8_ppgtt_notify_vgt(ppgtt, false);

	if (ppgtt->pd)
		__gen8_ppgtt_cleanup(vm, ppgtt->pd,
				     gen8_pd_top_count(vm), vm->top);

	free_scratch(vm);
}

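/*
 * Clear [start, end) (in page indices): point PTEs back at scratch, drop
 * use-counts, and prune an entire subtree once a directory entry is wholly
 * contained in the range. Returns the next unprocessed index so the caller
 * can continue across directory boundaries.
 */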
static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 start, const u64 end, int lvl)
{
	const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
		    gen8_pd_contains(start, end, lvl)) {
			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
			    __func__, vm, lvl + 1, idx, start, end);
			clear_pd_entry(pd, idx, scratch);
			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
			start += (u64)I915_PDES << gen8_pd_shift(lvl);
			continue;
		}

		if (lvl) {
			start = __gen8_ppgtt_clear(vm, as_pd(pt),
						   start, end, lvl);
		} else {
			unsigned int count;
			unsigned int pte = gen8_pd_index(start, 0);
			unsigned int num_ptes;
			u64 *vaddr;

			count = gen8_pt_count(start, end);
			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
			    __func__, vm, lvl, start, end,
			    gen8_pd_index(start, 0), count,
			    atomic_read(&pt->used));
			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

			num_ptes = count;
			if (pt->is_compact) {
				GEM_BUG_ON(num_ptes % 16);
				GEM_BUG_ON(pte % 16);
				num_ptes /= 16;
				pte /= 16;
			}

			vaddr = px_vaddr(pt);
			memset64(vaddr + pte,
				 vm->scratch[0]->encode,
				 num_ptes);

			atomic_sub(count, &pt->used);
			start += count;
		}

		if (release_pd_entry(pd, idx, pt, scratch))
			free_px(vm, pt, lvl);
	} while (idx++, --len);

	return start;
}

static void gen8_ppgtt_clear(struct i915_address_space *vm,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
			   start, start + length, vm->top);
}

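/*
 * Populate the tree for [*start, end): missing directories/tables are taken
 * from the pre-allocated stash under pd->lock, initialised to scratch and
 * use-counted. At the leaf level only the counters are bumped; the PTEs
 * themselves are written later at insert time.
 */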
static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
			       struct i915_vm_pt_stash *stash,
			       struct i915_page_directory * const pd,
			       u64 * const start, const u64 end, int lvl)
{
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(*start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, *start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

	spin_lock(&pd->lock);
	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (!pt) {
			spin_unlock(&pd->lock);

			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
			    __func__, vm, lvl + 1, idx);

			pt = stash->pt[!!lvl];
			__i915_gem_object_pin_pages(pt->base);

			fill_px(pt, vm->scratch[lvl]->encode);

			spin_lock(&pd->lock);
			if (likely(!pd->entry[idx])) {
				stash->pt[!!lvl] = pt->stash;
				atomic_set(&pt->used, 0);
				set_pd_entry(pd, idx, pt);
			} else {
				pt = pd->entry[idx];
			}
		}

		if (lvl) {
			atomic_inc(&pt->used);
			spin_unlock(&pd->lock);

			__gen8_ppgtt_alloc(vm, stash,
					   as_pd(pt), start, end, lvl);

			spin_lock(&pd->lock);
			atomic_dec(&pt->used);
			GEM_BUG_ON(!atomic_read(&pt->used));
		} else {
			unsigned int count = gen8_pt_count(*start, end);

			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
			    __func__, vm, lvl, *start, end,
			    gen8_pd_index(*start, 0), count,
			    atomic_read(&pt->used));

			atomic_add(count, &pt->used);
			/* All other pdes may be simultaneously removed */
			GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
			*start += count;
		}
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_alloc(struct i915_address_space *vm,
			     struct i915_vm_pt_stash *stash,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
			   &start, start + length, vm->top);
}

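/*
 * Walk every page table backing [*start, end) and invoke the callback on
 * each leaf, pinning tables with pt->used around the visit so they cannot
 * be reaped concurrently.
 */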
static void __gen8_ppgtt_foreach(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 u64 *start, u64 end, int lvl,
				 void (*fn)(struct i915_address_space *vm,
					    struct i915_page_table *pt,
					    void *data),
				 void *data)
{
	unsigned int idx, len;

	len = gen8_pd_range(*start, end, lvl--, &idx);

	spin_lock(&pd->lock);
	do {
		struct i915_page_table *pt = pd->entry[idx];

		atomic_inc(&pt->used);
		spin_unlock(&pd->lock);

		if (lvl) {
			__gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl,
					     fn, data);
		} else {
			fn(vm, pt, data);
			*start += gen8_pt_count(*start, end);
		}

		spin_lock(&pd->lock);
		atomic_dec(&pt->used);
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_foreach(struct i915_address_space *vm,
			       u64 start, u64 length,
			       void (*fn)(struct i915_address_space *vm,
					  struct i915_page_table *pt,
					  void *data),
			       void *data)
{
	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;

	__gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd,
			     &start, start + length, vm->top,
			     fn, data);
}

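/*
 * Write 4K PTEs for the scatterlist starting at page index idx, advancing
 * across page-table and page-directory boundaries as needed. Returns the
 * next index to continue from, or 0 once the scatterlist is fully consumed.
 */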
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
		      struct i915_page_directory *pdp,
		      struct sgt_dma *iter,
		      u64 idx,
		      unsigned int pat_index,
		      u32 flags)
{
	struct i915_page_directory *pd;
	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags);
	gen8_pte_t *vaddr;

	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
	do {
		GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;

		iter->dma += I915_GTT_PAGE_SIZE;
		if (iter->dma >= iter->max) {
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg || sg_dma_len(iter->sg) == 0) {
				idx = 0;
				break;
			}

			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + sg_dma_len(iter->sg);
		}

		if (gen8_pd_index(++idx, 0) == 0) {
			if (gen8_pd_index(idx, 1) == 0) {
				/* Limited by sg length for 3lvl */
				if (gen8_pd_index(idx, 2) == 0)
					break;

				pd = pdp->entry[gen8_pd_index(idx, 2)];
			}

			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
		}
	} while (1);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);

	return idx;
}

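/*
 * Huge-page insertion for XeHP SDV and later: prefer a 2M PDE when the dma
 * addresses and va allow it, use the compact 64K page-table layout for
 * suitably aligned local-memory ranges, and otherwise fall back to the
 * PS64 TLB hint or plain 4K PTEs.
 */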
static void
xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
			  struct i915_vma_resource *vma_res,
			  struct sgt_dma *iter,
			  unsigned int pat_index,
			  u32 flags)
{
	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;
	u64 end = start + vma_res->vma_size;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		struct i915_page_table *pt =
			i915_pt_entry(pd, __gen8_pte_index(start, 1));
		gen8_pte_t encode = pte_encode;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index, max, nent, i;

		max = I915_PDES;
		nent = 1;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
				/*
				 * Device local-memory on these platforms should
				 * always use 64K pages or larger (including GTT
				 * alignment), therefore if we know the whole
				 * page-table needs to be filled we can always
				 * safely use the compact-layout. Otherwise fall
				 * back to the TLB hint with PS64. If this is
				 * system memory we only bother with PS64.
				 */
				if ((encode & GEN12_PPGTT_PTE_LM) &&
				    end - start >= SZ_2M && !index) {
					index = __gen8_pte_index(start, 0) / 16;
					page_size = I915_GTT_PAGE_SIZE_64K;

					max /= 16;

					vaddr = px_vaddr(pd);
					vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;

					pt->is_compact = true;
				} else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
					   rem >= I915_GTT_PAGE_SIZE_64K &&
					   !(index % 16)) {
					encode |= GEN12_PTE_PS64;
					page_size = I915_GTT_PAGE_SIZE_64K;
					nent = 16;
				}
			}

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(rem < page_size);

			for (i = 0; i < nent; i++) {
				vaddr[index++] =
					encode | (iter->dma + i *
						  I915_GTT_PAGE_SIZE);
			}

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}

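/*
 * Pre-XeHP huge-page insertion: use a 2M PDE when possible, otherwise
 * write 4K PTEs and opportunistically promote a suitably filled page
 * table to 64K pages via the GEN8_PDE_IPS_64K hint afterwards.
 */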
static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
				   struct i915_vma_resource *vma_res,
				   struct sgt_dma *iter,
				   unsigned int pat_index,
				   u32 flags)
{
	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		gen8_pte_t encode = pte_encode;
		unsigned int maybe_64K = -1;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			struct i915_page_table *pt =
				i915_pt_entry(pd, __gen8_pte_index(start, 1));

			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			if (!index &&
			    vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
				maybe_64K = __gen8_pte_index(start, 1);

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (maybe_64K != -1 && index < I915_PDES &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
					maybe_64K = -1;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < I915_PDES);

		drm_clflush_virt_range(vaddr, PAGE_SIZE);

		/*
		 * Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled whole page-table with 64K entries, or filled part of
		 * it and have reached the end of the sg table and we have
		 * enough padding.
		 */
		if (maybe_64K != -1 &&
		    (index == I915_PDES ||
		     (i915_vm_has_scratch_64K(vm) &&
		      !iter->sg && IS_ALIGNED(vma_res->start +
					      vma_res->node_size,
					      I915_GTT_PAGE_SIZE_2M)))) {
			vaddr = px_vaddr(pd);
			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			page_size = I915_GTT_PAGE_SIZE_64K;

			/*
			 * We write all 4K page entries, even when using 64K
			 * pages. In order to verify that the HW isn't cheating
			 * by using the 4K PTE instead of the 64K PTE, we want
			 * to remove all the surplus entries. If the HW skipped
			 * the 64K PTE, it will read/write into the scratch page
			 * instead - which we detect as missing results during
			 * selftests.
			 */
			if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
				u16 i;

				encode = vm->scratch[0]->encode;
				vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));

				for (i = 1; i < index; i += 16)
					memset64(vaddr + i, encode, 15);

				drm_clflush_virt_range(vaddr, PAGE_SIZE);
			}
		}

		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}

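/*
 * Top-level insert_entries hook: route to the huge-page path when the sg
 * list contains pages larger than 4K, otherwise stream plain 4K PTEs one
 * pdp at a time.
 */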
static void gen8_ppgtt_insert(struct i915_address_space *vm,
			      struct i915_vma_resource *vma_res,
			      unsigned int pat_index,
			      u32 flags)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = sgt_dma(vma_res);

	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
		if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50))
			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
		else
			gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
	} else {
		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;

		do {
			struct i915_page_directory * const pdp =
				gen8_pdp_for_page_index(vm, idx);

			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
						    pat_index, flags);
		} while (idx);

		vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
	}
}

static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
				    dma_addr_t addr,
				    u64 offset,
				    unsigned int pat_index,
				    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(pt->is_compact);

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags);
	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}

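/*
 * Single-page insert into a compact (64K) page table: both the dma address
 * and the offset must be 64K aligned. The first use switches the table to
 * the compact layout, where one PTE maps a 64K page, hence the 4K-granular
 * index is divided by 16.
 */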
static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
					    dma_addr_t addr,
					    u64 offset,
					    unsigned int pat_index,
					    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));

	/* XXX: we don't strictly need to use this layout */

	if (!pt->is_compact) {
		vaddr = px_vaddr(pd);
		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
		pt->is_compact = true;
	}

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
}

static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
				       dma_addr_t addr,
				       u64 offset,
				       unsigned int pat_index,
				       u32 flags)
{
	if (flags & PTE_LM)
		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
						       pat_index, flags);

	return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
}

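/*
 * Build the scratch-page chain: a filler page at level 0 (read-only where
 * supported) and, for each higher level, a directory whose entries all
 * point at the level below, so that unpopulated va simply resolves to the
 * scratch page.
 */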
static int gen8_init_scratch(struct i915_address_space *vm)
{
	u32 pte_flags;
	int ret;
	int i;

	/*
	 * If everybody agrees not to write into the scratch page,
	 * we can reuse it for all vm, keeping contexts and processes separate.
	 */
	if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
		struct i915_address_space *clone = vm->gt->vm;

		GEM_BUG_ON(!clone->has_read_only);

		vm->scratch_order = clone->scratch_order;
		for (i = 0; i <= vm->top; i++)
			vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);

		return 0;
	}

	ret = setup_scratch_page(vm);
	if (ret)
		return ret;

	pte_flags = vm->has_read_only;
	if (i915_gem_object_is_lmem(vm->scratch[0]))
		pte_flags |= PTE_LM;

	vm->scratch[0]->encode =
		vm->pte_encode(px_dma(vm->scratch[0]),
			       i915_gem_get_pat_index(vm->i915,
						      I915_CACHE_NONE),
			       pte_flags);

	for (i = 1; i <= vm->top; i++) {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
		if (IS_ERR(obj)) {
			ret = PTR_ERR(obj);
			goto free_scratch;
		}

		ret = map_pt_dma(vm, obj);
		if (ret) {
			i915_gem_object_put(obj);
			goto free_scratch;
		}

		fill_px(obj, vm->scratch[i - 1]->encode);
		obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);

		vm->scratch[i] = obj;
	}

	return 0;

free_scratch:
	while (i--)
		i915_gem_object_put(vm->scratch[i]);
	vm->scratch[0] = NULL;
	return ret;
}

static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->vm;
	struct i915_page_directory *pd = ppgtt->pd;
	unsigned int idx;

	GEM_BUG_ON(vm->top != 2);
	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);

	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
		struct i915_page_directory *pde;
		int err;

		pde = alloc_pd(vm);
		if (IS_ERR(pde))
			return PTR_ERR(pde);

		err = map_pt_dma(vm, pde->pt.base);
		if (err) {
			free_pd(vm, pde);
			return err;
		}

		fill_px(pde, vm->scratch[1]->encode);
		set_pd_entry(pd, idx, pde);
		atomic_inc(px_used(pde)); /* keep pinned */
	}
	wmb();

	return 0;
}

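/*
 * Allocate the top-level page directory, back it with a dma page, point
 * every entry at the scratch level below, and pin it for the lifetime of
 * the vm.
 */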
static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
	const unsigned int count = gen8_pd_top_count(vm);
	struct i915_page_directory *pd;
	int err;

	GEM_BUG_ON(count > I915_PDES);

	pd = __alloc_pd(count);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
	if (IS_ERR(pd->pt.base)) {
		err = PTR_ERR(pd->pt.base);
		pd->pt.base = NULL;
		goto err_pd;
	}

	err = map_pt_dma(vm, pd->pt.base);
	if (err)
		goto err_pd;

	fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
	atomic_inc(px_used(pd)); /* mark as pinned */
	return pd;

err_pd:
	free_pd(vm, pd);
	return ERR_PTR(err);
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
 */
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
				     unsigned long lmem_pt_obj_flags)
{
	struct i915_page_directory *pd;
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
	ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));

	/*
	 * From bdw, there is hw support for read-only pages in the PPGTT.
	 *
	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
	 * for now.
	 *
	 * Gen12 has inherited the same read-only fault issue from gen11.
	 */
	ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);

	if (HAS_LMEM(gt->i915))
		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
	else
		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;

	/*
	 * Using SMEM here instead of LMEM has the advantage of not reserving
	 * high performance memory for a "never" used filler page. It also
	 * removes the device access that would be required to initialise the
	 * scratch page, reducing pressure on an even scarcer resource.
	 */
	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;

	if (GRAPHICS_VER(gt->i915) >= 12)
		ppgtt->vm.pte_encode = gen12_pte_encode;
	else
		ppgtt->vm.pte_encode = gen8_pte_encode;

	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
	if (HAS_64K_PAGES(gt->i915))
		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
	else
		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
	ppgtt->vm.clear_range = gen8_ppgtt_clear;
	ppgtt->vm.foreach = gen8_ppgtt_foreach;
	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;

	err = gen8_init_scratch(&ppgtt->vm);
	if (err)
		goto err_put;

	pd = gen8_alloc_top_pd(&ppgtt->vm);
	if (IS_ERR(pd)) {
		err = PTR_ERR(pd);
		goto err_put;
	}
	ppgtt->pd = pd;

	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		err = gen8_preallocate_top_level_pdp(ppgtt);
		if (err)
			goto err_put;
	}

	if (intel_vgpu_active(gt->i915))
		gen8_ppgtt_notify_vgt(ppgtt, true);

	return ppgtt;

err_put:
	i915_vm_put(&ppgtt->vm);
	return ERR_PTR(err);
}