xref: /openbsd-src/sys/dev/pci/drm/i915/gt/gen8_ppgtt.c (revision f005ef32267c16bdb134f0e9fa4477dbe07c263a)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_lmem.h"

#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"

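/*
 * Encode a page-directory entry: the physical address of the next-level
 * table, plus present/read-write bits and a PPAT caching attribute.
 */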
static u64 gen8_pde_encode(const dma_addr_t addr,
			   const enum i915_cache_level level)
{
	u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE;
	else
		pde |= PPAT_UNCACHED;

	return pde;
}

static u64 gen8_pte_encode(dma_addr_t addr,
			   unsigned int pat_index,
			   u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~GEN8_PAGE_RW;

	/*
	 * For pre-gen12 platforms pat_index is the same as enum
	 * i915_cache_level, so the switch-case here is still valid.
	 * See translation table defined by LEGACY_CACHELEVEL.
	 */
	switch (pat_index) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

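/*
 * Gen12 PTEs carry a 4-bit PAT index instead of a cache level: bits 0-2
 * of pat_index select GEN12_PPGTT_PTE_PAT0..2, bit 3 the MTL-only PAT3
 * bit. Local-memory pages are additionally tagged with the LM bit.
 */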
static u64 gen12_pte_encode(dma_addr_t addr,
			    unsigned int pat_index,
			    u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~GEN8_PAGE_RW;

	if (flags & PTE_LM)
		pte |= GEN12_PPGTT_PTE_LM;

	if (pat_index & BIT(0))
		pte |= GEN12_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= GEN12_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2))
		pte |= GEN12_PPGTT_PTE_PAT2;

	if (pat_index & BIT(3))
		pte |= MTL_PPGTT_PTE_PAT3;

	return pte;
}

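/*
 * When running under a GVT-g hypervisor, tell the host about PPGTT
 * creation/teardown by writing the page-directory addresses into the
 * vgtif PV registers and then kicking g2v_notify.
 */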
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
	enum vgt_g2v_type msg;
	int i;

	if (create)
		atomic_inc(px_used(ppgtt->pd)); /* never remove */
	else
		atomic_dec(px_used(ppgtt->pd));

	mutex_lock(&i915->vgpu.lock);

	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		const u64 daddr = px_dma(ppgtt->pd);

		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		msg = create ?
			VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].lo),
					   lower_32_bits(daddr));
			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].hi),
					   upper_32_bits(daddr));
		}

		msg = create ?
			VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
	}

	/* g2v_notify atomically (via hv trap) consumes the message packet. */
	intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);

	mutex_unlock(&i915->vgpu.lock);
}

/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)

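/*
 * With 4K tables and 512 (2^9) u64 entries per level, the macros above
 * give __gen8_pte_shift(lvl) = 12 + 9 * lvl: a 48-bit address therefore
 * decomposes as [47:39] = lvl 3 index, [38:30] = lvl 2, [29:21] = lvl 1,
 * [20:12] = lvl 0, and [11:0] = byte offset within the page.
 */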
static unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
	const int shift = gen8_pd_shift(lvl);
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	end += ~mask >> gen8_pd_shift(1);

	*idx = i915_pde_index(start, shift);
	if ((start ^ end) & mask)
		return GEN8_PDES - *idx;
	else
		return i915_pde_index(end, shift) - *idx;
}

static bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	return (start ^ end) & mask && (start & ~mask) == 0;
}

static unsigned int gen8_pt_count(u64 start, u64 end)
{
	GEM_BUG_ON(start >= end);
	if ((start ^ end) >> gen8_pd_shift(1))
		return GEN8_PDES - (start & (GEN8_PDES - 1));
	else
		return end - start;
}

static unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
	unsigned int shift = __gen8_pte_shift(vm->top);

	return (vm->total + (1ull << shift) - 1) >> shift;
}

static struct i915_page_directory *
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);

	if (vm->top == 2)
		return ppgtt->pd;
	else
		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}

static struct i915_page_directory *
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
}

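/*
 * Recursively free a page-table subtree: at each directory level walk
 * the populated entries, then release the table itself.
 */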
static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 int count, int lvl)
{
	if (lvl) {
		void **pde = pd->entry;

		do {
			if (!*pde)
				continue;

			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
		} while (pde++, --count);
	}

	free_px(vm, &pd->pt, lvl);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	if (intel_vgpu_active(vm->i915))
		gen8_ppgtt_notify_vgt(ppgtt, false);

	if (ppgtt->pd)
		__gen8_ppgtt_cleanup(vm, ppgtt->pd,
				     gen8_pd_top_count(vm), vm->top);

	free_scratch(vm);
}

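/*
 * Unbind a range: point the PTEs back at the scratch page, and prune any
 * page table (or whole subtree) that the range fully covers.  Returns the
 * updated start index for the recursion.
 */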
static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 start, const u64 end, int lvl)
{
	const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
		    gen8_pd_contains(start, end, lvl)) {
			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
			    __func__, vm, lvl + 1, idx, start, end);
			clear_pd_entry(pd, idx, scratch);
			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
			start += (u64)I915_PDES << gen8_pd_shift(lvl);
			continue;
		}

		if (lvl) {
			start = __gen8_ppgtt_clear(vm, as_pd(pt),
						   start, end, lvl);
		} else {
			unsigned int count;
			unsigned int pte = gen8_pd_index(start, 0);
			unsigned int num_ptes;
			u64 *vaddr;

			count = gen8_pt_count(start, end);
			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
			    __func__, vm, lvl, start, end,
			    gen8_pd_index(start, 0), count,
			    atomic_read(&pt->used));
			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

			num_ptes = count;
			if (pt->is_compact) {
				GEM_BUG_ON(num_ptes % 16);
				GEM_BUG_ON(pte % 16);
				num_ptes /= 16;
				pte /= 16;
			}

			vaddr = px_vaddr(pt);
			memset64(vaddr + pte,
				 vm->scratch[0]->encode,
				 num_ptes);

			atomic_sub(count, &pt->used);
			start += count;
		}

		if (release_pd_entry(pd, idx, pt, scratch))
			free_px(vm, pt, lvl);
	} while (idx++, --len);

	return start;
}

static void gen8_ppgtt_clear(struct i915_address_space *vm,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
			   start, start + length, vm->top);
}

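/*
 * Populate any missing page tables for a range.  New tables come from the
 * caller-preallocated stash (so this path cannot fail with -ENOMEM) and
 * are installed under pd->lock; a racing thread that beat us to the slot
 * simply wins, and our stash entry is reused later.
 */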
static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
			       struct i915_vm_pt_stash *stash,
			       struct i915_page_directory * const pd,
			       u64 * const start, const u64 end, int lvl)
{
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(*start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, *start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

	spin_lock(&pd->lock);
	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (!pt) {
			spin_unlock(&pd->lock);

			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
			    __func__, vm, lvl + 1, idx);

			pt = stash->pt[!!lvl];
			__i915_gem_object_pin_pages(pt->base);

			fill_px(pt, vm->scratch[lvl]->encode);

			spin_lock(&pd->lock);
			if (likely(!pd->entry[idx])) {
				stash->pt[!!lvl] = pt->stash;
				atomic_set(&pt->used, 0);
				set_pd_entry(pd, idx, pt);
			} else {
				pt = pd->entry[idx];
			}
		}

		if (lvl) {
			atomic_inc(&pt->used);
			spin_unlock(&pd->lock);

			__gen8_ppgtt_alloc(vm, stash,
					   as_pd(pt), start, end, lvl);

			spin_lock(&pd->lock);
			atomic_dec(&pt->used);
			GEM_BUG_ON(!atomic_read(&pt->used));
		} else {
			unsigned int count = gen8_pt_count(*start, end);

			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
			    __func__, vm, lvl, *start, end,
			    gen8_pd_index(*start, 0), count,
			    atomic_read(&pt->used));

			atomic_add(count, &pt->used);
			/* All other pdes may be simultaneously removed */
			GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
			*start += count;
		}
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_alloc(struct i915_address_space *vm,
			     struct i915_vm_pt_stash *stash,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
			   &start, start + length, vm->top);
}

static void __gen8_ppgtt_foreach(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 u64 *start, u64 end, int lvl,
				 void (*fn)(struct i915_address_space *vm,
					    struct i915_page_table *pt,
					    void *data),
				 void *data)
{
	unsigned int idx, len;

	len = gen8_pd_range(*start, end, lvl--, &idx);

	spin_lock(&pd->lock);
	do {
		struct i915_page_table *pt = pd->entry[idx];

		atomic_inc(&pt->used);
		spin_unlock(&pd->lock);

		if (lvl) {
			__gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl,
					     fn, data);
		} else {
			fn(vm, pt, data);
			*start += gen8_pt_count(*start, end);
		}

		spin_lock(&pd->lock);
		atomic_dec(&pt->used);
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_foreach(struct i915_address_space *vm,
			       u64 start, u64 length,
			       void (*fn)(struct i915_address_space *vm,
					  struct i915_page_table *pt,
					  void *data),
			       void *data)
{
	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;

	__gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd,
			     &start, start + length, vm->top,
			     fn, data);
}

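/*
 * Write 4K PTEs for the pages of an sg list, moving to the next page
 * table (flushing the CPU cache behind us) whenever an index wraps.
 * Returns the next index to fill, or 0 once the iterator is exhausted.
 */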
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
		      struct i915_page_directory *pdp,
		      struct sgt_dma *iter,
		      u64 idx,
		      unsigned int pat_index,
		      u32 flags)
{
	struct i915_page_directory *pd;
	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags);
	gen8_pte_t *vaddr;

	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
	do {
		GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;

		iter->dma += I915_GTT_PAGE_SIZE;
		if (iter->dma >= iter->max) {
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg || sg_dma_len(iter->sg) == 0) {
				idx = 0;
				break;
			}

			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + sg_dma_len(iter->sg);
		}

		if (gen8_pd_index(++idx, 0) == 0) {
			if (gen8_pd_index(idx, 1) == 0) {
				/* Limited by sg length for 3lvl */
				if (gen8_pd_index(idx, 2) == 0)
					break;

				pd = pdp->entry[gen8_pd_index(idx, 2)];
			}

			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
		}
	} while (1);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);

	return idx;
}

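/*
 * Huge-page insertion for XeHP SDV and later.  Besides 2M PDE entries,
 * these platforms support a compact page-table layout in which 16 slots
 * collapse into one 64K entry (GEN12_PDE_64K), plus a PS64 TLB hint for
 * 64K-aligned runs that don't fill a whole table.
 */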
static void
xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
			  struct i915_vma_resource *vma_res,
			  struct sgt_dma *iter,
			  unsigned int pat_index,
			  u32 flags)
{
	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;
	u64 end = start + vma_res->vma_size;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		struct i915_page_table *pt =
			i915_pt_entry(pd, __gen8_pte_index(start, 1));
		gen8_pte_t encode = pte_encode;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index, max, nent, i;

		max = I915_PDES;
		nent = 1;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
				/*
				 * Device local-memory on these platforms should
				 * always use 64K pages or larger (including GTT
				 * alignment), therefore if we know the whole
				 * page-table needs to be filled we can always
				 * safely use the compact-layout. Otherwise fall
				 * back to the TLB hint with PS64. If this is
				 * system memory we only bother with PS64.
				 */
				if ((encode & GEN12_PPGTT_PTE_LM) &&
				    end - start >= SZ_2M && !index) {
					index = __gen8_pte_index(start, 0) / 16;
					page_size = I915_GTT_PAGE_SIZE_64K;

					max /= 16;

					vaddr = px_vaddr(pd);
					vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;

					pt->is_compact = true;
				} else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
					   rem >= I915_GTT_PAGE_SIZE_64K &&
					   !(index % 16)) {
					encode |= GEN12_PTE_PS64;
					page_size = I915_GTT_PAGE_SIZE_64K;
					nent = 16;
				}
			}

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(rem < page_size);

			for (i = 0; i < nent; i++) {
				vaddr[index++] =
					encode | (iter->dma + i *
						  I915_GTT_PAGE_SIZE);
			}

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}

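/*
 * Pre-XeHP huge-page insertion: use a 2M PDE where alignment allows;
 * otherwise, when a 64K-aligned stretch fills (or sufficiently pads) a
 * whole page table, set the GEN8_PDE_IPS_64K hint, optionally scrubbing
 * the surplus 4K entries so selftests can detect HW that ignores it.
 */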
static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
				   struct i915_vma_resource *vma_res,
				   struct sgt_dma *iter,
				   unsigned int pat_index,
				   u32 flags)
{
	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		gen8_pte_t encode = pte_encode;
		unsigned int maybe_64K = -1;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			struct i915_page_table *pt =
				i915_pt_entry(pd, __gen8_pte_index(start, 1));

			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			if (!index &&
			    vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
				maybe_64K = __gen8_pte_index(start, 1);

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (maybe_64K != -1 && index < I915_PDES &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
					maybe_64K = -1;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < I915_PDES);

		drm_clflush_virt_range(vaddr, PAGE_SIZE);

		/*
		 * Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled whole page-table with 64K entries, or filled part of
		 * it and have reached the end of the sg table and we have
		 * enough padding.
		 */
		if (maybe_64K != -1 &&
		    (index == I915_PDES ||
		     (i915_vm_has_scratch_64K(vm) &&
		      !iter->sg && IS_ALIGNED(vma_res->start +
					      vma_res->node_size,
					      I915_GTT_PAGE_SIZE_2M)))) {
			vaddr = px_vaddr(pd);
			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			page_size = I915_GTT_PAGE_SIZE_64K;

			/*
			 * We write all 4K page entries, even when using 64K
			 * pages. In order to verify that the HW isn't cheating
			 * by using the 4K PTE instead of the 64K PTE, we want
			 * to remove all the surplus entries. If the HW skipped
			 * the 64K PTE, it will read/write into the scratch page
			 * instead - which we detect as missing results during
			 * selftests.
			 */
			if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
				u16 i;

				encode = vm->scratch[0]->encode;
				vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));

				for (i = 1; i < index; i += 16)
					memset64(vaddr + i, encode, 15);

				drm_clflush_virt_range(vaddr, PAGE_SIZE);
			}
		}

		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert(struct i915_address_space *vm,
			      struct i915_vma_resource *vma_res,
			      unsigned int pat_index,
			      u32 flags)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = sgt_dma(vma_res);

	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
		if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50))
			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
		else
			gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
	} else {
		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;

		do {
			struct i915_page_directory * const pdp =
				gen8_pdp_for_page_index(vm, idx);

			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
						    pat_index, flags);
		} while (idx);

		vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
	}
}

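/*
 * Single-page variants of the above, for callers that map one page at a
 * known offset (the vm->insert_page hook).
 */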
static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
				    dma_addr_t addr,
				    u64 offset,
				    unsigned int pat_index,
				    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(pt->is_compact);

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags);
	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}

static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
					    dma_addr_t addr,
					    u64 offset,
					    unsigned int pat_index,
					    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));

	/* XXX: we don't strictly need to use this layout */

	if (!pt->is_compact) {
		vaddr = px_vaddr(pd);
		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
		pt->is_compact = true;
	}

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
}

static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
				       dma_addr_t addr,
				       u64 offset,
				       unsigned int pat_index,
				       u32 flags)
{
	if (flags & PTE_LM)
		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
						       pat_index, flags);

	return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
}

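/*
 * Set up the scratch tree: one scratch page (level 0) plus one scratch
 * table per directory level, each entry pointing one level down, so that
 * unpopulated ranges safely resolve to the scratch page.
 */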
static int gen8_init_scratch(struct i915_address_space *vm)
{
	u32 pte_flags;
	int ret;
	int i;

	/*
	 * If everybody agrees not to write into the scratch page,
	 * we can reuse it for all vm, keeping contexts and processes separate.
	 */
	if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
		struct i915_address_space *clone = vm->gt->vm;

		GEM_BUG_ON(!clone->has_read_only);

		vm->scratch_order = clone->scratch_order;
		for (i = 0; i <= vm->top; i++)
			vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);

		return 0;
	}

	ret = setup_scratch_page(vm);
	if (ret)
		return ret;

	pte_flags = vm->has_read_only;
	if (i915_gem_object_is_lmem(vm->scratch[0]))
		pte_flags |= PTE_LM;

	vm->scratch[0]->encode =
		vm->pte_encode(px_dma(vm->scratch[0]),
			       i915_gem_get_pat_index(vm->i915,
						      I915_CACHE_NONE),
			       pte_flags);

	for (i = 1; i <= vm->top; i++) {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
		if (IS_ERR(obj)) {
			ret = PTR_ERR(obj);
			goto free_scratch;
		}

		ret = map_pt_dma(vm, obj);
		if (ret) {
			i915_gem_object_put(obj);
			goto free_scratch;
		}

		fill_px(obj, vm->scratch[i - 1]->encode);
		obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);

		vm->scratch[i] = obj;
	}

	return 0;

free_scratch:
	while (i--)
		i915_gem_object_put(vm->scratch[i]);
	vm->scratch[0] = NULL;
	return ret;
}

static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->vm;
	struct i915_page_directory *pd = ppgtt->pd;
	unsigned int idx;

	GEM_BUG_ON(vm->top != 2);
	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);

	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
		struct i915_page_directory *pde;
		int err;

		pde = alloc_pd(vm);
		if (IS_ERR(pde))
			return PTR_ERR(pde);

		err = map_pt_dma(vm, pde->pt.base);
		if (err) {
			free_pd(vm, pde);
			return err;
		}

		fill_px(pde, vm->scratch[1]->encode);
		set_pd_entry(pd, idx, pde);
		atomic_inc(px_used(pde)); /* keep pinned */
	}
	wmb();

	return 0;
}

static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
	const unsigned int count = gen8_pd_top_count(vm);
	struct i915_page_directory *pd;
	int err;

	GEM_BUG_ON(count > I915_PDES);

	pd = __alloc_pd(count);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
	if (IS_ERR(pd->pt.base)) {
		err = PTR_ERR(pd->pt.base);
		pd->pt.base = NULL;
		goto err_pd;
	}

	err = map_pt_dma(vm, pd->pt.base);
	if (err)
		goto err_pd;

	fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
	atomic_inc(px_used(pd)); /* mark as pinned */
	return pd;

err_pd:
	free_pd(vm, pd);
	return ERR_PTR(err);
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
 * registers, with a net effect resembling a 2-level page table in normal x86
 * terms. Each PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of
 * legacy 32b address space.
 */
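/*
 * Illustrative caller sketch (not part of this file): the driver's ppgtt
 * factory typically does something along the lines of
 *
 *	struct i915_ppgtt *ppgtt = gen8_ppgtt_create(gt, lmem_pt_obj_flags);
 *
 *	if (IS_ERR(ppgtt))
 *		return ERR_CAST(ppgtt);
 *
 * then uses &ppgtt->vm and drops it with i915_vm_put(&ppgtt->vm) when done.
 */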
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
				     unsigned long lmem_pt_obj_flags)
{
	struct i915_page_directory *pd;
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
	ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));

	/*
	 * From bdw, there is hw support for read-only pages in the PPGTT.
	 *
	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
	 * for now.
	 *
	 * Gen12 has inherited the same read-only fault issue from gen11.
	 */
	ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);

	if (HAS_LMEM(gt->i915))
		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
	else
		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;

	/*
	 * Using SMEM here instead of LMEM has the advantage of not reserving
	 * high performance memory for a "never" used filler page. It also
	 * removes the device access that would be required to initialise the
	 * scratch page, reducing pressure on an even scarcer resource.
	 */
	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;

	if (GRAPHICS_VER(gt->i915) >= 12)
		ppgtt->vm.pte_encode = gen12_pte_encode;
	else
		ppgtt->vm.pte_encode = gen8_pte_encode;

	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
	if (HAS_64K_PAGES(gt->i915))
		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
	else
		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
	ppgtt->vm.clear_range = gen8_ppgtt_clear;
	ppgtt->vm.foreach = gen8_ppgtt_foreach;
	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;

	err = gen8_init_scratch(&ppgtt->vm);
	if (err)
		goto err_put;

	pd = gen8_alloc_top_pd(&ppgtt->vm);
	if (IS_ERR(pd)) {
		err = PTR_ERR(pd);
		goto err_put;
	}
	ppgtt->pd = pd;

	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		err = gen8_preallocate_top_level_pdp(ppgtt);
		if (err)
			goto err_put;
	}

	if (intel_vgpu_active(gt->i915))
		gen8_ppgtt_notify_vgt(ppgtt, true);

	return ppgtt;

err_put:
	i915_vm_put(&ppgtt->vm);
	return ERR_PTR(err);
}