xref: /openbsd-src/sys/dev/pci/drm/i915/gt/intel_ggtt.c (revision 99fa8e5059948214766d3e193640ed5ccc47c606)
1c349dbc7Sjsg // SPDX-License-Identifier: MIT
2c349dbc7Sjsg /*
3c349dbc7Sjsg  * Copyright © 2020 Intel Corporation
4c349dbc7Sjsg  */
5c349dbc7Sjsg 
6c349dbc7Sjsg #include <asm/set_memory.h>
7c349dbc7Sjsg #include <asm/smp.h>
81bb76ff1Sjsg #include <linux/types.h>
91bb76ff1Sjsg #include <linux/stop_machine.h>
10c349dbc7Sjsg 
11f005ef32Sjsg #include <drm/drm_managed.h>
12c349dbc7Sjsg #include <drm/i915_drm.h>
131bb76ff1Sjsg #include <drm/intel-gtt.h>
14c349dbc7Sjsg 
15f005ef32Sjsg #include "display/intel_display.h"
165ca02815Sjsg #include "gem/i915_gem_lmem.h"
175ca02815Sjsg 
186589bdd4Sjsg #include "intel_context.h"
191bb76ff1Sjsg #include "intel_ggtt_gmch.h"
206589bdd4Sjsg #include "intel_gpu_commands.h"
21c349dbc7Sjsg #include "intel_gt.h"
221bb76ff1Sjsg #include "intel_gt_regs.h"
231bb76ff1Sjsg #include "intel_pci_config.h"
246589bdd4Sjsg #include "intel_ring.h"
25c349dbc7Sjsg #include "i915_drv.h"
261bb76ff1Sjsg #include "i915_pci.h"
276c50df30Sjsg #include "i915_reg.h"
286589bdd4Sjsg #include "i915_request.h"
29c349dbc7Sjsg #include "i915_scatterlist.h"
301bb76ff1Sjsg #include "i915_utils.h"
31c349dbc7Sjsg #include "i915_vgpu.h"
32c349dbc7Sjsg 
33c349dbc7Sjsg #include "intel_gtt.h"
345ca02815Sjsg #include "gen8_ppgtt.h"
356589bdd4Sjsg #include "intel_engine_pm.h"
36c349dbc7Sjsg 
37c349dbc7Sjsg static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
38c349dbc7Sjsg 				   unsigned long color,
39c349dbc7Sjsg 				   u64 *start,
40c349dbc7Sjsg 				   u64 *end)
41c349dbc7Sjsg {
42c349dbc7Sjsg 	if (i915_node_color_differs(node, color))
43c349dbc7Sjsg 		*start += I915_GTT_PAGE_SIZE;
44c349dbc7Sjsg 
45c349dbc7Sjsg 	/*
46c349dbc7Sjsg 	 * Also leave a space between the unallocated reserved node after the
47c349dbc7Sjsg 	 * GTT and any objects within the GTT, i.e. we use the color adjustment
48c349dbc7Sjsg 	 * to insert a guard page to prevent prefetches crossing over the
49c349dbc7Sjsg 	 * GTT boundary.
50c349dbc7Sjsg 	 */
51c349dbc7Sjsg 	node = list_next_entry(node, node_list);
52c349dbc7Sjsg 	if (node->color != color)
53c349dbc7Sjsg 		*end -= I915_GTT_PAGE_SIZE;
54c349dbc7Sjsg }
55c349dbc7Sjsg 
56c349dbc7Sjsg static int ggtt_init_hw(struct i915_ggtt *ggtt)
57c349dbc7Sjsg {
58c349dbc7Sjsg 	struct drm_i915_private *i915 = ggtt->vm.i915;
59c349dbc7Sjsg 
60c349dbc7Sjsg 	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
61c349dbc7Sjsg 
62c349dbc7Sjsg 	ggtt->vm.is_ggtt = true;
63c349dbc7Sjsg 
64c349dbc7Sjsg 	/* Only VLV supports read-only GGTT mappings */
65c349dbc7Sjsg 	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
66c349dbc7Sjsg 
67c349dbc7Sjsg 	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
68c349dbc7Sjsg 		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
69c349dbc7Sjsg 
70c349dbc7Sjsg 	if (ggtt->mappable_end) {
71c349dbc7Sjsg #ifdef __linux__
72c349dbc7Sjsg 		if (!io_mapping_init_wc(&ggtt->iomap,
73c349dbc7Sjsg 					ggtt->gmadr.start,
74c349dbc7Sjsg 					ggtt->mappable_end)) {
75c349dbc7Sjsg 			ggtt->vm.cleanup(&ggtt->vm);
76c349dbc7Sjsg 			return -EIO;
77c349dbc7Sjsg 		}
78c349dbc7Sjsg 
79c349dbc7Sjsg 		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
80c349dbc7Sjsg 					      ggtt->mappable_end);
81c349dbc7Sjsg #else
82e64fda40Sjsg 		bus_space_handle_t bsh;
83e64fda40Sjsg 		int i;
84e64fda40Sjsg 
85c349dbc7Sjsg 		/* XXX would be a lot nicer to get agp info before now */
861bb76ff1Sjsg 		uvm_page_physload(atop(ggtt->gmadr.start),
871bb76ff1Sjsg 		    atop(ggtt->gmadr.start + ggtt->mappable_end),
881bb76ff1Sjsg 		    atop(ggtt->gmadr.start),
891bb76ff1Sjsg 		    atop(ggtt->gmadr.start + ggtt->mappable_end),
90c349dbc7Sjsg 		    PHYSLOAD_DEVICE);
91c349dbc7Sjsg 		/* array of vm pages that physload introduced. */
921bb76ff1Sjsg 		i915->pgs = PHYS_TO_VM_PAGE(ggtt->gmadr.start);
93c349dbc7Sjsg 		KASSERT(i915->pgs != NULL);
94c349dbc7Sjsg 		/*
95c349dbc7Sjsg 		 * XXX mark all pages write combining so user mmaps get the
96c349dbc7Sjsg 		 * right bits. We really need a proper MI api for doing this,
97c349dbc7Sjsg 		 * but for now this allows us to use PAT where available.
98c349dbc7Sjsg 		 */
991bb76ff1Sjsg 		for (i = 0; i < atop(ggtt->mappable_end); i++)
100c349dbc7Sjsg 			atomic_setbits_int(&(i915->pgs[i].pg_flags),
101c349dbc7Sjsg 			    PG_PMAP_WC);
102e64fda40Sjsg 		if (bus_space_map(i915->bst, ggtt->gmadr.start,
1031bb76ff1Sjsg 		    ggtt->mappable_end,
104e64fda40Sjsg 		    BUS_SPACE_MAP_LINEAR | BUS_SPACE_MAP_PREFETCHABLE, &bsh))
105c349dbc7Sjsg 			panic("can't map aperture");
106e64fda40Sjsg 		ggtt->iomap.base = ggtt->gmadr.start;
107e64fda40Sjsg 		ggtt->iomap.size = ggtt->mappable_end;
108e64fda40Sjsg 		ggtt->iomap.iomem = bus_space_vaddr(i915->bst, bsh);
109c349dbc7Sjsg #endif
110c349dbc7Sjsg 	}
111c349dbc7Sjsg 
112ad8b1aafSjsg 	intel_ggtt_init_fences(ggtt);
113c349dbc7Sjsg 
114c349dbc7Sjsg 	return 0;
115c349dbc7Sjsg }
116c349dbc7Sjsg 
117c349dbc7Sjsg /**
118c349dbc7Sjsg  * i915_ggtt_init_hw - Initialize GGTT hardware
119c349dbc7Sjsg  * @i915: i915 device
120c349dbc7Sjsg  */
121c349dbc7Sjsg int i915_ggtt_init_hw(struct drm_i915_private *i915)
122c349dbc7Sjsg {
123c349dbc7Sjsg 	int ret;
124c349dbc7Sjsg 
125c349dbc7Sjsg 	/*
126c349dbc7Sjsg 	 * Note that we use page colouring to enforce a guard page at the
127c349dbc7Sjsg 	 * end of the address space. This is required as the CS may prefetch
128c349dbc7Sjsg 	 * beyond the end of the batch buffer, across the page boundary,
129c349dbc7Sjsg 	 * and beyond the end of the GTT if we do not provide a guard.
130c349dbc7Sjsg 	 */
1311bb76ff1Sjsg 	ret = ggtt_init_hw(to_gt(i915)->ggtt);
132c349dbc7Sjsg 	if (ret)
133c349dbc7Sjsg 		return ret;
134c349dbc7Sjsg 
135c349dbc7Sjsg 	return 0;
136c349dbc7Sjsg }
137c349dbc7Sjsg 
1381bb76ff1Sjsg /**
1391bb76ff1Sjsg  * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
1401bb76ff1Sjsg  * @vm: The VM to suspend the mappings for
1411bb76ff1Sjsg  *
1421bb76ff1Sjsg  * Suspend the memory mappings for all objects mapped to HW via the GGTT or a
1431bb76ff1Sjsg  * DPT page table.
144c349dbc7Sjsg  */
1451bb76ff1Sjsg void i915_ggtt_suspend_vm(struct i915_address_space *vm)
1461bb76ff1Sjsg {
1471bb76ff1Sjsg 	struct i915_vma *vma, *vn;
1481bb76ff1Sjsg 	int save_skip_rewrite;
1495ca02815Sjsg 
1501bb76ff1Sjsg 	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
1515ca02815Sjsg 
1521bb76ff1Sjsg retry:
1531bb76ff1Sjsg 	i915_gem_drain_freed_objects(vm->i915);
1545ca02815Sjsg 
1551bb76ff1Sjsg 	mutex_lock(&vm->mutex);
1561bb76ff1Sjsg 
1571bb76ff1Sjsg 	/*
1581bb76ff1Sjsg 	 * Skip rewriting PTE on VMA unbind.
1591bb76ff1Sjsg 	 * FIXME: Use an argument to i915_vma_unbind() instead?
1601bb76ff1Sjsg 	 */
1611bb76ff1Sjsg 	save_skip_rewrite = vm->skip_pte_rewrite;
1621bb76ff1Sjsg 	vm->skip_pte_rewrite = true;
1631bb76ff1Sjsg 
1641bb76ff1Sjsg 	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
1651bb76ff1Sjsg 		struct drm_i915_gem_object *obj = vma->obj;
1661bb76ff1Sjsg 
1671bb76ff1Sjsg 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1681bb76ff1Sjsg 
1691bb76ff1Sjsg 		if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
1701bb76ff1Sjsg 			continue;
1711bb76ff1Sjsg 
1721bb76ff1Sjsg 		/* unlikely to race when the GPU is idle, so no worry about the slow path */
1731bb76ff1Sjsg 		if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
1741bb76ff1Sjsg 			/*
1751bb76ff1Sjsg 			 * No dead objects should appear here, GPU should be
1761bb76ff1Sjsg 			 * completely idle, and userspace suspended
1771bb76ff1Sjsg 			 */
1781bb76ff1Sjsg 			i915_gem_object_get(obj);
1791bb76ff1Sjsg 
1801bb76ff1Sjsg 			mutex_unlock(&vm->mutex);
1811bb76ff1Sjsg 
1821bb76ff1Sjsg 			i915_gem_object_lock(obj, NULL);
1831bb76ff1Sjsg 			GEM_WARN_ON(i915_vma_unbind(vma));
1841bb76ff1Sjsg 			i915_gem_object_unlock(obj);
1851bb76ff1Sjsg 			i915_gem_object_put(obj);
1861bb76ff1Sjsg 
1871bb76ff1Sjsg 			vm->skip_pte_rewrite = save_skip_rewrite;
1881bb76ff1Sjsg 			goto retry;
1891bb76ff1Sjsg 		}
1901bb76ff1Sjsg 
1911bb76ff1Sjsg 		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
1921bb76ff1Sjsg 			i915_vma_wait_for_bind(vma);
1931bb76ff1Sjsg 
1941bb76ff1Sjsg 			__i915_vma_evict(vma, false);
1951bb76ff1Sjsg 			drm_mm_remove_node(&vma->node);
1961bb76ff1Sjsg 		}
1971bb76ff1Sjsg 
1981bb76ff1Sjsg 		i915_gem_object_unlock(obj);
1991bb76ff1Sjsg 	}
2001bb76ff1Sjsg 
2011bb76ff1Sjsg 	vm->clear_range(vm, 0, vm->total);
2021bb76ff1Sjsg 
2031bb76ff1Sjsg 	vm->skip_pte_rewrite = save_skip_rewrite;
2041bb76ff1Sjsg 
2051bb76ff1Sjsg 	mutex_unlock(&vm->mutex);
206c349dbc7Sjsg }
207c349dbc7Sjsg 
208c349dbc7Sjsg void i915_ggtt_suspend(struct i915_ggtt *ggtt)
209c349dbc7Sjsg {
210f005ef32Sjsg 	struct intel_gt *gt;
211f005ef32Sjsg 
2121bb76ff1Sjsg 	i915_ggtt_suspend_vm(&ggtt->vm);
213c349dbc7Sjsg 	ggtt->invalidate(ggtt);
214c349dbc7Sjsg 
215f005ef32Sjsg 	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
216f005ef32Sjsg 		intel_gt_check_and_clear_faults(gt);
217c349dbc7Sjsg }
218c349dbc7Sjsg 
219c349dbc7Sjsg void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
220c349dbc7Sjsg {
221c349dbc7Sjsg 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
222c349dbc7Sjsg 
223c349dbc7Sjsg 	spin_lock_irq(&uncore->lock);
224c349dbc7Sjsg 	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
225c349dbc7Sjsg 	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
226c349dbc7Sjsg 	spin_unlock_irq(&uncore->lock);
227c349dbc7Sjsg }
228c349dbc7Sjsg 
229f005ef32Sjsg static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915)
230f005ef32Sjsg {
231f005ef32Sjsg 	/*
232f005ef32Sjsg 	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
233f005ef32Sjsg 	 * will be dropped. For WC mappings in general we have 64 byte burst
234f005ef32Sjsg 	 * writes when the WC buffer is flushed, so we can't use it, but have to
235f005ef32Sjsg 	 * resort to an uncached mapping. The WC issue is easily caught by the
236f005ef32Sjsg 	 * readback check when writing GTT PTE entries.
237f005ef32Sjsg 	 */
238f005ef32Sjsg 	if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11)
239f005ef32Sjsg 		return true;
240f005ef32Sjsg 
241f005ef32Sjsg 	return false;
242f005ef32Sjsg }
243f005ef32Sjsg 
244c349dbc7Sjsg static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
245c349dbc7Sjsg {
246c349dbc7Sjsg 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
247c349dbc7Sjsg 
248c349dbc7Sjsg 	/*
249c349dbc7Sjsg 	 * Note that as an uncached mmio write, this will flush the
250c349dbc7Sjsg 	 * WCB of the writes into the GGTT before it triggers the invalidate.
251f005ef32Sjsg 	 *
252f005ef32Sjsg 	 * Only perform this when GGTT is mapped as WC, see ggtt_probe_common().
253c349dbc7Sjsg 	 */
254f005ef32Sjsg 	if (needs_wc_ggtt_mapping(ggtt->vm.i915))
255f005ef32Sjsg 		intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6,
256f005ef32Sjsg 				      GFX_FLSH_CNTL_EN);
257c349dbc7Sjsg }
258c349dbc7Sjsg 
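/*
 * GuC-aware invalidation: perform the regular GGTT invalidation and, in
 * addition, flush the GuC TLBs, per GT via GEN12_GUC_TLB_INV_CR on graphics
 * version 12+ or via GEN8_GTCR on older parts.
 */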
259c349dbc7Sjsg static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
260c349dbc7Sjsg {
261c349dbc7Sjsg 	struct drm_i915_private *i915 = ggtt->vm.i915;
262c349dbc7Sjsg 
263c349dbc7Sjsg 	gen8_ggtt_invalidate(ggtt);
264c349dbc7Sjsg 
265f005ef32Sjsg 	if (GRAPHICS_VER(i915) >= 12) {
266f005ef32Sjsg 		struct intel_gt *gt;
267f005ef32Sjsg 
268f005ef32Sjsg 		list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
269f005ef32Sjsg 			intel_uncore_write_fw(gt->uncore,
270f005ef32Sjsg 					      GEN12_GUC_TLB_INV_CR,
271c349dbc7Sjsg 					      GEN12_GUC_TLB_INV_CR_INVALIDATE);
272f005ef32Sjsg 	} else {
273f005ef32Sjsg 		intel_uncore_write_fw(ggtt->vm.gt->uncore,
274f005ef32Sjsg 				      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
275f005ef32Sjsg 	}
276f005ef32Sjsg }
277f005ef32Sjsg 
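/*
 * On MTL the GGTT PTE carries the two low bits of the PAT index directly
 * (MTL_GGTT_PTE_PAT0/1) alongside the present and local-memory bits.
 */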
278f005ef32Sjsg static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
279f005ef32Sjsg 			       unsigned int pat_index,
280f005ef32Sjsg 			       u32 flags)
281f005ef32Sjsg {
282f005ef32Sjsg 	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
283f005ef32Sjsg 
284f005ef32Sjsg 	WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
285f005ef32Sjsg 
286f005ef32Sjsg 	if (flags & PTE_LM)
287f005ef32Sjsg 		pte |= GEN12_GGTT_PTE_LM;
288f005ef32Sjsg 
289f005ef32Sjsg 	if (pat_index & BIT(0))
290f005ef32Sjsg 		pte |= MTL_GGTT_PTE_PAT0;
291f005ef32Sjsg 
292f005ef32Sjsg 	if (pat_index & BIT(1))
293f005ef32Sjsg 		pte |= MTL_GGTT_PTE_PAT1;
294f005ef32Sjsg 
295f005ef32Sjsg 	return pte;
296c349dbc7Sjsg }
297c349dbc7Sjsg 
2985ca02815Sjsg u64 gen8_ggtt_pte_encode(dma_addr_t addr,
299f005ef32Sjsg 			 unsigned int pat_index,
300c349dbc7Sjsg 			 u32 flags)
301c349dbc7Sjsg {
3021bb76ff1Sjsg 	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
3035ca02815Sjsg 
3045ca02815Sjsg 	if (flags & PTE_LM)
3055ca02815Sjsg 		pte |= GEN12_GGTT_PTE_LM;
3065ca02815Sjsg 
3075ca02815Sjsg 	return pte;
308c349dbc7Sjsg }
309c349dbc7Sjsg 
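/*
 * GGTT updates via MI_UPDATE_GTT on the bind context are only usable once
 * the GT has its bind context set up; until then callers fall back to the
 * CPU/MMIO update paths.
 */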
3106589bdd4Sjsg static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
3116589bdd4Sjsg {
3126589bdd4Sjsg 	struct intel_gt *gt = ggtt->vm.gt;
3136589bdd4Sjsg 
3146589bdd4Sjsg 	return intel_gt_is_bind_context_ready(gt);
3156589bdd4Sjsg }
3166589bdd4Sjsg 
3176589bdd4Sjsg static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
3186589bdd4Sjsg {
3196589bdd4Sjsg 	struct intel_context *ce;
3206589bdd4Sjsg 	struct intel_gt *gt = ggtt->vm.gt;
3216589bdd4Sjsg 
3226589bdd4Sjsg 	if (intel_gt_is_wedged(gt))
3236589bdd4Sjsg 		return NULL;
3246589bdd4Sjsg 
3256589bdd4Sjsg 	ce = gt->engine[BCS0]->bind_context;
3266589bdd4Sjsg 	GEM_BUG_ON(!ce);
3276589bdd4Sjsg 
3286589bdd4Sjsg 	/*
3296589bdd4Sjsg 	 * If the GT is not already awake at this stage, fall back to the
3306589bdd4Sjsg 	 * PCI-based GGTT update; otherwise __intel_wakeref_get_first()
3316589bdd4Sjsg 	 * would conflict with fs_reclaim trying to allocate memory while
3326589bdd4Sjsg 	 * doing rpm_resume().
3336589bdd4Sjsg 	 */
3346589bdd4Sjsg 	if (!intel_gt_pm_get_if_awake(gt))
3356589bdd4Sjsg 		return NULL;
3366589bdd4Sjsg 
3376589bdd4Sjsg 	intel_engine_pm_get(ce->engine);
3386589bdd4Sjsg 
3396589bdd4Sjsg 	return ce;
3406589bdd4Sjsg }
3416589bdd4Sjsg 
3426589bdd4Sjsg static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
3436589bdd4Sjsg {
3446589bdd4Sjsg 	intel_engine_pm_put(ce->engine);
3456589bdd4Sjsg 	intel_gt_pm_put(ce->engine->gt);
3466589bdd4Sjsg }
3476589bdd4Sjsg 
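/*
 * Write num_entries GGTT PTEs starting at offset (in GTT page units) by
 * emitting MI_UPDATE_GTT batches on the blitter bind context. When pages is
 * NULL every entry gets the supplied pte; otherwise the entries come from
 * the sg table, with any shortfall padded with the scratch PTE. Returns
 * false if the bind context could not be used or a request failed, so the
 * caller can fall back to the MMIO path.
 */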
3486589bdd4Sjsg static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
3496589bdd4Sjsg 				struct sg_table *pages, u32 num_entries,
3506589bdd4Sjsg 				const gen8_pte_t pte)
3516589bdd4Sjsg {
3526589bdd4Sjsg 	struct i915_sched_attr attr = {};
3536589bdd4Sjsg 	struct intel_gt *gt = ggtt->vm.gt;
3546589bdd4Sjsg 	const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
3556589bdd4Sjsg 	struct sgt_iter iter;
3566589bdd4Sjsg 	struct i915_request *rq;
3576589bdd4Sjsg 	struct intel_context *ce;
3586589bdd4Sjsg 	u32 *cs;
3596589bdd4Sjsg 
3606589bdd4Sjsg 	if (!num_entries)
3616589bdd4Sjsg 		return true;
3626589bdd4Sjsg 
3636589bdd4Sjsg 	ce = gen8_ggtt_bind_get_ce(ggtt);
3646589bdd4Sjsg 	if (!ce)
3656589bdd4Sjsg 		return false;
3666589bdd4Sjsg 
3676589bdd4Sjsg 	if (pages)
3686589bdd4Sjsg 		iter = __sgt_iter(pages->sgl, true);
3696589bdd4Sjsg 
3706589bdd4Sjsg 	while (num_entries) {
3716589bdd4Sjsg 		int count = 0;
3726589bdd4Sjsg 		dma_addr_t addr;
3736589bdd4Sjsg 		/*
3746589bdd4Sjsg 		 * MI_UPDATE_GTT can update 512 entries in a single command, but
3756589bdd4Sjsg 		 * doing so ends up in an engine reset; 511 entries work.
3766589bdd4Sjsg 		 */
3776589bdd4Sjsg 		u32 n_ptes = min_t(u32, 511, num_entries);
3786589bdd4Sjsg 
3796589bdd4Sjsg 		if (mutex_lock_interruptible(&ce->timeline->mutex))
3806589bdd4Sjsg 			goto put_ce;
3816589bdd4Sjsg 
3826589bdd4Sjsg 		intel_context_enter(ce);
3836589bdd4Sjsg 		rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
3846589bdd4Sjsg 		intel_context_exit(ce);
3856589bdd4Sjsg 		if (IS_ERR(rq)) {
3866589bdd4Sjsg 			GT_TRACE(gt, "Failed to get bind request\n");
3876589bdd4Sjsg 			mutex_unlock(&ce->timeline->mutex);
3886589bdd4Sjsg 			goto put_ce;
3896589bdd4Sjsg 		}
3906589bdd4Sjsg 
3916589bdd4Sjsg 		cs = intel_ring_begin(rq, 2 * n_ptes + 2);
3926589bdd4Sjsg 		if (IS_ERR(cs)) {
3936589bdd4Sjsg 			GT_TRACE(gt, "Failed to get ring space for GGTT bind\n");
3946589bdd4Sjsg 			i915_request_set_error_once(rq, PTR_ERR(cs));
3956589bdd4Sjsg 			/* once a request is created, it must be queued */
3966589bdd4Sjsg 			goto queue_err_rq;
3976589bdd4Sjsg 		}
3986589bdd4Sjsg 
3996589bdd4Sjsg 		*cs++ = MI_UPDATE_GTT | (2 * n_ptes);
4006589bdd4Sjsg 		*cs++ = offset << 12;
4016589bdd4Sjsg 
4026589bdd4Sjsg 		if (pages) {
4036589bdd4Sjsg 			for_each_sgt_daddr_next(addr, iter) {
4046589bdd4Sjsg 				if (count == n_ptes)
4056589bdd4Sjsg 					break;
4066589bdd4Sjsg 				*cs++ = lower_32_bits(pte | addr);
4076589bdd4Sjsg 				*cs++ = upper_32_bits(pte | addr);
4086589bdd4Sjsg 				count++;
4096589bdd4Sjsg 			}
4106589bdd4Sjsg 			/* fill remaining with scratch pte, if any */
4116589bdd4Sjsg 			if (count < n_ptes) {
4126589bdd4Sjsg 				memset64((u64 *)cs, scratch_pte,
4136589bdd4Sjsg 					 n_ptes - count);
4146589bdd4Sjsg 				cs += (n_ptes - count) * 2;
4156589bdd4Sjsg 			}
4166589bdd4Sjsg 		} else {
4176589bdd4Sjsg 			memset64((u64 *)cs, pte, n_ptes);
4186589bdd4Sjsg 			cs += n_ptes * 2;
4196589bdd4Sjsg 		}
4206589bdd4Sjsg 
4216589bdd4Sjsg 		intel_ring_advance(rq, cs);
4226589bdd4Sjsg queue_err_rq:
4236589bdd4Sjsg 		i915_request_get(rq);
4246589bdd4Sjsg 		__i915_request_commit(rq);
4256589bdd4Sjsg 		__i915_request_queue(rq, &attr);
4266589bdd4Sjsg 
4276589bdd4Sjsg 		mutex_unlock(&ce->timeline->mutex);
4286589bdd4Sjsg 		/* This will break if the request is complete or after engine reset */
4296589bdd4Sjsg 		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
4306589bdd4Sjsg 		if (rq->fence.error)
4316589bdd4Sjsg 			goto err_rq;
4326589bdd4Sjsg 
4336589bdd4Sjsg 		i915_request_put(rq);
4346589bdd4Sjsg 
4356589bdd4Sjsg 		num_entries -= n_ptes;
4366589bdd4Sjsg 		offset += n_ptes;
4376589bdd4Sjsg 	}
4386589bdd4Sjsg 
4396589bdd4Sjsg 	gen8_ggtt_bind_put_ce(ce);
4406589bdd4Sjsg 	return true;
4416589bdd4Sjsg 
4426589bdd4Sjsg err_rq:
4436589bdd4Sjsg 	i915_request_put(rq);
4446589bdd4Sjsg put_ce:
4456589bdd4Sjsg 	gen8_ggtt_bind_put_ce(ce);
4466589bdd4Sjsg 	return false;
4476589bdd4Sjsg }
4486589bdd4Sjsg 
449c349dbc7Sjsg static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
450c349dbc7Sjsg {
451c349dbc7Sjsg 	writeq(pte, addr);
452c349dbc7Sjsg }
453c349dbc7Sjsg 
454c349dbc7Sjsg static void gen8_ggtt_insert_page(struct i915_address_space *vm,
455c349dbc7Sjsg 				  dma_addr_t addr,
456c349dbc7Sjsg 				  u64 offset,
457f005ef32Sjsg 				  unsigned int pat_index,
4585ca02815Sjsg 				  u32 flags)
459c349dbc7Sjsg {
460c349dbc7Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
461c349dbc7Sjsg 	gen8_pte_t __iomem *pte =
462c349dbc7Sjsg 		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
463c349dbc7Sjsg 
464f005ef32Sjsg 	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags));
465c349dbc7Sjsg 
466c349dbc7Sjsg 	ggtt->invalidate(ggtt);
467c349dbc7Sjsg }
468c349dbc7Sjsg 
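/*
 * Prefer updating a single PTE through the bind context; fall back to the
 * direct MMIO write in gen8_ggtt_insert_page() when that is not possible.
 */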
4696589bdd4Sjsg static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
4706589bdd4Sjsg 				       dma_addr_t addr, u64 offset,
4716589bdd4Sjsg 				       unsigned int pat_index, u32 flags)
4726589bdd4Sjsg {
4736589bdd4Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
4746589bdd4Sjsg 	gen8_pte_t pte;
4756589bdd4Sjsg 
4766589bdd4Sjsg 	pte = ggtt->vm.pte_encode(addr, pat_index, flags);
4776589bdd4Sjsg 	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
4786589bdd4Sjsg 	    gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
4796589bdd4Sjsg 		return ggtt->invalidate(ggtt);
4806589bdd4Sjsg 
4816589bdd4Sjsg 	gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
4826589bdd4Sjsg }
4836589bdd4Sjsg 
484c349dbc7Sjsg static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
4851bb76ff1Sjsg 				     struct i915_vma_resource *vma_res,
486f005ef32Sjsg 				     unsigned int pat_index,
487c349dbc7Sjsg 				     u32 flags)
488c349dbc7Sjsg {
489c349dbc7Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
490f005ef32Sjsg 	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
491c349dbc7Sjsg 	gen8_pte_t __iomem *gte;
492c349dbc7Sjsg 	gen8_pte_t __iomem *end;
493c349dbc7Sjsg 	struct sgt_iter iter;
494c349dbc7Sjsg 	dma_addr_t addr;
495c349dbc7Sjsg 
496c349dbc7Sjsg 	/*
497c349dbc7Sjsg 	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
498c349dbc7Sjsg 	 * not to allow the user to override access to a read only page.
499c349dbc7Sjsg 	 */
500c349dbc7Sjsg 
501c349dbc7Sjsg 	gte = (gen8_pte_t __iomem *)ggtt->gsm;
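	/* Point gte at the leading guard and scratch-fill it before the real PTEs */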
502f005ef32Sjsg 	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
503f005ef32Sjsg 	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
504f005ef32Sjsg 	while (gte < end)
505f005ef32Sjsg 		gen8_set_pte(gte++, vm->scratch[0]->encode);
506f005ef32Sjsg 	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
507c349dbc7Sjsg 
5081bb76ff1Sjsg 	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
509c349dbc7Sjsg 		gen8_set_pte(gte++, pte_encode | addr);
510c349dbc7Sjsg 	GEM_BUG_ON(gte > end);
511c349dbc7Sjsg 
512c349dbc7Sjsg 	/* Fill the allocated but "unused" space beyond the end of the buffer */
513c349dbc7Sjsg 	while (gte < end)
514ad8b1aafSjsg 		gen8_set_pte(gte++, vm->scratch[0]->encode);
515c349dbc7Sjsg 
516c349dbc7Sjsg 	/*
517c349dbc7Sjsg 	 * We want to flush the TLBs only after we're certain all the PTE
518c349dbc7Sjsg 	 * updates have finished.
519c349dbc7Sjsg 	 */
520c349dbc7Sjsg 	ggtt->invalidate(ggtt);
521c349dbc7Sjsg }
522c349dbc7Sjsg 
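/*
 * Bind-context variant of gen8_ggtt_insert_entries(): scratch-fill the
 * leading guard, write the object's PTEs, then scratch-fill the trailing
 * guard, all via MI_UPDATE_GTT. Returns false if any step fails so the
 * caller can fall back to the MMIO path.
 */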
5236589bdd4Sjsg static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
5246589bdd4Sjsg 					    struct i915_vma_resource *vma_res,
5256589bdd4Sjsg 					    unsigned int pat_index, u32 flags)
5266589bdd4Sjsg {
5276589bdd4Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
5286589bdd4Sjsg 	gen8_pte_t scratch_pte = vm->scratch[0]->encode;
5296589bdd4Sjsg 	gen8_pte_t pte_encode;
5306589bdd4Sjsg 	u64 start, end;
5316589bdd4Sjsg 
5326589bdd4Sjsg 	pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
5336589bdd4Sjsg 	start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
5346589bdd4Sjsg 	end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
5356589bdd4Sjsg 	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
5366589bdd4Sjsg 		goto err;
5376589bdd4Sjsg 
5386589bdd4Sjsg 	start = end;
5396589bdd4Sjsg 	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
5406589bdd4Sjsg 	if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
5416589bdd4Sjsg 	      vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
5426589bdd4Sjsg 		goto err;
5436589bdd4Sjsg 
5446589bdd4Sjsg 	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
5456589bdd4Sjsg 	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
5466589bdd4Sjsg 		goto err;
5476589bdd4Sjsg 
5486589bdd4Sjsg 	return true;
5496589bdd4Sjsg 
5506589bdd4Sjsg err:
5516589bdd4Sjsg 	return false;
5526589bdd4Sjsg }
5536589bdd4Sjsg 
5546589bdd4Sjsg static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
5556589bdd4Sjsg 					  struct i915_vma_resource *vma_res,
5566589bdd4Sjsg 					  unsigned int pat_index, u32 flags)
5576589bdd4Sjsg {
5586589bdd4Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
5596589bdd4Sjsg 
5606589bdd4Sjsg 	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
5616589bdd4Sjsg 	    __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
5626589bdd4Sjsg 		return ggtt->invalidate(ggtt);
5636589bdd4Sjsg 
5646589bdd4Sjsg 	gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
5656589bdd4Sjsg }
5666589bdd4Sjsg 
567c349dbc7Sjsg static void gen8_ggtt_clear_range(struct i915_address_space *vm,
568c349dbc7Sjsg 				  u64 start, u64 length)
569c349dbc7Sjsg {
570c349dbc7Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
571c349dbc7Sjsg 	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
572c349dbc7Sjsg 	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
573ad8b1aafSjsg 	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
574c349dbc7Sjsg 	gen8_pte_t __iomem *gtt_base =
575c349dbc7Sjsg 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
576c349dbc7Sjsg 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
577c349dbc7Sjsg 	int i;
578c349dbc7Sjsg 
579c349dbc7Sjsg 	if (WARN(num_entries > max_entries,
580c349dbc7Sjsg 		 "First entry = %d; Num entries = %d (max=%d)\n",
581c349dbc7Sjsg 		 first_entry, num_entries, max_entries))
582c349dbc7Sjsg 		num_entries = max_entries;
583c349dbc7Sjsg 
584c349dbc7Sjsg 	for (i = 0; i < num_entries; i++)
585c349dbc7Sjsg 		gen8_set_pte(&gtt_base[i], scratch_pte);
586c349dbc7Sjsg }
587c349dbc7Sjsg 
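/*
 * Reset a GGTT range to the scratch PTE via the bind context when possible,
 * otherwise fall back to the CPU clear in gen8_ggtt_clear_range().
 */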
5886589bdd4Sjsg static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
5896589bdd4Sjsg 					 u64 start, u64 length)
5906589bdd4Sjsg {
5916589bdd4Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
5926589bdd4Sjsg 	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
5936589bdd4Sjsg 	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
5946589bdd4Sjsg 	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
5956589bdd4Sjsg 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
5966589bdd4Sjsg 
5976589bdd4Sjsg 	if (WARN(num_entries > max_entries,
5986589bdd4Sjsg 		 "First entry = %d; Num entries = %d (max=%d)\n",
5996589bdd4Sjsg 		 first_entry, num_entries, max_entries))
6006589bdd4Sjsg 		num_entries = max_entries;
6016589bdd4Sjsg 
6026589bdd4Sjsg 	if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
6036589bdd4Sjsg 	     NULL, num_entries, scratch_pte))
6046589bdd4Sjsg 		return ggtt->invalidate(ggtt);
6056589bdd4Sjsg 
6066589bdd4Sjsg 	gen8_ggtt_clear_range(vm, start, length);
6076589bdd4Sjsg }
6086589bdd4Sjsg 
609f005ef32Sjsg static void gen6_ggtt_insert_page(struct i915_address_space *vm,
610f005ef32Sjsg 				  dma_addr_t addr,
611f005ef32Sjsg 				  u64 offset,
612f005ef32Sjsg 				  unsigned int pat_index,
613f005ef32Sjsg 				  u32 flags)
614f005ef32Sjsg {
615f005ef32Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
616f005ef32Sjsg 	gen6_pte_t __iomem *pte =
617f005ef32Sjsg 		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
618f005ef32Sjsg 
619f005ef32Sjsg 	iowrite32(vm->pte_encode(addr, pat_index, flags), pte);
620f005ef32Sjsg 
621f005ef32Sjsg 	ggtt->invalidate(ggtt);
622f005ef32Sjsg }
623f005ef32Sjsg 
624f005ef32Sjsg /*
625f005ef32Sjsg  * Binds an object into the global GTT with the caching attributes given by pat_index.
626f005ef32Sjsg  * The object will be accessible to the GPU via commands whose operands
627f005ef32Sjsg  * reference offsets within the global GTT as well as accessible by the GPU
628f005ef32Sjsg  * through the GMADR mapped BAR (i915->mm.gtt->gtt).
629f005ef32Sjsg  */
630f005ef32Sjsg static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
631f005ef32Sjsg 				     struct i915_vma_resource *vma_res,
632f005ef32Sjsg 				     unsigned int pat_index,
633f005ef32Sjsg 				     u32 flags)
634f005ef32Sjsg {
635f005ef32Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
636f005ef32Sjsg 	gen6_pte_t __iomem *gte;
637f005ef32Sjsg 	gen6_pte_t __iomem *end;
638f005ef32Sjsg 	struct sgt_iter iter;
639f005ef32Sjsg 	dma_addr_t addr;
640f005ef32Sjsg 
641f005ef32Sjsg 	gte = (gen6_pte_t __iomem *)ggtt->gsm;
642f005ef32Sjsg 	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
643f005ef32Sjsg 
644f005ef32Sjsg 	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
645f005ef32Sjsg 	while (gte < end)
646f005ef32Sjsg 		iowrite32(vm->scratch[0]->encode, gte++);
647f005ef32Sjsg 	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
648f005ef32Sjsg 	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
649f005ef32Sjsg 		iowrite32(vm->pte_encode(addr, pat_index, flags), gte++);
650f005ef32Sjsg 	GEM_BUG_ON(gte > end);
651f005ef32Sjsg 
652f005ef32Sjsg 	/* Fill the allocated but "unused" space beyond the end of the buffer */
653f005ef32Sjsg 	while (gte < end)
654f005ef32Sjsg 		iowrite32(vm->scratch[0]->encode, gte++);
655f005ef32Sjsg 
656f005ef32Sjsg 	/*
657f005ef32Sjsg 	 * We want to flush the TLBs only after we're certain all the PTE
658f005ef32Sjsg 	 * updates have finished.
659f005ef32Sjsg 	 */
660f005ef32Sjsg 	ggtt->invalidate(ggtt);
661f005ef32Sjsg }
662f005ef32Sjsg 
663f005ef32Sjsg static void nop_clear_range(struct i915_address_space *vm,
664f005ef32Sjsg 			    u64 start, u64 length)
665f005ef32Sjsg {
666f005ef32Sjsg }
667f005ef32Sjsg 
668c349dbc7Sjsg static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
669c349dbc7Sjsg {
670c349dbc7Sjsg 	/*
671c349dbc7Sjsg 	 * Make sure the internal GAM fifo has been cleared of all GTT
672c349dbc7Sjsg 	 * writes before exiting stop_machine(). This guarantees that
673c349dbc7Sjsg 	 * any aperture accesses waiting to start in another process
674c349dbc7Sjsg 	 * cannot back up behind the GTT writes causing a hang.
675c349dbc7Sjsg 	 * The register can be any arbitrary GAM register.
676c349dbc7Sjsg 	 */
677c349dbc7Sjsg 	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
678c349dbc7Sjsg }
679c349dbc7Sjsg 
680c349dbc7Sjsg struct insert_page {
681c349dbc7Sjsg 	struct i915_address_space *vm;
682c349dbc7Sjsg 	dma_addr_t addr;
683c349dbc7Sjsg 	u64 offset;
684f005ef32Sjsg 	unsigned int pat_index;
685c349dbc7Sjsg };
686c349dbc7Sjsg 
687c349dbc7Sjsg static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
688c349dbc7Sjsg {
689c349dbc7Sjsg 	struct insert_page *arg = _arg;
690c349dbc7Sjsg 
691f005ef32Sjsg 	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
692f005ef32Sjsg 			      arg->pat_index, 0);
693c349dbc7Sjsg 	bxt_vtd_ggtt_wa(arg->vm);
694c349dbc7Sjsg 
695c349dbc7Sjsg 	return 0;
696c349dbc7Sjsg }
697c349dbc7Sjsg 
698c349dbc7Sjsg static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
699c349dbc7Sjsg 					  dma_addr_t addr,
700c349dbc7Sjsg 					  u64 offset,
701f005ef32Sjsg 					  unsigned int pat_index,
702c349dbc7Sjsg 					  u32 unused)
703c349dbc7Sjsg {
704f005ef32Sjsg 	struct insert_page arg = { vm, addr, offset, pat_index };
705c349dbc7Sjsg 
706c349dbc7Sjsg 	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
707c349dbc7Sjsg }
708c349dbc7Sjsg 
709c349dbc7Sjsg struct insert_entries {
710c349dbc7Sjsg 	struct i915_address_space *vm;
7111bb76ff1Sjsg 	struct i915_vma_resource *vma_res;
712f005ef32Sjsg 	unsigned int pat_index;
713c349dbc7Sjsg 	u32 flags;
714c349dbc7Sjsg };
715c349dbc7Sjsg 
716c349dbc7Sjsg static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
717c349dbc7Sjsg {
718c349dbc7Sjsg 	struct insert_entries *arg = _arg;
719c349dbc7Sjsg 
720f005ef32Sjsg 	gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
721f005ef32Sjsg 				 arg->pat_index, arg->flags);
722c349dbc7Sjsg 	bxt_vtd_ggtt_wa(arg->vm);
723c349dbc7Sjsg 
724c349dbc7Sjsg 	return 0;
725c349dbc7Sjsg }
726c349dbc7Sjsg 
727c349dbc7Sjsg static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
7281bb76ff1Sjsg 					     struct i915_vma_resource *vma_res,
729f005ef32Sjsg 					     unsigned int pat_index,
730c349dbc7Sjsg 					     u32 flags)
731c349dbc7Sjsg {
732f005ef32Sjsg 	struct insert_entries arg = { vm, vma_res, pat_index, flags };
733c349dbc7Sjsg 
734c349dbc7Sjsg 	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
735c349dbc7Sjsg }
736c349dbc7Sjsg 
737c349dbc7Sjsg static void gen6_ggtt_clear_range(struct i915_address_space *vm,
738c349dbc7Sjsg 				  u64 start, u64 length)
739c349dbc7Sjsg {
740c349dbc7Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
741c349dbc7Sjsg 	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
742c349dbc7Sjsg 	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
743c349dbc7Sjsg 	gen6_pte_t scratch_pte, __iomem *gtt_base =
744c349dbc7Sjsg 		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
745c349dbc7Sjsg 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
746c349dbc7Sjsg 	int i;
747c349dbc7Sjsg 
748c349dbc7Sjsg 	if (WARN(num_entries > max_entries,
749c349dbc7Sjsg 		 "First entry = %d; Num entries = %d (max=%d)\n",
750c349dbc7Sjsg 		 first_entry, num_entries, max_entries))
751c349dbc7Sjsg 		num_entries = max_entries;
752c349dbc7Sjsg 
753ad8b1aafSjsg 	scratch_pte = vm->scratch[0]->encode;
754c349dbc7Sjsg 	for (i = 0; i < num_entries; i++)
755c349dbc7Sjsg 		iowrite32(scratch_pte, &gtt_base[i]);
756c349dbc7Sjsg }
757c349dbc7Sjsg 
7581bb76ff1Sjsg void intel_ggtt_bind_vma(struct i915_address_space *vm,
759ad8b1aafSjsg 			 struct i915_vm_pt_stash *stash,
7601bb76ff1Sjsg 			 struct i915_vma_resource *vma_res,
761f005ef32Sjsg 			 unsigned int pat_index,
762c349dbc7Sjsg 			 u32 flags)
763c349dbc7Sjsg {
764c349dbc7Sjsg 	u32 pte_flags;
765c349dbc7Sjsg 
7661bb76ff1Sjsg 	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
767ad8b1aafSjsg 		return;
768ad8b1aafSjsg 
7691bb76ff1Sjsg 	vma_res->bound_flags |= flags;
7701bb76ff1Sjsg 
771c349dbc7Sjsg 	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
772c349dbc7Sjsg 	pte_flags = 0;
7731bb76ff1Sjsg 	if (vma_res->bi.readonly)
774c349dbc7Sjsg 		pte_flags |= PTE_READ_ONLY;
7751bb76ff1Sjsg 	if (vma_res->bi.lmem)
7765ca02815Sjsg 		pte_flags |= PTE_LM;
777c349dbc7Sjsg 
778f005ef32Sjsg 	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
7791bb76ff1Sjsg 	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
780c349dbc7Sjsg }
781c349dbc7Sjsg 
7821bb76ff1Sjsg void intel_ggtt_unbind_vma(struct i915_address_space *vm,
7831bb76ff1Sjsg 			   struct i915_vma_resource *vma_res)
784c349dbc7Sjsg {
7851bb76ff1Sjsg 	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
786c349dbc7Sjsg }
787c349dbc7Sjsg 
788b10449e6Sjsg /*
789b10449e6Sjsg  * Reserve the top of the GuC address space for firmware images. Addresses
790b10449e6Sjsg  * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
791b10449e6Sjsg  * which makes for a suitable range to hold GuC/HuC firmware images if the
792b10449e6Sjsg  * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
793b10449e6Sjsg  * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
794b10449e6Sjsg  * of the same size anyway, which is far more than needed, to keep the logic
795b10449e6Sjsg  * in uc_fw_ggtt_offset() simple.
796b10449e6Sjsg  */
797b10449e6Sjsg #define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
798b10449e6Sjsg 
799c349dbc7Sjsg static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
800c349dbc7Sjsg {
801b10449e6Sjsg 	u64 offset;
802c349dbc7Sjsg 	int ret;
803c349dbc7Sjsg 
804c349dbc7Sjsg 	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
805c349dbc7Sjsg 		return 0;
806c349dbc7Sjsg 
807b10449e6Sjsg 	GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
808b10449e6Sjsg 	offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
809c349dbc7Sjsg 
810b10449e6Sjsg 	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
811b10449e6Sjsg 				   GUC_TOP_RESERVE_SIZE, offset,
812b10449e6Sjsg 				   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
813c349dbc7Sjsg 	if (ret)
814c349dbc7Sjsg 		drm_dbg(&ggtt->vm.i915->drm,
815c349dbc7Sjsg 			"Failed to reserve top of GGTT for GuC\n");
816c349dbc7Sjsg 
817c349dbc7Sjsg 	return ret;
818c349dbc7Sjsg }
819c349dbc7Sjsg 
820c349dbc7Sjsg static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
821c349dbc7Sjsg {
822c349dbc7Sjsg 	if (drm_mm_node_allocated(&ggtt->uc_fw))
823c349dbc7Sjsg 		drm_mm_remove_node(&ggtt->uc_fw);
824c349dbc7Sjsg }
825c349dbc7Sjsg 
826c349dbc7Sjsg static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
827c349dbc7Sjsg {
828c349dbc7Sjsg 	ggtt_release_guc_top(ggtt);
829c349dbc7Sjsg 	if (drm_mm_node_allocated(&ggtt->error_capture))
830c349dbc7Sjsg 		drm_mm_remove_node(&ggtt->error_capture);
831c349dbc7Sjsg 	mutex_destroy(&ggtt->error_mutex);
832c349dbc7Sjsg }
833c349dbc7Sjsg 
834c349dbc7Sjsg static int init_ggtt(struct i915_ggtt *ggtt)
835c349dbc7Sjsg {
836c349dbc7Sjsg 	/*
837c349dbc7Sjsg 	 * Let GEM Manage all of the aperture.
838c349dbc7Sjsg 	 *
839c349dbc7Sjsg 	 * However, leave one page at the end still bound to the scratch page.
840c349dbc7Sjsg 	 * There are a number of places where the hardware apparently prefetches
841c349dbc7Sjsg 	 * past the end of the object, and we've seen multiple hangs with the
842c349dbc7Sjsg 	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
843c349dbc7Sjsg 	 * aperture.  One page should be enough to keep any prefetching inside
844c349dbc7Sjsg 	 * of the aperture.
845c349dbc7Sjsg 	 */
846c349dbc7Sjsg 	unsigned long hole_start, hole_end;
847c349dbc7Sjsg 	struct drm_mm_node *entry;
848c349dbc7Sjsg 	int ret;
849c349dbc7Sjsg 
850c349dbc7Sjsg 	/*
851c349dbc7Sjsg 	 * GuC requires all resources that we're sharing with it to be placed in
852c349dbc7Sjsg 	 * non-WOPCM memory. If GuC is not present or not in use we still need a
853c349dbc7Sjsg 	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
854c349dbc7Sjsg 	 * why.
855c349dbc7Sjsg 	 */
856c349dbc7Sjsg 	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
857f005ef32Sjsg 			       intel_wopcm_guc_size(&ggtt->vm.gt->wopcm));
858c349dbc7Sjsg 
859c349dbc7Sjsg 	ret = intel_vgt_balloon(ggtt);
860c349dbc7Sjsg 	if (ret)
861c349dbc7Sjsg 		return ret;
862c349dbc7Sjsg 
863c349dbc7Sjsg 	rw_init(&ggtt->error_mutex, "ggtter");
864c349dbc7Sjsg 	if (ggtt->mappable_end) {
865ad8b1aafSjsg 		/*
866ad8b1aafSjsg 		 * Reserve a mappable slot for our lockless error capture.
867ad8b1aafSjsg 		 *
868ad8b1aafSjsg 		 * We strongly prefer taking address 0x0 in order to protect
869ad8b1aafSjsg 		 * other critical buffers against accidental overwrites,
870ad8b1aafSjsg 		 * as writing to address 0 is a very common mistake.
871ad8b1aafSjsg 		 *
872ad8b1aafSjsg 		 * Since 0 may already be in use by the system (e.g. the BIOS
873ad8b1aafSjsg 		 * framebuffer), we let the reservation fail quietly and hope
874ad8b1aafSjsg 		 * 0 remains reserved always.
875ad8b1aafSjsg 		 *
876ad8b1aafSjsg 		 * If we fail to reserve 0, and then fail to find any space
877ad8b1aafSjsg 		 * for an error-capture, remain silent. We can afford not
878ad8b1aafSjsg 		 * to reserve an error_capture node as we have fallback
879ad8b1aafSjsg 		 * paths, and we trust that 0 will remain reserved. However,
880ad8b1aafSjsg 		 * the only likely reason for failure to insert is a driver
881ad8b1aafSjsg 		 * bug, which we expect to cause other failures...
882f005ef32Sjsg 		 *
883f005ef32Sjsg 		 * Since the CPU can perform speculative reads on the error capture
884f005ef32Sjsg 		 * range (write-combining allows it), add a scratch page after the
885f005ef32Sjsg 		 * error capture node to avoid DMAR errors.
886ad8b1aafSjsg 		 */
887f005ef32Sjsg 		ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE;
888ad8b1aafSjsg 		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
889ad8b1aafSjsg 		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
890ad8b1aafSjsg 			drm_mm_insert_node_in_range(&ggtt->vm.mm,
891c349dbc7Sjsg 						    &ggtt->error_capture,
892ad8b1aafSjsg 						    ggtt->error_capture.size, 0,
893ad8b1aafSjsg 						    ggtt->error_capture.color,
894c349dbc7Sjsg 						    0, ggtt->mappable_end,
895c349dbc7Sjsg 						    DRM_MM_INSERT_LOW);
896c349dbc7Sjsg 	}
897f005ef32Sjsg 	if (drm_mm_node_allocated(&ggtt->error_capture)) {
898f005ef32Sjsg 		u64 start = ggtt->error_capture.start;
899f005ef32Sjsg 		u64 size = ggtt->error_capture.size;
900f005ef32Sjsg 
901f005ef32Sjsg 		ggtt->vm.scratch_range(&ggtt->vm, start, size);
902ad8b1aafSjsg 		drm_dbg(&ggtt->vm.i915->drm,
903ad8b1aafSjsg 			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
904f005ef32Sjsg 			start, start + size);
905f005ef32Sjsg 	}
906c349dbc7Sjsg 
907c349dbc7Sjsg 	/*
908c349dbc7Sjsg 	 * The upper portion of the GuC address space has a sizeable hole
909c349dbc7Sjsg 	 * (several MB) that is inaccessible by GuC. Reserve this range within
910c349dbc7Sjsg 	 * GGTT as it can comfortably hold GuC/HuC firmware images.
911c349dbc7Sjsg 	 */
912c349dbc7Sjsg 	ret = ggtt_reserve_guc_top(ggtt);
913c349dbc7Sjsg 	if (ret)
914c349dbc7Sjsg 		goto err;
915c349dbc7Sjsg 
916c349dbc7Sjsg 	/* Clear any non-preallocated blocks */
917c349dbc7Sjsg 	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
918ad8b1aafSjsg 		drm_dbg(&ggtt->vm.i915->drm,
919c349dbc7Sjsg 			"clearing unused GTT space: [%lx, %lx]\n",
920c349dbc7Sjsg 			hole_start, hole_end);
921c349dbc7Sjsg 		ggtt->vm.clear_range(&ggtt->vm, hole_start,
922c349dbc7Sjsg 				     hole_end - hole_start);
923c349dbc7Sjsg 	}
924c349dbc7Sjsg 
925c349dbc7Sjsg 	/* And finally clear the reserved guard page */
926c349dbc7Sjsg 	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
927c349dbc7Sjsg 
928c349dbc7Sjsg 	return 0;
929c349dbc7Sjsg 
930c349dbc7Sjsg err:
931c349dbc7Sjsg 	cleanup_init_ggtt(ggtt);
932c349dbc7Sjsg 	return ret;
933c349dbc7Sjsg }
934c349dbc7Sjsg 
935ad8b1aafSjsg static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
936ad8b1aafSjsg 				  struct i915_vm_pt_stash *stash,
9371bb76ff1Sjsg 				  struct i915_vma_resource *vma_res,
938f005ef32Sjsg 				  unsigned int pat_index,
939c349dbc7Sjsg 				  u32 flags)
940c349dbc7Sjsg {
941c349dbc7Sjsg 	u32 pte_flags;
942c349dbc7Sjsg 
943c349dbc7Sjsg 	/* Currently applicable only to VLV */
944c349dbc7Sjsg 	pte_flags = 0;
9451bb76ff1Sjsg 	if (vma_res->bi.readonly)
946c349dbc7Sjsg 		pte_flags |= PTE_READ_ONLY;
947c349dbc7Sjsg 
948ad8b1aafSjsg 	if (flags & I915_VMA_LOCAL_BIND)
949ad8b1aafSjsg 		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
950f005ef32Sjsg 			       stash, vma_res, pat_index, flags);
951c349dbc7Sjsg 
952c349dbc7Sjsg 	if (flags & I915_VMA_GLOBAL_BIND)
953f005ef32Sjsg 		vm->insert_entries(vm, vma_res, pat_index, pte_flags);
9541bb76ff1Sjsg 
9551bb76ff1Sjsg 	vma_res->bound_flags |= flags;
956c349dbc7Sjsg }
957c349dbc7Sjsg 
958ad8b1aafSjsg static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
9591bb76ff1Sjsg 				    struct i915_vma_resource *vma_res)
960c349dbc7Sjsg {
9611bb76ff1Sjsg 	if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
9621bb76ff1Sjsg 		vm->clear_range(vm, vma_res->start, vma_res->vma_size);
963c349dbc7Sjsg 
9641bb76ff1Sjsg 	if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
9651bb76ff1Sjsg 		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
966c349dbc7Sjsg }
967c349dbc7Sjsg 
968c349dbc7Sjsg static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
969c349dbc7Sjsg {
970ad8b1aafSjsg 	struct i915_vm_pt_stash stash = {};
971c349dbc7Sjsg 	struct i915_ppgtt *ppgtt;
972c349dbc7Sjsg 	int err;
973c349dbc7Sjsg 
9741bb76ff1Sjsg 	ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
975c349dbc7Sjsg 	if (IS_ERR(ppgtt))
976c349dbc7Sjsg 		return PTR_ERR(ppgtt);
977c349dbc7Sjsg 
978c349dbc7Sjsg 	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
979c349dbc7Sjsg 		err = -ENODEV;
980c349dbc7Sjsg 		goto err_ppgtt;
981c349dbc7Sjsg 	}
982c349dbc7Sjsg 
983ad8b1aafSjsg 	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
984ad8b1aafSjsg 	if (err)
985ad8b1aafSjsg 		goto err_ppgtt;
986ad8b1aafSjsg 
9875ca02815Sjsg 	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
9885ca02815Sjsg 	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
9895ca02815Sjsg 	i915_gem_object_unlock(ppgtt->vm.scratch[0]);
990ad8b1aafSjsg 	if (err)
991ad8b1aafSjsg 		goto err_stash;
992ad8b1aafSjsg 
993c349dbc7Sjsg 	/*
994c349dbc7Sjsg 	 * Note we only pre-allocate as far as the end of the global
995c349dbc7Sjsg 	 * GTT. On 48b / 4-level page-tables, the difference is very,
996c349dbc7Sjsg 	 * very significant! We have to preallocate as GVT/vgpu does
997c349dbc7Sjsg 	 * not like the page directory disappearing.
998c349dbc7Sjsg 	 */
999ad8b1aafSjsg 	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
1000c349dbc7Sjsg 
1001c349dbc7Sjsg 	ggtt->alias = ppgtt;
1002c349dbc7Sjsg 	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
1003c349dbc7Sjsg 
10041bb76ff1Sjsg 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
1005c349dbc7Sjsg 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
1006c349dbc7Sjsg 
10071bb76ff1Sjsg 	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
1008c349dbc7Sjsg 	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
1009c349dbc7Sjsg 
1010ad8b1aafSjsg 	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
1011c349dbc7Sjsg 	return 0;
1012c349dbc7Sjsg 
1013ad8b1aafSjsg err_stash:
1014ad8b1aafSjsg 	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
1015c349dbc7Sjsg err_ppgtt:
1016c349dbc7Sjsg 	i915_vm_put(&ppgtt->vm);
1017c349dbc7Sjsg 	return err;
1018c349dbc7Sjsg }
1019c349dbc7Sjsg 
1020c349dbc7Sjsg static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
1021c349dbc7Sjsg {
1022c349dbc7Sjsg 	struct i915_ppgtt *ppgtt;
1023c349dbc7Sjsg 
1024c349dbc7Sjsg 	ppgtt = fetch_and_zero(&ggtt->alias);
1025c349dbc7Sjsg 	if (!ppgtt)
1026c349dbc7Sjsg 		return;
1027c349dbc7Sjsg 
1028c349dbc7Sjsg 	i915_vm_put(&ppgtt->vm);
1029c349dbc7Sjsg 
10301bb76ff1Sjsg 	ggtt->vm.vma_ops.bind_vma   = intel_ggtt_bind_vma;
10311bb76ff1Sjsg 	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
1032c349dbc7Sjsg }
1033c349dbc7Sjsg 
1034c349dbc7Sjsg int i915_init_ggtt(struct drm_i915_private *i915)
1035c349dbc7Sjsg {
1036c349dbc7Sjsg 	int ret;
1037c349dbc7Sjsg 
10381bb76ff1Sjsg 	ret = init_ggtt(to_gt(i915)->ggtt);
1039c349dbc7Sjsg 	if (ret)
1040c349dbc7Sjsg 		return ret;
1041c349dbc7Sjsg 
1042c349dbc7Sjsg 	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
10431bb76ff1Sjsg 		ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
1044c349dbc7Sjsg 		if (ret)
10451bb76ff1Sjsg 			cleanup_init_ggtt(to_gt(i915)->ggtt);
1046c349dbc7Sjsg 	}
1047c349dbc7Sjsg 
1048c349dbc7Sjsg 	return 0;
1049c349dbc7Sjsg }
1050c349dbc7Sjsg 
1051c349dbc7Sjsg static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
1052c349dbc7Sjsg {
1053c349dbc7Sjsg 	struct i915_vma *vma, *vn;
1054c349dbc7Sjsg 
1055c349dbc7Sjsg 	flush_workqueue(ggtt->vm.i915->wq);
10561bb76ff1Sjsg 	i915_gem_drain_freed_objects(ggtt->vm.i915);
1057c349dbc7Sjsg 
1058c349dbc7Sjsg 	mutex_lock(&ggtt->vm.mutex);
1059c349dbc7Sjsg 
10601bb76ff1Sjsg 	ggtt->vm.skip_pte_rewrite = true;
10611bb76ff1Sjsg 
10621bb76ff1Sjsg 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
10631bb76ff1Sjsg 		struct drm_i915_gem_object *obj = vma->obj;
10641bb76ff1Sjsg 		bool trylock;
10651bb76ff1Sjsg 
10661bb76ff1Sjsg 		trylock = i915_gem_object_trylock(obj, NULL);
10671bb76ff1Sjsg 		WARN_ON(!trylock);
10681bb76ff1Sjsg 
1069c349dbc7Sjsg 		WARN_ON(__i915_vma_unbind(vma));
10701bb76ff1Sjsg 		if (trylock)
10711bb76ff1Sjsg 			i915_gem_object_unlock(obj);
10721bb76ff1Sjsg 	}
1073c349dbc7Sjsg 
1074c349dbc7Sjsg 	if (drm_mm_node_allocated(&ggtt->error_capture))
1075c349dbc7Sjsg 		drm_mm_remove_node(&ggtt->error_capture);
1076c349dbc7Sjsg 	mutex_destroy(&ggtt->error_mutex);
1077c349dbc7Sjsg 
1078c349dbc7Sjsg 	ggtt_release_guc_top(ggtt);
1079c349dbc7Sjsg 	intel_vgt_deballoon(ggtt);
1080c349dbc7Sjsg 
1081c349dbc7Sjsg 	ggtt->vm.cleanup(&ggtt->vm);
1082c349dbc7Sjsg 
1083c349dbc7Sjsg 	mutex_unlock(&ggtt->vm.mutex);
1084c349dbc7Sjsg 	i915_address_space_fini(&ggtt->vm);
1085c349dbc7Sjsg 
1086c349dbc7Sjsg #ifdef notyet
1087c349dbc7Sjsg 	arch_phys_wc_del(ggtt->mtrr);
1088c349dbc7Sjsg 
1089c349dbc7Sjsg 	if (ggtt->iomap.size)
1090c349dbc7Sjsg 		io_mapping_fini(&ggtt->iomap);
1091c349dbc7Sjsg #endif
1092c349dbc7Sjsg }
1093c349dbc7Sjsg 
1094c349dbc7Sjsg /**
1095c349dbc7Sjsg  * i915_ggtt_driver_release - Clean up GGTT hardware initialization
1096c349dbc7Sjsg  * @i915: i915 device
1097c349dbc7Sjsg  */
1098c349dbc7Sjsg void i915_ggtt_driver_release(struct drm_i915_private *i915)
1099c349dbc7Sjsg {
11001bb76ff1Sjsg 	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
1101c349dbc7Sjsg 
1102ad8b1aafSjsg 	fini_aliasing_ppgtt(ggtt);
1103c349dbc7Sjsg 
1104ad8b1aafSjsg 	intel_ggtt_fini_fences(ggtt);
1105ad8b1aafSjsg 	ggtt_cleanup_hw(ggtt);
1106c349dbc7Sjsg }
1107c349dbc7Sjsg 
11085ca02815Sjsg /**
11095ca02815Sjsg  * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
11105ca02815Sjsg  * all free objects have been drained.
11115ca02815Sjsg  * @i915: i915 device
11125ca02815Sjsg  */
11135ca02815Sjsg void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
11145ca02815Sjsg {
11151bb76ff1Sjsg 	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
11165ca02815Sjsg 
11175ca02815Sjsg 	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
11185ca02815Sjsg 	dma_resv_fini(&ggtt->vm._resv);
11195ca02815Sjsg }
11205ca02815Sjsg 
1121c349dbc7Sjsg static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
1122c349dbc7Sjsg {
1123c349dbc7Sjsg 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
1124c349dbc7Sjsg 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
1125c349dbc7Sjsg 	return snb_gmch_ctl << 20;
1126c349dbc7Sjsg }
1127c349dbc7Sjsg 
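/* BDW+ GGMS field: 0 means no GGTT, otherwise the GGTT page table is 2^GGMS MiB. */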
1128c349dbc7Sjsg static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
1129c349dbc7Sjsg {
1130c349dbc7Sjsg 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
1131c349dbc7Sjsg 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
1132c349dbc7Sjsg 	if (bdw_gmch_ctl)
1133c349dbc7Sjsg 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
1134c349dbc7Sjsg 
1135c349dbc7Sjsg #ifdef CONFIG_X86_32
1136c349dbc7Sjsg 	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
1137c349dbc7Sjsg 	if (bdw_gmch_ctl > 4)
1138c349dbc7Sjsg 		bdw_gmch_ctl = 4;
1139c349dbc7Sjsg #endif
1140c349dbc7Sjsg 
1141c349dbc7Sjsg 	return bdw_gmch_ctl << 20;
1142c349dbc7Sjsg }
1143c349dbc7Sjsg 
1144c349dbc7Sjsg static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
1145c349dbc7Sjsg {
1146c349dbc7Sjsg 	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
1147c349dbc7Sjsg 	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
1148c349dbc7Sjsg 
1149c349dbc7Sjsg 	if (gmch_ctrl)
1150c349dbc7Sjsg 		return 1 << (20 + gmch_ctrl);
1151c349dbc7Sjsg 
1152c349dbc7Sjsg 	return 0;
1153c349dbc7Sjsg }
1154c349dbc7Sjsg 
11551bb76ff1Sjsg static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
11561bb76ff1Sjsg {
11571bb76ff1Sjsg 	/*
11581bb76ff1Sjsg 	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
11591bb76ff1Sjsg 	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
11601bb76ff1Sjsg 	 */
11611bb76ff1Sjsg 	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
11621bb76ff1Sjsg 	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
11631bb76ff1Sjsg }
11641bb76ff1Sjsg 
11651bb76ff1Sjsg static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
11661bb76ff1Sjsg {
11671bb76ff1Sjsg 	return gen6_gttmmadr_size(i915) / 2;
11681bb76ff1Sjsg }
11691bb76ff1Sjsg 
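/*
 * ggtt_probe_common() maps the GSM (the GGTT page-table aperture), using a
 * write-combining mapping where needs_wc_ggtt_mapping() allows it, and sets
 * up the scratch page together with its pre-encoded scratch PTE.
 */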
1170c349dbc7Sjsg #ifdef __linux__
1171c349dbc7Sjsg 
1172c349dbc7Sjsg static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
1173c349dbc7Sjsg {
1174c349dbc7Sjsg 	struct drm_i915_private *i915 = ggtt->vm.i915;
11756c50df30Sjsg 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
11765ca02815Sjsg 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1177c349dbc7Sjsg 	phys_addr_t phys_addr;
11785ca02815Sjsg 	u32 pte_flags;
1179c349dbc7Sjsg 	int ret;
1180c349dbc7Sjsg 
1181f005ef32Sjsg 	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
11826c50df30Sjsg 
11836c50df30Sjsg 	if (i915_direct_stolen_access(i915)) {
11846c50df30Sjsg 		drm_dbg(&i915->drm, "Using direct GSM access\n");
1185*99fa8e50Sjsg 		phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK;
11866c50df30Sjsg 	} else {
1187f005ef32Sjsg 		phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
11886c50df30Sjsg 	}
1189c349dbc7Sjsg 
1190f005ef32Sjsg 	if (needs_wc_ggtt_mapping(i915))
1191c349dbc7Sjsg 		ggtt->gsm = ioremap_wc(phys_addr, size);
1192f005ef32Sjsg 	else
1193f005ef32Sjsg 		ggtt->gsm = ioremap(phys_addr, size);
1194f005ef32Sjsg 
1195c349dbc7Sjsg 	if (!ggtt->gsm) {
1196ad8b1aafSjsg 		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
1197c349dbc7Sjsg 		return -ENOMEM;
1198c349dbc7Sjsg 	}
1199c349dbc7Sjsg 
12005ca02815Sjsg 	kref_init(&ggtt->vm.resv_ref);
1201ad8b1aafSjsg 	ret = setup_scratch_page(&ggtt->vm);
1202c349dbc7Sjsg 	if (ret) {
1203ad8b1aafSjsg 		drm_err(&i915->drm, "Scratch setup failed\n");
1204c349dbc7Sjsg 		/* iounmap will also get called at remove, but meh */
1205c349dbc7Sjsg 		iounmap(ggtt->gsm);
1206c349dbc7Sjsg 		return ret;
1207c349dbc7Sjsg 	}
1208c349dbc7Sjsg 
12095ca02815Sjsg 	pte_flags = 0;
12105ca02815Sjsg 	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
12115ca02815Sjsg 		pte_flags |= PTE_LM;
12125ca02815Sjsg 
1213ad8b1aafSjsg 	ggtt->vm.scratch[0]->encode =
1214ad8b1aafSjsg 		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
1215f005ef32Sjsg 				    i915_gem_get_pat_index(i915,
1216f005ef32Sjsg 							   I915_CACHE_NONE),
1217f005ef32Sjsg 				    pte_flags);
1218c349dbc7Sjsg 
1219c349dbc7Sjsg 	return 0;
1220c349dbc7Sjsg }
1221c349dbc7Sjsg 
1222c349dbc7Sjsg #else
1223c349dbc7Sjsg 
1224c349dbc7Sjsg static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
1225c349dbc7Sjsg {
1226c349dbc7Sjsg 	struct drm_i915_private *i915 = ggtt->vm.i915;
12276c50df30Sjsg 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
1228c349dbc7Sjsg 	struct pci_dev *pdev = i915->drm.pdev;
1229c349dbc7Sjsg 	phys_addr_t phys_addr;
1230c349dbc7Sjsg 	bus_addr_t addr;
1231c349dbc7Sjsg 	bus_size_t len;
1232c349dbc7Sjsg 	pcireg_t type;
1233c349dbc7Sjsg 	int flags;
123485c0555eSjsg 	u32 pte_flags;
1235c349dbc7Sjsg 	int ret;
1236c349dbc7Sjsg 
1237c349dbc7Sjsg 	type = pci_mapreg_type(i915->pc, i915->tag, 0x10);
1238c349dbc7Sjsg 	ret = -pci_mapreg_info(i915->pc, i915->tag, 0x10, type,
1239c349dbc7Sjsg 	    &addr, &len, NULL);
1240c349dbc7Sjsg 	if (ret)
1241c349dbc7Sjsg 		return ret;
1242c349dbc7Sjsg 
124304fd7d0eSjsg 	GEM_WARN_ON(len != gen6_gttmmadr_size(i915));
12446c50df30Sjsg 
12456c50df30Sjsg 	if (i915_direct_stolen_access(i915)) {
12466c50df30Sjsg 		drm_dbg(&i915->drm, "Using direct GSM access\n");
1247*99fa8e50Sjsg 		phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK;
12486c50df30Sjsg 	} else {
124904fd7d0eSjsg 		phys_addr = addr + gen6_gttadr_offset(i915);
12506c50df30Sjsg 	}
125104fd7d0eSjsg 
125204fd7d0eSjsg 	if (needs_wc_ggtt_mapping(i915))
1253c349dbc7Sjsg 		flags = BUS_SPACE_MAP_PREFETCHABLE;
125404fd7d0eSjsg 	else
125504fd7d0eSjsg 		flags = 0;
125604fd7d0eSjsg 
125704fd7d0eSjsg 	ret = -bus_space_map(i915->bst, phys_addr, size,
1258c349dbc7Sjsg 	    flags | BUS_SPACE_MAP_LINEAR, &ggtt->gsm_bsh);
1259c349dbc7Sjsg 	if (ret) {
1260ad8b1aafSjsg 		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
1261c349dbc7Sjsg 		return ret;
1262c349dbc7Sjsg 	}
1263c349dbc7Sjsg 	ggtt->gsm = bus_space_vaddr(i915->bst, ggtt->gsm_bsh);
1264c349dbc7Sjsg 	ggtt->gsm_size = size;
1265c349dbc7Sjsg 	if (!ggtt->gsm) {
126604fd7d0eSjsg 		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
1267c349dbc7Sjsg 		return -ENOMEM;
1268c349dbc7Sjsg 	}
1269c349dbc7Sjsg 
127085c0555eSjsg 	kref_init(&ggtt->vm.resv_ref);
1271ad8b1aafSjsg 	ret = setup_scratch_page(&ggtt->vm);
1272c349dbc7Sjsg 	if (ret) {
1273ad8b1aafSjsg 		drm_err(&i915->drm, "Scratch setup failed\n");
1274c349dbc7Sjsg 		/* iounmap will also get called at remove, but meh */
1275c349dbc7Sjsg 		bus_space_unmap(i915->bst, ggtt->gsm_bsh, size);
1276c349dbc7Sjsg 		return ret;
1277c349dbc7Sjsg 	}
1278c349dbc7Sjsg 
127985c0555eSjsg 	pte_flags = 0;
128085c0555eSjsg 	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
128185c0555eSjsg 		pte_flags |= PTE_LM;
128285c0555eSjsg 
1283ad8b1aafSjsg 	ggtt->vm.scratch[0]->encode =
1284ad8b1aafSjsg 		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
1285f005ef32Sjsg 				    i915_gem_get_pat_index(i915,
1286f005ef32Sjsg 							   I915_CACHE_NONE),
1287f005ef32Sjsg 				    pte_flags);
1288c349dbc7Sjsg 
1289c349dbc7Sjsg 	return 0;
1290c349dbc7Sjsg }
1291c349dbc7Sjsg 
1292c349dbc7Sjsg #endif
1293c349dbc7Sjsg 
1294c349dbc7Sjsg static void gen6_gmch_remove(struct i915_address_space *vm)
1295c349dbc7Sjsg {
1296c349dbc7Sjsg 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
1297c349dbc7Sjsg 
1298c349dbc7Sjsg #ifdef __linux__
1299c349dbc7Sjsg 	iounmap(ggtt->gsm);
1300c349dbc7Sjsg #else
1301c349dbc7Sjsg 	bus_space_unmap(vm->i915->bst, ggtt->gsm_bsh, ggtt->gsm_size);
1302c349dbc7Sjsg #endif
1303ad8b1aafSjsg 	free_scratch(vm);
1304c349dbc7Sjsg }
1305c349dbc7Sjsg 
1306c349dbc7Sjsg #ifdef __linux__
1307c349dbc7Sjsg static struct resource pci_resource(struct pci_dev *pdev, int bar)
1308c349dbc7Sjsg {
1309f005ef32Sjsg 	return DEFINE_RES_MEM(pci_resource_start(pdev, bar),
1310c349dbc7Sjsg 			      pci_resource_len(pdev, bar));
1311c349dbc7Sjsg }
1312c349dbc7Sjsg #endif
1313c349dbc7Sjsg 
1314c349dbc7Sjsg static int gen8_gmch_probe(struct i915_ggtt *ggtt)
1315c349dbc7Sjsg {
1316c349dbc7Sjsg 	struct drm_i915_private *i915 = ggtt->vm.i915;
1317c349dbc7Sjsg 	struct pci_dev *pdev = i915->drm.pdev;
1318c349dbc7Sjsg 	unsigned int size;
1319c349dbc7Sjsg 	u16 snb_gmch_ctl;
1320c349dbc7Sjsg 
1321f005ef32Sjsg 	if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
1322c349dbc7Sjsg #ifdef __linux__
1323f005ef32Sjsg 		if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
13241bb76ff1Sjsg 			return -ENXIO;
13251bb76ff1Sjsg 
1326f005ef32Sjsg 		ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
1327c349dbc7Sjsg 		ggtt->mappable_end = resource_size(&ggtt->gmadr);
1328c349dbc7Sjsg #else
1329c349dbc7Sjsg 		bus_addr_t base;
1330c349dbc7Sjsg 		bus_size_t sz;
1331c349dbc7Sjsg 		pcireg_t type;
1332ad8b1aafSjsg 		int err;
1333c349dbc7Sjsg 
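		/* 0x18 is the GMADR aperture BAR (GEN4_GMADR_BAR above) */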
1334c349dbc7Sjsg 		type = pci_mapreg_type(i915->pc, i915->tag, 0x18);
1335c349dbc7Sjsg 		err = -pci_mapreg_info(i915->pc, i915->tag, 0x18, type,
1336c349dbc7Sjsg 		    &base, &sz, NULL);
1337c349dbc7Sjsg 		if (err)
1338c349dbc7Sjsg 			return err;
1339c349dbc7Sjsg 		ggtt->gmadr.start = base;
1340c349dbc7Sjsg 		ggtt->mappable_end = sz;
1341c349dbc7Sjsg #endif
1342c349dbc7Sjsg 	}
1343c349dbc7Sjsg 
1344c349dbc7Sjsg 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1345c349dbc7Sjsg 	if (IS_CHERRYVIEW(i915))
1346c349dbc7Sjsg 		size = chv_get_total_gtt_size(snb_gmch_ctl);
1347c349dbc7Sjsg 	else
1348c349dbc7Sjsg 		size = gen8_get_total_gtt_size(snb_gmch_ctl);
1349c349dbc7Sjsg 
1350ad8b1aafSjsg 	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
13511bb76ff1Sjsg 	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
13521bb76ff1Sjsg 	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
1353ad8b1aafSjsg 
1354c349dbc7Sjsg 	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
1355c349dbc7Sjsg 	ggtt->vm.cleanup = gen6_gmch_remove;
1356c349dbc7Sjsg 	ggtt->vm.insert_page = gen8_ggtt_insert_page;
1357c349dbc7Sjsg 	ggtt->vm.clear_range = nop_clear_range;
1358f005ef32Sjsg 	ggtt->vm.scratch_range = gen8_ggtt_clear_range;
1359c349dbc7Sjsg 
1360c349dbc7Sjsg 	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
1361c349dbc7Sjsg 
13625ca02815Sjsg 	/*
13635ca02815Sjsg 	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
13645ca02815Sjsg 	 * and always on CHV.
13655ca02815Sjsg 	 */
13665ca02815Sjsg 	if (intel_vm_no_concurrent_access_wa(i915)) {
1367c349dbc7Sjsg 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
1368c349dbc7Sjsg 		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
13691bb76ff1Sjsg 
13701bb76ff1Sjsg 		/*
13711bb76ff1Sjsg 		 * Calling the stop_machine() version of the GGTT update
13721bb76ff1Sjsg 		 * functions from the error capture/reset path will raise a
13731bb76ff1Sjsg 		 * lockdep warning. Allow calling gen8_ggtt_insert_* directly
13741bb76ff1Sjsg 		 * on the reset path, which is safe from parallel GGTT updates.
13751bb76ff1Sjsg 		 */
13761bb76ff1Sjsg 		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
13771bb76ff1Sjsg 		ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;
13781bb76ff1Sjsg 
1379c349dbc7Sjsg 		ggtt->vm.bind_async_flags =
1380c349dbc7Sjsg 			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
1381c349dbc7Sjsg 	}
1382c349dbc7Sjsg 
13836589bdd4Sjsg 	if (i915_ggtt_require_binder(i915)) {
13846589bdd4Sjsg 		ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
13856589bdd4Sjsg 		ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
13866589bdd4Sjsg 		ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
13876589bdd4Sjsg 		/*
13886589bdd4Sjsg 		 * When the GPU is hung, we might bind VMAs for error capture.
13896589bdd4Sjsg 		 * Fall back to CPU GGTT updates in that case.
13906589bdd4Sjsg 		 */
13916589bdd4Sjsg 		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
13926589bdd4Sjsg 	}
13936589bdd4Sjsg 
1394f005ef32Sjsg 	if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
1395f005ef32Sjsg 		ggtt->invalidate = guc_ggtt_invalidate;
1396f005ef32Sjsg 	else
1397c349dbc7Sjsg 		ggtt->invalidate = gen8_ggtt_invalidate;
1398c349dbc7Sjsg 
13991bb76ff1Sjsg 	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
14001bb76ff1Sjsg 	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
1401c349dbc7Sjsg 
1402f005ef32Sjsg 	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
1403f005ef32Sjsg 		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
1404f005ef32Sjsg 	else
1405c349dbc7Sjsg 		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
1406c349dbc7Sjsg 
1407c349dbc7Sjsg 	return ggtt_probe_common(ggtt, size);
1408c349dbc7Sjsg }
1409c349dbc7Sjsg 
1410f005ef32Sjsg /*
1411f005ef32Sjsg  * For pre-gen8 platforms, pat_index is the same as enum i915_cache_level,
1412f005ef32Sjsg  * so the switch-case statements in these PTE encode functions are still valid.
1413f005ef32Sjsg  * See translation table LEGACY_CACHELEVEL.
1414f005ef32Sjsg  */
1415c349dbc7Sjsg static u64 snb_pte_encode(dma_addr_t addr,
1416f005ef32Sjsg 			  unsigned int pat_index,
1417c349dbc7Sjsg 			  u32 flags)
1418c349dbc7Sjsg {
1419c349dbc7Sjsg 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1420c349dbc7Sjsg 
1421f005ef32Sjsg 	switch (pat_index) {
1422c349dbc7Sjsg 	case I915_CACHE_L3_LLC:
1423c349dbc7Sjsg 	case I915_CACHE_LLC:
1424c349dbc7Sjsg 		pte |= GEN6_PTE_CACHE_LLC;
1425c349dbc7Sjsg 		break;
1426c349dbc7Sjsg 	case I915_CACHE_NONE:
1427c349dbc7Sjsg 		pte |= GEN6_PTE_UNCACHED;
1428c349dbc7Sjsg 		break;
1429c349dbc7Sjsg 	default:
1430f005ef32Sjsg 		MISSING_CASE(pat_index);
1431c349dbc7Sjsg 	}
1432c349dbc7Sjsg 
1433c349dbc7Sjsg 	return pte;
1434c349dbc7Sjsg }
1435c349dbc7Sjsg 
1436c349dbc7Sjsg static u64 ivb_pte_encode(dma_addr_t addr,
1437f005ef32Sjsg 			  unsigned int pat_index,
1438c349dbc7Sjsg 			  u32 flags)
1439c349dbc7Sjsg {
1440c349dbc7Sjsg 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1441c349dbc7Sjsg 
1442f005ef32Sjsg 	switch (pat_index) {
1443c349dbc7Sjsg 	case I915_CACHE_L3_LLC:
1444c349dbc7Sjsg 		pte |= GEN7_PTE_CACHE_L3_LLC;
1445c349dbc7Sjsg 		break;
1446c349dbc7Sjsg 	case I915_CACHE_LLC:
1447c349dbc7Sjsg 		pte |= GEN6_PTE_CACHE_LLC;
1448c349dbc7Sjsg 		break;
1449c349dbc7Sjsg 	case I915_CACHE_NONE:
1450c349dbc7Sjsg 		pte |= GEN6_PTE_UNCACHED;
1451c349dbc7Sjsg 		break;
1452c349dbc7Sjsg 	default:
1453f005ef32Sjsg 		MISSING_CASE(pat_index);
1454c349dbc7Sjsg 	}
1455c349dbc7Sjsg 
1456c349dbc7Sjsg 	return pte;
1457c349dbc7Sjsg }
1458c349dbc7Sjsg 
1459c349dbc7Sjsg static u64 byt_pte_encode(dma_addr_t addr,
1460f005ef32Sjsg 			  unsigned int pat_index,
1461c349dbc7Sjsg 			  u32 flags)
1462c349dbc7Sjsg {
1463c349dbc7Sjsg 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1464c349dbc7Sjsg 
1465c349dbc7Sjsg 	if (!(flags & PTE_READ_ONLY))
1466c349dbc7Sjsg 		pte |= BYT_PTE_WRITEABLE;
1467c349dbc7Sjsg 
1468f005ef32Sjsg 	if (pat_index != I915_CACHE_NONE)
1469c349dbc7Sjsg 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
1470c349dbc7Sjsg 
1471c349dbc7Sjsg 	return pte;
1472c349dbc7Sjsg }
1473c349dbc7Sjsg 
1474c349dbc7Sjsg static u64 hsw_pte_encode(dma_addr_t addr,
1475f005ef32Sjsg 			  unsigned int pat_index,
1476c349dbc7Sjsg 			  u32 flags)
1477c349dbc7Sjsg {
1478c349dbc7Sjsg 	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1479c349dbc7Sjsg 
1480f005ef32Sjsg 	if (pat_index != I915_CACHE_NONE)
1481c349dbc7Sjsg 		pte |= HSW_WB_LLC_AGE3;
1482c349dbc7Sjsg 
1483c349dbc7Sjsg 	return pte;
1484c349dbc7Sjsg }
1485c349dbc7Sjsg 
1486c349dbc7Sjsg static u64 iris_pte_encode(dma_addr_t addr,
1487f005ef32Sjsg 			   unsigned int pat_index,
1488c349dbc7Sjsg 			   u32 flags)
1489c349dbc7Sjsg {
1490c349dbc7Sjsg 	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1491c349dbc7Sjsg 
1492f005ef32Sjsg 	switch (pat_index) {
1493c349dbc7Sjsg 	case I915_CACHE_NONE:
1494c349dbc7Sjsg 		break;
1495c349dbc7Sjsg 	case I915_CACHE_WT:
1496c349dbc7Sjsg 		pte |= HSW_WT_ELLC_LLC_AGE3;
1497c349dbc7Sjsg 		break;
1498c349dbc7Sjsg 	default:
1499c349dbc7Sjsg 		pte |= HSW_WB_ELLC_LLC_AGE3;
1500c349dbc7Sjsg 		break;
1501c349dbc7Sjsg 	}
1502c349dbc7Sjsg 
1503c349dbc7Sjsg 	return pte;
1504c349dbc7Sjsg }
1505c349dbc7Sjsg 
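/*
 * Probe the GGTT on gen6/gen7 devices: map the GMADR aperture, size the
 * GGTT from the GMCH control word and pick the platform-specific PTE
 * encoder (SNB, IVB, BYT, HSW or Iris/eDRAM).
 */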
1506c349dbc7Sjsg static int gen6_gmch_probe(struct i915_ggtt *ggtt)
1507c349dbc7Sjsg {
1508c349dbc7Sjsg 	struct drm_i915_private *i915 = ggtt->vm.i915;
1509c349dbc7Sjsg 	struct pci_dev *pdev = i915->drm.pdev;
1510c349dbc7Sjsg 	unsigned int size;
1511c349dbc7Sjsg 	u16 snb_gmch_ctl;
1512c349dbc7Sjsg 
1513c349dbc7Sjsg #ifdef __linux__
1514f005ef32Sjsg 	if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
15151bb76ff1Sjsg 		return -ENXIO;
15161bb76ff1Sjsg 
1517f005ef32Sjsg 	ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
1518c349dbc7Sjsg 	ggtt->mappable_end = resource_size(&ggtt->gmadr);
1519c349dbc7Sjsg #else
1520c349dbc7Sjsg 	bus_addr_t base;
1521c349dbc7Sjsg 	bus_size_t sz;
1522c349dbc7Sjsg 	pcireg_t type;
1523ad8b1aafSjsg 	int err;
1524c349dbc7Sjsg 
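	/* 0x18 is the GMADR aperture BAR (GEN4_GMADR_BAR above) */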
1525c349dbc7Sjsg 	type = pci_mapreg_type(i915->pc, i915->tag, 0x18);
1526c349dbc7Sjsg 	err = -pci_mapreg_info(i915->pc, i915->tag, 0x18, type,
1527c349dbc7Sjsg 	    &base, &sz, NULL);
1528c349dbc7Sjsg 	if (err)
1529c349dbc7Sjsg 		return err;
1530c349dbc7Sjsg 	ggtt->gmadr.start = base;
1531c349dbc7Sjsg 	ggtt->mappable_end = sz;
1532c349dbc7Sjsg #endif
1533c349dbc7Sjsg 
1534c349dbc7Sjsg 	/*
1535c349dbc7Sjsg 	 * 64/512MB is the current min/max we actually know of, but this is
1536c349dbc7Sjsg 	 * just a coarse sanity check.
1537c349dbc7Sjsg 	 */
15381bb76ff1Sjsg 	if (ggtt->mappable_end < (64 << 20) ||
15391bb76ff1Sjsg 	    ggtt->mappable_end > (512 << 20)) {
1540ad8b1aafSjsg 		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
1541ad8b1aafSjsg 			&ggtt->mappable_end);
1542c349dbc7Sjsg 		return -ENXIO;
1543c349dbc7Sjsg 	}
1544c349dbc7Sjsg 
1545c349dbc7Sjsg 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1546c349dbc7Sjsg 
1547c349dbc7Sjsg 	size = gen6_get_total_gtt_size(snb_gmch_ctl);
1548c349dbc7Sjsg 	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
1549c349dbc7Sjsg 
1550ad8b1aafSjsg 	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
15511bb76ff1Sjsg 	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
1552ad8b1aafSjsg 
1553c349dbc7Sjsg 	ggtt->vm.clear_range = nop_clear_range;
1554f005ef32Sjsg 	if (!HAS_FULL_PPGTT(i915))
1555c349dbc7Sjsg 		ggtt->vm.clear_range = gen6_ggtt_clear_range;
1556f005ef32Sjsg 	ggtt->vm.scratch_range = gen6_ggtt_clear_range;
1557c349dbc7Sjsg 	ggtt->vm.insert_page = gen6_ggtt_insert_page;
1558c349dbc7Sjsg 	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
1559c349dbc7Sjsg 	ggtt->vm.cleanup = gen6_gmch_remove;
1560c349dbc7Sjsg 
1561c349dbc7Sjsg 	ggtt->invalidate = gen6_ggtt_invalidate;
1562c349dbc7Sjsg 
1563c349dbc7Sjsg 	if (HAS_EDRAM(i915))
1564c349dbc7Sjsg 		ggtt->vm.pte_encode = iris_pte_encode;
1565c349dbc7Sjsg 	else if (IS_HASWELL(i915))
1566c349dbc7Sjsg 		ggtt->vm.pte_encode = hsw_pte_encode;
1567c349dbc7Sjsg 	else if (IS_VALLEYVIEW(i915))
1568c349dbc7Sjsg 		ggtt->vm.pte_encode = byt_pte_encode;
15695ca02815Sjsg 	else if (GRAPHICS_VER(i915) >= 7)
1570c349dbc7Sjsg 		ggtt->vm.pte_encode = ivb_pte_encode;
1571c349dbc7Sjsg 	else
1572c349dbc7Sjsg 		ggtt->vm.pte_encode = snb_pte_encode;
1573c349dbc7Sjsg 
15741bb76ff1Sjsg 	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
15751bb76ff1Sjsg 	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
1576c349dbc7Sjsg 
1577c349dbc7Sjsg 	return ggtt_probe_common(ggtt, size);
1578c349dbc7Sjsg }
1579c349dbc7Sjsg 
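/*
 * Dispatch to the generation-specific probe routine and clamp the
 * reported GGTT and aperture sizes to values the driver can handle.
 */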
1580c349dbc7Sjsg static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
1581c349dbc7Sjsg {
1582c349dbc7Sjsg 	struct drm_i915_private *i915 = gt->i915;
1583c349dbc7Sjsg 	int ret;
1584c349dbc7Sjsg 
1585c349dbc7Sjsg 	ggtt->vm.gt = gt;
1586c349dbc7Sjsg 	ggtt->vm.i915 = i915;
1587c349dbc7Sjsg #ifdef notyet
15885ca02815Sjsg 	ggtt->vm.dma = i915->drm.dev;
1589c349dbc7Sjsg #endif
15905ca02815Sjsg 	dma_resv_init(&ggtt->vm._resv);
1591c349dbc7Sjsg 
15921bb76ff1Sjsg 	if (GRAPHICS_VER(i915) >= 8)
15931bb76ff1Sjsg 		ret = gen8_gmch_probe(ggtt);
15941bb76ff1Sjsg 	else if (GRAPHICS_VER(i915) >= 6)
1595c349dbc7Sjsg 		ret = gen6_gmch_probe(ggtt);
1596c349dbc7Sjsg 	else
15971bb76ff1Sjsg 		ret = intel_ggtt_gmch_probe(ggtt);
15981bb76ff1Sjsg 
15995ca02815Sjsg 	if (ret) {
16005ca02815Sjsg 		dma_resv_fini(&ggtt->vm._resv);
1601c349dbc7Sjsg 		return ret;
16025ca02815Sjsg 	}
1603c349dbc7Sjsg 
1604c349dbc7Sjsg 	if ((ggtt->vm.total - 1) >> 32) {
1605ad8b1aafSjsg 		drm_err(&i915->drm,
1606ad8b1aafSjsg 			"We never expected a Global GTT with more than 32bits"
1607c349dbc7Sjsg 			" of address space! Found %lldM!\n",
1608c349dbc7Sjsg 			ggtt->vm.total >> 20);
1609c349dbc7Sjsg 		ggtt->vm.total = 1ULL << 32;
1610c349dbc7Sjsg 		ggtt->mappable_end =
1611c349dbc7Sjsg 			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
1612c349dbc7Sjsg 	}
1613c349dbc7Sjsg 
1614c349dbc7Sjsg 	if (ggtt->mappable_end > ggtt->vm.total) {
1615ad8b1aafSjsg 		drm_err(&i915->drm,
1616ad8b1aafSjsg 			"mappable aperture extends past end of GGTT,"
1617c349dbc7Sjsg 			" aperture=%pa, total=%llx\n",
1618c349dbc7Sjsg 			&ggtt->mappable_end, ggtt->vm.total);
1619c349dbc7Sjsg 		ggtt->mappable_end = ggtt->vm.total;
1620c349dbc7Sjsg 	}
1621c349dbc7Sjsg 
1622c349dbc7Sjsg 	/* GMADR is the PCI mmio aperture into the global GTT. */
1623ad8b1aafSjsg 	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
1624ad8b1aafSjsg 	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
1625ad8b1aafSjsg 		(u64)ggtt->mappable_end >> 20);
1626ad8b1aafSjsg 	drm_dbg(&i915->drm, "DSM size = %lluM\n",
1627c349dbc7Sjsg 		(u64)resource_size(&intel_graphics_stolen_res) >> 20);
1628c349dbc7Sjsg 
1629c349dbc7Sjsg 	return 0;
1630c349dbc7Sjsg }
1631c349dbc7Sjsg 
1632c349dbc7Sjsg /**
1633c349dbc7Sjsg  * i915_ggtt_probe_hw - Probe GGTT hardware location
1634c349dbc7Sjsg  * @i915: i915 device
1635c349dbc7Sjsg  */
1636c349dbc7Sjsg int i915_ggtt_probe_hw(struct drm_i915_private *i915)
1637c349dbc7Sjsg {
1638f005ef32Sjsg 	struct intel_gt *gt;
1639f005ef32Sjsg 	int ret, i;
1640f005ef32Sjsg 
1641f005ef32Sjsg 	for_each_gt(gt, i915, i) {
1642f005ef32Sjsg 		ret = intel_gt_assign_ggtt(gt);
1643f005ef32Sjsg 		if (ret)
1644f005ef32Sjsg 			return ret;
1645f005ef32Sjsg 	}
1646c349dbc7Sjsg 
16471bb76ff1Sjsg 	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
1648c349dbc7Sjsg 	if (ret)
1649c349dbc7Sjsg 		return ret;
1650c349dbc7Sjsg 
16511bb76ff1Sjsg 	if (i915_vtd_active(i915))
1652ad8b1aafSjsg 		drm_info(&i915->drm, "VT-d active for gfx access\n");
1653c349dbc7Sjsg 
1654c349dbc7Sjsg 	return 0;
1655c349dbc7Sjsg }
1656c349dbc7Sjsg 
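/*
 * Allocate a drm-managed GGTT structure; it is freed automatically when
 * the drm device is released.
 */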
1657f005ef32Sjsg struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
1658f005ef32Sjsg {
1659f005ef32Sjsg 	struct i915_ggtt *ggtt;
1660f005ef32Sjsg 
1661f005ef32Sjsg 	ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL);
1662f005ef32Sjsg 	if (!ggtt)
1663f005ef32Sjsg 		return ERR_PTR(-ENOMEM);
1664f005ef32Sjsg 
1665f005ef32Sjsg 	INIT_LIST_HEAD(&ggtt->gt_list);
1666f005ef32Sjsg 
1667f005ef32Sjsg 	return ggtt;
1668f005ef32Sjsg }
1669f005ef32Sjsg 
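/* Only pre-gen6 (GMCH-based) platforms need any work here. */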
1670c349dbc7Sjsg int i915_ggtt_enable_hw(struct drm_i915_private *i915)
1671c349dbc7Sjsg {
16721bb76ff1Sjsg 	if (GRAPHICS_VER(i915) < 6)
16731bb76ff1Sjsg 		return intel_ggtt_gmch_enable_hw(i915);
1674c349dbc7Sjsg 
1675c349dbc7Sjsg 	return 0;
1676c349dbc7Sjsg }
1677c349dbc7Sjsg 
16781bb76ff1Sjsg /**
16791bb76ff1Sjsg  * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
16801bb76ff1Sjsg  * @vm: The VM to restore the mappings for
16811bb76ff1Sjsg  *
16821bb76ff1Sjsg  * Restore the memory mappings for all objects mapped to HW via the GGTT or a
16831bb76ff1Sjsg  * DPT page table.
16841bb76ff1Sjsg  *
16851bb76ff1Sjsg  * Returns %true if restoring the mapping for any object that was in a write
16861bb76ff1Sjsg  * domain before suspend.
16871bb76ff1Sjsg  */
16881bb76ff1Sjsg bool i915_ggtt_resume_vm(struct i915_address_space *vm)
1689c349dbc7Sjsg {
1690c349dbc7Sjsg 	struct i915_vma *vma;
16911bb76ff1Sjsg 	bool write_domain_objs = false;
1692c349dbc7Sjsg 
16931bb76ff1Sjsg 	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
1694c349dbc7Sjsg 
1695f005ef32Sjsg 	/* First fill our portion of the GTT with scratch pages */
16961bb76ff1Sjsg 	vm->clear_range(vm, 0, vm->total);
1697c349dbc7Sjsg 
1698c349dbc7Sjsg 	/* clflush objects bound into the GGTT and rebind them. */
16991bb76ff1Sjsg 	list_for_each_entry(vma, &vm->bound_list, vm_link) {
1700c349dbc7Sjsg 		struct drm_i915_gem_object *obj = vma->obj;
1701ad8b1aafSjsg 		unsigned int was_bound =
1702ad8b1aafSjsg 			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
1703c349dbc7Sjsg 
1704ad8b1aafSjsg 		GEM_BUG_ON(!was_bound);
1705f005ef32Sjsg 
17061bb76ff1Sjsg 		/*
17071bb76ff1Sjsg 		 * Clear the bound flags of the vma resource to allow
17081bb76ff1Sjsg 		 * ptes to be repopulated.
17091bb76ff1Sjsg 		 */
17101bb76ff1Sjsg 		vma->resource->bound_flags = 0;
17111bb76ff1Sjsg 		vma->ops->bind_vma(vm, NULL, vma->resource,
1712f005ef32Sjsg 				   obj ? obj->pat_index :
1713f005ef32Sjsg 					 i915_gem_get_pat_index(vm->i915,
1714f005ef32Sjsg 								I915_CACHE_NONE),
1715ad8b1aafSjsg 				   was_bound);
1716f005ef32Sjsg 
1717c349dbc7Sjsg 		if (obj) { /* only used during resume => exclusive access */
17181bb76ff1Sjsg 			write_domain_objs |= fetch_and_zero(&obj->write_domain);
1719c349dbc7Sjsg 			obj->read_domains |= I915_GEM_DOMAIN_GTT;
1720c349dbc7Sjsg 		}
1721c349dbc7Sjsg 	}
1722c349dbc7Sjsg 
17231bb76ff1Sjsg 	return write_domain_objs;
17241bb76ff1Sjsg }
17251bb76ff1Sjsg 
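/*
 * Full GGTT resume: clear stale GT faults, rebind every bound VMA,
 * refresh the error capture window with scratch PTEs, restore the uC
 * mappings and fence registers, and flush CPU caches if any object was
 * in a write domain before suspend.
 */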
17261bb76ff1Sjsg void i915_ggtt_resume(struct i915_ggtt *ggtt)
17271bb76ff1Sjsg {
1728f005ef32Sjsg 	struct intel_gt *gt;
17291bb76ff1Sjsg 	bool flush;
17301bb76ff1Sjsg 
1731f005ef32Sjsg 	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
1732f005ef32Sjsg 		intel_gt_check_and_clear_faults(gt);
17331bb76ff1Sjsg 
17341bb76ff1Sjsg 	flush = i915_ggtt_resume_vm(&ggtt->vm);
17351bb76ff1Sjsg 
1736f005ef32Sjsg 	if (drm_mm_node_allocated(&ggtt->error_capture))
1737f005ef32Sjsg 		ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
1738f005ef32Sjsg 				       ggtt->error_capture.size);
1739f005ef32Sjsg 
1740f005ef32Sjsg 	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
1741f005ef32Sjsg 		intel_uc_resume_mappings(&gt->uc);
1742f005ef32Sjsg 
1743c349dbc7Sjsg 	ggtt->invalidate(ggtt);
1744c349dbc7Sjsg 
1745c349dbc7Sjsg 	if (flush)
1746c349dbc7Sjsg 		wbinvd_on_all_cpus();
1747c349dbc7Sjsg 
1748ad8b1aafSjsg 	intel_ggtt_restore_fences(ggtt);
1749c349dbc7Sjsg }
1750