xref: /openbsd-src/sys/dev/pci/drm/i915/gt/intel_ggtt.c (revision 3374c67d44f9b75b98444cbf63020f777792342e)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5 
6 #include <asm/set_memory.h>
7 #include <asm/smp.h>
8 #include <linux/types.h>
9 #include <linux/stop_machine.h>
10 
11 #include <drm/i915_drm.h>
12 #include <drm/intel-gtt.h>
13 
14 #include "gem/i915_gem_lmem.h"
15 
16 #include "intel_ggtt_gmch.h"
17 #include "intel_gt.h"
18 #include "intel_gt_regs.h"
19 #include "intel_pci_config.h"
20 #include "i915_drv.h"
21 #include "i915_pci.h"
22 #include "i915_scatterlist.h"
23 #include "i915_utils.h"
24 #include "i915_vgpu.h"
25 
26 #include "intel_gtt.h"
27 #include "gen8_ppgtt.h"
28 
29 #include <dev/pci/pcivar.h>
30 #include <dev/pci/agpvar.h>
31 
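/*
 * Only a gen8+ integrated (non-LMEM) GGTT is treated as possibly retaining
 * its PTEs across suspend; everything else has the whole range cleared on
 * suspend and rewritten on resume (see i915_ggtt_suspend_vm() and
 * i915_ggtt_resume_vm() below).
 */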
32 static inline bool suspend_retains_ptes(struct i915_address_space *vm)
33 {
34 	return GRAPHICS_VER(vm->i915) >= 8 &&
35 		!HAS_LMEM(vm->i915) &&
36 		vm->is_ggtt;
37 }
38 
39 static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
40 				   unsigned long color,
41 				   u64 *start,
42 				   u64 *end)
43 {
44 	if (i915_node_color_differs(node, color))
45 		*start += I915_GTT_PAGE_SIZE;
46 
47 	/*
48 	 * Also leave a space between the unallocated reserved node after the
49 	 * GTT and any objects within the GTT, i.e. we use the color adjustment
50 	 * to insert a guard page to prevent prefetches crossing over the
51 	 * GTT boundary.
52 	 */
53 	node = list_next_entry(node, node_list);
54 	if (node->color != color)
55 		*end -= I915_GTT_PAGE_SIZE;
56 }
57 
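/*
 * One-time GGTT address space setup: read-only mapping support (VLV only),
 * the guard-page colouring hack for non-LLC parts without PPGTT, and the
 * CPU mapping of the mappable aperture.  On Linux the aperture is mapped
 * write-combining via io_mapping/MTRR; on OpenBSD the same region is
 * physloaded into UVM, its pages flagged PG_PMAP_WC, and the aperture
 * mapped through agp_init_map().  Fence registers are initialised last.
 */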
58 static int ggtt_init_hw(struct i915_ggtt *ggtt)
59 {
60 	struct drm_i915_private *i915 = ggtt->vm.i915;
61 	int i;
62 
63 	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
64 
65 	ggtt->vm.is_ggtt = true;
66 
67 	/* Only VLV supports read-only GGTT mappings */
68 	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
69 
70 	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
71 		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
72 
73 	if (ggtt->mappable_end) {
74 #ifdef __linux__
75 		if (!io_mapping_init_wc(&ggtt->iomap,
76 					ggtt->gmadr.start,
77 					ggtt->mappable_end)) {
78 			ggtt->vm.cleanup(&ggtt->vm);
79 			return -EIO;
80 		}
81 
82 		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
83 					      ggtt->mappable_end);
84 #else
85 		/* XXX would be a lot nicer to get agp info before now */
86 		uvm_page_physload(atop(ggtt->gmadr.start),
87 		    atop(ggtt->gmadr.start + ggtt->mappable_end),
88 		    atop(ggtt->gmadr.start),
89 		    atop(ggtt->gmadr.start + ggtt->mappable_end),
90 		    PHYSLOAD_DEVICE);
91 		/* array of vm pages that physload introduced. */
92 		i915->pgs = PHYS_TO_VM_PAGE(ggtt->gmadr.start);
93 		KASSERT(i915->pgs != NULL);
94 		/*
95 		 * XXX mark all pages write combining so user mmaps get the
96 		 * right bits. We really need a proper MI api for doing this,
97 		 * but for now this allows us to use PAT where available.
98 		 */
99 		for (i = 0; i < atop(ggtt->mappable_end); i++)
100 			atomic_setbits_int(&(i915->pgs[i].pg_flags),
101 			    PG_PMAP_WC);
102 		if (agp_init_map(i915->bst, ggtt->gmadr.start,
103 		    ggtt->mappable_end,
104 		    BUS_SPACE_MAP_LINEAR | BUS_SPACE_MAP_PREFETCHABLE,
105 		    &i915->agph))
106 			panic("can't map aperture");
107 #endif
108 	}
109 
110 	intel_ggtt_init_fences(ggtt);
111 
112 	return 0;
113 }
114 
115 /**
116  * i915_ggtt_init_hw - Initialize GGTT hardware
117  * @i915: i915 device
118  */
119 int i915_ggtt_init_hw(struct drm_i915_private *i915)
120 {
121 	int ret;
122 
123 	/*
124 	 * Note that we use page colouring to enforce a guard page at the
125 	 * end of the address space. This is required as the CS may prefetch
126 	 * beyond the end of the batch buffer, across the page boundary,
127 	 * and beyond the end of the GTT if we do not provide a guard.
128 	 */
129 	ret = ggtt_init_hw(to_gt(i915)->ggtt);
130 	if (ret)
131 		return ret;
132 
133 	return 0;
134 }
135 
136 /*
137  * Return the value of the last GGTT pte cast to a u64, if
138  * the system is supposed to retain ptes across resume. 0 otherwise.
139  */
140 static u64 read_last_pte(struct i915_address_space *vm)
141 {
142 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
143 	gen8_pte_t __iomem *ptep;
144 
145 	if (!suspend_retains_ptes(vm))
146 		return 0;
147 
148 	GEM_BUG_ON(GRAPHICS_VER(vm->i915) < 8);
149 	ptep = (typeof(ptep))ggtt->gsm + (ggtt_total_entries(ggtt) - 1);
150 	return readq(ptep);
151 }
152 
153 /**
154  * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
155  * @vm: The VM to suspend the mappings for
156  *
157  * Suspend the memory mappings for all objects mapped to HW via the GGTT or a
158  * DPT page table.
159  */
160 void i915_ggtt_suspend_vm(struct i915_address_space *vm)
161 {
162 	struct i915_vma *vma, *vn;
163 	int save_skip_rewrite;
164 
165 	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
166 
167 retry:
168 	i915_gem_drain_freed_objects(vm->i915);
169 
170 	mutex_lock(&vm->mutex);
171 
172 	/*
173 	 * Skip rewriting PTE on VMA unbind.
174 	 * FIXME: Use an argument to i915_vma_unbind() instead?
175 	 */
176 	save_skip_rewrite = vm->skip_pte_rewrite;
177 	vm->skip_pte_rewrite = true;
178 
179 	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
180 		struct drm_i915_gem_object *obj = vma->obj;
181 
182 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
183 
184 		if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
185 			continue;
186 
187 		/* unlikely to race when the GPU is idle, so no need to worry about the slow path */
188 		if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
189 			/*
190 			 * No dead objects should appear here; the GPU should be
191 			 * completely idle and userspace suspended.
192 			 */
193 			i915_gem_object_get(obj);
194 
195 			mutex_unlock(&vm->mutex);
196 
197 			i915_gem_object_lock(obj, NULL);
198 			GEM_WARN_ON(i915_vma_unbind(vma));
199 			i915_gem_object_unlock(obj);
200 			i915_gem_object_put(obj);
201 
202 			vm->skip_pte_rewrite = save_skip_rewrite;
203 			goto retry;
204 		}
205 
206 		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
207 			i915_vma_wait_for_bind(vma);
208 
209 			__i915_vma_evict(vma, false);
210 			drm_mm_remove_node(&vma->node);
211 		}
212 
213 		i915_gem_object_unlock(obj);
214 	}
215 
216 	if (!suspend_retains_ptes(vm))
217 		vm->clear_range(vm, 0, vm->total);
218 	else
219 		i915_vm_to_ggtt(vm)->probed_pte = read_last_pte(vm);
220 
221 	vm->skip_pte_rewrite = save_skip_rewrite;
222 
223 	mutex_unlock(&vm->mutex);
224 }
225 
226 void i915_ggtt_suspend(struct i915_ggtt *ggtt)
227 {
228 	i915_ggtt_suspend_vm(&ggtt->vm);
229 	ggtt->invalidate(ggtt);
230 
231 	intel_gt_check_and_clear_faults(ggtt->vm.gt);
232 }
233 
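/*
 * gen6 GGTT invalidation: write GFX_FLSH_CNTL and post a read back so the
 * write has reached the hardware before anyone relies on the new PTEs.  The
 * uncore lock serialises the write/read pair against other locked mmio
 * accesses.
 */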
234 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
235 {
236 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
237 
238 	spin_lock_irq(&uncore->lock);
239 	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
240 	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
241 	spin_unlock_irq(&uncore->lock);
242 }
243 
244 static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
245 {
246 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
247 
248 	/*
249 	 * Note that as an uncached mmio write, this flushes the write-combining
250 	 * buffer (WCB) of pending GGTT writes before it triggers the invalidate.
251 	 */
252 	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
253 }
254 
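/*
 * When GuC is in use it keeps its own cached GGTT translations, so after
 * the usual gen8 invalidate we additionally kick the GuC TLB, via
 * GEN12_GUC_TLB_INV_CR on gen12+ or GEN8_GTCR on older parts.
 */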
255 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
256 {
257 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
258 	struct drm_i915_private *i915 = ggtt->vm.i915;
259 
260 	gen8_ggtt_invalidate(ggtt);
261 
262 	if (GRAPHICS_VER(i915) >= 12)
263 		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
264 				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
265 	else
266 		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
267 }
268 
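/*
 * gen8+ GGTT PTEs are simply the page-aligned dma address with a present
 * bit, plus a local-memory bit on dGPUs, e.g. (illustrative only):
 *
 *	gen8_ggtt_pte_encode(addr, level, 0)      == addr | GEN8_PAGE_PRESENT
 *	gen8_ggtt_pte_encode(addr, level, PTE_LM) == addr | GEN8_PAGE_PRESENT
 *						     | GEN12_GGTT_PTE_LM
 *
 * The cache level is ignored here, unlike the gen6/7 encoders further down.
 */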
269 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
270 			 enum i915_cache_level level,
271 			 u32 flags)
272 {
273 	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
274 
275 	if (flags & PTE_LM)
276 		pte |= GEN12_GGTT_PTE_LM;
277 
278 	return pte;
279 }
280 
281 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
282 {
283 	writeq(pte, addr);
284 }
285 
286 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
287 				  dma_addr_t addr,
288 				  u64 offset,
289 				  enum i915_cache_level level,
290 				  u32 flags)
291 {
292 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
293 	gen8_pte_t __iomem *pte =
294 		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
295 
296 	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
297 
298 	ggtt->invalidate(ggtt);
299 }
300 
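/*
 * Write one gen8 PTE per page of the vma resource, starting at
 * vma_res->start, pad the remainder of the node with the scratch PTE, and
 * only then invalidate, so the TLB flush is ordered after all PTE updates.
 */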
301 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
302 				     struct i915_vma_resource *vma_res,
303 				     enum i915_cache_level level,
304 				     u32 flags)
305 {
306 	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
307 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
308 	gen8_pte_t __iomem *gte;
309 	gen8_pte_t __iomem *end;
310 	struct sgt_iter iter;
311 	dma_addr_t addr;
312 
313 	/*
314 	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
315 	 * not to allow the user to override access to a read only page.
316 	 */
317 
318 	gte = (gen8_pte_t __iomem *)ggtt->gsm;
319 	gte += vma_res->start / I915_GTT_PAGE_SIZE;
320 	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
321 
322 	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
323 		gen8_set_pte(gte++, pte_encode | addr);
324 	GEM_BUG_ON(gte > end);
325 
326 	/* Fill the allocated but "unused" space beyond the end of the buffer */
327 	while (gte < end)
328 		gen8_set_pte(gte++, vm->scratch[0]->encode);
329 
330 	/*
331 	 * We want to flush the TLBs only after we're certain all the PTE
332 	 * updates have finished.
333 	 */
334 	ggtt->invalidate(ggtt);
335 }
336 
337 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
338 				  dma_addr_t addr,
339 				  u64 offset,
340 				  enum i915_cache_level level,
341 				  u32 flags)
342 {
343 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
344 	gen6_pte_t __iomem *pte =
345 		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
346 
347 	iowrite32(vm->pte_encode(addr, level, flags), pte);
348 
349 	ggtt->invalidate(ggtt);
350 }
351 
352 /*
353  * Binds an object into the global gtt with the specified cache level.
354  * The object will be accessible to the GPU via commands whose operands
355  * reference offsets within the global GTT as well as accessible by the GPU
356  * through the GMADR mapped BAR (i915->mm.gtt->gtt).
357  */
358 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
359 				     struct i915_vma_resource *vma_res,
360 				     enum i915_cache_level level,
361 				     u32 flags)
362 {
363 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
364 	gen6_pte_t __iomem *gte;
365 	gen6_pte_t __iomem *end;
366 	struct sgt_iter iter;
367 	dma_addr_t addr;
368 
369 	gte = (gen6_pte_t __iomem *)ggtt->gsm;
370 	gte += vma_res->start / I915_GTT_PAGE_SIZE;
371 	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
372 
373 	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
374 		iowrite32(vm->pte_encode(addr, level, flags), gte++);
375 	GEM_BUG_ON(gte > end);
376 
377 	/* Fill the allocated but "unused" space beyond the end of the buffer */
378 	while (gte < end)
379 		iowrite32(vm->scratch[0]->encode, gte++);
380 
381 	/*
382 	 * We want to flush the TLBs only after we're certain all the PTE
383 	 * updates have finished.
384 	 */
385 	ggtt->invalidate(ggtt);
386 }
387 
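/*
 * Used as vm->clear_range where clearing is skipped as an optimisation
 * (see the gmch probe functions below); real clearing is only hooked up
 * when the VT-d scanout workaround applies, or on gen6 without full PPGTT.
 */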
388 static void nop_clear_range(struct i915_address_space *vm,
389 			    u64 start, u64 length)
390 {
391 }
392 
393 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
394 				  u64 start, u64 length)
395 {
396 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
397 	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
398 	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
399 	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
400 	gen8_pte_t __iomem *gtt_base =
401 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
402 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
403 	int i;
404 
405 	if (WARN(num_entries > max_entries,
406 		 "First entry = %d; Num entries = %d (max=%d)\n",
407 		 first_entry, num_entries, max_entries))
408 		num_entries = max_entries;
409 
410 	for (i = 0; i < num_entries; i++)
411 		gen8_set_pte(&gtt_base[i], scratch_pte);
412 }
413 
414 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
415 {
416 	/*
417 	 * Make sure the internal GAM fifo has been cleared of all GTT
418 	 * writes before exiting stop_machine(). This guarantees that
419 	 * any aperture accesses waiting to start in another process
420 	 * cannot back up behind the GTT writes causing a hang.
421 	 * The register can be any arbitrary GAM register.
422 	 */
423 	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
424 }
425 
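/*
 * The __BKL variants below bundle their arguments into a struct and run the
 * normal gen8 insert through stop_machine(), followed by the GAM fifo drain
 * above (bxt_vtd_ggtt_wa()), so that GGTT updates cannot overlap aperture
 * accesses on platforms needing the VT-d workaround.
 */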
426 struct insert_page {
427 	struct i915_address_space *vm;
428 	dma_addr_t addr;
429 	u64 offset;
430 	enum i915_cache_level level;
431 };
432 
433 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
434 {
435 	struct insert_page *arg = _arg;
436 
437 	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
438 	bxt_vtd_ggtt_wa(arg->vm);
439 
440 	return 0;
441 }
442 
443 static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
444 					  dma_addr_t addr,
445 					  u64 offset,
446 					  enum i915_cache_level level,
447 					  u32 unused)
448 {
449 	struct insert_page arg = { vm, addr, offset, level };
450 
451 	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
452 }
453 
454 struct insert_entries {
455 	struct i915_address_space *vm;
456 	struct i915_vma_resource *vma_res;
457 	enum i915_cache_level level;
458 	u32 flags;
459 };
460 
461 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
462 {
463 	struct insert_entries *arg = _arg;
464 
465 	gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
466 	bxt_vtd_ggtt_wa(arg->vm);
467 
468 	return 0;
469 }
470 
471 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
472 					     struct i915_vma_resource *vma_res,
473 					     enum i915_cache_level level,
474 					     u32 flags)
475 {
476 	struct insert_entries arg = { vm, vma_res, level, flags };
477 
478 	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
479 }
480 
481 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
482 				  u64 start, u64 length)
483 {
484 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
485 	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
486 	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
487 	gen6_pte_t scratch_pte, __iomem *gtt_base =
488 		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
489 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
490 	int i;
491 
492 	if (WARN(num_entries > max_entries,
493 		 "First entry = %d; Num entries = %d (max=%d)\n",
494 		 first_entry, num_entries, max_entries))
495 		num_entries = max_entries;
496 
497 	scratch_pte = vm->scratch[0]->encode;
498 	for (i = 0; i < num_entries; i++)
499 		iowrite32(scratch_pte, &gtt_base[i]);
500 }
501 
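/*
 * Default GGTT bind: translate the vma resource's read-only/lmem attributes
 * into PTE flags, write the PTEs via vm->insert_entries() and record the
 * GTT page size, which is always 4K here.
 */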
502 void intel_ggtt_bind_vma(struct i915_address_space *vm,
503 			 struct i915_vm_pt_stash *stash,
504 			 struct i915_vma_resource *vma_res,
505 			 enum i915_cache_level cache_level,
506 			 u32 flags)
507 {
508 	u32 pte_flags;
509 
510 	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
511 		return;
512 
513 	vma_res->bound_flags |= flags;
514 
515 	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
516 	pte_flags = 0;
517 	if (vma_res->bi.readonly)
518 		pte_flags |= PTE_READ_ONLY;
519 	if (vma_res->bi.lmem)
520 		pte_flags |= PTE_LM;
521 
522 	vm->insert_entries(vm, vma_res, cache_level, pte_flags);
523 	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
524 }
525 
526 void intel_ggtt_unbind_vma(struct i915_address_space *vm,
527 			   struct i915_vma_resource *vma_res)
528 {
529 	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
530 }
531 
532 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
533 {
534 	u64 size;
535 	int ret;
536 
537 	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
538 		return 0;
539 
540 	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
541 	size = ggtt->vm.total - GUC_GGTT_TOP;
542 
543 	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
544 				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
545 				   PIN_NOEVICT);
546 	if (ret)
547 		drm_dbg(&ggtt->vm.i915->drm,
548 			"Failed to reserve top of GGTT for GuC\n");
549 
550 	return ret;
551 }
552 
553 static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
554 {
555 	if (drm_mm_node_allocated(&ggtt->uc_fw))
556 		drm_mm_remove_node(&ggtt->uc_fw);
557 }
558 
559 static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
560 {
561 	ggtt_release_guc_top(ggtt);
562 	if (drm_mm_node_allocated(&ggtt->error_capture))
563 		drm_mm_remove_node(&ggtt->error_capture);
564 	mutex_destroy(&ggtt->error_mutex);
565 }
566 
567 static int init_ggtt(struct i915_ggtt *ggtt)
568 {
569 	/*
570 	 * Let GEM Manage all of the aperture.
571 	 *
572 	 * However, leave one page at the end still bound to the scratch page.
573 	 * There are a number of places where the hardware apparently prefetches
574 	 * past the end of the object, and we've seen multiple hangs with the
575 	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
576 	 * aperture.  One page should be enough to keep any prefetching inside
577 	 * of the aperture.
578 	 */
579 	unsigned long hole_start, hole_end;
580 	struct drm_mm_node *entry;
581 	int ret;
582 
583 	ggtt->pte_lost = true;
584 
585 	/*
586 	 * GuC requires all resources that we're sharing with it to be placed in
587 	 * non-WOPCM memory. If GuC is not present or not in use we still need a
588 	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
589 	 * why.
590 	 */
591 	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
592 			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
593 
594 	ret = intel_vgt_balloon(ggtt);
595 	if (ret)
596 		return ret;
597 
598 	rw_init(&ggtt->error_mutex, "ggtter");
599 	if (ggtt->mappable_end) {
600 		/*
601 		 * Reserve a mappable slot for our lockless error capture.
602 		 *
603 		 * We strongly prefer taking address 0x0 in order to protect
604 		 * other critical buffers against accidental overwrites,
605 		 * as writing to address 0 is a very common mistake.
606 		 *
607 		 * Since 0 may already be in use by the system (e.g. the BIOS
608 		 * framebuffer), we let the reservation fail quietly and hope
609 		 * 0 remains reserved always.
610 		 *
611 		 * If we fail to reserve 0, and then fail to find any space
612 		 * for an error-capture, remain silent. We can afford not
613 		 * to reserve an error_capture node as we have fallback
614 		 * paths, and we trust that 0 will remain reserved. However,
615 		 * the only likely reason for failure to insert is a driver
616 		 * bug, which we expect to cause other failures...
617 		 */
618 		ggtt->error_capture.size = I915_GTT_PAGE_SIZE;
619 		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
620 		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
621 			drm_mm_insert_node_in_range(&ggtt->vm.mm,
622 						    &ggtt->error_capture,
623 						    ggtt->error_capture.size, 0,
624 						    ggtt->error_capture.color,
625 						    0, ggtt->mappable_end,
626 						    DRM_MM_INSERT_LOW);
627 	}
628 	if (drm_mm_node_allocated(&ggtt->error_capture))
629 		drm_dbg(&ggtt->vm.i915->drm,
630 			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
631 			ggtt->error_capture.start,
632 			ggtt->error_capture.start + ggtt->error_capture.size);
633 
634 	/*
635 	 * The upper portion of the GuC address space has a sizeable hole
636 	 * (several MB) that is inaccessible by GuC. Reserve this range within
637 	 * GGTT as it can comfortably hold GuC/HuC firmware images.
638 	 */
639 	ret = ggtt_reserve_guc_top(ggtt);
640 	if (ret)
641 		goto err;
642 
643 	/* Clear any non-preallocated blocks */
644 	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
645 		drm_dbg(&ggtt->vm.i915->drm,
646 			"clearing unused GTT space: [%lx, %lx]\n",
647 			hole_start, hole_end);
648 		ggtt->vm.clear_range(&ggtt->vm, hole_start,
649 				     hole_end - hole_start);
650 	}
651 
652 	/* And finally clear the reserved guard page */
653 	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
654 
655 	return 0;
656 
657 err:
658 	cleanup_init_ggtt(ggtt);
659 	return ret;
660 }
661 
662 static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
663 				  struct i915_vm_pt_stash *stash,
664 				  struct i915_vma_resource *vma_res,
665 				  enum i915_cache_level cache_level,
666 				  u32 flags)
667 {
668 	u32 pte_flags;
669 
670 	/* Currently applicable only to VLV */
671 	pte_flags = 0;
672 	if (vma_res->bi.readonly)
673 		pte_flags |= PTE_READ_ONLY;
674 
675 	if (flags & I915_VMA_LOCAL_BIND)
676 		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
677 			       stash, vma_res, cache_level, flags);
678 
679 	if (flags & I915_VMA_GLOBAL_BIND)
680 		vm->insert_entries(vm, vma_res, cache_level, pte_flags);
681 
682 	vma_res->bound_flags |= flags;
683 }
684 
685 static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
686 				    struct i915_vma_resource *vma_res)
687 {
688 	if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
689 		vm->clear_range(vm, vma_res->start, vma_res->vma_size);
690 
691 	if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
692 		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
693 }
694 
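/*
 * Aliasing PPGTT: a single full-size PPGTT that shadows the whole GGTT
 * address range.  All of its page tables are preallocated up front
 * (GVT/vgpu does not tolerate page directories disappearing, per the
 * comment in the function body), and the GGTT vma ops are swapped for the
 * aliasing variants that dispatch to the PPGTT and/or GGTT depending on
 * the bind flags.
 */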
695 static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
696 {
697 	struct i915_vm_pt_stash stash = {};
698 	struct i915_ppgtt *ppgtt;
699 	int err;
700 
701 	ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
702 	if (IS_ERR(ppgtt))
703 		return PTR_ERR(ppgtt);
704 
705 	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
706 		err = -ENODEV;
707 		goto err_ppgtt;
708 	}
709 
710 	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
711 	if (err)
712 		goto err_ppgtt;
713 
714 	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
715 	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
716 	i915_gem_object_unlock(ppgtt->vm.scratch[0]);
717 	if (err)
718 		goto err_stash;
719 
720 	/*
721 	 * Note we only pre-allocate as far as the end of the global
722 	 * GTT. On 48b / 4-level page-tables, the difference is very,
723 	 * very significant! We have to preallocate as GVT/vgpu does
724 	 * not like the page directory disappearing.
725 	 */
726 	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
727 
728 	ggtt->alias = ppgtt;
729 	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
730 
731 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
732 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
733 
734 	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
735 	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
736 
737 	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
738 	return 0;
739 
740 err_stash:
741 	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
742 err_ppgtt:
743 	i915_vm_put(&ppgtt->vm);
744 	return err;
745 }
746 
747 static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
748 {
749 	struct i915_ppgtt *ppgtt;
750 
751 	ppgtt = fetch_and_zero(&ggtt->alias);
752 	if (!ppgtt)
753 		return;
754 
755 	i915_vm_put(&ppgtt->vm);
756 
757 	ggtt->vm.vma_ops.bind_vma   = intel_ggtt_bind_vma;
758 	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
759 }
760 
761 int i915_init_ggtt(struct drm_i915_private *i915)
762 {
763 	int ret;
764 
765 	ret = init_ggtt(to_gt(i915)->ggtt);
766 	if (ret)
767 		return ret;
768 
769 	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
770 		ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
771 		if (ret)
772 			cleanup_init_ggtt(to_gt(i915)->ggtt);
773 	}
774 
775 	return 0;
776 }
777 
778 static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
779 {
780 	struct i915_vma *vma, *vn;
781 
782 	flush_workqueue(ggtt->vm.i915->wq);
783 	i915_gem_drain_freed_objects(ggtt->vm.i915);
784 
785 	mutex_lock(&ggtt->vm.mutex);
786 
787 	ggtt->vm.skip_pte_rewrite = true;
788 
789 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
790 		struct drm_i915_gem_object *obj = vma->obj;
791 		bool trylock;
792 
793 		trylock = i915_gem_object_trylock(obj, NULL);
794 		WARN_ON(!trylock);
795 
796 		WARN_ON(__i915_vma_unbind(vma));
797 		if (trylock)
798 			i915_gem_object_unlock(obj);
799 	}
800 
801 	if (drm_mm_node_allocated(&ggtt->error_capture))
802 		drm_mm_remove_node(&ggtt->error_capture);
803 	mutex_destroy(&ggtt->error_mutex);
804 
805 	ggtt_release_guc_top(ggtt);
806 	intel_vgt_deballoon(ggtt);
807 
808 	ggtt->vm.cleanup(&ggtt->vm);
809 
810 	mutex_unlock(&ggtt->vm.mutex);
811 	i915_address_space_fini(&ggtt->vm);
812 
813 #ifdef notyet
814 	arch_phys_wc_del(ggtt->mtrr);
815 
816 	if (ggtt->iomap.size)
817 		io_mapping_fini(&ggtt->iomap);
818 #endif
819 }
820 
821 /**
822  * i915_ggtt_driver_release - Clean up GGTT hardware initialization
823  * @i915: i915 device
824  */
825 void i915_ggtt_driver_release(struct drm_i915_private *i915)
826 {
827 	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
828 
829 	fini_aliasing_ppgtt(ggtt);
830 
831 	intel_ggtt_fini_fences(ggtt);
832 	ggtt_cleanup_hw(ggtt);
833 }
834 
835 /**
836  * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
837  * all free objects have been drained.
838  * @i915: i915 device
839  */
840 void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
841 {
842 	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
843 
844 	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
845 	dma_resv_fini(&ggtt->vm._resv);
846 }
847 
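/*
 * Decode the size of the GTT (the PTE array) from the GMCH control word.
 * On snb the GGMS field is the size in MB directly; on bdw+ it is
 * log2-encoded, e.g. (illustrative values only):
 *
 *	gen6: GGMS == 2  ->  2 << 20       == 2MB of PTEs
 *	gen8: GGMS == 3  ->  (1 << 3) << 20 == 8MB of PTEs
 *
 * chv uses the snb field layout but with the log2 encoding
 * (1 << (20 + GGMS)).
 */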
848 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
849 {
850 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
851 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
852 	return snb_gmch_ctl << 20;
853 }
854 
855 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
856 {
857 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
858 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
859 	if (bdw_gmch_ctl)
860 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
861 
862 #ifdef CONFIG_X86_32
863 	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
864 	if (bdw_gmch_ctl > 4)
865 		bdw_gmch_ctl = 4;
866 #endif
867 
868 	return bdw_gmch_ctl << 20;
869 }
870 
871 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
872 {
873 	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
874 	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
875 
876 	if (gmch_ctrl)
877 		return 1 << (20 + gmch_ctrl);
878 
879 	return 0;
880 }
881 
882 static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
883 {
884 	/*
885 	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
886 	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
887 	 */
888 	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
889 	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
890 }
891 
892 static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
893 {
894 	return gen6_gttmmadr_size(i915) / 2;
895 }
896 
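/*
 * Common gen6+ probe: map the GSM (the PTE half of the GTTMMADR BAR,
 * starting at gen6_gttadr_offset()) and set up the scratch page and its
 * pre-encoded PTE.  Two variants follow: the Linux one uses
 * ioremap()/ioremap_wc(), the OpenBSD one bus_space_map() with the
 * equivalent linear/prefetchable flags.
 */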
897 #ifdef __linux__
898 
899 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
900 {
901 	struct drm_i915_private *i915 = ggtt->vm.i915;
902 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
903 	phys_addr_t phys_addr;
904 	u32 pte_flags;
905 	int ret;
906 
907 	GEM_WARN_ON(pci_resource_len(pdev, GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
908 	phys_addr = pci_resource_start(pdev, GTTMMADR_BAR) + gen6_gttadr_offset(i915);
909 
910 	/*
911 	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
912 	 * will be dropped. For WC mappings in general we have 64 byte burst
913 	 * writes when the WC buffer is flushed, so we can't use it, but have to
914 	 * resort to an uncached mapping. The WC issue is easily caught by the
915 	 * readback check when writing GTT PTE entries.
916 	 */
917 	if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
918 		ggtt->gsm = ioremap(phys_addr, size);
919 	else
920 		ggtt->gsm = ioremap_wc(phys_addr, size);
921 	if (!ggtt->gsm) {
922 		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
923 		return -ENOMEM;
924 	}
925 
926 	kref_init(&ggtt->vm.resv_ref);
927 	ret = setup_scratch_page(&ggtt->vm);
928 	if (ret) {
929 		drm_err(&i915->drm, "Scratch setup failed\n");
930 		/* iounmap will also get called at remove, but meh */
931 		iounmap(ggtt->gsm);
932 		return ret;
933 	}
934 
935 	pte_flags = 0;
936 	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
937 		pte_flags |= PTE_LM;
938 
939 	ggtt->vm.scratch[0]->encode =
940 		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
941 				    I915_CACHE_NONE, pte_flags);
942 
943 	return 0;
944 }
945 
946 #else
947 
948 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
949 {
950 	struct drm_i915_private *i915 = ggtt->vm.i915;
951 	struct pci_dev *pdev = i915->drm.pdev;
952 	phys_addr_t phys_addr;
953 	bus_addr_t addr;
954 	bus_size_t len;
955 	pcireg_t type;
956 	int flags;
957 	u32 pte_flags;
958 	int ret;
959 
960 	/* For modern GENs, the PTEs and the register space are split within the same BAR */
961 	type = pci_mapreg_type(i915->pc, i915->tag, 0x10);
962 	ret = -pci_mapreg_info(i915->pc, i915->tag, 0x10, type,
963 	    &addr, &len, NULL);
964 	if (ret)
965 		return ret;
966 
967 	/*
968 	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
969 	 * will be dropped. For WC mappings in general we have 64 byte burst
970 	 * writes when the WC buffer is flushed, so we can't use it, but have to
971 	 * resort to an uncached mapping. The WC issue is easily caught by the
972 	 * readback check when writing GTT PTE entries.
973 	 */
974 	if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
975 		flags = 0;
976 	else
977 		flags = BUS_SPACE_MAP_PREFETCHABLE;
978 	ret = -bus_space_map(i915->bst, addr + len / 2, size,
979 	    flags | BUS_SPACE_MAP_LINEAR, &ggtt->gsm_bsh);
980 	if (ret) {
981 		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
982 		return ret;
983 	}
984 	ggtt->gsm = bus_space_vaddr(i915->bst, ggtt->gsm_bsh);
985 	ggtt->gsm_size = size;
986 	if (!ggtt->gsm) {
987 		DRM_ERROR("Failed to map the ggtt page table\n");
988 		return -ENOMEM;
989 	}
990 
991 	kref_init(&ggtt->vm.resv_ref);
992 	ret = setup_scratch_page(&ggtt->vm);
993 	if (ret) {
994 		drm_err(&i915->drm, "Scratch setup failed\n");
995 		/* bus_space_unmap will also get called at remove, but meh */
996 		bus_space_unmap(i915->bst, ggtt->gsm_bsh, size);
997 		return ret;
998 	}
999 
1000 	pte_flags = 0;
1001 	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
1002 		pte_flags |= PTE_LM;
1003 
1004 	ggtt->vm.scratch[0]->encode =
1005 		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
1006 				    I915_CACHE_NONE, pte_flags);
1007 
1008 	return 0;
1009 }
1010 
1011 #endif
1012 
1013 static void gen6_gmch_remove(struct i915_address_space *vm)
1014 {
1015 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
1016 
1017 #ifdef __linux__
1018 	iounmap(ggtt->gsm);
1019 #else
1020 	bus_space_unmap(vm->i915->bst, ggtt->gsm_bsh, ggtt->gsm_size);
1021 #endif
1022 	free_scratch(vm);
1023 }
1024 
1025 #ifdef __linux__
1026 static struct resource pci_resource(struct pci_dev *pdev, int bar)
1027 {
1028 	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
1029 					       pci_resource_len(pdev, bar));
1030 }
1031 #endif
1032 
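/*
 * gen8+ probe: on non-LMEM parts locate the mappable aperture from the
 * GMADR BAR, size the GTT from the GMCH control word, then install the
 * gen8 PTE/insert/clear/invalidate hooks.  Platforms needing the VT-d
 * concurrent-access workaround get the stop_machine()-serialised insert
 * paths, keeping the raw variants for the reset/error-capture path.
 */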
1033 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
1034 {
1035 	struct drm_i915_private *i915 = ggtt->vm.i915;
1036 	struct pci_dev *pdev = i915->drm.pdev;
1037 	unsigned int size;
1038 	u16 snb_gmch_ctl;
1039 
1040 	if (!HAS_LMEM(i915)) {
1041 #ifdef __linux__
1042 		if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
1043 			return -ENXIO;
1044 
1045 		ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
1046 		ggtt->mappable_end = resource_size(&ggtt->gmadr);
1047 #else
1048 		bus_addr_t base;
1049 		bus_size_t sz;
1050 		pcireg_t type;
1051 		int err;
1052 
1053 		type = pci_mapreg_type(i915->pc, i915->tag, 0x18);
1054 		err = -pci_mapreg_info(i915->pc, i915->tag, 0x18, type,
1055 		    &base, &sz, NULL);
1056 		if (err)
1057 			return err;
1058 		ggtt->gmadr.start = base;
1059 		ggtt->mappable_end = sz;
1060 #endif
1061 	}
1062 
1063 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1064 	if (IS_CHERRYVIEW(i915))
1065 		size = chv_get_total_gtt_size(snb_gmch_ctl);
1066 	else
1067 		size = gen8_get_total_gtt_size(snb_gmch_ctl);
1068 
1069 	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
1070 	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
1071 	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
1072 
1073 	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
1074 	ggtt->vm.cleanup = gen6_gmch_remove;
1075 	ggtt->vm.insert_page = gen8_ggtt_insert_page;
1076 	ggtt->vm.clear_range = nop_clear_range;
1077 	if (intel_scanout_needs_vtd_wa(i915))
1078 		ggtt->vm.clear_range = gen8_ggtt_clear_range;
1079 
1080 	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
1081 
1082 	/*
1083 	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
1084 	 * and always on CHV.
1085 	 */
1086 	if (intel_vm_no_concurrent_access_wa(i915)) {
1087 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
1088 		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
1089 
1090 		/*
1091 		 * Calling the stop_machine() version of the GGTT update
1092 		 * functions from the error capture/reset path would raise a
1093 		 * lockdep warning. Allow calling gen8_ggtt_insert_* directly
1094 		 * on the reset path, which is safe from parallel GGTT updates.
1095 		 */
1096 		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
1097 		ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;
1098 
1099 		ggtt->vm.bind_async_flags =
1100 			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
1101 	}
1102 
1103 	ggtt->invalidate = gen8_ggtt_invalidate;
1104 
1105 	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
1106 	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
1107 
1108 	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
1109 
1110 	setup_private_pat(ggtt->vm.gt->uncore);
1111 
1112 	return ggtt_probe_common(ggtt, size);
1113 }
1114 
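/*
 * gen6/7 GGTT PTE encoders.  Unlike gen8 these carry cacheability bits in
 * the PTE itself; the right encoder for the platform is picked in
 * gen6_gmch_probe() below (iris for eDRAM parts, hsw, byt, ivb, snb).
 */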
1115 static u64 snb_pte_encode(dma_addr_t addr,
1116 			  enum i915_cache_level level,
1117 			  u32 flags)
1118 {
1119 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1120 
1121 	switch (level) {
1122 	case I915_CACHE_L3_LLC:
1123 	case I915_CACHE_LLC:
1124 		pte |= GEN6_PTE_CACHE_LLC;
1125 		break;
1126 	case I915_CACHE_NONE:
1127 		pte |= GEN6_PTE_UNCACHED;
1128 		break;
1129 	default:
1130 		MISSING_CASE(level);
1131 	}
1132 
1133 	return pte;
1134 }
1135 
1136 static u64 ivb_pte_encode(dma_addr_t addr,
1137 			  enum i915_cache_level level,
1138 			  u32 flags)
1139 {
1140 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1141 
1142 	switch (level) {
1143 	case I915_CACHE_L3_LLC:
1144 		pte |= GEN7_PTE_CACHE_L3_LLC;
1145 		break;
1146 	case I915_CACHE_LLC:
1147 		pte |= GEN6_PTE_CACHE_LLC;
1148 		break;
1149 	case I915_CACHE_NONE:
1150 		pte |= GEN6_PTE_UNCACHED;
1151 		break;
1152 	default:
1153 		MISSING_CASE(level);
1154 	}
1155 
1156 	return pte;
1157 }
1158 
1159 static u64 byt_pte_encode(dma_addr_t addr,
1160 			  enum i915_cache_level level,
1161 			  u32 flags)
1162 {
1163 	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1164 
1165 	if (!(flags & PTE_READ_ONLY))
1166 		pte |= BYT_PTE_WRITEABLE;
1167 
1168 	if (level != I915_CACHE_NONE)
1169 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
1170 
1171 	return pte;
1172 }
1173 
1174 static u64 hsw_pte_encode(dma_addr_t addr,
1175 			  enum i915_cache_level level,
1176 			  u32 flags)
1177 {
1178 	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1179 
1180 	if (level != I915_CACHE_NONE)
1181 		pte |= HSW_WB_LLC_AGE3;
1182 
1183 	return pte;
1184 }
1185 
1186 static u64 iris_pte_encode(dma_addr_t addr,
1187 			   enum i915_cache_level level,
1188 			   u32 flags)
1189 {
1190 	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1191 
1192 	switch (level) {
1193 	case I915_CACHE_NONE:
1194 		break;
1195 	case I915_CACHE_WT:
1196 		pte |= HSW_WT_ELLC_LLC_AGE3;
1197 		break;
1198 	default:
1199 		pte |= HSW_WB_ELLC_LLC_AGE3;
1200 		break;
1201 	}
1202 
1203 	return pte;
1204 }
1205 
1206 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
1207 {
1208 	struct drm_i915_private *i915 = ggtt->vm.i915;
1209 	struct pci_dev *pdev = i915->drm.pdev;
1210 	unsigned int size;
1211 	u16 snb_gmch_ctl;
1212 
1213 #ifdef __linux__
1214 	if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
1215 		return -ENXIO;
1216 
1217 	ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
1218 	ggtt->mappable_end = resource_size(&ggtt->gmadr);
1219 #else
1220 {
1221 	bus_addr_t base;
1222 	bus_size_t sz;
1223 	pcireg_t type;
1224 	int err;
1225 
1226 	type = pci_mapreg_type(i915->pc, i915->tag, 0x18);
1227 	err = -pci_mapreg_info(i915->pc, i915->tag, 0x18, type,
1228 	    &base, &sz, NULL);
1229 	if (err)
1230 		return err;
1231 	ggtt->gmadr.start = base;
1232 	ggtt->mappable_end = sz;
1233 }
1234 #endif
1235 
1236 	/*
1237 	 * 64/512MB is the current min/max we actually know of, but this is
1238 	 * just a coarse sanity check.
1239 	 */
1240 	if (ggtt->mappable_end < (64 << 20) ||
1241 	    ggtt->mappable_end > (512 << 20)) {
1242 		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
1243 			&ggtt->mappable_end);
1244 		return -ENXIO;
1245 	}
1246 
1247 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1248 
1249 	size = gen6_get_total_gtt_size(snb_gmch_ctl);
1250 	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
1251 
1252 	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
1253 	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
1254 
1255 	ggtt->vm.clear_range = nop_clear_range;
1256 	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
1257 		ggtt->vm.clear_range = gen6_ggtt_clear_range;
1258 	ggtt->vm.insert_page = gen6_ggtt_insert_page;
1259 	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
1260 	ggtt->vm.cleanup = gen6_gmch_remove;
1261 
1262 	ggtt->invalidate = gen6_ggtt_invalidate;
1263 
1264 	if (HAS_EDRAM(i915))
1265 		ggtt->vm.pte_encode = iris_pte_encode;
1266 	else if (IS_HASWELL(i915))
1267 		ggtt->vm.pte_encode = hsw_pte_encode;
1268 	else if (IS_VALLEYVIEW(i915))
1269 		ggtt->vm.pte_encode = byt_pte_encode;
1270 	else if (GRAPHICS_VER(i915) >= 7)
1271 		ggtt->vm.pte_encode = ivb_pte_encode;
1272 	else
1273 		ggtt->vm.pte_encode = snb_pte_encode;
1274 
1275 	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
1276 	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
1277 
1278 	return ggtt_probe_common(ggtt, size);
1279 }
1280 
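/*
 * Top-level probe: pick the gen8/gen6/gmch backend, then sanity-clamp the
 * result; the driver never expects a GGTT spanning more than 32 bits of
 * address space, nor a mappable aperture extending past the end of the
 * GGTT.
 */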
1281 static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
1282 {
1283 	struct drm_i915_private *i915 = gt->i915;
1284 	int ret;
1285 
1286 	ggtt->vm.gt = gt;
1287 	ggtt->vm.i915 = i915;
1288 #ifdef notyet
1289 	ggtt->vm.dma = i915->drm.dev;
1290 #endif
1291 	dma_resv_init(&ggtt->vm._resv);
1292 
1293 	if (GRAPHICS_VER(i915) >= 8)
1294 		ret = gen8_gmch_probe(ggtt);
1295 	else if (GRAPHICS_VER(i915) >= 6)
1296 		ret = gen6_gmch_probe(ggtt);
1297 	else
1298 		ret = intel_ggtt_gmch_probe(ggtt);
1299 
1300 	if (ret) {
1301 		dma_resv_fini(&ggtt->vm._resv);
1302 		return ret;
1303 	}
1304 
1305 	if ((ggtt->vm.total - 1) >> 32) {
1306 		drm_err(&i915->drm,
1307 			"We never expected a Global GTT with more than 32bits"
1308 			" of address space! Found %lldM!\n",
1309 			ggtt->vm.total >> 20);
1310 		ggtt->vm.total = 1ULL << 32;
1311 		ggtt->mappable_end =
1312 			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
1313 	}
1314 
1315 	if (ggtt->mappable_end > ggtt->vm.total) {
1316 		drm_err(&i915->drm,
1317 			"mappable aperture extends past end of GGTT,"
1318 			" aperture=%pa, total=%llx\n",
1319 			&ggtt->mappable_end, ggtt->vm.total);
1320 		ggtt->mappable_end = ggtt->vm.total;
1321 	}
1322 
1323 	/* GMADR is the PCI mmio aperture into the global GTT. */
1324 	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
1325 	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
1326 		(u64)ggtt->mappable_end >> 20);
1327 	drm_dbg(&i915->drm, "DSM size = %lluM\n",
1328 		(u64)resource_size(&intel_graphics_stolen_res) >> 20);
1329 
1330 	return 0;
1331 }
1332 
1333 /**
1334  * i915_ggtt_probe_hw - Probe GGTT hardware location
1335  * @i915: i915 device
1336  */
1337 int i915_ggtt_probe_hw(struct drm_i915_private *i915)
1338 {
1339 	int ret;
1340 
1341 	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
1342 	if (ret)
1343 		return ret;
1344 
1345 	if (i915_vtd_active(i915))
1346 		drm_info(&i915->drm, "VT-d active for gfx access\n");
1347 
1348 	return 0;
1349 }
1350 
1351 int i915_ggtt_enable_hw(struct drm_i915_private *i915)
1352 {
1353 	if (GRAPHICS_VER(i915) < 6)
1354 		return intel_ggtt_gmch_enable_hw(i915);
1355 
1356 	return 0;
1357 }
1358 
1359 void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
1360 {
1361 	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
1362 
1363 	ggtt->invalidate = guc_ggtt_invalidate;
1364 
1365 	ggtt->invalidate(ggtt);
1366 }
1367 
1368 void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
1369 {
1370 	/* XXX Temporary pardon for error unload */
1371 	if (ggtt->invalidate == gen8_ggtt_invalidate)
1372 		return;
1373 
1374 	/* We should only be called after i915_ggtt_enable_guc() */
1375 	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
1376 
1377 	ggtt->invalidate = gen8_ggtt_invalidate;
1378 
1379 	ggtt->invalidate(ggtt);
1380 }
1381 
1382 /**
1383  * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
1384  * @vm: The VM to restore the mappings for
1385  *
1386  * Restore the memory mappings for all objects mapped to HW via the GGTT or a
1387  * DPT page table.
1388  *
1389  * Returns %true if restoring the mapping for any object that was in a write
1390  * domain before suspend.
1391  */
1392 bool i915_ggtt_resume_vm(struct i915_address_space *vm)
1393 {
1394 	struct i915_vma *vma;
1395 	bool write_domain_objs = false;
1396 	bool retained_ptes;
1397 
1398 	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
1399 
1400 	/*
1401 	 * First fill our portion of the GTT with scratch pages if
1402 	 * they were not retained across suspend.
1403 	 */
1404 	retained_ptes = suspend_retains_ptes(vm) &&
1405 		!i915_vm_to_ggtt(vm)->pte_lost &&
1406 		!GEM_WARN_ON(i915_vm_to_ggtt(vm)->probed_pte != read_last_pte(vm));
1407 
1408 	if (!retained_ptes)
1409 		vm->clear_range(vm, 0, vm->total);
1410 
1411 	/* clflush objects bound into the GGTT and rebind them. */
1412 	list_for_each_entry(vma, &vm->bound_list, vm_link) {
1413 		struct drm_i915_gem_object *obj = vma->obj;
1414 		unsigned int was_bound =
1415 			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
1416 
1417 		GEM_BUG_ON(!was_bound);
1418 		if (!retained_ptes) {
1419 			/*
1420 			 * Clear the bound flags of the vma resource to allow
1421 			 * ptes to be repopulated.
1422 			 */
1423 			vma->resource->bound_flags = 0;
1424 			vma->ops->bind_vma(vm, NULL, vma->resource,
1425 					   obj ? obj->cache_level : 0,
1426 					   was_bound);
1427 		}
1428 		if (obj) { /* only used during resume => exclusive access */
1429 			write_domain_objs |= fetch_and_zero(&obj->write_domain);
1430 			obj->read_domains |= I915_GEM_DOMAIN_GTT;
1431 		}
1432 	}
1433 
1434 	return write_domain_objs;
1435 }
1436 
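/*
 * Resume: clear any pending GT faults, rewrite the GGTT (unless the PTEs
 * were retained, see i915_ggtt_resume_vm() above), invalidate, and flush
 * CPU caches with wbinvd if any restored object was still in a write
 * domain.  gen8+ also re-establishes the private PAT, and the fence
 * registers are restored last.
 */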
1437 void i915_ggtt_resume(struct i915_ggtt *ggtt)
1438 {
1439 	bool flush;
1440 
1441 	intel_gt_check_and_clear_faults(ggtt->vm.gt);
1442 
1443 	flush = i915_ggtt_resume_vm(&ggtt->vm);
1444 
1445 	ggtt->invalidate(ggtt);
1446 
1447 	if (flush)
1448 		wbinvd_on_all_cpus();
1449 
1450 	if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
1451 		setup_private_pat(ggtt->vm.gt->uncore);
1452 
1453 	intel_ggtt_restore_fences(ggtt);
1454 }
1455 
1456 void i915_ggtt_mark_pte_lost(struct drm_i915_private *i915, bool val)
1457 {
1458 	to_gt(i915)->ggtt->pte_lost = val;
1459 }
1460