xref: /openbsd-src/sys/dev/pci/drm/i915/gt/intel_ring_submission.c (revision f005ef32267c16bdb134f0e9fa4477dbe07c263a)
15ca02815Sjsg // SPDX-License-Identifier: MIT
2c349dbc7Sjsg /*
35ca02815Sjsg  * Copyright © 2008-2021 Intel Corporation
4c349dbc7Sjsg  */
5c349dbc7Sjsg 
61bb76ff1Sjsg #include <drm/drm_cache.h>
71bb76ff1Sjsg 
81bb76ff1Sjsg #include "gem/i915_gem_internal.h"
91bb76ff1Sjsg 
10ad8b1aafSjsg #include "gen2_engine_cs.h"
11ad8b1aafSjsg #include "gen6_engine_cs.h"
12c349dbc7Sjsg #include "gen6_ppgtt.h"
13c349dbc7Sjsg #include "gen7_renderclear.h"
14c349dbc7Sjsg #include "i915_drv.h"
15*f005ef32Sjsg #include "i915_irq.h"
16ad8b1aafSjsg #include "i915_mitigations.h"
17*f005ef32Sjsg #include "i915_reg.h"
18ad8b1aafSjsg #include "intel_breadcrumbs.h"
19c349dbc7Sjsg #include "intel_context.h"
201bb76ff1Sjsg #include "intel_engine_regs.h"
21c349dbc7Sjsg #include "intel_gt.h"
225ca02815Sjsg #include "intel_gt_irq.h"
231bb76ff1Sjsg #include "intel_gt_regs.h"
24c349dbc7Sjsg #include "intel_reset.h"
25c349dbc7Sjsg #include "intel_ring.h"
26ad8b1aafSjsg #include "shmem_utils.h"
275ca02815Sjsg #include "intel_engine_heartbeat.h"
283f069f93Sjsg #include "intel_engine_pm.h"
29c349dbc7Sjsg 
30c349dbc7Sjsg /* Rough estimate of the typical request size, performing a flush,
31c349dbc7Sjsg  * set-context and then emitting the batch.
32c349dbc7Sjsg  */
33c349dbc7Sjsg #define LEGACY_REQUEST_SIZE 200
34c349dbc7Sjsg 
35c349dbc7Sjsg static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
36c349dbc7Sjsg {
37c349dbc7Sjsg 	/*
38c349dbc7Sjsg 	 * Keep the render interrupt unmasked as this papers over
39c349dbc7Sjsg 	 * lost interrupts following a reset.
40c349dbc7Sjsg 	 */
41c349dbc7Sjsg 	if (engine->class == RENDER_CLASS) {
425ca02815Sjsg 		if (GRAPHICS_VER(engine->i915) >= 6)
43c349dbc7Sjsg 			mask &= ~BIT(0);
44c349dbc7Sjsg 		else
45c349dbc7Sjsg 			mask &= ~I915_USER_INTERRUPT;
46c349dbc7Sjsg 	}
47c349dbc7Sjsg 
48c349dbc7Sjsg 	intel_engine_set_hwsp_writemask(engine, mask);
49c349dbc7Sjsg }
50c349dbc7Sjsg 
51c349dbc7Sjsg static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
52c349dbc7Sjsg {
53c349dbc7Sjsg 	u32 addr;
54c349dbc7Sjsg 
55c349dbc7Sjsg 	addr = lower_32_bits(phys);
565ca02815Sjsg 	if (GRAPHICS_VER(engine->i915) >= 4)
57c349dbc7Sjsg 		addr |= (phys >> 28) & 0xf0;
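	/*
	 * Note: on gen4+ the shift by 28 plus the 0xf0 mask above fold
	 * bits 35:32 of the physical address into bits 7:4 of the value
	 * written to HWS_PGA; older parts take only the low 32 bits.
	 */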
58c349dbc7Sjsg 
59c349dbc7Sjsg 	intel_uncore_write(engine->uncore, HWS_PGA, addr);
60c349dbc7Sjsg }
61c349dbc7Sjsg 
62c349dbc7Sjsg static struct vm_page *status_page(struct intel_engine_cs *engine)
63c349dbc7Sjsg {
64c349dbc7Sjsg 	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
65c349dbc7Sjsg 
66c349dbc7Sjsg 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
67c349dbc7Sjsg 	return sg_page(obj->mm.pages->sgl);
68c349dbc7Sjsg }
69c349dbc7Sjsg 
70c349dbc7Sjsg static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
71c349dbc7Sjsg {
72c349dbc7Sjsg 	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
73c349dbc7Sjsg 	set_hwstam(engine, ~0u);
74c349dbc7Sjsg }
75c349dbc7Sjsg 
76c349dbc7Sjsg static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
77c349dbc7Sjsg {
78c349dbc7Sjsg 	i915_reg_t hwsp;
79c349dbc7Sjsg 
80c349dbc7Sjsg 	/*
81c349dbc7Sjsg 	 * The ring status page addresses are no longer next to the rest of
82c349dbc7Sjsg 	 * the ring registers as of gen7.
83c349dbc7Sjsg 	 */
845ca02815Sjsg 	if (GRAPHICS_VER(engine->i915) == 7) {
85c349dbc7Sjsg 		switch (engine->id) {
86c349dbc7Sjsg 		/*
87c349dbc7Sjsg 		 * No more rings exist on Gen7. The default case exists only
88c349dbc7Sjsg 		 * to silence gcc's switch-check warning.
89c349dbc7Sjsg 		 */
90c349dbc7Sjsg 		default:
91c349dbc7Sjsg 			GEM_BUG_ON(engine->id);
92ad8b1aafSjsg 			fallthrough;
93c349dbc7Sjsg 		case RCS0:
94c349dbc7Sjsg 			hwsp = RENDER_HWS_PGA_GEN7;
95c349dbc7Sjsg 			break;
96c349dbc7Sjsg 		case BCS0:
97c349dbc7Sjsg 			hwsp = BLT_HWS_PGA_GEN7;
98c349dbc7Sjsg 			break;
99c349dbc7Sjsg 		case VCS0:
100c349dbc7Sjsg 			hwsp = BSD_HWS_PGA_GEN7;
101c349dbc7Sjsg 			break;
102c349dbc7Sjsg 		case VECS0:
103c349dbc7Sjsg 			hwsp = VEBOX_HWS_PGA_GEN7;
104c349dbc7Sjsg 			break;
105c349dbc7Sjsg 		}
1065ca02815Sjsg 	} else if (GRAPHICS_VER(engine->i915) == 6) {
107c349dbc7Sjsg 		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
108c349dbc7Sjsg 	} else {
109c349dbc7Sjsg 		hwsp = RING_HWS_PGA(engine->mmio_base);
110c349dbc7Sjsg 	}
111c349dbc7Sjsg 
1125ca02815Sjsg 	intel_uncore_write_fw(engine->uncore, hwsp, offset);
1135ca02815Sjsg 	intel_uncore_posting_read_fw(engine->uncore, hwsp);
114c349dbc7Sjsg }
115c349dbc7Sjsg 
116c349dbc7Sjsg static void flush_cs_tlb(struct intel_engine_cs *engine)
117c349dbc7Sjsg {
1185ca02815Sjsg 	if (!IS_GRAPHICS_VER(engine->i915, 6, 7))
119c349dbc7Sjsg 		return;
120c349dbc7Sjsg 
121c349dbc7Sjsg 	/* ring should be idle before issuing a sync flush */
1221bb76ff1Sjsg 	if ((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0)
1231bb76ff1Sjsg 		drm_warn(&engine->i915->drm, "%s not idle before sync flush!\n",
1241bb76ff1Sjsg 			 engine->name);
125c349dbc7Sjsg 
1265ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_INSTPM,
127c349dbc7Sjsg 			_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
128c349dbc7Sjsg 					   INSTPM_SYNC_FLUSH));
1295ca02815Sjsg 	if (__intel_wait_for_register_fw(engine->uncore,
130c349dbc7Sjsg 					 RING_INSTPM(engine->mmio_base),
131c349dbc7Sjsg 					 INSTPM_SYNC_FLUSH, 0,
1325ca02815Sjsg 					 2000, 0, NULL))
1335ca02815Sjsg 		ENGINE_TRACE(engine,
1345ca02815Sjsg 			     "wait for SyncFlush to complete for TLB invalidation timed out\n");
135c349dbc7Sjsg }
136c349dbc7Sjsg 
137c349dbc7Sjsg static void ring_setup_status_page(struct intel_engine_cs *engine)
138c349dbc7Sjsg {
139c349dbc7Sjsg 	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
140c349dbc7Sjsg 	set_hwstam(engine, ~0u);
141c349dbc7Sjsg 
142c349dbc7Sjsg 	flush_cs_tlb(engine);
143c349dbc7Sjsg }
144c349dbc7Sjsg 
145c349dbc7Sjsg static struct i915_address_space *vm_alias(struct i915_address_space *vm)
146c349dbc7Sjsg {
147c349dbc7Sjsg 	if (i915_is_ggtt(vm))
148c349dbc7Sjsg 		vm = &i915_vm_to_ggtt(vm)->alias->vm;
149c349dbc7Sjsg 
150c349dbc7Sjsg 	return vm;
151c349dbc7Sjsg }
152c349dbc7Sjsg 
153ad8b1aafSjsg static u32 pp_dir(struct i915_address_space *vm)
154ad8b1aafSjsg {
155ad8b1aafSjsg 	return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
156ad8b1aafSjsg }
157ad8b1aafSjsg 
158c349dbc7Sjsg static void set_pp_dir(struct intel_engine_cs *engine)
159c349dbc7Sjsg {
160c349dbc7Sjsg 	struct i915_address_space *vm = vm_alias(engine->gt->vm);
161c349dbc7Sjsg 
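	/*
	 * Note: this points the ring's PP_DIR registers at the aliasing
	 * ppgtt (if one exists) and, on gen7+, additionally sets the
	 * GFX_PPGTT_ENABLE bit in RING_MODE.
	 */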
1625ca02815Sjsg 	if (!vm)
1635ca02815Sjsg 		return;
1645ca02815Sjsg 
1655ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
1665ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm));
1675ca02815Sjsg 
1685ca02815Sjsg 	if (GRAPHICS_VER(engine->i915) >= 7) {
1695ca02815Sjsg 		ENGINE_WRITE_FW(engine,
1705ca02815Sjsg 				RING_MODE_GEN7,
1715ca02815Sjsg 				_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
172c349dbc7Sjsg 	}
173c349dbc7Sjsg }
174c349dbc7Sjsg 
1755ca02815Sjsg static bool stop_ring(struct intel_engine_cs *engine)
1765ca02815Sjsg {
1775ca02815Sjsg 	/* Empty the ring by skipping to the end */
1785ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL));
1795ca02815Sjsg 	ENGINE_POSTING_READ(engine, RING_HEAD);
1805ca02815Sjsg 
1815ca02815Sjsg 	/* The ring must be empty before it is disabled */
1825ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_CTL, 0);
1835ca02815Sjsg 	ENGINE_POSTING_READ(engine, RING_CTL);
1845ca02815Sjsg 
1855ca02815Sjsg 	/* Then reset the disabled ring */
1865ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_HEAD, 0);
1875ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_TAIL, 0);
1885ca02815Sjsg 
1895ca02815Sjsg 	return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0;
1905ca02815Sjsg }
1915ca02815Sjsg 
192c349dbc7Sjsg static int xcs_resume(struct intel_engine_cs *engine)
193c349dbc7Sjsg {
194c349dbc7Sjsg 	struct intel_ring *ring = engine->legacy.ring;
195c349dbc7Sjsg 
196c349dbc7Sjsg 	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
197c349dbc7Sjsg 		     ring->head, ring->tail);
198c349dbc7Sjsg 
1995ca02815Sjsg 	/*
2005ca02815Sjsg 	 * Double check the ring is empty & disabled before we resume. Called
2015ca02815Sjsg 	 * from atomic context during PCI probe, so _hardirq().
2025ca02815Sjsg 	 */
2035ca02815Sjsg 	intel_synchronize_hardirq(engine->i915);
2045ca02815Sjsg 	if (!stop_ring(engine))
2055ca02815Sjsg 		goto err;
206c349dbc7Sjsg 
2075ca02815Sjsg 	if (HWS_NEEDS_PHYSICAL(engine->i915))
208c349dbc7Sjsg 		ring_setup_phys_status_page(engine);
209c349dbc7Sjsg 	else
210c349dbc7Sjsg 		ring_setup_status_page(engine);
211c349dbc7Sjsg 
212ad8b1aafSjsg 	intel_breadcrumbs_reset(engine->breadcrumbs);
213c349dbc7Sjsg 
214c349dbc7Sjsg 	/* Enforce ordering by reading HEAD register back */
215c349dbc7Sjsg 	ENGINE_POSTING_READ(engine, RING_HEAD);
216c349dbc7Sjsg 
217c349dbc7Sjsg 	/*
218c349dbc7Sjsg 	 * Initialize the ring. This must happen _after_ we've cleared the ring
219c349dbc7Sjsg 	 * registers with the above sequence (the readback of the HEAD registers
220c349dbc7Sjsg 	 * also enforces ordering), otherwise the hw might lose the new ring
221c349dbc7Sjsg 	 * register values.
222c349dbc7Sjsg 	 */
2235ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma));
224c349dbc7Sjsg 
225c349dbc7Sjsg 	/* Check that the ring offsets point within the ring! */
226c349dbc7Sjsg 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
227c349dbc7Sjsg 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
228c349dbc7Sjsg 	intel_ring_update_space(ring);
229c349dbc7Sjsg 
230c349dbc7Sjsg 	set_pp_dir(engine);
231c349dbc7Sjsg 
232c349dbc7Sjsg 	/* First wake the ring up to an empty/idle ring */
2335ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
2345ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
235c349dbc7Sjsg 	ENGINE_POSTING_READ(engine, RING_TAIL);
236c349dbc7Sjsg 
2375ca02815Sjsg 	ENGINE_WRITE_FW(engine, RING_CTL,
2385ca02815Sjsg 			RING_CTL_SIZE(ring->size) | RING_VALID);
239c349dbc7Sjsg 
240c349dbc7Sjsg 	/* If the head is still not zero, the ring is dead */
2415ca02815Sjsg 	if (__intel_wait_for_register_fw(engine->uncore,
242c349dbc7Sjsg 					 RING_CTL(engine->mmio_base),
243c349dbc7Sjsg 					 RING_VALID, RING_VALID,
2445ca02815Sjsg 					 5000, 0, NULL))
2455ca02815Sjsg 		goto err;
2465ca02815Sjsg 
2475ca02815Sjsg 	if (GRAPHICS_VER(engine->i915) > 2)
2485ca02815Sjsg 		ENGINE_WRITE_FW(engine,
2495ca02815Sjsg 				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
2505ca02815Sjsg 
2515ca02815Sjsg 	/* Now awake, let it get started */
2525ca02815Sjsg 	if (ring->tail != ring->head) {
2535ca02815Sjsg 		ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail);
2545ca02815Sjsg 		ENGINE_POSTING_READ(engine, RING_TAIL);
2555ca02815Sjsg 	}
2565ca02815Sjsg 
2575ca02815Sjsg 	/* Papering over lost _interrupts_ immediately following the restart */
2585ca02815Sjsg 	intel_engine_signal_breadcrumbs(engine);
2595ca02815Sjsg 	return 0;
2605ca02815Sjsg 
2615ca02815Sjsg err:
2625ca02815Sjsg 	drm_err(&engine->i915->drm,
2635ca02815Sjsg 		"%s initialization failed; "
264c349dbc7Sjsg 		"ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
265c349dbc7Sjsg 		engine->name,
266c349dbc7Sjsg 		ENGINE_READ(engine, RING_CTL),
267c349dbc7Sjsg 		ENGINE_READ(engine, RING_CTL) & RING_VALID,
268c349dbc7Sjsg 		ENGINE_READ(engine, RING_HEAD), ring->head,
269c349dbc7Sjsg 		ENGINE_READ(engine, RING_TAIL), ring->tail,
270c349dbc7Sjsg 		ENGINE_READ(engine, RING_START),
271c349dbc7Sjsg 		i915_ggtt_offset(ring->vma));
2725ca02815Sjsg 	return -EIO;
273c349dbc7Sjsg }
274c349dbc7Sjsg 
2755ca02815Sjsg static void sanitize_hwsp(struct intel_engine_cs *engine)
2765ca02815Sjsg {
2775ca02815Sjsg 	struct intel_timeline *tl;
278c349dbc7Sjsg 
2795ca02815Sjsg 	list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
2805ca02815Sjsg 		intel_timeline_reset_seqno(tl);
281c349dbc7Sjsg }
282c349dbc7Sjsg 
2835ca02815Sjsg static void xcs_sanitize(struct intel_engine_cs *engine)
2845ca02815Sjsg {
2855ca02815Sjsg 	/*
2865ca02815Sjsg 	 * Poison residual state on resume, in case the suspend didn't!
2875ca02815Sjsg 	 *
2885ca02815Sjsg 	 * We have to assume that across suspend/resume (or other loss
2895ca02815Sjsg 	 * of control) the contents of our pinned buffers have been
2905ca02815Sjsg 	 * lost, replaced by garbage. Since this doesn't always happen,
2915ca02815Sjsg 	 * let's poison such state so that we more quickly spot when
2925ca02815Sjsg 	 * we falsely assume it has been preserved.
2935ca02815Sjsg 	 */
2945ca02815Sjsg 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2955ca02815Sjsg 		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
296c349dbc7Sjsg 
2975ca02815Sjsg 	/*
2985ca02815Sjsg 	 * The kernel_context HWSP is stored in the status_page. As above,
2995ca02815Sjsg 	 * that may be lost on resume/initialisation, and so we need to
3005ca02815Sjsg 	 * reset the value in the HWSP.
3015ca02815Sjsg 	 */
3025ca02815Sjsg 	sanitize_hwsp(engine);
3035ca02815Sjsg 
3045ca02815Sjsg 	/* And scrub the dirty cachelines for the HWSP */
305dc6d8b3dSjsg 	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
3063f069f93Sjsg 
3073f069f93Sjsg 	intel_engine_reset_pinned_contexts(engine);
308c349dbc7Sjsg }
309c349dbc7Sjsg 
310c349dbc7Sjsg static void reset_prepare(struct intel_engine_cs *engine)
311c349dbc7Sjsg {
312c349dbc7Sjsg 	/*
313c349dbc7Sjsg 	 * We stop engines, otherwise we might get a failed reset and a
314c349dbc7Sjsg 	 * dead gpu (on elk). Even a gpu as modern as kbl can suffer a
315c349dbc7Sjsg 	 * system hang if a batchbuffer is progressing when the reset is
316c349dbc7Sjsg 	 * issued, regardless of the READY_TO_RESET ack.
317c349dbc7Sjsg 	 * Thus assume it is best to stop engines on all gens
318c349dbc7Sjsg 	 * where we have a gpu reset.
319c349dbc7Sjsg 	 *
320c349dbc7Sjsg 	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
321c349dbc7Sjsg 	 *
322c349dbc7Sjsg 	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
3235ca02815Sjsg 	 * WaClearRingBufHeadRegAtInit:ctg,elk
324c349dbc7Sjsg 	 *
325c349dbc7Sjsg 	 * FIXME: Wa for more modern gens needs to be validated
326c349dbc7Sjsg 	 */
327c349dbc7Sjsg 	ENGINE_TRACE(engine, "\n");
3285ca02815Sjsg 	intel_engine_stop_cs(engine);
329c349dbc7Sjsg 
3305ca02815Sjsg 	if (!stop_ring(engine)) {
3315ca02815Sjsg 		/* G45 ring initialization often fails to reset head to zero */
3325ca02815Sjsg 		ENGINE_TRACE(engine,
3335ca02815Sjsg 			     "HEAD not reset to zero, "
3345ca02815Sjsg 			     "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n",
3355ca02815Sjsg 			     ENGINE_READ_FW(engine, RING_CTL),
3365ca02815Sjsg 			     ENGINE_READ_FW(engine, RING_HEAD),
3375ca02815Sjsg 			     ENGINE_READ_FW(engine, RING_TAIL),
3385ca02815Sjsg 			     ENGINE_READ_FW(engine, RING_START));
3395ca02815Sjsg 		if (!stop_ring(engine)) {
3405ca02815Sjsg 			drm_err(&engine->i915->drm,
3415ca02815Sjsg 				"failed to set %s head to zero "
3425ca02815Sjsg 				"ctl %08x head %08x tail %08x start %08x\n",
3435ca02815Sjsg 				engine->name,
3445ca02815Sjsg 				ENGINE_READ_FW(engine, RING_CTL),
3455ca02815Sjsg 				ENGINE_READ_FW(engine, RING_HEAD),
3465ca02815Sjsg 				ENGINE_READ_FW(engine, RING_TAIL),
3475ca02815Sjsg 				ENGINE_READ_FW(engine, RING_START));
3485ca02815Sjsg 		}
3495ca02815Sjsg 	}
350c349dbc7Sjsg }
351c349dbc7Sjsg 
352c349dbc7Sjsg static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
353c349dbc7Sjsg {
354c349dbc7Sjsg 	struct i915_request *pos, *rq;
355c349dbc7Sjsg 	unsigned long flags;
356c349dbc7Sjsg 	u32 head;
357c349dbc7Sjsg 
358c349dbc7Sjsg 	rq = NULL;
3595ca02815Sjsg 	spin_lock_irqsave(&engine->sched_engine->lock, flags);
3605ca02815Sjsg 	rcu_read_lock();
3615ca02815Sjsg 	list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) {
3625ca02815Sjsg 		if (!__i915_request_is_complete(pos)) {
363c349dbc7Sjsg 			rq = pos;
364c349dbc7Sjsg 			break;
365c349dbc7Sjsg 		}
366c349dbc7Sjsg 	}
3675ca02815Sjsg 	rcu_read_unlock();
368c349dbc7Sjsg 
369c349dbc7Sjsg 	/*
370c349dbc7Sjsg 	 * The guilty request will get skipped on a hung engine.
371c349dbc7Sjsg 	 *
372c349dbc7Sjsg 	 * Users of client default contexts do not rely on logical
373c349dbc7Sjsg 	 * state preserved between batches so it is safe to execute
374c349dbc7Sjsg 	 * queued requests following the hang. Non default contexts
375c349dbc7Sjsg 	 * rely on preserved state, so skipping a batch loses the
376c349dbc7Sjsg 	 * evolution of the state and it needs to be considered corrupted.
377c349dbc7Sjsg 	 * Executing more queued batches on top of corrupted state is
378c349dbc7Sjsg 	 * risky. But we take the risk by trying to advance through
379c349dbc7Sjsg 	 * the queued requests in order to make the client behaviour
380c349dbc7Sjsg 	 * more predictable around resets, by not throwing away a random
381c349dbc7Sjsg 	 * amount of batches it has prepared for execution. Sophisticated
382c349dbc7Sjsg 	 * clients can use gem_reset_stats_ioctl and dma fence status
383c349dbc7Sjsg 	 * (exported via sync_file info ioctl on explicit fences) to observe
384c349dbc7Sjsg 	 * when it loses the context state and should rebuild accordingly.
385c349dbc7Sjsg 	 *
386c349dbc7Sjsg 	 * The context ban, and ultimately the client ban, mechanism are safety
387c349dbc7Sjsg 	 * valves if client submission ends up resulting in nothing more than
388c349dbc7Sjsg 	 * subsequent hangs.
389c349dbc7Sjsg 	 */
390c349dbc7Sjsg 
391c349dbc7Sjsg 	if (rq) {
392c349dbc7Sjsg 		/*
393c349dbc7Sjsg 		 * Try to restore the logical GPU state to match the
394c349dbc7Sjsg 		 * continuation of the request queue. If we skip the
395c349dbc7Sjsg 		 * context/PD restore, then the next request may try to execute
396c349dbc7Sjsg 		 * assuming that its context is valid and loaded on the GPU and
397c349dbc7Sjsg 		 * so may try to access invalid memory, prompting repeated GPU
398c349dbc7Sjsg 		 * hangs.
399c349dbc7Sjsg 		 *
400c349dbc7Sjsg 		 * If the request was guilty, we still restore the logical
401c349dbc7Sjsg 		 * state in case the next request requires it (e.g. the
402c349dbc7Sjsg 		 * aliasing ppgtt), but skip over the hung batch.
403c349dbc7Sjsg 		 *
404c349dbc7Sjsg 		 * If the request was innocent, we try to replay the request
405c349dbc7Sjsg 		 * with the restored context.
406c349dbc7Sjsg 		 */
407c349dbc7Sjsg 		__i915_request_reset(rq, stalled);
408c349dbc7Sjsg 
409c349dbc7Sjsg 		GEM_BUG_ON(rq->ring != engine->legacy.ring);
410c349dbc7Sjsg 		head = rq->head;
411c349dbc7Sjsg 	} else {
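		/*
		 * No incomplete request was found: everything submitted has
		 * already completed, so treat the ring as empty by moving
		 * head up to the current tail.
		 */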
412c349dbc7Sjsg 		head = engine->legacy.ring->tail;
413c349dbc7Sjsg 	}
414c349dbc7Sjsg 	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
415c349dbc7Sjsg 
4165ca02815Sjsg 	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
417c349dbc7Sjsg }
418c349dbc7Sjsg 
419c349dbc7Sjsg static void reset_finish(struct intel_engine_cs *engine)
420c349dbc7Sjsg {
421c349dbc7Sjsg }
422c349dbc7Sjsg 
423c349dbc7Sjsg static void reset_cancel(struct intel_engine_cs *engine)
424c349dbc7Sjsg {
425c349dbc7Sjsg 	struct i915_request *request;
426c349dbc7Sjsg 	unsigned long flags;
427c349dbc7Sjsg 
4285ca02815Sjsg 	spin_lock_irqsave(&engine->sched_engine->lock, flags);
429c349dbc7Sjsg 
430c349dbc7Sjsg 	/* Mark all submitted requests as skipped. */
4315ca02815Sjsg 	list_for_each_entry(request, &engine->sched_engine->requests, sched.link)
4325ca02815Sjsg 		i915_request_put(i915_request_mark_eio(request));
4335ca02815Sjsg 	intel_engine_signal_breadcrumbs(engine);
434c349dbc7Sjsg 
435c349dbc7Sjsg 	/* Remaining _unready_ requests will be nop'ed when submitted */
436c349dbc7Sjsg 
4375ca02815Sjsg 	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
438c349dbc7Sjsg }
439c349dbc7Sjsg 
440c349dbc7Sjsg static void i9xx_submit_request(struct i915_request *request)
441c349dbc7Sjsg {
442c349dbc7Sjsg 	i915_request_submit(request);
443c349dbc7Sjsg 	wmb(); /* paranoid flush writes out of the WCB before mmio */
444c349dbc7Sjsg 
445c349dbc7Sjsg 	ENGINE_WRITE(request->engine, RING_TAIL,
446c349dbc7Sjsg 		     intel_ring_set_tail(request->ring, request->tail));
447c349dbc7Sjsg }
448c349dbc7Sjsg 
449c349dbc7Sjsg static void __ring_context_fini(struct intel_context *ce)
450c349dbc7Sjsg {
451c349dbc7Sjsg 	i915_vma_put(ce->state);
452c349dbc7Sjsg }
453c349dbc7Sjsg 
454c349dbc7Sjsg static void ring_context_destroy(struct kref *ref)
455c349dbc7Sjsg {
456c349dbc7Sjsg 	struct intel_context *ce = container_of(ref, typeof(*ce), ref);
457c349dbc7Sjsg 
458c349dbc7Sjsg 	GEM_BUG_ON(intel_context_is_pinned(ce));
459c349dbc7Sjsg 
460c349dbc7Sjsg 	if (ce->state)
461c349dbc7Sjsg 		__ring_context_fini(ce);
462c349dbc7Sjsg 
463c349dbc7Sjsg 	intel_context_fini(ce);
464c349dbc7Sjsg 	intel_context_free(ce);
465c349dbc7Sjsg }
466c349dbc7Sjsg 
4675ca02815Sjsg static int ring_context_init_default_state(struct intel_context *ce,
4685ca02815Sjsg 					   struct i915_gem_ww_ctx *ww)
4695ca02815Sjsg {
4705ca02815Sjsg 	struct drm_i915_gem_object *obj = ce->state->obj;
4715ca02815Sjsg 	void *vaddr;
4725ca02815Sjsg 
4735ca02815Sjsg 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
4745ca02815Sjsg 	if (IS_ERR(vaddr))
4755ca02815Sjsg 		return PTR_ERR(vaddr);
4765ca02815Sjsg 
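	/*
	 * Copy the engine's saved default context image into the freshly
	 * pinned mapping; the OpenBSD port reads it with uao_read() where
	 * the Linux code uses shmem_read().
	 */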
4775ca02815Sjsg #ifdef __linux__
4785ca02815Sjsg 	shmem_read(ce->engine->default_state, 0,
4795ca02815Sjsg 		   vaddr, ce->engine->context_size);
4805ca02815Sjsg #else
4815ca02815Sjsg 	uao_read(ce->engine->default_state, 0,
4825ca02815Sjsg 		   vaddr, ce->engine->context_size);
4835ca02815Sjsg #endif
4845ca02815Sjsg 
4855ca02815Sjsg 	i915_gem_object_flush_map(obj);
4865ca02815Sjsg 	__i915_gem_object_release_map(obj);
4875ca02815Sjsg 
4885ca02815Sjsg 	__set_bit(CONTEXT_VALID_BIT, &ce->flags);
4895ca02815Sjsg 	return 0;
4905ca02815Sjsg }
4915ca02815Sjsg 
492ad8b1aafSjsg static int ring_context_pre_pin(struct intel_context *ce,
493ad8b1aafSjsg 				struct i915_gem_ww_ctx *ww,
494ad8b1aafSjsg 				void **unused)
495c349dbc7Sjsg {
496c349dbc7Sjsg 	struct i915_address_space *vm;
497c349dbc7Sjsg 	int err = 0;
498c349dbc7Sjsg 
4995ca02815Sjsg 	if (ce->engine->default_state &&
5005ca02815Sjsg 	    !test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
5015ca02815Sjsg 		err = ring_context_init_default_state(ce, ww);
5025ca02815Sjsg 		if (err)
5035ca02815Sjsg 			return err;
5045ca02815Sjsg 	}
5055ca02815Sjsg 
506c349dbc7Sjsg 	vm = vm_alias(ce->vm);
507c349dbc7Sjsg 	if (vm)
508ad8b1aafSjsg 		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);
509c349dbc7Sjsg 
510c349dbc7Sjsg 	return err;
511c349dbc7Sjsg }
512c349dbc7Sjsg 
513c349dbc7Sjsg static void __context_unpin_ppgtt(struct intel_context *ce)
514c349dbc7Sjsg {
515c349dbc7Sjsg 	struct i915_address_space *vm;
516c349dbc7Sjsg 
517c349dbc7Sjsg 	vm = vm_alias(ce->vm);
518c349dbc7Sjsg 	if (vm)
519c349dbc7Sjsg 		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
520c349dbc7Sjsg }
521c349dbc7Sjsg 
522c349dbc7Sjsg static void ring_context_unpin(struct intel_context *ce)
523c349dbc7Sjsg {
524ad8b1aafSjsg }
525ad8b1aafSjsg 
526ad8b1aafSjsg static void ring_context_post_unpin(struct intel_context *ce)
527ad8b1aafSjsg {
528c349dbc7Sjsg 	__context_unpin_ppgtt(ce);
529c349dbc7Sjsg }
530c349dbc7Sjsg 
531c349dbc7Sjsg static struct i915_vma *
532c349dbc7Sjsg alloc_context_vma(struct intel_engine_cs *engine)
533c349dbc7Sjsg {
534c349dbc7Sjsg 	struct drm_i915_private *i915 = engine->i915;
535c349dbc7Sjsg 	struct drm_i915_gem_object *obj;
536c349dbc7Sjsg 	struct i915_vma *vma;
537c349dbc7Sjsg 	int err;
538c349dbc7Sjsg 
539c349dbc7Sjsg 	obj = i915_gem_object_create_shmem(i915, engine->context_size);
540c349dbc7Sjsg 	if (IS_ERR(obj))
541c349dbc7Sjsg 		return ERR_CAST(obj);
542c349dbc7Sjsg 
543c349dbc7Sjsg 	/*
544c349dbc7Sjsg 	 * Try to make the context utilize L3 as well as LLC.
545c349dbc7Sjsg 	 *
546c349dbc7Sjsg 	 * On VLV we don't have L3 controls in the PTEs so we
547c349dbc7Sjsg 	 * shouldn't touch the cache level, especially as that
548c349dbc7Sjsg 	 * would make the object snooped which might have a
549c349dbc7Sjsg 	 * negative performance impact.
550c349dbc7Sjsg 	 *
551c349dbc7Sjsg 	 * Snooping is required on non-llc platforms in execlist
552c349dbc7Sjsg 	 * mode, but since all GGTT accesses use PAT entry 0 we
553c349dbc7Sjsg 	 * get snooping anyway regardless of cache_level.
554c349dbc7Sjsg 	 *
555c349dbc7Sjsg 	 * This is only applicable for Ivy Bridge devices since
556c349dbc7Sjsg 	 * later platforms don't have L3 control bits in the PTE.
557c349dbc7Sjsg 	 */
558c349dbc7Sjsg 	if (IS_IVYBRIDGE(i915))
559c349dbc7Sjsg 		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
560c349dbc7Sjsg 
561c349dbc7Sjsg 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
562c349dbc7Sjsg 	if (IS_ERR(vma)) {
563c349dbc7Sjsg 		err = PTR_ERR(vma);
564c349dbc7Sjsg 		goto err_obj;
565c349dbc7Sjsg 	}
566c349dbc7Sjsg 
567c349dbc7Sjsg 	return vma;
568c349dbc7Sjsg 
569c349dbc7Sjsg err_obj:
570c349dbc7Sjsg 	i915_gem_object_put(obj);
571c349dbc7Sjsg 	return ERR_PTR(err);
572c349dbc7Sjsg }
573c349dbc7Sjsg 
574c349dbc7Sjsg static int ring_context_alloc(struct intel_context *ce)
575c349dbc7Sjsg {
576c349dbc7Sjsg 	struct intel_engine_cs *engine = ce->engine;
577c349dbc7Sjsg 
578c349dbc7Sjsg 	/* One ringbuffer to rule them all */
579c349dbc7Sjsg 	GEM_BUG_ON(!engine->legacy.ring);
580c349dbc7Sjsg 	ce->ring = engine->legacy.ring;
581c349dbc7Sjsg 	ce->timeline = intel_timeline_get(engine->legacy.timeline);
582c349dbc7Sjsg 
583c349dbc7Sjsg 	GEM_BUG_ON(ce->state);
584c349dbc7Sjsg 	if (engine->context_size) {
585c349dbc7Sjsg 		struct i915_vma *vma;
586c349dbc7Sjsg 
587c349dbc7Sjsg 		vma = alloc_context_vma(engine);
588c349dbc7Sjsg 		if (IS_ERR(vma))
589c349dbc7Sjsg 			return PTR_ERR(vma);
590c349dbc7Sjsg 
591c349dbc7Sjsg 		ce->state = vma;
592c349dbc7Sjsg 	}
593c349dbc7Sjsg 
594c349dbc7Sjsg 	return 0;
595c349dbc7Sjsg }
596c349dbc7Sjsg 
597ad8b1aafSjsg static int ring_context_pin(struct intel_context *ce, void *unused)
598c349dbc7Sjsg {
599ad8b1aafSjsg 	return 0;
600c349dbc7Sjsg }
601c349dbc7Sjsg 
602c349dbc7Sjsg static void ring_context_reset(struct intel_context *ce)
603c349dbc7Sjsg {
604c349dbc7Sjsg 	intel_ring_reset(ce->ring, ce->ring->emit);
6055ca02815Sjsg 	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
6065ca02815Sjsg }
6075ca02815Sjsg 
6081bb76ff1Sjsg static void ring_context_revoke(struct intel_context *ce,
6091bb76ff1Sjsg 				struct i915_request *rq,
6101bb76ff1Sjsg 				unsigned int preempt_timeout_ms)
6115ca02815Sjsg {
6125ca02815Sjsg 	struct intel_engine_cs *engine;
6135ca02815Sjsg 
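	/*
	 * Note: walk the requests submitted after @rq and, for those that
	 * belong to this context, flag them with -EIO and skip their
	 * payloads so the revoked context makes no further progress.
	 */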
6145ca02815Sjsg 	if (!rq || !i915_request_is_active(rq))
6155ca02815Sjsg 		return;
6165ca02815Sjsg 
6175ca02815Sjsg 	engine = rq->engine;
6185ca02815Sjsg 	lockdep_assert_held(&engine->sched_engine->lock);
6195ca02815Sjsg 	list_for_each_entry_continue(rq, &engine->sched_engine->requests,
6205ca02815Sjsg 				     sched.link)
6215ca02815Sjsg 		if (rq->context == ce) {
6225ca02815Sjsg 			i915_request_set_error_once(rq, -EIO);
6235ca02815Sjsg 			__i915_request_skip(rq);
6245ca02815Sjsg 		}
6255ca02815Sjsg }
6265ca02815Sjsg 
6275ca02815Sjsg static void ring_context_cancel_request(struct intel_context *ce,
6285ca02815Sjsg 					struct i915_request *rq)
6295ca02815Sjsg {
6305ca02815Sjsg 	struct intel_engine_cs *engine = NULL;
6315ca02815Sjsg 
6325ca02815Sjsg 	i915_request_active_engine(rq, &engine);
6335ca02815Sjsg 
6345ca02815Sjsg 	if (engine && intel_engine_pulse(engine))
6355ca02815Sjsg 		intel_gt_handle_error(engine->gt, engine->mask, 0,
6365ca02815Sjsg 				      "request cancellation by %s",
6375ca02815Sjsg 				      curproc->p_p->ps_comm);
638c349dbc7Sjsg }
639c349dbc7Sjsg 
640c349dbc7Sjsg static const struct intel_context_ops ring_context_ops = {
641c349dbc7Sjsg 	.alloc = ring_context_alloc,
642c349dbc7Sjsg 
6435ca02815Sjsg 	.cancel_request = ring_context_cancel_request,
6445ca02815Sjsg 
6451bb76ff1Sjsg 	.revoke = ring_context_revoke,
6465ca02815Sjsg 
647ad8b1aafSjsg 	.pre_pin = ring_context_pre_pin,
648c349dbc7Sjsg 	.pin = ring_context_pin,
649c349dbc7Sjsg 	.unpin = ring_context_unpin,
650ad8b1aafSjsg 	.post_unpin = ring_context_post_unpin,
651c349dbc7Sjsg 
652c349dbc7Sjsg 	.enter = intel_context_enter_engine,
653c349dbc7Sjsg 	.exit = intel_context_exit_engine,
654c349dbc7Sjsg 
655c349dbc7Sjsg 	.reset = ring_context_reset,
656c349dbc7Sjsg 	.destroy = ring_context_destroy,
657c349dbc7Sjsg };
658c349dbc7Sjsg 
659c349dbc7Sjsg static int load_pd_dir(struct i915_request *rq,
660ad8b1aafSjsg 		       struct i915_address_space *vm,
661c349dbc7Sjsg 		       u32 valid)
662c349dbc7Sjsg {
663c349dbc7Sjsg 	const struct intel_engine_cs * const engine = rq->engine;
664c349dbc7Sjsg 	u32 *cs;
665c349dbc7Sjsg 
666c349dbc7Sjsg 	cs = intel_ring_begin(rq, 12);
667c349dbc7Sjsg 	if (IS_ERR(cs))
668c349dbc7Sjsg 		return PTR_ERR(cs);
669c349dbc7Sjsg 
670c349dbc7Sjsg 	*cs++ = MI_LOAD_REGISTER_IMM(1);
671c349dbc7Sjsg 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
672c349dbc7Sjsg 	*cs++ = valid;
673c349dbc7Sjsg 
674c349dbc7Sjsg 	*cs++ = MI_LOAD_REGISTER_IMM(1);
675c349dbc7Sjsg 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
676ad8b1aafSjsg 	*cs++ = pp_dir(vm);
677c349dbc7Sjsg 
678c349dbc7Sjsg 	/* Stall until the page table load is complete? */
679c349dbc7Sjsg 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
680c349dbc7Sjsg 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
681c349dbc7Sjsg 	*cs++ = intel_gt_scratch_offset(engine->gt,
682c349dbc7Sjsg 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
683c349dbc7Sjsg 
684c349dbc7Sjsg 	*cs++ = MI_LOAD_REGISTER_IMM(1);
685c349dbc7Sjsg 	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
686c349dbc7Sjsg 	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
687c349dbc7Sjsg 
688c349dbc7Sjsg 	intel_ring_advance(rq, cs);
689c349dbc7Sjsg 
690c349dbc7Sjsg 	return rq->engine->emit_flush(rq, EMIT_FLUSH);
691c349dbc7Sjsg }
692c349dbc7Sjsg 
6935ca02815Sjsg static int mi_set_context(struct i915_request *rq,
694c349dbc7Sjsg 			  struct intel_context *ce,
695c349dbc7Sjsg 			  u32 flags)
696c349dbc7Sjsg {
697c349dbc7Sjsg 	struct intel_engine_cs *engine = rq->engine;
698ad8b1aafSjsg 	struct drm_i915_private *i915 = engine->i915;
699c349dbc7Sjsg 	enum intel_engine_id id;
700c349dbc7Sjsg 	const int num_engines =
701ad8b1aafSjsg 		IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
702c349dbc7Sjsg 	bool force_restore = false;
703c349dbc7Sjsg 	int len;
704c349dbc7Sjsg 	u32 *cs;
705c349dbc7Sjsg 
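	/*
	 * Command length accounting: the base sequence below is 4 dwords
	 * (MI_NOOP, MI_SET_CONTEXT, address | flags, MI_NOOP). Gen7 adds
	 * the MI_ARB_ON_OFF pair plus, when other engines exist (Haswell),
	 * an LRI/SRM dance of 4 dwords per engine + 6 fixed dwords; gen5
	 * adds the MI_SUSPEND_FLUSH pair; a forced restore adds one extra
	 * MI_SET_CONTEXT + address pair.
	 */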
706c349dbc7Sjsg 	len = 4;
7075ca02815Sjsg 	if (GRAPHICS_VER(i915) == 7)
708c349dbc7Sjsg 		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
7095ca02815Sjsg 	else if (GRAPHICS_VER(i915) == 5)
710c349dbc7Sjsg 		len += 2;
711c349dbc7Sjsg 	if (flags & MI_FORCE_RESTORE) {
712c349dbc7Sjsg 		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
713c349dbc7Sjsg 		flags &= ~MI_FORCE_RESTORE;
714c349dbc7Sjsg 		force_restore = true;
715c349dbc7Sjsg 		len += 2;
716c349dbc7Sjsg 	}
717c349dbc7Sjsg 
718c349dbc7Sjsg 	cs = intel_ring_begin(rq, len);
719c349dbc7Sjsg 	if (IS_ERR(cs))
720c349dbc7Sjsg 		return PTR_ERR(cs);
721c349dbc7Sjsg 
722c349dbc7Sjsg 	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
7235ca02815Sjsg 	if (GRAPHICS_VER(i915) == 7) {
724c349dbc7Sjsg 		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
725c349dbc7Sjsg 		if (num_engines) {
726c349dbc7Sjsg 			struct intel_engine_cs *signaller;
727c349dbc7Sjsg 
728c349dbc7Sjsg 			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
729c349dbc7Sjsg 			for_each_engine(signaller, engine->gt, id) {
730c349dbc7Sjsg 				if (signaller == engine)
731c349dbc7Sjsg 					continue;
732c349dbc7Sjsg 
733c349dbc7Sjsg 				*cs++ = i915_mmio_reg_offset(
734c349dbc7Sjsg 					   RING_PSMI_CTL(signaller->mmio_base));
735c349dbc7Sjsg 				*cs++ = _MASKED_BIT_ENABLE(
736c349dbc7Sjsg 						GEN6_PSMI_SLEEP_MSG_DISABLE);
737c349dbc7Sjsg 			}
738c349dbc7Sjsg 		}
7395ca02815Sjsg 	} else if (GRAPHICS_VER(i915) == 5) {
740c349dbc7Sjsg 		/*
741c349dbc7Sjsg 		 * This w/a is only listed for pre-production ilk a/b steppings,
742c349dbc7Sjsg 		 * but is also mentioned for programming the powerctx. To be
743c349dbc7Sjsg 		 * safe, just apply the workaround; we do not use SyncFlush so
744c349dbc7Sjsg 		 * this should never take effect and so be a no-op!
745c349dbc7Sjsg 		 */
746c349dbc7Sjsg 		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
747c349dbc7Sjsg 	}
748c349dbc7Sjsg 
749c349dbc7Sjsg 	if (force_restore) {
750c349dbc7Sjsg 		/*
751c349dbc7Sjsg 		 * The HW doesn't handle being told to restore the current
752c349dbc7Sjsg 		 * context very well. Quite often it likes to go off and
753c349dbc7Sjsg 		 * sulk, especially when it is meant to be reloading PP_DIR.
754c349dbc7Sjsg 		 * A very simple fix to force the reload is to simply switch
755c349dbc7Sjsg 		 * away from the current context and back again.
756c349dbc7Sjsg 		 *
757c349dbc7Sjsg 		 * Note that the kernel_context will contain random state
758c349dbc7Sjsg 		 * following the INHIBIT_RESTORE. We accept this since we
759c349dbc7Sjsg 		 * never use the kernel_context state; it is merely a
760c349dbc7Sjsg 		 * placeholder we use to flush other contexts.
761c349dbc7Sjsg 		 */
762c349dbc7Sjsg 		*cs++ = MI_SET_CONTEXT;
763c349dbc7Sjsg 		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
764c349dbc7Sjsg 			MI_MM_SPACE_GTT |
765c349dbc7Sjsg 			MI_RESTORE_INHIBIT;
766c349dbc7Sjsg 	}
767c349dbc7Sjsg 
768c349dbc7Sjsg 	*cs++ = MI_NOOP;
769c349dbc7Sjsg 	*cs++ = MI_SET_CONTEXT;
770c349dbc7Sjsg 	*cs++ = i915_ggtt_offset(ce->state) | flags;
771c349dbc7Sjsg 	/*
772c349dbc7Sjsg 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
773c349dbc7Sjsg 	 * WaMiSetContext_Hang:snb,ivb,vlv
774c349dbc7Sjsg 	 */
775c349dbc7Sjsg 	*cs++ = MI_NOOP;
776c349dbc7Sjsg 
7775ca02815Sjsg 	if (GRAPHICS_VER(i915) == 7) {
778c349dbc7Sjsg 		if (num_engines) {
779c349dbc7Sjsg 			struct intel_engine_cs *signaller;
7801bb76ff1Sjsg 			i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */
781c349dbc7Sjsg 
782c349dbc7Sjsg 			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
783c349dbc7Sjsg 			for_each_engine(signaller, engine->gt, id) {
784c349dbc7Sjsg 				if (signaller == engine)
785c349dbc7Sjsg 					continue;
786c349dbc7Sjsg 
787c349dbc7Sjsg 				last_reg = RING_PSMI_CTL(signaller->mmio_base);
788c349dbc7Sjsg 				*cs++ = i915_mmio_reg_offset(last_reg);
789c349dbc7Sjsg 				*cs++ = _MASKED_BIT_DISABLE(
790c349dbc7Sjsg 						GEN6_PSMI_SLEEP_MSG_DISABLE);
791c349dbc7Sjsg 			}
792c349dbc7Sjsg 
793c349dbc7Sjsg 			/* Insert a delay before the next switch! */
794c349dbc7Sjsg 			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
795c349dbc7Sjsg 			*cs++ = i915_mmio_reg_offset(last_reg);
796c349dbc7Sjsg 			*cs++ = intel_gt_scratch_offset(engine->gt,
797c349dbc7Sjsg 							INTEL_GT_SCRATCH_FIELD_DEFAULT);
798c349dbc7Sjsg 			*cs++ = MI_NOOP;
799c349dbc7Sjsg 		}
800c349dbc7Sjsg 		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
8015ca02815Sjsg 	} else if (GRAPHICS_VER(i915) == 5) {
802c349dbc7Sjsg 		*cs++ = MI_SUSPEND_FLUSH;
803c349dbc7Sjsg 	}
804c349dbc7Sjsg 
805c349dbc7Sjsg 	intel_ring_advance(rq, cs);
806c349dbc7Sjsg 
807c349dbc7Sjsg 	return 0;
808c349dbc7Sjsg }
809c349dbc7Sjsg 
810c349dbc7Sjsg static int remap_l3_slice(struct i915_request *rq, int slice)
811c349dbc7Sjsg {
8125ca02815Sjsg #define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32))
813*f005ef32Sjsg 	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
814c349dbc7Sjsg 	int i;
815c349dbc7Sjsg 
816c349dbc7Sjsg 	if (!remap_info)
817c349dbc7Sjsg 		return 0;
818c349dbc7Sjsg 
8195ca02815Sjsg 	cs = intel_ring_begin(rq, L3LOG_DW * 2 + 2);
820c349dbc7Sjsg 	if (IS_ERR(cs))
821c349dbc7Sjsg 		return PTR_ERR(cs);
822c349dbc7Sjsg 
823c349dbc7Sjsg 	/*
824c349dbc7Sjsg 	 * Note: We do not worry about the concurrent register cacheline hang
825c349dbc7Sjsg 	 * here because no other code should access these registers other than
826c349dbc7Sjsg 	 * at initialization time.
827c349dbc7Sjsg 	 */
8285ca02815Sjsg 	*cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW);
8295ca02815Sjsg 	for (i = 0; i < L3LOG_DW; i++) {
830c349dbc7Sjsg 		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
831c349dbc7Sjsg 		*cs++ = remap_info[i];
832c349dbc7Sjsg 	}
833c349dbc7Sjsg 	*cs++ = MI_NOOP;
834c349dbc7Sjsg 	intel_ring_advance(rq, cs);
835c349dbc7Sjsg 
836c349dbc7Sjsg 	return 0;
8375ca02815Sjsg #undef L3LOG_DW
838c349dbc7Sjsg }
839c349dbc7Sjsg 
840c349dbc7Sjsg static int remap_l3(struct i915_request *rq)
841c349dbc7Sjsg {
842c349dbc7Sjsg 	struct i915_gem_context *ctx = i915_request_gem_context(rq);
843c349dbc7Sjsg 	int i, err;
844c349dbc7Sjsg 
845c349dbc7Sjsg 	if (!ctx || !ctx->remap_slice)
846c349dbc7Sjsg 		return 0;
847c349dbc7Sjsg 
848c349dbc7Sjsg 	for (i = 0; i < MAX_L3_SLICES; i++) {
849c349dbc7Sjsg 		if (!(ctx->remap_slice & BIT(i)))
850c349dbc7Sjsg 			continue;
851c349dbc7Sjsg 
852c349dbc7Sjsg 		err = remap_l3_slice(rq, i);
853c349dbc7Sjsg 		if (err)
854c349dbc7Sjsg 			return err;
855c349dbc7Sjsg 	}
856c349dbc7Sjsg 
857c349dbc7Sjsg 	ctx->remap_slice = 0;
858c349dbc7Sjsg 	return 0;
859c349dbc7Sjsg }
860c349dbc7Sjsg 
861c349dbc7Sjsg static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
862c349dbc7Sjsg {
863c349dbc7Sjsg 	int ret;
864c349dbc7Sjsg 
865c349dbc7Sjsg 	if (!vm)
866c349dbc7Sjsg 		return 0;
867c349dbc7Sjsg 
868c349dbc7Sjsg 	ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
869c349dbc7Sjsg 	if (ret)
870c349dbc7Sjsg 		return ret;
871c349dbc7Sjsg 
872c349dbc7Sjsg 	/*
873c349dbc7Sjsg 	 * Not only do we need a full barrier (post-sync write) after
874c349dbc7Sjsg 	 * invalidating the TLBs, but we need to wait a little bit
875c349dbc7Sjsg 	 * longer. Whether this is merely delaying us, or the
876c349dbc7Sjsg 	 * subsequent flush is a key part of serialising with the
877c349dbc7Sjsg 	 * post-sync op, this extra pass appears vital before a
878c349dbc7Sjsg 	 * mm switch!
879c349dbc7Sjsg 	 */
880ad8b1aafSjsg 	ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
881c349dbc7Sjsg 	if (ret)
882c349dbc7Sjsg 		return ret;
883c349dbc7Sjsg 
884c349dbc7Sjsg 	return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
885c349dbc7Sjsg }
886c349dbc7Sjsg 
887c349dbc7Sjsg static int clear_residuals(struct i915_request *rq)
888c349dbc7Sjsg {
889c349dbc7Sjsg 	struct intel_engine_cs *engine = rq->engine;
890c349dbc7Sjsg 	int ret;
891c349dbc7Sjsg 
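	/*
	 * Note: this switches to the kernel context's mm, emits the
	 * engine's workaround batch (engine->wa_ctx.vma, built by
	 * gen7_ctx_switch_bb_setup()) and flushes, so that residual state
	 * from the previous context is scrubbed before the next one runs.
	 */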
892c349dbc7Sjsg 	ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
893c349dbc7Sjsg 	if (ret)
894c349dbc7Sjsg 		return ret;
895c349dbc7Sjsg 
896c349dbc7Sjsg 	if (engine->kernel_context->state) {
897c349dbc7Sjsg 		ret = mi_set_context(rq,
898c349dbc7Sjsg 				     engine->kernel_context,
899c349dbc7Sjsg 				     MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
900c349dbc7Sjsg 		if (ret)
901c349dbc7Sjsg 			return ret;
902c349dbc7Sjsg 	}
903c349dbc7Sjsg 
904c349dbc7Sjsg 	ret = engine->emit_bb_start(rq,
905*f005ef32Sjsg 				    i915_vma_offset(engine->wa_ctx.vma), 0,
906c349dbc7Sjsg 				    0);
907c349dbc7Sjsg 	if (ret)
908c349dbc7Sjsg 		return ret;
909c349dbc7Sjsg 
910c349dbc7Sjsg 	ret = engine->emit_flush(rq, EMIT_FLUSH);
911c349dbc7Sjsg 	if (ret)
912c349dbc7Sjsg 		return ret;
913c349dbc7Sjsg 
914c349dbc7Sjsg 	/* Always invalidate before the next switch_mm() */
915c349dbc7Sjsg 	return engine->emit_flush(rq, EMIT_INVALIDATE);
916c349dbc7Sjsg }
917c349dbc7Sjsg 
918c349dbc7Sjsg static int switch_context(struct i915_request *rq)
919c349dbc7Sjsg {
920c349dbc7Sjsg 	struct intel_engine_cs *engine = rq->engine;
921c349dbc7Sjsg 	struct intel_context *ce = rq->context;
922c349dbc7Sjsg 	void **residuals = NULL;
923c349dbc7Sjsg 	int ret;
924c349dbc7Sjsg 
925ad8b1aafSjsg 	GEM_BUG_ON(HAS_EXECLISTS(engine->i915));
926c349dbc7Sjsg 
927c349dbc7Sjsg 	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
928ad8b1aafSjsg 		if (engine->wa_ctx.vma->private != ce &&
929ad8b1aafSjsg 		    i915_mitigate_clear_residuals()) {
930c349dbc7Sjsg 			ret = clear_residuals(rq);
931c349dbc7Sjsg 			if (ret)
932c349dbc7Sjsg 				return ret;
933c349dbc7Sjsg 
934c349dbc7Sjsg 			residuals = &engine->wa_ctx.vma->private;
935c349dbc7Sjsg 		}
936c349dbc7Sjsg 	}
937c349dbc7Sjsg 
938c349dbc7Sjsg 	ret = switch_mm(rq, vm_alias(ce->vm));
939c349dbc7Sjsg 	if (ret)
940c349dbc7Sjsg 		return ret;
941c349dbc7Sjsg 
942c349dbc7Sjsg 	if (ce->state) {
943c349dbc7Sjsg 		u32 flags;
944c349dbc7Sjsg 
945c349dbc7Sjsg 		GEM_BUG_ON(engine->id != RCS0);
946c349dbc7Sjsg 
947c349dbc7Sjsg 		/* For resource streamer on HSW+ and power context elsewhere */
948c349dbc7Sjsg 		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
949c349dbc7Sjsg 		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
950c349dbc7Sjsg 
951c349dbc7Sjsg 		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
952c349dbc7Sjsg 		if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
953c349dbc7Sjsg 			flags |= MI_RESTORE_EXT_STATE_EN;
954c349dbc7Sjsg 		else
955c349dbc7Sjsg 			flags |= MI_RESTORE_INHIBIT;
956c349dbc7Sjsg 
957c349dbc7Sjsg 		ret = mi_set_context(rq, ce, flags);
958c349dbc7Sjsg 		if (ret)
959c349dbc7Sjsg 			return ret;
960c349dbc7Sjsg 	}
961c349dbc7Sjsg 
962c349dbc7Sjsg 	ret = remap_l3(rq);
963c349dbc7Sjsg 	if (ret)
964c349dbc7Sjsg 		return ret;
965c349dbc7Sjsg 
966c349dbc7Sjsg 	/*
967c349dbc7Sjsg 	 * Now past the point of no return, this request _will_ be emitted.
968c349dbc7Sjsg 	 *
969c349dbc7Sjsg 	 * Or at least this preamble will be emitted, the request may be
970c349dbc7Sjsg 	 * interrupted prior to submitting the user payload. If so, we
971c349dbc7Sjsg 	 * still submit the "empty" request in order to preserve global
972c349dbc7Sjsg 	 * state tracking such as this, our tracking of the current
973c349dbc7Sjsg 	 * dirty context.
974c349dbc7Sjsg 	 */
975c349dbc7Sjsg 	if (residuals) {
976c349dbc7Sjsg 		intel_context_put(*residuals);
977c349dbc7Sjsg 		*residuals = intel_context_get(ce);
978c349dbc7Sjsg 	}
979c349dbc7Sjsg 
980c349dbc7Sjsg 	return 0;
981c349dbc7Sjsg }
982c349dbc7Sjsg 
983c349dbc7Sjsg static int ring_request_alloc(struct i915_request *request)
984c349dbc7Sjsg {
985c349dbc7Sjsg 	int ret;
986c349dbc7Sjsg 
987c349dbc7Sjsg 	GEM_BUG_ON(!intel_context_is_pinned(request->context));
988c349dbc7Sjsg 	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
989c349dbc7Sjsg 
990c349dbc7Sjsg 	/*
991c349dbc7Sjsg 	 * Flush enough space to reduce the likelihood of waiting after
992c349dbc7Sjsg 	 * we start building the request - in which case we will just
993c349dbc7Sjsg 	 * have to repeat work.
994c349dbc7Sjsg 	 */
995c349dbc7Sjsg 	request->reserved_space += LEGACY_REQUEST_SIZE;
996c349dbc7Sjsg 
997c349dbc7Sjsg 	/* Unconditionally invalidate GPU caches and TLBs. */
998c349dbc7Sjsg 	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
999c349dbc7Sjsg 	if (ret)
1000c349dbc7Sjsg 		return ret;
1001c349dbc7Sjsg 
1002c349dbc7Sjsg 	ret = switch_context(request);
1003c349dbc7Sjsg 	if (ret)
1004c349dbc7Sjsg 		return ret;
1005c349dbc7Sjsg 
1006c349dbc7Sjsg 	request->reserved_space -= LEGACY_REQUEST_SIZE;
1007c349dbc7Sjsg 	return 0;
1008c349dbc7Sjsg }
1009c349dbc7Sjsg 
1010c349dbc7Sjsg static void gen6_bsd_submit_request(struct i915_request *request)
1011c349dbc7Sjsg {
1012c349dbc7Sjsg 	struct intel_uncore *uncore = request->engine->uncore;
1013c349dbc7Sjsg 
1014c349dbc7Sjsg 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
1015c349dbc7Sjsg 
1016c349dbc7Sjsg 	/* Every tail move must follow the sequence below */
1017c349dbc7Sjsg 
1018c349dbc7Sjsg 	/* Disable notification that the ring is IDLE. The GT
1019c349dbc7Sjsg 	 * will then assume that it is busy and bring it out of rc6.
1020c349dbc7Sjsg 	 */
10211bb76ff1Sjsg 	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
10221bb76ff1Sjsg 			      _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
1023c349dbc7Sjsg 
1024c349dbc7Sjsg 	/* Clear the context id. Here be magic! */
1025c349dbc7Sjsg 	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
1026c349dbc7Sjsg 
1027c349dbc7Sjsg 	/* Wait for the ring not to be idle, i.e. for it to wake up. */
1028c349dbc7Sjsg 	if (__intel_wait_for_register_fw(uncore,
10291bb76ff1Sjsg 					 RING_PSMI_CTL(GEN6_BSD_RING_BASE),
1030c349dbc7Sjsg 					 GEN6_BSD_SLEEP_INDICATOR,
1031c349dbc7Sjsg 					 0,
1032c349dbc7Sjsg 					 1000, 0, NULL))
1033c349dbc7Sjsg 		drm_err(&uncore->i915->drm,
1034c349dbc7Sjsg 			"timed out waiting for the BSD ring to wake up\n");
1035c349dbc7Sjsg 
1036c349dbc7Sjsg 	/* Now that the ring is fully powered up, update the tail */
1037c349dbc7Sjsg 	i9xx_submit_request(request);
1038c349dbc7Sjsg 
1039c349dbc7Sjsg 	/* Let the ring send IDLE messages to the GT again,
1040c349dbc7Sjsg 	 * and so let it sleep to conserve power when idle.
1041c349dbc7Sjsg 	 */
10421bb76ff1Sjsg 	intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
10431bb76ff1Sjsg 			      _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
1044c349dbc7Sjsg 
1045c349dbc7Sjsg 	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
1046c349dbc7Sjsg }
1047c349dbc7Sjsg 
1048c349dbc7Sjsg static void i9xx_set_default_submission(struct intel_engine_cs *engine)
1049c349dbc7Sjsg {
1050c349dbc7Sjsg 	engine->submit_request = i9xx_submit_request;
1051c349dbc7Sjsg }
1052c349dbc7Sjsg 
1053c349dbc7Sjsg static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
1054c349dbc7Sjsg {
1055c349dbc7Sjsg 	engine->submit_request = gen6_bsd_submit_request;
1056c349dbc7Sjsg }
1057c349dbc7Sjsg 
1058c349dbc7Sjsg static void ring_release(struct intel_engine_cs *engine)
1059c349dbc7Sjsg {
1060*f005ef32Sjsg 	struct drm_i915_private *i915 = engine->i915;
1061c349dbc7Sjsg 
1062*f005ef32Sjsg 	drm_WARN_ON(&i915->drm, GRAPHICS_VER(i915) > 2 &&
1063c349dbc7Sjsg 		    (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
1064c349dbc7Sjsg 
1065c349dbc7Sjsg 	intel_engine_cleanup_common(engine);
1066c349dbc7Sjsg 
1067c349dbc7Sjsg 	if (engine->wa_ctx.vma) {
1068c349dbc7Sjsg 		intel_context_put(engine->wa_ctx.vma->private);
1069c349dbc7Sjsg 		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
1070c349dbc7Sjsg 	}
1071c349dbc7Sjsg 
1072c349dbc7Sjsg 	intel_ring_unpin(engine->legacy.ring);
1073c349dbc7Sjsg 	intel_ring_put(engine->legacy.ring);
1074c349dbc7Sjsg 
1075c349dbc7Sjsg 	intel_timeline_unpin(engine->legacy.timeline);
1076c349dbc7Sjsg 	intel_timeline_put(engine->legacy.timeline);
1077c349dbc7Sjsg }
1078c349dbc7Sjsg 
10795ca02815Sjsg static void irq_handler(struct intel_engine_cs *engine, u16 iir)
10805ca02815Sjsg {
10815ca02815Sjsg 	intel_engine_signal_breadcrumbs(engine);
10825ca02815Sjsg }
10835ca02815Sjsg 
1084c349dbc7Sjsg static void setup_irq(struct intel_engine_cs *engine)
1085c349dbc7Sjsg {
1086c349dbc7Sjsg 	struct drm_i915_private *i915 = engine->i915;
1087c349dbc7Sjsg 
10885ca02815Sjsg 	intel_engine_set_irq_handler(engine, irq_handler);
10895ca02815Sjsg 
10905ca02815Sjsg 	if (GRAPHICS_VER(i915) >= 6) {
1091c349dbc7Sjsg 		engine->irq_enable = gen6_irq_enable;
1092c349dbc7Sjsg 		engine->irq_disable = gen6_irq_disable;
10935ca02815Sjsg 	} else if (GRAPHICS_VER(i915) >= 5) {
1094c349dbc7Sjsg 		engine->irq_enable = gen5_irq_enable;
1095c349dbc7Sjsg 		engine->irq_disable = gen5_irq_disable;
10965ca02815Sjsg 	} else if (GRAPHICS_VER(i915) >= 3) {
1097ad8b1aafSjsg 		engine->irq_enable = gen3_irq_enable;
1098ad8b1aafSjsg 		engine->irq_disable = gen3_irq_disable;
1099c349dbc7Sjsg 	} else {
1100ad8b1aafSjsg 		engine->irq_enable = gen2_irq_enable;
1101ad8b1aafSjsg 		engine->irq_disable = gen2_irq_disable;
1102c349dbc7Sjsg 	}
1103c349dbc7Sjsg }
1104c349dbc7Sjsg 
11055ca02815Sjsg static void add_to_engine(struct i915_request *rq)
11065ca02815Sjsg {
11075ca02815Sjsg 	lockdep_assert_held(&rq->engine->sched_engine->lock);
11085ca02815Sjsg 	list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
11095ca02815Sjsg }
11105ca02815Sjsg 
11115ca02815Sjsg static void remove_from_engine(struct i915_request *rq)
11125ca02815Sjsg {
11135ca02815Sjsg 	spin_lock_irq(&rq->engine->sched_engine->lock);
11145ca02815Sjsg 	list_del_init(&rq->sched.link);
11155ca02815Sjsg 
11165ca02815Sjsg 	/* Prevent further __await_execution() registering a cb, then flush */
11175ca02815Sjsg 	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
11185ca02815Sjsg 
11195ca02815Sjsg 	spin_unlock_irq(&rq->engine->sched_engine->lock);
11205ca02815Sjsg 
11215ca02815Sjsg 	i915_request_notify_execute_cb_imm(rq);
11225ca02815Sjsg }
11235ca02815Sjsg 
1124c349dbc7Sjsg static void setup_common(struct intel_engine_cs *engine)
1125c349dbc7Sjsg {
1126c349dbc7Sjsg 	struct drm_i915_private *i915 = engine->i915;
1127c349dbc7Sjsg 
1128c349dbc7Sjsg 	/* gen8+ are only supported with execlists */
11295ca02815Sjsg 	GEM_BUG_ON(GRAPHICS_VER(i915) >= 8);
1130c349dbc7Sjsg 
1131c349dbc7Sjsg 	setup_irq(engine);
1132c349dbc7Sjsg 
1133c349dbc7Sjsg 	engine->resume = xcs_resume;
11345ca02815Sjsg 	engine->sanitize = xcs_sanitize;
11355ca02815Sjsg 
1136c349dbc7Sjsg 	engine->reset.prepare = reset_prepare;
1137c349dbc7Sjsg 	engine->reset.rewind = reset_rewind;
1138c349dbc7Sjsg 	engine->reset.cancel = reset_cancel;
1139c349dbc7Sjsg 	engine->reset.finish = reset_finish;
1140c349dbc7Sjsg 
11415ca02815Sjsg 	engine->add_active_request = add_to_engine;
11425ca02815Sjsg 	engine->remove_active_request = remove_from_engine;
11435ca02815Sjsg 
1144c349dbc7Sjsg 	engine->cops = &ring_context_ops;
1145c349dbc7Sjsg 	engine->request_alloc = ring_request_alloc;
1146c349dbc7Sjsg 
1147c349dbc7Sjsg 	/*
1148c349dbc7Sjsg 	 * Using a global execution timeline; the previous final breadcrumb is
1149c349dbc7Sjsg 	 * equivalent to our next initial breadcrumb so we can elide
1150c349dbc7Sjsg 	 * engine->emit_init_breadcrumb().
1151c349dbc7Sjsg 	 */
1152ad8b1aafSjsg 	engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
11535ca02815Sjsg 	if (GRAPHICS_VER(i915) == 5)
1154c349dbc7Sjsg 		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
1155c349dbc7Sjsg 
1156c349dbc7Sjsg 	engine->set_default_submission = i9xx_set_default_submission;
1157c349dbc7Sjsg 
11585ca02815Sjsg 	if (GRAPHICS_VER(i915) >= 6)
1159c349dbc7Sjsg 		engine->emit_bb_start = gen6_emit_bb_start;
11605ca02815Sjsg 	else if (GRAPHICS_VER(i915) >= 4)
1161ad8b1aafSjsg 		engine->emit_bb_start = gen4_emit_bb_start;
1162c349dbc7Sjsg 	else if (IS_I830(i915) || IS_I845G(i915))
1163c349dbc7Sjsg 		engine->emit_bb_start = i830_emit_bb_start;
1164c349dbc7Sjsg 	else
1165ad8b1aafSjsg 		engine->emit_bb_start = gen3_emit_bb_start;
1166c349dbc7Sjsg }
1167c349dbc7Sjsg 
1168c349dbc7Sjsg static void setup_rcs(struct intel_engine_cs *engine)
1169c349dbc7Sjsg {
1170c349dbc7Sjsg 	struct drm_i915_private *i915 = engine->i915;
1171c349dbc7Sjsg 
1172c349dbc7Sjsg 	if (HAS_L3_DPF(i915))
1173c349dbc7Sjsg 		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
1174c349dbc7Sjsg 
1175c349dbc7Sjsg 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
1176c349dbc7Sjsg 
11775ca02815Sjsg 	if (GRAPHICS_VER(i915) >= 7) {
1178ad8b1aafSjsg 		engine->emit_flush = gen7_emit_flush_rcs;
1179ad8b1aafSjsg 		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs;
11805ca02815Sjsg 	} else if (GRAPHICS_VER(i915) == 6) {
1181ad8b1aafSjsg 		engine->emit_flush = gen6_emit_flush_rcs;
1182ad8b1aafSjsg 		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs;
11835ca02815Sjsg 	} else if (GRAPHICS_VER(i915) == 5) {
1184ad8b1aafSjsg 		engine->emit_flush = gen4_emit_flush_rcs;
1185c349dbc7Sjsg 	} else {
11865ca02815Sjsg 		if (GRAPHICS_VER(i915) < 4)
1187ad8b1aafSjsg 			engine->emit_flush = gen2_emit_flush;
1188c349dbc7Sjsg 		else
1189ad8b1aafSjsg 			engine->emit_flush = gen4_emit_flush_rcs;
1190c349dbc7Sjsg 		engine->irq_enable_mask = I915_USER_INTERRUPT;
1191c349dbc7Sjsg 	}
1192c349dbc7Sjsg 
1193c349dbc7Sjsg 	if (IS_HASWELL(i915))
1194c349dbc7Sjsg 		engine->emit_bb_start = hsw_emit_bb_start;
1195c349dbc7Sjsg }
1196c349dbc7Sjsg 
1197c349dbc7Sjsg static void setup_vcs(struct intel_engine_cs *engine)
1198c349dbc7Sjsg {
1199c349dbc7Sjsg 	struct drm_i915_private *i915 = engine->i915;
1200c349dbc7Sjsg 
12015ca02815Sjsg 	if (GRAPHICS_VER(i915) >= 6) {
1202c349dbc7Sjsg 		/* gen6 bsd needs a special wa for tail updates */
12035ca02815Sjsg 		if (GRAPHICS_VER(i915) == 6)
1204c349dbc7Sjsg 			engine->set_default_submission = gen6_bsd_set_default_submission;
1205ad8b1aafSjsg 		engine->emit_flush = gen6_emit_flush_vcs;
1206c349dbc7Sjsg 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1207c349dbc7Sjsg 
12085ca02815Sjsg 		if (GRAPHICS_VER(i915) == 6)
1209ad8b1aafSjsg 			engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
1210c349dbc7Sjsg 		else
1211ad8b1aafSjsg 			engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
1212c349dbc7Sjsg 	} else {
1213ad8b1aafSjsg 		engine->emit_flush = gen4_emit_flush_vcs;
12145ca02815Sjsg 		if (GRAPHICS_VER(i915) == 5)
1215c349dbc7Sjsg 			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
1216c349dbc7Sjsg 		else
1217c349dbc7Sjsg 			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1218c349dbc7Sjsg 	}
1219c349dbc7Sjsg }
1220c349dbc7Sjsg 
1221c349dbc7Sjsg static void setup_bcs(struct intel_engine_cs *engine)
1222c349dbc7Sjsg {
1223c349dbc7Sjsg 	struct drm_i915_private *i915 = engine->i915;
1224c349dbc7Sjsg 
1225ad8b1aafSjsg 	engine->emit_flush = gen6_emit_flush_xcs;
1226c349dbc7Sjsg 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
1227c349dbc7Sjsg 
12285ca02815Sjsg 	if (GRAPHICS_VER(i915) == 6)
1229ad8b1aafSjsg 		engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
1230c349dbc7Sjsg 	else
1231ad8b1aafSjsg 		engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
1232c349dbc7Sjsg }
1233c349dbc7Sjsg 
1234c349dbc7Sjsg static void setup_vecs(struct intel_engine_cs *engine)
1235c349dbc7Sjsg {
1236c349dbc7Sjsg 	struct drm_i915_private *i915 = engine->i915;
1237c349dbc7Sjsg 
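	/* With legacy ring submission, VECS only exists on gen7 (Haswell). */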
12385ca02815Sjsg 	GEM_BUG_ON(GRAPHICS_VER(i915) < 7);
1239c349dbc7Sjsg 
1240ad8b1aafSjsg 	engine->emit_flush = gen6_emit_flush_xcs;
1241c349dbc7Sjsg 	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
1242ad8b1aafSjsg 	engine->irq_enable = hsw_irq_enable_vecs;
1243ad8b1aafSjsg 	engine->irq_disable = hsw_irq_disable_vecs;
1244c349dbc7Sjsg 
1245ad8b1aafSjsg 	engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
1246c349dbc7Sjsg }
1247c349dbc7Sjsg 
1248c349dbc7Sjsg static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
1249c349dbc7Sjsg 				    struct i915_vma * const vma)
1250c349dbc7Sjsg {
1251c349dbc7Sjsg 	return gen7_setup_clear_gpr_bb(engine, vma);
1252c349dbc7Sjsg }
1253c349dbc7Sjsg 
12545ca02815Sjsg static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine,
12555ca02815Sjsg 				   struct i915_gem_ww_ctx *ww,
12565ca02815Sjsg 				   struct i915_vma *vma)
1257c349dbc7Sjsg {
1258c349dbc7Sjsg 	int err;
1259c349dbc7Sjsg 
12605ca02815Sjsg 	err = i915_vma_pin_ww(vma, ww, 0, 0, PIN_USER | PIN_HIGH);
1261c349dbc7Sjsg 	if (err)
12625ca02815Sjsg 		return err;
1263c349dbc7Sjsg 
1264c349dbc7Sjsg 	err = i915_vma_sync(vma);
1265c349dbc7Sjsg 	if (err)
1266c349dbc7Sjsg 		goto err_unpin;
1267c349dbc7Sjsg 
1268c349dbc7Sjsg 	err = gen7_ctx_switch_bb_setup(engine, vma);
1269c349dbc7Sjsg 	if (err)
1270c349dbc7Sjsg 		goto err_unpin;
1271c349dbc7Sjsg 
1272c349dbc7Sjsg 	engine->wa_ctx.vma = vma;
1273c349dbc7Sjsg 	return 0;
1274c349dbc7Sjsg 
1275c349dbc7Sjsg err_unpin:
1276c349dbc7Sjsg 	i915_vma_unpin(vma);
1277c349dbc7Sjsg 	return err;
1278c349dbc7Sjsg }
1279c349dbc7Sjsg 
12805ca02815Sjsg static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine)
12815ca02815Sjsg {
12825ca02815Sjsg 	struct drm_i915_gem_object *obj;
12835ca02815Sjsg 	struct i915_vma *vma;
12845ca02815Sjsg 	int size, err;
12855ca02815Sjsg 
12865ca02815Sjsg 	if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS)
12871bb76ff1Sjsg 		return NULL;
12885ca02815Sjsg 
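	/*
	 * Note: passing a NULL vma asks gen7_ctx_switch_bb_setup() only for
	 * the batch size it needs: negative is an error, zero means no
	 * workaround batch is required, and a positive value is the size in
	 * bytes used for the allocation below.
	 */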
12895ca02815Sjsg 	err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
12905ca02815Sjsg 	if (err < 0)
12915ca02815Sjsg 		return ERR_PTR(err);
12925ca02815Sjsg 	if (!err)
12935ca02815Sjsg 		return NULL;
12945ca02815Sjsg 
1295*f005ef32Sjsg 	size = ALIGN(err, PAGE_SIZE);
12965ca02815Sjsg 
12975ca02815Sjsg 	obj = i915_gem_object_create_internal(engine->i915, size);
12985ca02815Sjsg 	if (IS_ERR(obj))
12995ca02815Sjsg 		return ERR_CAST(obj);
13005ca02815Sjsg 
13015ca02815Sjsg 	vma = i915_vma_instance(obj, engine->gt->vm, NULL);
13025ca02815Sjsg 	if (IS_ERR(vma)) {
13035ca02815Sjsg 		i915_gem_object_put(obj);
13045ca02815Sjsg 		return ERR_CAST(vma);
13055ca02815Sjsg 	}
13065ca02815Sjsg 
13075ca02815Sjsg 	vma->private = intel_context_create(engine); /* dummy residuals */
13085ca02815Sjsg 	if (IS_ERR(vma->private)) {
13095ca02815Sjsg 		err = PTR_ERR(vma->private);
13105ca02815Sjsg 		vma->private = NULL;
13115ca02815Sjsg 		i915_gem_object_put(obj);
13125ca02815Sjsg 		return ERR_PTR(err);
13135ca02815Sjsg 	}
13145ca02815Sjsg 
13155ca02815Sjsg 	return vma;
13165ca02815Sjsg }
13175ca02815Sjsg 
1318c349dbc7Sjsg int intel_ring_submission_setup(struct intel_engine_cs *engine)
1319c349dbc7Sjsg {
13205ca02815Sjsg 	struct i915_gem_ww_ctx ww;
1321c349dbc7Sjsg 	struct intel_timeline *timeline;
1322c349dbc7Sjsg 	struct intel_ring *ring;
13235ca02815Sjsg 	struct i915_vma *gen7_wa_vma;
1324c349dbc7Sjsg 	int err;
1325c349dbc7Sjsg 
1326c349dbc7Sjsg 	setup_common(engine);
1327c349dbc7Sjsg 
1328c349dbc7Sjsg 	switch (engine->class) {
1329c349dbc7Sjsg 	case RENDER_CLASS:
1330c349dbc7Sjsg 		setup_rcs(engine);
1331c349dbc7Sjsg 		break;
1332c349dbc7Sjsg 	case VIDEO_DECODE_CLASS:
1333c349dbc7Sjsg 		setup_vcs(engine);
1334c349dbc7Sjsg 		break;
1335c349dbc7Sjsg 	case COPY_ENGINE_CLASS:
1336c349dbc7Sjsg 		setup_bcs(engine);
1337c349dbc7Sjsg 		break;
1338c349dbc7Sjsg 	case VIDEO_ENHANCEMENT_CLASS:
1339c349dbc7Sjsg 		setup_vecs(engine);
1340c349dbc7Sjsg 		break;
1341c349dbc7Sjsg 	default:
1342c349dbc7Sjsg 		MISSING_CASE(engine->class);
1343c349dbc7Sjsg 		return -ENODEV;
1344c349dbc7Sjsg 	}
1345c349dbc7Sjsg 
1346ad8b1aafSjsg 	timeline = intel_timeline_create_from_engine(engine,
1347ad8b1aafSjsg 						     I915_GEM_HWS_SEQNO_ADDR);
1348c349dbc7Sjsg 	if (IS_ERR(timeline)) {
1349c349dbc7Sjsg 		err = PTR_ERR(timeline);
1350c349dbc7Sjsg 		goto err;
1351c349dbc7Sjsg 	}
1352c349dbc7Sjsg 	GEM_BUG_ON(timeline->has_initial_breadcrumb);
1353c349dbc7Sjsg 
1354c349dbc7Sjsg 	ring = intel_engine_create_ring(engine, SZ_16K);
1355c349dbc7Sjsg 	if (IS_ERR(ring)) {
1356c349dbc7Sjsg 		err = PTR_ERR(ring);
13575ca02815Sjsg 		goto err_timeline;
1358c349dbc7Sjsg 	}
1359c349dbc7Sjsg 
1360c349dbc7Sjsg 	GEM_BUG_ON(engine->legacy.ring);
1361c349dbc7Sjsg 	engine->legacy.ring = ring;
1362c349dbc7Sjsg 	engine->legacy.timeline = timeline;
1363c349dbc7Sjsg 
13645ca02815Sjsg 	gen7_wa_vma = gen7_ctx_vma(engine);
13655ca02815Sjsg 	if (IS_ERR(gen7_wa_vma)) {
13665ca02815Sjsg 		err = PTR_ERR(gen7_wa_vma);
13675ca02815Sjsg 		goto err_ring;
13685ca02815Sjsg 	}
13695ca02815Sjsg 
13705ca02815Sjsg 	i915_gem_ww_ctx_init(&ww, false);
13715ca02815Sjsg 
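	/*
	 * Note: standard ww-mutex locking loop. All object locks are taken
	 * under the ww context; if any of them returns -EDEADLK we back off
	 * (dropping what we hold) and restart from retry:.
	 */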
13725ca02815Sjsg retry:
13735ca02815Sjsg 	err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
13745ca02815Sjsg 	if (!err && gen7_wa_vma)
13755ca02815Sjsg 		err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
13761bb76ff1Sjsg 	if (!err)
13775ca02815Sjsg 		err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
13785ca02815Sjsg 	if (!err)
13795ca02815Sjsg 		err = intel_timeline_pin(timeline, &ww);
13805ca02815Sjsg 	if (!err) {
13815ca02815Sjsg 		err = intel_ring_pin(ring, &ww);
13825ca02815Sjsg 		if (err)
13835ca02815Sjsg 			intel_timeline_unpin(timeline);
13845ca02815Sjsg 	}
13855ca02815Sjsg 	if (err)
13865ca02815Sjsg 		goto out;
13875ca02815Sjsg 
1388c349dbc7Sjsg 	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
1389c349dbc7Sjsg 
13905ca02815Sjsg 	if (gen7_wa_vma) {
13915ca02815Sjsg 		err = gen7_ctx_switch_bb_init(engine, &ww, gen7_wa_vma);
13925ca02815Sjsg 		if (err) {
13935ca02815Sjsg 			intel_ring_unpin(ring);
13945ca02815Sjsg 			intel_timeline_unpin(timeline);
1395c349dbc7Sjsg 		}
13965ca02815Sjsg 	}
13975ca02815Sjsg 
13985ca02815Sjsg out:
13995ca02815Sjsg 	if (err == -EDEADLK) {
14005ca02815Sjsg 		err = i915_gem_ww_ctx_backoff(&ww);
14015ca02815Sjsg 		if (!err)
14025ca02815Sjsg 			goto retry;
14035ca02815Sjsg 	}
14045ca02815Sjsg 	i915_gem_ww_ctx_fini(&ww);
14055ca02815Sjsg 	if (err)
14065ca02815Sjsg 		goto err_gen7_put;
1407c349dbc7Sjsg 
1408c349dbc7Sjsg 	/* Finally, take ownership and responsibility for cleanup! */
1409c349dbc7Sjsg 	engine->release = ring_release;
1410c349dbc7Sjsg 
1411c349dbc7Sjsg 	return 0;
1412c349dbc7Sjsg 
14135ca02815Sjsg err_gen7_put:
14145ca02815Sjsg 	if (gen7_wa_vma) {
14155ca02815Sjsg 		intel_context_put(gen7_wa_vma->private);
14165ca02815Sjsg 		i915_gem_object_put(gen7_wa_vma->obj);
14175ca02815Sjsg 	}
1418c349dbc7Sjsg err_ring:
1419c349dbc7Sjsg 	intel_ring_put(ring);
1420c349dbc7Sjsg err_timeline:
1421c349dbc7Sjsg 	intel_timeline_put(timeline);
1422c349dbc7Sjsg err:
1423c349dbc7Sjsg 	intel_engine_cleanup_common(engine);
1424c349dbc7Sjsg 	return err;
1425c349dbc7Sjsg }
1426c349dbc7Sjsg 
1427c349dbc7Sjsg #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1428c349dbc7Sjsg #include "selftest_ring_submission.c"
1429c349dbc7Sjsg #endif
1430