1 /*	$NetBSD: intel_lrc.c,v 1.8 2021/12/19 12:32:15 riastradh Exp $	*/
2 
3 /*
4  * Copyright © 2014 Intel Corporation
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23  * IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Ben Widawsky <ben@bwidawsk.net>
27  *    Michel Thierry <michel.thierry@intel.com>
28  *    Thomas Daniel <thomas.daniel@intel.com>
29  *    Oscar Mateo <oscar.mateo@intel.com>
30  *
31  */
32 
33 /**
34  * DOC: Logical Rings, Logical Ring Contexts and Execlists
35  *
36  * Motivation:
37  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
38  * These expanded contexts enable a number of new abilities, especially
39  * "Execlists" (also implemented in this file).
40  *
41  * One of the main differences from the legacy HW contexts is that logical
42  * ring contexts incorporate many more things into the context's state, like
43  * PDPs or ringbuffer control registers:
44  *
45  * The reason why PDPs are included in the context is straightforward: as
46  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
47  * contained there means you don't need to do a ppgtt->switch_mm yourself;
48  * instead, the GPU will do it for you on the context switch.
49  *
50  * But, what about the ringbuffer control registers (head, tail, etc..)?
51  * shouldn't we just need a set of those per engine command streamer? This is
52  * where the name "Logical Rings" starts to make sense: by virtualizing the
53  * rings, the engine cs shifts to a new "ring buffer" with every context
54  * switch. When you want to submit a workload to the GPU you: A) choose your
55  * context, B) find its appropriate virtualized ring, C) write commands to it
56  * and then, finally, D) tell the GPU to switch to that context.
57  *
58  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
59  * to a context is via a context execution list, ergo "Execlists".
60  *
61  * LRC implementation:
62  * Regarding the creation of contexts, we have:
63  *
64  * - One global default context.
65  * - One local default context for each opened fd.
66  * - One local extra context for each context create ioctl call.
67  *
68  * Now that ringbuffers belong per-context (and not per-engine, like before)
69  * and that contexts are uniquely tied to a given engine (and not reusable,
70  * like before) we need:
71  *
72  * - One ringbuffer per-engine inside each context.
73  * - One backing object per-engine inside each context.
74  *
75  * The global default context starts its life with these new objects fully
76  * allocated and populated. The local default context for each opened fd is
77  * more complex, because we don't know at creation time which engine is going
78  * to use them. To handle this, we have implemented a deferred creation of LR
79  * contexts:
80  *
81  * The local context starts its life as a hollow or blank holder, that only
82  * gets populated for a given engine once we receive an execbuffer. If later
83  * on we receive another execbuffer ioctl for the same context but a different
84  * engine, we allocate/populate a new ringbuffer and context backing object and
85  * so on.
86  *
87  * Finally, regarding local contexts created using the ioctl call: as they are
88  * only allowed with the render ring, we can allocate & populate them right
89  * away (no need to defer anything, at least for now).
90  *
91  * Execlists implementation:
92  * Execlists are the new method by which, on gen8+ hardware, workloads are
93  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
94  * This method works as follows:
95  *
96  * When a request is committed, its commands (the BB start and any leading or
97  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
98  * for the appropriate context. The tail pointer in the hardware context is not
99  * updated at this time, but instead, kept by the driver in the ringbuffer
100  * structure. A structure representing this request is added to a request queue
101  * for the appropriate engine: this structure contains a copy of the context's
102  * tail after the request was written to the ring buffer and a pointer to the
103  * context itself.
104  *
105  * If the engine's request queue was empty before the request was added, the
106  * queue is processed immediately. Otherwise the queue will be processed during
107  * a context switch interrupt. In any case, elements on the queue will get sent
108  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
109  * globally unique 20-bit submission ID.
110  *
111  * When execution of a request completes, the GPU updates the context status
112  * buffer with a context complete event and generates a context switch interrupt.
113  * During the interrupt handling, the driver examines the events in the buffer:
114  * for each context complete event, if the announced ID matches that on the head
115  * of the request queue, then that request is retired and removed from the queue.
116  *
117  * After processing, if any requests were retired and the queue is not empty
118  * then a new execution list can be submitted. The two requests at the front of
119  * the queue are next to be submitted but since a context may not occur twice in
120  * an execution list, if subsequent requests have the same ID as the first then
121  * the two requests must be combined. This is done simply by discarding requests
122  * at the head of the queue until either only one request is left (in which case
123  * we use a NULL second context) or the first two requests have unique IDs.
124  *
125  * By always executing the first two requests in the queue the driver ensures
126  * that the GPU is kept as busy as possible. In the case where a single context
127  * completes but a second context is still executing, the request for this second
128  * context will be at the head of the queue when we remove the first one. This
129  * request will then be resubmitted along with a new request for a different context,
130  * which will cause the hardware to continue executing the second request and queue
131  * the new request (the GPU detects the condition of a context getting preempted
132  * with the same context and optimizes the context switch flow by not doing
133  * preemption, but just sampling the new tail pointer).
134  *
135  */
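/*
 * Illustrative sketch only (compiled out, hypothetical names): a minimal
 * model of the pairing rule described above.  Requests queued for the
 * same context are coalesced into one port by keeping only the newest
 * tail, and the second port is the first request from a different
 * context, or NULL if none remains.  The real submission path is
 * implemented later in this file.
 */
#if 0
struct elsp_sketch_req {
	unsigned int ctx_id;	/* globally unique 20-bit submission ID */
	unsigned int tail;	/* copy of the ring tail for this request */
};

static void
elsp_sketch_pick_pair(struct elsp_sketch_req *q, unsigned int count,
		      struct elsp_sketch_req **port0,
		      struct elsp_sketch_req **port1)
{
	unsigned int i;

	*port0 = count ? &q[0] : NULL;
	*port1 = NULL;
	if (!*port0)
		return;

	/* Later requests for the same context only advance the tail. */
	for (i = 1; i < count && q[i].ctx_id == q[0].ctx_id; i++)
		(*port0)->tail = q[i].tail;

	/* The first request for a different context fills the second port. */
	if (i < count)
		*port1 = &q[i];
}
#endif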
136 #include <sys/cdefs.h>
137 __KERNEL_RCSID(0, "$NetBSD: intel_lrc.c,v 1.8 2021/12/19 12:32:15 riastradh Exp $");
138 
139 #include <linux/interrupt.h>
140 
141 #include "i915_drv.h"
142 #include "i915_perf.h"
143 #include "i915_trace.h"
144 #include "i915_vgpu.h"
145 #include "intel_context.h"
146 #include "intel_engine_pm.h"
147 #include "intel_gt.h"
148 #include "intel_gt_pm.h"
149 #include "intel_gt_requests.h"
150 #include "intel_lrc_reg.h"
151 #include "intel_mocs.h"
152 #include "intel_reset.h"
153 #include "intel_ring.h"
154 #include "intel_workarounds.h"
155 
156 #include <linux/nbsd-namespace.h>
157 
158 #define RING_EXECLIST_QFULL		(1 << 0x2)
159 #define RING_EXECLIST1_VALID		(1 << 0x3)
160 #define RING_EXECLIST0_VALID		(1 << 0x4)
161 #define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
162 #define RING_EXECLIST1_ACTIVE		(1 << 0x11)
163 #define RING_EXECLIST0_ACTIVE		(1 << 0x12)
164 
165 #define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
166 #define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
167 #define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
168 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
169 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
170 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
171 
172 #define GEN8_CTX_STATUS_COMPLETED_MASK \
173 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
174 
175 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
176 
177 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
178 #define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
179 #define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
180 #define GEN12_IDLE_CTX_ID		0x7FF
181 #define GEN12_CSB_CTX_VALID(csb_dw) \
182 	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
183 
184 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
185 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
186 #define WA_TAIL_DWORDS 2
187 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
188 
189 struct virtual_engine {
190 	struct intel_engine_cs base;
191 	struct intel_context context;
192 
193 	/*
194 	 * We allow only a single request through the virtual engine at a time
195 	 * (each request in the timeline waits for the completion fence of
196 	 * the previous before being submitted). By restricting ourselves to
197 	 * only submitting a single request, each request is placed on to a
198 	 * physical engine to maximise load spreading (by virtue of the late greedy
199 	 * scheduling -- each real engine takes the next available request
200 	 * upon idling).
201 	 */
202 	struct i915_request *request;
203 
204 	/*
205 	 * We keep a rbtree of available virtual engines inside each physical
206 	 * engine, sorted by priority. Here we preallocate the nodes we need
207 	 * for the virtual engine, indexed by physical_engine->id.
208 	 */
209 	struct ve_node {
210 		struct rb_node rb;
211 		int prio;
212 		uint64_t order;
213 		bool inserted;
214 	} nodes[I915_NUM_ENGINES];
215 	uint64_t order;
216 
217 	/*
218 	 * Keep track of bonded pairs -- restrictions upon our selection
219 	 * of physical engines any particular request may be submitted to.
220 	 * If we receive a submit-fence from a master engine, we will only
221 	 * use one of sibling_mask physical engines.
222 	 */
223 	struct ve_bond {
224 		const struct intel_engine_cs *master;
225 		intel_engine_mask_t sibling_mask;
226 	} *bonds;
227 	unsigned int num_bonds;
228 
229 	/* And finally, which physical engines this virtual engine maps onto. */
230 	unsigned int num_siblings;
231 	struct intel_engine_cs *siblings[0];
232 };
233 
234 #ifdef __NetBSD__
235 static int
236 compare_ve_nodes(void *cookie, const void *va, const void *vb)
237 {
238 	const struct ve_node *na = va;
239 	const struct ve_node *nb = vb;
240 
241 	if (na->prio < nb->prio)
242 		return -1;
243 	if (na->prio > nb->prio)
244 		return +1;
245 	if (na->order < nb->order)
246 		return -1;
247 	if (na->order > nb->order)
248 		return +1;
249 	return 0;
250 }
251 
252 static int
253 compare_ve_node_key(void *cookie, const void *vn, const void *vk)
254 {
255 	const struct ve_node *n = vn;
256 	const int *k = vk;
257 
258 	if (n->prio < *k)
259 		return -1;
260 	if (n->prio > *k)
261 		return +1;
262 	return 0;
263 }
264 
265 static const rb_tree_ops_t ve_tree_ops = {
266 	.rbto_compare_nodes = compare_ve_nodes,
267 	.rbto_compare_key = compare_ve_node_key,
268 	.rbto_node_offset = offsetof(struct ve_node, rb),
269 };
270 #endif
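/*
 * Minimal usage sketch (compiled out), assuming the standard NetBSD
 * <sys/rbtree.h> API: each physical engine's tree of waiting virtual
 * engines is initialised with ve_tree_ops, after which the node
 * preallocated for that engine can be inserted and looked up by
 * priority.  The helper name and the standalone rb_tree_t parameter are
 * illustrative only.
 */
#if 0
static void
ve_tree_usage_sketch(rb_tree_t *tree, struct virtual_engine *ve,
    unsigned int sibling, int prio)
{
	struct ve_node *node = &ve->nodes[sibling];

	rb_tree_init(tree, &ve_tree_ops);	/* once per physical engine */
	node->prio = prio;
	node->order = ve->order++;		/* FIFO tie-break at equal prio */
	rb_tree_insert_node(tree, node);	/* ordered by compare_ve_nodes */
	(void)rb_tree_find_node(tree, &prio);	/* keyed by compare_ve_node_key */
}
#endif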
271 
272 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
273 {
274 	GEM_BUG_ON(!intel_engine_is_virtual(engine));
275 	return container_of(engine, struct virtual_engine, base);
276 }
277 
278 static int __execlists_context_alloc(struct intel_context *ce,
279 				     struct intel_engine_cs *engine);
280 
281 static void execlists_init_reg_state(u32 *reg_state,
282 				     const struct intel_context *ce,
283 				     const struct intel_engine_cs *engine,
284 				     const struct intel_ring *ring,
285 				     bool close);
286 static void
287 __execlists_update_reg_state(const struct intel_context *ce,
288 			     const struct intel_engine_cs *engine,
289 			     u32 head);
290 
291 static void mark_eio(struct i915_request *rq)
292 {
293 	if (i915_request_completed(rq))
294 		return;
295 
296 	GEM_BUG_ON(i915_request_signaled(rq));
297 
298 	dma_fence_set_error(&rq->fence, -EIO);
299 	i915_request_mark_complete(rq);
300 }
301 
302 static struct i915_request *
303 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
304 {
305 	struct i915_request *active = rq;
306 
307 	rcu_read_lock();
308 	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
309 		if (i915_request_completed(rq))
310 			break;
311 
312 		active = rq;
313 	}
314 	rcu_read_unlock();
315 
316 	return active;
317 }
318 
319 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
320 {
321 	return (i915_ggtt_offset(engine->status_page.vma) +
322 		I915_GEM_HWS_PREEMPT_ADDR);
323 }
324 
325 static inline void
326 ring_set_paused(const struct intel_engine_cs *engine, int state)
327 {
328 	/*
329 	 * We inspect HWS_PREEMPT with a semaphore inside
330 	 * engine->emit_fini_breadcrumb. If the dword is true,
331 	 * the ring is paused as the semaphore will busywait
332 	 * until the dword is false.
333 	 */
334 	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
335 	if (state)
336 		wmb();
337 }
338 
339 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
340 {
341 	return rb_entry(rb, struct i915_priolist, node);
342 }
343 
344 static inline int rq_prio(const struct i915_request *rq)
345 {
346 	return rq->sched.attr.priority;
347 }
348 
349 static int effective_prio(const struct i915_request *rq)
350 {
351 	int prio = rq_prio(rq);
352 
353 	/*
354 	 * If this request is special and must not be interrupted at any
355 	 * cost, so be it. Note we are only checking the most recent request
356 	 * in the context and so may be masking an earlier vip request. It
357 	 * is hoped that under the conditions where nopreempt is used, this
358 	 * will not matter (i.e. all requests to that context will be
359 	 * nopreempt for as long as desired).
360 	 */
361 	if (i915_request_has_nopreempt(rq))
362 		prio = I915_PRIORITY_UNPREEMPTABLE;
363 
364 	/*
365 	 * On unwinding the active request, we give it a priority bump
366 	 * if it has completed waiting on any semaphore. If we know that
367 	 * the request has already started, we can prevent an unwanted
368 	 * preempt-to-idle cycle by taking that into account now.
369 	 */
370 	if (__i915_request_has_started(rq))
371 		prio |= I915_PRIORITY_NOSEMAPHORE;
372 
373 	/* Restrict mere WAIT boosts from triggering preemption */
374 	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
375 	return prio | __NO_PREEMPTION;
376 }
377 
378 static int queue_prio(const struct intel_engine_execlists *execlists)
379 {
380 	struct i915_priolist *p;
381 	struct rb_node *rb;
382 
383 	rb = rb_first_cached(&execlists->queue);
384 	if (!rb)
385 		return INT_MIN;
386 
387 	/*
388 	 * As the priolist[] are inverted, with the highest priority in [0],
389 	 * we have to flip the index value to become priority.
390 	 */
391 	p = to_priolist(rb);
392 	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
393 }
394 
395 static inline bool need_preempt(const struct intel_engine_cs *engine,
396 				const struct i915_request *rq,
397 				struct rb_node *rb)
398 {
399 	int last_prio;
400 
401 	if (!intel_engine_has_semaphores(engine))
402 		return false;
403 
404 	/*
405 	 * Check if the current priority hint merits a preemption attempt.
406 	 *
407 	 * We record the highest value priority we saw during rescheduling
408 	 * prior to this dequeue, therefore we know that if it is strictly
409 	 * less than the current tail of ELSP[0], we do not need to force
410 	 * a preempt-to-idle cycle.
411 	 *
412 	 * However, the priority hint is a mere hint that we may need to
413 	 * preempt. If that hint is stale or we may be trying to preempt
414 	 * ourselves, ignore the request.
415 	 *
416 	 * More naturally we would write
417 	 *      prio >= max(0, last);
418 	 * except that we wish to prevent triggering preemption at the same
419 	 * priority level: the task that is running should remain running
420 	 * to preserve FIFO ordering of dependencies.
421 	 */
422 	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
423 	if (engine->execlists.queue_priority_hint <= last_prio)
424 		return false;
425 
426 	/*
427 	 * Check against the first request in ELSP[1], it will, thanks to the
428 	 * power of PI, be the highest priority of that context.
429 	 */
430 	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
431 	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
432 		return true;
433 
434 	if (rb) {
435 		struct virtual_engine *ve =
436 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
437 		bool preempt = false;
438 
439 		if (engine == ve->siblings[0]) { /* only preempt one sibling */
440 			struct i915_request *next;
441 
442 			rcu_read_lock();
443 			next = READ_ONCE(ve->request);
444 			if (next)
445 				preempt = rq_prio(next) > last_prio;
446 			rcu_read_unlock();
447 		}
448 
449 		if (preempt)
450 			return preempt;
451 	}
452 
453 	/*
454 	 * If the inflight context did not trigger the preemption, then maybe
455 	 * it was the set of queued requests? Pick the highest priority in
456 	 * the queue (the first active priolist) and see if it deserves to be
457 	 * running instead of ELSP[0].
458 	 *
459 	 * The highest priority request in the queue cannot be either
460 	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
461 	 * context, its priority would not exceed ELSP[0] aka last_prio.
462 	 */
463 	return queue_prio(&engine->execlists) > last_prio;
464 }
465 
466 __maybe_unused static inline bool
467 assert_priority_queue(const struct i915_request *prev,
468 		      const struct i915_request *next)
469 {
470 	/*
471 	 * Without preemption, the prev may refer to the still active element
472 	 * which we refuse to let go.
473 	 *
474 	 * Even with preemption, there are times when we think it is better not
475 	 * to preempt and leave an ostensibly lower priority request in flight.
476 	 */
477 	if (i915_request_is_active(prev))
478 		return true;
479 
480 	return rq_prio(prev) >= rq_prio(next);
481 }
482 
483 /*
484  * The context descriptor encodes various attributes of a context,
485  * including its GTT address and some flags. Because it's fairly
486  * expensive to calculate, we'll just do it once and cache the result,
487  * which remains valid until the context is unpinned.
488  *
489  * This is what a descriptor looks like, from LSB to MSB::
490  *
491  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
492  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
493  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
494  *      bits 53-54:    mbz, reserved for use by hardware
495  *      bits 55-63:    group ID, currently unused and set to 0
496  *
497  * Starting from Gen11, the upper dword of the descriptor has a new format:
498  *
499  *      bits 32-36:    reserved
500  *      bits 37-47:    SW context ID
501  *      bits 48:53:    engine instance
502  *      bit 54:        mbz, reserved for use by hardware
503  *      bits 55-60:    SW counter
504  *      bits 61-63:    engine class
505  *
506  * engine info, SW context ID and SW counter need to form a unique number
507  * (Context ID) per lrc.
508  */
509 static u64
510 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
511 {
512 	u64 desc;
513 
514 	desc = INTEL_LEGACY_32B_CONTEXT;
515 	if (i915_vm_is_4lvl(ce->vm))
516 		desc = INTEL_LEGACY_64B_CONTEXT;
517 	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
518 
519 	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
520 	if (IS_GEN(engine->i915, 8))
521 		desc |= GEN8_CTX_L3LLC_COHERENT;
522 
523 	desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
524 	/*
525 	 * The following 32bits are copied into the OA reports (dword 2).
526 	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
527 	 * anything below.
528 	 */
529 	if (INTEL_GEN(engine->i915) >= 11) {
530 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
531 								/* bits 48-53 */
532 
533 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
534 								/* bits 61-63 */
535 	}
536 
537 	return desc;
538 }
539 
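/*
 * Illustrative decode (compiled out) of the Gen11+ upper-dword fields
 * documented above, using the same FIELD_GET()/GENMASK_ULL() helpers
 * already used for CSB parsing in this file.  The helper name is
 * hypothetical; the bit positions follow the layout comment before
 * lrc_descriptor().
 */
#if 0
static void
lrc_descriptor_fields_gen11(u64 desc, u32 *sw_ctx_id, u32 *instance,
			    u32 *class)
{
	*sw_ctx_id = FIELD_GET(GENMASK_ULL(47, 37), desc); /* SW context ID */
	*instance = FIELD_GET(GENMASK_ULL(53, 48), desc);  /* engine instance */
	*class = FIELD_GET(GENMASK_ULL(63, 61), desc);     /* engine class */
}
#endif
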
540 static inline unsigned int dword_in_page(void *addr)
541 {
542 	return offset_in_page(addr) / sizeof(u32);
543 }
544 
545 static void set_offsets(u32 *regs,
546 			const u8 *data,
547 			const struct intel_engine_cs *engine,
548 			bool clear)
549 #define NOP(x) (BIT(7) | (x))
550 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
551 #define POSTED BIT(0)
552 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
553 #define REG16(x) \
554 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
555 	(((x) >> 2) & 0x7f)
556 #define END(x) 0, (x)
557 {
558 	const u32 base = engine->mmio_base;
559 
560 	while (*data) {
561 		u8 count, flags;
562 
563 		if (*data & BIT(7)) { /* skip */
564 			count = *data++ & ~BIT(7);
565 			if (clear)
566 				memset32(regs, MI_NOOP, count);
567 			regs += count;
568 			continue;
569 		}
570 
571 		count = *data & 0x3f;
572 		flags = *data >> 6;
573 		data++;
574 
575 		*regs = MI_LOAD_REGISTER_IMM(count);
576 		if (flags & POSTED)
577 			*regs |= MI_LRI_FORCE_POSTED;
578 		if (INTEL_GEN(engine->i915) >= 11)
579 			*regs |= MI_LRI_CS_MMIO;
580 		regs++;
581 
582 		GEM_BUG_ON(!count);
583 		do {
584 			u32 offset = 0;
585 			u8 v;
586 
587 			do {
588 				v = *data++;
589 				offset <<= 7;
590 				offset |= v & ~BIT(7);
591 			} while (v & BIT(7));
592 
593 			regs[0] = base + (offset << 2);
594 			if (clear)
595 				regs[1] = 0;
596 			regs += 2;
597 		} while (--count);
598 	}
599 
600 	if (clear) {
601 		u8 count = *++data;
602 
603 		/* Clear past the tail for HW access */
604 		GEM_BUG_ON(dword_in_page(regs) > count);
605 		memset32(regs, MI_NOOP, count - dword_in_page(regs));
606 
607 		/* Close the batch; used mainly by live_lrc_layout() */
608 		*regs = MI_BATCH_BUFFER_END;
609 		if (INTEL_GEN(engine->i915) >= 10)
610 			*regs |= BIT(0);
611 	}
612 }
613 
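/*
 * Worked example of the table encoding consumed by set_offsets(), taken
 * from the tail of gen8_xcs_offsets[] below: NOP(13) skips (and, when
 * clearing, MI_NOOP-fills) 13 dwords; LRI(2, 0) emits
 * MI_LOAD_REGISTER_IMM(2); REG16(0x200) and REG(0x028) then expand to
 * the register offsets mmio_base + 0x200 and mmio_base + 0x028, each
 * followed by its value slot; END(80) terminates the table and, when
 * clearing, pads the remaining dwords up to dword 80 of the page with
 * MI_NOOP and closes the state with MI_BATCH_BUFFER_END.
 */
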
614 static const u8 gen8_xcs_offsets[] = {
615 	NOP(1),
616 	LRI(11, 0),
617 	REG16(0x244),
618 	REG(0x034),
619 	REG(0x030),
620 	REG(0x038),
621 	REG(0x03c),
622 	REG(0x168),
623 	REG(0x140),
624 	REG(0x110),
625 	REG(0x11c),
626 	REG(0x114),
627 	REG(0x118),
628 
629 	NOP(9),
630 	LRI(9, 0),
631 	REG16(0x3a8),
632 	REG16(0x28c),
633 	REG16(0x288),
634 	REG16(0x284),
635 	REG16(0x280),
636 	REG16(0x27c),
637 	REG16(0x278),
638 	REG16(0x274),
639 	REG16(0x270),
640 
641 	NOP(13),
642 	LRI(2, 0),
643 	REG16(0x200),
644 	REG(0x028),
645 
646 	END(80)
647 };
648 
649 static const u8 gen9_xcs_offsets[] = {
650 	NOP(1),
651 	LRI(14, POSTED),
652 	REG16(0x244),
653 	REG(0x034),
654 	REG(0x030),
655 	REG(0x038),
656 	REG(0x03c),
657 	REG(0x168),
658 	REG(0x140),
659 	REG(0x110),
660 	REG(0x11c),
661 	REG(0x114),
662 	REG(0x118),
663 	REG(0x1c0),
664 	REG(0x1c4),
665 	REG(0x1c8),
666 
667 	NOP(3),
668 	LRI(9, POSTED),
669 	REG16(0x3a8),
670 	REG16(0x28c),
671 	REG16(0x288),
672 	REG16(0x284),
673 	REG16(0x280),
674 	REG16(0x27c),
675 	REG16(0x278),
676 	REG16(0x274),
677 	REG16(0x270),
678 
679 	NOP(13),
680 	LRI(1, POSTED),
681 	REG16(0x200),
682 
683 	NOP(13),
684 	LRI(44, POSTED),
685 	REG(0x028),
686 	REG(0x09c),
687 	REG(0x0c0),
688 	REG(0x178),
689 	REG(0x17c),
690 	REG16(0x358),
691 	REG(0x170),
692 	REG(0x150),
693 	REG(0x154),
694 	REG(0x158),
695 	REG16(0x41c),
696 	REG16(0x600),
697 	REG16(0x604),
698 	REG16(0x608),
699 	REG16(0x60c),
700 	REG16(0x610),
701 	REG16(0x614),
702 	REG16(0x618),
703 	REG16(0x61c),
704 	REG16(0x620),
705 	REG16(0x624),
706 	REG16(0x628),
707 	REG16(0x62c),
708 	REG16(0x630),
709 	REG16(0x634),
710 	REG16(0x638),
711 	REG16(0x63c),
712 	REG16(0x640),
713 	REG16(0x644),
714 	REG16(0x648),
715 	REG16(0x64c),
716 	REG16(0x650),
717 	REG16(0x654),
718 	REG16(0x658),
719 	REG16(0x65c),
720 	REG16(0x660),
721 	REG16(0x664),
722 	REG16(0x668),
723 	REG16(0x66c),
724 	REG16(0x670),
725 	REG16(0x674),
726 	REG16(0x678),
727 	REG16(0x67c),
728 	REG(0x068),
729 
730 	END(176)
731 };
732 
733 static const u8 gen12_xcs_offsets[] = {
734 	NOP(1),
735 	LRI(13, POSTED),
736 	REG16(0x244),
737 	REG(0x034),
738 	REG(0x030),
739 	REG(0x038),
740 	REG(0x03c),
741 	REG(0x168),
742 	REG(0x140),
743 	REG(0x110),
744 	REG(0x1c0),
745 	REG(0x1c4),
746 	REG(0x1c8),
747 	REG(0x180),
748 	REG16(0x2b4),
749 
750 	NOP(5),
751 	LRI(9, POSTED),
752 	REG16(0x3a8),
753 	REG16(0x28c),
754 	REG16(0x288),
755 	REG16(0x284),
756 	REG16(0x280),
757 	REG16(0x27c),
758 	REG16(0x278),
759 	REG16(0x274),
760 	REG16(0x270),
761 
762 	END(80)
763 };
764 
765 static const u8 gen8_rcs_offsets[] = {
766 	NOP(1),
767 	LRI(14, POSTED),
768 	REG16(0x244),
769 	REG(0x034),
770 	REG(0x030),
771 	REG(0x038),
772 	REG(0x03c),
773 	REG(0x168),
774 	REG(0x140),
775 	REG(0x110),
776 	REG(0x11c),
777 	REG(0x114),
778 	REG(0x118),
779 	REG(0x1c0),
780 	REG(0x1c4),
781 	REG(0x1c8),
782 
783 	NOP(3),
784 	LRI(9, POSTED),
785 	REG16(0x3a8),
786 	REG16(0x28c),
787 	REG16(0x288),
788 	REG16(0x284),
789 	REG16(0x280),
790 	REG16(0x27c),
791 	REG16(0x278),
792 	REG16(0x274),
793 	REG16(0x270),
794 
795 	NOP(13),
796 	LRI(1, 0),
797 	REG(0x0c8),
798 
799 	END(80)
800 };
801 
802 static const u8 gen9_rcs_offsets[] = {
803 	NOP(1),
804 	LRI(14, POSTED),
805 	REG16(0x244),
806 	REG(0x34),
807 	REG(0x30),
808 	REG(0x38),
809 	REG(0x3c),
810 	REG(0x168),
811 	REG(0x140),
812 	REG(0x110),
813 	REG(0x11c),
814 	REG(0x114),
815 	REG(0x118),
816 	REG(0x1c0),
817 	REG(0x1c4),
818 	REG(0x1c8),
819 
820 	NOP(3),
821 	LRI(9, POSTED),
822 	REG16(0x3a8),
823 	REG16(0x28c),
824 	REG16(0x288),
825 	REG16(0x284),
826 	REG16(0x280),
827 	REG16(0x27c),
828 	REG16(0x278),
829 	REG16(0x274),
830 	REG16(0x270),
831 
832 	NOP(13),
833 	LRI(1, 0),
834 	REG(0xc8),
835 
836 	NOP(13),
837 	LRI(44, POSTED),
838 	REG(0x28),
839 	REG(0x9c),
840 	REG(0xc0),
841 	REG(0x178),
842 	REG(0x17c),
843 	REG16(0x358),
844 	REG(0x170),
845 	REG(0x150),
846 	REG(0x154),
847 	REG(0x158),
848 	REG16(0x41c),
849 	REG16(0x600),
850 	REG16(0x604),
851 	REG16(0x608),
852 	REG16(0x60c),
853 	REG16(0x610),
854 	REG16(0x614),
855 	REG16(0x618),
856 	REG16(0x61c),
857 	REG16(0x620),
858 	REG16(0x624),
859 	REG16(0x628),
860 	REG16(0x62c),
861 	REG16(0x630),
862 	REG16(0x634),
863 	REG16(0x638),
864 	REG16(0x63c),
865 	REG16(0x640),
866 	REG16(0x644),
867 	REG16(0x648),
868 	REG16(0x64c),
869 	REG16(0x650),
870 	REG16(0x654),
871 	REG16(0x658),
872 	REG16(0x65c),
873 	REG16(0x660),
874 	REG16(0x664),
875 	REG16(0x668),
876 	REG16(0x66c),
877 	REG16(0x670),
878 	REG16(0x674),
879 	REG16(0x678),
880 	REG16(0x67c),
881 	REG(0x68),
882 
883 	END(176)
884 };
885 
886 static const u8 gen11_rcs_offsets[] = {
887 	NOP(1),
888 	LRI(15, POSTED),
889 	REG16(0x244),
890 	REG(0x034),
891 	REG(0x030),
892 	REG(0x038),
893 	REG(0x03c),
894 	REG(0x168),
895 	REG(0x140),
896 	REG(0x110),
897 	REG(0x11c),
898 	REG(0x114),
899 	REG(0x118),
900 	REG(0x1c0),
901 	REG(0x1c4),
902 	REG(0x1c8),
903 	REG(0x180),
904 
905 	NOP(1),
906 	LRI(9, POSTED),
907 	REG16(0x3a8),
908 	REG16(0x28c),
909 	REG16(0x288),
910 	REG16(0x284),
911 	REG16(0x280),
912 	REG16(0x27c),
913 	REG16(0x278),
914 	REG16(0x274),
915 	REG16(0x270),
916 
917 	LRI(1, POSTED),
918 	REG(0x1b0),
919 
920 	NOP(10),
921 	LRI(1, 0),
922 	REG(0x0c8),
923 
924 	END(80)
925 };
926 
927 static const u8 gen12_rcs_offsets[] = {
928 	NOP(1),
929 	LRI(13, POSTED),
930 	REG16(0x244),
931 	REG(0x034),
932 	REG(0x030),
933 	REG(0x038),
934 	REG(0x03c),
935 	REG(0x168),
936 	REG(0x140),
937 	REG(0x110),
938 	REG(0x1c0),
939 	REG(0x1c4),
940 	REG(0x1c8),
941 	REG(0x180),
942 	REG16(0x2b4),
943 
944 	NOP(5),
945 	LRI(9, POSTED),
946 	REG16(0x3a8),
947 	REG16(0x28c),
948 	REG16(0x288),
949 	REG16(0x284),
950 	REG16(0x280),
951 	REG16(0x27c),
952 	REG16(0x278),
953 	REG16(0x274),
954 	REG16(0x270),
955 
956 	LRI(3, POSTED),
957 	REG(0x1b0),
958 	REG16(0x5a8),
959 	REG16(0x5ac),
960 
961 	NOP(6),
962 	LRI(1, 0),
963 	REG(0x0c8),
964 
965 	END(80)
966 };
967 
968 #undef END
969 #undef REG16
970 #undef REG
971 #undef LRI
972 #undef NOP
973 
974 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
975 {
976 	/*
977 	 * The gen12+ lists only have the registers we program in the basic
978 	 * default state. We rely on the context image using relative
979 	 * addressing to automatically fix up the register state between the
980 	 * physical engines for the virtual engine.
981 	 */
982 	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
983 		   !intel_engine_has_relative_mmio(engine));
984 
985 	if (engine->class == RENDER_CLASS) {
986 		if (INTEL_GEN(engine->i915) >= 12)
987 			return gen12_rcs_offsets;
988 		else if (INTEL_GEN(engine->i915) >= 11)
989 			return gen11_rcs_offsets;
990 		else if (INTEL_GEN(engine->i915) >= 9)
991 			return gen9_rcs_offsets;
992 		else
993 			return gen8_rcs_offsets;
994 	} else {
995 		if (INTEL_GEN(engine->i915) >= 12)
996 			return gen12_xcs_offsets;
997 		else if (INTEL_GEN(engine->i915) >= 9)
998 			return gen9_xcs_offsets;
999 		else
1000 			return gen8_xcs_offsets;
1001 	}
1002 }
1003 
1004 static struct i915_request *
1005 __unwind_incomplete_requests(struct intel_engine_cs *engine)
1006 {
1007 	struct i915_request *rq, *rn, *active = NULL;
1008 	struct list_head *uninitialized_var(pl);
1009 	int prio = I915_PRIORITY_INVALID;
1010 
1011 	lockdep_assert_held(&engine->active.lock);
1012 
1013 	list_for_each_entry_safe_reverse(rq, rn,
1014 					 &engine->active.requests,
1015 					 sched.link) {
1016 		if (i915_request_completed(rq))
1017 			continue; /* XXX */
1018 
1019 		__i915_request_unsubmit(rq);
1020 
1021 		/*
1022 		 * Push the request back into the queue for later resubmission.
1023 		 * If this request is not native to this physical engine (i.e.
1024 		 * it came from a virtual source), push it back onto the virtual
1025 		 * engine so that it can be moved across onto another physical
1026 		 * engine as load dictates.
1027 		 */
1028 		if (likely(rq->execution_mask == engine->mask)) {
1029 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1030 			if (rq_prio(rq) != prio) {
1031 				prio = rq_prio(rq);
1032 				pl = i915_sched_lookup_priolist(engine, prio);
1033 			}
1034 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1035 
1036 			list_move(&rq->sched.link, pl);
1037 			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1038 
1039 			active = rq;
1040 		} else {
1041 			struct intel_engine_cs *owner = rq->context->engine;
1042 
1043 			/*
1044 			 * Decouple the virtual breadcrumb before moving it
1045 			 * back to the virtual engine -- we don't want the
1046 			 * request to complete in the background and try
1047 			 * and cancel the breadcrumb on the virtual engine
1048 			 * (instead of the old engine where it is linked)!
1049 			 */
1050 			if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1051 				     &rq->fence.flags)) {
1052 				spin_lock_nested(&rq->lock,
1053 						 SINGLE_DEPTH_NESTING);
1054 				i915_request_cancel_breadcrumb(rq);
1055 				spin_unlock(&rq->lock);
1056 			}
1057 			rq->engine = owner;
1058 			owner->submit_request(rq);
1059 			active = NULL;
1060 		}
1061 	}
1062 
1063 	return active;
1064 }
1065 
1066 struct i915_request *
1067 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1068 {
1069 	struct intel_engine_cs *engine =
1070 		container_of(execlists, typeof(*engine), execlists);
1071 
1072 	return __unwind_incomplete_requests(engine);
1073 }
1074 
1075 static inline void
1076 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1077 {
1078 	/*
1079 	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
1080 	 * the compiler should eliminate this function as dead-code.
1081 	 */
1082 	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1083 		return;
1084 
1085 	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1086 				   status, rq);
1087 }
1088 
1089 static void intel_engine_context_in(struct intel_engine_cs *engine)
1090 {
1091 	unsigned long flags;
1092 
1093 	if (READ_ONCE(engine->stats.enabled) == 0)
1094 		return;
1095 
1096 	write_seqlock_irqsave(&engine->stats.lock, flags);
1097 
1098 	if (engine->stats.enabled > 0) {
1099 		if (engine->stats.active++ == 0)
1100 			engine->stats.start = ktime_get();
1101 		GEM_BUG_ON(engine->stats.active == 0);
1102 	}
1103 
1104 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1105 }
1106 
1107 static void intel_engine_context_out(struct intel_engine_cs *engine)
1108 {
1109 	unsigned long flags;
1110 
1111 	if (READ_ONCE(engine->stats.enabled) == 0)
1112 		return;
1113 
1114 	write_seqlock_irqsave(&engine->stats.lock, flags);
1115 
1116 	if (engine->stats.enabled > 0) {
1117 		ktime_t last;
1118 
1119 		if (engine->stats.active && --engine->stats.active == 0) {
1120 			/*
1121 			 * Decrement the active context count and, in case the GPU
1122 			 * is now idle, add the elapsed time to the running total.
1123 			 */
1124 			last = ktime_sub(ktime_get(), engine->stats.start);
1125 
1126 			engine->stats.total = ktime_add(engine->stats.total,
1127 							last);
1128 		} else if (engine->stats.active == 0) {
1129 			/*
1130 			 * After turning on engine stats, context out might be
1131 			 * the first event in which case we account from the
1132 			 * time stats gathering was turned on.
1133 			 */
1134 			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
1135 
1136 			engine->stats.total = ktime_add(engine->stats.total,
1137 							last);
1138 		}
1139 	}
1140 
1141 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1142 }
1143 
1144 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
1145 {
1146 	if (INTEL_GEN(engine->i915) >= 12)
1147 		return 0x60;
1148 	else if (INTEL_GEN(engine->i915) >= 9)
1149 		return 0x54;
1150 	else if (engine->class == RENDER_CLASS)
1151 		return 0x58;
1152 	else
1153 		return -1;
1154 }
1155 
1156 static void
1157 execlists_check_context(const struct intel_context *ce,
1158 			const struct intel_engine_cs *engine)
1159 {
1160 	const struct intel_ring *ring = ce->ring;
1161 	u32 *regs = ce->lrc_reg_state;
1162 	bool valid = true;
1163 	int x;
1164 
1165 	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1166 		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1167 		       engine->name,
1168 		       regs[CTX_RING_START],
1169 		       i915_ggtt_offset(ring->vma));
1170 		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1171 		valid = false;
1172 	}
1173 
1174 	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1175 	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1176 		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1177 		       engine->name,
1178 		       regs[CTX_RING_CTL],
1179 		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1180 		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1181 		valid = false;
1182 	}
1183 
1184 	x = lrc_ring_mi_mode(engine);
1185 	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1186 		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1187 		       engine->name, regs[x + 1]);
1188 		regs[x + 1] &= ~STOP_RING;
1189 		regs[x + 1] |= STOP_RING << 16;
1190 		valid = false;
1191 	}
1192 
1193 	WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1194 }
1195 
1196 static void restore_default_state(struct intel_context *ce,
1197 				  struct intel_engine_cs *engine)
1198 {
1199 	u32 *regs = ce->lrc_reg_state;
1200 
1201 	if (engine->pinned_default_state)
1202 		memcpy(regs, /* skip restoring the vanilla PPHWSP */
1203 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
1204 		       engine->context_size - PAGE_SIZE);
1205 
1206 	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
1207 }
1208 
1209 static void reset_active(struct i915_request *rq,
1210 			 struct intel_engine_cs *engine)
1211 {
1212 	struct intel_context * const ce = rq->context;
1213 	u32 head;
1214 
1215 	/*
1216 	 * The executing context has been cancelled. We want to prevent
1217 	 * further execution along this context and propagate the error on
1218 	 * to anything depending on its results.
1219 	 *
1220 	 * In __i915_request_submit(), we apply the -EIO and remove the
1221 	 * requests' payloads for any banned requests. But first, we must
1222 	 * rewind the context back to the start of the incomplete request so
1223 	 * that we do not jump back into the middle of the batch.
1224 	 *
1225 	 * We preserve the breadcrumbs and semaphores of the incomplete
1226 	 * requests so that inter-timeline dependencies (i.e. other timelines)
1227 	 * remain correctly ordered. And we defer to __i915_request_submit()
1228 	 * so that all asynchronous waits are correctly handled.
1229 	 */
1230 	ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1231 		     rq->fence.context, rq->fence.seqno);
1232 
1233 	/* On resubmission of the active request, payload will be scrubbed */
1234 	if (i915_request_completed(rq))
1235 		head = rq->tail;
1236 	else
1237 		head = active_request(ce->timeline, rq)->head;
1238 	head = intel_ring_wrap(ce->ring, head);
1239 
1240 	/* Scrub the context image to prevent replaying the previous batch */
1241 	restore_default_state(ce, engine);
1242 	__execlists_update_reg_state(ce, engine, head);
1243 
1244 	/* We've switched away, so this should be a no-op, but intent matters */
1245 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1246 }
1247 
1248 static inline struct intel_engine_cs *
1249 __execlists_schedule_in(struct i915_request *rq)
1250 {
1251 	struct intel_engine_cs * const engine = rq->engine;
1252 	struct intel_context * const ce = rq->context;
1253 
1254 	intel_context_get(ce);
1255 
1256 	if (unlikely(intel_context_is_banned(ce)))
1257 		reset_active(rq, engine);
1258 
1259 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1260 		execlists_check_context(ce, engine);
1261 
1262 	if (ce->tag) {
1263 		/* Use a fixed tag for OA and friends */
1264 		ce->lrc_desc |= (u64)ce->tag << 32;
1265 	} else {
1266 		/* We don't need a strict matching tag, just different values */
1267 		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
1268 		ce->lrc_desc |=
1269 			(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
1270 			GEN11_SW_CTX_ID_SHIFT;
1271 		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
1272 	}
1273 
1274 	__intel_gt_pm_get(engine->gt);
1275 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1276 	intel_engine_context_in(engine);
1277 
1278 	return engine;
1279 }
1280 
1281 static inline struct i915_request *
1282 execlists_schedule_in(struct i915_request *rq, int idx)
1283 {
1284 	struct intel_context * const ce = rq->context;
1285 	struct intel_engine_cs *old;
1286 
1287 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1288 	trace_i915_request_in(rq, idx);
1289 
1290 	old = READ_ONCE(ce->inflight);
1291 	do {
1292 		if (!old) {
1293 			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1294 			break;
1295 		}
1296 	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1297 
1298 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1299 	return i915_request_get(rq);
1300 }
1301 
1302 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1303 {
1304 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1305 	struct i915_request *next = READ_ONCE(ve->request);
1306 
1307 	if (next && next->execution_mask & ~rq->execution_mask)
1308 		tasklet_schedule(&ve->base.execlists.tasklet);
1309 }
1310 
1311 static inline void
1312 __execlists_schedule_out(struct i915_request *rq,
1313 			 struct intel_engine_cs * const engine)
1314 {
1315 	struct intel_context * const ce = rq->context;
1316 
1317 	/*
1318 	 * NB process_csb() is not under the engine->active.lock and hence
1319 	 * schedule_out can race with schedule_in meaning that we should
1320 	 * refrain from doing non-trivial work here.
1321 	 */
1322 
1323 	/*
1324 	 * If we have just completed this context, the engine may now be
1325 	 * idle and we want to re-enter powersaving.
1326 	 */
1327 	if (list_is_last(&rq->link, &ce->timeline->requests) &&
1328 	    i915_request_completed(rq))
1329 		intel_engine_add_retire(engine, ce->timeline);
1330 
1331 	intel_engine_context_out(engine);
1332 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1333 	intel_gt_pm_put_async(engine->gt);
1334 
1335 	/*
1336 	 * If this is part of a virtual engine, its next request may
1337 	 * have been blocked waiting for access to the active context.
1338 	 * We have to kick all the siblings again in case we need to
1339 	 * switch (e.g. the next request is not runnable on this
1340 	 * engine). Hopefully, we will already have submitted the next
1341 	 * request before the tasklet runs and do not need to rebuild
1342 	 * each virtual tree and kick everyone again.
1343 	 */
1344 	if (ce->engine != engine)
1345 		kick_siblings(rq, ce);
1346 
1347 	intel_context_put(ce);
1348 }
1349 
1350 static inline void
1351 execlists_schedule_out(struct i915_request *rq)
1352 {
1353 	struct intel_context * const ce = rq->context;
1354 	struct intel_engine_cs *cur, *old;
1355 
1356 	trace_i915_request_out(rq);
1357 
1358 	old = READ_ONCE(ce->inflight);
1359 	do
1360 		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1361 	while (!try_cmpxchg(&ce->inflight, &old, cur));
1362 	if (!cur)
1363 		__execlists_schedule_out(rq, old);
1364 
1365 	i915_request_put(rq);
1366 }
1367 
1368 static u64 execlists_update_context(struct i915_request *rq)
1369 {
1370 	struct intel_context *ce = rq->context;
1371 	u64 desc = ce->lrc_desc;
1372 	u32 tail, prev;
1373 
1374 	/*
1375 	 * WaIdleLiteRestore:bdw,skl
1376 	 *
1377 	 * We should never submit the context with the same RING_TAIL twice
1378 	 * just in case we submit an empty ring, which confuses the HW.
1379 	 *
1380 	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1381 	 * the normal request to be able to always advance the RING_TAIL on
1382 	 * subsequent resubmissions (for lite restore). Should that fail us,
1383 	 * and we try and submit the same tail again, force the context
1384 	 * reload.
1385 	 *
1386 	 * If we need to return to a preempted context, we need to skip the
1387 	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1388 	 * HW has a tendency to ignore us rewinding the TAIL to the end of
1389 	 * an earlier request.
1390 	 */
1391 	tail = intel_ring_set_tail(rq->ring, rq->tail);
1392 	prev = ce->lrc_reg_state[CTX_RING_TAIL];
1393 	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1394 		desc |= CTX_DESC_FORCE_RESTORE;
1395 	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1396 	rq->tail = rq->wa_tail;
1397 
1398 	/*
1399 	 * Make sure the context image is complete before we submit it to HW.
1400 	 *
1401 	 * Ostensibly, writes (including the WCB) should be flushed prior to
1402 	 * an uncached write such as our mmio register access, the empirical
1403 	 * evidence (esp. on Braswell) suggests that the WC write into memory
1404 	 * may not be visible to the HW prior to the completion of the UC
1405 	 * register write and that we may begin execution from the context
1406 	 * before its image is complete leading to invalid PD chasing.
1407 	 */
1408 	wmb();
1409 
1410 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
1411 	return desc;
1412 }
1413 
1414 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1415 {
1416 #ifdef __NetBSD__
1417 	if (execlists->ctrl_reg) {
1418 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2, lower_32_bits(desc));
1419 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2 + 1, upper_32_bits(desc));
1420 	} else {
1421 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, upper_32_bits(desc));
1422 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, lower_32_bits(desc));
1423 	}
1424 #else
1425 	if (execlists->ctrl_reg) {
1426 		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1427 		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1428 	} else {
1429 		writel(upper_32_bits(desc), execlists->submit_reg);
1430 		writel(lower_32_bits(desc), execlists->submit_reg);
1431 	}
1432 #endif
1433 }
1434 
1435 static __maybe_unused void
1436 trace_ports(const struct intel_engine_execlists *execlists,
1437 	    const char *msg,
1438 	    struct i915_request * const *ports)
1439 {
1440 	const struct intel_engine_cs *engine =
1441 		const_container_of(execlists, typeof(*engine), execlists);
1442 
1443 	if (!ports[0])
1444 		return;
1445 
1446 	ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
1447 		     ports[0]->fence.context,
1448 		     ports[0]->fence.seqno,
1449 		     i915_request_completed(ports[0]) ? "!" :
1450 		     i915_request_started(ports[0]) ? "*" :
1451 		     "",
1452 		     ports[1] ? ports[1]->fence.context : 0,
1453 		     ports[1] ? ports[1]->fence.seqno : 0);
1454 }
1455 
1456 static __maybe_unused bool
1457 assert_pending_valid(const struct intel_engine_execlists *execlists,
1458 		     const char *msg)
1459 {
1460 	struct i915_request * const *port, *rq;
1461 	struct intel_context *ce = NULL;
1462 
1463 	trace_ports(execlists, msg, execlists->pending);
1464 
1465 	if (!execlists->pending[0]) {
1466 		GEM_TRACE_ERR("Nothing pending for promotion!\n");
1467 		return false;
1468 	}
1469 
1470 	if (execlists->pending[execlists_num_ports(execlists)]) {
1471 		GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
1472 			      execlists_num_ports(execlists));
1473 		return false;
1474 	}
1475 
1476 	for (port = execlists->pending; (rq = *port); port++) {
1477 		unsigned long flags;
1478 		bool ok = true;
1479 
1480 		GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1481 		GEM_BUG_ON(!i915_request_is_active(rq));
1482 
1483 		if (ce == rq->context) {
1484 			GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
1485 				      ce->timeline->fence_context,
1486 				      port - execlists->pending);
1487 			return false;
1488 		}
1489 		ce = rq->context;
1490 
1491 		/* Hold tightly onto the lock to prevent concurrent retires! */
1492 		if (!spin_trylock_irqsave(&rq->lock, flags))
1493 			continue;
1494 
1495 		if (i915_request_completed(rq))
1496 			goto unlock;
1497 
1498 		if (i915_active_is_idle(&ce->active) &&
1499 		    !intel_context_is_barrier(ce)) {
1500 			GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
1501 				      ce->timeline->fence_context,
1502 				      port - execlists->pending);
1503 			ok = false;
1504 			goto unlock;
1505 		}
1506 
1507 		if (!i915_vma_is_pinned(ce->state)) {
1508 			GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
1509 				      ce->timeline->fence_context,
1510 				      port - execlists->pending);
1511 			ok = false;
1512 			goto unlock;
1513 		}
1514 
1515 		if (!i915_vma_is_pinned(ce->ring->vma)) {
1516 			GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
1517 				      ce->timeline->fence_context,
1518 				      port - execlists->pending);
1519 			ok = false;
1520 			goto unlock;
1521 		}
1522 
1523 unlock:
1524 		spin_unlock_irqrestore(&rq->lock, flags);
1525 		if (!ok)
1526 			return false;
1527 	}
1528 
1529 	return ce;
1530 }
1531 
1532 static void execlists_submit_ports(struct intel_engine_cs *engine)
1533 {
1534 	struct intel_engine_execlists *execlists = &engine->execlists;
1535 	unsigned int n;
1536 
1537 	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1538 
1539 	/*
1540 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
1541 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
1542 	 * not be relinquished until the device is idle (see
1543 	 * i915_gem_idle_work_handler()). As a precaution, we make sure
1544 	 * that all ELSP are drained i.e. we have processed the CSB,
1545 	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
1546 	 */
1547 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1548 
1549 	/*
1550 	 * ELSQ note: the submit queue is not cleared after being submitted
1551 	 * to the HW so we need to make sure we always clean it up. This is
1552 	 * currently ensured by the fact that we always write the same number
1553 	 * of elsq entries, keep this in mind before changing the loop below.
1554 	 */
1555 	for (n = execlists_num_ports(execlists); n--; ) {
1556 		struct i915_request *rq = execlists->pending[n];
1557 
1558 		write_desc(execlists,
1559 			   rq ? execlists_update_context(rq) : 0,
1560 			   n);
1561 	}
1562 
1563 	/* we need to manually load the submit queue */
1564 	if (execlists->ctrl_reg)
1565 #ifdef __NetBSD__
1566 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->ctrl_reg, EL_CTRL_LOAD);
1567 #else
1568 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1569 #endif
1570 }
1571 
1572 static bool ctx_single_port_submission(const struct intel_context *ce)
1573 {
1574 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1575 		intel_context_force_single_submission(ce));
1576 }
1577 
1578 static bool can_merge_ctx(const struct intel_context *prev,
1579 			  const struct intel_context *next)
1580 {
1581 	if (prev != next)
1582 		return false;
1583 
1584 	if (ctx_single_port_submission(prev))
1585 		return false;
1586 
1587 	return true;
1588 }
1589 
1590 static bool can_merge_rq(const struct i915_request *prev,
1591 			 const struct i915_request *next)
1592 {
1593 	GEM_BUG_ON(prev == next);
1594 	GEM_BUG_ON(!assert_priority_queue(prev, next));
1595 
1596 	/*
1597 	 * We do not submit known completed requests. Therefore if the next
1598 	 * request is already completed, we can pretend to merge it in
1599 	 * with the previous context (and we will skip updating the ELSP
1600 	 * and tracking). Thus hopefully keeping the ELSP full with active
1601 	 * contexts, despite the best efforts of preempt-to-busy to confuse
1602 	 * us.
1603 	 */
1604 	if (i915_request_completed(next))
1605 		return true;
1606 
1607 	if (unlikely((prev->fence.flags ^ next->fence.flags) &
1608 		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1609 		      BIT(I915_FENCE_FLAG_SENTINEL))))
1610 		return false;
1611 
1612 	if (!can_merge_ctx(prev->context, next->context))
1613 		return false;
1614 
1615 	return true;
1616 }
1617 
1618 static void virtual_update_register_offsets(u32 *regs,
1619 					    struct intel_engine_cs *engine)
1620 {
1621 	set_offsets(regs, reg_offsets(engine), engine, false);
1622 }
1623 
1624 static bool virtual_matches(const struct virtual_engine *ve,
1625 			    const struct i915_request *rq,
1626 			    const struct intel_engine_cs *engine)
1627 {
1628 	const struct intel_engine_cs *inflight;
1629 
1630 	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1631 		return false;
1632 
1633 	/*
1634 	 * We track when the HW has completed saving the context image
1635 	 * (i.e. when we have seen the final CS event switching out of
1636 	 * the context) and must not overwrite the context image before
1637 	 * then. This restricts us to only using the active engine
1638 	 * while the previous virtualized request is inflight (so
1639 	 * we reuse the register offsets). This is a very small
1640 	 * hysteresis on the greedy selection algorithm.
1641 	 */
1642 	inflight = intel_context_inflight(&ve->context);
1643 	if (inflight && inflight != engine)
1644 		return false;
1645 
1646 	return true;
1647 }
1648 
1649 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
1650 				     struct intel_engine_cs *engine)
1651 {
1652 	struct intel_engine_cs *old = ve->siblings[0];
1653 
1654 	/* All unattached (rq->engine == old) must already be completed */
1655 
1656 	spin_lock(&old->breadcrumbs.irq_lock);
1657 	if (!list_empty(&ve->context.signal_link)) {
1658 		list_move_tail(&ve->context.signal_link,
1659 			       &engine->breadcrumbs.signalers);
1660 		intel_engine_signal_breadcrumbs(engine);
1661 	}
1662 	spin_unlock(&old->breadcrumbs.irq_lock);
1663 }
1664 
1665 static struct i915_request *
1666 last_active(const struct intel_engine_execlists *execlists)
1667 {
1668 	struct i915_request * const *last = READ_ONCE(execlists->active);
1669 
1670 	while (*last && i915_request_completed(*last))
1671 		last++;
1672 
1673 	return *last;
1674 }
1675 
1676 #define for_each_waiter(p__, rq__) \
1677 	list_for_each_entry_lockless(p__, \
1678 				     &(rq__)->sched.waiters_list, \
1679 				     wait_link)
1680 
1681 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1682 {
1683 	LIST_HEAD(list);
1684 
1685 	/*
1686 	 * We want to move the interrupted request to the back of
1687 	 * the round-robin list (i.e. its priority level), but
1688 	 * in doing so, we must then move all requests that were in
1689 	 * flight and were waiting for the interrupted request to
1690 	 * be run after it again.
1691 	 */
1692 	do {
1693 		struct i915_dependency *p;
1694 
1695 		GEM_BUG_ON(i915_request_is_active(rq));
1696 		list_move_tail(&rq->sched.link, pl);
1697 
1698 		for_each_waiter(p, rq) {
1699 			struct i915_request *w =
1700 				container_of(p->waiter, typeof(*w), sched);
1701 
1702 			/* Leave semaphores spinning on the other engines */
1703 			if (w->engine != rq->engine)
1704 				continue;
1705 
1706 			/* No waiter should start before its signaler */
1707 			GEM_BUG_ON(i915_request_started(w) &&
1708 				   !i915_request_completed(rq));
1709 
1710 			GEM_BUG_ON(i915_request_is_active(w));
1711 			if (!i915_request_is_ready(w))
1712 				continue;
1713 
1714 			if (rq_prio(w) < rq_prio(rq))
1715 				continue;
1716 
1717 			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1718 			list_move_tail(&w->sched.link, &list);
1719 		}
1720 
1721 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1722 	} while (rq);
1723 }
1724 
1725 static void defer_active(struct intel_engine_cs *engine)
1726 {
1727 	struct i915_request *rq;
1728 
1729 	rq = __unwind_incomplete_requests(engine);
1730 	if (!rq)
1731 		return;
1732 
1733 	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1734 }
1735 
1736 static bool
1737 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
1738 {
1739 	int hint;
1740 
1741 	if (!intel_engine_has_timeslices(engine))
1742 		return false;
1743 
1744 	if (list_is_last(&rq->sched.link, &engine->active.requests))
1745 		return false;
1746 
1747 	hint = max(rq_prio(list_next_entry(rq, sched.link)),
1748 		   engine->execlists.queue_priority_hint);
1749 
1750 	return hint >= effective_prio(rq);
1751 }
1752 
1753 static int
1754 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1755 {
1756 	if (list_is_last(&rq->sched.link, &engine->active.requests))
1757 		return INT_MIN;
1758 
1759 	return rq_prio(list_next_entry(rq, sched.link));
1760 }
1761 
1762 static inline unsigned long
1763 timeslice(const struct intel_engine_cs *engine)
1764 {
1765 	return READ_ONCE(engine->props.timeslice_duration_ms);
1766 }
1767 
1768 static unsigned long
1769 active_timeslice(const struct intel_engine_cs *engine)
1770 {
1771 	const struct i915_request *rq = *engine->execlists.active;
1772 
1773 	if (!rq || i915_request_completed(rq))
1774 		return 0;
1775 
1776 	if (engine->execlists.switch_priority_hint < effective_prio(rq))
1777 		return 0;
1778 
1779 	return timeslice(engine);
1780 }
1781 
1782 static void set_timeslice(struct intel_engine_cs *engine)
1783 {
1784 	if (!intel_engine_has_timeslices(engine))
1785 		return;
1786 
1787 	set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
1788 }
1789 
1790 static void record_preemption(struct intel_engine_execlists *execlists)
1791 {
1792 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1793 }
1794 
1795 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
1796 {
1797 	struct i915_request *rq;
1798 
1799 	rq = last_active(&engine->execlists);
1800 	if (!rq)
1801 		return 0;
1802 
1803 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
1804 	if (unlikely(intel_context_is_banned(rq->context)))
1805 		return 1;
1806 
1807 	return READ_ONCE(engine->props.preempt_timeout_ms);
1808 }
1809 
1810 static void set_preempt_timeout(struct intel_engine_cs *engine)
1811 {
1812 	if (!intel_engine_has_preempt_reset(engine))
1813 		return;
1814 
1815 	set_timer_ms(&engine->execlists.preempt,
1816 		     active_preempt_timeout(engine));
1817 }
1818 
1819 static inline void clear_ports(struct i915_request **ports, int count)
1820 {
1821 	memset_p((void **)ports, NULL, count);
1822 }
1823 
1824 static void execlists_dequeue(struct intel_engine_cs *engine)
1825 {
1826 	struct intel_engine_execlists * const execlists = &engine->execlists;
1827 	struct i915_request **port = execlists->pending;
1828 	struct i915_request ** const last_port = port + execlists->port_mask;
1829 	struct i915_request *last;
1830 	struct rb_node *rb;
1831 	bool submit = false;
1832 
1833 	/*
1834 	 * Hardware submission is through 2 ports. Conceptually each port
1835 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1836 	 * static for a context, and unique to each, so we only execute
1837 	 * requests belonging to a single context from each ring. RING_HEAD
1838 	 * is maintained by the CS in the context image, it marks the place
1839 	 * where it got up to last time, and through RING_TAIL we tell the CS
1840 	 * where we want to execute up to this time.
1841 	 *
1842 	 * In this list the requests are in order of execution. Consecutive
1843 	 * requests from the same context are adjacent in the ringbuffer. We
1844 	 * can combine these requests into a single RING_TAIL update:
1845 	 *
1846 	 *              RING_HEAD...req1...req2
1847 	 *                                    ^- RING_TAIL
1848 	 * since to execute req2 the CS must first execute req1.
1849 	 *
1850 	 * Our goal then is to point each port to the end of a consecutive
1851 	 * sequence of requests as being the most optimal (fewest wake ups
1852 	 * and context switches) submission.
1853 	 */
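	/*
	 * For example (hypothetical queue): with reqA1 and reqA2 from
	 * context A followed by reqB1 from context B, the dequeue below
	 * would ideally end up with
	 *
	 *     pending[0] = reqA2   (one RING_TAIL update covering A1+A2)
	 *     pending[1] = reqB1
	 *
	 * i.e. consecutive requests from the same context collapse into a
	 * single ELSP entry and only the switch to B consumes the second
	 * port.
	 */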
1854 
1855 	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
1856 		struct virtual_engine *ve =
1857 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1858 		struct i915_request *rq = READ_ONCE(ve->request);
1859 
1860 		if (!rq) { /* lazily cleanup after another engine handled rq */
1861 			rb_erase_cached(rb, &execlists->virtual);
1862 			container_of(rb, struct ve_node, rb)->inserted =
1863 			    false;
1864 			rb = rb_first_cached(&execlists->virtual);
1865 			continue;
1866 		}
1867 
1868 		if (!virtual_matches(ve, rq, engine)) {
1869 			rb = rb_next2(&execlists->virtual.rb_root, rb);
1870 			continue;
1871 		}
1872 
1873 		break;
1874 	}
1875 
1876 	/*
1877 	 * If the queue is higher priority than the last
1878 	 * request in the currently active context, submit afresh.
1879 	 * We will resubmit again afterwards in case we need to split
1880 	 * the active context to interject the preemption request,
1881 	 * i.e. we will retrigger preemption following the ack in case
1882 	 * of trouble.
1883 	 */
1884 	last = last_active(execlists);
1885 	if (last) {
1886 		if (need_preempt(engine, last, rb)) {
1887 			ENGINE_TRACE(engine,
1888 				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
1889 				     last->fence.context,
1890 				     last->fence.seqno,
1891 				     last->sched.attr.priority,
1892 				     execlists->queue_priority_hint);
1893 			record_preemption(execlists);
1894 
1895 			/*
1896 			 * Don't let the RING_HEAD advance past the breadcrumb
1897 			 * as we unwind (and until we resubmit) so that we do
1898 			 * not accidentally tell it to go backwards.
1899 			 */
1900 			ring_set_paused(engine, 1);
1901 
1902 			/*
1903 			 * Note that we have not stopped the GPU at this point,
1904 			 * so we are unwinding the incomplete requests as they
1905 			 * remain inflight and so by the time we do complete
1906 			 * the preemption, some of the unwound requests may
1907 			 * complete!
1908 			 */
1909 			__unwind_incomplete_requests(engine);
1910 
1911 			last = NULL;
1912 		} else if (need_timeslice(engine, last) &&
1913 			   timer_expired(&engine->execlists.timer)) {
1914 			ENGINE_TRACE(engine,
1915 				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
1916 				     last->fence.context,
1917 				     last->fence.seqno,
1918 				     last->sched.attr.priority,
1919 				     execlists->queue_priority_hint);
1920 
1921 			ring_set_paused(engine, 1);
1922 			defer_active(engine);
1923 
1924 			/*
1925 			 * Unlike for preemption, if we rewind and continue
1926 			 * executing the same context as previously active,
1927 			 * the order of execution will remain the same and
1928 			 * the tail will only advance. We do not need to
1929 			 * force a full context restore, as a lite-restore
1930 			 * is sufficient to resample the monotonic TAIL.
1931 			 *
1932 			 * If we switch to any other context, similarly we
1933 			 * will not rewind TAIL of current context, and
1934 			 * normal save/restore will preserve state and allow
1935 			 * us to later continue executing the same request.
1936 			 */
1937 			last = NULL;
1938 		} else {
1939 			/*
1940 			 * Otherwise if we already have a request pending
1941 			 * for execution after the current one, we can
1942 			 * just wait until the next CS event before
1943 			 * queuing more. In either case we will force a
1944 			 * lite-restore preemption event, but if we wait
1945 			 * we hopefully coalesce several updates into a single
1946 			 * submission.
1947 			 */
1948 			if (!list_is_last(&last->sched.link,
1949 					  &engine->active.requests)) {
1950 				/*
1951 				 * Even if ELSP[1] is occupied and not worthy
1952 				 * of timeslices, our queue might be.
1953 				 */
1954 				if (!timer_pending(&execlists->timer) &&
1955 				    need_timeslice(engine, last))
1956 					set_timer_ms(&execlists->timer,
1957 						     timeslice(engine));
1958 
1959 				return;
1960 			}
1961 		}
1962 	}
1963 
1964 	while (rb) { /* XXX virtual is always taking precedence */
1965 		struct virtual_engine *ve =
1966 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1967 		struct i915_request *rq;
1968 
1969 		spin_lock(&ve->base.active.lock);
1970 
1971 		rq = ve->request;
1972 		if (unlikely(!rq)) { /* lost the race to a sibling */
1973 			spin_unlock(&ve->base.active.lock);
1974 			rb_erase_cached(rb, &execlists->virtual);
1975 			container_of(rb, struct ve_node, rb)->inserted =
1976 			    false;
1977 			rb = rb_first_cached(&execlists->virtual);
1978 			continue;
1979 		}
1980 
1981 		GEM_BUG_ON(rq != ve->request);
1982 		GEM_BUG_ON(rq->engine != &ve->base);
1983 		GEM_BUG_ON(rq->context != &ve->context);
1984 
1985 		if (rq_prio(rq) >= queue_prio(execlists)) {
1986 			if (!virtual_matches(ve, rq, engine)) {
1987 				spin_unlock(&ve->base.active.lock);
1988 				rb = rb_next2(&execlists->virtual.rb_root,
1989 				    rb);
1990 				continue;
1991 			}
1992 
1993 			if (last && !can_merge_rq(last, rq)) {
1994 				spin_unlock(&ve->base.active.lock);
1995 				return; /* leave this for another */
1996 			}
1997 
1998 			ENGINE_TRACE(engine,
1999 				     "virtual rq=%llx:%lld%s, new engine? %s\n",
2000 				     rq->fence.context,
2001 				     rq->fence.seqno,
2002 				     i915_request_completed(rq) ? "!" :
2003 				     i915_request_started(rq) ? "*" :
2004 				     "",
2005 				     yesno(engine != ve->siblings[0]));
2006 
2007 			ve->request = NULL;
2008 			ve->base.execlists.queue_priority_hint = INT_MIN;
2009 			rb_erase_cached(rb, &execlists->virtual);
2010 			container_of(rb, struct ve_node, rb)->inserted =
2011 			    false;
2012 
2013 			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2014 			rq->engine = engine;
2015 
2016 			if (engine != ve->siblings[0]) {
2017 				u32 *regs = ve->context.lrc_reg_state;
2018 				unsigned int n;
2019 
2020 				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
2021 
2022 				if (!intel_engine_has_relative_mmio(engine))
2023 					virtual_update_register_offsets(regs,
2024 									engine);
2025 
2026 				if (!list_empty(&ve->context.signals))
2027 					virtual_xfer_breadcrumbs(ve, engine);
2028 
2029 				/*
2030 				 * Move the bound engine to the top of the list
2031 				 * for future execution. We then kick this
2032 				 * tasklet first before checking others, so that
2033 				 * we preferentially reuse this set of bound
2034 				 * registers.
2035 				 */
2036 				for (n = 1; n < ve->num_siblings; n++) {
2037 					if (ve->siblings[n] == engine) {
2038 						swap(ve->siblings[n],
2039 						     ve->siblings[0]);
2040 						break;
2041 					}
2042 				}
2043 
2044 				GEM_BUG_ON(ve->siblings[0] != engine);
2045 			}
2046 
2047 			if (__i915_request_submit(rq)) {
2048 				submit = true;
2049 				last = rq;
2050 			}
2051 			i915_request_put(rq);
2052 
2053 			/*
2054 			 * Hmm, we have a bunch of virtual engine requests,
2055 			 * but the first one was already completed (thanks
2056 			 * preempt-to-busy!). Keep looking at the veng queue
2057 			 * until we have no more relevant requests (i.e.
2058 			 * the normal submit queue has higher priority).
2059 			 */
2060 			if (!submit) {
2061 				spin_unlock(&ve->base.active.lock);
2062 				rb = rb_first_cached(&execlists->virtual);
2063 				continue;
2064 			}
2065 		}
2066 
2067 		spin_unlock(&ve->base.active.lock);
2068 		break;
2069 	}
2070 
2071 	while ((rb = rb_first_cached(&execlists->queue))) {
2072 		struct i915_priolist *p = to_priolist(rb);
2073 		struct i915_request *rq, *rn;
2074 		int i;
2075 
2076 		priolist_for_each_request_consume(rq, rn, p, i) {
2077 			bool merge = true;
2078 
2079 			/*
2080 			 * Can we combine this request with the current port?
2081 			 * It has to be the same context/ringbuffer and not
2082 			 * have any exceptions (e.g. GVT saying never to
2083 			 * combine contexts).
2084 			 *
2085 			 * If we can combine the requests, we can execute both
2086 			 * by updating the RING_TAIL to point to the end of the
2087 			 * second request, and so we never need to tell the
2088 			 * hardware about the first.
2089 			 */
2090 			if (last && !can_merge_rq(last, rq)) {
2091 				/*
2092 				 * If we are on the second port and cannot
2093 				 * combine this request with the last, then we
2094 				 * are done.
2095 				 */
2096 				if (port == last_port)
2097 					goto done;
2098 
2099 				/*
2100 				 * We must not populate both ELSP[] with the
2101 				 * same LRCA, i.e. we must submit 2 different
2102 				 * contexts if we submit 2 ELSP.
2103 				 */
2104 				if (last->context == rq->context)
2105 					goto done;
2106 
2107 				if (i915_request_has_sentinel(last))
2108 					goto done;
2109 
2110 				/*
2111 				 * If GVT overrides us we only ever submit
2112 				 * port[0], leaving port[1] empty. Note that we
2113 				 * also have to be careful that we don't queue
2114 				 * the same context (even though a different
2115 				 * request) to the second port.
2116 				 */
2117 				if (ctx_single_port_submission(last->context) ||
2118 				    ctx_single_port_submission(rq->context))
2119 					goto done;
2120 
2121 				merge = false;
2122 			}
2123 
2124 			if (__i915_request_submit(rq)) {
2125 				if (!merge) {
2126 					*port = execlists_schedule_in(last, port - execlists->pending);
2127 					port++;
2128 					last = NULL;
2129 				}
2130 
2131 				GEM_BUG_ON(last &&
2132 					   !can_merge_ctx(last->context,
2133 							  rq->context));
2134 
2135 				submit = true;
2136 				last = rq;
2137 			}
2138 		}
2139 
2140 		rb_erase_cached(&p->node, &execlists->queue);
2141 		i915_priolist_free(p);
2142 	}
2143 
2144 done:
2145 	/*
2146 	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2147 	 *
2148 	 * We choose the priority hint such that if we add a request of greater
2149 	 * priority than this, we kick the submission tasklet to decide on
2150 	 * the right order of submitting the requests to hardware. We must
2151 	 * also be prepared to reorder requests as they are in-flight on the
2152 	 * HW. We derive the priority hint then as the first "hole" in
2153 	 * the HW submission ports and if there are no available slots,
2154 	 * the priority of the lowest executing request, i.e. last.
2155 	 *
2156 	 * When we do receive a higher priority request ready to run from the
2157 	 * user, see queue_request(), the priority hint is bumped to that
2158 	 * request triggering preemption on the next dequeue (or subsequent
2159 	 * interrupt for secondary ports).
2160 	 */
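	/*
	 * For example (hypothetical priorities): with both ports filled and
	 * the highest priority remaining in the queue being 2, the hint
	 * becomes 2. A later queue_request() at priority 5 bumps the hint
	 * and kicks the tasklet, which may then preempt the active ports.
	 */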
2161 	execlists->queue_priority_hint = queue_prio(execlists);
2162 
2163 	if (submit) {
2164 		*port = execlists_schedule_in(last, port - execlists->pending);
2165 		execlists->switch_priority_hint =
2166 			switch_prio(engine, *execlists->pending);
2167 
2168 		/*
2169 		 * Skip if we ended up with exactly the same set of requests,
2170 		 * e.g. trying to timeslice a pair of ordered contexts
2171 		 */
2172 		if (!memcmp(execlists->active, execlists->pending,
2173 			    (port - execlists->pending + 1) * sizeof(*port))) {
2174 			do
2175 				execlists_schedule_out(fetch_and_zero(port));
2176 			while (port-- != execlists->pending);
2177 
2178 			goto skip_submit;
2179 		}
2180 		clear_ports(port + 1, last_port - port);
2181 
2182 		execlists_submit_ports(engine);
2183 		set_preempt_timeout(engine);
2184 	} else {
2185 skip_submit:
2186 		ring_set_paused(engine, 0);
2187 	}
2188 }
2189 
2190 static void
2191 cancel_port_requests(struct intel_engine_execlists * const execlists)
2192 {
2193 	struct i915_request * const *port;
2194 
2195 	for (port = execlists->pending; *port; port++)
2196 		execlists_schedule_out(*port);
2197 	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2198 
2199 	/* Mark the end of active before we overwrite *active */
2200 	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2201 		execlists_schedule_out(*port);
2202 	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2203 
2204 	WRITE_ONCE(execlists->active, execlists->inflight);
2205 }
2206 
2207 static inline void
2208 invalidate_csb_entries(const u32 *first, const u32 *last)
2209 {
2210 	clflush(__UNCONST(first));
2211 	clflush(__UNCONST(last));
2212 }
2213 
2214 static inline bool
2215 reset_in_progress(const struct intel_engine_execlists *execlists)
2216 {
2217 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
2218 }
2219 
2220 /*
2221  * Starting with Gen12, the status has a new format:
2222  *
2223  *     bit  0:     switched to new queue
2224  *     bit  1:     reserved
2225  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2226  *                 switch detail is set to "wait on semaphore"
2227  *     bits 3-5:   engine class
2228  *     bits 6-11:  engine instance
2229  *     bits 12-14: reserved
2230  *     bits 15-25: sw context id of the lrc the GT switched to
2231  *     bits 26-31: sw counter of the lrc the GT switched to
2232  *     bits 32-35: context switch detail
2233  *                  - 0: ctx complete
2234  *                  - 1: wait on sync flip
2235  *                  - 2: wait on vblank
2236  *                  - 3: wait on scanline
2237  *                  - 4: wait on semaphore
2238  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2239  *                       WAIT_FOR_EVENT)
2240  *     bit  36:    reserved
2241  *     bits 37-43: wait detail (for switch detail 1 to 4)
2242  *     bits 44-46: reserved
2243  *     bits 47-57: sw context id of the lrc the GT switched away from
2244  *     bits 58-63: sw counter of the lrc the GT switched away from
2245  */
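/*
 * Reading the layout above, csb[0] is the lower dword (so bit 0, "switched
 * to new queue", and the sw context id/counter of the lrc switched *to*),
 * while csb[1] is the upper dword (the switch detail in bits 32-35 and the
 * id of the lrc switched *away from*). gen12_csb_parse() below inspects
 * exactly those two halves.
 */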
2246 static inline bool
2247 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2248 {
2249 	u32 lower_dw = csb[0];
2250 	u32 upper_dw = csb[1];
2251 	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2252 	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2253 	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2254 
2255 	/*
2256 	 * The context switch detail is not guaranteed to be 5 when a preemption
2257 	 * occurs, so we can't just check for that. The check below works for
2258 	 * all the cases we care about, including preemptions of WAIT
2259 	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
2260 	 * would require some extra handling, but we don't support that.
2261 	 */
2262 	if (!ctx_away_valid || new_queue) {
2263 		GEM_BUG_ON(!ctx_to_valid);
2264 		return true;
2265 	}
2266 
2267 	/*
2268 	 * switch detail = 5 is covered by the case above and we do not expect a
2269 	 * context switch on an unsuccessful wait instruction since we always
2270 	 * use polling mode.
2271 	 */
2272 	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2273 	return false;
2274 }
2275 
2276 static inline bool
2277 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2278 {
2279 	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2280 }
2281 
2282 static void process_csb(struct intel_engine_cs *engine)
2283 {
2284 	struct intel_engine_execlists * const execlists = &engine->execlists;
2285 	const u32 * const buf = execlists->csb_status;
2286 	const u8 num_entries = execlists->csb_size;
2287 	u8 head, tail;
2288 
2289 	/*
2290 	 * As we modify our execlists state tracking we require exclusive
2291 	 * access. Either we are inside the tasklet, or the tasklet is disabled
2292 	 * and we assume that is only inside the reset paths and so serialised.
2293 	 */
2294 	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2295 		   !reset_in_progress(execlists));
2296 	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2297 
2298 	/*
2299 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
2300 	 * When reading from the csb_write mmio register, we have to be
2301 	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2302 	 * the low 4bits. As it happens we know the next 4bits are always
2303 	 * the low 4 bits. As it happens we know the next 4 bits are always
2304 	 * zero and so we can simply mask off the low u8 of the register
2305 	 * to use explicit shifting and masking, and probably bifurcating
2306 	 * the code to handle the legacy mmio read).
2307 	 */
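	/*
	 * e.g. a raw mmio value with a write pointer of 5 reads back as
	 * 0x05 in its low byte, matching what the HWSP copy would hold, so
	 * the READ_ONCE() below can serve both paths unchanged.
	 */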
2308 	head = execlists->csb_head;
2309 	tail = READ_ONCE(*execlists->csb_write);
2310 	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2311 	if (unlikely(head == tail))
2312 		return;
2313 
2314 	/*
2315 	 * Hopefully paired with a wmb() in HW!
2316 	 *
2317 	 * We must complete the read of the write pointer before any reads
2318 	 * from the CSB, so that we do not see stale values. Without an rmb
2319 	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
2320 	 * we perform the READ_ONCE(*csb_write).
2321 	 */
2322 	rmb();
2323 
2324 	do {
2325 		bool promote;
2326 
2327 		if (++head == num_entries)
2328 			head = 0;
2329 
2330 		/*
2331 		 * We are flying near dragons again.
2332 		 *
2333 		 * We hold a reference to the request in execlist_port[]
2334 		 * but no more than that. We are operating in softirq
2335 		 * context and so cannot hold any mutex or sleep. That
2336 		 * prevents us stopping the requests we are processing
2337 		 * in port[] from being retired simultaneously (the
2338 		 * breadcrumb will be complete before we see the
2339 		 * context-switch). As we only hold the reference to the
2340 		 * request, any pointer chasing underneath the request
2341 		 * is subject to a potential use-after-free. Thus we
2342 		 * store all of the bookkeeping within port[] as
2343 		 * required, and avoid using unguarded pointers beneath
2344 		 * request itself. The same applies to the atomic
2345 		 * status notifier.
2346 		 */
2347 
2348 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2349 			     head, buf[2 * head + 0], buf[2 * head + 1]);
2350 
2351 		if (INTEL_GEN(engine->i915) >= 12)
2352 			promote = gen12_csb_parse(execlists, buf + 2 * head);
2353 		else
2354 			promote = gen8_csb_parse(execlists, buf + 2 * head);
2355 		if (promote) {
2356 			struct i915_request * const *old = execlists->active;
2357 
2358 			/* Point active to the new ELSP; prevent overwriting */
2359 			WRITE_ONCE(execlists->active, execlists->pending);
2360 
2361 			if (!inject_preempt_hang(execlists))
2362 				ring_set_paused(engine, 0);
2363 
2364 			/* cancel old inflight, prepare for switch */
2365 			trace_ports(execlists, "preempted", old);
2366 			while (*old)
2367 				execlists_schedule_out(*old++);
2368 
2369 			/* switch pending to inflight */
2370 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2371 			WRITE_ONCE(execlists->active,
2372 				   memcpy(execlists->inflight,
2373 					  execlists->pending,
2374 					  execlists_num_ports(execlists) *
2375 					  sizeof(*execlists->pending)));
2376 
2377 			WRITE_ONCE(execlists->pending[0], NULL);
2378 		} else {
2379 			GEM_BUG_ON(!*execlists->active);
2380 
2381 			/* port0 completed, advanced to port1 */
2382 			trace_ports(execlists, "completed", execlists->active);
2383 
2384 			/*
2385 			 * We rely on the hardware being strongly
2386 			 * ordered, that the breadcrumb write is
2387 			 * coherent (visible from the CPU) before the
2388 			 * user interrupt and CSB is processed.
2389 			 */
2390 			GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
2391 				   !reset_in_progress(execlists));
2392 			execlists_schedule_out(*execlists->active++);
2393 
2394 			GEM_BUG_ON(execlists->active - execlists->inflight >
2395 				   execlists_num_ports(execlists));
2396 		}
2397 	} while (head != tail);
2398 
2399 	execlists->csb_head = head;
2400 	set_timeslice(engine);
2401 
2402 	/*
2403 	 * Gen11 has proven to fail wrt the global observation point between
2404 	 * entry and tail update, failing on the ordering and thus
2405 	 * we see an old entry in the context status buffer.
2406 	 *
2407 	 * Forcibly evict the entries before the next gpu csb update,
2408 	 * to increase the odds that we get fresh entries on
2409 	 * non-working hardware. The cost of doing so comes out mostly in
2410 	 * the wash, as the hardware, working or not, will need to do the
2411 	 * invalidation beforehand.
2412 	 */
2413 	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2414 }
2415 
2416 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2417 {
2418 	lockdep_assert_held(&engine->active.lock);
2419 	if (!engine->execlists.pending[0]) {
2420 		rcu_read_lock(); /* protect peeking at execlists->active */
2421 		execlists_dequeue(engine);
2422 		rcu_read_unlock();
2423 	}
2424 }
2425 
2426 static void __execlists_hold(struct i915_request *rq)
2427 {
2428 	LIST_HEAD(list);
2429 
2430 	do {
2431 		struct i915_dependency *p;
2432 
2433 		if (i915_request_is_active(rq))
2434 			__i915_request_unsubmit(rq);
2435 
2436 		RQ_TRACE(rq, "on hold\n");
2437 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2438 		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2439 		i915_request_set_hold(rq);
2440 
2441 		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2442 			struct i915_request *w =
2443 				container_of(p->waiter, typeof(*w), sched);
2444 
2445 			/* Leave semaphores spinning on the other engines */
2446 			if (w->engine != rq->engine)
2447 				continue;
2448 
2449 			if (!i915_request_is_ready(w))
2450 				continue;
2451 
2452 			if (i915_request_completed(w))
2453 				continue;
2454 
2455 			if (i915_request_on_hold(rq))
2456 				continue;
2457 
2458 			list_move_tail(&w->sched.link, &list);
2459 		}
2460 
2461 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2462 	} while (rq);
2463 }
2464 
2465 static bool execlists_hold(struct intel_engine_cs *engine,
2466 			   struct i915_request *rq)
2467 {
2468 	spin_lock_irq(&engine->active.lock);
2469 
2470 	if (i915_request_completed(rq)) { /* too late! */
2471 		rq = NULL;
2472 		goto unlock;
2473 	}
2474 
2475 	if (rq->engine != engine) { /* preempted virtual engine */
2476 		struct virtual_engine *ve = to_virtual_engine(rq->engine);
2477 
2478 		/*
2479 		 * intel_context_inflight() is only protected by virtue
2480 		 * of process_csb() being called only by the tasklet (or
2481 		 * directly from inside reset while the tasklet is suspended).
2482 		 * Assert that neither of those are allowed to run while we
2483 		 * poke at the request queues.
2484 		 */
2485 		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2486 
2487 		/*
2488 		 * An unsubmitted request along a virtual engine will
2489 		 * remain on the active (this) engine until we are able
2490 		 * to process the context switch away (and so mark the
2491 		 * context as no longer in flight). That cannot have happened
2492 		 * yet, otherwise we would not be hanging!
2493 		 */
2494 		spin_lock(&ve->base.active.lock);
2495 		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2496 		GEM_BUG_ON(ve->request != rq);
2497 		ve->request = NULL;
2498 		spin_unlock(&ve->base.active.lock);
2499 		i915_request_put(rq);
2500 
2501 		rq->engine = engine;
2502 	}
2503 
2504 	/*
2505 	 * Transfer this request onto the hold queue to prevent it
2506 	 * being resubmitted to HW (and potentially completed) before we have
2507 	 * released it. Since we may have already submitted following
2508 	 * requests, we need to remove those as well.
2509 	 */
2510 	GEM_BUG_ON(i915_request_on_hold(rq));
2511 	GEM_BUG_ON(rq->engine != engine);
2512 	__execlists_hold(rq);
2513 
2514 unlock:
2515 	spin_unlock_irq(&engine->active.lock);
2516 	return rq;
2517 }
2518 
2519 static bool hold_request(const struct i915_request *rq)
2520 {
2521 	struct i915_dependency *p;
2522 
2523 	/*
2524 	 * If one of our ancestors is on hold, we must also be on hold,
2525 	 * otherwise we will bypass it and execute before it.
2526 	 */
2527 	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
2528 		const struct i915_request *s =
2529 			container_of(p->signaler, typeof(*s), sched);
2530 
2531 		if (s->engine != rq->engine)
2532 			continue;
2533 
2534 		if (i915_request_on_hold(s))
2535 			return true;
2536 	}
2537 
2538 	return false;
2539 }
2540 
2541 static void __execlists_unhold(struct i915_request *rq)
2542 {
2543 	LIST_HEAD(list);
2544 
2545 	do {
2546 		struct i915_dependency *p;
2547 
2548 		GEM_BUG_ON(!i915_request_on_hold(rq));
2549 		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2550 
2551 		i915_request_clear_hold(rq);
2552 		list_move_tail(&rq->sched.link,
2553 			       i915_sched_lookup_priolist(rq->engine,
2554 							  rq_prio(rq)));
2555 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2556 		RQ_TRACE(rq, "hold release\n");
2557 
2558 		/* Also release any children on this engine that are ready */
2559 		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2560 			struct i915_request *w =
2561 				container_of(p->waiter, typeof(*w), sched);
2562 
2563 			if (w->engine != rq->engine)
2564 				continue;
2565 
2566 			if (!i915_request_on_hold(rq))
2567 				continue;
2568 
2569 			/* Check that no other parents are also on hold */
2570 			if (hold_request(rq))
2571 				continue;
2572 
2573 			list_move_tail(&w->sched.link, &list);
2574 		}
2575 
2576 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2577 	} while (rq);
2578 }
2579 
2580 static void execlists_unhold(struct intel_engine_cs *engine,
2581 			     struct i915_request *rq)
2582 {
2583 	spin_lock_irq(&engine->active.lock);
2584 
2585 	/*
2586 	 * Move this request back to the priority queue, and all of its
2587 	 * children and grandchildren that were suspended along with it.
2588 	 */
2589 	__execlists_unhold(rq);
2590 
2591 	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2592 		engine->execlists.queue_priority_hint = rq_prio(rq);
2593 		tasklet_hi_schedule(&engine->execlists.tasklet);
2594 	}
2595 
2596 	spin_unlock_irq(&engine->active.lock);
2597 }
2598 
2599 struct execlists_capture {
2600 	struct work_struct work;
2601 	struct i915_request *rq;
2602 	struct i915_gpu_coredump *error;
2603 };
2604 
2605 static void execlists_capture_work(struct work_struct *work)
2606 {
2607 	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2608 	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2609 	struct intel_engine_cs *engine = cap->rq->engine;
2610 	struct intel_gt_coredump *gt = cap->error->gt;
2611 	struct intel_engine_capture_vma *vma;
2612 
2613 	/* Compress all the objects attached to the request, slow! */
2614 	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2615 	if (vma) {
2616 		struct i915_vma_compress *compress =
2617 			i915_vma_capture_prepare(gt);
2618 
2619 		intel_engine_coredump_add_vma(gt->engine, vma, compress);
2620 		i915_vma_capture_finish(gt, compress);
2621 	}
2622 
2623 	gt->simulated = gt->engine->simulated;
2624 	cap->error->simulated = gt->simulated;
2625 
2626 	/* Publish the error state, and announce it to the world */
2627 	i915_error_state_store(cap->error);
2628 	i915_gpu_coredump_put(cap->error);
2629 
2630 	/* Return this request and all that depend upon it for signaling */
2631 	execlists_unhold(engine, cap->rq);
2632 	i915_request_put(cap->rq);
2633 
2634 	kfree(cap);
2635 }
2636 
2637 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2638 {
2639 	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2640 	struct execlists_capture *cap;
2641 
2642 	cap = kmalloc(sizeof(*cap), gfp);
2643 	if (!cap)
2644 		return NULL;
2645 
2646 	cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2647 	if (!cap->error)
2648 		goto err_cap;
2649 
2650 	cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2651 	if (!cap->error->gt)
2652 		goto err_gpu;
2653 
2654 	cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2655 	if (!cap->error->gt->engine)
2656 		goto err_gt;
2657 
2658 	return cap;
2659 
2660 err_gt:
2661 	kfree(cap->error->gt);
2662 err_gpu:
2663 	kfree(cap->error);
2664 err_cap:
2665 	kfree(cap);
2666 	return NULL;
2667 }
2668 
2669 static bool execlists_capture(struct intel_engine_cs *engine)
2670 {
2671 	struct execlists_capture *cap;
2672 
2673 	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
2674 		return true;
2675 
2676 	/*
2677 	 * We need to _quickly_ capture the engine state before we reset.
2678 	 * We are inside an atomic section (softirq) here and we are delaying
2679 	 * the forced preemption event.
2680 	 */
2681 	cap = capture_regs(engine);
2682 	if (!cap)
2683 		return true;
2684 
2685 	cap->rq = execlists_active(&engine->execlists);
2686 	GEM_BUG_ON(!cap->rq);
2687 
2688 	rcu_read_lock();
2689 	cap->rq = active_request(cap->rq->context->timeline, cap->rq);
2690 	cap->rq = i915_request_get_rcu(cap->rq);
2691 	rcu_read_unlock();
2692 	if (!cap->rq)
2693 		goto err_free;
2694 
2695 	/*
2696 	 * Remove the request from the execlists queue, and take ownership
2697 	 * of the request. We pass it to our worker who will _slowly_ compress
2698 	 * all the pages the _user_ requested for debugging their batch, after
2699 	 * which we return it to the queue for signaling.
2700 	 *
2701 	 * By removing them from the execlists queue, we also remove the
2702 	 * requests from being processed by __unwind_incomplete_requests()
2703 	 * during the intel_engine_reset(), and so they will *not* be replayed
2704 	 * afterwards.
2705 	 *
2706 	 * Note that because we have not yet reset the engine at this point,
2707 	 * it is possible that the request we have identified as
2708 	 * guilty did in fact complete and we will then hit an arbitration
2709 	 * point allowing the outstanding preemption to succeed. The likelihood
2710 	 * of that is very low (as capturing of the engine registers should be
2711 	 * fast enough to run inside an irq-off atomic section!), so we will
2712 	 * simply hold that request accountable for being non-preemptible
2713 	 * long enough to force the reset.
2714 	 */
2715 	if (!execlists_hold(engine, cap->rq))
2716 		goto err_rq;
2717 
2718 	INIT_WORK(&cap->work, execlists_capture_work);
2719 	schedule_work(&cap->work);
2720 	return true;
2721 
2722 err_rq:
2723 	i915_request_put(cap->rq);
2724 err_free:
2725 	i915_gpu_coredump_put(cap->error);
2726 	kfree(cap);
2727 	return false;
2728 }
2729 
2730 static noinline void preempt_reset(struct intel_engine_cs *engine)
2731 {
2732 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
2733 	unsigned long *lock = &engine->gt->reset.flags;
2734 
2735 	if (i915_modparams.reset < 3)
2736 		return;
2737 
2738 	if (test_and_set_bit(bit, lock))
2739 		return;
2740 
2741 	/* Mark this tasklet as disabled to avoid waiting for it to complete */
2742 	tasklet_disable_nosync(&engine->execlists.tasklet);
2743 
2744 	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
2745 		     READ_ONCE(engine->props.preempt_timeout_ms),
2746 		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
2747 
2748 	ring_set_paused(engine, 1); /* Freeze the current request in place */
2749 	if (execlists_capture(engine))
2750 		intel_engine_reset(engine, "preemption time out");
2751 	else
2752 		ring_set_paused(engine, 0);
2753 
2754 	tasklet_enable(&engine->execlists.tasklet);
2755 	clear_and_wake_up_bit(bit, lock);
2756 }
2757 
2758 static bool preempt_timeout(const struct intel_engine_cs *const engine)
2759 {
2760 	const struct timer_list *t = &engine->execlists.preempt;
2761 
2762 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2763 		return false;
2764 
2765 	if (!timer_expired(t))
2766 		return false;
2767 
2768 	return READ_ONCE(engine->execlists.pending[0]);
2769 }
2770 
2771 /*
2772  * Check the unread Context Status Buffers and manage the submission of new
2773  * contexts to the ELSP accordingly.
2774  */
2775 static void execlists_submission_tasklet(unsigned long data)
2776 {
2777 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
2778 	bool timeout = preempt_timeout(engine);
2779 
2780 	process_csb(engine);
2781 	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
2782 		unsigned long flags;
2783 
2784 		spin_lock_irqsave(&engine->active.lock, flags);
2785 		__execlists_submission_tasklet(engine);
2786 		spin_unlock_irqrestore(&engine->active.lock, flags);
2787 
2788 		/* Recheck after serialising with direct-submission */
2789 		if (timeout && preempt_timeout(engine))
2790 			preempt_reset(engine);
2791 	}
2792 }
2793 
2794 static void __execlists_kick(struct intel_engine_execlists *execlists)
2795 {
2796 	/* Kick the tasklet for some interrupt coalescing and reset handling */
2797 	tasklet_hi_schedule(&execlists->tasklet);
2798 }
2799 
2800 #define execlists_kick(t, member) \
2801 	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
2802 
2803 static void execlists_timeslice(struct timer_list *timer)
2804 {
2805 	execlists_kick(timer, timer);
2806 }
2807 
2808 static void execlists_preempt(struct timer_list *timer)
2809 {
2810 	execlists_kick(timer, preempt);
2811 }
2812 
2813 static void queue_request(struct intel_engine_cs *engine,
2814 			  struct i915_request *rq)
2815 {
2816 	GEM_BUG_ON(!list_empty(&rq->sched.link));
2817 	list_add_tail(&rq->sched.link,
2818 		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
2819 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2820 }
2821 
2822 static void __submit_queue_imm(struct intel_engine_cs *engine)
2823 {
2824 	struct intel_engine_execlists * const execlists = &engine->execlists;
2825 
2826 	if (reset_in_progress(execlists))
2827 		return; /* defer until we restart the engine following reset */
2828 
2829 	if (execlists->tasklet.func == execlists_submission_tasklet)
2830 		__execlists_submission_tasklet(engine);
2831 	else
2832 		tasklet_hi_schedule(&execlists->tasklet);
2833 }
2834 
2835 static void submit_queue(struct intel_engine_cs *engine,
2836 			 const struct i915_request *rq)
2837 {
2838 	struct intel_engine_execlists *execlists = &engine->execlists;
2839 
2840 	if (rq_prio(rq) <= execlists->queue_priority_hint)
2841 		return;
2842 
2843 	execlists->queue_priority_hint = rq_prio(rq);
2844 	__submit_queue_imm(engine);
2845 }
2846 
2847 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
2848 			     const struct i915_request *rq)
2849 {
2850 	GEM_BUG_ON(i915_request_on_hold(rq));
2851 	return !list_empty(&engine->active.hold) && hold_request(rq);
2852 }
2853 
2854 static void execlists_submit_request(struct i915_request *request)
2855 {
2856 	struct intel_engine_cs *engine = request->engine;
2857 	unsigned long flags;
2858 
2859 	/* Will be called from irq-context when using foreign fences. */
2860 	spin_lock_irqsave(&engine->active.lock, flags);
2861 
2862 	if (unlikely(ancestor_on_hold(engine, request))) {
2863 		list_add_tail(&request->sched.link, &engine->active.hold);
2864 		i915_request_set_hold(request);
2865 	} else {
2866 		queue_request(engine, request);
2867 
2868 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
2869 		GEM_BUG_ON(list_empty(&request->sched.link));
2870 
2871 		submit_queue(engine, request);
2872 	}
2873 
2874 	spin_unlock_irqrestore(&engine->active.lock, flags);
2875 }
2876 
2877 static void __execlists_context_fini(struct intel_context *ce)
2878 {
2879 	intel_ring_put(ce->ring);
2880 	i915_vma_put(ce->state);
2881 }
2882 
2883 static void execlists_context_destroy(struct kref *kref)
2884 {
2885 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2886 
2887 	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
2888 	GEM_BUG_ON(intel_context_is_pinned(ce));
2889 
2890 	if (ce->state)
2891 		__execlists_context_fini(ce);
2892 
2893 	intel_context_fini(ce);
2894 	intel_context_free(ce);
2895 }
2896 
2897 static void
2898 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
2899 {
2900 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2901 		return;
2902 
2903 	vaddr += engine->context_size;
2904 
2905 	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
2906 }
2907 
2908 static void
2909 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
2910 {
2911 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2912 		return;
2913 
2914 	vaddr += engine->context_size;
2915 
2916 	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
2917 		dev_err_once(engine->i915->drm.dev,
2918 			     "%s context redzone overwritten!\n",
2919 			     engine->name);
2920 }
2921 
2922 static void execlists_context_unpin(struct intel_context *ce)
2923 {
2924 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
2925 		      ce->engine);
2926 
2927 	i915_gem_object_unpin_map(ce->state->obj);
2928 }
2929 
2930 static void
2931 __execlists_update_reg_state(const struct intel_context *ce,
2932 			     const struct intel_engine_cs *engine,
2933 			     u32 head)
2934 {
2935 	struct intel_ring *ring = ce->ring;
2936 	u32 *regs = ce->lrc_reg_state;
2937 
2938 	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
2939 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
2940 
2941 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
2942 	regs[CTX_RING_HEAD] = head;
2943 	regs[CTX_RING_TAIL] = ring->tail;
2944 
2945 	/* RPCS */
2946 	if (engine->class == RENDER_CLASS) {
2947 		regs[CTX_R_PWR_CLK_STATE] =
2948 			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
2949 
2950 		i915_oa_init_reg_state(ce, engine);
2951 	}
2952 }
2953 
2954 static int
2955 __execlists_context_pin(struct intel_context *ce,
2956 			struct intel_engine_cs *engine)
2957 {
2958 	void *vaddr;
2959 
2960 	GEM_BUG_ON(!ce->state);
2961 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
2962 
2963 	vaddr = i915_gem_object_pin_map(ce->state->obj,
2964 					i915_coherent_map_type(engine->i915) |
2965 					I915_MAP_OVERRIDE);
2966 	if (IS_ERR(vaddr))
2967 		return PTR_ERR(vaddr);
2968 
2969 	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
2970 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
2971 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
2972 
2973 	return 0;
2974 }
2975 
2976 static int execlists_context_pin(struct intel_context *ce)
2977 {
2978 	return __execlists_context_pin(ce, ce->engine);
2979 }
2980 
2981 static int execlists_context_alloc(struct intel_context *ce)
2982 {
2983 	return __execlists_context_alloc(ce, ce->engine);
2984 }
2985 
2986 static void execlists_context_reset(struct intel_context *ce)
2987 {
2988 	CE_TRACE(ce, "reset\n");
2989 	GEM_BUG_ON(!intel_context_is_pinned(ce));
2990 
2991 	/*
2992 	 * Because we emit WA_TAIL_DWORDS there may be a disparity
2993 	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
2994 	 * that stored in context. As we only write new commands from
2995 	 * ce->ring->tail onwards, everything before that is junk. If the GPU
2996 	 * starts reading from its RING_HEAD from the context, it may try to
2997 	 * execute that junk and die.
2998 	 *
2999 	 * The contexts that are still pinned on resume belong to the
3000 	 * kernel, and are local to each engine. All other contexts will
3001 	 * have their head/tail sanitized upon pinning before use, so they
3002 	 * will never see garbage.
3003 	 *
3004 	 * So to avoid that we reset the context images upon resume. For
3005 	 * simplicity, we just zero everything out.
3006 	 */
3007 	intel_ring_reset(ce->ring, ce->ring->emit);
3008 
3009 	/* Scrub away the garbage */
3010 	execlists_init_reg_state(ce->lrc_reg_state,
3011 				 ce, ce->engine, ce->ring, true);
3012 	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3013 
3014 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
3015 }
3016 
3017 static const struct intel_context_ops execlists_context_ops = {
3018 	.alloc = execlists_context_alloc,
3019 
3020 	.pin = execlists_context_pin,
3021 	.unpin = execlists_context_unpin,
3022 
3023 	.enter = intel_context_enter_engine,
3024 	.exit = intel_context_exit_engine,
3025 
3026 	.reset = execlists_context_reset,
3027 	.destroy = execlists_context_destroy,
3028 };
3029 
3030 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3031 {
3032 	u32 *cs;
3033 
3034 	GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
3035 
3036 	cs = intel_ring_begin(rq, 6);
3037 	if (IS_ERR(cs))
3038 		return PTR_ERR(cs);
3039 
3040 	/*
3041 	 * Check if we have been preempted before we even get started.
3042 	 *
3043 	 * After this point i915_request_started() reports true, even if
3044 	 * we get preempted and so are no longer running.
3045 	 */
3046 	*cs++ = MI_ARB_CHECK;
3047 	*cs++ = MI_NOOP;
3048 
3049 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3050 	*cs++ = i915_request_timeline(rq)->hwsp_offset;
3051 	*cs++ = 0;
3052 	*cs++ = rq->fence.seqno - 1;
3053 
3054 	intel_ring_advance(rq, cs);
3055 
3056 	/* Record the updated position of the request's payload */
3057 	rq->infix = intel_ring_offset(rq, cs);
3058 
3059 	return 0;
3060 }
3061 
3062 static int execlists_request_alloc(struct i915_request *request)
3063 {
3064 	int ret;
3065 
3066 	GEM_BUG_ON(!intel_context_is_pinned(request->context));
3067 
3068 	/*
3069 	 * Flush enough space to reduce the likelihood of waiting after
3070 	 * we start building the request - in which case we will just
3071 	 * have to repeat work.
3072 	 */
3073 	request->reserved_space += EXECLISTS_REQUEST_SIZE;
3074 
3075 	/*
3076 	 * Note that after this point, we have committed to using
3077 	 * this request as it is being used to both track the
3078 	 * state of engine initialisation and liveness of the
3079 	 * golden renderstate above. Think twice before you try
3080 	 * to cancel/unwind this request now.
3081 	 */
3082 
3083 	/* Unconditionally invalidate GPU caches and TLBs. */
3084 	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3085 	if (ret)
3086 		return ret;
3087 
3088 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3089 	return 0;
3090 }
3091 
3092 /*
3093  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3094  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3095  * but there is a slight complication as this is applied in WA batch where the
3096  * values are only initialized once so we cannot take register value at the
3097  * beginning and reuse it further; hence we save its value to memory, upload a
3098  * constant value with bit 21 set and then we restore it with the saved value.
3099  * To simplify the WA, a constant value is formed by using the default value
3100  * of this register. This shouldn't be a problem because we are only modifying
3101  * it for a short period and this batch is non-preemptible. We can of course
3102  * use additional instructions that read the actual value of the register
3103  * at that time and set our bit of interest but it makes the WA complicated.
3104  *
3105  * This WA is also required for Gen9 so extracting as a function avoids
3106  * code duplication.
3107  */
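/*
 * In outline, the workaround batch emitted below is:
 *
 *     SRM  GEN8_L3SQCREG4 -> scratch            (save the current value)
 *     LRI  GEN8_L3SQCREG4 =  default | bit 21   (force coherent L3 flush)
 *     PIPE_CONTROL (CS stall + DC flush)
 *     LRM  scratch -> GEN8_L3SQCREG4            (restore the saved value)
 */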
3108 static u32 *
3109 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3110 {
3111 	/* NB no one else is allowed to scribble over scratch + 256! */
3112 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3113 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3114 	*batch++ = intel_gt_scratch_offset(engine->gt,
3115 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3116 	*batch++ = 0;
3117 
3118 	*batch++ = MI_LOAD_REGISTER_IMM(1);
3119 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3120 	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3121 
3122 	batch = gen8_emit_pipe_control(batch,
3123 				       PIPE_CONTROL_CS_STALL |
3124 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
3125 				       0);
3126 
3127 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3128 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3129 	*batch++ = intel_gt_scratch_offset(engine->gt,
3130 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3131 	*batch++ = 0;
3132 
3133 	return batch;
3134 }
3135 
3136 /*
3137  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3138  * initialized at the beginning and shared across all contexts but this field
3139  * helps us to have multiple batches at different offsets and select them based
3140  * on a criterion. At the moment this batch always starts at the beginning of the page
3141  * and at this point we don't have multiple wa_ctx batch buffers.
3142  *
3143  * The number of WAs applied is not known at the beginning; we use this field
3144  * to return the number of DWORDS written.
3145  *
3146  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3147  * so it adds NOOPs as padding to make it cacheline aligned.
3148  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
3149  * MI_BATCH_BUFFER_END will be added to the per-ctx batch and both of them together
3150  * make a complete batch buffer.
3151 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3152 {
3153 	/* WaDisableCtxRestoreArbitration:bdw,chv */
3154 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3155 
3156 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3157 	if (IS_BROADWELL(engine->i915))
3158 		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3159 
3160 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3161 	/* Actual scratch location is at 128 bytes offset */
3162 	batch = gen8_emit_pipe_control(batch,
3163 				       PIPE_CONTROL_FLUSH_L3 |
3164 				       PIPE_CONTROL_STORE_DATA_INDEX |
3165 				       PIPE_CONTROL_CS_STALL |
3166 				       PIPE_CONTROL_QW_WRITE,
3167 				       LRC_PPHWSP_SCRATCH_ADDR);
3168 
3169 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3170 
3171 	/* Pad to end of cacheline */
3172 	while ((unsigned long)batch % CACHELINE_BYTES)
3173 		*batch++ = MI_NOOP;
3174 
3175 	/*
3176 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3177 	 * execution depends on the length specified in terms of cache lines
3178 	 * in the register CTX_RCS_INDIRECT_CTX
3179 	 */
3180 
3181 	return batch;
3182 }
3183 
3184 struct lri {
3185 	i915_reg_t reg;
3186 	u32 value;
3187 };
3188 
3189 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3190 {
3191 	GEM_BUG_ON(!count || count > 63);
3192 
3193 	*batch++ = MI_LOAD_REGISTER_IMM(count);
3194 	do {
3195 		*batch++ = i915_mmio_reg_offset(lri->reg);
3196 		*batch++ = lri->value;
3197 	} while (lri++, --count);
3198 	*batch++ = MI_NOOP;
3199 
3200 	return batch;
3201 }
3202 
3203 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3204 {
3205 	static const struct lri lri[] = {
3206 		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3207 		{
3208 			COMMON_SLICE_CHICKEN2,
3209 			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3210 				       0),
3211 		},
3212 
3213 		/* BSpec: 11391 */
3214 		{
3215 			FF_SLICE_CHICKEN,
3216 			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3217 				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3218 		},
3219 
3220 		/* BSpec: 11299 */
3221 		{
3222 			_3D_CHICKEN3,
3223 			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3224 				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3225 		}
3226 	};
3227 
3228 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3229 
3230 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3231 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3232 
3233 	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3234 	batch = gen8_emit_pipe_control(batch,
3235 				       PIPE_CONTROL_FLUSH_L3 |
3236 				       PIPE_CONTROL_STORE_DATA_INDEX |
3237 				       PIPE_CONTROL_CS_STALL |
3238 				       PIPE_CONTROL_QW_WRITE,
3239 				       LRC_PPHWSP_SCRATCH_ADDR);
3240 
3241 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3242 
3243 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
3244 	if (HAS_POOLED_EU(engine->i915)) {
3245 		/*
3246 		 * EU pool configuration is set up along with the golden context
3247 		 * during context initialization. This value depends on
3248 		 * device type (2x6 or 3x6) and needs to be updated based
3249 		 * on which subslice is disabled especially for 2x6
3250 		 * devices, however it is safe to load default
3251 		 * configuration of 3x6 device instead of masking off
3252 		 * corresponding bits because HW ignores bits of a disabled
3253 		 * subslice and drops down to appropriate config. Please
3254 		 * see render_state_setup() in i915_gem_render_state.c for
3255 		 * possible configurations, to avoid duplication they are
3256 		 * not shown here again.
3257 		 */
3258 		*batch++ = GEN9_MEDIA_POOL_STATE;
3259 		*batch++ = GEN9_MEDIA_POOL_ENABLE;
3260 		*batch++ = 0x00777000;
3261 		*batch++ = 0;
3262 		*batch++ = 0;
3263 		*batch++ = 0;
3264 	}
3265 
3266 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3267 
3268 	/* Pad to end of cacheline */
3269 	while ((unsigned long)batch % CACHELINE_BYTES)
3270 		*batch++ = MI_NOOP;
3271 
3272 	return batch;
3273 }
3274 
3275 static u32 *
3276 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3277 {
3278 	int i;
3279 
3280 	/*
3281 	 * WaPipeControlBefore3DStateSamplePattern: cnl
3282 	 *
3283 	 * Ensure the engine is idle prior to programming a
3284 	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
3285 	 */
3286 	batch = gen8_emit_pipe_control(batch,
3287 				       PIPE_CONTROL_CS_STALL,
3288 				       0);
3289 	/*
3290 	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3291 	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3292 	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3293 	 * confusing. Since gen8_emit_pipe_control() already advances the
3294 	 * batch by 6 dwords, we advance the other 10 here, completing a
3295 	 * cacheline. It's not clear if the workaround requires this padding
3296 	 * before other commands, or if it's just the regular padding we would
3297 	 * already have for the workaround bb, so leave it here for now.
3298 	 */
3299 	for (i = 0; i < 10; i++)
3300 		*batch++ = MI_NOOP;
3301 
3302 	/* Pad to end of cacheline */
3303 	while ((unsigned long)batch % CACHELINE_BYTES)
3304 		*batch++ = MI_NOOP;
3305 
3306 	return batch;
3307 }
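
/*
 * Dword budget for the workaround above, spelled out: the PIPE_CONTROL
 * advances the batch by 6 dwords and the loop adds 10 MI_NOOPs, giving
 * 16 dwords (64 bytes) in total, i.e. exactly the one cacheline the
 * workaround asks for (assuming 64-byte cachelines).
 */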
3308 
3309 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3310 
3311 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3312 {
3313 	struct drm_i915_gem_object *obj;
3314 	struct i915_vma *vma;
3315 	int err;
3316 
3317 	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3318 	if (IS_ERR(obj))
3319 		return PTR_ERR(obj);
3320 
3321 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3322 	if (IS_ERR(vma)) {
3323 		err = PTR_ERR(vma);
3324 		goto err;
3325 	}
3326 
3327 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
3328 	if (err)
3329 		goto err;
3330 
3331 	engine->wa_ctx.vma = vma;
3332 	return 0;
3333 
3334 err:
3335 	i915_gem_object_put(obj);
3336 	return err;
3337 }
3338 
3339 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3340 {
3341 	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3342 }
3343 
3344 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3345 
3346 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3347 {
3348 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3349 	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3350 					    &wa_ctx->per_ctx };
3351 	wa_bb_func_t wa_bb_fn[2];
3352 	struct page *page;
3353 	void *batch, *batch_ptr;
3354 	unsigned int i;
3355 	int ret;
3356 
3357 	if (engine->class != RENDER_CLASS)
3358 		return 0;
3359 
3360 	switch (INTEL_GEN(engine->i915)) {
3361 	case 12:
3362 	case 11:
3363 		return 0;
3364 	case 10:
3365 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
3366 		wa_bb_fn[1] = NULL;
3367 		break;
3368 	case 9:
3369 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
3370 		wa_bb_fn[1] = NULL;
3371 		break;
3372 	case 8:
3373 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
3374 		wa_bb_fn[1] = NULL;
3375 		break;
3376 	default:
3377 		MISSING_CASE(INTEL_GEN(engine->i915));
3378 		return 0;
3379 	}
3380 
3381 	ret = lrc_setup_wa_ctx(engine);
3382 	if (ret) {
3383 		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
3384 		return ret;
3385 	}
3386 
3387 	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
3388 	batch = batch_ptr = kmap_atomic(page);
3389 
3390 	/*
3391 	 * Emit the two workaround batch buffers, recording the offset from the
3392 	 * start of the workaround batch buffer object for each and their
3393 	 * respective sizes.
3394 	 */
3395 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3396 		wa_bb[i]->offset = batch_ptr - batch;
3397 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3398 						  CACHELINE_BYTES))) {
3399 			ret = -EINVAL;
3400 			break;
3401 		}
3402 		if (wa_bb_fn[i])
3403 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3404 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3405 	}
3406 
3407 	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3408 
3409 	kunmap_atomic(batch);
3410 	if (ret)
3411 		lrc_destroy_wa_ctx(engine);
3412 
3413 	return ret;
3414 }
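
/*
 * Sketch of the resulting layout within the single CTX_WA_BB_OBJ_SIZE
 * (one page) object: wa_bb[0], the indirect ctx BB, starts at offset 0
 * and wa_bb[1], the per-ctx BB, starts at whatever cacheline-aligned
 * offset the previous emitter left behind; each records its offset and
 * size so that init_wa_bb_reg_state() below can point the context image
 * at them.
 */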
3415 
3416 static void enable_execlists(struct intel_engine_cs *engine)
3417 {
3418 	u32 mode;
3419 
3420 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3421 
3422 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3423 
3424 	if (INTEL_GEN(engine->i915) >= 11)
3425 		mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
3426 	else
3427 		mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
3428 	ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
3429 
3430 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3431 
3432 	ENGINE_WRITE_FW(engine,
3433 			RING_HWS_PGA,
3434 			i915_ggtt_offset(engine->status_page.vma));
3435 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
3436 
3437 	engine->context_tag = 0;
3438 }
3439 
3440 static bool unexpected_starting_state(struct intel_engine_cs *engine)
3441 {
3442 	bool unexpected = false;
3443 
3444 	if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
3445 		DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
3446 		unexpected = true;
3447 	}
3448 
3449 	return unexpected;
3450 }
3451 
3452 static int execlists_resume(struct intel_engine_cs *engine)
3453 {
3454 	intel_engine_apply_workarounds(engine);
3455 	intel_engine_apply_whitelist(engine);
3456 
3457 	intel_mocs_init_engine(engine);
3458 
3459 	intel_engine_reset_breadcrumbs(engine);
3460 
3461 	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
3462 		struct drm_printer p = drm_debug_printer(__func__);
3463 
3464 		intel_engine_dump(engine, &p, NULL);
3465 	}
3466 
3467 	enable_execlists(engine);
3468 
3469 	return 0;
3470 }
3471 
3472 static void execlists_reset_prepare(struct intel_engine_cs *engine)
3473 {
3474 	struct intel_engine_execlists * const execlists = &engine->execlists;
3475 	unsigned long flags;
3476 
3477 	ENGINE_TRACE(engine, "depth<-%d\n",
3478 		     atomic_read(&execlists->tasklet.count));
3479 
3480 	/*
3481 	 * Prevent request submission to the hardware until we have
3482 	 * completed the reset in i915_gem_reset_finish(). If a request
3483 	 * is completed by one engine, it may then queue a request
3484 	 * to a second via its execlists->tasklet *just* as we are
3485 	 * calling engine->resume() and also writing the ELSP.
3486 	 * Turning off the execlists->tasklet until the reset is over
3487 	 * prevents the race.
3488 	 */
3489 	__tasklet_disable_sync_once(&execlists->tasklet);
3490 	GEM_BUG_ON(!reset_in_progress(execlists));
3491 
3492 	/* And flush any current direct submission. */
3493 	spin_lock_irqsave(&engine->active.lock, flags);
3494 	spin_unlock_irqrestore(&engine->active.lock, flags);
3495 
3496 	/*
3497 	 * We stop the engines, otherwise we might get a failed reset and a
3498 	 * dead gpu (on elk). Even a gpu as modern as kbl can suffer
3499 	 * from a system hang if a batchbuffer is progressing when
3500 	 * the reset is issued, regardless of READY_TO_RESET ack.
3501 	 * Thus assume it is best to stop engines on all gens
3502 	 * where we have a gpu reset.
3503 	 *
3504 	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
3505 	 *
3506 	 * FIXME: Wa for more modern gens needs to be validated
3507 	 */
3508 	intel_engine_stop_cs(engine);
3509 }
3510 
3511 static void reset_csb_pointers(struct intel_engine_cs *engine)
3512 {
3513 	struct intel_engine_execlists * const execlists = &engine->execlists;
3514 	const unsigned int reset_value = execlists->csb_size - 1;
3515 
3516 	ring_set_paused(engine, 0);
3517 
3518 	/*
3519 	 * After a reset, the HW starts writing into CSB entry [0]. We
3520 	 * therefore have to set our HEAD pointer back one entry so that
3521 	 * the *first* entry we check is entry 0. To complicate this further,
3522 	 * as we don't wait for the first interrupt after reset, we have to
3523 	 * fake the HW write to point back to the last entry so that our
3524 	 * inline comparison of our cached head position against the last HW
3525 	 * write works even before the first interrupt.
3526 	 */
3527 	execlists->csb_head = reset_value;
3528 	WRITE_ONCE(*execlists->csb_write, reset_value);
3529 	wmb(); /* Make sure this is visible to HW (paranoia?) */
3530 
3531 	/*
3532 	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3533 	 * Bludgeon them with a mmio update to be sure.
3534 	 */
3535 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3536 		     reset_value << 8 | reset_value);
3537 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3538 
3539 	invalidate_csb_entries(&execlists->csb_status[0],
3540 			       &execlists->csb_status[reset_value]);
3541 }
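
/*
 * Worked example of the reset above, assuming a 12-entry Gen11 CSB
 * (csb_size == 12): reset_value is 11, so both the cached head and the
 * faked HW write pointer claim entry 11 was the last one consumed, and
 * the first entry process_csb() examines after the reset is
 * (11 + 1) % 12 == 0, matching where the HW begins writing.
 */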
3542 
3543 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
3544 {
3545 	int x;
3546 
3547 	x = lrc_ring_mi_mode(engine);
3548 	if (x != -1) {
3549 		regs[x + 1] &= ~STOP_RING;
3550 		regs[x + 1] |= STOP_RING << 16;
3551 	}
3552 }
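
/*
 * Note on the masked-write idiom above: RING_MI_MODE is a masked
 * register, so the upper 16 bits of the stored value select which bits
 * the HW will update. Clearing STOP_RING in the low half while setting
 * STOP_RING << 16 in the high half is therefore the saved-image
 * equivalent of writing _MASKED_BIT_DISABLE(STOP_RING), i.e. "update
 * the STOP_RING bit, and update it to 0".
 */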
3553 
3554 static void __execlists_reset_reg_state(const struct intel_context *ce,
3555 					const struct intel_engine_cs *engine)
3556 {
3557 	u32 *regs = ce->lrc_reg_state;
3558 
3559 	__reset_stop_ring(regs, engine);
3560 }
3561 
3562 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
3563 {
3564 	struct intel_engine_execlists * const execlists = &engine->execlists;
3565 	struct intel_context *ce;
3566 	struct i915_request *rq;
3567 	u32 head;
3568 
3569 	mb(); /* paranoia: read the CSB pointers from after the reset */
3570 	clflush(execlists->csb_write);
3571 	mb();
3572 
3573 	process_csb(engine); /* drain preemption events */
3574 
3575 	/* Following the reset, we need to reload the CSB read/write pointers */
3576 	reset_csb_pointers(engine);
3577 
3578 	/*
3579 	 * Save the currently executing context; even if we completed
3580 	 * its request, it was still running at the time of the
3581 	 * reset and will have been clobbered.
3582 	 */
3583 	rq = execlists_active(execlists);
3584 	if (!rq)
3585 		goto unwind;
3586 
3587 	/* We still have requests in-flight; the engine should be active */
3588 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
3589 
3590 	ce = rq->context;
3591 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3592 
3593 	if (i915_request_completed(rq)) {
3594 		/* Idle context; tidy up the ring so we can restart afresh */
3595 		head = intel_ring_wrap(ce->ring, rq->tail);
3596 		goto out_replay;
3597 	}
3598 
3599 	/* Context has requests still in-flight; it should not be idle! */
3600 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
3601 	rq = active_request(ce->timeline, rq);
3602 	head = intel_ring_wrap(ce->ring, rq->head);
3603 	GEM_BUG_ON(head == ce->ring->tail);
3604 
3605 	/*
3606 	 * If this request hasn't started yet, e.g. it is waiting on a
3607 	 * semaphore, we need to avoid skipping the request or else we
3608 	 * break the signaling chain. However, if the context is corrupt
3609 	 * the request will not restart and we will be stuck with a wedged
3610 	 * device. It is quite often the case that if we issue a reset
3611 	 * while the GPU is loading the context image, the context
3612 	 * image becomes corrupt.
3613 	 *
3614 	 * Otherwise, if we have not started yet, the request should replay
3615 	 * perfectly and we do not need to flag the result as being erroneous.
3616 	 */
3617 	if (!i915_request_started(rq))
3618 		goto out_replay;
3619 
3620 	/*
3621 	 * If the request was innocent, we leave the request in the ELSP
3622 	 * and will try to replay it on restarting. The context image may
3623 	 * have been corrupted by the reset, in which case we may have
3624 	 * to service a new GPU hang, but more likely we can continue on
3625 	 * without impact.
3626 	 *
3627 	 * If the request was guilty, we presume the context is corrupt
3628 	 * and have to at least restore the RING register in the context
3629 	 * image back to the expected values to skip over the guilty request.
3630 	 */
3631 	__i915_request_reset(rq, stalled);
3632 	if (!stalled)
3633 		goto out_replay;
3634 
3635 	/*
3636 	 * We want a simple context + ring to execute the breadcrumb update.
3637 	 * We cannot rely on the context being intact across the GPU hang,
3638 	 * so clear it and rebuild just what we need for the breadcrumb.
3639 	 * All pending requests for this context will be zapped, and any
3640 	 * future request will be after userspace has had the opportunity
3641 	 * to recreate its own state.
3642 	 */
3643 	GEM_BUG_ON(!intel_context_is_pinned(ce));
3644 	restore_default_state(ce, engine);
3645 
3646 out_replay:
3647 	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
3648 		     head, ce->ring->tail);
3649 	__execlists_reset_reg_state(ce, engine);
3650 	__execlists_update_reg_state(ce, engine, head);
3651 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
3652 
3653 unwind:
3654 	/* Push back any incomplete requests for replay after the reset. */
3655 	cancel_port_requests(execlists);
3656 	__unwind_incomplete_requests(engine);
3657 }
3658 
3659 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
3660 {
3661 	unsigned long flags;
3662 
3663 	ENGINE_TRACE(engine, "\n");
3664 
3665 	spin_lock_irqsave(&engine->active.lock, flags);
3666 
3667 	__execlists_reset(engine, stalled);
3668 
3669 	spin_unlock_irqrestore(&engine->active.lock, flags);
3670 }
3671 
3672 static void nop_submission_tasklet(unsigned long data)
3673 {
3674 	/* The driver is wedged; don't process any more events. */
3675 }
3676 
3677 static void execlists_reset_cancel(struct intel_engine_cs *engine)
3678 {
3679 	struct intel_engine_execlists * const execlists = &engine->execlists;
3680 	struct i915_request *rq, *rn;
3681 	struct rb_node *rb;
3682 	unsigned long flags;
3683 
3684 	ENGINE_TRACE(engine, "\n");
3685 
3686 	/*
3687 	 * Before we call engine->cancel_requests(), we should have exclusive
3688 	 * access to the submission state. This is arranged for us by the
3689 	 * caller disabling the interrupt generation, the tasklet and other
3690 	 * threads that may then access the same state, giving us a free hand
3691 	 * to reset state. However, we still need to let lockdep be aware that
3692 	 * we know this state may be accessed in hardirq context, so we
3693 	 * disable the irq around this manipulation and we want to keep
3694 	 * the spinlock focused on its duties and not accidentally conflate
3695 	 * coverage to the submission's irq state. (Similarly, although we
3696 	 * shouldn't need to disable irq around the manipulation of the
3697 	 * submission's irq state, we also wish to remind ourselves that
3698 	 * it is irq state.)
3699 	 */
3700 	spin_lock_irqsave(&engine->active.lock, flags);
3701 
3702 	__execlists_reset(engine, true);
3703 
3704 	/* Mark all executing requests as skipped. */
3705 	list_for_each_entry(rq, &engine->active.requests, sched.link)
3706 		mark_eio(rq);
3707 
3708 	/* Flush the queued requests to the timeline list (for retiring). */
3709 	while ((rb = rb_first_cached(&execlists->queue))) {
3710 		struct i915_priolist *p = to_priolist(rb);
3711 		int i;
3712 
3713 		priolist_for_each_request_consume(rq, rn, p, i) {
3714 			mark_eio(rq);
3715 			__i915_request_submit(rq);
3716 		}
3717 
3718 		rb_erase_cached(&p->node, &execlists->queue);
3719 		i915_priolist_free(p);
3720 	}
3721 
3722 	/* On-hold requests will be flushed to timeline upon their release */
3723 	list_for_each_entry(rq, &engine->active.hold, sched.link)
3724 		mark_eio(rq);
3725 
3726 	/* Cancel all attached virtual engines */
3727 	while ((rb = rb_first_cached(&execlists->virtual))) {
3728 		struct virtual_engine *ve =
3729 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
3730 
3731 		rb_erase_cached(rb, &execlists->virtual);
3732 		container_of(rb, struct ve_node, rb)->inserted = false;
3733 
3734 		spin_lock(&ve->base.active.lock);
3735 		rq = fetch_and_zero(&ve->request);
3736 		if (rq) {
3737 			mark_eio(rq);
3738 
3739 			rq->engine = engine;
3740 			__i915_request_submit(rq);
3741 			i915_request_put(rq);
3742 
3743 			ve->base.execlists.queue_priority_hint = INT_MIN;
3744 		}
3745 		spin_unlock(&ve->base.active.lock);
3746 	}
3747 
3748 	/* Remaining _unready_ requests will be nop'ed when submitted */
3749 
3750 	execlists->queue_priority_hint = INT_MIN;
3751 #ifdef __NetBSD__
3752 	i915_sched_init(execlists);
3753 	rb_tree_init(&execlists->virtual.rb_root.rbr_tree, &ve_tree_ops);
3754 #else
3755 	execlists->queue = RB_ROOT_CACHED;
3756 #endif
3757 
3758 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
3759 	execlists->tasklet.func = nop_submission_tasklet;
3760 
3761 	spin_unlock_irqrestore(&engine->active.lock, flags);
3762 }
3763 
3764 static void execlists_reset_finish(struct intel_engine_cs *engine)
3765 {
3766 	struct intel_engine_execlists * const execlists = &engine->execlists;
3767 
3768 	/*
3769 	 * After a GPU reset, we may have requests to replay. Do so now while
3770 	 * we still have the forcewake to be sure that the GPU is not allowed
3771 	 * to sleep before we restart and reload a context.
3772 	 */
3773 	GEM_BUG_ON(!reset_in_progress(execlists));
3774 	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
3775 		execlists->tasklet.func(execlists->tasklet.data);
3776 
3777 	if (__tasklet_enable(&execlists->tasklet))
3778 		/* And kick in case we missed a new request submission. */
3779 		tasklet_hi_schedule(&execlists->tasklet);
3780 	ENGINE_TRACE(engine, "depth->%d\n",
3781 		     atomic_read(&execlists->tasklet.count));
3782 }
3783 
3784 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
3785 				    u64 offset, u32 len,
3786 				    const unsigned int flags)
3787 {
3788 	u32 *cs;
3789 
3790 	cs = intel_ring_begin(rq, 4);
3791 	if (IS_ERR(cs))
3792 		return PTR_ERR(cs);
3793 
3794 	/*
3795 	 * WaDisableCtxRestoreArbitration:bdw,chv
3796 	 *
3797 	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
3798 	 * particular all the gen that do not need the w/a at all!), if we
3799 	 * took care to make sure that on every switch into this context
3800 	 * (both ordinary and for preemption) arbitration was enabled
3801 	 * we would be fine.  However, for gen8 there is another w/a that
3802 	 * requires us to not preempt inside GPGPU execution, so we keep
3803 	 * arbitration disabled for gen8 batches. Arbitration will be
3804 	 * re-enabled before we close the request
3805 	 * (engine->emit_fini_breadcrumb).
3806 	 */
3807 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3808 
3809 	/* FIXME(BDW+): Address space and security selectors. */
3810 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
3811 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3812 	*cs++ = lower_32_bits(offset);
3813 	*cs++ = upper_32_bits(offset);
3814 
3815 	intel_ring_advance(rq, cs);
3816 
3817 	return 0;
3818 }
3819 
3820 static int gen8_emit_bb_start(struct i915_request *rq,
3821 			      u64 offset, u32 len,
3822 			      const unsigned int flags)
3823 {
3824 	u32 *cs;
3825 
3826 	cs = intel_ring_begin(rq, 6);
3827 	if (IS_ERR(cs))
3828 		return PTR_ERR(cs);
3829 
3830 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3831 
3832 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
3833 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3834 	*cs++ = lower_32_bits(offset);
3835 	*cs++ = upper_32_bits(offset);
3836 
3837 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3838 	*cs++ = MI_NOOP;
3839 
3840 	intel_ring_advance(rq, cs);
3841 
3842 	return 0;
3843 }
3844 
3845 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3846 {
3847 	ENGINE_WRITE(engine, RING_IMR,
3848 		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
3849 	ENGINE_POSTING_READ(engine, RING_IMR);
3850 }
3851 
3852 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3853 {
3854 	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3855 }
3856 
3857 static int gen8_emit_flush(struct i915_request *request, u32 mode)
3858 {
3859 	u32 cmd, *cs;
3860 
3861 	cs = intel_ring_begin(request, 4);
3862 	if (IS_ERR(cs))
3863 		return PTR_ERR(cs);
3864 
3865 	cmd = MI_FLUSH_DW + 1;
3866 
3867 	/* We always require a command barrier so that subsequent
3868 	 * commands, such as breadcrumb interrupts, are strictly ordered
3869 	 * wrt the contents of the write cache being flushed to memory
3870 	 * (and thus being coherent from the CPU).
3871 	 */
3872 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
3873 
3874 	if (mode & EMIT_INVALIDATE) {
3875 		cmd |= MI_INVALIDATE_TLB;
3876 		if (request->engine->class == VIDEO_DECODE_CLASS)
3877 			cmd |= MI_INVALIDATE_BSD;
3878 	}
3879 
3880 	*cs++ = cmd;
3881 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
3882 	*cs++ = 0; /* upper addr */
3883 	*cs++ = 0; /* value */
3884 	intel_ring_advance(request, cs);
3885 
3886 	return 0;
3887 }
3888 
3889 static int gen8_emit_flush_render(struct i915_request *request,
3890 				  u32 mode)
3891 {
3892 	bool vf_flush_wa = false, dc_flush_wa = false;
3893 	u32 *cs, flags = 0;
3894 	int len;
3895 
3896 	flags |= PIPE_CONTROL_CS_STALL;
3897 
3898 	if (mode & EMIT_FLUSH) {
3899 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3900 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3901 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3902 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
3903 	}
3904 
3905 	if (mode & EMIT_INVALIDATE) {
3906 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
3907 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3908 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3909 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3910 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3911 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3912 		flags |= PIPE_CONTROL_QW_WRITE;
3913 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3914 
3915 		/*
3916 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
3917 		 * pipe control.
3918 		 */
3919 		if (IS_GEN(request->i915, 9))
3920 			vf_flush_wa = true;
3921 
3922 		/* WaForGAMHang:kbl */
3923 		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
3924 			dc_flush_wa = true;
3925 	}
3926 
3927 	len = 6;
3928 
3929 	if (vf_flush_wa)
3930 		len += 6;
3931 
3932 	if (dc_flush_wa)
3933 		len += 12;
3934 
3935 	cs = intel_ring_begin(request, len);
3936 	if (IS_ERR(cs))
3937 		return PTR_ERR(cs);
3938 
3939 	if (vf_flush_wa)
3940 		cs = gen8_emit_pipe_control(cs, 0, 0);
3941 
3942 	if (dc_flush_wa)
3943 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
3944 					    0);
3945 
3946 	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3947 
3948 	if (dc_flush_wa)
3949 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
3950 
3951 	intel_ring_advance(request, cs);
3952 
3953 	return 0;
3954 }
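
/*
 * The ring-space arithmetic above, spelled out: the main PIPE_CONTROL
 * is 6 dwords; the gen9 NULL PIPE_CONTROL (vf_flush_wa) adds another 6;
 * and the kbl GAM hang workaround (dc_flush_wa) brackets the flush with
 * two more PIPE_CONTROLs, adding 12. The worst case reserved by
 * intel_ring_begin() is therefore 6 + 6 + 12 = 24 dwords.
 */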
3955 
3956 static int gen11_emit_flush_render(struct i915_request *request,
3957 				   u32 mode)
3958 {
3959 	if (mode & EMIT_FLUSH) {
3960 		u32 *cs;
3961 		u32 flags = 0;
3962 
3963 		flags |= PIPE_CONTROL_CS_STALL;
3964 
3965 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3966 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3967 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3968 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3969 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
3970 		flags |= PIPE_CONTROL_QW_WRITE;
3971 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3972 
3973 		cs = intel_ring_begin(request, 6);
3974 		if (IS_ERR(cs))
3975 			return PTR_ERR(cs);
3976 
3977 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3978 		intel_ring_advance(request, cs);
3979 	}
3980 
3981 	if (mode & EMIT_INVALIDATE) {
3982 		u32 *cs;
3983 		u32 flags = 0;
3984 
3985 		flags |= PIPE_CONTROL_CS_STALL;
3986 
3987 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3988 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
3989 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3990 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3991 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3992 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3993 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3994 		flags |= PIPE_CONTROL_QW_WRITE;
3995 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3996 
3997 		cs = intel_ring_begin(request, 6);
3998 		if (IS_ERR(cs))
3999 			return PTR_ERR(cs);
4000 
4001 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4002 		intel_ring_advance(request, cs);
4003 	}
4004 
4005 	return 0;
4006 }
4007 
4008 static u32 preparser_disable(bool state)
4009 {
4010 	return MI_ARB_CHECK | 1 << 8 | state;
4011 }
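
/*
 * Rough sketch of the encoding above: the returned dword is MI_ARB_CHECK
 * with bit 8 flagging that the pre-fetch disable field is being
 * programmed (a gen12 extension of MI_ARB_CHECK) and bit 0 carrying the
 * requested state, so preparser_disable(true) turns pre-fetching off and
 * preparser_disable(false) turns it back on.
 */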
4012 
4013 static int gen12_emit_flush_render(struct i915_request *request,
4014 				   u32 mode)
4015 {
4016 	if (mode & EMIT_FLUSH) {
4017 		u32 flags = 0;
4018 		u32 *cs;
4019 
4020 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4021 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4022 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4023 		/* Wa_1409600907:tgl */
4024 		flags |= PIPE_CONTROL_DEPTH_STALL;
4025 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4026 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4027 		flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4028 
4029 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4030 		flags |= PIPE_CONTROL_QW_WRITE;
4031 
4032 		flags |= PIPE_CONTROL_CS_STALL;
4033 
4034 		cs = intel_ring_begin(request, 6);
4035 		if (IS_ERR(cs))
4036 			return PTR_ERR(cs);
4037 
4038 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4039 		intel_ring_advance(request, cs);
4040 	}
4041 
4042 	if (mode & EMIT_INVALIDATE) {
4043 		u32 flags = 0;
4044 		u32 *cs;
4045 
4046 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4047 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4048 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4049 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4050 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4051 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4052 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4053 		flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
4054 
4055 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4056 		flags |= PIPE_CONTROL_QW_WRITE;
4057 
4058 		flags |= PIPE_CONTROL_CS_STALL;
4059 
4060 		cs = intel_ring_begin(request, 8);
4061 		if (IS_ERR(cs))
4062 			return PTR_ERR(cs);
4063 
4064 		/*
4065 		 * Prevent the pre-parser from skipping past the TLB
4066 		 * invalidate and loading a stale page for the batch
4067 		 * buffer / request payload.
4068 		 */
4069 		*cs++ = preparser_disable(true);
4070 
4071 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4072 
4073 		*cs++ = preparser_disable(false);
4074 		intel_ring_advance(request, cs);
4075 
4076 		/*
4077 		 * Wa_1604544889:tgl
4078 		 */
4079 		if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
4080 			flags = 0;
4081 			flags |= PIPE_CONTROL_CS_STALL;
4082 			flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4083 
4084 			flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4085 			flags |= PIPE_CONTROL_QW_WRITE;
4086 
4087 			cs = intel_ring_begin(request, 6);
4088 			if (IS_ERR(cs))
4089 				return PTR_ERR(cs);
4090 
4091 			cs = gen8_emit_pipe_control(cs, flags,
4092 						    LRC_PPHWSP_SCRATCH_ADDR);
4093 			intel_ring_advance(request, cs);
4094 		}
4095 	}
4096 
4097 	return 0;
4098 }
4099 
4100 /*
4101  * Reserve space for 2 NOOPs at the end of each request to be
4102  * used as a workaround for not being allowed to do lite
4103  * restore with HEAD==TAIL (WaIdleLiteRestore).
4104  */
4105 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4106 {
4107 	/* Ensure there's always at least one preemption point per-request. */
4108 	*cs++ = MI_ARB_CHECK;
4109 	*cs++ = MI_NOOP;
4110 	request->wa_tail = intel_ring_offset(request, cs);
4111 
4112 	return cs;
4113 }
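
/*
 * Roughly: wa_tail records the offset just past these two dwords, so the
 * submission code always has a tail value strictly ahead of the
 * breadcrumb and never asks the HW to lite-restore a context with
 * HEAD == TAIL, which is what WaIdleLiteRestore forbids.
 */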
4114 
4115 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4116 {
4117 	*cs++ = MI_SEMAPHORE_WAIT |
4118 		MI_SEMAPHORE_GLOBAL_GTT |
4119 		MI_SEMAPHORE_POLL |
4120 		MI_SEMAPHORE_SAD_EQ_SDD;
4121 	*cs++ = 0;
4122 	*cs++ = intel_hws_preempt_address(request->engine);
4123 	*cs++ = 0;
4124 
4125 	return cs;
4126 }
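
/*
 * The four dwords above form a single MI_SEMAPHORE_WAIT: command,
 * semaphore data (0), and the 64-bit GGTT address of the per-engine
 * preemption semaphore in the HWSP. With POLL and SAD_EQ_SDD the CS
 * spins until that location reads 0, which is how preempt-to-busy
 * parks the engine here until ring_set_paused(engine, 0) releases it.
 */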
4127 
4128 static __always_inline u32*
4129 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
4130 				 u32 *cs)
4131 {
4132 	*cs++ = MI_USER_INTERRUPT;
4133 
4134 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4135 	if (intel_engine_has_semaphores(request->engine))
4136 		cs = emit_preempt_busywait(request, cs);
4137 
4138 	request->tail = intel_ring_offset(request, cs);
4139 	assert_ring_tail_valid(request->ring, request->tail);
4140 
4141 	return gen8_emit_wa_tail(request, cs);
4142 }
4143 
4144 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4145 {
4146 	cs = gen8_emit_ggtt_write(cs,
4147 				  request->fence.seqno,
4148 				  i915_request_active_timeline(request)->hwsp_offset,
4149 				  0);
4150 
4151 	return gen8_emit_fini_breadcrumb_footer(request, cs);
4152 }
4153 
4154 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4155 {
4156 	cs = gen8_emit_pipe_control(cs,
4157 				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4158 				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4159 				    PIPE_CONTROL_DC_FLUSH_ENABLE,
4160 				    0);
4161 
4162 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4163 	cs = gen8_emit_ggtt_write_rcs(cs,
4164 				      request->fence.seqno,
4165 				      i915_request_active_timeline(request)->hwsp_offset,
4166 				      PIPE_CONTROL_FLUSH_ENABLE |
4167 				      PIPE_CONTROL_CS_STALL);
4168 
4169 	return gen8_emit_fini_breadcrumb_footer(request, cs);
4170 }
4171 
4172 static u32 *
4173 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4174 {
4175 	cs = gen8_emit_ggtt_write_rcs(cs,
4176 				      request->fence.seqno,
4177 				      i915_request_active_timeline(request)->hwsp_offset,
4178 				      PIPE_CONTROL_CS_STALL |
4179 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
4180 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4181 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4182 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
4183 				      PIPE_CONTROL_FLUSH_ENABLE);
4184 
4185 	return gen8_emit_fini_breadcrumb_footer(request, cs);
4186 }
4187 
4188 /*
4189  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4190  * flush and will continue pre-fetching the instructions after it before the
4191  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4192  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
4193  * of the next request before the memory has been flushed, we're guaranteed that
4194  * we won't access the batch itself too early.
4195  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4196  * so, if the current request is modifying an instruction in the next request on
4197  * the same intel_context, we might pre-fetch and then execute the pre-update
4198  * instruction. To avoid this, the users of self-modifying code should either
4199  * disable the parser around the code emitting the memory writes, via a new flag
4200  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4201  * the in-kernel use-cases we've opted to use a separate context, see
4202  * reloc_gpu() as an example.
4203  * All the above applies only to the instructions themselves. Non-inline data
4204  * used by the instructions is not pre-fetched.
4205  */
4206 
4207 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4208 {
4209 	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4210 		MI_SEMAPHORE_GLOBAL_GTT |
4211 		MI_SEMAPHORE_POLL |
4212 		MI_SEMAPHORE_SAD_EQ_SDD;
4213 	*cs++ = 0;
4214 	*cs++ = intel_hws_preempt_address(request->engine);
4215 	*cs++ = 0;
4216 	*cs++ = 0;
4217 	*cs++ = MI_NOOP;
4218 
4219 	return cs;
4220 }
4221 
4222 static __always_inline u32*
4223 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
4224 {
4225 	*cs++ = MI_USER_INTERRUPT;
4226 
4227 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4228 	if (intel_engine_has_semaphores(request->engine))
4229 		cs = gen12_emit_preempt_busywait(request, cs);
4230 
4231 	request->tail = intel_ring_offset(request, cs);
4232 	assert_ring_tail_valid(request->ring, request->tail);
4233 
4234 	return gen8_emit_wa_tail(request, cs);
4235 }
4236 
4237 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4238 {
4239 	cs = gen8_emit_ggtt_write(cs,
4240 				  request->fence.seqno,
4241 				  i915_request_active_timeline(request)->hwsp_offset,
4242 				  0);
4243 
4244 	return gen12_emit_fini_breadcrumb_footer(request, cs);
4245 }
4246 
4247 static u32 *
4248 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4249 {
4250 	cs = gen8_emit_ggtt_write_rcs(cs,
4251 				      request->fence.seqno,
4252 				      i915_request_active_timeline(request)->hwsp_offset,
4253 				      PIPE_CONTROL_CS_STALL |
4254 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
4255 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4256 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4257 				      /* Wa_1409600907:tgl */
4258 				      PIPE_CONTROL_DEPTH_STALL |
4259 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
4260 				      PIPE_CONTROL_FLUSH_ENABLE |
4261 				      PIPE_CONTROL_HDC_PIPELINE_FLUSH);
4262 
4263 	return gen12_emit_fini_breadcrumb_footer(request, cs);
4264 }
4265 
4266 static void execlists_park(struct intel_engine_cs *engine)
4267 {
4268 	cancel_timer(&engine->execlists.timer);
4269 	cancel_timer(&engine->execlists.preempt);
4270 }
4271 
4272 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4273 {
4274 	engine->submit_request = execlists_submit_request;
4275 	engine->schedule = i915_schedule;
4276 	engine->execlists.tasklet.func = execlists_submission_tasklet;
4277 
4278 	engine->reset.prepare = execlists_reset_prepare;
4279 	engine->reset.rewind = execlists_reset_rewind;
4280 	engine->reset.cancel = execlists_reset_cancel;
4281 	engine->reset.finish = execlists_reset_finish;
4282 
4283 	engine->park = execlists_park;
4284 	engine->unpark = NULL;
4285 
4286 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4287 	if (!intel_vgpu_active(engine->i915)) {
4288 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4289 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
4290 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4291 	}
4292 
4293 	if (INTEL_GEN(engine->i915) >= 12)
4294 		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
4295 
4296 	if (intel_engine_has_preemption(engine))
4297 		engine->emit_bb_start = gen8_emit_bb_start;
4298 	else
4299 		engine->emit_bb_start = gen8_emit_bb_start_noarb;
4300 }
4301 
4302 static void execlists_shutdown(struct intel_engine_cs *engine)
4303 {
4304 	/* Synchronise with residual timers and any softirq they raise */
4305 	del_timer_sync(&engine->execlists.timer);
4306 	del_timer_sync(&engine->execlists.preempt);
4307 	tasklet_kill(&engine->execlists.tasklet);
4308 }
4309 
4310 static void execlists_release(struct intel_engine_cs *engine)
4311 {
4312 	execlists_shutdown(engine);
4313 
4314 	intel_engine_cleanup_common(engine);
4315 	lrc_destroy_wa_ctx(engine);
4316 }
4317 
4318 static void
4319 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
4320 {
4321 	/* Default vfuncs which can be overridden by each engine. */
4322 
4323 	engine->resume = execlists_resume;
4324 
4325 	engine->cops = &execlists_context_ops;
4326 	engine->request_alloc = execlists_request_alloc;
4327 
4328 	engine->emit_flush = gen8_emit_flush;
4329 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4330 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
4331 	if (INTEL_GEN(engine->i915) >= 12)
4332 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
4333 
4334 	engine->set_default_submission = intel_execlists_set_default_submission;
4335 
4336 	if (INTEL_GEN(engine->i915) < 11) {
4337 		engine->irq_enable = gen8_logical_ring_enable_irq;
4338 		engine->irq_disable = gen8_logical_ring_disable_irq;
4339 	} else {
4340 		/*
4341 		 * TODO: On Gen11 interrupt masks need to be clear
4342 		 * to allow C6 entry. Keep interrupts enabled at
4343 		 * all times and take the hit of generating extra interrupts
4344 		 * until a more refined solution exists.
4345 		 */
4346 	}
4347 }
4348 
4349 static inline void
4350 logical_ring_default_irqs(struct intel_engine_cs *engine)
4351 {
4352 	unsigned int shift = 0;
4353 
4354 	if (INTEL_GEN(engine->i915) < 11) {
4355 		const u8 irq_shifts[] = {
4356 			[RCS0]  = GEN8_RCS_IRQ_SHIFT,
4357 			[BCS0]  = GEN8_BCS_IRQ_SHIFT,
4358 			[VCS0]  = GEN8_VCS0_IRQ_SHIFT,
4359 			[VCS1]  = GEN8_VCS1_IRQ_SHIFT,
4360 			[VECS0] = GEN8_VECS_IRQ_SHIFT,
4361 		};
4362 
4363 		shift = irq_shifts[engine->id];
4364 	}
4365 
4366 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
4367 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
4368 }
4369 
4370 static void rcs_submission_override(struct intel_engine_cs *engine)
4371 {
4372 	switch (INTEL_GEN(engine->i915)) {
4373 	case 12:
4374 		engine->emit_flush = gen12_emit_flush_render;
4375 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4376 		break;
4377 	case 11:
4378 		engine->emit_flush = gen11_emit_flush_render;
4379 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4380 		break;
4381 	default:
4382 		engine->emit_flush = gen8_emit_flush_render;
4383 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4384 		break;
4385 	}
4386 }
4387 
4388 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
4389 {
4390 	struct intel_engine_execlists * const execlists = &engine->execlists;
4391 	struct drm_i915_private *i915 = engine->i915;
4392 	struct intel_uncore *uncore = engine->uncore;
4393 	u32 base = engine->mmio_base;
4394 
4395 	i915_sched_init(&engine->execlists);
4396 
4397 	tasklet_init(&engine->execlists.tasklet,
4398 		     execlists_submission_tasklet, (unsigned long)engine);
4399 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
4400 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
4401 
4402 	logical_ring_default_vfuncs(engine);
4403 	logical_ring_default_irqs(engine);
4404 
4405 	if (engine->class == RENDER_CLASS)
4406 		rcs_submission_override(engine);
4407 
4408 	if (intel_init_workaround_bb(engine))
4409 		/*
4410 		 * We continue even if we fail to initialize WA batch
4411 		 * because we only expect rare glitches, nothing
4412 		 * critical enough to prevent us from using the GPU
4413 		 */
4414 		DRM_ERROR("WA batch buffer initialization failed\n");
4415 
4416 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
4417 #ifdef __NetBSD__
4418 		execlists->submit_reg = i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
4419 		execlists->ctrl_reg = i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
4420 		execlists->bsh = uncore->regs_bsh;
4421 		execlists->bst = uncore->regs_bst;
4422 #else
4423 		execlists->submit_reg = uncore->regs +
4424 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
4425 		execlists->ctrl_reg = uncore->regs +
4426 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
4427 #endif
4428 	} else {
4429 #ifdef __NetBSD__
4430 		execlists->submit_reg = i915_mmio_reg_offset(RING_ELSP(base));
4431 		execlists->bsh = uncore->regs_bsh;
4432 		execlists->bst = uncore->regs_bst;
4433 #else
4434 		execlists->submit_reg = uncore->regs +
4435 			i915_mmio_reg_offset(RING_ELSP(base));
4436 #endif
4437 	}
4438 
4439 	execlists->csb_status =
4440 		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
4441 
4442 	execlists->csb_write =
4443 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
4444 
4445 	if (INTEL_GEN(i915) < 11)
4446 		execlists->csb_size = GEN8_CSB_ENTRIES;
4447 	else
4448 		execlists->csb_size = GEN11_CSB_ENTRIES;
4449 
4450 	reset_csb_pointers(engine);
4451 
4452 	/* Finally, take ownership and responsibility for cleanup! */
4453 	engine->release = execlists_release;
4454 
4455 	return 0;
4456 }
4457 
4458 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
4459 {
4460 	u32 indirect_ctx_offset;
4461 
4462 	switch (INTEL_GEN(engine->i915)) {
4463 	default:
4464 		MISSING_CASE(INTEL_GEN(engine->i915));
4465 		/* fall through */
4466 	case 12:
4467 		indirect_ctx_offset =
4468 			GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4469 		break;
4470 	case 11:
4471 		indirect_ctx_offset =
4472 			GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4473 		break;
4474 	case 10:
4475 		indirect_ctx_offset =
4476 			GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4477 		break;
4478 	case 9:
4479 		indirect_ctx_offset =
4480 			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4481 		break;
4482 	case 8:
4483 		indirect_ctx_offset =
4484 			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4485 		break;
4486 	}
4487 
4488 	return indirect_ctx_offset;
4489 }
4490 
4491 
4492 static void init_common_reg_state(u32 * const regs,
4493 				  const struct intel_engine_cs *engine,
4494 				  const struct intel_ring *ring,
4495 				  bool inhibit)
4496 {
4497 	u32 ctl;
4498 
4499 	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
4500 	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
4501 	if (inhibit)
4502 		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
4503 	if (INTEL_GEN(engine->i915) < 11)
4504 		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
4505 					   CTX_CTRL_RS_CTX_ENABLE);
4506 	regs[CTX_CONTEXT_CONTROL] = ctl;
4507 
4508 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
4509 }
4510 
4511 static void init_wa_bb_reg_state(u32 * const regs,
4512 				 const struct intel_engine_cs *engine,
4513 				 u32 pos_bb_per_ctx)
4514 {
4515 	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
4516 
4517 	if (wa_ctx->per_ctx.size) {
4518 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4519 
4520 		regs[pos_bb_per_ctx] =
4521 			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
4522 	}
4523 
4524 	if (wa_ctx->indirect_ctx.size) {
4525 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4526 
4527 		regs[pos_bb_per_ctx + 2] =
4528 			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
4529 			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
4530 
4531 		regs[pos_bb_per_ctx + 4] =
4532 			intel_lr_indirect_ctx_offset(engine) << 6;
4533 	}
4534 }
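
/*
 * Example of the encodings above, with purely illustrative numbers: for
 * a WA object at GGTT offset 0x1000, a per-ctx BB at offset 0x80 and a
 * two-cacheline indirect ctx BB at offset 0, the context image gets
 *
 *	regs[pos_bb_per_ctx]     = 0x1081	(address | valid bit)
 *	regs[pos_bb_per_ctx + 2] = 0x1002	(address | size in cachelines)
 *	regs[pos_bb_per_ctx + 4] = offset << 6
 *
 * where "offset" is the gen-specific default returned by
 * intel_lr_indirect_ctx_offset().
 */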
4535 
4536 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
4537 {
4538 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
4539 		/* 64b PPGTT (48bit canonical)
4540 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
4541 		 * other PDP Descriptors are ignored.
4542 		 */
4543 		ASSIGN_CTX_PML4(ppgtt, regs);
4544 	} else {
4545 		ASSIGN_CTX_PDP(ppgtt, regs, 3);
4546 		ASSIGN_CTX_PDP(ppgtt, regs, 2);
4547 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
4548 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
4549 	}
4550 }
4551 
4552 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
4553 {
4554 	if (i915_is_ggtt(vm))
4555 		return i915_vm_to_ggtt(vm)->alias;
4556 	else
4557 		return i915_vm_to_ppgtt(vm);
4558 }
4559 
4560 static void execlists_init_reg_state(u32 *regs,
4561 				     const struct intel_context *ce,
4562 				     const struct intel_engine_cs *engine,
4563 				     const struct intel_ring *ring,
4564 				     bool inhibit)
4565 {
4566 	/*
4567 	 * A context is actually a big batch buffer with several
4568 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
4569 	 * values we are setting here are only for the first context restore:
4570 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
4571 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
4572 	 * we are not initializing here).
4573 	 *
4574 	 * Must keep consistent with virtual_update_register_offsets().
4575 	 */
4576 	set_offsets(regs, reg_offsets(engine), engine, inhibit);
4577 
4578 	init_common_reg_state(regs, engine, ring, inhibit);
4579 	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
4580 
4581 	init_wa_bb_reg_state(regs, engine,
4582 			     INTEL_GEN(engine->i915) >= 12 ?
4583 			     GEN12_CTX_BB_PER_CTX_PTR :
4584 			     CTX_BB_PER_CTX_PTR);
4585 
4586 	__reset_stop_ring(regs, engine);
4587 }
4588 
4589 static int
4590 populate_lr_context(struct intel_context *ce,
4591 		    struct drm_i915_gem_object *ctx_obj,
4592 		    struct intel_engine_cs *engine,
4593 		    struct intel_ring *ring)
4594 {
4595 	bool inhibit = true;
4596 	void *vaddr;
4597 	int ret;
4598 
4599 	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
4600 	if (IS_ERR(vaddr)) {
4601 		ret = PTR_ERR(vaddr);
4602 		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
4603 		return ret;
4604 	}
4605 
4606 	set_redzone(vaddr, engine);
4607 
4608 	if (engine->default_state) {
4609 		void *defaults;
4610 
4611 		defaults = i915_gem_object_pin_map(engine->default_state,
4612 						   I915_MAP_WB);
4613 		if (IS_ERR(defaults)) {
4614 			ret = PTR_ERR(defaults);
4615 			goto err_unpin_ctx;
4616 		}
4617 
4618 		memcpy(vaddr, defaults, engine->context_size);
4619 		i915_gem_object_unpin_map(engine->default_state);
4620 		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
4621 		inhibit = false;
4622 	}
4623 
4624 	/* The second page of the context object contains some fields which must
4625 	 * be set up prior to the first execution. */
4626 	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
4627 				 ce, engine, ring, inhibit);
4628 
4629 	ret = 0;
4630 err_unpin_ctx:
4631 	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
4632 	i915_gem_object_unpin_map(ctx_obj);
4633 	return ret;
4634 }
4635 
4636 static int __execlists_context_alloc(struct intel_context *ce,
4637 				     struct intel_engine_cs *engine)
4638 {
4639 	struct drm_i915_gem_object *ctx_obj;
4640 	struct intel_ring *ring;
4641 	struct i915_vma *vma;
4642 	u32 context_size;
4643 	int ret;
4644 
4645 	GEM_BUG_ON(ce->state);
4646 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
4647 
4648 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4649 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
4650 
4651 	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
4652 	if (IS_ERR(ctx_obj))
4653 		return PTR_ERR(ctx_obj);
4654 
4655 	vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
4656 	if (IS_ERR(vma)) {
4657 		ret = PTR_ERR(vma);
4658 		goto error_deref_obj;
4659 	}
4660 
4661 	if (!ce->timeline) {
4662 		struct intel_timeline *tl;
4663 
4664 		tl = intel_timeline_create(engine->gt, NULL);
4665 		if (IS_ERR(tl)) {
4666 			ret = PTR_ERR(tl);
4667 			goto error_deref_obj;
4668 		}
4669 
4670 		ce->timeline = tl;
4671 	}
4672 
4673 	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
4674 	if (IS_ERR(ring)) {
4675 		ret = PTR_ERR(ring);
4676 		goto error_deref_obj;
4677 	}
4678 
4679 	ret = populate_lr_context(ce, ctx_obj, engine, ring);
4680 	if (ret) {
4681 		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
4682 		goto error_ring_free;
4683 	}
4684 
4685 	ce->ring = ring;
4686 	ce->state = vma;
4687 
4688 	return 0;
4689 
4690 error_ring_free:
4691 	intel_ring_put(ring);
4692 error_deref_obj:
4693 	i915_gem_object_put(ctx_obj);
4694 	return ret;
4695 }
4696 
4697 static struct list_head *virtual_queue(struct virtual_engine *ve)
4698 {
4699 	return &ve->base.execlists.default_priolist.requests[0];
4700 }
4701 
4702 static void virtual_context_destroy(struct kref *kref)
4703 {
4704 	struct virtual_engine *ve =
4705 		container_of(kref, typeof(*ve), context.ref);
4706 	unsigned int n;
4707 
4708 	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4709 	GEM_BUG_ON(ve->request);
4710 	GEM_BUG_ON(ve->context.inflight);
4711 
4712 	for (n = 0; n < ve->num_siblings; n++) {
4713 		struct intel_engine_cs *sibling = ve->siblings[n];
4714 		struct rb_node *node = &ve->nodes[sibling->id].rb;
4715 		unsigned long flags;
4716 
4717 		if (!ve->nodes[sibling->id].inserted)
4718 			continue;
4719 
4720 		spin_lock_irqsave(&sibling->active.lock, flags);
4721 
4722 		/* Detachment is lazily performed in the execlists tasklet */
4723 		if (ve->nodes[sibling->id].inserted) {
4724 			rb_erase_cached(node, &sibling->execlists.virtual);
4725 			ve->nodes[sibling->id].inserted = false;
4726 		}
4727 
4728 		spin_unlock_irqrestore(&sibling->active.lock, flags);
4729 	}
4730 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
4731 
4732 	if (ve->context.state)
4733 		__execlists_context_fini(&ve->context);
4734 	intel_context_fini(&ve->context);
4735 
4736 	intel_engine_fini_breadcrumbs(&ve->base);
4737 	spin_lock_destroy(&ve->base.active.lock);
4738 
4739 	kfree(ve->bonds);
4740 	kfree(ve);
4741 }
4742 
4743 static void virtual_engine_initial_hint(struct virtual_engine *ve)
4744 {
4745 	int swp;
4746 
4747 	/*
4748 	 * Pick a random sibling on starting to help spread the load around.
4749 	 *
4750 	 * New contexts are typically created with exactly the same order
4751 	 * of siblings, and often started in batches. Due to the way we iterate
4752 	 * the array of sibling when submitting requests, sibling[0] is
4753 	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
4754 	 * randomised across the system, we also help spread the load by the
4755 	 * first engine we inspect being different each time.
4756 	 *
4757 	 * NB This does not force us to execute on this engine, it will just
4758 	 * typically be the first we inspect for submission.
4759 	 */
4760 	swp = prandom_u32_max(ve->num_siblings);
4761 	if (!swp)
4762 		return;
4763 
4764 	swap(ve->siblings[swp], ve->siblings[0]);
4765 	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
4766 		virtual_update_register_offsets(ve->context.lrc_reg_state,
4767 						ve->siblings[0]);
4768 }
4769 
4770 static int virtual_context_alloc(struct intel_context *ce)
4771 {
4772 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4773 
4774 	return __execlists_context_alloc(ce, ve->siblings[0]);
4775 }
4776 
4777 static int virtual_context_pin(struct intel_context *ce)
4778 {
4779 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4780 	int err;
4781 
4782 	/* Note: we must use a real engine class for setting up reg state */
4783 	err = __execlists_context_pin(ce, ve->siblings[0]);
4784 	if (err)
4785 		return err;
4786 
4787 	virtual_engine_initial_hint(ve);
4788 	return 0;
4789 }
4790 
4791 static void virtual_context_enter(struct intel_context *ce)
4792 {
4793 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4794 	unsigned int n;
4795 
4796 	for (n = 0; n < ve->num_siblings; n++)
4797 		intel_engine_pm_get(ve->siblings[n]);
4798 
4799 	intel_timeline_enter(ce->timeline);
4800 }
4801 
4802 static void virtual_context_exit(struct intel_context *ce)
4803 {
4804 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4805 	unsigned int n;
4806 
4807 	intel_timeline_exit(ce->timeline);
4808 
4809 	for (n = 0; n < ve->num_siblings; n++)
4810 		intel_engine_pm_put(ve->siblings[n]);
4811 }
4812 
4813 static const struct intel_context_ops virtual_context_ops = {
4814 	.alloc = virtual_context_alloc,
4815 
4816 	.pin = virtual_context_pin,
4817 	.unpin = execlists_context_unpin,
4818 
4819 	.enter = virtual_context_enter,
4820 	.exit = virtual_context_exit,
4821 
4822 	.destroy = virtual_context_destroy,
4823 };
4824 
4825 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
4826 {
4827 	struct i915_request *rq;
4828 	intel_engine_mask_t mask;
4829 
4830 	rq = READ_ONCE(ve->request);
4831 	if (!rq)
4832 		return 0;
4833 
4834 	/* The rq is ready for submission; rq->execution_mask is now stable. */
4835 	mask = rq->execution_mask;
4836 	if (unlikely(!mask)) {
4837 		/* Invalid selection, submit to a random engine in error */
4838 		i915_request_skip(rq, -ENODEV);
4839 		mask = ve->siblings[0]->mask;
4840 	}
4841 
4842 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
4843 		     rq->fence.context, rq->fence.seqno,
4844 		     mask, ve->base.execlists.queue_priority_hint);
4845 
4846 	return mask;
4847 }
4848 
4849 static void virtual_submission_tasklet(unsigned long data)
4850 {
4851 	struct virtual_engine * const ve = (struct virtual_engine *)data;
4852 	const int prio = ve->base.execlists.queue_priority_hint;
4853 	intel_engine_mask_t mask;
4854 	unsigned int n;
4855 
4856 	rcu_read_lock();
4857 	mask = virtual_submission_mask(ve);
4858 	rcu_read_unlock();
4859 	if (unlikely(!mask))
4860 		return;
4861 
4862 #ifdef __NetBSD__
4863 	int s = splsoftserial(); /* block tasklets=softints */
4864 #else
4865 	local_irq_disable();
4866 #endif
4867 	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
4868 		struct intel_engine_cs *sibling = ve->siblings[n];
4869 		struct ve_node * const node = &ve->nodes[sibling->id];
4870 		struct rb_node **parent, *rb;
4871 		bool first;
4872 
4873 		if (unlikely(!(mask & sibling->mask))) {
4874 			if (node->inserted) {
4875 				spin_lock(&sibling->active.lock);
4876 				rb_erase_cached(&node->rb,
4877 						&sibling->execlists.virtual);
4878 				node->inserted = false;
4879 				spin_unlock(&sibling->active.lock);
4880 			}
4881 			continue;
4882 		}
4883 
4884 		spin_lock(&sibling->active.lock);
4885 
4886 		if (node->inserted) {
4887 			/*
4888 			 * Cheat and avoid rebalancing the tree if we can
4889 			 * reuse this node in situ.
4890 			 */
4891 			first = rb_first_cached(&sibling->execlists.virtual) ==
4892 				&node->rb;
4893 			if (prio == node->prio || (prio > node->prio && first))
4894 				goto submit_engine;
4895 
4896 			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
4897 			node->inserted = false;
4898 		}
4899 
4900 #ifdef __NetBSD__
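		/*
		 * NetBSD: the cached rbtree is backed by a native rb_tree(3)
		 * that appears to be keyed on priority, with ve->order as an
		 * insertion-order tiebreaker (the kludge below).  Insert the
		 * node directly, then check whether a >=prio lookup lands
		 * back on it; that check is intended to mirror the `first'
		 * (leftmost, highest-priority) computation done by hand in
		 * the Linux branch below.
		 */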
4901 		__USE(parent);
4902 		__USE(rb);
4903 		struct ve_node *collision __diagused;
4904 		/* XXX kludge to get insertion order */
4905 		node->order = ve->order++;
4906 		collision = rb_tree_insert_node(
4907 			&sibling->execlists.virtual.rb_root.rbr_tree,
4908 			node);
4909 		KASSERT(collision == node);
4910 		node->inserted = true;
4911 		first = rb_tree_find_node_geq(
4912 			&sibling->execlists.virtual.rb_root.rbr_tree,
4913 			&node->prio) == node;
4914 #else
4915 		rb = NULL;
4916 		first = true;
4917 		parent = &sibling->execlists.virtual.rb_root.rb_node;
4918 		while (*parent) {
4919 			struct ve_node *other;
4920 
4921 			rb = *parent;
4922 			other = rb_entry(rb, typeof(*other), rb);
4923 			if (prio > other->prio) {
4924 				parent = &rb->rb_left;
4925 			} else {
4926 				parent = &rb->rb_right;
4927 				first = false;
4928 			}
4929 		}
4930 
4931 		rb_link_node(&node->rb, rb, parent);
4932 		rb_insert_color_cached(&node->rb,
4933 				       &sibling->execlists.virtual,
4934 				       first);
4935 #endif
4936 
4937 submit_engine:
4938 		GEM_BUG_ON(!node->inserted);
4939 		node->prio = prio;
4940 		if (first && prio > sibling->execlists.queue_priority_hint) {
4941 			sibling->execlists.queue_priority_hint = prio;
4942 			tasklet_hi_schedule(&sibling->execlists.tasklet);
4943 		}
4944 
4945 		spin_unlock(&sibling->active.lock);
4946 	}
4947 #ifdef __NetBSD__
4948 	splx(s);
4949 #else
4950 	local_irq_enable();
4951 #endif
4952 }
4953 
4954 static void virtual_submit_request(struct i915_request *rq)
4955 {
4956 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
4957 	struct i915_request *old;
4958 	unsigned long flags;
4959 
4960 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
4961 		     rq->fence.context,
4962 		     rq->fence.seqno);
4963 
4964 	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
4965 
4966 	spin_lock_irqsave(&ve->base.active.lock, flags);
4967 
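	/*
	 * With preempt-to-busy a request parked here may already have been
	 * completed in the background; if so, formally submit it so it can
	 * be retired, and drop our reference before installing the new one.
	 */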
4968 	old = ve->request;
4969 	if (old) { /* background completion event from preempt-to-busy */
4970 		GEM_BUG_ON(!i915_request_completed(old));
4971 		__i915_request_submit(old);
4972 		i915_request_put(old);
4973 	}
4974 
4975 	if (i915_request_completed(rq)) {
4976 		__i915_request_submit(rq);
4977 
4978 		ve->base.execlists.queue_priority_hint = INT_MIN;
4979 		ve->request = NULL;
4980 	} else {
4981 		ve->base.execlists.queue_priority_hint = rq_prio(rq);
4982 		ve->request = i915_request_get(rq);
4983 
4984 		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4985 		list_move_tail(&rq->sched.link, virtual_queue(ve));
4986 
4987 		tasklet_schedule(&ve->base.execlists.tasklet);
4988 	}
4989 
4990 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
4991 }
4992 
4993 static struct ve_bond *
4994 virtual_find_bond(struct virtual_engine *ve,
4995 		  const struct intel_engine_cs *master)
4996 {
4997 	int i;
4998 
4999 	for (i = 0; i < ve->num_bonds; i++) {
5000 		if (ve->bonds[i].master == master)
5001 			return &ve->bonds[i];
5002 	}
5003 
5004 	return NULL;
5005 }
5006 
5007 static void
5008 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5009 {
5010 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
5011 	intel_engine_mask_t allowed, exec;
5012 	struct ve_bond *bond;
5013 
5014 	allowed = ~to_request(signal)->engine->mask;
5015 
5016 	bond = virtual_find_bond(ve, to_request(signal)->engine);
5017 	if (bond)
5018 		allowed &= bond->sibling_mask;
5019 
5020 	/* Restrict the bonded request to run on only the available engines */
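	/*
	 * try_cmpxchg() refreshes `exec' with the current value on failure,
	 * so this loop retries until the AND with `allowed' is applied
	 * atomically even if the mask is updated concurrently.
	 */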
5021 	exec = READ_ONCE(rq->execution_mask);
5022 	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5023 		;
5024 
5025 	/* Prevent the master from being re-run on the bonded engines */
5026 	to_request(signal)->execution_mask &= ~allowed;
5027 }
5028 
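/*
 * Create a virtual engine that load-balances requests across @siblings and
 * return its context; with a single sibling this degenerates to an ordinary
 * context on that engine.
 */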
5029 struct intel_context *
5030 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5031 			       unsigned int count)
5032 {
5033 	struct virtual_engine *ve;
5034 	unsigned int n;
5035 	int err;
5036 
5037 	if (count == 0)
5038 		return ERR_PTR(-EINVAL);
5039 
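	/* A single sibling needs no load balancing; hand back a plain context. */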
5040 	if (count == 1)
5041 		return intel_context_create(siblings[0]);
5042 
5043 	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5044 	if (!ve)
5045 		return ERR_PTR(-ENOMEM);
5046 
5047 	ve->base.i915 = siblings[0]->i915;
5048 	ve->base.gt = siblings[0]->gt;
5049 	ve->base.uncore = siblings[0]->uncore;
5050 	ve->base.id = -1;
5051 
5052 	ve->base.class = OTHER_CLASS;
5053 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5054 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5055 	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5056 
5057 	/*
5058 	 * The decision on whether to submit a request using semaphores
5059 	 * depends on the saturated state of the engine. We only compute
5060 	 * this during HW submission of the request, and we need this
5061 	 * state to be globally applied to all requests being submitted
5062 	 * to this engine. Virtual engines encompass more than one physical
5063 	 * engine and so we cannot accurately tell in advance if one of those
5064 	 * engines is already saturated and so cannot afford to use a semaphore
5065 	 * and be pessimized in priority for doing so -- if we are the only
5066 	 * context using semaphores after all other clients have stopped, we
5067 	 * will be starved on the saturated system. Such a global switch for
5068 	 * semaphores is less than ideal, but alas is the current compromise.
5069 	 */
5070 	ve->base.saturated = ALL_ENGINES;
5071 
5072 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5073 
5074 	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5075 	intel_engine_init_breadcrumbs(&ve->base);
5076 	intel_engine_init_execlists(&ve->base);
5077 
5078 	ve->base.cops = &virtual_context_ops;
5079 	ve->base.request_alloc = execlists_request_alloc;
5080 
5081 	ve->base.schedule = i915_schedule;
5082 	ve->base.submit_request = virtual_submit_request;
5083 	ve->base.bond_execute = virtual_bond_execute;
5084 
5085 	INIT_LIST_HEAD(virtual_queue(ve));
5086 	ve->base.execlists.queue_priority_hint = INT_MIN;
5087 	tasklet_init(&ve->base.execlists.tasklet,
5088 		     virtual_submission_tasklet,
5089 		     (unsigned long)ve);
5090 
5091 	intel_context_init(&ve->context, &ve->base);
5092 
5093 	for (n = 0; n < count; n++) {
5094 		struct intel_engine_cs *sibling = siblings[n];
5095 
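		/*
		 * Each physical engine owns exactly one bit of the engine
		 * mask, so a set bit already present in ve->base.mask means
		 * the same sibling was listed twice.
		 */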
5096 		GEM_BUG_ON(!is_power_of_2(sibling->mask));
5097 		if (sibling->mask & ve->base.mask) {
5098 			DRM_DEBUG("duplicate %s entry in load balancer\n",
5099 				  sibling->name);
5100 			err = -EINVAL;
5101 			goto err_put;
5102 		}
5103 
5104 		/*
5105 		 * The virtual engine implementation is tightly coupled to
5106 		 * the execlists backend -- we push requests directly
5107 		 * into a tree inside each physical engine. We could support
5108 		 * layering if we handle cloning of the requests and
5109 		 * submitting a copy into each backend.
5110 		 */
5111 		if (sibling->execlists.tasklet.func !=
5112 		    execlists_submission_tasklet) {
5113 			err = -ENODEV;
5114 			goto err_put;
5115 		}
5116 
5117 		GEM_BUG_ON(!ve->nodes[sibling->id].inserted);
5118 		ve->nodes[sibling->id].inserted = false;
5119 
5120 		ve->siblings[ve->num_siblings++] = sibling;
5121 		ve->base.mask |= sibling->mask;
5122 
5123 		/*
5124 		 * All physical engines must be compatible for their emission
5125 		 * functions (as we build the instructions during request
5126 		 * construction and do not alter them before submission
5127 		 * on the physical engine). We use the engine class as a guide
5128 		 * here, although that could be refined.
5129 		 */
5130 		if (ve->base.class != OTHER_CLASS) {
5131 			if (ve->base.class != sibling->class) {
5132 				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5133 					  sibling->class, ve->base.class);
5134 				err = -EINVAL;
5135 				goto err_put;
5136 			}
5137 			continue;
5138 		}
5139 
5140 		ve->base.class = sibling->class;
5141 		ve->base.uabi_class = sibling->uabi_class;
5142 		snprintf(ve->base.name, sizeof(ve->base.name),
5143 			 "v%dx%d", ve->base.class, count);
5144 		ve->base.context_size = sibling->context_size;
5145 
5146 		ve->base.emit_bb_start = sibling->emit_bb_start;
5147 		ve->base.emit_flush = sibling->emit_flush;
5148 		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5149 		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5150 		ve->base.emit_fini_breadcrumb_dw =
5151 			sibling->emit_fini_breadcrumb_dw;
5152 
5153 		ve->base.flags = sibling->flags;
5154 	}
5155 
5156 	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5157 
5158 	return &ve->context;
5159 
5160 err_put:
5161 	intel_context_put(&ve->context);
5162 	return ERR_PTR(err);
5163 }
5164 
5165 struct intel_context *
5166 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5167 {
5168 	struct virtual_engine *se = to_virtual_engine(src);
5169 	struct intel_context *dst;
5170 
5171 	dst = intel_execlists_create_virtual(se->siblings,
5172 					     se->num_siblings);
5173 	if (IS_ERR(dst))
5174 		return dst;
5175 
5176 	if (se->num_bonds) {
5177 		struct virtual_engine *de = to_virtual_engine(dst->engine);
5178 
5179 		de->bonds = kmemdup(se->bonds,
5180 				    sizeof(*se->bonds) * se->num_bonds,
5181 				    GFP_KERNEL);
5182 		if (!de->bonds) {
5183 			intel_context_put(dst);
5184 			return ERR_PTR(-ENOMEM);
5185 		}
5186 
5187 		de->num_bonds = se->num_bonds;
5188 	}
5189 
5190 	return dst;
5191 }
5192 
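/*
 * Record a bond: once @master has been chosen to execute the signalling
 * request, the bonded request submitted to this virtual engine is restricted
 * to the accumulated sibling mask (consumed by virtual_bond_execute()).
 */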
5193 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5194 				     const struct intel_engine_cs *master,
5195 				     const struct intel_engine_cs *sibling)
5196 {
5197 	struct virtual_engine *ve = to_virtual_engine(engine);
5198 	struct ve_bond *bond;
5199 	int n;
5200 
5201 	/* Sanity check the sibling is part of the virtual engine */
5202 	for (n = 0; n < ve->num_siblings; n++)
5203 		if (sibling == ve->siblings[n])
5204 			break;
5205 	if (n == ve->num_siblings)
5206 		return -EINVAL;
5207 
5208 	bond = virtual_find_bond(ve, master);
5209 	if (bond) {
5210 		bond->sibling_mask |= sibling->mask;
5211 		return 0;
5212 	}
5213 
5214 	bond = krealloc(ve->bonds,
5215 			sizeof(*bond) * (ve->num_bonds + 1),
5216 			GFP_KERNEL);
5217 	if (!bond)
5218 		return -ENOMEM;
5219 
5220 	bond[ve->num_bonds].master = master;
5221 	bond[ve->num_bonds].sibling_mask = sibling->mask;
5222 
5223 	ve->bonds = bond;
5224 	ve->num_bonds++;
5225 
5226 	return 0;
5227 }
5228 
5229 struct intel_engine_cs *
5230 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5231 				 unsigned int sibling)
5232 {
5233 	struct virtual_engine *ve = to_virtual_engine(engine);
5234 
5235 	if (sibling >= ve->num_siblings)
5236 		return NULL;
5237 
5238 	return ve->siblings[sibling];
5239 }
5240 
5241 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5242 				   struct drm_printer *m,
5243 				   void (*show_request)(struct drm_printer *m,
5244 							struct i915_request *rq,
5245 							const char *prefix),
5246 				   unsigned int max)
5247 {
5248 	const struct intel_engine_execlists *execlists = &engine->execlists;
5249 	struct i915_request *rq, *last;
5250 	unsigned long flags;
5251 	unsigned int count;
5252 	struct rb_node *rb;
5253 
5254 	spin_lock_irqsave(&engine->active.lock, flags);
5255 
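	/*
	 * For each list below: print the first max-1 requests inline,
	 * remember the final one, and emit a "...skipping..." summary so
	 * the tail of a long list remains visible.
	 */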
5256 	last = NULL;
5257 	count = 0;
5258 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
5259 		if (count++ < max - 1)
5260 			show_request(m, rq, "\t\tE ");
5261 		else
5262 			last = rq;
5263 	}
5264 	if (last) {
5265 		if (count > max) {
5266 			drm_printf(m,
5267 				   "\t\t...skipping %d executing requests...\n",
5268 				   count - max);
5269 		}
5270 		show_request(m, last, "\t\tE ");
5271 	}
5272 
5273 	last = NULL;
5274 	count = 0;
5275 	if (execlists->queue_priority_hint != INT_MIN)
5276 		drm_printf(m, "\t\tQueue priority hint: %d\n",
5277 			   execlists->queue_priority_hint);
5278 	for (rb = rb_first_cached(&execlists->queue);
5279 	     rb;
5280 	     rb = rb_next2(&execlists->queue.rb_root, rb)) {
5281 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5282 		int i;
5283 
5284 		priolist_for_each_request(rq, p, i) {
5285 			if (count++ < max - 1)
5286 				show_request(m, rq, "\t\tQ ");
5287 			else
5288 				last = rq;
5289 		}
5290 	}
5291 	if (last) {
5292 		if (count > max) {
5293 			drm_printf(m,
5294 				   "\t\t...skipping %d queued requests...\n",
5295 				   count - max);
5296 		}
5297 		show_request(m, last, "\t\tQ ");
5298 	}
5299 
5300 	last = NULL;
5301 	count = 0;
5302 	for (rb = rb_first_cached(&execlists->virtual);
5303 	     rb;
5304 	     rb = rb_next2(&execlists->virtual.rb_root, rb)) {
5305 		struct virtual_engine *ve =
5306 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5307 		struct i915_request *rq = READ_ONCE(ve->request);
5308 
5309 		if (rq) {
5310 			if (count++ < max - 1)
5311 				show_request(m, rq, "\t\tV ");
5312 			else
5313 				last = rq;
5314 		}
5315 	}
5316 	if (last) {
5317 		if (count > max) {
5318 			drm_printf(m,
5319 				   "\t\t...skipping %d virtual requests...\n",
5320 				   count - max);
5321 		}
5322 		show_request(m, last, "\t\tV ");
5323 	}
5324 
5325 	spin_unlock_irqrestore(&engine->active.lock, flags);
5326 }
5327 
5328 void intel_lr_context_reset(struct intel_engine_cs *engine,
5329 			    struct intel_context *ce,
5330 			    u32 head,
5331 			    bool scrub)
5332 {
5333 	GEM_BUG_ON(!intel_context_is_pinned(ce));
5334 
5335 	/*
5336 	 * We want a simple context + ring to execute the breadcrumb update.
5337 	 * We cannot rely on the context being intact across the GPU hang,
5338 	 * so clear it and rebuild just what we need for the breadcrumb.
5339 	 * All pending requests for this context will be zapped, and any
5340 	 * future request will be after userspace has had the opportunity
5341 	 * to recreate its own state.
5342 	 */
5343 	if (scrub)
5344 		restore_default_state(ce, engine);
5345 
5346 	/* Rerun the request; its payload has been neutered (if guilty). */
5347 	__execlists_update_reg_state(ce, engine, head);
5348 }
5349 
5350 bool
5351 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
5352 {
5353 	return engine->set_default_submission ==
5354 	       intel_execlists_set_default_submission;
5355 }
5356 
5357 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5358 #include "selftest_lrc.c"
5359 #endif
5360