1 /* $NetBSD: intel_guc_submission.c,v 1.4 2021/12/19 12:32:15 riastradh Exp $ */
2
3 // SPDX-License-Identifier: MIT
4 /*
5 * Copyright © 2014 Intel Corporation
6 */
7
8 #include <sys/cdefs.h>
9 __KERNEL_RCSID(0, "$NetBSD: intel_guc_submission.c,v 1.4 2021/12/19 12:32:15 riastradh Exp $");
10
11 #include <linux/circ_buf.h>
12
13 #include "gem/i915_gem_context.h"
14 #include "gt/intel_context.h"
15 #include "gt/intel_engine_pm.h"
16 #include "gt/intel_gt.h"
17 #include "gt/intel_gt_pm.h"
18 #include "gt/intel_lrc_reg.h"
19 #include "gt/intel_ring.h"
20
21 #include "intel_guc_submission.h"
22
23 #include "i915_drv.h"
24 #include "i915_trace.h"
25
/**
 * DOC: GuC-based command submission
 *
 * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
 * firmware is moving to an updated submission interface and we plan to
 * turn submission back on when that lands. The below documentation (and related
 * code) matches the old submission model and will be updated as part of the
 * upgrade to the new flow.
 *
 * GuC stage descriptor:
 * During initialization, the driver allocates a static pool of 1024 such
 * descriptors, and shares them with the GuC. Currently, we only use one
 * descriptor. This stage descriptor lets the GuC know about the workqueue and
 * process descriptor. Theoretically, it also lets the GuC know about our HW
 * contexts (context ID, etc...), but we actually employ a kind of submission
 * where the GuC uses the LRCA sent via the work item instead. This is called
 * a "proxy" submission.
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * The firmware writes a success/fail code back to the action register after
 * it processes the request. The kernel driver polls waiting for this update
 * and then proceeds.
 *
 * Work Items:
 * There are several types of work items that the host may place into a
 * workqueue, each with its own requirements and limitations. Currently only
 * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
 * represents an in-order queue. The kernel driver packs the ring tail pointer
 * and an ELSP context descriptor dword into the Work Item.
 * See guc_add_request()
 *
 */
61
to_priolist(struct rb_node * rb)62 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
63 {
64 return rb_entry(rb, struct i915_priolist, node);
65 }
66
/*
 * Return a pointer to stage descriptor @id inside the mapped descriptor
 * pool. No bounds checking is performed on @id.
 */
static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
{
	struct guc_stage_desc *pool = guc->stage_desc_pool_vaddr;

	return pool + id;
}
73
guc_workqueue_create(struct intel_guc * guc)74 static int guc_workqueue_create(struct intel_guc *guc)
75 {
76 return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue,
77 &guc->workqueue_vaddr);
78 }
79
guc_workqueue_destroy(struct intel_guc * guc)80 static void guc_workqueue_destroy(struct intel_guc *guc)
81 {
82 i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP);
83 }
84
85 /*
86 * Initialise the process descriptor shared with the GuC firmware.
87 */
guc_proc_desc_create(struct intel_guc * guc)88 static int guc_proc_desc_create(struct intel_guc *guc)
89 {
90 const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc));
91
92 return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc,
93 &guc->proc_desc_vaddr);
94 }
95
guc_proc_desc_destroy(struct intel_guc * guc)96 static void guc_proc_desc_destroy(struct intel_guc *guc)
97 {
98 i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP);
99 }
100
guc_proc_desc_init(struct intel_guc * guc)101 static void guc_proc_desc_init(struct intel_guc *guc)
102 {
103 struct guc_process_desc *desc;
104
105 desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc));
106
107 /*
108 * XXX: pDoorbell and WQVBaseAddress are pointers in process address
109 * space for ring3 clients (set them as in mmap_ioctl) or kernel
110 * space for kernel clients (map on demand instead? May make debug
111 * easier to have it mapped).
112 */
113 desc->wq_base_addr = 0;
114 desc->db_base_addr = 0;
115
116 desc->wq_size_bytes = GUC_WQ_SIZE;
117 desc->wq_status = WQ_STATUS_ACTIVE;
118 desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
119 }
120
guc_proc_desc_fini(struct intel_guc * guc)121 static void guc_proc_desc_fini(struct intel_guc *guc)
122 {
123 memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc));
124 }
125
guc_stage_desc_pool_create(struct intel_guc * guc)126 static int guc_stage_desc_pool_create(struct intel_guc *guc)
127 {
128 u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
129 GUC_MAX_STAGE_DESCRIPTORS);
130
131 return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool,
132 &guc->stage_desc_pool_vaddr);
133 }
134
guc_stage_desc_pool_destroy(struct intel_guc * guc)135 static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
136 {
137 i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP);
138 }
139
140 /*
141 * Initialise/clear the stage descriptor shared with the GuC firmware.
142 *
143 * This descriptor tells the GuC where (in GGTT space) to find the important
144 * data structures related to work submission (process descriptor, write queue,
145 * etc).
146 */
guc_stage_desc_init(struct intel_guc * guc)147 static void guc_stage_desc_init(struct intel_guc *guc)
148 {
149 struct guc_stage_desc *desc;
150
151 /* we only use 1 stage desc, so hardcode it to 0 */
152 desc = __get_stage_desc(guc, 0);
153 memset(desc, 0, sizeof(*desc));
154
155 desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
156 GUC_STAGE_DESC_ATTR_KERNEL;
157
158 desc->stage_id = 0;
159 desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
160
161 desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc);
162 desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue);
163 desc->wq_size = GUC_WQ_SIZE;
164 }
165
guc_stage_desc_fini(struct intel_guc * guc)166 static void guc_stage_desc_fini(struct intel_guc *guc)
167 {
168 struct guc_stage_desc *desc;
169
170 desc = __get_stage_desc(guc, 0);
171 memset(desc, 0, sizeof(*desc));
172 }
173
174 /* Construct a Work Item and append it to the GuC's Work Queue */
guc_wq_item_append(struct intel_guc * guc,u32 target_engine,u32 context_desc,u32 ring_tail,u32 fence_id)175 static void guc_wq_item_append(struct intel_guc *guc,
176 u32 target_engine, u32 context_desc,
177 u32 ring_tail, u32 fence_id)
178 {
179 /* wqi_len is in DWords, and does not include the one-word header */
180 const size_t wqi_size = sizeof(struct guc_wq_item);
181 const u32 wqi_len = wqi_size / sizeof(u32) - 1;
182 struct guc_process_desc *desc = guc->proc_desc_vaddr;
183 struct guc_wq_item *wqi;
184 u32 wq_off;
185
186 lockdep_assert_held(&guc->wq_lock);
187
188 /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
189 * should not have the case where structure wqi is across page, neither
190 * wrapped to the beginning. This simplifies the implementation below.
191 *
192 * XXX: if not the case, we need save data to a temp wqi and copy it to
193 * workqueue buffer dw by dw.
194 */
195 BUILD_BUG_ON(wqi_size != 16);
196
197 /* We expect the WQ to be active if we're appending items to it */
198 GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE);
199
200 /* Free space is guaranteed. */
201 wq_off = READ_ONCE(desc->tail);
202 GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head),
203 GUC_WQ_SIZE) < wqi_size);
204 GEM_BUG_ON(wq_off & (wqi_size - 1));
205
206 wqi = guc->workqueue_vaddr + wq_off;
207
208 /* Now fill in the 4-word work queue item */
209 wqi->header = WQ_TYPE_INORDER |
210 (wqi_len << WQ_LEN_SHIFT) |
211 (target_engine << WQ_TARGET_SHIFT) |
212 WQ_NO_WCFLUSH_WAIT;
213 wqi->context_desc = context_desc;
214 wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
215 GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
216 wqi->fence_id = fence_id;
217
218 /* Make the update visible to GuC */
219 WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1));
220 }
221
guc_add_request(struct intel_guc * guc,struct i915_request * rq)222 static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
223 {
224 struct intel_engine_cs *engine = rq->engine;
225 u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
226 u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
227
228 guc_wq_item_append(guc, engine->guc_id, ctx_desc,
229 ring_tail, rq->fence.seqno);
230 }
231
232 /*
233 * When we're doing submissions using regular execlists backend, writing to
234 * ELSP from CPU side is enough to make sure that writes to ringbuffer pages
235 * pinned in mappable aperture portion of GGTT are visible to command streamer.
236 * Writes done by GuC on our behalf are not guaranteeing such ordering,
237 * therefore, to ensure the flush, we're issuing a POSTING READ.
238 */
flush_ggtt_writes(struct i915_vma * vma)239 static void flush_ggtt_writes(struct i915_vma *vma)
240 {
241 if (i915_vma_is_map_and_fenceable(vma))
242 intel_uncore_posting_read_fw(vma->vm->gt->uncore,
243 GUC_STATUS);
244 }
245
guc_submit(struct intel_engine_cs * engine,struct i915_request ** out,struct i915_request ** end)246 static void guc_submit(struct intel_engine_cs *engine,
247 struct i915_request **out,
248 struct i915_request **end)
249 {
250 struct intel_guc *guc = &engine->gt->uc.guc;
251
252 spin_lock(&guc->wq_lock);
253
254 do {
255 struct i915_request *rq = *out++;
256
257 flush_ggtt_writes(rq->ring->vma);
258 guc_add_request(guc, rq);
259 } while (out != end);
260
261 spin_unlock(&guc->wq_lock);
262 }
263
rq_prio(const struct i915_request * rq)264 static inline int rq_prio(const struct i915_request *rq)
265 {
266 return rq->sched.attr.priority | __NO_PREEMPTION;
267 }
268
schedule_in(struct i915_request * rq,int idx)269 static struct i915_request *schedule_in(struct i915_request *rq, int idx)
270 {
271 trace_i915_request_in(rq, idx);
272
273 /*
274 * Currently we are not tracking the rq->context being inflight
275 * (ce->inflight = rq->engine). It is only used by the execlists
276 * backend at the moment, a similar counting strategy would be
277 * required if we generalise the inflight tracking.
278 */
279
280 __intel_gt_pm_get(rq->engine->gt);
281 return i915_request_get(rq);
282 }
283
schedule_out(struct i915_request * rq)284 static void schedule_out(struct i915_request *rq)
285 {
286 trace_i915_request_out(rq);
287
288 intel_gt_pm_put_async(rq->engine->gt);
289 i915_request_put(rq);
290 }
291
__guc_dequeue(struct intel_engine_cs * engine)292 static void __guc_dequeue(struct intel_engine_cs *engine)
293 {
294 struct intel_engine_execlists * const execlists = &engine->execlists;
295 struct i915_request **first = execlists->inflight;
296 struct i915_request ** const last_port = first + execlists->port_mask;
297 struct i915_request *last = first[0];
298 struct i915_request **port;
299 bool submit = false;
300 struct rb_node *rb;
301
302 lockdep_assert_held(&engine->active.lock);
303
304 if (last) {
305 if (*++first)
306 return;
307
308 last = NULL;
309 }
310
311 /*
312 * We write directly into the execlists->inflight queue and don't use
313 * the execlists->pending queue, as we don't have a distinct switch
314 * event.
315 */
316 port = first;
317 while ((rb = rb_first_cached(&execlists->queue))) {
318 struct i915_priolist *p = to_priolist(rb);
319 struct i915_request *rq, *rn;
320 int i;
321
322 priolist_for_each_request_consume(rq, rn, p, i) {
323 if (last && rq->context != last->context) {
324 if (port == last_port)
325 goto done;
326
327 *port = schedule_in(last,
328 port - execlists->inflight);
329 port++;
330 }
331
332 list_del_init(&rq->sched.link);
333 __i915_request_submit(rq);
334 submit = true;
335 last = rq;
336 }
337
338 rb_erase_cached(&p->node, &execlists->queue);
339 i915_priolist_free(p);
340 }
341 done:
342 execlists->queue_priority_hint =
343 rb ? to_priolist(rb)->priority : INT_MIN;
344 if (submit) {
345 *port = schedule_in(last, port - execlists->inflight);
346 *++port = NULL;
347 guc_submit(engine, first, port);
348 }
349 execlists->active = execlists->inflight;
350 }
351
guc_submission_tasklet(unsigned long data)352 static void guc_submission_tasklet(unsigned long data)
353 {
354 struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
355 struct intel_engine_execlists * const execlists = &engine->execlists;
356 struct i915_request **port, *rq;
357 unsigned long flags;
358
359 spin_lock_irqsave(&engine->active.lock, flags);
360
361 for (port = execlists->inflight; (rq = *port); port++) {
362 if (!i915_request_completed(rq))
363 break;
364
365 schedule_out(rq);
366 }
367 if (port != execlists->inflight) {
368 int idx = port - execlists->inflight;
369 int rem = ARRAY_SIZE(execlists->inflight) - idx;
370 memmove(execlists->inflight, port, rem * sizeof(*port));
371 }
372
373 __guc_dequeue(engine);
374
375 spin_unlock_irqrestore(&engine->active.lock, flags);
376 }
377
guc_reset_prepare(struct intel_engine_cs * engine)378 static void guc_reset_prepare(struct intel_engine_cs *engine)
379 {
380 struct intel_engine_execlists * const execlists = &engine->execlists;
381
382 ENGINE_TRACE(engine, "\n");
383
384 /*
385 * Prevent request submission to the hardware until we have
386 * completed the reset in i915_gem_reset_finish(). If a request
387 * is completed by one engine, it may then queue a request
388 * to a second via its execlists->tasklet *just* as we are
389 * calling engine->init_hw() and also writing the ELSP.
390 * Turning off the execlists->tasklet until the reset is over
391 * prevents the race.
392 */
393 __tasklet_disable_sync_once(&execlists->tasklet);
394 }
395
396 static void
cancel_port_requests(struct intel_engine_execlists * const execlists)397 cancel_port_requests(struct intel_engine_execlists * const execlists)
398 {
399 struct i915_request * const *port, *rq;
400
401 /* Note we are only using the inflight and not the pending queue */
402
403 for (port = execlists->active; (rq = *port); port++)
404 schedule_out(rq);
405 execlists->active =
406 memset(execlists->inflight, 0, sizeof(execlists->inflight));
407 }
408
guc_reset_rewind(struct intel_engine_cs * engine,bool stalled)409 static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
410 {
411 struct intel_engine_execlists * const execlists = &engine->execlists;
412 struct i915_request *rq;
413 unsigned long flags;
414
415 spin_lock_irqsave(&engine->active.lock, flags);
416
417 cancel_port_requests(execlists);
418
419 /* Push back any incomplete requests for replay after the reset. */
420 rq = execlists_unwind_incomplete_requests(execlists);
421 if (!rq)
422 goto out_unlock;
423
424 if (!i915_request_started(rq))
425 stalled = false;
426
427 __i915_request_reset(rq, stalled);
428 intel_lr_context_reset(engine, rq->context, rq->head, stalled);
429
430 out_unlock:
431 spin_unlock_irqrestore(&engine->active.lock, flags);
432 }
433
guc_reset_cancel(struct intel_engine_cs * engine)434 static void guc_reset_cancel(struct intel_engine_cs *engine)
435 {
436 struct intel_engine_execlists * const execlists = &engine->execlists;
437 struct i915_request *rq, *rn;
438 struct rb_node *rb;
439 unsigned long flags;
440
441 ENGINE_TRACE(engine, "\n");
442
443 /*
444 * Before we call engine->cancel_requests(), we should have exclusive
445 * access to the submission state. This is arranged for us by the
446 * caller disabling the interrupt generation, the tasklet and other
447 * threads that may then access the same state, giving us a free hand
448 * to reset state. However, we still need to let lockdep be aware that
449 * we know this state may be accessed in hardirq context, so we
450 * disable the irq around this manipulation and we want to keep
451 * the spinlock focused on its duties and not accidentally conflate
452 * coverage to the submission's irq state. (Similarly, although we
453 * shouldn't need to disable irq around the manipulation of the
454 * submission's irq state, we also wish to remind ourselves that
455 * it is irq state.)
456 */
457 spin_lock_irqsave(&engine->active.lock, flags);
458
459 /* Cancel the requests on the HW and clear the ELSP tracker. */
460 cancel_port_requests(execlists);
461
462 /* Mark all executing requests as skipped. */
463 list_for_each_entry(rq, &engine->active.requests, sched.link) {
464 if (!i915_request_signaled(rq))
465 dma_fence_set_error(&rq->fence, -EIO);
466
467 i915_request_mark_complete(rq);
468 }
469
470 /* Flush the queued requests to the timeline list (for retiring). */
471 while ((rb = rb_first_cached(&execlists->queue))) {
472 struct i915_priolist *p = to_priolist(rb);
473 int i;
474
475 priolist_for_each_request_consume(rq, rn, p, i) {
476 list_del_init(&rq->sched.link);
477 __i915_request_submit(rq);
478 dma_fence_set_error(&rq->fence, -EIO);
479 i915_request_mark_complete(rq);
480 }
481
482 rb_erase_cached(&p->node, &execlists->queue);
483 i915_priolist_free(p);
484 }
485
486 /* Remaining _unready_ requests will be nop'ed when submitted */
487
488 execlists->queue_priority_hint = INT_MIN;
489 i915_sched_init(execlists);
490
491 spin_unlock_irqrestore(&engine->active.lock, flags);
492 }
493
guc_reset_finish(struct intel_engine_cs * engine)494 static void guc_reset_finish(struct intel_engine_cs *engine)
495 {
496 struct intel_engine_execlists * const execlists = &engine->execlists;
497
498 if (__tasklet_enable(&execlists->tasklet))
499 /* And kick in case we missed a new request submission. */
500 tasklet_hi_schedule(&execlists->tasklet);
501
502 ENGINE_TRACE(engine, "depth->%d\n",
503 atomic_read(&execlists->tasklet.count));
504 }
505
506 /*
507 * Everything below here is concerned with setup & teardown, and is
508 * therefore not part of the somewhat time-critical batch-submission
509 * path of guc_submit() above.
510 */
511
512 /*
513 * Set up the memory resources to be shared with the GuC (via the GGTT)
514 * at firmware loading time.
515 */
intel_guc_submission_init(struct intel_guc * guc)516 int intel_guc_submission_init(struct intel_guc *guc)
517 {
518 int ret;
519
520 if (guc->stage_desc_pool)
521 return 0;
522
523 ret = guc_stage_desc_pool_create(guc);
524 if (ret)
525 return ret;
526 /*
527 * Keep static analysers happy, let them know that we allocated the
528 * vma after testing that it didn't exist earlier.
529 */
530 GEM_BUG_ON(!guc->stage_desc_pool);
531
532 ret = guc_workqueue_create(guc);
533 if (ret)
534 goto err_pool;
535
536 ret = guc_proc_desc_create(guc);
537 if (ret)
538 goto err_workqueue;
539
540 spin_lock_init(&guc->wq_lock);
541
542 return 0;
543
544 err_workqueue:
545 guc_workqueue_destroy(guc);
546 err_pool:
547 guc_stage_desc_pool_destroy(guc);
548 return ret;
549 }
550
intel_guc_submission_fini(struct intel_guc * guc)551 void intel_guc_submission_fini(struct intel_guc *guc)
552 {
553 spin_lock_destroy(&guc->wq_lock);
554 if (guc->stage_desc_pool) {
555 guc_proc_desc_destroy(guc);
556 guc_workqueue_destroy(guc);
557 guc_stage_desc_pool_destroy(guc);
558 }
559 }
560
guc_interrupts_capture(struct intel_gt * gt)561 static void guc_interrupts_capture(struct intel_gt *gt)
562 {
563 struct intel_uncore *uncore = gt->uncore;
564 u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
565 u32 dmask = irqs << 16 | irqs;
566
567 GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
568
569 /* Don't handle the ctx switch interrupt in GuC submission mode */
570 intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0);
571 intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0);
572 }
573
guc_interrupts_release(struct intel_gt * gt)574 static void guc_interrupts_release(struct intel_gt *gt)
575 {
576 struct intel_uncore *uncore = gt->uncore;
577 u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
578 u32 dmask = irqs << 16 | irqs;
579
580 GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
581
582 /* Handle ctx switch interrupts again */
583 intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask);
584 intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask);
585 }
586
guc_set_default_submission(struct intel_engine_cs * engine)587 static void guc_set_default_submission(struct intel_engine_cs *engine)
588 {
589 /*
590 * We inherit a bunch of functions from execlists that we'd like
591 * to keep using:
592 *
593 * engine->submit_request = execlists_submit_request;
594 * engine->cancel_requests = execlists_cancel_requests;
595 * engine->schedule = execlists_schedule;
596 *
597 * But we need to override the actual submission backend in order
598 * to talk to the GuC.
599 */
600 intel_execlists_set_default_submission(engine);
601
602 engine->execlists.tasklet.func = guc_submission_tasklet;
603
604 /* do not use execlists park/unpark */
605 engine->park = engine->unpark = NULL;
606
607 engine->reset.prepare = guc_reset_prepare;
608 engine->reset.rewind = guc_reset_rewind;
609 engine->reset.cancel = guc_reset_cancel;
610 engine->reset.finish = guc_reset_finish;
611
612 engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
613 engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
614
615 /*
616 * For the breadcrumb irq to work we need the interrupts to stay
617 * enabled. However, on all platforms on which we'll have support for
618 * GuC submission we don't allow disabling the interrupts at runtime, so
619 * we're always safe with the current flow.
620 */
621 GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
622 }
623
intel_guc_submission_enable(struct intel_guc * guc)624 void intel_guc_submission_enable(struct intel_guc *guc)
625 {
626 struct intel_gt *gt = guc_to_gt(guc);
627 struct intel_engine_cs *engine;
628 enum intel_engine_id id;
629
630 /*
631 * We're using GuC work items for submitting work through GuC. Since
632 * we're coalescing multiple requests from a single context into a
633 * single work item prior to assigning it to execlist_port, we can
634 * never have more work items than the total number of ports (for all
635 * engines). The GuC firmware is controlling the HEAD of work queue,
636 * and it is guaranteed that it will remove the work item from the
637 * queue before our request is completed.
638 */
639 BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
640 sizeof(struct guc_wq_item) *
641 I915_NUM_ENGINES > GUC_WQ_SIZE);
642
643 guc_proc_desc_init(guc);
644 guc_stage_desc_init(guc);
645
646 /* Take over from manual control of ELSP (execlists) */
647 guc_interrupts_capture(gt);
648
649 for_each_engine(engine, gt, id) {
650 engine->set_default_submission = guc_set_default_submission;
651 engine->set_default_submission(engine);
652 }
653 }
654
intel_guc_submission_disable(struct intel_guc * guc)655 void intel_guc_submission_disable(struct intel_guc *guc)
656 {
657 struct intel_gt *gt = guc_to_gt(guc);
658
659 GEM_BUG_ON(gt->awake); /* GT should be parked first */
660
661 /* Note: By the time we're here, GuC may have already been reset */
662
663 guc_interrupts_release(gt);
664
665 guc_stage_desc_fini(guc);
666 guc_proc_desc_fini(guc);
667 }
668
__guc_submission_support(struct intel_guc * guc)669 static bool __guc_submission_support(struct intel_guc *guc)
670 {
671 /* XXX: GuC submission is unavailable for now */
672 return false;
673
674 if (!intel_guc_is_supported(guc))
675 return false;
676
677 return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION;
678 }
679
intel_guc_submission_init_early(struct intel_guc * guc)680 void intel_guc_submission_init_early(struct intel_guc *guc)
681 {
682 guc->submission_supported = __guc_submission_support(guc);
683 }
684
intel_engine_in_guc_submission_mode(const struct intel_engine_cs * engine)685 bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
686 {
687 return engine->set_default_submission == guc_set_default_submission;
688 }
689