1 /* $NetBSD: selftest_lrc.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $ */
2
3 /*
4 * SPDX-License-Identifier: MIT
5 *
6 * Copyright © 2018 Intel Corporation
7 */
8
9 #include <sys/cdefs.h>
10 __KERNEL_RCSID(0, "$NetBSD: selftest_lrc.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");
11
12 #include <linux/prime_numbers.h>
13
14 #include "gem/i915_gem_pm.h"
15 #include "gt/intel_engine_heartbeat.h"
16 #include "gt/intel_reset.h"
17
18 #include "i915_selftest.h"
19 #include "selftests/i915_random.h"
20 #include "selftests/igt_flush_test.h"
21 #include "selftests/igt_live_test.h"
22 #include "selftests/igt_spinner.h"
23 #include "selftests/lib_sw_fence.h"
24
25 #include "gem/selftests/igt_gem_utils.h"
26 #include "gem/selftests/mock_context.h"
27
28 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
29 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
30
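/*
 * Allocate a single internal page and pin it into the global GTT so that
 * both the CPU and GPU batches can share it as a scratch buffer.
 */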
31 static struct i915_vma *create_scratch(struct intel_gt *gt)
32 {
33 struct drm_i915_gem_object *obj;
34 struct i915_vma *vma;
35 int err;
36
37 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
38 if (IS_ERR(obj))
39 return ERR_CAST(obj);
40
41 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
42
43 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
44 if (IS_ERR(vma)) {
45 i915_gem_object_put(obj);
46 return vma;
47 }
48
49 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
50 if (err) {
51 i915_gem_object_put(obj);
52 return ERR_PTR(err);
53 }
54
55 return vma;
56 }
57
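/*
 * The heartbeat periodically submits kernel-context requests to check
 * engine health; those stray requests would disturb the carefully staged
 * ELSP scenarios below, so the tests park it (while holding an engine PM
 * wakeref) for the duration and restore the interval afterwards.
 */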
58 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
59 unsigned long *saved)
60 {
61 *saved = engine->props.heartbeat_interval_ms;
62 engine->props.heartbeat_interval_ms = 0;
63
64 intel_engine_pm_get(engine);
65 intel_engine_park_heartbeat(engine);
66 }
67
68 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
69 unsigned long saved)
70 {
71 intel_engine_pm_put(engine);
72
73 engine->props.heartbeat_interval_ms = saved;
74 }
75
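/*
 * Smoke test: on every engine, submit a spinning batch on a fresh
 * context, check that it starts executing, then end it and flush.
 */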
76 static int live_sanitycheck(void *arg)
77 {
78 struct intel_gt *gt = arg;
79 struct intel_engine_cs *engine;
80 enum intel_engine_id id;
81 struct igt_spinner spin;
82 int err = 0;
83
84 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
85 return 0;
86
87 if (igt_spinner_init(&spin, gt))
88 return -ENOMEM;
89
90 for_each_engine(engine, gt, id) {
91 struct intel_context *ce;
92 struct i915_request *rq;
93
94 ce = intel_context_create(engine);
95 if (IS_ERR(ce)) {
96 err = PTR_ERR(ce);
97 break;
98 }
99
100 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
101 if (IS_ERR(rq)) {
102 err = PTR_ERR(rq);
103 goto out_ctx;
104 }
105
106 i915_request_add(rq);
107 if (!igt_wait_for_spinner(&spin, rq)) {
108 GEM_TRACE("spinner failed to start\n");
109 GEM_TRACE_DUMP();
110 intel_gt_set_wedged(gt);
111 err = -EIO;
112 goto out_ctx;
113 }
114
115 igt_spinner_end(&spin);
116 if (igt_flush_test(gt->i915)) {
117 err = -EIO;
118 goto out_ctx;
119 }
120
121 out_ctx:
122 intel_context_put(ce);
123 if (err)
124 break;
125 }
126
127 igt_spinner_fini(&spin);
128 return err;
129 }
130
131 static int live_unlite_restore(struct intel_gt *gt, int prio)
132 {
133 struct intel_engine_cs *engine;
134 enum intel_engine_id id;
135 struct igt_spinner spin;
136 int err = -ENOMEM;
137
138 /*
139 * Check that we can correctly context switch between 2 instances
140 * on the same engine from the same parent context.
141 */
142
143 if (igt_spinner_init(&spin, gt))
144 return err;
145
146 err = 0;
147 for_each_engine(engine, gt, id) {
148 struct intel_context *ce[2] = {};
149 struct i915_request *rq[2];
150 struct igt_live_test t;
151 unsigned long saved;
152 int n;
153
154 if (prio && !intel_engine_has_preemption(engine))
155 continue;
156
157 if (!intel_engine_can_store_dword(engine))
158 continue;
159
160 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
161 err = -EIO;
162 break;
163 }
164 engine_heartbeat_disable(engine, &saved);
165
166 for (n = 0; n < ARRAY_SIZE(ce); n++) {
167 struct intel_context *tmp;
168
169 tmp = intel_context_create(engine);
170 if (IS_ERR(tmp)) {
171 err = PTR_ERR(tmp);
172 goto err_ce;
173 }
174
175 err = intel_context_pin(tmp);
176 if (err) {
177 intel_context_put(tmp);
178 goto err_ce;
179 }
180
181 /*
182 * Setup the pair of contexts such that if we
183 * lite-restore using the RING_TAIL from ce[1] it
184 * will execute garbage from ce[0]->ring.
185 */
186 memset(tmp->ring->vaddr,
187 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
188 tmp->ring->vma->size);
189
190 ce[n] = tmp;
191 }
192 GEM_BUG_ON(!ce[1]->ring->size);
193 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
194 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
195
196 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
197 if (IS_ERR(rq[0])) {
198 err = PTR_ERR(rq[0]);
199 goto err_ce;
200 }
201
202 i915_request_get(rq[0]);
203 i915_request_add(rq[0]);
204 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
205
206 if (!igt_wait_for_spinner(&spin, rq[0])) {
207 i915_request_put(rq[0]);
208 goto err_ce;
209 }
210
211 rq[1] = i915_request_create(ce[1]);
212 if (IS_ERR(rq[1])) {
213 err = PTR_ERR(rq[1]);
214 i915_request_put(rq[0]);
215 goto err_ce;
216 }
217
218 if (!prio) {
219 /*
220 * Ensure we do the switch to ce[1] on completion.
221 *
222 * rq[0] is already submitted, so this should reduce
223 * to a no-op (a wait on a request on the same engine
224 * uses the submit fence, not the completion fence),
225 * but it will install a dependency on rq[1] for rq[0]
226 * that will prevent the pair being reordered by
227 * timeslicing.
228 */
229 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
230 }
231
232 i915_request_get(rq[1]);
233 i915_request_add(rq[1]);
234 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
235 i915_request_put(rq[0]);
236
237 if (prio) {
238 struct i915_sched_attr attr = {
239 .priority = prio,
240 };
241
242 /* Alternatively preempt the spinner with ce[1] */
243 engine->schedule(rq[1], &attr);
244 }
245
246 /* And switch back to ce[0] for good measure */
247 rq[0] = i915_request_create(ce[0]);
248 if (IS_ERR(rq[0])) {
249 err = PTR_ERR(rq[0]);
250 i915_request_put(rq[1]);
251 goto err_ce;
252 }
253
254 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
255 i915_request_get(rq[0]);
256 i915_request_add(rq[0]);
257 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
258 i915_request_put(rq[1]);
259 i915_request_put(rq[0]);
260
261 err_ce:
262 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
263 igt_spinner_end(&spin);
264 for (n = 0; n < ARRAY_SIZE(ce); n++) {
265 if (IS_ERR_OR_NULL(ce[n]))
266 break;
267
268 intel_context_unpin(ce[n]);
269 intel_context_put(ce[n]);
270 }
271
272 engine_heartbeat_enable(engine, saved);
273 if (igt_live_test_end(&t))
274 err = -EIO;
275 if (err)
276 break;
277 }
278
279 igt_spinner_fini(&spin);
280 return err;
281 }
282
283 static int live_unlite_switch(void *arg)
284 {
285 return live_unlite_restore(arg, 0);
286 }
287
288 static int live_unlite_preempt(void *arg)
289 {
290 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
291 }
292
293 static int live_hold_reset(void *arg)
294 {
295 struct intel_gt *gt = arg;
296 struct intel_engine_cs *engine;
297 enum intel_engine_id id;
298 struct igt_spinner spin;
299 int err = 0;
300
301 /*
302 * In order to support offline error capture for fast preempt reset,
303 * we need to decouple the guilty request and ensure that it and its
304 * descendants are not executed while the capture is in progress.
305 */
306
307 if (!intel_has_reset_engine(gt))
308 return 0;
309
310 if (igt_spinner_init(&spin, gt))
311 return -ENOMEM;
312
313 for_each_engine(engine, gt, id) {
314 struct intel_context *ce;
315 unsigned long heartbeat;
316 struct i915_request *rq;
317
318 ce = intel_context_create(engine);
319 if (IS_ERR(ce)) {
320 err = PTR_ERR(ce);
321 break;
322 }
323
324 engine_heartbeat_disable(engine, &heartbeat);
325
326 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
327 if (IS_ERR(rq)) {
328 err = PTR_ERR(rq);
329 goto out;
330 }
331 i915_request_add(rq);
332
333 if (!igt_wait_for_spinner(&spin, rq)) {
334 intel_gt_set_wedged(gt);
335 err = -ETIME;
336 goto out;
337 }
338
339 /* We have our request executing, now remove it and reset */
340
341 if (test_and_set_bit(I915_RESET_ENGINE + id,
342 &gt->reset.flags)) {
343 intel_gt_set_wedged(gt);
344 err = -EBUSY;
345 goto out;
346 }
347 tasklet_disable(&engine->execlists.tasklet);
348
349 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
350 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
351
352 i915_request_get(rq);
353 execlists_hold(engine, rq);
354 GEM_BUG_ON(!i915_request_on_hold(rq));
355
356 intel_engine_reset(engine, NULL);
357 GEM_BUG_ON(rq->fence.error != -EIO);
358
359 tasklet_enable(&engine->execlists.tasklet);
360 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
361 &gt->reset.flags);
362
363 /* Check that we do not resubmit the held request */
364 if (!i915_request_wait(rq, 0, HZ / 5)) {
365 pr_err("%s: on hold request completed!\n",
366 engine->name);
367 i915_request_put(rq);
368 err = -EIO;
369 goto out;
370 }
371 GEM_BUG_ON(!i915_request_on_hold(rq));
372
373 /* But is resubmitted on release */
374 execlists_unhold(engine, rq);
375 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
376 pr_err("%s: held request did not complete!\n",
377 engine->name);
378 intel_gt_set_wedged(gt);
379 err = -ETIME;
380 }
381 i915_request_put(rq);
382
383 out:
384 engine_heartbeat_enable(engine, heartbeat);
385 intel_context_put(ce);
386 if (err)
387 break;
388 }
389
390 igt_spinner_fini(&spin);
391 return err;
392 }
393
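/*
 * Each link in the chain spins until the dword at slot idx of the shared
 * buffer becomes non-zero; once released it writes 1 into slot idx - 1,
 * freeing the previous link. Releasing the newest slot therefore unwinds
 * the whole chain in reverse order of submission.
 */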
394 static int
395 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
396 {
397 u32 *cs;
398
399 cs = intel_ring_begin(rq, 10);
400 if (IS_ERR(cs))
401 return PTR_ERR(cs);
402
403 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
404
405 *cs++ = MI_SEMAPHORE_WAIT |
406 MI_SEMAPHORE_GLOBAL_GTT |
407 MI_SEMAPHORE_POLL |
408 MI_SEMAPHORE_SAD_NEQ_SDD;
409 *cs++ = 0;
410 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
411 *cs++ = 0;
412
413 if (idx > 0) {
414 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
415 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
416 *cs++ = 0;
417 *cs++ = 1;
418 } else {
419 *cs++ = MI_NOOP;
420 *cs++ = MI_NOOP;
421 *cs++ = MI_NOOP;
422 *cs++ = MI_NOOP;
423 }
424
425 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
426
427 intel_ring_advance(rq, cs);
428 return 0;
429 }
430
431 static struct i915_request *
432 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
433 {
434 struct intel_context *ce;
435 struct i915_request *rq;
436 int err;
437
438 ce = intel_context_create(engine);
439 if (IS_ERR(ce))
440 return ERR_CAST(ce);
441
442 rq = intel_context_create_request(ce);
443 if (IS_ERR(rq))
444 goto out_ce;
445
446 err = 0;
447 if (rq->engine->emit_init_breadcrumb)
448 err = rq->engine->emit_init_breadcrumb(rq);
449 if (err == 0)
450 err = emit_semaphore_chain(rq, vma, idx);
451 if (err == 0)
452 i915_request_get(rq);
453 i915_request_add(rq);
454 if (err)
455 rq = ERR_PTR(err);
456
457 out_ce:
458 intel_context_put(ce);
459 return rq;
460 }
461
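/*
 * Kick the chain from the kernel context: write 1 into slot idx - 1 at
 * the requested priority and prod the tasklet so the scheduler notices.
 */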
462 static int
463 release_queue(struct intel_engine_cs *engine,
464 struct i915_vma *vma,
465 int idx, int prio)
466 {
467 struct i915_sched_attr attr = {
468 .priority = prio,
469 };
470 struct i915_request *rq;
471 u32 *cs;
472
473 rq = intel_engine_create_kernel_request(engine);
474 if (IS_ERR(rq))
475 return PTR_ERR(rq);
476
477 cs = intel_ring_begin(rq, 4);
478 if (IS_ERR(cs)) {
479 i915_request_add(rq);
480 return PTR_ERR(cs);
481 }
482
483 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
484 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
485 *cs++ = 0;
486 *cs++ = 1;
487
488 intel_ring_advance(rq, cs);
489
490 i915_request_get(rq);
491 i915_request_add(rq);
492
493 local_bh_disable();
494 engine->schedule(rq, &attr);
495 local_bh_enable(); /* kick tasklet */
496
497 i915_request_put(rq);
498
499 return 0;
500 }
501
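/*
 * Queue a semaphore waiter at the head of @outer, pile further waiters
 * onto every engine, then release the newest link at maximum priority.
 * The head can only complete if timeslicing rotates the blocked waiters
 * through each engine, so a timeout here means timeslicing failed.
 */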
502 static int
503 slice_semaphore_queue(struct intel_engine_cs *outer,
504 struct i915_vma *vma,
505 int count)
506 {
507 struct intel_engine_cs *engine;
508 struct i915_request *head;
509 enum intel_engine_id id;
510 int err, i, n = 0;
511
512 head = semaphore_queue(outer, vma, n++);
513 if (IS_ERR(head))
514 return PTR_ERR(head);
515
516 for_each_engine(engine, outer->gt, id) {
517 for (i = 0; i < count; i++) {
518 struct i915_request *rq;
519
520 rq = semaphore_queue(engine, vma, n++);
521 if (IS_ERR(rq)) {
522 err = PTR_ERR(rq);
523 goto out;
524 }
525
526 i915_request_put(rq);
527 }
528 }
529
530 err = release_queue(outer, vma, n, INT_MAX);
531 if (err)
532 goto out;
533
534 if (i915_request_wait(head, 0,
535 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
536 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
537 count, n);
538 GEM_TRACE_DUMP();
539 intel_gt_set_wedged(outer->gt);
540 err = -EIO;
541 }
542
543 out:
544 i915_request_put(head);
545 return err;
546 }
547
548 static int live_timeslice_preempt(void *arg)
549 {
550 struct intel_gt *gt = arg;
551 struct drm_i915_gem_object *obj;
552 struct i915_vma *vma;
553 void *vaddr;
554 int err = 0;
555 int count;
556
557 /*
558 * If a request takes too long, we would like to give other users
559 * a fair go on the GPU. In particular, users may create batches
560 * that wait upon external input, where that input may even be
561 * supplied by another GPU job. To avoid blocking forever, we
562 * need to preempt the current task and replace it with another
563 * ready task.
564 */
565 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
566 return 0;
567
568 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
569 if (IS_ERR(obj))
570 return PTR_ERR(obj);
571
572 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
573 if (IS_ERR(vma)) {
574 err = PTR_ERR(vma);
575 goto err_obj;
576 }
577
578 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
579 if (IS_ERR(vaddr)) {
580 err = PTR_ERR(vaddr);
581 goto err_obj;
582 }
583
584 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
585 if (err)
586 goto err_map;
587
588 for_each_prime_number_from(count, 1, 16) {
589 struct intel_engine_cs *engine;
590 enum intel_engine_id id;
591
592 for_each_engine(engine, gt, id) {
593 unsigned long saved;
594
595 if (!intel_engine_has_preemption(engine))
596 continue;
597
598 memset(vaddr, 0, PAGE_SIZE);
599
600 engine_heartbeat_disable(engine, &saved);
601 err = slice_semaphore_queue(engine, vma, count);
602 engine_heartbeat_enable(engine, saved);
603 if (err)
604 goto err_pin;
605
606 if (igt_flush_test(gt->i915)) {
607 err = -EIO;
608 goto err_pin;
609 }
610 }
611 }
612
613 err_pin:
614 i915_vma_unpin(vma);
615 err_map:
616 i915_gem_object_unpin_map(obj);
617 err_obj:
618 i915_gem_object_put(obj);
619 return err;
620 }
621
622 static struct i915_request *nop_request(struct intel_engine_cs *engine)
623 {
624 struct i915_request *rq;
625
626 rq = intel_engine_create_kernel_request(engine);
627 if (IS_ERR(rq))
628 return rq;
629
630 i915_request_get(rq);
631 i915_request_add(rq);
632
633 return rq;
634 }
635
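/* Busy-wait (flushing submission) until the request reaches the HW. */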
636 static int wait_for_submit(struct intel_engine_cs *engine,
637 struct i915_request *rq,
638 unsigned long timeout)
639 {
640 timeout += jiffies;
641 do {
642 cond_resched();
643 intel_engine_flush_submission(engine);
644 if (i915_request_is_active(rq))
645 return 0;
646 } while (time_before(jiffies, timeout));
647
648 return -ETIME;
649 }
650
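/* Allow two full timeslice quanta (plus a jiffy of slack) to expire. */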
651 static long timeslice_threshold(const struct intel_engine_cs *engine)
652 {
653 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
654 }
655
656 static int live_timeslice_queue(void *arg)
657 {
658 struct intel_gt *gt = arg;
659 struct drm_i915_gem_object *obj;
660 struct intel_engine_cs *engine;
661 enum intel_engine_id id;
662 struct i915_vma *vma;
663 void *vaddr;
664 int err = 0;
665
666 /*
667 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
668 * timeslicing between them disabled, we *do* enable timeslicing
669 * if the queue demands it. (Normally, we do not submit if
670 * ELSP[1] is already occupied, so must rely on timeslicing to
671 * eject ELSP[0] in favour of the queue.)
672 */
673 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
674 return 0;
675
676 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
677 if (IS_ERR(obj))
678 return PTR_ERR(obj);
679
680 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
681 if (IS_ERR(vma)) {
682 err = PTR_ERR(vma);
683 goto err_obj;
684 }
685
686 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
687 if (IS_ERR(vaddr)) {
688 err = PTR_ERR(vaddr);
689 goto err_obj;
690 }
691
692 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
693 if (err)
694 goto err_map;
695
696 for_each_engine(engine, gt, id) {
697 struct i915_sched_attr attr = {
698 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
699 };
700 struct i915_request *rq, *nop;
701 unsigned long saved;
702
703 if (!intel_engine_has_preemption(engine))
704 continue;
705
706 engine_heartbeat_disable(engine, &saved);
707 memset(vaddr, 0, PAGE_SIZE);
708
709 /* ELSP[0]: semaphore wait */
710 rq = semaphore_queue(engine, vma, 0);
711 if (IS_ERR(rq)) {
712 err = PTR_ERR(rq);
713 goto err_heartbeat;
714 }
715 engine->schedule(rq, &attr);
716 err = wait_for_submit(engine, rq, HZ / 2);
717 if (err) {
718 pr_err("%s: Timed out trying to submit semaphores\n",
719 engine->name);
720 goto err_rq;
721 }
722
723 /* ELSP[1]: nop request */
724 nop = nop_request(engine);
725 if (IS_ERR(nop)) {
726 err = PTR_ERR(nop);
727 goto err_rq;
728 }
729 err = wait_for_submit(engine, nop, HZ / 2);
730 i915_request_put(nop);
731 if (err) {
732 pr_err("%s: Timed out trying to submit nop\n",
733 engine->name);
734 goto err_rq;
735 }
736
737 GEM_BUG_ON(i915_request_completed(rq));
738 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
739
740 /* Queue: semaphore signal, matching priority as semaphore */
741 err = release_queue(engine, vma, 1, effective_prio(rq));
742 if (err)
743 goto err_rq;
744
745 intel_engine_flush_submission(engine);
746 if (!READ_ONCE(engine->execlists.timer.expires) &&
747 !i915_request_completed(rq)) {
748 struct drm_printer p =
749 drm_info_printer(gt->i915->drm.dev);
750
751 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
752 engine->name);
753 intel_engine_dump(engine, &p,
754 "%s\n", engine->name);
755 GEM_TRACE_DUMP();
756
757 memset(vaddr, 0xff, PAGE_SIZE);
758 err = -EINVAL;
759 }
760
761 /* Timeslice every jiffy, so within 2 we should signal */
762 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
763 struct drm_printer p =
764 drm_info_printer(gt->i915->drm.dev);
765
766 pr_err("%s: Failed to timeslice into queue\n",
767 engine->name);
768 intel_engine_dump(engine, &p,
769 "%s\n", engine->name);
770
771 memset(vaddr, 0xff, PAGE_SIZE);
772 err = -EIO;
773 }
774 err_rq:
775 i915_request_put(rq);
776 err_heartbeat:
777 engine_heartbeat_enable(engine, saved);
778 if (err)
779 break;
780 }
781
782 i915_vma_unpin(vma);
783 err_map:
784 i915_gem_object_unpin_map(obj);
785 err_obj:
786 i915_gem_object_put(obj);
787 return err;
788 }
789
790 static int live_busywait_preempt(void *arg)
791 {
792 struct intel_gt *gt = arg;
793 struct i915_gem_context *ctx_hi, *ctx_lo;
794 struct intel_engine_cs *engine;
795 struct drm_i915_gem_object *obj;
796 struct i915_vma *vma;
797 enum intel_engine_id id;
798 int err = -ENOMEM;
799 u32 *map;
800
801 /*
802 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
803 * preempt the busywaits used to synchronise between rings.
804 */
805
806 ctx_hi = kernel_context(gt->i915);
807 if (!ctx_hi)
808 return -ENOMEM;
809 ctx_hi->sched.priority =
810 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
811
812 ctx_lo = kernel_context(gt->i915);
813 if (!ctx_lo)
814 goto err_ctx_hi;
815 ctx_lo->sched.priority =
816 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
817
818 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
819 if (IS_ERR(obj)) {
820 err = PTR_ERR(obj);
821 goto err_ctx_lo;
822 }
823
824 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
825 if (IS_ERR(map)) {
826 err = PTR_ERR(map);
827 goto err_obj;
828 }
829
830 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
831 if (IS_ERR(vma)) {
832 err = PTR_ERR(vma);
833 goto err_map;
834 }
835
836 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
837 if (err)
838 goto err_map;
839
840 for_each_engine(engine, gt, id) {
841 struct i915_request *lo, *hi;
842 struct igt_live_test t;
843 u32 *cs;
844
845 if (!intel_engine_has_preemption(engine))
846 continue;
847
848 if (!intel_engine_can_store_dword(engine))
849 continue;
850
851 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
852 err = -EIO;
853 goto err_vma;
854 }
855
856 /*
857 * We create two requests. The low priority request
858 * busywaits on a semaphore (inside the ringbuffer where
859 * it should be preemptible) and the high priority request
860 * uses a MI_STORE_DWORD_IMM to update the semaphore value
861 * allowing the first request to complete. If preemption
862 * fails, we hang instead.
863 */
864
865 lo = igt_request_alloc(ctx_lo, engine);
866 if (IS_ERR(lo)) {
867 err = PTR_ERR(lo);
868 goto err_vma;
869 }
870
871 cs = intel_ring_begin(lo, 8);
872 if (IS_ERR(cs)) {
873 err = PTR_ERR(cs);
874 i915_request_add(lo);
875 goto err_vma;
876 }
877
878 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
879 *cs++ = i915_ggtt_offset(vma);
880 *cs++ = 0;
881 *cs++ = 1;
882
883 /* XXX Do we need a flush + invalidate here? */
884
885 *cs++ = MI_SEMAPHORE_WAIT |
886 MI_SEMAPHORE_GLOBAL_GTT |
887 MI_SEMAPHORE_POLL |
888 MI_SEMAPHORE_SAD_EQ_SDD;
889 *cs++ = 0;
890 *cs++ = i915_ggtt_offset(vma);
891 *cs++ = 0;
892
893 intel_ring_advance(lo, cs);
894
895 i915_request_get(lo);
896 i915_request_add(lo);
897
898 if (wait_for(READ_ONCE(*map), 10)) {
899 i915_request_put(lo);
900 err = -ETIMEDOUT;
901 goto err_vma;
902 }
903
904 /* Low priority request should be busywaiting now */
905 if (i915_request_wait(lo, 0, 1) != -ETIME) {
906 i915_request_put(lo);
907 pr_err("%s: Busywaiting request did not busywait!\n",
908 engine->name);
909 err = -EIO;
910 goto err_vma;
911 }
912
913 hi = igt_request_alloc(ctx_hi, engine);
914 if (IS_ERR(hi)) {
915 err = PTR_ERR(hi);
916 i915_request_put(lo);
917 goto err_vma;
918 }
919
920 cs = intel_ring_begin(hi, 4);
921 if (IS_ERR(cs)) {
922 err = PTR_ERR(cs);
923 i915_request_add(hi);
924 i915_request_put(lo);
925 goto err_vma;
926 }
927
928 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
929 *cs++ = i915_ggtt_offset(vma);
930 *cs++ = 0;
931 *cs++ = 0;
932
933 intel_ring_advance(hi, cs);
934 i915_request_add(hi);
935
936 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
937 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
938
939 pr_err("%s: Failed to preempt semaphore busywait!\n",
940 engine->name);
941
942 intel_engine_dump(engine, &p, "%s\n", engine->name);
943 GEM_TRACE_DUMP();
944
945 i915_request_put(lo);
946 intel_gt_set_wedged(gt);
947 err = -EIO;
948 goto err_vma;
949 }
950 GEM_BUG_ON(READ_ONCE(*map));
951 i915_request_put(lo);
952
953 if (igt_live_test_end(&t)) {
954 err = -EIO;
955 goto err_vma;
956 }
957 }
958
959 err = 0;
960 err_vma:
961 i915_vma_unpin(vma);
962 err_map:
963 i915_gem_object_unpin_map(obj);
964 err_obj:
965 i915_gem_object_put(obj);
966 err_ctx_lo:
967 kernel_context_close(ctx_lo);
968 err_ctx_hi:
969 kernel_context_close(ctx_hi);
970 return err;
971 }
972
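/*
 * Convenience wrapper: look up the context's intel_context for @engine
 * (via its legacy index) and build a spinner request on it.
 */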
973 static struct i915_request *
974 spinner_create_request(struct igt_spinner *spin,
975 struct i915_gem_context *ctx,
976 struct intel_engine_cs *engine,
977 u32 arb)
978 {
979 struct intel_context *ce;
980 struct i915_request *rq;
981
982 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
983 if (IS_ERR(ce))
984 return ERR_CAST(ce);
985
986 rq = igt_spinner_create_request(spin, ce, arb);
987 intel_context_put(ce);
988 return rq;
989 }
990
991 static int live_preempt(void *arg)
992 {
993 struct intel_gt *gt = arg;
994 struct i915_gem_context *ctx_hi, *ctx_lo;
995 struct igt_spinner spin_hi, spin_lo;
996 struct intel_engine_cs *engine;
997 enum intel_engine_id id;
998 int err = -ENOMEM;
999
1000 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1001 return 0;
1002
1003 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1004 pr_err("Logical preemption supported, but not exposed\n");
1005
1006 if (igt_spinner_init(&spin_hi, gt))
1007 return -ENOMEM;
1008
1009 if (igt_spinner_init(&spin_lo, gt))
1010 goto err_spin_hi;
1011
1012 ctx_hi = kernel_context(gt->i915);
1013 if (!ctx_hi)
1014 goto err_spin_lo;
1015 ctx_hi->sched.priority =
1016 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1017
1018 ctx_lo = kernel_context(gt->i915);
1019 if (!ctx_lo)
1020 goto err_ctx_hi;
1021 ctx_lo->sched.priority =
1022 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1023
1024 for_each_engine(engine, gt, id) {
1025 struct igt_live_test t;
1026 struct i915_request *rq;
1027
1028 if (!intel_engine_has_preemption(engine))
1029 continue;
1030
1031 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1032 err = -EIO;
1033 goto err_ctx_lo;
1034 }
1035
1036 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1037 MI_ARB_CHECK);
1038 if (IS_ERR(rq)) {
1039 err = PTR_ERR(rq);
1040 goto err_ctx_lo;
1041 }
1042
1043 i915_request_add(rq);
1044 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1045 GEM_TRACE("lo spinner failed to start\n");
1046 GEM_TRACE_DUMP();
1047 intel_gt_set_wedged(gt);
1048 err = -EIO;
1049 goto err_ctx_lo;
1050 }
1051
1052 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1053 MI_ARB_CHECK);
1054 if (IS_ERR(rq)) {
1055 igt_spinner_end(&spin_lo);
1056 err = PTR_ERR(rq);
1057 goto err_ctx_lo;
1058 }
1059
1060 i915_request_add(rq);
1061 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1062 GEM_TRACE("hi spinner failed to start\n");
1063 GEM_TRACE_DUMP();
1064 intel_gt_set_wedged(gt);
1065 err = -EIO;
1066 goto err_ctx_lo;
1067 }
1068
1069 igt_spinner_end(&spin_hi);
1070 igt_spinner_end(&spin_lo);
1071
1072 if (igt_live_test_end(&t)) {
1073 err = -EIO;
1074 goto err_ctx_lo;
1075 }
1076 }
1077
1078 err = 0;
1079 err_ctx_lo:
1080 kernel_context_close(ctx_lo);
1081 err_ctx_hi:
1082 kernel_context_close(ctx_hi);
1083 err_spin_lo:
1084 igt_spinner_fini(&spin_lo);
1085 err_spin_hi:
1086 igt_spinner_fini(&spin_hi);
1087 return err;
1088 }
1089
1090 static int live_late_preempt(void *arg)
1091 {
1092 struct intel_gt *gt = arg;
1093 struct i915_gem_context *ctx_hi, *ctx_lo;
1094 struct igt_spinner spin_hi, spin_lo;
1095 struct intel_engine_cs *engine;
1096 struct i915_sched_attr attr = {};
1097 enum intel_engine_id id;
1098 int err = -ENOMEM;
1099
1100 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1101 return 0;
1102
1103 if (igt_spinner_init(&spin_hi, gt))
1104 return -ENOMEM;
1105
1106 if (igt_spinner_init(&spin_lo, gt))
1107 goto err_spin_hi;
1108
1109 ctx_hi = kernel_context(gt->i915);
1110 if (!ctx_hi)
1111 goto err_spin_lo;
1112
1113 ctx_lo = kernel_context(gt->i915);
1114 if (!ctx_lo)
1115 goto err_ctx_hi;
1116
1117 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1118 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1119
1120 for_each_engine(engine, gt, id) {
1121 struct igt_live_test t;
1122 struct i915_request *rq;
1123
1124 if (!intel_engine_has_preemption(engine))
1125 continue;
1126
1127 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1128 err = -EIO;
1129 goto err_ctx_lo;
1130 }
1131
1132 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1133 MI_ARB_CHECK);
1134 if (IS_ERR(rq)) {
1135 err = PTR_ERR(rq);
1136 goto err_ctx_lo;
1137 }
1138
1139 i915_request_add(rq);
1140 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1141 pr_err("First context failed to start\n");
1142 goto err_wedged;
1143 }
1144
1145 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1146 MI_NOOP);
1147 if (IS_ERR(rq)) {
1148 igt_spinner_end(&spin_lo);
1149 err = PTR_ERR(rq);
1150 goto err_ctx_lo;
1151 }
1152
1153 i915_request_add(rq);
1154 if (igt_wait_for_spinner(&spin_hi, rq)) {
1155 pr_err("Second context overtook first?\n");
1156 goto err_wedged;
1157 }
1158
1159 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1160 engine->schedule(rq, &attr);
1161
1162 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1163 pr_err("High priority context failed to preempt the low priority context\n");
1164 GEM_TRACE_DUMP();
1165 goto err_wedged;
1166 }
1167
1168 igt_spinner_end(&spin_hi);
1169 igt_spinner_end(&spin_lo);
1170
1171 if (igt_live_test_end(&t)) {
1172 err = -EIO;
1173 goto err_ctx_lo;
1174 }
1175 }
1176
1177 err = 0;
1178 err_ctx_lo:
1179 kernel_context_close(ctx_lo);
1180 err_ctx_hi:
1181 kernel_context_close(ctx_hi);
1182 err_spin_lo:
1183 igt_spinner_fini(&spin_lo);
1184 err_spin_hi:
1185 igt_spinner_fini(&spin_hi);
1186 return err;
1187
1188 err_wedged:
1189 igt_spinner_end(&spin_hi);
1190 igt_spinner_end(&spin_lo);
1191 intel_gt_set_wedged(gt);
1192 err = -EIO;
1193 goto err_ctx_lo;
1194 }
1195
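/*
 * A preempt_client pairs a spinner with its own GEM context, giving each
 * test an independent source of unbounded work at a chosen priority.
 */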
1196 struct preempt_client {
1197 struct igt_spinner spin;
1198 struct i915_gem_context *ctx;
1199 };
1200
1201 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1202 {
1203 c->ctx = kernel_context(gt->i915);
1204 if (!c->ctx)
1205 return -ENOMEM;
1206
1207 if (igt_spinner_init(&c->spin, gt))
1208 goto err_ctx;
1209
1210 return 0;
1211
1212 err_ctx:
1213 kernel_context_close(c->ctx);
1214 return -ENOMEM;
1215 }
1216
1217 static void preempt_client_fini(struct preempt_client *c)
1218 {
1219 igt_spinner_fini(&c->spin);
1220 kernel_context_close(c->ctx);
1221 }
1222
1223 static int live_nopreempt(void *arg)
1224 {
1225 struct intel_gt *gt = arg;
1226 struct intel_engine_cs *engine;
1227 struct preempt_client a, b;
1228 enum intel_engine_id id;
1229 int err = -ENOMEM;
1230
1231 /*
1232 * Verify that we can disable preemption for an individual request
1233 * that may be being observed and does not want to be interrupted.
1234 */
1235
1236 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1237 return 0;
1238
1239 if (preempt_client_init(gt, &a))
1240 return -ENOMEM;
1241 if (preempt_client_init(gt, &b))
1242 goto err_client_a;
1243 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1244
1245 for_each_engine(engine, gt, id) {
1246 struct i915_request *rq_a, *rq_b;
1247
1248 if (!intel_engine_has_preemption(engine))
1249 continue;
1250
1251 engine->execlists.preempt_hang.count = 0;
1252
1253 rq_a = spinner_create_request(&a.spin,
1254 a.ctx, engine,
1255 MI_ARB_CHECK);
1256 if (IS_ERR(rq_a)) {
1257 err = PTR_ERR(rq_a);
1258 goto err_client_b;
1259 }
1260
1261 /* Low priority client, but unpreemptable! */
1262 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1263
1264 i915_request_add(rq_a);
1265 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1266 pr_err("First client failed to start\n");
1267 goto err_wedged;
1268 }
1269
1270 rq_b = spinner_create_request(&b.spin,
1271 b.ctx, engine,
1272 MI_ARB_CHECK);
1273 if (IS_ERR(rq_b)) {
1274 err = PTR_ERR(rq_b);
1275 goto err_client_b;
1276 }
1277
1278 i915_request_add(rq_b);
1279
1280 /* B is much more important than A! (But A is unpreemptable.) */
1281 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1282
1283 /* Wait long enough for preemption and timeslicing */
1284 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1285 pr_err("Second client started too early!\n");
1286 goto err_wedged;
1287 }
1288
1289 igt_spinner_end(&a.spin);
1290
1291 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1292 pr_err("Second client failed to start\n");
1293 goto err_wedged;
1294 }
1295
1296 igt_spinner_end(&b.spin);
1297
1298 if (engine->execlists.preempt_hang.count) {
1299 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1300 engine->execlists.preempt_hang.count);
1301 err = -EINVAL;
1302 goto err_wedged;
1303 }
1304
1305 if (igt_flush_test(gt->i915))
1306 goto err_wedged;
1307 }
1308
1309 err = 0;
1310 err_client_b:
1311 preempt_client_fini(&b);
1312 err_client_a:
1313 preempt_client_fini(&a);
1314 return err;
1315
1316 err_wedged:
1317 igt_spinner_end(&b.spin);
1318 igt_spinner_end(&a.spin);
1319 intel_gt_set_wedged(gt);
1320 err = -EIO;
1321 goto err_client_b;
1322 }
1323
1324 struct live_preempt_cancel {
1325 struct intel_engine_cs *engine;
1326 struct preempt_client a, b;
1327 };
1328
1329 static int __cancel_active0(struct live_preempt_cancel *arg)
1330 {
1331 struct i915_request *rq;
1332 struct igt_live_test t;
1333 int err;
1334
1335 /* Preempt cancel of ELSP0 */
1336 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1337 if (igt_live_test_begin(&t, arg->engine->i915,
1338 __func__, arg->engine->name))
1339 return -EIO;
1340
1341 rq = spinner_create_request(&arg->a.spin,
1342 arg->a.ctx, arg->engine,
1343 MI_ARB_CHECK);
1344 if (IS_ERR(rq))
1345 return PTR_ERR(rq);
1346
1347 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1348 i915_request_get(rq);
1349 i915_request_add(rq);
1350 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1351 err = -EIO;
1352 goto out;
1353 }
1354
1355 intel_context_set_banned(rq->context);
1356 err = intel_engine_pulse(arg->engine);
1357 if (err)
1358 goto out;
1359
1360 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1361 err = -EIO;
1362 goto out;
1363 }
1364
1365 if (rq->fence.error != -EIO) {
1366 pr_err("Cancelled inflight0 request did not report -EIO\n");
1367 err = -EINVAL;
1368 goto out;
1369 }
1370
1371 out:
1372 i915_request_put(rq);
1373 if (igt_live_test_end(&t))
1374 err = -EIO;
1375 return err;
1376 }
1377
1378 static int __cancel_active1(struct live_preempt_cancel *arg)
1379 {
1380 struct i915_request *rq[2] = {};
1381 struct igt_live_test t;
1382 int err;
1383
1384 /* Preempt cancel of ELSP1 */
1385 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1386 if (igt_live_test_begin(&t, arg->engine->i915,
1387 __func__, arg->engine->name))
1388 return -EIO;
1389
1390 rq[0] = spinner_create_request(&arg->a.spin,
1391 arg->a.ctx, arg->engine,
1392 MI_NOOP); /* no preemption */
1393 if (IS_ERR(rq[0]))
1394 return PTR_ERR(rq[0]);
1395
1396 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1397 i915_request_get(rq[0]);
1398 i915_request_add(rq[0]);
1399 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1400 err = -EIO;
1401 goto out;
1402 }
1403
1404 rq[1] = spinner_create_request(&arg->b.spin,
1405 arg->b.ctx, arg->engine,
1406 MI_ARB_CHECK);
1407 if (IS_ERR(rq[1])) {
1408 err = PTR_ERR(rq[1]);
1409 goto out;
1410 }
1411
1412 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1413 i915_request_get(rq[1]);
1414 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1415 i915_request_add(rq[1]);
1416 if (err)
1417 goto out;
1418
1419 intel_context_set_banned(rq[1]->context);
1420 err = intel_engine_pulse(arg->engine);
1421 if (err)
1422 goto out;
1423
1424 igt_spinner_end(&arg->a.spin);
1425 if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
1426 err = -EIO;
1427 goto out;
1428 }
1429
1430 if (rq[0]->fence.error != 0) {
1431 pr_err("Normal inflight0 request did not complete\n");
1432 err = -EINVAL;
1433 goto out;
1434 }
1435
1436 if (rq[1]->fence.error != -EIO) {
1437 pr_err("Cancelled inflight1 request did not report -EIO\n");
1438 err = -EINVAL;
1439 goto out;
1440 }
1441
1442 out:
1443 i915_request_put(rq[1]);
1444 i915_request_put(rq[0]);
1445 if (igt_live_test_end(&t))
1446 err = -EIO;
1447 return err;
1448 }
1449
1450 static int __cancel_queued(struct live_preempt_cancel *arg)
1451 {
1452 struct i915_request *rq[3] = {};
1453 struct igt_live_test t;
1454 int err;
1455
1456 /* Full ELSP and one in the wings */
1457 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1458 if (igt_live_test_begin(&t, arg->engine->i915,
1459 __func__, arg->engine->name))
1460 return -EIO;
1461
1462 rq[0] = spinner_create_request(&arg->a.spin,
1463 arg->a.ctx, arg->engine,
1464 MI_ARB_CHECK);
1465 if (IS_ERR(rq[0]))
1466 return PTR_ERR(rq[0]);
1467
1468 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1469 i915_request_get(rq[0]);
1470 i915_request_add(rq[0]);
1471 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1472 err = -EIO;
1473 goto out;
1474 }
1475
1476 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1477 if (IS_ERR(rq[1])) {
1478 err = PTR_ERR(rq[1]);
1479 goto out;
1480 }
1481
1482 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1483 i915_request_get(rq[1]);
1484 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1485 i915_request_add(rq[1]);
1486 if (err)
1487 goto out;
1488
1489 rq[2] = spinner_create_request(&arg->b.spin,
1490 arg->a.ctx, arg->engine,
1491 MI_ARB_CHECK);
1492 if (IS_ERR(rq[2])) {
1493 err = PTR_ERR(rq[2]);
1494 goto out;
1495 }
1496
1497 i915_request_get(rq[2]);
1498 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1499 i915_request_add(rq[2]);
1500 if (err)
1501 goto out;
1502
1503 intel_context_set_banned(rq[2]->context);
1504 err = intel_engine_pulse(arg->engine);
1505 if (err)
1506 goto out;
1507
1508 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1509 err = -EIO;
1510 goto out;
1511 }
1512
1513 if (rq[0]->fence.error != -EIO) {
1514 pr_err("Cancelled inflight0 request did not report -EIO\n");
1515 err = -EINVAL;
1516 goto out;
1517 }
1518
1519 if (rq[1]->fence.error != 0) {
1520 pr_err("Normal inflight1 request did not complete\n");
1521 err = -EINVAL;
1522 goto out;
1523 }
1524
1525 if (rq[2]->fence.error != -EIO) {
1526 pr_err("Cancelled queued request did not report -EIO\n");
1527 err = -EINVAL;
1528 goto out;
1529 }
1530
1531 out:
1532 i915_request_put(rq[2]);
1533 i915_request_put(rq[1]);
1534 i915_request_put(rq[0]);
1535 if (igt_live_test_end(&t))
1536 err = -EIO;
1537 return err;
1538 }
1539
1540 static int __cancel_hostile(struct live_preempt_cancel *arg)
1541 {
1542 struct i915_request *rq;
1543 int err;
1544
1545 /* Preempt cancel non-preemptible spinner in ELSP0 */
1546 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
1547 return 0;
1548
1549 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1550 rq = spinner_create_request(&arg->a.spin,
1551 arg->a.ctx, arg->engine,
1552 MI_NOOP); /* preemption disabled */
1553 if (IS_ERR(rq))
1554 return PTR_ERR(rq);
1555
1556 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1557 i915_request_get(rq);
1558 i915_request_add(rq);
1559 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1560 err = -EIO;
1561 goto out;
1562 }
1563
1564 intel_context_set_banned(rq->context);
1565 err = intel_engine_pulse(arg->engine); /* force reset */
1566 if (err)
1567 goto out;
1568
1569 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1570 err = -EIO;
1571 goto out;
1572 }
1573
1574 if (rq->fence.error != -EIO) {
1575 pr_err("Cancelled inflight0 request did not report -EIO\n");
1576 err = -EINVAL;
1577 goto out;
1578 }
1579
1580 out:
1581 i915_request_put(rq);
1582 if (igt_flush_test(arg->engine->i915))
1583 err = -EIO;
1584 return err;
1585 }
1586
1587 static int live_preempt_cancel(void *arg)
1588 {
1589 struct intel_gt *gt = arg;
1590 struct live_preempt_cancel data;
1591 enum intel_engine_id id;
1592 int err = -ENOMEM;
1593
1594 /*
1595 * To cancel an inflight context, we need to first remove it from the
1596 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
1597 */
1598
1599 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1600 return 0;
1601
1602 if (preempt_client_init(gt, &data.a))
1603 return -ENOMEM;
1604 if (preempt_client_init(gt, &data.b))
1605 goto err_client_a;
1606
1607 for_each_engine(data.engine, gt, id) {
1608 if (!intel_engine_has_preemption(data.engine))
1609 continue;
1610
1611 err = __cancel_active0(&data);
1612 if (err)
1613 goto err_wedged;
1614
1615 err = __cancel_active1(&data);
1616 if (err)
1617 goto err_wedged;
1618
1619 err = __cancel_queued(&data);
1620 if (err)
1621 goto err_wedged;
1622
1623 err = __cancel_hostile(&data);
1624 if (err)
1625 goto err_wedged;
1626 }
1627
1628 err = 0;
1629 err_client_b:
1630 preempt_client_fini(&data.b);
1631 err_client_a:
1632 preempt_client_fini(&data.a);
1633 return err;
1634
1635 err_wedged:
1636 GEM_TRACE_DUMP();
1637 igt_spinner_end(&data.b.spin);
1638 igt_spinner_end(&data.a.spin);
1639 intel_gt_set_wedged(gt);
1640 goto err_client_b;
1641 }
1642
1643 static int live_suppress_self_preempt(void *arg)
1644 {
1645 struct intel_gt *gt = arg;
1646 struct intel_engine_cs *engine;
1647 struct i915_sched_attr attr = {
1648 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
1649 };
1650 struct preempt_client a, b;
1651 enum intel_engine_id id;
1652 int err = -ENOMEM;
1653
1654 /*
1655 * Verify that if a preemption request does not cause a change in
1656 * the current execution order, the preempt-to-idle injection is
1657 * skipped and that we do not accidentally apply it after the CS
1658 * completion event.
1659 */
1660
1661 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1662 return 0;
1663
1664 if (USES_GUC_SUBMISSION(gt->i915))
1665 return 0; /* presume a black box */
1666
1667 if (intel_vgpu_active(gt->i915))
1668 return 0; /* GVT forces single port & request submission */
1669
1670 if (preempt_client_init(gt, &a))
1671 return -ENOMEM;
1672 if (preempt_client_init(gt, &b))
1673 goto err_client_a;
1674
1675 for_each_engine(engine, gt, id) {
1676 struct i915_request *rq_a, *rq_b;
1677 int depth;
1678
1679 if (!intel_engine_has_preemption(engine))
1680 continue;
1681
1682 if (igt_flush_test(gt->i915))
1683 goto err_wedged;
1684
1685 intel_engine_pm_get(engine);
1686 engine->execlists.preempt_hang.count = 0;
1687
1688 rq_a = spinner_create_request(&a.spin,
1689 a.ctx, engine,
1690 MI_NOOP);
1691 if (IS_ERR(rq_a)) {
1692 err = PTR_ERR(rq_a);
1693 intel_engine_pm_put(engine);
1694 goto err_client_b;
1695 }
1696
1697 i915_request_add(rq_a);
1698 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1699 pr_err("First client failed to start\n");
1700 intel_engine_pm_put(engine);
1701 goto err_wedged;
1702 }
1703
1704 /* Keep postponing the timer to avoid premature slicing */
1705 mod_timer(&engine->execlists.timer, jiffies + HZ);
1706 for (depth = 0; depth < 8; depth++) {
1707 rq_b = spinner_create_request(&b.spin,
1708 b.ctx, engine,
1709 MI_NOOP);
1710 if (IS_ERR(rq_b)) {
1711 err = PTR_ERR(rq_b);
1712 intel_engine_pm_put(engine);
1713 goto err_client_b;
1714 }
1715 i915_request_add(rq_b);
1716
1717 GEM_BUG_ON(i915_request_completed(rq_a));
1718 engine->schedule(rq_a, &attr);
1719 igt_spinner_end(&a.spin);
1720
1721 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1722 pr_err("Second client failed to start\n");
1723 intel_engine_pm_put(engine);
1724 goto err_wedged;
1725 }
1726
1727 swap(a, b);
1728 rq_a = rq_b;
1729 }
1730 igt_spinner_end(&a.spin);
1731
1732 if (engine->execlists.preempt_hang.count) {
1733 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
1734 engine->name,
1735 engine->execlists.preempt_hang.count,
1736 depth);
1737 intel_engine_pm_put(engine);
1738 err = -EINVAL;
1739 goto err_client_b;
1740 }
1741
1742 intel_engine_pm_put(engine);
1743 if (igt_flush_test(gt->i915))
1744 goto err_wedged;
1745 }
1746
1747 err = 0;
1748 err_client_b:
1749 preempt_client_fini(&b);
1750 err_client_a:
1751 preempt_client_fini(&a);
1752 return err;
1753
1754 err_wedged:
1755 igt_spinner_end(&b.spin);
1756 igt_spinner_end(&a.spin);
1757 intel_gt_set_wedged(gt);
1758 err = -EIO;
1759 goto err_client_b;
1760 }
1761
1762 static int __i915_sw_fence_call
1763 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
1764 {
1765 return NOTIFY_DONE;
1766 }
1767
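/*
 * Build a bare, never-completing fake request. It is only used as a
 * fence for other requests to wait upon (suppressing the NEWCLIENT
 * boost) and is completed by hand in dummy_request_free().
 */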
1768 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
1769 {
1770 struct i915_request *rq;
1771
1772 rq = kzalloc(sizeof(*rq), GFP_KERNEL);
1773 if (!rq)
1774 return NULL;
1775
1776 rq->engine = engine;
1777
1778 spin_lock_init(&rq->lock);
1779 INIT_LIST_HEAD(&rq->fence.cb_list);
1780 rq->fence.lock = &rq->lock;
1781 rq->fence.ops = &i915_fence_ops;
1782
1783 i915_sched_node_init(&rq->sched);
1784
1785 /* mark this request as permanently incomplete */
1786 rq->fence.seqno = 1;
1787 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
1788 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
1789 GEM_BUG_ON(i915_request_completed(rq));
1790
1791 i915_sw_fence_init(&rq->submit, dummy_notify);
1792 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1793
1794 spin_lock_init(&rq->lock);
1795 rq->fence.lock = &rq->lock;
1796 INIT_LIST_HEAD(&rq->fence.cb_list);
1797
1798 return rq;
1799 }
1800
1801 static void dummy_request_free(struct i915_request *dummy)
1802 {
1803 /* We have to fake the CS interrupt to kick the next request */
1804 i915_sw_fence_commit(&dummy->submit);
1805
1806 i915_request_mark_complete(dummy);
1807 dma_fence_signal(&dummy->fence);
1808
1809 i915_sched_node_fini(&dummy->sched);
1810 i915_sw_fence_fini(&dummy->submit);
1811
1812 dma_fence_free(&dummy->fence);
1813 }
1814
1815 static int live_suppress_wait_preempt(void *arg)
1816 {
1817 struct intel_gt *gt = arg;
1818 struct preempt_client client[4];
1819 struct i915_request *rq[ARRAY_SIZE(client)] = {};
1820 struct intel_engine_cs *engine;
1821 enum intel_engine_id id;
1822 int err = -ENOMEM;
1823 int i;
1824
1825 /*
1826 * Waiters are given a little priority nudge, but not enough
1827 * to actually cause any preemption. Double check that we do
1828 * not needlessly generate preempt-to-idle cycles.
1829 */
1830
1831 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1832 return 0;
1833
1834 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
1835 return -ENOMEM;
1836 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
1837 goto err_client_0;
1838 if (preempt_client_init(gt, &client[2])) /* head of queue */
1839 goto err_client_1;
1840 if (preempt_client_init(gt, &client[3])) /* bystander */
1841 goto err_client_2;
1842
1843 for_each_engine(engine, gt, id) {
1844 int depth;
1845
1846 if (!intel_engine_has_preemption(engine))
1847 continue;
1848
1849 if (!engine->emit_init_breadcrumb)
1850 continue;
1851
1852 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
1853 struct i915_request *dummy;
1854
1855 engine->execlists.preempt_hang.count = 0;
1856
1857 dummy = dummy_request(engine);
1858 if (!dummy)
1859 goto err_client_3;
1860
1861 for (i = 0; i < ARRAY_SIZE(client); i++) {
1862 struct i915_request *this;
1863
1864 this = spinner_create_request(&client[i].spin,
1865 client[i].ctx, engine,
1866 MI_NOOP);
1867 if (IS_ERR(this)) {
1868 err = PTR_ERR(this);
1869 goto err_wedged;
1870 }
1871
1872 /* Disable NEWCLIENT promotion */
1873 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
1874 &dummy->fence);
1875
1876 rq[i] = i915_request_get(this);
1877 i915_request_add(this);
1878 }
1879
1880 dummy_request_free(dummy);
1881
1882 GEM_BUG_ON(i915_request_completed(rq[0]));
1883 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
1884 pr_err("%s: First client failed to start\n",
1885 engine->name);
1886 goto err_wedged;
1887 }
1888 GEM_BUG_ON(!i915_request_started(rq[0]));
1889
1890 if (i915_request_wait(rq[depth],
1891 I915_WAIT_PRIORITY,
1892 1) != -ETIME) {
1893 pr_err("%s: Waiter depth:%d completed!\n",
1894 engine->name, depth);
1895 goto err_wedged;
1896 }
1897
1898 for (i = 0; i < ARRAY_SIZE(client); i++) {
1899 igt_spinner_end(&client[i].spin);
1900 i915_request_put(rq[i]);
1901 rq[i] = NULL;
1902 }
1903
1904 if (igt_flush_test(gt->i915))
1905 goto err_wedged;
1906
1907 if (engine->execlists.preempt_hang.count) {
1908 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
1909 engine->name,
1910 engine->execlists.preempt_hang.count,
1911 depth);
1912 err = -EINVAL;
1913 goto err_client_3;
1914 }
1915 }
1916 }
1917
1918 err = 0;
1919 err_client_3:
1920 preempt_client_fini(&client[3]);
1921 err_client_2:
1922 preempt_client_fini(&client[2]);
1923 err_client_1:
1924 preempt_client_fini(&client[1]);
1925 err_client_0:
1926 preempt_client_fini(&client[0]);
1927 return err;
1928
1929 err_wedged:
1930 for (i = 0; i < ARRAY_SIZE(client); i++) {
1931 igt_spinner_end(&client[i].spin);
1932 i915_request_put(rq[i]);
1933 }
1934 intel_gt_set_wedged(gt);
1935 err = -EIO;
1936 goto err_client_3;
1937 }
1938
1939 static int live_chain_preempt(void *arg)
1940 {
1941 struct intel_gt *gt = arg;
1942 struct intel_engine_cs *engine;
1943 struct preempt_client hi, lo;
1944 enum intel_engine_id id;
1945 int err = -ENOMEM;
1946
1947 /*
1948 * Build a chain AB...BA between two contexts (A, B) and request
1949 * preemption of the last request. It should then complete before
1950 * the previously submitted spinner in B.
1951 */
1952
1953 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1954 return 0;
1955
1956 if (preempt_client_init(gt, &hi))
1957 return -ENOMEM;
1958
1959 if (preempt_client_init(gt, &lo))
1960 goto err_client_hi;
1961
1962 for_each_engine(engine, gt, id) {
1963 struct i915_sched_attr attr = {
1964 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1965 };
1966 struct igt_live_test t;
1967 struct i915_request *rq;
1968 int ring_size, count, i;
1969
1970 if (!intel_engine_has_preemption(engine))
1971 continue;
1972
1973 rq = spinner_create_request(&lo.spin,
1974 lo.ctx, engine,
1975 MI_ARB_CHECK);
1976 if (IS_ERR(rq))
1977 goto err_wedged;
1978
1979 i915_request_get(rq);
1980 i915_request_add(rq);
1981
1982 ring_size = rq->wa_tail - rq->head;
1983 if (ring_size < 0)
1984 ring_size += rq->ring->size;
1985 ring_size = rq->ring->size / ring_size;
1986 pr_debug("%s(%s): Using maximum of %d requests\n",
1987 __func__, engine->name, ring_size);
1988
1989 igt_spinner_end(&lo.spin);
1990 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1991 pr_err("Timed out waiting to flush %s\n", engine->name);
1992 i915_request_put(rq);
1993 goto err_wedged;
1994 }
1995 i915_request_put(rq);
1996
1997 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1998 err = -EIO;
1999 goto err_wedged;
2000 }
2001
2002 for_each_prime_number_from(count, 1, ring_size) {
2003 rq = spinner_create_request(&hi.spin,
2004 hi.ctx, engine,
2005 MI_ARB_CHECK);
2006 if (IS_ERR(rq))
2007 goto err_wedged;
2008 i915_request_add(rq);
2009 if (!igt_wait_for_spinner(&hi.spin, rq))
2010 goto err_wedged;
2011
2012 rq = spinner_create_request(&lo.spin,
2013 lo.ctx, engine,
2014 MI_ARB_CHECK);
2015 if (IS_ERR(rq))
2016 goto err_wedged;
2017 i915_request_add(rq);
2018
2019 for (i = 0; i < count; i++) {
2020 rq = igt_request_alloc(lo.ctx, engine);
2021 if (IS_ERR(rq))
2022 goto err_wedged;
2023 i915_request_add(rq);
2024 }
2025
2026 rq = igt_request_alloc(hi.ctx, engine);
2027 if (IS_ERR(rq))
2028 goto err_wedged;
2029
2030 i915_request_get(rq);
2031 i915_request_add(rq);
2032 engine->schedule(rq, &attr);
2033
2034 igt_spinner_end(&hi.spin);
2035 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2036 struct drm_printer p =
2037 drm_info_printer(gt->i915->drm.dev);
2038
2039 pr_err("Failed to preempt over chain of %d\n",
2040 count);
2041 intel_engine_dump(engine, &p,
2042 "%s\n", engine->name);
2043 i915_request_put(rq);
2044 goto err_wedged;
2045 }
2046 igt_spinner_end(&lo.spin);
2047 i915_request_put(rq);
2048
2049 rq = igt_request_alloc(lo.ctx, engine);
2050 if (IS_ERR(rq))
2051 goto err_wedged;
2052
2053 i915_request_get(rq);
2054 i915_request_add(rq);
2055
2056 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2057 struct drm_printer p =
2058 drm_info_printer(gt->i915->drm.dev);
2059
2060 pr_err("Failed to flush low priority chain of %d requests\n",
2061 count);
2062 intel_engine_dump(engine, &p,
2063 "%s\n", engine->name);
2064
2065 i915_request_put(rq);
2066 goto err_wedged;
2067 }
2068 i915_request_put(rq);
2069 }
2070
2071 if (igt_live_test_end(&t)) {
2072 err = -EIO;
2073 goto err_wedged;
2074 }
2075 }
2076
2077 err = 0;
2078 err_client_lo:
2079 preempt_client_fini(&lo);
2080 err_client_hi:
2081 preempt_client_fini(&hi);
2082 return err;
2083
2084 err_wedged:
2085 igt_spinner_end(&hi.spin);
2086 igt_spinner_end(&lo.spin);
2087 intel_gt_set_wedged(gt);
2088 err = -EIO;
2089 goto err_client_lo;
2090 }
2091
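/*
 * Each gang member spins on the first dword of its own batch. When that
 * dword is cleared, the batch clears the first dword of the previous
 * (lower priority) member before ending, so zapping the newest batch
 * from the CPU unwinds the entire gang in priority order.
 */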
2092 static int create_gang(struct intel_engine_cs *engine,
2093 struct i915_request **prev)
2094 {
2095 struct drm_i915_gem_object *obj;
2096 struct intel_context *ce;
2097 struct i915_request *rq;
2098 struct i915_vma *vma;
2099 u32 *cs;
2100 int err;
2101
2102 ce = intel_context_create(engine);
2103 if (IS_ERR(ce))
2104 return PTR_ERR(ce);
2105
2106 obj = i915_gem_object_create_internal(engine->i915, 4096);
2107 if (IS_ERR(obj)) {
2108 err = PTR_ERR(obj);
2109 goto err_ce;
2110 }
2111
2112 vma = i915_vma_instance(obj, ce->vm, NULL);
2113 if (IS_ERR(vma)) {
2114 err = PTR_ERR(vma);
2115 goto err_obj;
2116 }
2117
2118 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2119 if (err)
2120 goto err_obj;
2121
2122 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2123 if (IS_ERR(cs)) {
2124 err = PTR_ERR(cs);
goto err_obj;
}
2125
2126 /* Semaphore target: spin until zero */
2127 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2128
2129 *cs++ = MI_SEMAPHORE_WAIT |
2130 MI_SEMAPHORE_POLL |
2131 MI_SEMAPHORE_SAD_EQ_SDD;
2132 *cs++ = 0;
2133 *cs++ = lower_32_bits(vma->node.start);
2134 *cs++ = upper_32_bits(vma->node.start);
2135
2136 if (*prev) {
2137 u64 offset = (*prev)->batch->node.start;
2138
2139 /* Terminate the spinner in the next lower priority batch. */
2140 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2141 *cs++ = lower_32_bits(offset);
2142 *cs++ = upper_32_bits(offset);
2143 *cs++ = 0;
2144 }
2145
2146 *cs++ = MI_BATCH_BUFFER_END;
2147 i915_gem_object_flush_map(obj);
2148 i915_gem_object_unpin_map(obj);
2149
2150 rq = intel_context_create_request(ce);
2151 if (IS_ERR(rq)) {
2152 err = PTR_ERR(rq);
goto err_obj;
}
2153
2154 rq->batch = vma;
2155 i915_request_get(rq);
2156
2157 i915_vma_lock(vma);
2158 err = i915_request_await_object(rq, vma->obj, false);
2159 if (!err)
2160 err = i915_vma_move_to_active(vma, rq, 0);
2161 if (!err)
2162 err = rq->engine->emit_bb_start(rq,
2163 vma->node.start,
2164 PAGE_SIZE, 0);
2165 i915_vma_unlock(vma);
2166 i915_request_add(rq);
2167 if (err)
2168 goto err_rq;
2169
2170 i915_gem_object_put(obj);
2171 intel_context_put(ce);
2172
2173 rq->client_link.next = &(*prev)->client_link;
2174 *prev = rq;
2175 return 0;
2176
2177 err_rq:
2178 i915_request_put(rq);
2179 err_obj:
2180 i915_gem_object_put(obj);
2181 err_ce:
2182 intel_context_put(ce);
2183 return err;
2184 }
2185
2186 static int live_preempt_gang(void *arg)
2187 {
2188 struct intel_gt *gt = arg;
2189 struct intel_engine_cs *engine;
2190 enum intel_engine_id id;
2191
2192 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2193 return 0;
2194
2195 /*
2196 * Build as long a chain of preempters as we can, with each
2197 * request higher priority than the last. Once we are ready, we release
2198 * the last batch which then percolates down the chain, each releasing
2199 * the next oldest in turn. The intent is to simply push as hard as we
2200 * can with the number of preemptions, trying to exceed narrow HW
2201 * limits. At a minimum, we insist that we can sort all the user
2202 * high priority levels into execution order.
2203 */
2204
2205 for_each_engine(engine, gt, id) {
2206 struct i915_request *rq = NULL;
2207 struct igt_live_test t;
2208 IGT_TIMEOUT(end_time);
2209 int prio = 0;
2210 int err = 0;
2211 u32 *cs;
2212
2213 if (!intel_engine_has_preemption(engine))
2214 continue;
2215
2216 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2217 return -EIO;
2218
2219 do {
2220 struct i915_sched_attr attr = {
2221 .priority = I915_USER_PRIORITY(prio++),
2222 };
2223
2224 err = create_gang(engine, &rq);
2225 if (err)
2226 break;
2227
2228 /* Submit each spinner at increasing priority */
2229 engine->schedule(rq, &attr);
2230
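 			/*
 			 * Keep adding batches at least until every user
 			 * priority level has been used once, then stop
 			 * before the priority value overflows or the
 			 * timeout expires.
 			 */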
2231 if (prio <= I915_PRIORITY_MAX)
2232 continue;
2233
2234 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2235 break;
2236
2237 if (__igt_timeout(end_time, NULL))
2238 break;
2239 } while (1);
2240 pr_debug("%s: Preempt chain of %d requests\n",
2241 engine->name, prio);
2242
2243 /*
2244 * Such that the last spinner is the highest priority and
2245 * should execute first. When that spinner completes,
2246 * it will terminate the next lowest spinner until there
2247 * are no more spinners and the gang is complete.
2248 */
2249 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2250 if (!IS_ERR(cs)) {
2251 *cs = 0;
2252 i915_gem_object_unpin_map(rq->batch->obj);
2253 } else {
2254 err = PTR_ERR(cs);
2255 intel_gt_set_wedged(gt);
2256 }
2257
2258 while (rq) { /* wait for each rq from highest to lowest prio */
2259 struct i915_request *n =
2260 list_next_entry(rq, client_link);
2261
2262 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2263 struct drm_printer p =
2264 drm_info_printer(engine->i915->drm.dev);
2265
2266 pr_err("Failed to flush chain of %d requests, at %d\n",
2267 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2268 intel_engine_dump(engine, &p,
2269 "%s\n", engine->name);
2270
2271 err = -ETIME;
2272 }
2273
2274 i915_request_put(rq);
2275 rq = n;
2276 }
2277
2278 if (igt_live_test_end(&t))
2279 err = -EIO;
2280 if (err)
2281 return err;
2282 }
2283
2284 return 0;
2285 }
2286
2287 static int live_preempt_hang(void *arg)
2288 {
2289 struct intel_gt *gt = arg;
2290 struct i915_gem_context *ctx_hi, *ctx_lo;
2291 struct igt_spinner spin_hi, spin_lo;
2292 struct intel_engine_cs *engine;
2293 enum intel_engine_id id;
2294 int err = -ENOMEM;
2295
2296 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2297 return 0;
2298
2299 if (!intel_has_reset_engine(gt))
2300 return 0;
2301
2302 if (igt_spinner_init(&spin_hi, gt))
2303 return -ENOMEM;
2304
2305 if (igt_spinner_init(&spin_lo, gt))
2306 goto err_spin_hi;
2307
2308 ctx_hi = kernel_context(gt->i915);
2309 if (!ctx_hi)
2310 goto err_spin_lo;
2311 ctx_hi->sched.priority =
2312 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2313
2314 ctx_lo = kernel_context(gt->i915);
2315 if (!ctx_lo)
2316 goto err_ctx_hi;
2317 ctx_lo->sched.priority =
2318 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2319
2320 for_each_engine(engine, gt, id) {
2321 struct i915_request *rq;
2322
2323 if (!intel_engine_has_preemption(engine))
2324 continue;
2325
2326 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2327 MI_ARB_CHECK);
2328 if (IS_ERR(rq)) {
2329 err = PTR_ERR(rq);
2330 goto err_ctx_lo;
2331 }
2332
2333 i915_request_add(rq);
2334 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2335 GEM_TRACE("lo spinner failed to start\n");
2336 GEM_TRACE_DUMP();
2337 intel_gt_set_wedged(gt);
2338 err = -EIO;
2339 goto err_ctx_lo;
2340 }
2341
2342 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
2343 MI_ARB_CHECK);
2344 if (IS_ERR(rq)) {
2345 igt_spinner_end(&spin_lo);
2346 err = PTR_ERR(rq);
2347 goto err_ctx_lo;
2348 }
2349
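 		/*
 		 * Arm the selftest-only preempt_hang hook: the execlists
 		 * backend is expected to complete the completion when it
 		 * decides to preempt and then deliberately stall, so that
 		 * we have to recover with the engine reset below.
 		 */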
2350 init_completion(&engine->execlists.preempt_hang.completion);
2351 engine->execlists.preempt_hang.inject_hang = true;
2352
2353 i915_request_add(rq);
2354
2355 if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
2356 HZ / 10)) {
2357 pr_err("Preemption did not occur within timeout!");
2358 GEM_TRACE_DUMP();
2359 intel_gt_set_wedged(gt);
2360 err = -EIO;
2361 goto err_ctx_lo;
2362 }
2363
2364 		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2365 		intel_engine_reset(engine, NULL);
2366 		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2367
2368 engine->execlists.preempt_hang.inject_hang = false;
2369
2370 if (!igt_wait_for_spinner(&spin_hi, rq)) {
2371 GEM_TRACE("hi spinner failed to start\n");
2372 GEM_TRACE_DUMP();
2373 intel_gt_set_wedged(gt);
2374 err = -EIO;
2375 goto err_ctx_lo;
2376 }
2377
2378 igt_spinner_end(&spin_hi);
2379 igt_spinner_end(&spin_lo);
2380 if (igt_flush_test(gt->i915)) {
2381 err = -EIO;
2382 goto err_ctx_lo;
2383 }
2384 }
2385
2386 err = 0;
2387 err_ctx_lo:
2388 kernel_context_close(ctx_lo);
2389 err_ctx_hi:
2390 kernel_context_close(ctx_hi);
2391 err_spin_lo:
2392 igt_spinner_fini(&spin_lo);
2393 err_spin_hi:
2394 igt_spinner_fini(&spin_hi);
2395 return err;
2396 }
2397
2398 static int live_preempt_timeout(void *arg)
2399 {
2400 struct intel_gt *gt = arg;
2401 struct i915_gem_context *ctx_hi, *ctx_lo;
2402 struct igt_spinner spin_lo;
2403 struct intel_engine_cs *engine;
2404 enum intel_engine_id id;
2405 int err = -ENOMEM;
2406
2407 /*
2408 * Check that we force preemption to occur by cancelling the previous
2409 * context if it refuses to yield the GPU.
2410 */
2411 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2412 return 0;
2413
2414 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2415 return 0;
2416
2417 if (!intel_has_reset_engine(gt))
2418 return 0;
2419
2420 if (igt_spinner_init(&spin_lo, gt))
2421 return -ENOMEM;
2422
2423 ctx_hi = kernel_context(gt->i915);
2424 if (!ctx_hi)
2425 goto err_spin_lo;
2426 ctx_hi->sched.priority =
2427 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2428
2429 ctx_lo = kernel_context(gt->i915);
2430 if (!ctx_lo)
2431 goto err_ctx_hi;
2432 ctx_lo->sched.priority =
2433 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2434
2435 for_each_engine(engine, gt, id) {
2436 unsigned long saved_timeout;
2437 struct i915_request *rq;
2438
2439 if (!intel_engine_has_preemption(engine))
2440 continue;
2441
2442 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2443 MI_NOOP); /* preemption disabled */
2444 if (IS_ERR(rq)) {
2445 err = PTR_ERR(rq);
2446 goto err_ctx_lo;
2447 }
2448
2449 i915_request_add(rq);
2450 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2451 intel_gt_set_wedged(gt);
2452 err = -EIO;
2453 goto err_ctx_lo;
2454 }
2455
2456 rq = igt_request_alloc(ctx_hi, engine);
2457 if (IS_ERR(rq)) {
2458 igt_spinner_end(&spin_lo);
2459 err = PTR_ERR(rq);
2460 goto err_ctx_lo;
2461 }
2462
2463 /* Flush the previous CS ack before changing timeouts */
2464 while (READ_ONCE(engine->execlists.pending[0]))
2465 cpu_relax();
2466
2467 saved_timeout = engine->props.preempt_timeout_ms;
2468 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
2469
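 		/*
 		 * The low priority spinner has no arbitration point, so the
 		 * only way the high priority request can run is for the 1ms
 		 * preemption timeout to expire and the engine to be reset
 		 * out from underneath the spinner.
 		 */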
2470 i915_request_get(rq);
2471 i915_request_add(rq);
2472
2473 intel_engine_flush_submission(engine);
2474 engine->props.preempt_timeout_ms = saved_timeout;
2475
2476 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2477 intel_gt_set_wedged(gt);
2478 i915_request_put(rq);
2479 err = -ETIME;
2480 goto err_ctx_lo;
2481 }
2482
2483 igt_spinner_end(&spin_lo);
2484 i915_request_put(rq);
2485 }
2486
2487 err = 0;
2488 err_ctx_lo:
2489 kernel_context_close(ctx_lo);
2490 err_ctx_hi:
2491 kernel_context_close(ctx_hi);
2492 err_spin_lo:
2493 igt_spinner_fini(&spin_lo);
2494 return err;
2495 }
2496
2497 static int random_range(struct rnd_state *rnd, int min, int max)
2498 {
2499 return i915_prandom_u32_max_state(max - min, rnd) + min;
2500 }
2501
2502 static int random_priority(struct rnd_state *rnd)
2503 {
2504 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2505 }
2506
2507 struct preempt_smoke {
2508 struct intel_gt *gt;
2509 struct i915_gem_context **contexts;
2510 struct intel_engine_cs *engine;
2511 struct drm_i915_gem_object *batch;
2512 unsigned int ncontext;
2513 struct rnd_state prng;
2514 unsigned long count;
2515 };
2516
2517 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2518 {
2519 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2520 &smoke->prng)];
2521 }
2522
2523 static int smoke_submit(struct preempt_smoke *smoke,
2524 struct i915_gem_context *ctx, int prio,
2525 struct drm_i915_gem_object *batch)
2526 {
2527 struct i915_request *rq;
2528 struct i915_vma *vma = NULL;
2529 int err = 0;
2530
2531 if (batch) {
2532 struct i915_address_space *vm;
2533
2534 vm = i915_gem_context_get_vm_rcu(ctx);
2535 vma = i915_vma_instance(batch, vm, NULL);
2536 i915_vm_put(vm);
2537 if (IS_ERR(vma))
2538 return PTR_ERR(vma);
2539
2540 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2541 if (err)
2542 return err;
2543 }
2544
2545 ctx->sched.priority = prio;
2546
2547 rq = igt_request_alloc(ctx, smoke->engine);
2548 if (IS_ERR(rq)) {
2549 err = PTR_ERR(rq);
2550 goto unpin;
2551 }
2552
2553 if (vma) {
2554 i915_vma_lock(vma);
2555 err = i915_request_await_object(rq, vma->obj, false);
2556 if (!err)
2557 err = i915_vma_move_to_active(vma, rq, 0);
2558 if (!err)
2559 err = rq->engine->emit_bb_start(rq,
2560 vma->node.start,
2561 PAGE_SIZE, 0);
2562 i915_vma_unlock(vma);
2563 }
2564
2565 i915_request_add(rq);
2566
2567 unpin:
2568 if (vma)
2569 i915_vma_unpin(vma);
2570
2571 return err;
2572 }
2573
2574 static int smoke_crescendo_thread(void *arg)
2575 {
2576 struct preempt_smoke *smoke = arg;
2577 IGT_TIMEOUT(end_time);
2578 unsigned long count;
2579
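 	/*
 	 * Each thread hammers its own engine, cycling the context priority
 	 * from 0 up towards I915_PRIORITY_MAX on every submission until the
 	 * timeout expires.
 	 */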
2580 count = 0;
2581 do {
2582 struct i915_gem_context *ctx = smoke_context(smoke);
2583 int err;
2584
2585 err = smoke_submit(smoke,
2586 ctx, count % I915_PRIORITY_MAX,
2587 smoke->batch);
2588 if (err)
2589 return err;
2590
2591 count++;
2592 } while (!__igt_timeout(end_time, NULL));
2593
2594 smoke->count = count;
2595 return 0;
2596 }
2597
2598 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2599 #define BATCH BIT(0)
2600 {
2601 struct task_struct *tsk[I915_NUM_ENGINES] = {};
2602 struct preempt_smoke arg[I915_NUM_ENGINES];
2603 struct intel_engine_cs *engine;
2604 enum intel_engine_id id;
2605 unsigned long count;
2606 int err = 0;
2607
2608 for_each_engine(engine, smoke->gt, id) {
2609 arg[id] = *smoke;
2610 arg[id].engine = engine;
2611 if (!(flags & BATCH))
2612 arg[id].batch = NULL;
2613 arg[id].count = 0;
2614
2615 		tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2616 "igt/smoke:%d", id);
2617 if (IS_ERR(tsk[id])) {
2618 err = PTR_ERR(tsk[id]);
2619 break;
2620 }
2621 get_task_struct(tsk[id]);
2622 }
2623
2624 yield(); /* start all threads before we kthread_stop() */
2625
2626 count = 0;
2627 for_each_engine(engine, smoke->gt, id) {
2628 int status;
2629
2630 if (IS_ERR_OR_NULL(tsk[id]))
2631 continue;
2632
2633 status = kthread_stop(tsk[id]);
2634 if (status && !err)
2635 err = status;
2636
2637 count += arg[id].count;
2638
2639 put_task_struct(tsk[id]);
2640 }
2641
2642 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2643 count, flags,
2644 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2645 	return err;
2646 }
2647
2648 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
2649 {
2650 enum intel_engine_id id;
2651 IGT_TIMEOUT(end_time);
2652 unsigned long count;
2653
2654 count = 0;
2655 do {
2656 for_each_engine(smoke->engine, smoke->gt, id) {
2657 struct i915_gem_context *ctx = smoke_context(smoke);
2658 int err;
2659
2660 err = smoke_submit(smoke,
2661 ctx, random_priority(&smoke->prng),
2662 flags & BATCH ? smoke->batch : NULL);
2663 if (err)
2664 return err;
2665
2666 count++;
2667 }
2668 } while (!__igt_timeout(end_time, NULL));
2669
2670 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
2671 count, flags,
2672 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2673 return 0;
2674 }
2675
2676 static int live_preempt_smoke(void *arg)
2677 {
2678 struct preempt_smoke smoke = {
2679 .gt = arg,
2680 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
2681 .ncontext = 1024,
2682 };
2683 const unsigned int phase[] = { 0, BATCH };
2684 struct igt_live_test t;
2685 int err = -ENOMEM;
2686 u32 *cs;
2687 int n;
2688
2689 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
2690 return 0;
2691
2692 smoke.contexts = kmalloc_array(smoke.ncontext,
2693 sizeof(*smoke.contexts),
2694 GFP_KERNEL);
2695 if (!smoke.contexts)
2696 return -ENOMEM;
2697
2698 smoke.batch =
2699 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
2700 if (IS_ERR(smoke.batch)) {
2701 err = PTR_ERR(smoke.batch);
2702 goto err_free;
2703 }
2704
2705 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
2706 if (IS_ERR(cs)) {
2707 err = PTR_ERR(cs);
2708 goto err_batch;
2709 }
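 	/*
 	 * Fill the shared batch with MI_ARB_CHECK so that there is an
 	 * arbitration point at every instruction, giving the scheduler
 	 * ample opportunity to preempt mid-batch.
 	 */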
2710 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
2711 cs[n] = MI_ARB_CHECK;
2712 cs[n] = MI_BATCH_BUFFER_END;
2713 i915_gem_object_flush_map(smoke.batch);
2714 i915_gem_object_unpin_map(smoke.batch);
2715
2716 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
2717 err = -EIO;
2718 goto err_batch;
2719 }
2720
2721 for (n = 0; n < smoke.ncontext; n++) {
2722 smoke.contexts[n] = kernel_context(smoke.gt->i915);
2723 if (!smoke.contexts[n])
2724 goto err_ctx;
2725 }
2726
2727 for (n = 0; n < ARRAY_SIZE(phase); n++) {
2728 err = smoke_crescendo(&smoke, phase[n]);
2729 if (err)
2730 goto err_ctx;
2731
2732 err = smoke_random(&smoke, phase[n]);
2733 if (err)
2734 goto err_ctx;
2735 }
2736
2737 err_ctx:
2738 if (igt_live_test_end(&t))
2739 err = -EIO;
2740
2741 for (n = 0; n < smoke.ncontext; n++) {
2742 if (!smoke.contexts[n])
2743 break;
2744 kernel_context_close(smoke.contexts[n]);
2745 }
2746
2747 err_batch:
2748 i915_gem_object_put(smoke.batch);
2749 err_free:
2750 kfree(smoke.contexts);
2751
2752 return err;
2753 }
2754
2755 static int nop_virtual_engine(struct intel_gt *gt,
2756 struct intel_engine_cs **siblings,
2757 unsigned int nsibling,
2758 unsigned int nctx,
2759 unsigned int flags)
2760 #define CHAIN BIT(0)
2761 {
2762 IGT_TIMEOUT(end_time);
2763 struct i915_request *request[16] = {};
2764 struct intel_context *ve[16];
2765 unsigned long n, prime, nc;
2766 struct igt_live_test t;
2767 ktime_t times[2] = {};
2768 int err;
2769
2770 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
2771
2772 for (n = 0; n < nctx; n++) {
2773 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
2774 if (IS_ERR(ve[n])) {
2775 err = PTR_ERR(ve[n]);
2776 nctx = n;
2777 goto out;
2778 }
2779
2780 err = intel_context_pin(ve[n]);
2781 if (err) {
2782 intel_context_put(ve[n]);
2783 nctx = n;
2784 goto out;
2785 }
2786 }
2787
2788 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
2789 if (err)
2790 goto out;
2791
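 	/*
 	 * Measure submission latency through the virtual engine: for each
 	 * prime, submit that many empty requests per context (either chained
 	 * per-context or interleaved), wait for the last request of each
 	 * context, and compare the single-request time against the batched
 	 * rate reported at the end.
 	 */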
2792 for_each_prime_number_from(prime, 1, 8192) {
2793 times[1] = ktime_get_raw();
2794
2795 if (flags & CHAIN) {
2796 for (nc = 0; nc < nctx; nc++) {
2797 for (n = 0; n < prime; n++) {
2798 struct i915_request *rq;
2799
2800 rq = i915_request_create(ve[nc]);
2801 if (IS_ERR(rq)) {
2802 err = PTR_ERR(rq);
2803 goto out;
2804 }
2805
2806 if (request[nc])
2807 i915_request_put(request[nc]);
2808 request[nc] = i915_request_get(rq);
2809 i915_request_add(rq);
2810 }
2811 }
2812 } else {
2813 for (n = 0; n < prime; n++) {
2814 for (nc = 0; nc < nctx; nc++) {
2815 struct i915_request *rq;
2816
2817 rq = i915_request_create(ve[nc]);
2818 if (IS_ERR(rq)) {
2819 err = PTR_ERR(rq);
2820 goto out;
2821 }
2822
2823 if (request[nc])
2824 i915_request_put(request[nc]);
2825 request[nc] = i915_request_get(rq);
2826 i915_request_add(rq);
2827 }
2828 }
2829 }
2830
2831 for (nc = 0; nc < nctx; nc++) {
2832 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
2833 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2834 __func__, ve[0]->engine->name,
2835 request[nc]->fence.context,
2836 request[nc]->fence.seqno);
2837
2838 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2839 __func__, ve[0]->engine->name,
2840 request[nc]->fence.context,
2841 request[nc]->fence.seqno);
2842 GEM_TRACE_DUMP();
2843 intel_gt_set_wedged(gt);
2844 break;
2845 }
2846 }
2847
2848 times[1] = ktime_sub(ktime_get_raw(), times[1]);
2849 if (prime == 1)
2850 times[0] = times[1];
2851
2852 for (nc = 0; nc < nctx; nc++) {
2853 i915_request_put(request[nc]);
2854 request[nc] = NULL;
2855 }
2856
2857 if (__igt_timeout(end_time, NULL))
2858 break;
2859 }
2860
2861 err = igt_live_test_end(&t);
2862 if (err)
2863 goto out;
2864
2865 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
2866 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
2867 prime, div64_u64(ktime_to_ns(times[1]), prime));
2868
2869 out:
2870 if (igt_flush_test(gt->i915))
2871 err = -EIO;
2872
2873 for (nc = 0; nc < nctx; nc++) {
2874 i915_request_put(request[nc]);
2875 intel_context_unpin(ve[nc]);
2876 intel_context_put(ve[nc]);
2877 }
2878 return err;
2879 }
2880
2881 static int live_virtual_engine(void *arg)
2882 {
2883 struct intel_gt *gt = arg;
2884 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2885 struct intel_engine_cs *engine;
2886 enum intel_engine_id id;
2887 unsigned int class, inst;
2888 int err;
2889
2890 if (USES_GUC_SUBMISSION(gt->i915))
2891 return 0;
2892
2893 for_each_engine(engine, gt, id) {
2894 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
2895 if (err) {
2896 pr_err("Failed to wrap engine %s: err=%d\n",
2897 engine->name, err);
2898 return err;
2899 }
2900 }
2901
2902 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2903 int nsibling, n;
2904
2905 nsibling = 0;
2906 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2907 if (!gt->engine_class[class][inst])
2908 continue;
2909
2910 siblings[nsibling++] = gt->engine_class[class][inst];
2911 }
2912 if (nsibling < 2)
2913 continue;
2914
2915 for (n = 1; n <= nsibling + 1; n++) {
2916 err = nop_virtual_engine(gt, siblings, nsibling,
2917 n, 0);
2918 if (err)
2919 return err;
2920 }
2921
2922 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
2923 if (err)
2924 return err;
2925 }
2926
2927 return 0;
2928 }
2929
2930 static int mask_virtual_engine(struct intel_gt *gt,
2931 struct intel_engine_cs **siblings,
2932 unsigned int nsibling)
2933 {
2934 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
2935 struct intel_context *ve;
2936 struct igt_live_test t;
2937 unsigned int n;
2938 int err;
2939
2940 /*
2941 * Check that by setting the execution mask on a request, we can
2942 * restrict it to our desired engine within the virtual engine.
2943 */
2944
2945 ve = intel_execlists_create_virtual(siblings, nsibling);
2946 if (IS_ERR(ve)) {
2947 err = PTR_ERR(ve);
2948 goto out_close;
2949 }
2950
2951 err = intel_context_pin(ve);
2952 if (err)
2953 goto out_put;
2954
2955 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2956 if (err)
2957 goto out_unpin;
2958
2959 for (n = 0; n < nsibling; n++) {
2960 request[n] = i915_request_create(ve);
2961 if (IS_ERR(request[n])) {
2962 err = PTR_ERR(request[n]);
2963 nsibling = n;
2964 goto out;
2965 }
2966
2967 /* Reverse order as it's more likely to be unnatural */
2968 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
2969
2970 i915_request_get(request[n]);
2971 i915_request_add(request[n]);
2972 }
2973
2974 for (n = 0; n < nsibling; n++) {
2975 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
2976 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2977 __func__, ve->engine->name,
2978 request[n]->fence.context,
2979 request[n]->fence.seqno);
2980
2981 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2982 __func__, ve->engine->name,
2983 request[n]->fence.context,
2984 request[n]->fence.seqno);
2985 GEM_TRACE_DUMP();
2986 intel_gt_set_wedged(gt);
2987 err = -EIO;
2988 goto out;
2989 }
2990
2991 if (request[n]->engine != siblings[nsibling - n - 1]) {
2992 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
2993 request[n]->engine->name,
2994 siblings[nsibling - n - 1]->name);
2995 err = -EINVAL;
2996 goto out;
2997 }
2998 }
2999
3000 err = igt_live_test_end(&t);
3001 out:
3002 if (igt_flush_test(gt->i915))
3003 err = -EIO;
3004
3005 for (n = 0; n < nsibling; n++)
3006 i915_request_put(request[n]);
3007
3008 out_unpin:
3009 intel_context_unpin(ve);
3010 out_put:
3011 intel_context_put(ve);
3012 out_close:
3013 return err;
3014 }
3015
3016 static int live_virtual_mask(void *arg)
3017 {
3018 struct intel_gt *gt = arg;
3019 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3020 unsigned int class, inst;
3021 int err;
3022
3023 if (USES_GUC_SUBMISSION(gt->i915))
3024 return 0;
3025
3026 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3027 unsigned int nsibling;
3028
3029 nsibling = 0;
3030 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3031 if (!gt->engine_class[class][inst])
3032 break;
3033
3034 siblings[nsibling++] = gt->engine_class[class][inst];
3035 }
3036 if (nsibling < 2)
3037 continue;
3038
3039 err = mask_virtual_engine(gt, siblings, nsibling);
3040 if (err)
3041 return err;
3042 }
3043
3044 return 0;
3045 }
3046
3047 static int preserved_virtual_engine(struct intel_gt *gt,
3048 struct intel_engine_cs **siblings,
3049 unsigned int nsibling)
3050 {
3051 struct i915_request *last = NULL;
3052 struct intel_context *ve;
3053 struct i915_vma *scratch;
3054 struct igt_live_test t;
3055 unsigned int n;
3056 int err = 0;
3057 u32 *cs;
3058
3059 scratch = create_scratch(siblings[0]->gt);
3060 if (IS_ERR(scratch))
3061 return PTR_ERR(scratch);
3062
3063 ve = intel_execlists_create_virtual(siblings, nsibling);
3064 if (IS_ERR(ve)) {
3065 err = PTR_ERR(ve);
3066 goto out_scratch;
3067 }
3068
3069 err = intel_context_pin(ve);
3070 if (err)
3071 goto out_put;
3072
3073 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3074 if (err)
3075 goto out_unpin;
3076
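 	/*
 	 * Ping-pong across the siblings: request n reads back GPR n (written
 	 * as the value n by the previous request on a different sibling, or
 	 * relying on a new context starting with zeroed GPRs for n == 0)
 	 * into scratch[n], then seeds GPR n + 1 with the value n + 1 for the
 	 * next request. The readback below checks that the GPR contents
 	 * survived the engine switches.
 	 */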
3077 for (n = 0; n < NUM_GPR_DW; n++) {
3078 struct intel_engine_cs *engine = siblings[n % nsibling];
3079 struct i915_request *rq;
3080
3081 rq = i915_request_create(ve);
3082 if (IS_ERR(rq)) {
3083 err = PTR_ERR(rq);
3084 goto out_end;
3085 }
3086
3087 i915_request_put(last);
3088 last = i915_request_get(rq);
3089
3090 cs = intel_ring_begin(rq, 8);
3091 if (IS_ERR(cs)) {
3092 i915_request_add(rq);
3093 err = PTR_ERR(cs);
3094 goto out_end;
3095 }
3096
3097 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3098 *cs++ = CS_GPR(engine, n);
3099 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3100 *cs++ = 0;
3101
3102 *cs++ = MI_LOAD_REGISTER_IMM(1);
3103 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3104 *cs++ = n + 1;
3105
3106 *cs++ = MI_NOOP;
3107 intel_ring_advance(rq, cs);
3108
3109 /* Restrict this request to run on a particular engine */
3110 rq->execution_mask = engine->mask;
3111 i915_request_add(rq);
3112 }
3113
3114 if (i915_request_wait(last, 0, HZ / 5) < 0) {
3115 err = -ETIME;
3116 goto out_end;
3117 }
3118
3119 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3120 if (IS_ERR(cs)) {
3121 err = PTR_ERR(cs);
3122 goto out_end;
3123 }
3124
3125 for (n = 0; n < NUM_GPR_DW; n++) {
3126 if (cs[n] != n) {
3127 pr_err("Incorrect value[%d] found for GPR[%d]\n",
3128 cs[n], n);
3129 err = -EINVAL;
3130 break;
3131 }
3132 }
3133
3134 i915_gem_object_unpin_map(scratch->obj);
3135
3136 out_end:
3137 if (igt_live_test_end(&t))
3138 err = -EIO;
3139 i915_request_put(last);
3140 out_unpin:
3141 intel_context_unpin(ve);
3142 out_put:
3143 intel_context_put(ve);
3144 out_scratch:
3145 i915_vma_unpin_and_release(&scratch, 0);
3146 return err;
3147 }
3148
3149 static int live_virtual_preserved(void *arg)
3150 {
3151 struct intel_gt *gt = arg;
3152 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3153 unsigned int class, inst;
3154
3155 /*
3156 * Check that the context image retains non-privileged (user) registers
3157 * from one engine to the next. For this we check that the CS_GPR
3158 * are preserved.
3159 */
3160
3161 if (USES_GUC_SUBMISSION(gt->i915))
3162 return 0;
3163
3164 /* As we use CS_GPR we cannot run before they existed on all engines. */
3165 if (INTEL_GEN(gt->i915) < 9)
3166 return 0;
3167
3168 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3169 int nsibling, err;
3170
3171 nsibling = 0;
3172 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3173 if (!gt->engine_class[class][inst])
3174 continue;
3175
3176 siblings[nsibling++] = gt->engine_class[class][inst];
3177 }
3178 if (nsibling < 2)
3179 continue;
3180
3181 err = preserved_virtual_engine(gt, siblings, nsibling);
3182 if (err)
3183 return err;
3184 }
3185
3186 return 0;
3187 }
3188
3189 static int bond_virtual_engine(struct intel_gt *gt,
3190 unsigned int class,
3191 struct intel_engine_cs **siblings,
3192 unsigned int nsibling,
3193 unsigned int flags)
3194 #define BOND_SCHEDULE BIT(0)
3195 {
3196 struct intel_engine_cs *master;
3197 struct i915_request *rq[16];
3198 enum intel_engine_id id;
3199 struct igt_spinner spin;
3200 unsigned long n;
3201 int err;
3202
3203 /*
3204 * A set of bonded requests is intended to be run concurrently
3205 * across a number of engines. We use one request per-engine
3206 * and a magic fence to schedule each of the bonded requests
3207 * at the same time. A consequence of our current scheduler is that
3208 * we only move requests to the HW ready queue when the request
3209 * becomes ready, that is when all of its prerequisite fences have
3210 * been signaled. As one of those fences is the master submit fence,
3211 * there is a delay on all secondary fences as the HW may be
3212 * currently busy. Equally, as all the requests are independent,
3213 * they may have other fences that delay individual request
3214 * submission to HW. Ergo, we do not guarantee that all requests are
3215 * immediately submitted to HW at the same time, just that if the
3216 * rules are abided by, they are ready at the same time as the
3217 * first is submitted. Userspace can embed semaphores in its batch
3218 * to ensure parallel execution of its phases as it requires.
3219 * Though naturally it gets requested that perhaps the scheduler should
3220 * take care of parallel execution, even across preemption events on
3221 * different HW. (The proper answer is of course "lalalala".)
3222 *
3223 * With the submit-fence, we have identified three possible phases
3224 * of synchronisation depending on the master fence: queued (not
3225 * ready), executing, and signaled. The first two are quite simple
3226 * and checked below. However, the signaled master fence handling is
3227 * contentious. Currently we do not distinguish between a signaled
3228 * fence and an expired fence, as once signaled it does not convey
3229 * any information about the previous execution. It may even be freed
3230 * and hence checking later it may not exist at all. Ergo we currently
3231 * do not apply the bonding constraint for an already signaled fence,
3232 * as our expectation is that it should not constrain the secondaries
3233 * and is outside of the scope of the bonded request API (i.e. all
3234 * userspace requests are meant to be running in parallel). As
3235 * it imposes no constraint, and is effectively a no-op, we do not
3236 * check below as normal execution flows are checked extensively above.
3237 *
3238 * XXX Is the degenerate handling of signaled submit fences the
3239 	 * expected behaviour for userspace?
3240 */
3241
3242 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3243
3244 if (igt_spinner_init(&spin, gt))
3245 return -ENOMEM;
3246
3247 err = 0;
3248 rq[0] = ERR_PTR(-ENOMEM);
3249 for_each_engine(master, gt, id) {
3250 struct i915_sw_fence fence = {};
3251
3252 if (master->class == class)
3253 continue;
3254
3255 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3256
3257 rq[0] = igt_spinner_create_request(&spin,
3258 master->kernel_context,
3259 MI_NOOP);
3260 if (IS_ERR(rq[0])) {
3261 err = PTR_ERR(rq[0]);
3262 goto out;
3263 }
3264 i915_request_get(rq[0]);
3265
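 		/*
 		 * With BOND_SCHEDULE the master is gated behind an onstack
 		 * fence, so the bonded secondaries are created while the
 		 * master is still merely queued; without it the master is
 		 * already spinning on the GPU before they are submitted.
 		 */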
3266 if (flags & BOND_SCHEDULE) {
3267 onstack_fence_init(&fence);
3268 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3269 &fence,
3270 GFP_KERNEL);
3271 }
3272
3273 i915_request_add(rq[0]);
3274 if (err < 0)
3275 goto out;
3276
3277 if (!(flags & BOND_SCHEDULE) &&
3278 !igt_wait_for_spinner(&spin, rq[0])) {
3279 err = -EIO;
3280 goto out;
3281 }
3282
3283 for (n = 0; n < nsibling; n++) {
3284 struct intel_context *ve;
3285
3286 ve = intel_execlists_create_virtual(siblings, nsibling);
3287 if (IS_ERR(ve)) {
3288 err = PTR_ERR(ve);
3289 onstack_fence_fini(&fence);
3290 goto out;
3291 }
3292
3293 err = intel_virtual_engine_attach_bond(ve->engine,
3294 master,
3295 siblings[n]);
3296 if (err) {
3297 intel_context_put(ve);
3298 onstack_fence_fini(&fence);
3299 goto out;
3300 }
3301
3302 err = intel_context_pin(ve);
3303 intel_context_put(ve);
3304 if (err) {
3305 onstack_fence_fini(&fence);
3306 goto out;
3307 }
3308
3309 rq[n + 1] = i915_request_create(ve);
3310 intel_context_unpin(ve);
3311 if (IS_ERR(rq[n + 1])) {
3312 err = PTR_ERR(rq[n + 1]);
3313 onstack_fence_fini(&fence);
3314 goto out;
3315 }
3316 i915_request_get(rq[n + 1]);
3317
3318 err = i915_request_await_execution(rq[n + 1],
3319 &rq[0]->fence,
3320 ve->engine->bond_execute);
3321 i915_request_add(rq[n + 1]);
3322 if (err < 0) {
3323 onstack_fence_fini(&fence);
3324 goto out;
3325 }
3326 }
3327 onstack_fence_fini(&fence);
3328 intel_engine_flush_submission(master);
3329 igt_spinner_end(&spin);
3330
3331 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3332 pr_err("Master request did not execute (on %s)!\n",
3333 rq[0]->engine->name);
3334 err = -EIO;
3335 goto out;
3336 }
3337
3338 for (n = 0; n < nsibling; n++) {
3339 if (i915_request_wait(rq[n + 1], 0,
3340 MAX_SCHEDULE_TIMEOUT) < 0) {
3341 err = -EIO;
3342 goto out;
3343 }
3344
3345 if (rq[n + 1]->engine != siblings[n]) {
3346 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3347 siblings[n]->name,
3348 rq[n + 1]->engine->name,
3349 rq[0]->engine->name);
3350 err = -EINVAL;
3351 goto out;
3352 }
3353 }
3354
3355 for (n = 0; !IS_ERR(rq[n]); n++)
3356 i915_request_put(rq[n]);
3357 rq[0] = ERR_PTR(-ENOMEM);
3358 }
3359
3360 out:
3361 for (n = 0; !IS_ERR(rq[n]); n++)
3362 i915_request_put(rq[n]);
3363 if (igt_flush_test(gt->i915))
3364 err = -EIO;
3365
3366 igt_spinner_fini(&spin);
3367 return err;
3368 }
3369
3370 static int live_virtual_bond(void *arg)
3371 {
3372 static const struct phase {
3373 const char *name;
3374 unsigned int flags;
3375 } phases[] = {
3376 { "", 0 },
3377 { "schedule", BOND_SCHEDULE },
3378 { },
3379 };
3380 struct intel_gt *gt = arg;
3381 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3382 unsigned int class, inst;
3383 int err;
3384
3385 if (USES_GUC_SUBMISSION(gt->i915))
3386 return 0;
3387
3388 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3389 const struct phase *p;
3390 int nsibling;
3391
3392 nsibling = 0;
3393 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3394 if (!gt->engine_class[class][inst])
3395 break;
3396
3397 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3398 siblings[nsibling++] = gt->engine_class[class][inst];
3399 }
3400 if (nsibling < 2)
3401 continue;
3402
3403 for (p = phases; p->name; p++) {
3404 err = bond_virtual_engine(gt,
3405 class, siblings, nsibling,
3406 p->flags);
3407 if (err) {
3408 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3409 __func__, p->name, class, nsibling, err);
3410 return err;
3411 }
3412 }
3413 }
3414
3415 return 0;
3416 }
3417
3418 static int reset_virtual_engine(struct intel_gt *gt,
3419 struct intel_engine_cs **siblings,
3420 unsigned int nsibling)
3421 {
3422 struct intel_engine_cs *engine;
3423 struct intel_context *ve;
3424 unsigned long *heartbeat;
3425 struct igt_spinner spin;
3426 struct i915_request *rq;
3427 unsigned int n;
3428 int err = 0;
3429
3430 /*
3431 * In order to support offline error capture for fast preempt reset,
3432 * we need to decouple the guilty request and ensure that it and its
3433 	 * descendants are not executed while the capture is in progress.
3434 */
3435
3436 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
3437 if (!heartbeat)
3438 return -ENOMEM;
3439
3440 if (igt_spinner_init(&spin, gt)) {
3441 err = -ENOMEM;
3442 goto out_free;
3443 }
3444
3445 ve = intel_execlists_create_virtual(siblings, nsibling);
3446 if (IS_ERR(ve)) {
3447 err = PTR_ERR(ve);
3448 goto out_spin;
3449 }
3450
3451 for (n = 0; n < nsibling; n++)
3452 engine_heartbeat_disable(siblings[n], &heartbeat[n]);
3453
3454 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
3455 if (IS_ERR(rq)) {
3456 err = PTR_ERR(rq);
3457 goto out_heartbeat;
3458 }
3459 i915_request_add(rq);
3460
3461 if (!igt_wait_for_spinner(&spin, rq)) {
3462 intel_gt_set_wedged(gt);
3463 err = -ETIME;
3464 goto out_heartbeat;
3465 }
3466
3467 engine = rq->engine;
3468 GEM_BUG_ON(engine == ve->engine);
3469
3470 /* Take ownership of the reset and tasklet */
3471 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
3472 			     &gt->reset.flags)) {
3473 intel_gt_set_wedged(gt);
3474 err = -EBUSY;
3475 goto out_heartbeat;
3476 }
3477 tasklet_disable(&engine->execlists.tasklet);
3478
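 	/*
 	 * Run the submission tasklet by hand while it is disabled, so that
 	 * the spinner becomes the active (ELSP) request without the backend
 	 * racing against us while we fake the failed preemption below.
 	 */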
3479 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
3480 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
3481
3482 /* Fake a preemption event; failed of course */
3483 spin_lock_irq(&engine->active.lock);
3484 __unwind_incomplete_requests(engine);
3485 spin_unlock_irq(&engine->active.lock);
3486 GEM_BUG_ON(rq->engine != ve->engine);
3487
3488 /* Reset the engine while keeping our active request on hold */
3489 execlists_hold(engine, rq);
3490 GEM_BUG_ON(!i915_request_on_hold(rq));
3491
3492 intel_engine_reset(engine, NULL);
3493 GEM_BUG_ON(rq->fence.error != -EIO);
3494
3495 /* Release our grasp on the engine, letting CS flow again */
3496 tasklet_enable(&engine->execlists.tasklet);
3497 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
3498
3499 /* Check that we do not resubmit the held request */
3500 i915_request_get(rq);
3501 if (!i915_request_wait(rq, 0, HZ / 5)) {
3502 pr_err("%s: on hold request completed!\n",
3503 engine->name);
3504 intel_gt_set_wedged(gt);
3505 err = -EIO;
3506 goto out_rq;
3507 }
3508 GEM_BUG_ON(!i915_request_on_hold(rq));
3509
3510 /* But is resubmitted on release */
3511 execlists_unhold(engine, rq);
3512 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3513 pr_err("%s: held request did not complete!\n",
3514 engine->name);
3515 intel_gt_set_wedged(gt);
3516 err = -ETIME;
3517 }
3518
3519 out_rq:
3520 i915_request_put(rq);
3521 out_heartbeat:
3522 for (n = 0; n < nsibling; n++)
3523 engine_heartbeat_enable(siblings[n], heartbeat[n]);
3524
3525 intel_context_put(ve);
3526 out_spin:
3527 igt_spinner_fini(&spin);
3528 out_free:
3529 kfree(heartbeat);
3530 return err;
3531 }
3532
3533 static int live_virtual_reset(void *arg)
3534 {
3535 struct intel_gt *gt = arg;
3536 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3537 unsigned int class, inst;
3538
3539 /*
3540 * Check that we handle a reset event within a virtual engine.
3541 * Only the physical engine is reset, but we have to check the flow
3542 * of the virtual requests around the reset, and make sure it is not
3543 * forgotten.
3544 */
3545
3546 if (USES_GUC_SUBMISSION(gt->i915))
3547 return 0;
3548
3549 if (!intel_has_reset_engine(gt))
3550 return 0;
3551
3552 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3553 int nsibling, err;
3554
3555 nsibling = 0;
3556 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3557 if (!gt->engine_class[class][inst])
3558 continue;
3559
3560 siblings[nsibling++] = gt->engine_class[class][inst];
3561 }
3562 if (nsibling < 2)
3563 continue;
3564
3565 err = reset_virtual_engine(gt, siblings, nsibling);
3566 if (err)
3567 return err;
3568 }
3569
3570 return 0;
3571 }
3572
3573 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3574 {
3575 static const struct i915_subtest tests[] = {
3576 SUBTEST(live_sanitycheck),
3577 SUBTEST(live_unlite_switch),
3578 SUBTEST(live_unlite_preempt),
3579 SUBTEST(live_hold_reset),
3580 SUBTEST(live_timeslice_preempt),
3581 SUBTEST(live_timeslice_queue),
3582 SUBTEST(live_busywait_preempt),
3583 SUBTEST(live_preempt),
3584 SUBTEST(live_late_preempt),
3585 SUBTEST(live_nopreempt),
3586 SUBTEST(live_preempt_cancel),
3587 SUBTEST(live_suppress_self_preempt),
3588 SUBTEST(live_suppress_wait_preempt),
3589 SUBTEST(live_chain_preempt),
3590 SUBTEST(live_preempt_gang),
3591 SUBTEST(live_preempt_hang),
3592 SUBTEST(live_preempt_timeout),
3593 SUBTEST(live_preempt_smoke),
3594 SUBTEST(live_virtual_engine),
3595 SUBTEST(live_virtual_mask),
3596 SUBTEST(live_virtual_preserved),
3597 SUBTEST(live_virtual_bond),
3598 SUBTEST(live_virtual_reset),
3599 };
3600
3601 if (!HAS_EXECLISTS(i915))
3602 return 0;
3603
3604 if (intel_gt_is_wedged(&i915->gt))
3605 return 0;
3606
3607 return intel_gt_live_subtests(tests, &i915->gt);
3608 }
3609
3610 static void hexdump(const void *buf, size_t len)
3611 {
3612 const size_t rowsize = 8 * sizeof(u32);
3613 const void *prev = NULL;
3614 bool skip = false;
3615 size_t pos;
3616
3617 for (pos = 0; pos < len; pos += rowsize) {
3618 char line[128];
3619
3620 if (prev && !memcmp(prev, buf + pos, rowsize)) {
3621 if (!skip) {
3622 pr_info("*\n");
3623 skip = true;
3624 }
3625 continue;
3626 }
3627
3628 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3629 rowsize, sizeof(u32),
3630 line, sizeof(line),
3631 false) >= sizeof(line));
3632 pr_info("[%04zx] %s\n", pos, line);
3633
3634 prev = buf + pos;
3635 skip = false;
3636 }
3637 }
3638
3639 static int live_lrc_layout(void *arg)
3640 {
3641 struct intel_gt *gt = arg;
3642 struct intel_engine_cs *engine;
3643 enum intel_engine_id id;
3644 u32 *lrc;
3645 int err;
3646
3647 /*
3648 * Check the registers offsets we use to create the initial reg state
3649 * match the layout saved by HW.
3650 */
3651
3652 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
3653 if (!lrc)
3654 return -ENOMEM;
3655
3656 err = 0;
3657 for_each_engine(engine, gt, id) {
3658 u32 *hw;
3659 int dw;
3660
3661 if (!engine->default_state)
3662 continue;
3663
3664 hw = i915_gem_object_pin_map(engine->default_state,
3665 I915_MAP_WB);
3666 if (IS_ERR(hw)) {
3667 err = PTR_ERR(hw);
3668 break;
3669 }
3670 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3671
3672 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
3673 engine->kernel_context,
3674 engine,
3675 engine->kernel_context->ring,
3676 true);
3677
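 		/*
 		 * Walk the HW default image one MI_LOAD_REGISTER_IMM block
 		 * at a time: each LRI header and each register offset must
 		 * appear at the same dword in our SW image; only the
 		 * register values in between are allowed to differ.
 		 */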
3678 dw = 0;
3679 do {
3680 u32 lri = hw[dw];
3681
3682 if (lri == 0) {
3683 dw++;
3684 continue;
3685 }
3686
3687 if (lrc[dw] == 0) {
3688 pr_debug("%s: skipped instruction %x at dword %d\n",
3689 engine->name, lri, dw);
3690 dw++;
3691 continue;
3692 }
3693
3694 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
3695 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
3696 engine->name, dw, lri);
3697 err = -EINVAL;
3698 break;
3699 }
3700
3701 if (lrc[dw] != lri) {
3702 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
3703 engine->name, dw, lri, lrc[dw]);
3704 err = -EINVAL;
3705 break;
3706 }
3707
3708 lri &= 0x7f;
3709 lri++;
3710 dw++;
3711
3712 while (lri) {
3713 if (hw[dw] != lrc[dw]) {
3714 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
3715 engine->name, dw, hw[dw], lrc[dw]);
3716 err = -EINVAL;
3717 break;
3718 }
3719
3720 /*
3721 * Skip over the actual register value as we
3722 * expect that to differ.
3723 */
3724 dw += 2;
3725 lri -= 2;
3726 }
3727 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
3728
3729 if (err) {
3730 pr_info("%s: HW register image:\n", engine->name);
3731 hexdump(hw, PAGE_SIZE);
3732
3733 pr_info("%s: SW register image:\n", engine->name);
3734 hexdump(lrc, PAGE_SIZE);
3735 }
3736
3737 i915_gem_object_unpin_map(engine->default_state);
3738 if (err)
3739 break;
3740 }
3741
3742 kfree(lrc);
3743 return err;
3744 }
3745
3746 static int find_offset(const u32 *lri, u32 offset)
3747 {
3748 int i;
3749
3750 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
3751 if (lri[i] == offset)
3752 return i;
3753
3754 return -1;
3755 }
3756
3757 static int live_lrc_fixed(void *arg)
3758 {
3759 struct intel_gt *gt = arg;
3760 struct intel_engine_cs *engine;
3761 enum intel_engine_id id;
3762 int err = 0;
3763
3764 /*
3765 * Check the assumed register offsets match the actual locations in
3766 * the context image.
3767 */
3768
3769 for_each_engine(engine, gt, id) {
3770 const struct {
3771 u32 reg;
3772 u32 offset;
3773 const char *name;
3774 } tbl[] = {
3775 {
3776 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
3777 CTX_RING_START - 1,
3778 "RING_START"
3779 },
3780 {
3781 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
3782 CTX_RING_CTL - 1,
3783 "RING_CTL"
3784 },
3785 {
3786 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
3787 CTX_RING_HEAD - 1,
3788 "RING_HEAD"
3789 },
3790 {
3791 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
3792 CTX_RING_TAIL - 1,
3793 "RING_TAIL"
3794 },
3795 {
3796 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
3797 lrc_ring_mi_mode(engine),
3798 "RING_MI_MODE"
3799 },
3800 {
3801 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
3802 CTX_BB_STATE - 1,
3803 "BB_STATE"
3804 },
3805 { },
3806 }, *t;
3807 u32 *hw;
3808
3809 if (!engine->default_state)
3810 continue;
3811
3812 hw = i915_gem_object_pin_map(engine->default_state,
3813 I915_MAP_WB);
3814 if (IS_ERR(hw)) {
3815 err = PTR_ERR(hw);
3816 break;
3817 }
3818 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3819
3820 for (t = tbl; t->name; t++) {
3821 int dw = find_offset(hw, t->reg);
3822
3823 if (dw != t->offset) {
3824 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
3825 engine->name,
3826 t->name,
3827 t->reg,
3828 dw,
3829 t->offset);
3830 err = -EINVAL;
3831 }
3832 }
3833
3834 i915_gem_object_unpin_map(engine->default_state);
3835 }
3836
3837 return err;
3838 }
3839
3840 static int __live_lrc_state(struct intel_engine_cs *engine,
3841 struct i915_vma *scratch)
3842 {
3843 struct intel_context *ce;
3844 struct i915_request *rq;
3845 enum {
3846 RING_START_IDX = 0,
3847 RING_TAIL_IDX,
3848 MAX_IDX
3849 };
3850 u32 expected[MAX_IDX];
3851 u32 *cs;
3852 int err;
3853 int n;
3854
3855 ce = intel_context_create(engine);
3856 if (IS_ERR(ce))
3857 return PTR_ERR(ce);
3858
3859 err = intel_context_pin(ce);
3860 if (err)
3861 goto err_put;
3862
3863 rq = i915_request_create(ce);
3864 if (IS_ERR(rq)) {
3865 err = PTR_ERR(rq);
3866 goto err_unpin;
3867 }
3868
3869 cs = intel_ring_begin(rq, 4 * MAX_IDX);
3870 if (IS_ERR(cs)) {
3871 err = PTR_ERR(cs);
3872 i915_request_add(rq);
3873 goto err_unpin;
3874 }
3875
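 	/*
 	 * From inside the request, use SRM to sample the RING_START and
 	 * RING_TAIL values the HW is actually using for this context, then
 	 * compare them against the values we expect from the CPU side.
 	 */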
3876 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3877 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
3878 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
3879 *cs++ = 0;
3880
3881 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
3882
3883 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3884 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
3885 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
3886 *cs++ = 0;
3887
3888 i915_request_get(rq);
3889 i915_request_add(rq);
3890
3891 intel_engine_flush_submission(engine);
3892 expected[RING_TAIL_IDX] = ce->ring->tail;
3893
3894 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3895 err = -ETIME;
3896 goto err_rq;
3897 }
3898
3899 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3900 if (IS_ERR(cs)) {
3901 err = PTR_ERR(cs);
3902 goto err_rq;
3903 }
3904
3905 for (n = 0; n < MAX_IDX; n++) {
3906 if (cs[n] != expected[n]) {
3907 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
3908 engine->name, n, cs[n], expected[n]);
3909 err = -EINVAL;
3910 break;
3911 }
3912 }
3913
3914 i915_gem_object_unpin_map(scratch->obj);
3915
3916 err_rq:
3917 i915_request_put(rq);
3918 err_unpin:
3919 intel_context_unpin(ce);
3920 err_put:
3921 intel_context_put(ce);
3922 return err;
3923 }
3924
3925 static int live_lrc_state(void *arg)
3926 {
3927 struct intel_gt *gt = arg;
3928 struct intel_engine_cs *engine;
3929 struct i915_vma *scratch;
3930 enum intel_engine_id id;
3931 int err = 0;
3932
3933 /*
3934 * Check the live register state matches what we expect for this
3935 * intel_context.
3936 */
3937
3938 scratch = create_scratch(gt);
3939 if (IS_ERR(scratch))
3940 return PTR_ERR(scratch);
3941
3942 for_each_engine(engine, gt, id) {
3943 err = __live_lrc_state(engine, scratch);
3944 if (err)
3945 break;
3946 }
3947
3948 if (igt_flush_test(gt->i915))
3949 err = -EIO;
3950
3951 i915_vma_unpin_and_release(&scratch, 0);
3952 return err;
3953 }
3954
3955 static int gpr_make_dirty(struct intel_engine_cs *engine)
3956 {
3957 struct i915_request *rq;
3958 u32 *cs;
3959 int n;
3960
3961 rq = intel_engine_create_kernel_request(engine);
3962 if (IS_ERR(rq))
3963 return PTR_ERR(rq);
3964
3965 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
3966 if (IS_ERR(cs)) {
3967 i915_request_add(rq);
3968 return PTR_ERR(cs);
3969 }
3970
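 	/*
 	 * Scribble a well-known value over every GPR from the kernel
 	 * context, so that a subsequent fresh context can be checked for
 	 * leaks of this state.
 	 */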
3971 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
3972 for (n = 0; n < NUM_GPR_DW; n++) {
3973 *cs++ = CS_GPR(engine, n);
3974 *cs++ = STACK_MAGIC;
3975 }
3976 *cs++ = MI_NOOP;
3977
3978 intel_ring_advance(rq, cs);
3979 i915_request_add(rq);
3980
3981 return 0;
3982 }
3983
3984 static int __live_gpr_clear(struct intel_engine_cs *engine,
3985 struct i915_vma *scratch)
3986 {
3987 struct intel_context *ce;
3988 struct i915_request *rq;
3989 u32 *cs;
3990 int err;
3991 int n;
3992
3993 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
3994 return 0; /* GPR only on rcs0 for gen8 */
3995
3996 err = gpr_make_dirty(engine);
3997 if (err)
3998 return err;
3999
4000 ce = intel_context_create(engine);
4001 if (IS_ERR(ce))
4002 return PTR_ERR(ce);
4003
4004 rq = intel_context_create_request(ce);
4005 if (IS_ERR(rq)) {
4006 err = PTR_ERR(rq);
4007 goto err_put;
4008 }
4009
4010 cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
4011 if (IS_ERR(cs)) {
4012 err = PTR_ERR(cs);
4013 i915_request_add(rq);
4014 goto err_put;
4015 }
4016
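 	/*
 	 * Store every GPR of the fresh context into scratch; after
 	 * gpr_make_dirty() polluted them from another context, any non-zero
 	 * value read back below means context state has leaked.
 	 */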
4017 for (n = 0; n < NUM_GPR_DW; n++) {
4018 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4019 *cs++ = CS_GPR(engine, n);
4020 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4021 *cs++ = 0;
4022 }
4023
4024 i915_request_get(rq);
4025 i915_request_add(rq);
4026
4027 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4028 err = -ETIME;
4029 goto err_rq;
4030 }
4031
4032 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4033 if (IS_ERR(cs)) {
4034 err = PTR_ERR(cs);
4035 goto err_rq;
4036 }
4037
4038 for (n = 0; n < NUM_GPR_DW; n++) {
4039 if (cs[n]) {
4040 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4041 engine->name,
4042 n / 2, n & 1 ? "udw" : "ldw",
4043 cs[n]);
4044 err = -EINVAL;
4045 break;
4046 }
4047 }
4048
4049 i915_gem_object_unpin_map(scratch->obj);
4050
4051 err_rq:
4052 i915_request_put(rq);
4053 err_put:
4054 intel_context_put(ce);
4055 return err;
4056 }
4057
4058 static int live_gpr_clear(void *arg)
4059 {
4060 struct intel_gt *gt = arg;
4061 struct intel_engine_cs *engine;
4062 struct i915_vma *scratch;
4063 enum intel_engine_id id;
4064 int err = 0;
4065
4066 /*
4067 * Check that GPR registers are cleared in new contexts as we need
4068 * to avoid leaking any information from previous contexts.
4069 */
4070
4071 scratch = create_scratch(gt);
4072 if (IS_ERR(scratch))
4073 return PTR_ERR(scratch);
4074
4075 for_each_engine(engine, gt, id) {
4076 err = __live_gpr_clear(engine, scratch);
4077 if (err)
4078 break;
4079 }
4080
4081 if (igt_flush_test(gt->i915))
4082 err = -EIO;
4083
4084 i915_vma_unpin_and_release(&scratch, 0);
4085 return err;
4086 }
4087
4088 int intel_lrc_live_selftests(struct drm_i915_private *i915)
4089 {
4090 static const struct i915_subtest tests[] = {
4091 SUBTEST(live_lrc_layout),
4092 SUBTEST(live_lrc_fixed),
4093 SUBTEST(live_lrc_state),
4094 SUBTEST(live_gpr_clear),
4095 };
4096
4097 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
4098 return 0;
4099
4100 return intel_gt_live_subtests(tests, &i915->gt);
4101 }
4102