1 /* $NetBSD: i915_request.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $ */
2
3 /*
4 * Copyright © 2016 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 *
25 */
26
27 #include <sys/cdefs.h>
28 __KERNEL_RCSID(0, "$NetBSD: i915_request.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $");
29
30 #include <linux/prime_numbers.h>
31
32 #include "gem/i915_gem_pm.h"
33 #include "gem/selftests/mock_context.h"
34
35 #include "gt/intel_engine_pm.h"
36 #include "gt/intel_gt.h"
37
38 #include "i915_random.h"
39 #include "i915_selftest.h"
40 #include "igt_live_test.h"
41 #include "igt_spinner.h"
42 #include "lib_sw_fence.h"
43
44 #include "mock_drm.h"
45 #include "mock_gem_device.h"
46
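/*
 * Count the engines exposed on the uabi list; used to size the per-engine
 * request and thread arrays in the live tests below.
 */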
47 static unsigned int num_uabi_engines(struct drm_i915_private *i915)
48 {
49 struct intel_engine_cs *engine;
50 unsigned int count;
51
52 count = 0;
53 for_each_uabi_engine(engine, i915)
54 count++;
55
56 return count;
57 }
58
59 static int igt_add_request(void *arg)
60 {
61 struct drm_i915_private *i915 = arg;
62 struct i915_request *request;
63
64 /* Basic preliminary test to create a request and let it loose! */
65
66 request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
67 if (!request)
68 return -ENOMEM;
69
70 i915_request_add(request);
71
72 return 0;
73 }
74
75 static int igt_wait_request(void *arg)
76 {
77 const long T = HZ / 4;
78 struct drm_i915_private *i915 = arg;
79 struct i915_request *request;
80 int err = -EINVAL;
81
82 /* Submit a request, then wait upon it */
83
84 request = mock_request(i915->engine[RCS0]->kernel_context, T);
85 if (!request)
86 return -ENOMEM;
87
88 i915_request_get(request);
89
90 if (i915_request_wait(request, 0, 0) != -ETIME) {
91 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
92 goto out_request;
93 }
94
95 if (i915_request_wait(request, 0, T) != -ETIME) {
96 pr_err("request wait succeeded (expected timeout before submit!)\n");
97 goto out_request;
98 }
99
100 if (i915_request_completed(request)) {
101 pr_err("request completed before submit!!\n");
102 goto out_request;
103 }
104
105 i915_request_add(request);
106
107 if (i915_request_wait(request, 0, 0) != -ETIME) {
108 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
109 goto out_request;
110 }
111
112 if (i915_request_completed(request)) {
113 pr_err("request completed immediately!\n");
114 goto out_request;
115 }
116
117 if (i915_request_wait(request, 0, T / 2) != -ETIME) {
118 pr_err("request wait succeeded (expected timeout!)\n");
119 goto out_request;
120 }
121
122 if (i915_request_wait(request, 0, T) == -ETIME) {
123 pr_err("request wait timed out!\n");
124 goto out_request;
125 }
126
127 if (!i915_request_completed(request)) {
128 pr_err("request not complete after waiting!\n");
129 goto out_request;
130 }
131
132 if (i915_request_wait(request, 0, T) == -ETIME) {
133 pr_err("request wait timed out when already complete!\n");
134 goto out_request;
135 }
136
137 err = 0;
138 out_request:
139 i915_request_put(request);
140 mock_device_flush(i915);
141 return err;
142 }
143
144 static int igt_fence_wait(void *arg)
145 {
146 const long T = HZ / 4;
147 struct drm_i915_private *i915 = arg;
148 struct i915_request *request;
149 int err = -EINVAL;
150
151 /* Submit a request, treat it as a fence and wait upon it */
152
153 request = mock_request(i915->engine[RCS0]->kernel_context, T);
154 if (!request)
155 return -ENOMEM;
156
157 if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
158 pr_err("fence wait success before submit (expected timeout)!\n");
159 goto out;
160 }
161
162 i915_request_add(request);
163
164 if (dma_fence_is_signaled(&request->fence)) {
165 pr_err("fence signaled immediately!\n");
166 goto out;
167 }
168
169 if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
170 pr_err("fence wait success after submit (expected timeout)!\n");
171 goto out;
172 }
173
174 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
175 pr_err("fence wait timed out (expected success)!\n");
176 goto out;
177 }
178
179 if (!dma_fence_is_signaled(&request->fence)) {
180 pr_err("fence unsignaled after waiting!\n");
181 goto out;
182 }
183
184 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
185 pr_err("fence wait timed out when complete (expected success)!\n");
186 goto out;
187 }
188
189 err = 0;
190 out:
191 mock_device_flush(i915);
192 return err;
193 }
194
195 static int igt_request_rewind(void *arg)
196 {
197 struct drm_i915_private *i915 = arg;
198 struct i915_request *request, *vip;
199 struct i915_gem_context *ctx[2];
200 struct intel_context *ce;
201 int err = -EINVAL;
202
203 ctx[0] = mock_context(i915, "A");
204
205 ce = i915_gem_context_get_engine(ctx[0], RCS0);
206 GEM_BUG_ON(IS_ERR(ce));
207 request = mock_request(ce, 2 * HZ);
208 intel_context_put(ce);
209 if (!request) {
210 err = -ENOMEM;
211 goto err_context_0;
212 }
213
214 i915_request_get(request);
215 i915_request_add(request);
216
217 ctx[1] = mock_context(i915, "B");
218
219 ce = i915_gem_context_get_engine(ctx[1], RCS0);
220 GEM_BUG_ON(IS_ERR(ce));
221 vip = mock_request(ce, 0);
222 intel_context_put(ce);
223 if (!vip) {
224 err = -ENOMEM;
225 goto err_context_1;
226 }
227
228 /* Simulate preemption by manual reordering */
229 if (!mock_cancel_request(request)) {
230 pr_err("failed to cancel request (already executed)!\n");
231 i915_request_add(vip);
232 goto err_context_1;
233 }
234 i915_request_get(vip);
235 i915_request_add(vip);
236 rcu_read_lock();
237 request->engine->submit_request(request);
238 rcu_read_unlock();
239
240
241 if (i915_request_wait(vip, 0, HZ) == -ETIME) {
242 pr_err("timed out waiting for high priority request\n");
243 goto err;
244 }
245
246 if (i915_request_completed(request)) {
247 pr_err("low priority request already completed\n");
248 goto err;
249 }
250
251 err = 0;
252 err:
253 i915_request_put(vip);
254 err_context_1:
255 mock_context_close(ctx[1]);
256 i915_request_put(request);
257 err_context_0:
258 mock_context_close(ctx[0]);
259 mock_device_flush(i915);
260 return err;
261 }
262
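/*
 * Shared state for the breadcrumb smoketests: each kthread picks random
 * contexts from @contexts, allocates up to @max_batch requests on @engine
 * via @request_alloc, and accumulates its totals into the atomic counters.
 */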
263 struct smoketest {
264 struct intel_engine_cs *engine;
265 struct i915_gem_context **contexts;
266 atomic_long_t num_waits, num_fences;
267 int ncontexts, max_batch;
268 struct i915_request *(*request_alloc)(struct intel_context *ce);
269 };
270
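/* Request constructors: mock requests for the mock device, real ones for live tests. */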
271 static struct i915_request *
272 __mock_request_alloc(struct intel_context *ce)
273 {
274 return mock_request(ce, 0);
275 }
276
277 static struct i915_request *
278 __live_request_alloc(struct intel_context *ce)
279 {
280 return intel_context_create_request(ce);
281 }
282
283 static int __igt_breadcrumbs_smoketest(void *arg)
284 {
285 struct smoketest *t = arg;
286 const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
287 const unsigned int total = 4 * t->ncontexts + 1;
288 unsigned int num_waits = 0, num_fences = 0;
289 struct i915_request **requests;
290 I915_RND_STATE(prng);
291 unsigned int *order;
292 int err = 0;
293
294 /*
295 * A very simple test to catch the most egregious of list handling bugs.
296 *
297 * At its heart, we simply create oodles of requests running across
298 * multiple kthreads and enable signaling on them, for the sole purpose
299 * of stressing our breadcrumb handling. The only inspection we do is
300 * that the fences were marked as signaled.
301 */
302
303 requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
304 if (!requests)
305 return -ENOMEM;
306
307 order = i915_random_order(total, &prng);
308 if (!order) {
309 err = -ENOMEM;
310 goto out_requests;
311 }
312
313 while (!kthread_should_stop()) {
314 struct i915_sw_fence *submit, *wait;
315 unsigned int n, count;
316
317 submit = heap_fence_create(GFP_KERNEL);
318 if (!submit) {
319 err = -ENOMEM;
320 break;
321 }
322
323 wait = heap_fence_create(GFP_KERNEL);
324 if (!wait) {
325 i915_sw_fence_commit(submit);
326 heap_fence_put(submit);
327 err = -ENOMEM;
328 break;
329 }
330
331 i915_random_reorder(order, total, &prng);
332 count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
333
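/*
 * Hold every request back on the 'submit' fence and chain its completion
 * onto the 'wait' fence, so the whole batch is released at once and we
 * can wait for all of them together.
 */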
334 for (n = 0; n < count; n++) {
335 struct i915_gem_context *ctx =
336 t->contexts[order[n] % t->ncontexts];
337 struct i915_request *rq;
338 struct intel_context *ce;
339
340 ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
341 GEM_BUG_ON(IS_ERR(ce));
342 rq = t->request_alloc(ce);
343 intel_context_put(ce);
344 if (IS_ERR(rq)) {
345 err = PTR_ERR(rq);
346 count = n;
347 break;
348 }
349
350 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
351 submit,
352 GFP_KERNEL);
353
354 requests[n] = i915_request_get(rq);
355 i915_request_add(rq);
356
357 if (err >= 0)
358 err = i915_sw_fence_await_dma_fence(wait,
359 &rq->fence,
360 0,
361 GFP_KERNEL);
362
363 if (err < 0) {
364 i915_request_put(rq);
365 count = n;
366 break;
367 }
368 }
369
370 i915_sw_fence_commit(submit);
371 i915_sw_fence_commit(wait);
372
373 if (!wait_event_timeout(wait->wait,
374 i915_sw_fence_done(wait),
375 5 * HZ)) {
376 struct i915_request *rq = requests[count - 1];
377
378 pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
379 atomic_read(&wait->pending), count,
380 rq->fence.context, rq->fence.seqno,
381 t->engine->name);
382 GEM_TRACE_DUMP();
383
384 intel_gt_set_wedged(t->engine->gt);
385 GEM_BUG_ON(!i915_request_completed(rq));
386 i915_sw_fence_wait(wait);
387 err = -EIO;
388 }
389
390 for (n = 0; n < count; n++) {
391 struct i915_request *rq = requests[n];
392
393 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
394 &rq->fence.flags)) {
395 pr_err("%llu:%llu was not signaled!\n",
396 rq->fence.context, rq->fence.seqno);
397 err = -EINVAL;
398 }
399
400 i915_request_put(rq);
401 }
402
403 heap_fence_put(wait);
404 heap_fence_put(submit);
405
406 if (err < 0)
407 break;
408
409 num_fences += count;
410 num_waits++;
411
412 cond_resched();
413 }
414
415 atomic_long_add(num_fences, &t->num_fences);
416 atomic_long_add(num_waits, &t->num_waits);
417
418 kfree(order);
419 out_requests:
420 kfree(requests);
421 return err;
422 }
423
424 static int mock_breadcrumbs_smoketest(void *arg)
425 {
426 struct drm_i915_private *i915 = arg;
427 struct smoketest t = {
428 .engine = i915->engine[RCS0],
429 .ncontexts = 1024,
430 .max_batch = 1024,
431 .request_alloc = __mock_request_alloc
432 };
433 unsigned int ncpus = num_online_cpus();
434 struct task_struct **threads;
435 unsigned int n;
436 int ret = 0;
437
438 /*
439 * Smoketest our breadcrumb/signal handling for requests across multiple
440 * threads. A very simple test to only catch the most egregious of bugs.
441 * See __igt_breadcrumbs_smoketest();
442 */
443
444 threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
445 if (!threads)
446 return -ENOMEM;
447
448 t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
449 if (!t.contexts) {
450 ret = -ENOMEM;
451 goto out_threads;
452 }
453
454 for (n = 0; n < t.ncontexts; n++) {
455 t.contexts[n] = mock_context(t.engine->i915, "mock");
456 if (!t.contexts[n]) {
457 ret = -ENOMEM;
458 goto out_contexts;
459 }
460 }
461
462 for (n = 0; n < ncpus; n++) {
463 threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
464 &t, "igt/%d", n);
465 if (IS_ERR(threads[n])) {
466 ret = PTR_ERR(threads[n]);
467 ncpus = n;
468 break;
469 }
470
471 get_task_struct(threads[n]);
472 }
473
474 yield(); /* start all threads before we begin */
475 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
476
477 for (n = 0; n < ncpus; n++) {
478 int err;
479
480 err = kthread_stop(threads[n]);
481 if (err < 0 && !ret)
482 ret = err;
483
484 put_task_struct(threads[n]);
485 }
486 pr_info("Completed %lu waits for %lu fence across %d cpus\n",
487 atomic_long_read(&t.num_waits),
488 atomic_long_read(&t.num_fences),
489 ncpus);
490
491 out_contexts:
492 for (n = 0; n < t.ncontexts; n++) {
493 if (!t.contexts[n])
494 break;
495 mock_context_close(t.contexts[n]);
496 }
497 kfree(t.contexts);
498 out_threads:
499 kfree(threads);
500 return ret;
501 }
502
503 int i915_request_mock_selftests(void)
504 {
505 static const struct i915_subtest tests[] = {
506 SUBTEST(igt_add_request),
507 SUBTEST(igt_wait_request),
508 SUBTEST(igt_fence_wait),
509 SUBTEST(igt_request_rewind),
510 SUBTEST(mock_breadcrumbs_smoketest),
511 };
512 struct drm_i915_private *i915;
513 intel_wakeref_t wakeref;
514 int err = 0;
515
516 i915 = mock_gem_device();
517 if (!i915)
518 return -ENOMEM;
519
520 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
521 err = i915_subtests(tests, i915);
522
523 drm_dev_put(&i915->drm);
524
525 return err;
526 }
527
528 static int live_nop_request(void *arg)
529 {
530 struct drm_i915_private *i915 = arg;
531 struct intel_engine_cs *engine;
532 struct igt_live_test t;
533 int err = -ENODEV;
534
535 /*
536 * Submit various sized batches of empty requests, to each engine
537 * (individually), and wait for the batch to complete. We can check
538 * the overhead of submitting requests to the hardware.
539 */
540
541 for_each_uabi_engine(engine, i915) {
542 unsigned long n, prime;
543 IGT_TIMEOUT(end_time);
544 ktime_t times[2] = {};
545
546 err = igt_live_test_begin(&t, i915, __func__, engine->name);
547 if (err)
548 return err;
549
550 intel_engine_pm_get(engine);
551 for_each_prime_number_from(prime, 1, 8192) {
552 struct i915_request *request = NULL;
553
554 times[1] = ktime_get_raw();
555
556 for (n = 0; n < prime; n++) {
557 i915_request_put(request);
558 request = i915_request_create(engine->kernel_context);
559 if (IS_ERR(request))
560 return PTR_ERR(request);
561
562 /*
563 * This space is left intentionally blank.
564 *
565 * We do not actually want to perform any
566 * action with this request, we just want
567 * to measure the latency in allocation
568 * and submission of our breadcrumbs -
569 * ensuring that the bare request is sufficient
570 * for the system to work (i.e. proper HEAD
571 * tracking of the rings, interrupt handling,
572 * etc). It also gives us the lowest bounds
573 * for latency.
574 */
575
576 i915_request_get(request);
577 i915_request_add(request);
578 }
579 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
580 i915_request_put(request);
581
582 times[1] = ktime_sub(ktime_get_raw(), times[1]);
583 if (prime == 1)
584 times[0] = times[1];
585
586 if (__igt_timeout(end_time, NULL))
587 break;
588 }
589 intel_engine_pm_put(engine);
590
591 err = igt_live_test_end(&t);
592 if (err)
593 return err;
594
595 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
596 engine->name,
597 ktime_to_ns(times[0]),
598 prime, div64_u64(ktime_to_ns(times[1]), prime));
599 }
600
601 return err;
602 }
603
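/*
 * Build a one-page batch containing just MI_BATCH_BUFFER_END, pinned into
 * the GGTT so it can be submitted with minimal overhead.
 */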
604 static struct i915_vma *empty_batch(struct drm_i915_private *i915)
605 {
606 struct drm_i915_gem_object *obj;
607 struct i915_vma *vma;
608 u32 *cmd;
609 int err;
610
611 obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
612 if (IS_ERR(obj))
613 return ERR_CAST(obj);
614
615 cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
616 if (IS_ERR(cmd)) {
617 err = PTR_ERR(cmd);
618 goto err;
619 }
620
621 *cmd = MI_BATCH_BUFFER_END;
622
623 __i915_gem_object_flush_map(obj, 0, 64);
624 i915_gem_object_unpin_map(obj);
625
626 intel_gt_chipset_flush(&i915->gt);
627
628 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
629 if (IS_ERR(vma)) {
630 err = PTR_ERR(vma);
631 goto err;
632 }
633
634 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
635 if (err)
636 goto err;
637
638 /* Force the wait now to avoid including it in the benchmark */
639 err = i915_vma_sync(vma);
640 if (err)
641 goto err_pin;
642
643 return vma;
644
645 err_pin:
646 i915_vma_unpin(vma);
647 err:
648 i915_gem_object_put(obj);
649 return ERR_PTR(err);
650 }
651
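/*
 * Submit the empty batch on the engine's kernel context and return the
 * request (with a reference held) for the caller to wait upon.
 */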
652 static struct i915_request *
653 empty_request(struct intel_engine_cs *engine,
654 struct i915_vma *batch)
655 {
656 struct i915_request *request;
657 int err;
658
659 request = i915_request_create(engine->kernel_context);
660 if (IS_ERR(request))
661 return request;
662
663 err = engine->emit_bb_start(request,
664 batch->node.start,
665 batch->node.size,
666 I915_DISPATCH_SECURE);
667 if (err)
668 goto out_request;
669
670 i915_request_get(request);
671 out_request:
672 i915_request_add(request);
673 return err ? ERR_PTR(err) : request;
674 }
675
676 static int live_empty_request(void *arg)
677 {
678 struct drm_i915_private *i915 = arg;
679 struct intel_engine_cs *engine;
680 struct igt_live_test t;
681 struct i915_vma *batch;
682 int err = 0;
683
684 /*
685 * Submit various sized batches of empty requests, to each engine
686 * (individually), and wait for the batch to complete. We can check
687 * the overhead of submitting requests to the hardware.
688 */
689
690 batch = empty_batch(i915);
691 if (IS_ERR(batch))
692 return PTR_ERR(batch);
693
694 for_each_uabi_engine(engine, i915) {
695 IGT_TIMEOUT(end_time);
696 struct i915_request *request;
697 unsigned long n, prime;
698 ktime_t times[2] = {};
699
700 err = igt_live_test_begin(&t, i915, __func__, engine->name);
701 if (err)
702 goto out_batch;
703
704 intel_engine_pm_get(engine);
705
706 /* Warmup / preload */
707 request = empty_request(engine, batch);
708 if (IS_ERR(request)) {
709 err = PTR_ERR(request);
710 intel_engine_pm_put(engine);
711 goto out_batch;
712 }
713 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
714
715 for_each_prime_number_from(prime, 1, 8192) {
716 times[1] = ktime_get_raw();
717
718 for (n = 0; n < prime; n++) {
719 i915_request_put(request);
720 request = empty_request(engine, batch);
721 if (IS_ERR(request)) {
722 err = PTR_ERR(request);
723 intel_engine_pm_put(engine);
724 goto out_batch;
725 }
726 }
727 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
728
729 times[1] = ktime_sub(ktime_get_raw(), times[1]);
730 if (prime == 1)
731 times[0] = times[1];
732
733 if (__igt_timeout(end_time, NULL))
734 break;
735 }
736 i915_request_put(request);
737 intel_engine_pm_put(engine);
738
739 err = igt_live_test_end(&t);
740 if (err)
741 goto out_batch;
742
743 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
744 engine->name,
745 ktime_to_ns(times[0]),
746 prime, div64_u64(ktime_to_ns(times[1]), prime));
747 }
748
749 out_batch:
750 i915_vma_unpin(batch);
751 i915_vma_put(batch);
752 return err;
753 }
754
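/*
 * Build a batch whose first instruction jumps back to itself, so it runs
 * forever once started; recursive_batch_resolve() later overwrites that
 * jump with MI_BATCH_BUFFER_END to let it complete.
 */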
755 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
756 {
757 struct drm_i915_gem_object *obj;
758 const int gen = INTEL_GEN(i915);
759 struct i915_vma *vma;
760 u32 *cmd;
761 int err;
762
763 obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
764 if (IS_ERR(obj))
765 return ERR_CAST(obj);
766
767 vma = i915_vma_instance(obj, i915->gt.vm, NULL);
768 if (IS_ERR(vma)) {
769 err = PTR_ERR(vma);
770 goto err;
771 }
772
773 err = i915_vma_pin(vma, 0, 0, PIN_USER);
774 if (err)
775 goto err;
776
777 cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
778 if (IS_ERR(cmd)) {
779 err = PTR_ERR(cmd);
780 goto err;
781 }
782
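/*
 * Emit the self-referencing jump; the MI_BATCH_BUFFER_START encoding
 * (address width and address-space flags) differs between generations.
 */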
783 if (gen >= 8) {
784 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
785 *cmd++ = lower_32_bits(vma->node.start);
786 *cmd++ = upper_32_bits(vma->node.start);
787 } else if (gen >= 6) {
788 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
789 *cmd++ = lower_32_bits(vma->node.start);
790 } else {
791 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
792 *cmd++ = lower_32_bits(vma->node.start);
793 }
794 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
795
796 __i915_gem_object_flush_map(obj, 0, 64);
797 i915_gem_object_unpin_map(obj);
798
799 intel_gt_chipset_flush(&i915->gt);
800
801 return vma;
802
803 err:
804 i915_gem_object_put(obj);
805 return ERR_PTR(err);
806 }
807
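/* Terminate a recursive_batch() by replacing its jump with a batch-buffer end. */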
808 static int recursive_batch_resolve(struct i915_vma *batch)
809 {
810 u32 *cmd;
811
812 cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
813 if (IS_ERR(cmd))
814 return PTR_ERR(cmd);
815
816 *cmd = MI_BATCH_BUFFER_END;
817 intel_gt_chipset_flush(batch->vm->gt);
818
819 i915_gem_object_unpin_map(batch->obj);
820
821 return 0;
822 }
823
824 static int live_all_engines(void *arg)
825 {
826 struct drm_i915_private *i915 = arg;
827 const unsigned int nengines = num_uabi_engines(i915);
828 struct intel_engine_cs *engine;
829 struct i915_request **request;
830 struct igt_live_test t;
831 struct i915_vma *batch;
832 unsigned int idx;
833 int err;
834
835 /*
836 * Check we can submit requests to all engines simultaneously. We
837 * send a recursive batch to each engine - checking that we don't
838 * block doing so, and that they don't complete too soon.
839 */
840
841 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
842 if (!request)
843 return -ENOMEM;
844
845 err = igt_live_test_begin(&t, i915, __func__, "");
846 if (err)
847 goto out_free;
848
849 batch = recursive_batch(i915);
850 if (IS_ERR(batch)) {
851 err = PTR_ERR(batch);
852 pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
853 goto out_free;
854 }
855
856 idx = 0;
857 for_each_uabi_engine(engine, i915) {
858 request[idx] = intel_engine_create_kernel_request(engine);
859 if (IS_ERR(request[idx])) {
860 err = PTR_ERR(request[idx]);
861 pr_err("%s: Request allocation failed with err=%d\n",
862 __func__, err);
863 goto out_request;
864 }
865
866 err = engine->emit_bb_start(request[idx],
867 batch->node.start,
868 batch->node.size,
869 0);
870 GEM_BUG_ON(err);
871 request[idx]->batch = batch;
872
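/* Mark the shared batch as busy on each request so its lifetime covers their execution. */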
873 i915_vma_lock(batch);
874 err = i915_request_await_object(request[idx], batch->obj, 0);
875 if (err == 0)
876 err = i915_vma_move_to_active(batch, request[idx], 0);
877 i915_vma_unlock(batch);
878 GEM_BUG_ON(err);
879
880 i915_request_get(request[idx]);
881 i915_request_add(request[idx]);
882 idx++;
883 }
884
885 idx = 0;
886 for_each_uabi_engine(engine, i915) {
887 if (i915_request_completed(request[idx])) {
888 pr_err("%s(%s): request completed too early!\n",
889 __func__, engine->name);
890 err = -EINVAL;
891 goto out_request;
892 }
893 idx++;
894 }
895
896 err = recursive_batch_resolve(batch);
897 if (err) {
898 pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
899 goto out_request;
900 }
901
902 idx = 0;
903 for_each_uabi_engine(engine, i915) {
904 long timeout;
905
906 timeout = i915_request_wait(request[idx], 0,
907 MAX_SCHEDULE_TIMEOUT);
908 if (timeout < 0) {
909 err = timeout;
910 pr_err("%s: error waiting for request on %s, err=%d\n",
911 __func__, engine->name, err);
912 goto out_request;
913 }
914
915 GEM_BUG_ON(!i915_request_completed(request[idx]));
916 i915_request_put(request[idx]);
917 request[idx] = NULL;
918 idx++;
919 }
920
921 err = igt_live_test_end(&t);
922
923 out_request:
924 idx = 0;
925 for_each_uabi_engine(engine, i915) {
926 if (request[idx])
927 i915_request_put(request[idx]);
928 idx++;
929 }
930 i915_vma_unpin(batch);
931 i915_vma_put(batch);
932 out_free:
933 kfree(request);
934 return err;
935 }
936
937 static int live_sequential_engines(void *arg)
938 {
939 struct drm_i915_private *i915 = arg;
940 const unsigned int nengines = num_uabi_engines(i915);
941 struct i915_request **request;
942 struct i915_request *prev = NULL;
943 struct intel_engine_cs *engine;
944 struct igt_live_test t;
945 unsigned int idx;
946 int err;
947
948 /*
949 * Check we can submit requests to all engines sequentially, such
950 * that each successive request waits for the earlier ones. This
951 * tests that we don't execute requests out of order, even though
952 * they are running on independent engines.
953 */
954
955 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
956 if (!request)
957 return -ENOMEM;
958
959 err = igt_live_test_begin(&t, i915, __func__, "");
960 if (err)
961 goto out_free;
962
963 idx = 0;
964 for_each_uabi_engine(engine, i915) {
965 struct i915_vma *batch;
966
967 batch = recursive_batch(i915);
968 if (IS_ERR(batch)) {
969 err = PTR_ERR(batch);
970 pr_err("%s: Unable to create batch for %s, err=%d\n",
971 __func__, engine->name, err);
972 goto out_free;
973 }
974
975 request[idx] = intel_engine_create_kernel_request(engine);
976 if (IS_ERR(request[idx])) {
977 err = PTR_ERR(request[idx]);
978 pr_err("%s: Request allocation failed for %s with err=%d\n",
979 __func__, engine->name, err);
980 goto out_request;
981 }
982
983 if (prev) {
984 err = i915_request_await_dma_fence(request[idx],
985 &prev->fence);
986 if (err) {
987 i915_request_add(request[idx]);
988 pr_err("%s: Request await failed for %s with err=%d\n",
989 __func__, engine->name, err);
990 goto out_request;
991 }
992 }
993
994 err = engine->emit_bb_start(request[idx],
995 batch->node.start,
996 batch->node.size,
997 0);
998 GEM_BUG_ON(err);
999 request[idx]->batch = batch;
1000
1001 i915_vma_lock(batch);
1002 err = i915_request_await_object(request[idx],
1003 batch->obj, false);
1004 if (err == 0)
1005 err = i915_vma_move_to_active(batch, request[idx], 0);
1006 i915_vma_unlock(batch);
1007 GEM_BUG_ON(err);
1008
1009 i915_request_get(request[idx]);
1010 i915_request_add(request[idx]);
1011
1012 prev = request[idx];
1013 idx++;
1014 }
1015
1016 idx = 0;
1017 for_each_uabi_engine(engine, i915) {
1018 long timeout;
1019
1020 if (i915_request_completed(request[idx])) {
1021 pr_err("%s(%s): request completed too early!\n",
1022 __func__, engine->name);
1023 err = -EINVAL;
1024 goto out_request;
1025 }
1026
1027 err = recursive_batch_resolve(request[idx]->batch);
1028 if (err) {
1029 pr_err("%s: failed to resolve batch, err=%d\n",
1030 __func__, err);
1031 goto out_request;
1032 }
1033
1034 timeout = i915_request_wait(request[idx], 0,
1035 MAX_SCHEDULE_TIMEOUT);
1036 if (timeout < 0) {
1037 err = timeout;
1038 pr_err("%s: error waiting for request on %s, err=%d\n",
1039 __func__, engine->name, err);
1040 goto out_request;
1041 }
1042
1043 GEM_BUG_ON(!i915_request_completed(request[idx]));
1044 idx++;
1045 }
1046
1047 err = igt_live_test_end(&t);
1048
1049 out_request:
1050 idx = 0;
1051 for_each_uabi_engine(engine, i915) {
1052 u32 *cmd;
1053
1054 if (!request[idx])
1055 break;
1056
1057 cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
1058 I915_MAP_WC);
1059 if (!IS_ERR(cmd)) {
1060 *cmd = MI_BATCH_BUFFER_END;
1061 intel_gt_chipset_flush(engine->gt);
1062
1063 i915_gem_object_unpin_map(request[idx]->batch->obj);
1064 }
1065
1066 i915_vma_put(request[idx]->batch);
1067 i915_request_put(request[idx]);
1068 idx++;
1069 }
1070 out_free:
1071 kfree(request);
1072 return err;
1073 }
1074
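/*
 * Per-engine worker: submit a request and synchronously wait for it,
 * counting how many such submit+wait cycles fit in the timeout.
 */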
1075 static int __live_parallel_engine1(void *arg)
1076 {
1077 struct intel_engine_cs *engine = arg;
1078 IGT_TIMEOUT(end_time);
1079 unsigned long count;
1080 int err = 0;
1081
1082 count = 0;
1083 intel_engine_pm_get(engine);
1084 do {
1085 struct i915_request *rq;
1086
1087 rq = i915_request_create(engine->kernel_context);
1088 if (IS_ERR(rq)) {
1089 err = PTR_ERR(rq);
1090 break;
1091 }
1092
1093 i915_request_get(rq);
1094 i915_request_add(rq);
1095
1096 err = 0;
1097 if (i915_request_wait(rq, 0, HZ / 5) < 0)
1098 err = -ETIME;
1099 i915_request_put(rq);
1100 if (err)
1101 break;
1102
1103 count++;
1104 } while (!__igt_timeout(end_time, NULL));
1105 intel_engine_pm_put(engine);
1106
1107 pr_info("%s: %lu request + sync\n", engine->name, count);
1108 return err;
1109 }
1110
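/*
 * Per-engine worker: submit requests back-to-back without waiting,
 * counting how many we can queue before the timeout.
 */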
1111 static int __live_parallel_engineN(void *arg)
1112 {
1113 struct intel_engine_cs *engine = arg;
1114 IGT_TIMEOUT(end_time);
1115 unsigned long count;
1116 int err = 0;
1117
1118 count = 0;
1119 intel_engine_pm_get(engine);
1120 do {
1121 struct i915_request *rq;
1122
1123 rq = i915_request_create(engine->kernel_context);
1124 if (IS_ERR(rq)) {
1125 err = PTR_ERR(rq);
1126 break;
1127 }
1128
1129 i915_request_add(rq);
1130 count++;
1131 } while (!__igt_timeout(end_time, NULL));
1132 intel_engine_pm_put(engine);
1133
1134 pr_info("%s: %lu requests\n", engine->name, count);
1135 return err;
1136 }
1137
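/*
 * Drop one reference on the shared selftest counter; the last worker to
 * arrive wakes everyone waiting in wait_for_all().
 */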
1138 static bool wake_all(struct drm_i915_private *i915)
1139 {
1140 if (atomic_dec_and_test(&i915->selftest.counter)) {
1141 wake_up_var(&i915->selftest.counter);
1142 return true;
1143 }
1144
1145 return false;
1146 }
1147
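/*
 * Rendezvous point for the parallel spinners: wait until every engine's
 * worker has checked in, or give up after the selftest timeout.
 */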
1148 static int wait_for_all(struct drm_i915_private *i915)
1149 {
1150 if (wake_all(i915))
1151 return 0;
1152
1153 if (wait_var_event_timeout(&i915->selftest.counter,
1154 !atomic_read(&i915->selftest.counter),
1155 i915_selftest.timeout_jiffies))
1156 return 0;
1157
1158 return -ETIME;
1159 }
1160
1161 static int __live_parallel_spin(void *arg)
1162 {
1163 struct intel_engine_cs *engine = arg;
1164 struct igt_spinner spin;
1165 struct i915_request *rq;
1166 int err = 0;
1167
1168 /*
1169 * Create a spinner running for eternity on each engine. If a second
1170 * spinner is incorrectly placed on the same engine, it will not be
1171 * able to start in time.
1172 */
1173
1174 if (igt_spinner_init(&spin, engine->gt)) {
1175 wake_all(engine->i915);
1176 return -ENOMEM;
1177 }
1178
1179 intel_engine_pm_get(engine);
1180 rq = igt_spinner_create_request(&spin,
1181 engine->kernel_context,
1182 MI_NOOP); /* no preemption */
1183 intel_engine_pm_put(engine);
1184 if (IS_ERR(rq)) {
1185 err = PTR_ERR(rq);
1186 if (err == -ENODEV)
1187 err = 0;
1188 wake_all(engine->i915);
1189 goto out_spin;
1190 }
1191
1192 i915_request_get(rq);
1193 i915_request_add(rq);
1194 if (igt_wait_for_spinner(&spin, rq)) {
1195 /* Occupy this engine for the whole test */
1196 err = wait_for_all(engine->i915);
1197 } else {
1198 pr_err("Failed to start spinner on %s\n", engine->name);
1199 err = -EINVAL;
1200 }
1201 igt_spinner_end(&spin);
1202
1203 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0)
1204 err = -EIO;
1205 i915_request_put(rq);
1206
1207 out_spin:
1208 igt_spinner_fini(&spin);
1209 return err;
1210 }
1211
1212 static int live_parallel_engines(void *arg)
1213 {
1214 struct drm_i915_private *i915 = arg;
1215 static int (* const func[])(void *arg) = {
1216 __live_parallel_engine1,
1217 __live_parallel_engineN,
1218 __live_parallel_spin,
1219 NULL,
1220 };
1221 const unsigned int nengines = num_uabi_engines(i915);
1222 struct intel_engine_cs *engine;
1223 int (* const *fn)(void *arg);
1224 struct task_struct **tsk;
1225 int err = 0;
1226
1227 /*
1228 * Check we can submit requests to all engines concurrently. This
1229 * tests that we load up the system maximally.
1230 */
1231
1232 tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
1233 if (!tsk)
1234 return -ENOMEM;
1235
1236 for (fn = func; !err && *fn; fn++) {
1237 char name[KSYM_NAME_LEN];
1238 struct igt_live_test t;
1239 unsigned int idx;
1240
1241 snprintf(name, sizeof(name), "%pS", fn);
1242 err = igt_live_test_begin(&t, i915, __func__, name);
1243 if (err)
1244 break;
1245
1246 atomic_set(&i915->selftest.counter, nengines);
1247
1248 idx = 0;
1249 for_each_uabi_engine(engine, i915) {
1250 tsk[idx] = kthread_run(*fn, engine,
1251 "igt/parallel:%s",
1252 engine->name);
1253 if (IS_ERR(tsk[idx])) {
1254 err = PTR_ERR(tsk[idx]);
1255 break;
1256 }
1257 get_task_struct(tsk[idx++]);
1258 }
1259
1260 yield(); /* start all threads before we kthread_stop() */
1261
1262 idx = 0;
1263 for_each_uabi_engine(engine, i915) {
1264 int status;
1265
1266 if (IS_ERR(tsk[idx]))
1267 break;
1268
1269 status = kthread_stop(tsk[idx]);
1270 if (status && !err)
1271 err = status;
1272
1273 put_task_struct(tsk[idx++]);
1274 }
1275
1276 if (igt_live_test_end(&t))
1277 err = -EIO;
1278 }
1279
1280 kfree(tsk);
1281 return err;
1282 }
1283
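/* Estimate how many requests we may queue without risking a wrap of a legacy (non-execlists) ring. */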
1284 static int
1285 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1286 {
1287 struct i915_request *rq;
1288 int ret;
1289
1290 /*
1291 * Before execlists, all contexts share the same ringbuffer. With
1292 * execlists, each context/engine has a separate ringbuffer and
1293 * for the purposes of this test, inexhaustible.
1294 *
1295 * For the global ringbuffer though, we have to be very careful
1296 * that we do not wrap while preventing the execution of requests
1297 * with an unsignaled fence.
1298 */
1299 if (HAS_EXECLISTS(ctx->i915))
1300 return INT_MAX;
1301
1302 rq = igt_request_alloc(ctx, engine);
1303 if (IS_ERR(rq)) {
1304 ret = PTR_ERR(rq);
1305 } else {
1306 int sz;
1307
1308 ret = rq->ring->size - rq->reserved_space;
1309 i915_request_add(rq);
1310
1311 sz = rq->ring->emit - rq->head;
1312 if (sz < 0)
1313 sz += rq->ring->size;
1314 ret /= sz;
1315 ret /= 2; /* leave half spare, in case of emergency! */
1316 }
1317
1318 return ret;
1319 }
1320
1321 static int live_breadcrumbs_smoketest(void *arg)
1322 {
1323 struct drm_i915_private *i915 = arg;
1324 const unsigned int nengines = num_uabi_engines(i915);
1325 const unsigned int ncpus = num_online_cpus();
1326 unsigned long num_waits, num_fences;
1327 struct intel_engine_cs *engine;
1328 struct task_struct **threads;
1329 struct igt_live_test live;
1330 intel_wakeref_t wakeref;
1331 struct smoketest *smoke;
1332 unsigned int n, idx;
1333 struct file *file;
1334 int ret = 0;
1335
1336 /*
1337 * Smoketest our breadcrumb/signal handling for requests across multiple
1338 * threads. A very simple test to only catch the most egregious of bugs.
1339 * See __igt_breadcrumbs_smoketest();
1340 *
1341 * On real hardware this time.
1342 */
1343
1344 wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1345
1346 file = mock_file(i915);
1347 if (IS_ERR(file)) {
1348 ret = PTR_ERR(file);
1349 goto out_rpm;
1350 }
1351
1352 smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
1353 if (!smoke) {
1354 ret = -ENOMEM;
1355 goto out_file;
1356 }
1357
1358 threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
1359 if (!threads) {
1360 ret = -ENOMEM;
1361 goto out_smoke;
1362 }
1363
1364 smoke[0].request_alloc = __live_request_alloc;
1365 smoke[0].ncontexts = 64;
1366 smoke[0].contexts = kcalloc(smoke[0].ncontexts,
1367 sizeof(*smoke[0].contexts),
1368 GFP_KERNEL);
1369 if (!smoke[0].contexts) {
1370 ret = -ENOMEM;
1371 goto out_threads;
1372 }
1373
1374 for (n = 0; n < smoke[0].ncontexts; n++) {
1375 smoke[0].contexts[n] = live_context(i915, file);
1376 if (!smoke[0].contexts[n]) {
1377 ret = -ENOMEM;
1378 goto out_contexts;
1379 }
1380 }
1381
1382 ret = igt_live_test_begin(&live, i915, __func__, "");
1383 if (ret)
1384 goto out_contexts;
1385
1386 idx = 0;
1387 for_each_uabi_engine(engine, i915) {
1388 smoke[idx] = smoke[0];
1389 smoke[idx].engine = engine;
1390 smoke[idx].max_batch =
1391 max_batches(smoke[0].contexts[0], engine);
1392 if (smoke[idx].max_batch < 0) {
1393 ret = smoke[idx].max_batch;
1394 goto out_flush;
1395 }
1396 /* One ring interleaved between requests from all cpus */
1397 smoke[idx].max_batch /= num_online_cpus() + 1;
1398 pr_debug("Limiting batches to %d requests on %s\n",
1399 smoke[idx].max_batch, engine->name);
1400
1401 for (n = 0; n < ncpus; n++) {
1402 struct task_struct *tsk;
1403
1404 tsk = kthread_run(__igt_breadcrumbs_smoketest,
1405 &smoke[idx], "igt/%d.%d", idx, n);
1406 if (IS_ERR(tsk)) {
1407 ret = PTR_ERR(tsk);
1408 goto out_flush;
1409 }
1410
1411 get_task_struct(tsk);
1412 threads[idx * ncpus + n] = tsk;
1413 }
1414
1415 idx++;
1416 }
1417
1418 yield(); /* start all threads before we begin */
1419 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1420
1421 out_flush:
1422 idx = 0;
1423 num_waits = 0;
1424 num_fences = 0;
1425 for_each_uabi_engine(engine, i915) {
1426 for (n = 0; n < ncpus; n++) {
1427 struct task_struct *tsk = threads[idx * ncpus + n];
1428 int err;
1429
1430 if (!tsk)
1431 continue;
1432
1433 err = kthread_stop(tsk);
1434 if (err < 0 && !ret)
1435 ret = err;
1436
1437 put_task_struct(tsk);
1438 }
1439
1440 num_waits += atomic_long_read(&smoke[idx].num_waits);
1441 num_fences += atomic_long_read(&smoke[idx].num_fences);
1442 idx++;
1443 }
1444 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1445 num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
1446
1447 ret = igt_live_test_end(&live) ?: ret;
1448 out_contexts:
1449 kfree(smoke[0].contexts);
1450 out_threads:
1451 kfree(threads);
1452 out_smoke:
1453 kfree(smoke);
1454 out_file:
1455 fput(file);
1456 out_rpm:
1457 intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1458
1459 return ret;
1460 }
1461
1462 int i915_request_live_selftests(struct drm_i915_private *i915)
1463 {
1464 static const struct i915_subtest tests[] = {
1465 SUBTEST(live_nop_request),
1466 SUBTEST(live_all_engines),
1467 SUBTEST(live_sequential_engines),
1468 SUBTEST(live_parallel_engines),
1469 SUBTEST(live_empty_request),
1470 SUBTEST(live_breadcrumbs_smoketest),
1471 };
1472
1473 if (intel_gt_is_wedged(&i915->gt))
1474 return 0;
1475
1476 return i915_subtests(tests, i915);
1477 }
1478