/*	$NetBSD: selftest_timeline.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: selftest_timeline.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/prime_numbers.h>

#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

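/* Return the page backing the timeline's HWSP, which must already be pinned. */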
static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

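/*
 * Identify the cacheline backing this timeline's HWSP seqno slot, so that
 * we can check that no two timelines are handed the same cacheline.
 */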
static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

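/*
 * Replace the history slot at @idx with @tl, dropping our tracking and
 * reference for the timeline previously stored there (if any).
 */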
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_put(tl);
	}
}

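/*
 * Create @count timelines, checking that each is assigned a unique HWSP
 * cacheline, then randomly release some of the timelines we are tracking
 * so that their cachelines become available again for reuse.
 */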
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt, NULL);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSP do not overlap.
	 * Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	drm_dev_put(&i915->drm);
	return err;
}

struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

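/*
 * Apply a single step of the igt_sync table: check whether the (ctx, seqno)
 * pair is already considered complete, and optionally record it as the most
 * recent seqno seen for that context.
 */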
static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

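/*
 * Walk a table of (seqno, expected, set) steps over a wide range of context
 * ids (straddling every power-of-two boundary) to exercise seqno ordering
 * and wraparound handling in the timeline sync tracking.
 */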
static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

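/*
 * Measure the cost of insertions and lookups in the timeline sync tracking,
 * subtracting the measured overhead of the pseudo-random number generator
 * where it would otherwise dominate the per-iteration timings.
 */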
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself becomes a significant factor in the per-iteration
	 * timings. We try to compensate the results by measuring the overhead
	 * of the prng and subtract it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

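/*
 * Emit a MI_STORE_DWORD_IMM into the request's ring to write @value to the
 * GGTT address @addr, using the encoding appropriate for the hardware
 * generation.
 */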
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (INTEL_GEN(rq->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

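/*
 * Submit a request on @engine that writes @value into the timeline's HWSP
 * slot from the GPU, returning the request with a reference held.
 */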
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = intel_timeline_pin(tl);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

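/*
 * Create a timeline and verify that its breadcrumb slot was initialised to
 * the timeline's current seqno before handing it back to the caller.
 */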
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
	struct intel_timeline *tl;

	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return tl;

	if (*tl->hwsp_seqno != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_put(tl);
		return ERR_PTR(-EINVAL);
	}

	return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && *tl->hwsp_seqno != n) {
			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
			       n, *tl->hwsp_seqno);
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && *tl->hwsp_seqno != n) {
			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
			       n, *tl->hwsp_seqno);
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
		goto out_free;

	err = intel_timeline_pin(tl);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations and neither
	 * want to confuse ourselves or the GPU.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (*tl->hwsp_seqno != count) {
				pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
				       count, *tl->hwsp_seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}