/*	$NetBSD: selftest_timeline.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: selftest_timeline.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/prime_numbers.h>

#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

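/*
 * Each timeline owns a single cacheline within its HWSP page, so the
 * kernel virtual address of that slot divided by CACHELINE_BYTES gives
 * an identifier that is unique for as long as the timeline holds the
 * slot. The mock freelist test below uses this as a radix tree key to
 * catch duplicate allocations.
 */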
static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

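/*
 * Swap the timeline recorded in the history slot at @idx for @tl (which
 * may be NULL to simply retire the slot), dropping the previous
 * occupant from the radix tree along with its reference.
 */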
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_put(tl);
	}
}

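/*
 * Create @count timelines, checking via the radix tree that no two live
 * timelines ever share a HWSP cacheline, then release a random number of
 * the most recent entries so the next batch mixes fresh allocations with
 * recycled slots.
 */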
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt, NULL);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSPs do not
	 * overlap. Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

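	/*
	 * Walk prime-sized batches, up to two pages' worth of cachelines,
	 * so that each phase exercises both fresh page allocation and
	 * reuse of slots returned to the freelist.
	 */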
	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	drm_dev_put(&i915->drm);
	return err;
}

struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_sync(void *arg)
{
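	/*
	 * Each step asks whether @seqno is already signaled on the context
	 * (expected) and then optionally records it (set), walking the u32
	 * seqno space across its wrap point.
	 */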
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself become a significant factor in the per-iteration
	 * timings. We try to compensate for this by measuring the overhead
	 * of the prng and subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
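	/*
	 * prng32_1M is the cost of one 32-bit prng draw in ns, scaled by
	 * 2^20 for fixed-point arithmetic: the overhead of N draws is
	 * (N * prng32_1M) >> 20 ns. The random benchmarks below draw two
	 * 32-bit values per 64-bit context id, hence the "* 2" corrections.
	 */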

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;
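			/* i.e. cycle through BIT(order) ids, spaced BIT(order) apart */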

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

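/*
 * Emit a 4-dword MI_STORE_DWORD_IMM writing @value to the GGTT address
 * @addr: gen8+ takes a 64-bit address (the zero dword supplies the
 * upper 32 bits), gen4-gen7 require a reserved zero dword before the
 * address, and earlier gens use the 3-dword virtual-address form padded
 * with a MI_NOOP so the emit is always a fixed length.
 */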
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (INTEL_GEN(rq->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

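/*
 * Submit a kernel request on @engine that writes @value into @tl's
 * HWSP slot via the GGTT; the request is returned with a reference
 * held so the caller can wait for the write to land.
 */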
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = intel_timeline_pin(tl);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

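/*
 * A freshly created timeline must already report its current seqno in
 * the HWSP, i.e. any recycled cacheline must have been reset before
 * being handed out again.
 */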
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
	struct intel_timeline *tl;

	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return tl;

	if (*tl->hwsp_seqno != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_put(tl);
		return ERR_PTR(-EINVAL);
	}

	return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

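		/*
		 * Use the running count as the stored value so that each
		 * timeline's slot can be verified independently below.
		 */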
		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && *tl->hwsp_seqno != n) {
			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
			       n, *tl->hwsp_seqno);
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				/* no engine PM reference held yet */
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && *tl->hwsp_seqno != n) {
			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
			       n, *tl->hwsp_seqno);
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
		goto out_free;

	err = intel_timeline_pin(tl);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

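		/*
		 * Start just below the wrap point so that the second
		 * intel_timeline_get_seqno() overflows the u32 seqno and
		 * must switch to a fresh HWSP cacheline.
		 */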
		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations, and want to
	 * confuse neither ourselves nor the GPU while doing so.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (*tl->hwsp_seqno != count) {
				pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
				       count, *tl->hwsp_seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}