/*	$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"

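/* sort() comparator for an array of ktime_t samples. */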
static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

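/*
 * Time a blitter fill of the object on each copy engine: run five passes
 * on the engine's kernel context, sort the samples, and report throughput
 * from the middle measurements (the fastest and slowest passes are
 * discarded).
 */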
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_fill_blt(obj, ce, 0);
			if (err)
				break;

			err = i915_gem_object_wait(obj,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
			engine->name,
			obj->base.size >> 10,
			div64_u64(mul_u32_u32(4 * obj->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

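/* Report blitter fill throughput for object sizes from 4K up to 64M. */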
static int perf_fill_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *obj;
		int err;

		obj = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = __perf_fill_blt(obj);
		i915_gem_object_put(obj);
		if (err)
			return err;
	}

	return 0;
}

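/*
 * Time a blitter copy from src to dst on each copy engine, reporting
 * throughput the same way as __perf_fill_blt().
 */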
static int __perf_copy_blt(struct drm_i915_gem_object *src,
			   struct drm_i915_gem_object *dst)
{
	struct drm_i915_private *i915 = to_i915(src->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_copy_blt(src, dst, ce);
			if (err)
				break;

			err = i915_gem_object_wait(dst,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
			engine->name,
			src->base.size >> 10,
			div64_u64(mul_u32_u32(4 * src->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

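/* Report blitter copy throughput for object sizes from 4K up to 64M. */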
static int perf_copy_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(src, dst);

		i915_gem_object_put(dst);
err_src:
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

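/* Per-thread state used by the threaded fill/copy stress tests. */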
struct igt_thread_arg {
	struct drm_i915_private *i915;
	struct i915_gem_context *ctx;
	struct file *file;
	struct rnd_state prng;
	unsigned int n_cpus;
};

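/*
 * Fill stress worker: until the selftest timeout expires, pick a random
 * size and value, fill a huge_gem_object with the blitter and verify the
 * result through a CPU (WB) mapping.
 */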
static int igt_fill_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		/*
		 * If we have a tiny shared address space, like for the GGTT
		 * then we can't be too greedy.
		 */
		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * Make sure the potentially async clflush does its job, if
		 * required.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_object_fill_blt(obj, ce, val);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(obj);
		err = i915_gem_object_set_to_cpu_domain(obj, false);
		i915_gem_object_unlock(obj);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

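/*
 * Copy stress worker: until the selftest timeout expires, fill a source
 * object through the CPU, blit it into a scribbled destination and verify
 * the destination contents through a CPU (WB) mapping.
 */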
static int igt_copy_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *src, *dst;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		src = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(src)) {
			err = PTR_ERR(src);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_src;
		}

		memset32(vaddr, val,
			 huge_gem_object_phys_size(src) / sizeof(u32));

		i915_gem_object_unpin_map(src);

		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
			src->cache_dirty = true;

		dst = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_put_src;
		}

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_dst;
		}

		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(dst) / sizeof(u32));

		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			dst->cache_dirty = true;

		err = i915_gem_object_copy_blt(src, dst, ce);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(dst);
		err = i915_gem_object_set_to_cpu_domain(dst, false);
		i915_gem_object_unlock(dst);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(dst);

		i915_gem_object_put(src);
		i915_gem_object_put(dst);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(dst);
err_put_dst:
	i915_gem_object_put(dst);
err_put_src:
	i915_gem_object_put(src);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

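/*
 * Run blt_fn on one kthread per online CPU (plus one). With SINGLE_CTX the
 * threads share a single context; otherwise each thread creates its own.
 */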
static int igt_threaded_blt(struct drm_i915_private *i915,
			    int (*blt_fn)(void *arg),
			    unsigned int flags)
#define SINGLE_CTX BIT(0)
{
	struct igt_thread_arg *thread;
	struct task_struct **tsk;
	unsigned int n_cpus, i;
	I915_RND_STATE(prng);
	int err = 0;

	n_cpus = num_online_cpus() + 1;

	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
	if (!tsk)
		return 0;

	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
	if (!thread)
		goto out_tsk;

	thread[0].file = mock_file(i915);
	if (IS_ERR(thread[0].file)) {
		err = PTR_ERR(thread[0].file);
		goto out_thread;
	}

	if (flags & SINGLE_CTX) {
		thread[0].ctx = live_context(i915, thread[0].file);
		if (IS_ERR(thread[0].ctx)) {
			err = PTR_ERR(thread[0].ctx);
			goto out_file;
		}
	}

	for (i = 0; i < n_cpus; ++i) {
		thread[i].i915 = i915;
		thread[i].file = thread[0].file;
		thread[i].ctx = thread[0].ctx;
		thread[i].n_cpus = n_cpus;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
		if (IS_ERR(tsk[i])) {
			err = PTR_ERR(tsk[i]);
			break;
		}

		get_task_struct(tsk[i]);
	}

	yield(); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		int status;

		if (IS_ERR_OR_NULL(tsk[i]))
			continue;

		status = kthread_stop(tsk[i]);
		if (status && !err)
			err = status;

		put_task_struct(tsk[i]);
	}

out_file:
	fput(thread[0].file);
out_thread:
	kfree(thread);
out_tsk:
	kfree(tsk);
	return err;
}

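/* Subtest entry points binding each worker to a context-sharing mode. */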
static int igt_fill_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX);
}

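/* Live selftests; skipped if the GPU is wedged or there is no blitter engine. */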
int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_fill_blt),
		SUBTEST(igt_fill_blt_ctx0),
		SUBTEST(igt_copy_blt),
		SUBTEST(igt_copy_blt_ctx0),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	if (!HAS_ENGINE(i915, BCS0))
		return 0;

	return i915_live_subtests(tests, i915);
}

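/* Performance selftests; skipped if the GPU is wedged. */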
int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_fill_blt),
		SUBTEST(perf_copy_blt),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}