/*	$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"
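
/* sort() comparison callback: order ktime_t samples in ascending order. */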
static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}
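
/*
 * Measure the throughput of the fill (clear) blit on each copy engine.
 * Five passes are timed; the report below uses a weighted average of the
 * middle three samples (fastest and slowest discarded) in MiB/s.
 */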
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_fill_blt(obj, ce, 0);
			if (err)
				break;

			err = i915_gem_object_wait(obj,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
			engine->name,
			obj->base.size >> 10,
			div64_u64(mul_u32_u32(4 * obj->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}
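
/* Run the fill-blit benchmark over a range of object sizes. */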
static int perf_fill_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *obj;
		int err;

		obj = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = __perf_fill_blt(obj);
		i915_gem_object_put(obj);
		if (err)
			return err;
	}

	return 0;
}
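
/*
 * Measure the throughput of the copy blit between two objects on each copy
 * engine, reporting the result the same way as __perf_fill_blt().
 */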
static int __perf_copy_blt(struct drm_i915_gem_object *src,
			   struct drm_i915_gem_object *dst)
{
	struct drm_i915_private *i915 = to_i915(src->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_copy_blt(src, dst, ce);
			if (err)
				break;

			err = i915_gem_object_wait(dst,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
			engine->name,
			src->base.size >> 10,
			div64_u64(mul_u32_u32(4 * src->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}
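
/* Run the copy-blit benchmark over a range of object sizes. */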
static int perf_copy_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(src, dst);

		i915_gem_object_put(dst);
err_src:
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}
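
/* Arguments handed to each blit worker thread spawned by igt_threaded_blt(). */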
struct igt_thread_arg {
	struct drm_i915_private *i915;
	struct i915_gem_context *ctx;
	struct file *file;
	struct rnd_state prng;
	unsigned int n_cpus;
};
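
/*
 * Worker thread: until the selftest timeout expires, create an object of
 * randomised size, fill it with the blitter and verify the contents through
 * a CPU mapping.
 */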
static int igt_fill_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		/*
		 * If we have a tiny shared address space, like for the GGTT
		 * then we can't be too greedy.
		 */
		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz=%x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * Make sure the potentially async clflush does its job, if
		 * required.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_object_fill_blt(obj, ce, val);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(obj);
		err = i915_gem_object_set_to_cpu_domain(obj, false);
		i915_gem_object_unlock(obj);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}
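
/*
 * Worker thread: until the selftest timeout expires, blit from a freshly
 * initialised source object to a scribbled destination of the same size and
 * verify the destination contents through a CPU mapping.
 */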
static int igt_copy_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *src, *dst;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz=%x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		src = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(src)) {
			err = PTR_ERR(src);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_src;
		}

		memset32(vaddr, val,
			 huge_gem_object_phys_size(src) / sizeof(u32));

		i915_gem_object_unpin_map(src);

		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
			src->cache_dirty = true;

		dst = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_put_src;
		}

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_dst;
		}

		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(dst) / sizeof(u32));

		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			dst->cache_dirty = true;

		err = i915_gem_object_copy_blt(src, dst, ce);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(dst);
		err = i915_gem_object_set_to_cpu_domain(dst, false);
		i915_gem_object_unlock(dst);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(dst);

		i915_gem_object_put(src);
		i915_gem_object_put(dst);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(dst);
err_put_dst:
	i915_gem_object_put(dst);
err_put_src:
	i915_gem_object_put(src);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}
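
/*
 * Spawn one blit worker per online CPU (plus one) and collect the first
 * error any of them reports.  With SINGLE_CTX all workers share a single
 * GEM context; otherwise each worker creates its own.
 */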
static int igt_threaded_blt(struct drm_i915_private *i915,
			    int (*blt_fn)(void *arg),
			    unsigned int flags)
#define SINGLE_CTX BIT(0)
{
	struct igt_thread_arg *thread;
	struct task_struct **tsk;
	unsigned int n_cpus, i;
	I915_RND_STATE(prng);
	int err = 0;

	n_cpus = num_online_cpus() + 1;

	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
	if (!tsk)
		return 0;

	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
	if (!thread)
		goto out_tsk;

	thread[0].file = mock_file(i915);
	if (IS_ERR(thread[0].file)) {
		err = PTR_ERR(thread[0].file);
		goto out_thread;
	}

	if (flags & SINGLE_CTX) {
		thread[0].ctx = live_context(i915, thread[0].file);
		if (IS_ERR(thread[0].ctx)) {
			err = PTR_ERR(thread[0].ctx);
			goto out_file;
		}
	}

	for (i = 0; i < n_cpus; ++i) {
		thread[i].i915 = i915;
		thread[i].file = thread[0].file;
		thread[i].ctx = thread[0].ctx;
		thread[i].n_cpus = n_cpus;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
		if (IS_ERR(tsk[i])) {
			err = PTR_ERR(tsk[i]);
			break;
		}

		get_task_struct(tsk[i]);
	}

	yield(); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		int status;

		if (IS_ERR_OR_NULL(tsk[i]))
			continue;

		status = kthread_stop(tsk[i]);
		if (status && !err)
			err = status;

		put_task_struct(tsk[i]);
	}

out_file:
	fput(thread[0].file);
out_thread:
	kfree(thread);
out_tsk:
	kfree(tsk);
	return err;
}
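
/* Subtest entry points: run the fill/copy workers with per-thread contexts or a single shared context. */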
static int igt_fill_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX);
}
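
/* The blit selftests are skipped when the GT is wedged or, for the live tests, when there is no blitter engine. */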
int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_fill_blt),
		SUBTEST(igt_fill_blt_ctx0),
		SUBTEST(igt_copy_blt),
		SUBTEST(igt_copy_blt_ctx0),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	if (!HAS_ENGINE(i915, BCS0))
		return 0;

	return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_fill_blt),
		SUBTEST(perf_copy_blt),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}