1 /* $NetBSD: i915_request.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $ */ 2 3 /* 4 * Copyright © 2016 Intel Corporation 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23 * IN THE SOFTWARE. 24 * 25 */ 26 27 #include <sys/cdefs.h> 28 __KERNEL_RCSID(0, "$NetBSD: i915_request.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $"); 29 30 #include <linux/prime_numbers.h> 31 32 #include "gem/i915_gem_pm.h" 33 #include "gem/selftests/mock_context.h" 34 35 #include "gt/intel_engine_pm.h" 36 #include "gt/intel_gt.h" 37 38 #include "i915_random.h" 39 #include "i915_selftest.h" 40 #include "igt_live_test.h" 41 #include "igt_spinner.h" 42 #include "lib_sw_fence.h" 43 44 #include "mock_drm.h" 45 #include "mock_gem_device.h" 46 47 static unsigned int num_uabi_engines(struct drm_i915_private *i915) 48 { 49 struct intel_engine_cs *engine; 50 unsigned int count; 51 52 count = 0; 53 for_each_uabi_engine(engine, i915) 54 count++; 55 56 return count; 57 } 58 59 static int igt_add_request(void *arg) 60 { 61 struct drm_i915_private *i915 = arg; 62 struct i915_request *request; 63 64 /* Basic preliminary test to create a request and let it loose! */ 65 66 request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10); 67 if (!request) 68 return -ENOMEM; 69 70 i915_request_add(request); 71 72 return 0; 73 } 74 75 static int igt_wait_request(void *arg) 76 { 77 const long T = HZ / 4; 78 struct drm_i915_private *i915 = arg; 79 struct i915_request *request; 80 int err = -EINVAL; 81 82 /* Submit a request, then wait upon it */ 83 84 request = mock_request(i915->engine[RCS0]->kernel_context, T); 85 if (!request) 86 return -ENOMEM; 87 88 i915_request_get(request); 89 90 if (i915_request_wait(request, 0, 0) != -ETIME) { 91 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); 92 goto out_request; 93 } 94 95 if (i915_request_wait(request, 0, T) != -ETIME) { 96 pr_err("request wait succeeded (expected timeout before submit!)\n"); 97 goto out_request; 98 } 99 100 if (i915_request_completed(request)) { 101 pr_err("request completed before submit!!\n"); 102 goto out_request; 103 } 104 105 i915_request_add(request); 106 107 if (i915_request_wait(request, 0, 0) != -ETIME) { 108 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); 109 goto out_request; 110 } 111 112 if (i915_request_completed(request)) { 113 pr_err("request completed immediately!\n"); 114 goto out_request; 115 } 116 117 if (i915_request_wait(request, 0, T / 2) != -ETIME) { 118 pr_err("request wait succeeded (expected timeout!)\n"); 119 goto out_request; 120 } 121 122 if (i915_request_wait(request, 0, T) == -ETIME) { 123 pr_err("request wait timed out!\n"); 124 goto out_request; 125 } 126 127 if (!i915_request_completed(request)) { 128 pr_err("request not complete after waiting!\n"); 129 goto out_request; 130 } 131 132 if (i915_request_wait(request, 0, T) == -ETIME) { 133 pr_err("request wait timed out when already complete!\n"); 134 goto out_request; 135 } 136 137 err = 0; 138 out_request: 139 i915_request_put(request); 140 mock_device_flush(i915); 141 return err; 142 } 143 144 static int igt_fence_wait(void *arg) 145 { 146 const long T = HZ / 4; 147 struct drm_i915_private *i915 = arg; 148 struct i915_request *request; 149 int err = -EINVAL; 150 151 /* Submit a request, treat it as a fence and wait upon it */ 152 153 request = mock_request(i915->engine[RCS0]->kernel_context, T); 154 if (!request) 155 return -ENOMEM; 156 157 if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { 158 pr_err("fence wait success before submit (expected timeout)!\n"); 159 goto out; 160 } 161 162 i915_request_add(request); 163 164 if (dma_fence_is_signaled(&request->fence)) { 165 pr_err("fence signaled immediately!\n"); 166 goto out; 167 } 168 169 if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { 170 pr_err("fence wait success after submit (expected timeout)!\n"); 171 goto out; 172 } 173 174 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 175 pr_err("fence wait timed out (expected success)!\n"); 176 goto out; 177 } 178 179 if (!dma_fence_is_signaled(&request->fence)) { 180 pr_err("fence unsignaled after waiting!\n"); 181 goto out; 182 } 183 184 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 185 pr_err("fence wait timed out when complete (expected success)!\n"); 186 goto out; 187 } 188 189 err = 0; 190 out: 191 mock_device_flush(i915); 192 return err; 193 } 194 195 static int igt_request_rewind(void *arg) 196 { 197 struct drm_i915_private *i915 = arg; 198 struct i915_request *request, *vip; 199 struct i915_gem_context *ctx[2]; 200 struct intel_context *ce; 201 int err = -EINVAL; 202 203 ctx[0] = mock_context(i915, "A"); 204 205 ce = i915_gem_context_get_engine(ctx[0], RCS0); 206 GEM_BUG_ON(IS_ERR(ce)); 207 request = mock_request(ce, 2 * HZ); 208 intel_context_put(ce); 209 if (!request) { 210 err = -ENOMEM; 211 goto err_context_0; 212 } 213 214 i915_request_get(request); 215 i915_request_add(request); 216 217 ctx[1] = mock_context(i915, "B"); 218 219 ce = i915_gem_context_get_engine(ctx[1], RCS0); 220 GEM_BUG_ON(IS_ERR(ce)); 221 vip = mock_request(ce, 0); 222 intel_context_put(ce); 223 if (!vip) { 224 err = -ENOMEM; 225 goto err_context_1; 226 } 227 228 /* Simulate preemption by manual reordering */ 229 if (!mock_cancel_request(request)) { 230 pr_err("failed to cancel request (already executed)!\n"); 231 i915_request_add(vip); 232 goto err_context_1; 233 } 234 i915_request_get(vip); 235 i915_request_add(vip); 236 rcu_read_lock(); 237 request->engine->submit_request(request); 238 rcu_read_unlock(); 239 240 241 if (i915_request_wait(vip, 0, HZ) == -ETIME) { 242 pr_err("timed out waiting for high priority request\n"); 243 goto err; 244 } 245 246 if (i915_request_completed(request)) { 247 pr_err("low priority request already completed\n"); 248 goto err; 249 } 250 251 err = 0; 252 err: 253 i915_request_put(vip); 254 err_context_1: 255 mock_context_close(ctx[1]); 256 i915_request_put(request); 257 err_context_0: 258 mock_context_close(ctx[0]); 259 mock_device_flush(i915); 260 return err; 261 } 262 263 struct smoketest { 264 struct intel_engine_cs *engine; 265 struct i915_gem_context **contexts; 266 atomic_long_t num_waits, num_fences; 267 int ncontexts, max_batch; 268 struct i915_request *(*request_alloc)(struct intel_context *ce); 269 }; 270 271 static struct i915_request * 272 __mock_request_alloc(struct intel_context *ce) 273 { 274 return mock_request(ce, 0); 275 } 276 277 static struct i915_request * 278 __live_request_alloc(struct intel_context *ce) 279 { 280 return intel_context_create_request(ce); 281 } 282 283 static int __igt_breadcrumbs_smoketest(void *arg) 284 { 285 struct smoketest *t = arg; 286 const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; 287 const unsigned int total = 4 * t->ncontexts + 1; 288 unsigned int num_waits = 0, num_fences = 0; 289 struct i915_request **requests; 290 I915_RND_STATE(prng); 291 unsigned int *order; 292 int err = 0; 293 294 /* 295 * A very simple test to catch the most egregious of list handling bugs. 296 * 297 * At its heart, we simply create oodles of requests running across 298 * multiple kthreads and enable signaling on them, for the sole purpose 299 * of stressing our breadcrumb handling. The only inspection we do is 300 * that the fences were marked as signaled. 301 */ 302 303 requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); 304 if (!requests) 305 return -ENOMEM; 306 307 order = i915_random_order(total, &prng); 308 if (!order) { 309 err = -ENOMEM; 310 goto out_requests; 311 } 312 313 while (!kthread_should_stop()) { 314 struct i915_sw_fence *submit, *wait; 315 unsigned int n, count; 316 317 submit = heap_fence_create(GFP_KERNEL); 318 if (!submit) { 319 err = -ENOMEM; 320 break; 321 } 322 323 wait = heap_fence_create(GFP_KERNEL); 324 if (!wait) { 325 i915_sw_fence_commit(submit); 326 heap_fence_put(submit); 327 err = ENOMEM; 328 break; 329 } 330 331 i915_random_reorder(order, total, &prng); 332 count = 1 + i915_prandom_u32_max_state(max_batch, &prng); 333 334 for (n = 0; n < count; n++) { 335 struct i915_gem_context *ctx = 336 t->contexts[order[n] % t->ncontexts]; 337 struct i915_request *rq; 338 struct intel_context *ce; 339 340 ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); 341 GEM_BUG_ON(IS_ERR(ce)); 342 rq = t->request_alloc(ce); 343 intel_context_put(ce); 344 if (IS_ERR(rq)) { 345 err = PTR_ERR(rq); 346 count = n; 347 break; 348 } 349 350 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, 351 submit, 352 GFP_KERNEL); 353 354 requests[n] = i915_request_get(rq); 355 i915_request_add(rq); 356 357 if (err >= 0) 358 err = i915_sw_fence_await_dma_fence(wait, 359 &rq->fence, 360 0, 361 GFP_KERNEL); 362 363 if (err < 0) { 364 i915_request_put(rq); 365 count = n; 366 break; 367 } 368 } 369 370 i915_sw_fence_commit(submit); 371 i915_sw_fence_commit(wait); 372 373 if (!wait_event_timeout(wait->wait, 374 i915_sw_fence_done(wait), 375 5 * HZ)) { 376 struct i915_request *rq = requests[count - 1]; 377 378 pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n", 379 atomic_read(&wait->pending), count, 380 rq->fence.context, rq->fence.seqno, 381 t->engine->name); 382 GEM_TRACE_DUMP(); 383 384 intel_gt_set_wedged(t->engine->gt); 385 GEM_BUG_ON(!i915_request_completed(rq)); 386 i915_sw_fence_wait(wait); 387 err = -EIO; 388 } 389 390 for (n = 0; n < count; n++) { 391 struct i915_request *rq = requests[n]; 392 393 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 394 &rq->fence.flags)) { 395 pr_err("%llu:%llu was not signaled!\n", 396 rq->fence.context, rq->fence.seqno); 397 err = -EINVAL; 398 } 399 400 i915_request_put(rq); 401 } 402 403 heap_fence_put(wait); 404 heap_fence_put(submit); 405 406 if (err < 0) 407 break; 408 409 num_fences += count; 410 num_waits++; 411 412 cond_resched(); 413 } 414 415 atomic_long_add(num_fences, &t->num_fences); 416 atomic_long_add(num_waits, &t->num_waits); 417 418 kfree(order); 419 out_requests: 420 kfree(requests); 421 return err; 422 } 423 424 static int mock_breadcrumbs_smoketest(void *arg) 425 { 426 struct drm_i915_private *i915 = arg; 427 struct smoketest t = { 428 .engine = i915->engine[RCS0], 429 .ncontexts = 1024, 430 .max_batch = 1024, 431 .request_alloc = __mock_request_alloc 432 }; 433 unsigned int ncpus = num_online_cpus(); 434 struct task_struct **threads; 435 unsigned int n; 436 int ret = 0; 437 438 /* 439 * Smoketest our breadcrumb/signal handling for requests across multiple 440 * threads. A very simple test to only catch the most egregious of bugs. 441 * See __igt_breadcrumbs_smoketest(); 442 */ 443 444 threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL); 445 if (!threads) 446 return -ENOMEM; 447 448 t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); 449 if (!t.contexts) { 450 ret = -ENOMEM; 451 goto out_threads; 452 } 453 454 for (n = 0; n < t.ncontexts; n++) { 455 t.contexts[n] = mock_context(t.engine->i915, "mock"); 456 if (!t.contexts[n]) { 457 ret = -ENOMEM; 458 goto out_contexts; 459 } 460 } 461 462 for (n = 0; n < ncpus; n++) { 463 threads[n] = kthread_run(__igt_breadcrumbs_smoketest, 464 &t, "igt/%d", n); 465 if (IS_ERR(threads[n])) { 466 ret = PTR_ERR(threads[n]); 467 ncpus = n; 468 break; 469 } 470 471 get_task_struct(threads[n]); 472 } 473 474 yield(); /* start all threads before we begin */ 475 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); 476 477 for (n = 0; n < ncpus; n++) { 478 int err; 479 480 err = kthread_stop(threads[n]); 481 if (err < 0 && !ret) 482 ret = err; 483 484 put_task_struct(threads[n]); 485 } 486 pr_info("Completed %lu waits for %lu fence across %d cpus\n", 487 atomic_long_read(&t.num_waits), 488 atomic_long_read(&t.num_fences), 489 ncpus); 490 491 out_contexts: 492 for (n = 0; n < t.ncontexts; n++) { 493 if (!t.contexts[n]) 494 break; 495 mock_context_close(t.contexts[n]); 496 } 497 kfree(t.contexts); 498 out_threads: 499 kfree(threads); 500 return ret; 501 } 502 503 int i915_request_mock_selftests(void) 504 { 505 static const struct i915_subtest tests[] = { 506 SUBTEST(igt_add_request), 507 SUBTEST(igt_wait_request), 508 SUBTEST(igt_fence_wait), 509 SUBTEST(igt_request_rewind), 510 SUBTEST(mock_breadcrumbs_smoketest), 511 }; 512 struct drm_i915_private *i915; 513 intel_wakeref_t wakeref; 514 int err = 0; 515 516 i915 = mock_gem_device(); 517 if (!i915) 518 return -ENOMEM; 519 520 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 521 err = i915_subtests(tests, i915); 522 523 drm_dev_put(&i915->drm); 524 525 return err; 526 } 527 528 static int live_nop_request(void *arg) 529 { 530 struct drm_i915_private *i915 = arg; 531 struct intel_engine_cs *engine; 532 struct igt_live_test t; 533 int err = -ENODEV; 534 535 /* 536 * Submit various sized batches of empty requests, to each engine 537 * (individually), and wait for the batch to complete. We can check 538 * the overhead of submitting requests to the hardware. 539 */ 540 541 for_each_uabi_engine(engine, i915) { 542 unsigned long n, prime; 543 IGT_TIMEOUT(end_time); 544 ktime_t times[2] = {}; 545 546 err = igt_live_test_begin(&t, i915, __func__, engine->name); 547 if (err) 548 return err; 549 550 intel_engine_pm_get(engine); 551 for_each_prime_number_from(prime, 1, 8192) { 552 struct i915_request *request = NULL; 553 554 times[1] = ktime_get_raw(); 555 556 for (n = 0; n < prime; n++) { 557 i915_request_put(request); 558 request = i915_request_create(engine->kernel_context); 559 if (IS_ERR(request)) 560 return PTR_ERR(request); 561 562 /* 563 * This space is left intentionally blank. 564 * 565 * We do not actually want to perform any 566 * action with this request, we just want 567 * to measure the latency in allocation 568 * and submission of our breadcrumbs - 569 * ensuring that the bare request is sufficient 570 * for the system to work (i.e. proper HEAD 571 * tracking of the rings, interrupt handling, 572 * etc). It also gives us the lowest bounds 573 * for latency. 574 */ 575 576 i915_request_get(request); 577 i915_request_add(request); 578 } 579 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 580 i915_request_put(request); 581 582 times[1] = ktime_sub(ktime_get_raw(), times[1]); 583 if (prime == 1) 584 times[0] = times[1]; 585 586 if (__igt_timeout(end_time, NULL)) 587 break; 588 } 589 intel_engine_pm_put(engine); 590 591 err = igt_live_test_end(&t); 592 if (err) 593 return err; 594 595 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", 596 engine->name, 597 ktime_to_ns(times[0]), 598 prime, div64_u64(ktime_to_ns(times[1]), prime)); 599 } 600 601 return err; 602 } 603 604 static struct i915_vma *empty_batch(struct drm_i915_private *i915) 605 { 606 struct drm_i915_gem_object *obj; 607 struct i915_vma *vma; 608 u32 *cmd; 609 int err; 610 611 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 612 if (IS_ERR(obj)) 613 return ERR_CAST(obj); 614 615 cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); 616 if (IS_ERR(cmd)) { 617 err = PTR_ERR(cmd); 618 goto err; 619 } 620 621 *cmd = MI_BATCH_BUFFER_END; 622 623 __i915_gem_object_flush_map(obj, 0, 64); 624 i915_gem_object_unpin_map(obj); 625 626 intel_gt_chipset_flush(&i915->gt); 627 628 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 629 if (IS_ERR(vma)) { 630 err = PTR_ERR(vma); 631 goto err; 632 } 633 634 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); 635 if (err) 636 goto err; 637 638 /* Force the wait wait now to avoid including it in the benchmark */ 639 err = i915_vma_sync(vma); 640 if (err) 641 goto err_pin; 642 643 return vma; 644 645 err_pin: 646 i915_vma_unpin(vma); 647 err: 648 i915_gem_object_put(obj); 649 return ERR_PTR(err); 650 } 651 652 static struct i915_request * 653 empty_request(struct intel_engine_cs *engine, 654 struct i915_vma *batch) 655 { 656 struct i915_request *request; 657 int err; 658 659 request = i915_request_create(engine->kernel_context); 660 if (IS_ERR(request)) 661 return request; 662 663 err = engine->emit_bb_start(request, 664 batch->node.start, 665 batch->node.size, 666 I915_DISPATCH_SECURE); 667 if (err) 668 goto out_request; 669 670 i915_request_get(request); 671 out_request: 672 i915_request_add(request); 673 return err ? ERR_PTR(err) : request; 674 } 675 676 static int live_empty_request(void *arg) 677 { 678 struct drm_i915_private *i915 = arg; 679 struct intel_engine_cs *engine; 680 struct igt_live_test t; 681 struct i915_vma *batch; 682 int err = 0; 683 684 /* 685 * Submit various sized batches of empty requests, to each engine 686 * (individually), and wait for the batch to complete. We can check 687 * the overhead of submitting requests to the hardware. 688 */ 689 690 batch = empty_batch(i915); 691 if (IS_ERR(batch)) 692 return PTR_ERR(batch); 693 694 for_each_uabi_engine(engine, i915) { 695 IGT_TIMEOUT(end_time); 696 struct i915_request *request; 697 unsigned long n, prime; 698 ktime_t times[2] = {}; 699 700 err = igt_live_test_begin(&t, i915, __func__, engine->name); 701 if (err) 702 goto out_batch; 703 704 intel_engine_pm_get(engine); 705 706 /* Warmup / preload */ 707 request = empty_request(engine, batch); 708 if (IS_ERR(request)) { 709 err = PTR_ERR(request); 710 intel_engine_pm_put(engine); 711 goto out_batch; 712 } 713 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 714 715 for_each_prime_number_from(prime, 1, 8192) { 716 times[1] = ktime_get_raw(); 717 718 for (n = 0; n < prime; n++) { 719 i915_request_put(request); 720 request = empty_request(engine, batch); 721 if (IS_ERR(request)) { 722 err = PTR_ERR(request); 723 intel_engine_pm_put(engine); 724 goto out_batch; 725 } 726 } 727 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 728 729 times[1] = ktime_sub(ktime_get_raw(), times[1]); 730 if (prime == 1) 731 times[0] = times[1]; 732 733 if (__igt_timeout(end_time, NULL)) 734 break; 735 } 736 i915_request_put(request); 737 intel_engine_pm_put(engine); 738 739 err = igt_live_test_end(&t); 740 if (err) 741 goto out_batch; 742 743 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", 744 engine->name, 745 ktime_to_ns(times[0]), 746 prime, div64_u64(ktime_to_ns(times[1]), prime)); 747 } 748 749 out_batch: 750 i915_vma_unpin(batch); 751 i915_vma_put(batch); 752 return err; 753 } 754 755 static struct i915_vma *recursive_batch(struct drm_i915_private *i915) 756 { 757 struct drm_i915_gem_object *obj; 758 const int gen = INTEL_GEN(i915); 759 struct i915_vma *vma; 760 u32 *cmd; 761 int err; 762 763 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 764 if (IS_ERR(obj)) 765 return ERR_CAST(obj); 766 767 vma = i915_vma_instance(obj, i915->gt.vm, NULL); 768 if (IS_ERR(vma)) { 769 err = PTR_ERR(vma); 770 goto err; 771 } 772 773 err = i915_vma_pin(vma, 0, 0, PIN_USER); 774 if (err) 775 goto err; 776 777 cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); 778 if (IS_ERR(cmd)) { 779 err = PTR_ERR(cmd); 780 goto err; 781 } 782 783 if (gen >= 8) { 784 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; 785 *cmd++ = lower_32_bits(vma->node.start); 786 *cmd++ = upper_32_bits(vma->node.start); 787 } else if (gen >= 6) { 788 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; 789 *cmd++ = lower_32_bits(vma->node.start); 790 } else { 791 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 792 *cmd++ = lower_32_bits(vma->node.start); 793 } 794 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ 795 796 __i915_gem_object_flush_map(obj, 0, 64); 797 i915_gem_object_unpin_map(obj); 798 799 intel_gt_chipset_flush(&i915->gt); 800 801 return vma; 802 803 err: 804 i915_gem_object_put(obj); 805 return ERR_PTR(err); 806 } 807 808 static int recursive_batch_resolve(struct i915_vma *batch) 809 { 810 u32 *cmd; 811 812 cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 813 if (IS_ERR(cmd)) 814 return PTR_ERR(cmd); 815 816 *cmd = MI_BATCH_BUFFER_END; 817 intel_gt_chipset_flush(batch->vm->gt); 818 819 i915_gem_object_unpin_map(batch->obj); 820 821 return 0; 822 } 823 824 static int live_all_engines(void *arg) 825 { 826 struct drm_i915_private *i915 = arg; 827 const unsigned int nengines = num_uabi_engines(i915); 828 struct intel_engine_cs *engine; 829 struct i915_request **request; 830 struct igt_live_test t; 831 struct i915_vma *batch; 832 unsigned int idx; 833 int err; 834 835 /* 836 * Check we can submit requests to all engines simultaneously. We 837 * send a recursive batch to each engine - checking that we don't 838 * block doing so, and that they don't complete too soon. 839 */ 840 841 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); 842 if (!request) 843 return -ENOMEM; 844 845 err = igt_live_test_begin(&t, i915, __func__, ""); 846 if (err) 847 goto out_free; 848 849 batch = recursive_batch(i915); 850 if (IS_ERR(batch)) { 851 err = PTR_ERR(batch); 852 pr_err("%s: Unable to create batch, err=%d\n", __func__, err); 853 goto out_free; 854 } 855 856 idx = 0; 857 for_each_uabi_engine(engine, i915) { 858 request[idx] = intel_engine_create_kernel_request(engine); 859 if (IS_ERR(request[idx])) { 860 err = PTR_ERR(request[idx]); 861 pr_err("%s: Request allocation failed with err=%d\n", 862 __func__, err); 863 goto out_request; 864 } 865 866 err = engine->emit_bb_start(request[idx], 867 batch->node.start, 868 batch->node.size, 869 0); 870 GEM_BUG_ON(err); 871 request[idx]->batch = batch; 872 873 i915_vma_lock(batch); 874 err = i915_request_await_object(request[idx], batch->obj, 0); 875 if (err == 0) 876 err = i915_vma_move_to_active(batch, request[idx], 0); 877 i915_vma_unlock(batch); 878 GEM_BUG_ON(err); 879 880 i915_request_get(request[idx]); 881 i915_request_add(request[idx]); 882 idx++; 883 } 884 885 idx = 0; 886 for_each_uabi_engine(engine, i915) { 887 if (i915_request_completed(request[idx])) { 888 pr_err("%s(%s): request completed too early!\n", 889 __func__, engine->name); 890 err = -EINVAL; 891 goto out_request; 892 } 893 idx++; 894 } 895 896 err = recursive_batch_resolve(batch); 897 if (err) { 898 pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); 899 goto out_request; 900 } 901 902 idx = 0; 903 for_each_uabi_engine(engine, i915) { 904 long timeout; 905 906 timeout = i915_request_wait(request[idx], 0, 907 MAX_SCHEDULE_TIMEOUT); 908 if (timeout < 0) { 909 err = timeout; 910 pr_err("%s: error waiting for request on %s, err=%d\n", 911 __func__, engine->name, err); 912 goto out_request; 913 } 914 915 GEM_BUG_ON(!i915_request_completed(request[idx])); 916 i915_request_put(request[idx]); 917 request[idx] = NULL; 918 idx++; 919 } 920 921 err = igt_live_test_end(&t); 922 923 out_request: 924 idx = 0; 925 for_each_uabi_engine(engine, i915) { 926 if (request[idx]) 927 i915_request_put(request[idx]); 928 idx++; 929 } 930 i915_vma_unpin(batch); 931 i915_vma_put(batch); 932 out_free: 933 kfree(request); 934 return err; 935 } 936 937 static int live_sequential_engines(void *arg) 938 { 939 struct drm_i915_private *i915 = arg; 940 const unsigned int nengines = num_uabi_engines(i915); 941 struct i915_request **request; 942 struct i915_request *prev = NULL; 943 struct intel_engine_cs *engine; 944 struct igt_live_test t; 945 unsigned int idx; 946 int err; 947 948 /* 949 * Check we can submit requests to all engines sequentially, such 950 * that each successive request waits for the earlier ones. This 951 * tests that we don't execute requests out of order, even though 952 * they are running on independent engines. 953 */ 954 955 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); 956 if (!request) 957 return -ENOMEM; 958 959 err = igt_live_test_begin(&t, i915, __func__, ""); 960 if (err) 961 goto out_free; 962 963 idx = 0; 964 for_each_uabi_engine(engine, i915) { 965 struct i915_vma *batch; 966 967 batch = recursive_batch(i915); 968 if (IS_ERR(batch)) { 969 err = PTR_ERR(batch); 970 pr_err("%s: Unable to create batch for %s, err=%d\n", 971 __func__, engine->name, err); 972 goto out_free; 973 } 974 975 request[idx] = intel_engine_create_kernel_request(engine); 976 if (IS_ERR(request[idx])) { 977 err = PTR_ERR(request[idx]); 978 pr_err("%s: Request allocation failed for %s with err=%d\n", 979 __func__, engine->name, err); 980 goto out_request; 981 } 982 983 if (prev) { 984 err = i915_request_await_dma_fence(request[idx], 985 &prev->fence); 986 if (err) { 987 i915_request_add(request[idx]); 988 pr_err("%s: Request await failed for %s with err=%d\n", 989 __func__, engine->name, err); 990 goto out_request; 991 } 992 } 993 994 err = engine->emit_bb_start(request[idx], 995 batch->node.start, 996 batch->node.size, 997 0); 998 GEM_BUG_ON(err); 999 request[idx]->batch = batch; 1000 1001 i915_vma_lock(batch); 1002 err = i915_request_await_object(request[idx], 1003 batch->obj, false); 1004 if (err == 0) 1005 err = i915_vma_move_to_active(batch, request[idx], 0); 1006 i915_vma_unlock(batch); 1007 GEM_BUG_ON(err); 1008 1009 i915_request_get(request[idx]); 1010 i915_request_add(request[idx]); 1011 1012 prev = request[idx]; 1013 idx++; 1014 } 1015 1016 idx = 0; 1017 for_each_uabi_engine(engine, i915) { 1018 long timeout; 1019 1020 if (i915_request_completed(request[idx])) { 1021 pr_err("%s(%s): request completed too early!\n", 1022 __func__, engine->name); 1023 err = -EINVAL; 1024 goto out_request; 1025 } 1026 1027 err = recursive_batch_resolve(request[idx]->batch); 1028 if (err) { 1029 pr_err("%s: failed to resolve batch, err=%d\n", 1030 __func__, err); 1031 goto out_request; 1032 } 1033 1034 timeout = i915_request_wait(request[idx], 0, 1035 MAX_SCHEDULE_TIMEOUT); 1036 if (timeout < 0) { 1037 err = timeout; 1038 pr_err("%s: error waiting for request on %s, err=%d\n", 1039 __func__, engine->name, err); 1040 goto out_request; 1041 } 1042 1043 GEM_BUG_ON(!i915_request_completed(request[idx])); 1044 idx++; 1045 } 1046 1047 err = igt_live_test_end(&t); 1048 1049 out_request: 1050 idx = 0; 1051 for_each_uabi_engine(engine, i915) { 1052 u32 *cmd; 1053 1054 if (!request[idx]) 1055 break; 1056 1057 cmd = i915_gem_object_pin_map(request[idx]->batch->obj, 1058 I915_MAP_WC); 1059 if (!IS_ERR(cmd)) { 1060 *cmd = MI_BATCH_BUFFER_END; 1061 intel_gt_chipset_flush(engine->gt); 1062 1063 i915_gem_object_unpin_map(request[idx]->batch->obj); 1064 } 1065 1066 i915_vma_put(request[idx]->batch); 1067 i915_request_put(request[idx]); 1068 idx++; 1069 } 1070 out_free: 1071 kfree(request); 1072 return err; 1073 } 1074 1075 static int __live_parallel_engine1(void *arg) 1076 { 1077 struct intel_engine_cs *engine = arg; 1078 IGT_TIMEOUT(end_time); 1079 unsigned long count; 1080 int err = 0; 1081 1082 count = 0; 1083 intel_engine_pm_get(engine); 1084 do { 1085 struct i915_request *rq; 1086 1087 rq = i915_request_create(engine->kernel_context); 1088 if (IS_ERR(rq)) { 1089 err = PTR_ERR(rq); 1090 break; 1091 } 1092 1093 i915_request_get(rq); 1094 i915_request_add(rq); 1095 1096 err = 0; 1097 if (i915_request_wait(rq, 0, HZ / 5) < 0) 1098 err = -ETIME; 1099 i915_request_put(rq); 1100 if (err) 1101 break; 1102 1103 count++; 1104 } while (!__igt_timeout(end_time, NULL)); 1105 intel_engine_pm_put(engine); 1106 1107 pr_info("%s: %lu request + sync\n", engine->name, count); 1108 return err; 1109 } 1110 1111 static int __live_parallel_engineN(void *arg) 1112 { 1113 struct intel_engine_cs *engine = arg; 1114 IGT_TIMEOUT(end_time); 1115 unsigned long count; 1116 int err = 0; 1117 1118 count = 0; 1119 intel_engine_pm_get(engine); 1120 do { 1121 struct i915_request *rq; 1122 1123 rq = i915_request_create(engine->kernel_context); 1124 if (IS_ERR(rq)) { 1125 err = PTR_ERR(rq); 1126 break; 1127 } 1128 1129 i915_request_add(rq); 1130 count++; 1131 } while (!__igt_timeout(end_time, NULL)); 1132 intel_engine_pm_put(engine); 1133 1134 pr_info("%s: %lu requests\n", engine->name, count); 1135 return err; 1136 } 1137 1138 static bool wake_all(struct drm_i915_private *i915) 1139 { 1140 if (atomic_dec_and_test(&i915->selftest.counter)) { 1141 wake_up_var(&i915->selftest.counter); 1142 return true; 1143 } 1144 1145 return false; 1146 } 1147 1148 static int wait_for_all(struct drm_i915_private *i915) 1149 { 1150 if (wake_all(i915)) 1151 return 0; 1152 1153 if (wait_var_event_timeout(&i915->selftest.counter, 1154 !atomic_read(&i915->selftest.counter), 1155 i915_selftest.timeout_jiffies)) 1156 return 0; 1157 1158 return -ETIME; 1159 } 1160 1161 static int __live_parallel_spin(void *arg) 1162 { 1163 struct intel_engine_cs *engine = arg; 1164 struct igt_spinner spin; 1165 struct i915_request *rq; 1166 int err = 0; 1167 1168 /* 1169 * Create a spinner running for eternity on each engine. If a second 1170 * spinner is incorrectly placed on the same engine, it will not be 1171 * able to start in time. 1172 */ 1173 1174 if (igt_spinner_init(&spin, engine->gt)) { 1175 wake_all(engine->i915); 1176 return -ENOMEM; 1177 } 1178 1179 intel_engine_pm_get(engine); 1180 rq = igt_spinner_create_request(&spin, 1181 engine->kernel_context, 1182 MI_NOOP); /* no preemption */ 1183 intel_engine_pm_put(engine); 1184 if (IS_ERR(rq)) { 1185 err = PTR_ERR(rq); 1186 if (err == -ENODEV) 1187 err = 0; 1188 wake_all(engine->i915); 1189 goto out_spin; 1190 } 1191 1192 i915_request_get(rq); 1193 i915_request_add(rq); 1194 if (igt_wait_for_spinner(&spin, rq)) { 1195 /* Occupy this engine for the whole test */ 1196 err = wait_for_all(engine->i915); 1197 } else { 1198 pr_err("Failed to start spinner on %s\n", engine->name); 1199 err = -EINVAL; 1200 } 1201 igt_spinner_end(&spin); 1202 1203 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) 1204 err = -EIO; 1205 i915_request_put(rq); 1206 1207 out_spin: 1208 igt_spinner_fini(&spin); 1209 return err; 1210 } 1211 1212 static int live_parallel_engines(void *arg) 1213 { 1214 struct drm_i915_private *i915 = arg; 1215 static int (* const func[])(void *arg) = { 1216 __live_parallel_engine1, 1217 __live_parallel_engineN, 1218 __live_parallel_spin, 1219 NULL, 1220 }; 1221 const unsigned int nengines = num_uabi_engines(i915); 1222 struct intel_engine_cs *engine; 1223 int (* const *fn)(void *arg); 1224 struct task_struct **tsk; 1225 int err = 0; 1226 1227 /* 1228 * Check we can submit requests to all engines concurrently. This 1229 * tests that we load up the system maximally. 1230 */ 1231 1232 tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); 1233 if (!tsk) 1234 return -ENOMEM; 1235 1236 for (fn = func; !err && *fn; fn++) { 1237 char name[KSYM_NAME_LEN]; 1238 struct igt_live_test t; 1239 unsigned int idx; 1240 1241 snprintf(name, sizeof(name), "%pS", fn); 1242 err = igt_live_test_begin(&t, i915, __func__, name); 1243 if (err) 1244 break; 1245 1246 atomic_set(&i915->selftest.counter, nengines); 1247 1248 idx = 0; 1249 for_each_uabi_engine(engine, i915) { 1250 tsk[idx] = kthread_run(*fn, engine, 1251 "igt/parallel:%s", 1252 engine->name); 1253 if (IS_ERR(tsk[idx])) { 1254 err = PTR_ERR(tsk[idx]); 1255 break; 1256 } 1257 get_task_struct(tsk[idx++]); 1258 } 1259 1260 yield(); /* start all threads before we kthread_stop() */ 1261 1262 idx = 0; 1263 for_each_uabi_engine(engine, i915) { 1264 int status; 1265 1266 if (IS_ERR(tsk[idx])) 1267 break; 1268 1269 status = kthread_stop(tsk[idx]); 1270 if (status && !err) 1271 err = status; 1272 1273 put_task_struct(tsk[idx++]); 1274 } 1275 1276 if (igt_live_test_end(&t)) 1277 err = -EIO; 1278 } 1279 1280 kfree(tsk); 1281 return err; 1282 } 1283 1284 static int 1285 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) 1286 { 1287 struct i915_request *rq; 1288 int ret; 1289 1290 /* 1291 * Before execlists, all contexts share the same ringbuffer. With 1292 * execlists, each context/engine has a separate ringbuffer and 1293 * for the purposes of this test, inexhaustible. 1294 * 1295 * For the global ringbuffer though, we have to be very careful 1296 * that we do not wrap while preventing the execution of requests 1297 * with a unsignaled fence. 1298 */ 1299 if (HAS_EXECLISTS(ctx->i915)) 1300 return INT_MAX; 1301 1302 rq = igt_request_alloc(ctx, engine); 1303 if (IS_ERR(rq)) { 1304 ret = PTR_ERR(rq); 1305 } else { 1306 int sz; 1307 1308 ret = rq->ring->size - rq->reserved_space; 1309 i915_request_add(rq); 1310 1311 sz = rq->ring->emit - rq->head; 1312 if (sz < 0) 1313 sz += rq->ring->size; 1314 ret /= sz; 1315 ret /= 2; /* leave half spare, in case of emergency! */ 1316 } 1317 1318 return ret; 1319 } 1320 1321 static int live_breadcrumbs_smoketest(void *arg) 1322 { 1323 struct drm_i915_private *i915 = arg; 1324 const unsigned int nengines = num_uabi_engines(i915); 1325 const unsigned int ncpus = num_online_cpus(); 1326 unsigned long num_waits, num_fences; 1327 struct intel_engine_cs *engine; 1328 struct task_struct **threads; 1329 struct igt_live_test live; 1330 intel_wakeref_t wakeref; 1331 struct smoketest *smoke; 1332 unsigned int n, idx; 1333 struct file *file; 1334 int ret = 0; 1335 1336 /* 1337 * Smoketest our breadcrumb/signal handling for requests across multiple 1338 * threads. A very simple test to only catch the most egregious of bugs. 1339 * See __igt_breadcrumbs_smoketest(); 1340 * 1341 * On real hardware this time. 1342 */ 1343 1344 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1345 1346 file = mock_file(i915); 1347 if (IS_ERR(file)) { 1348 ret = PTR_ERR(file); 1349 goto out_rpm; 1350 } 1351 1352 smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL); 1353 if (!smoke) { 1354 ret = -ENOMEM; 1355 goto out_file; 1356 } 1357 1358 threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL); 1359 if (!threads) { 1360 ret = -ENOMEM; 1361 goto out_smoke; 1362 } 1363 1364 smoke[0].request_alloc = __live_request_alloc; 1365 smoke[0].ncontexts = 64; 1366 smoke[0].contexts = kcalloc(smoke[0].ncontexts, 1367 sizeof(*smoke[0].contexts), 1368 GFP_KERNEL); 1369 if (!smoke[0].contexts) { 1370 ret = -ENOMEM; 1371 goto out_threads; 1372 } 1373 1374 for (n = 0; n < smoke[0].ncontexts; n++) { 1375 smoke[0].contexts[n] = live_context(i915, file); 1376 if (!smoke[0].contexts[n]) { 1377 ret = -ENOMEM; 1378 goto out_contexts; 1379 } 1380 } 1381 1382 ret = igt_live_test_begin(&live, i915, __func__, ""); 1383 if (ret) 1384 goto out_contexts; 1385 1386 idx = 0; 1387 for_each_uabi_engine(engine, i915) { 1388 smoke[idx] = smoke[0]; 1389 smoke[idx].engine = engine; 1390 smoke[idx].max_batch = 1391 max_batches(smoke[0].contexts[0], engine); 1392 if (smoke[idx].max_batch < 0) { 1393 ret = smoke[idx].max_batch; 1394 goto out_flush; 1395 } 1396 /* One ring interleaved between requests from all cpus */ 1397 smoke[idx].max_batch /= num_online_cpus() + 1; 1398 pr_debug("Limiting batches to %d requests on %s\n", 1399 smoke[idx].max_batch, engine->name); 1400 1401 for (n = 0; n < ncpus; n++) { 1402 struct task_struct *tsk; 1403 1404 tsk = kthread_run(__igt_breadcrumbs_smoketest, 1405 &smoke[idx], "igt/%d.%d", idx, n); 1406 if (IS_ERR(tsk)) { 1407 ret = PTR_ERR(tsk); 1408 goto out_flush; 1409 } 1410 1411 get_task_struct(tsk); 1412 threads[idx * ncpus + n] = tsk; 1413 } 1414 1415 idx++; 1416 } 1417 1418 yield(); /* start all threads before we begin */ 1419 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); 1420 1421 out_flush: 1422 idx = 0; 1423 num_waits = 0; 1424 num_fences = 0; 1425 for_each_uabi_engine(engine, i915) { 1426 for (n = 0; n < ncpus; n++) { 1427 struct task_struct *tsk = threads[idx * ncpus + n]; 1428 int err; 1429 1430 if (!tsk) 1431 continue; 1432 1433 err = kthread_stop(tsk); 1434 if (err < 0 && !ret) 1435 ret = err; 1436 1437 put_task_struct(tsk); 1438 } 1439 1440 num_waits += atomic_long_read(&smoke[idx].num_waits); 1441 num_fences += atomic_long_read(&smoke[idx].num_fences); 1442 idx++; 1443 } 1444 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", 1445 num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); 1446 1447 ret = igt_live_test_end(&live) ?: ret; 1448 out_contexts: 1449 kfree(smoke[0].contexts); 1450 out_threads: 1451 kfree(threads); 1452 out_smoke: 1453 kfree(smoke); 1454 out_file: 1455 fput(file); 1456 out_rpm: 1457 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1458 1459 return ret; 1460 } 1461 1462 int i915_request_live_selftests(struct drm_i915_private *i915) 1463 { 1464 static const struct i915_subtest tests[] = { 1465 SUBTEST(live_nop_request), 1466 SUBTEST(live_all_engines), 1467 SUBTEST(live_sequential_engines), 1468 SUBTEST(live_parallel_engines), 1469 SUBTEST(live_empty_request), 1470 SUBTEST(live_breadcrumbs_smoketest), 1471 }; 1472 1473 if (intel_gt_is_wedged(&i915->gt)) 1474 return 0; 1475 1476 return i915_subtests(tests, i915); 1477 } 1478