/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prefetch.h>
#include <linux/dma-fence-array.h>

#include "i915_drv.h"

static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return "i915";
}

static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
	return to_request(fence)->timeline->common->name;
}

static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_gem_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	if (i915_fence_signaled(fence))
		return false;

	intel_engine_enable_signaling(to_request(fence));
	return true;
}

static signed long i915_fence_wait(struct dma_fence *fence,
				   bool interruptible,
				   signed long timeout)
{
	return i915_wait_request(to_request(fence), interruptible, timeout);
}

static void i915_fence_release(struct dma_fence *fence)
{
	struct drm_i915_gem_request *req = to_request(fence);

	kmem_cache_free(req->i915->requests, req);
}

const struct dma_fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
};

int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_private *dev_private;
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	dev_private = req->i915;
	file_priv = file->driver_priv;

	lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	lockmgr(&file_priv->mm.lock, LK_RELEASE);

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
	list_del(&request->client_list);
	request->file_priv = NULL;
	lockmgr(&file_priv->mm.lock, LK_RELEASE);
}

static struct i915_dependency *
i915_dependency_alloc(struct drm_i915_private *i915)
{
	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct drm_i915_private *i915,
		     struct i915_dependency *dep)
{
	kmem_cache_free(i915->dependencies, dep);
}

static void
__i915_priotree_add_dependency(struct i915_priotree *pt,
			       struct i915_priotree *signal,
			       struct i915_dependency *dep,
			       unsigned long flags)
{
	INIT_LIST_HEAD(&dep->dfs_link);
	list_add(&dep->wait_link, &signal->waiters_list);
	list_add(&dep->signal_link, &pt->signalers_list);
	dep->signaler = signal;
	dep->flags = flags;
}

static int
i915_priotree_add_dependency(struct drm_i915_private *i915,
			     struct i915_priotree *pt,
			     struct i915_priotree *signal)
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc(i915);
	if (!dep)
		return -ENOMEM;

	__i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC);
	return 0;
}

static void
i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
{
	struct i915_dependency *dep, *next;

	GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));

	/* Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) {
		list_del(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) {
		list_del(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}
}

static void
i915_priotree_init(struct i915_priotree *pt)
{
	INIT_LIST_HEAD(&pt->signalers_list);
	INIT_LIST_HEAD(&pt->waiters_list);
	RB_CLEAR_NODE(&pt->node);
	pt->priority = INT_MIN;
}

void i915_gem_retire_noop(struct i915_gem_active *active,
			  struct drm_i915_gem_request *request)
{
	/* Space left intentionally blank */
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	struct i915_gem_active *active, *next;

	lockdep_assert_held(&request->i915->drm.struct_mutex);
	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
	GEM_BUG_ON(!i915_gem_request_completed(request));
	GEM_BUG_ON(!request->i915->gt.active_requests);

	trace_i915_gem_request_retire(request);

	spin_lock_irq(&request->engine->timeline->lock);
	list_del_init(&request->link);
	spin_unlock_irq(&request->engine->timeline->lock);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of the tail of the request to update the last known
	 * position of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
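	 * (Requests on an engine timeline are retired oldest-first; see
	 * i915_gem_request_retire_upto() and engine_retire_requests().)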
	 */
	list_del(&request->ring_link);
	request->ring->last_retired_head = request->postfix;
	if (!--request->i915->gt.active_requests) {
		GEM_BUG_ON(!request->i915->gt.awake);
		mod_delayed_work(request->i915->wq,
				 &request->i915->gt.idle_work,
				 msecs_to_jiffies(100));
	}

	/* Walk through the active list, calling retire on each. This allows
	 * objects to track their GPU activity and mark themselves as idle
	 * when their *last* active request is completed (updating state
	 * tracking lists for eviction, active references for GEM, etc).
	 *
	 * As the ->retire() may free the node, we decouple it first and
	 * pass along the auxiliary information (to avoid dereferencing
	 * the node after the callback).
	 */
	list_for_each_entry_safe(active, next, &request->active_list, link) {
		/* In microbenchmarks, or when focusing upon time inside the
		 * kernel, we may spend an inordinate amount of time simply
		 * handling the retirement of requests and processing their
		 * callbacks. This loop in particular is hot due to the cache
		 * misses incurred when jumping around the list of
		 * i915_gem_active, so we try to keep it as streamlined as
		 * possible and also prefetch the next i915_gem_active to try
		 * and hide the likely cache miss.
		 */
		prefetchw(next);

		INIT_LIST_HEAD(&active->link);
		RCU_INIT_POINTER(active->request, NULL);

		active->retire(active, request);
	}

	i915_gem_request_remove_from_client(request);

	if (request->previous_context) {
		if (i915.enable_execlists)
			intel_lr_context_unpin(request->previous_context,
					       request->engine);
	}

	i915_gem_context_put(request->ctx);

	dma_fence_signal(&request->fence);

	i915_priotree_fini(request->i915, &request->priotree);
	i915_gem_request_put(request);
}

void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&req->i915->drm.struct_mutex);
	if (list_empty(&req->link))
		return;

	do {
		tmp = list_first_entry(&engine->timeline->requests,
				       typeof(*tmp), link);

		i915_gem_request_retire(tmp);
	} while (tmp != req);
}

static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
{
	struct i915_gpu_error *error = &dev_priv->gpu_error;

	if (i915_terminally_wedged(error))
		return -EIO;

	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these.
		 */
		if (!dev_priv->mm.interruptible)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
{
	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int ret;

	/* Carefully retire all requests without writing to the rings */
	ret = i915_gem_wait_for_idle(i915,
				     I915_WAIT_INTERRUPTIBLE |
				     I915_WAIT_LOCKED);
	if (ret)
		return ret;

	i915_gem_retire_requests(i915);
	GEM_BUG_ON(i915->gt.active_requests > 1);

	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
	if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) {
		while (intel_breadcrumbs_busy(i915))
			cond_resched(); /* spin until threads are complete */
	}
	atomic_set(&timeline->next_seqno, seqno);

	/* Finally reset hw state */
	for_each_engine(engine, i915, id)
		intel_engine_init_global_seqno(engine, seqno);

	list_for_each_entry(timeline, &i915->gt.timelines, link) {
		for_each_engine(engine, i915, id) {
			struct intel_timeline *tl = &timeline->engine[id];

			memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno));
		}
	}

	return 0;
}

int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	if (seqno == 0)
		return -EINVAL;

	/* The HWS page needs to be set to a value less than the seqno
	 * we will inject into the ring.
	 */
	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
}

static int reserve_global_seqno(struct drm_i915_private *i915)
{
	u32 active_requests = ++i915->gt.active_requests;
	u32 next_seqno = atomic_read(&i915->gt.global_timeline.next_seqno);
	int ret;

	/* Reservation is fine until we need to wrap around */
	if (likely(next_seqno + active_requests > next_seqno))
		return 0;

	ret = i915_gem_init_global_seqno(i915, 0);
	if (ret) {
		i915->gt.active_requests--;
		return ret;
	}

	return 0;
}

static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
{
	/* next_seqno only incremented under a mutex */
	return ++tl->next_seqno.counter;
}

static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
{
	return atomic_inc_return(&tl->next_seqno);
}

void __i915_gem_request_submit(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_timeline *timeline;
	u32 seqno;

	/* Transfer from per-context onto the global per-engine timeline */
	timeline = engine->timeline;
	GEM_BUG_ON(timeline == request->timeline);
	assert_spin_locked(&timeline->lock);

	seqno = timeline_get_seqno(timeline->common);
	GEM_BUG_ON(!seqno);
	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));

	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
	request->previous_seqno = timeline->last_submitted_seqno;
	timeline->last_submitted_seqno = seqno;

	/* We may be recursing from the signal callback of another i915 fence */
	lockmgr(&request->lock, LK_EXCLUSIVE);
	request->global_seqno = seqno;
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		intel_engine_enable_signaling(request);
	lockmgr(&request->lock, LK_RELEASE);

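	/* Now that the request has its global seqno, write the breadcrumb
	 * (seqno write + user interrupt) into the ring space reserved for it
	 * at request->postfix (see __i915_add_request()).
	 */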
	GEM_BUG_ON(!request->global_seqno);
	engine->emit_breadcrumb(request,
				request->ring->vaddr + request->postfix);

	lockmgr(&request->timeline->lock, LK_EXCLUSIVE);
	list_move_tail(&request->link, &timeline->requests);
	lockmgr(&request->timeline->lock, LK_RELEASE);

	i915_sw_fence_commit(&request->execute);
}

void i915_gem_request_submit(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->timeline->lock, flags);

	__i915_gem_request_submit(request);

	spin_unlock_irqrestore(&engine->timeline->lock, flags);
}

static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct drm_i915_gem_request *request =
		container_of(fence, typeof(*request), submit);

	switch (state) {
	case FENCE_COMPLETE:
		request->engine->submit_request(request);
		break;

	case FENCE_FREE:
		i915_gem_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

static int __i915_sw_fence_call
execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct drm_i915_gem_request *request =
		container_of(fence, typeof(*request), execute);

	switch (state) {
	case FENCE_COMPLETE:
		break;

	case FENCE_FREE:
		i915_gem_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * i915_gem_request_alloc - allocate a request structure
 *
 * @engine: engine that we wish to issue the request on.
 * @ctx: context that the request will be associated with.
 *       This can be NULL if the request is not directly related to
 *       any specific user context, in which case this function will
 *       choose an appropriate context to use.
 *
 * Returns a pointer to the allocated request if successful,
 * or an error code if not.
 */
struct drm_i915_gem_request *
i915_gem_request_alloc(struct intel_engine_cs *engine,
		       struct i915_gem_context *ctx)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct drm_i915_gem_request *req;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
	 * and restart.
	 */
	ret = i915_gem_check_wedge(dev_priv);
	if (ret)
		return ERR_PTR(ret);

	ret = reserve_global_seqno(dev_priv);
	if (ret)
		return ERR_PTR(ret);

	/* Move the oldest request to the slab-cache (if not in use!) */
	req = list_first_entry_or_null(&engine->timeline->requests,
				       typeof(*req), link);
	if (req && __i915_gem_request_completed(req))
		i915_gem_request_retire(req);

	/* Beware: Dragons be flying overhead.
	 *
	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is, the request we are writing to here may be in the
	 * process of being read by __i915_gem_active_get_rcu(). As such,
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we chase the request->engine pointer, read the
	 * request->global_seqno and increment the reference count.
	 *
	 * The reference count is incremented atomically.
	 * If it is zero, the lookup knows the request is unallocated and
	 * complete. Otherwise, it is either still in use, or has been
	 * reallocated and reset with dma_fence_init(). This increment is
	 * safe for release as we check that the request we have a reference
	 * to matches the active request.
	 *
	 * Before we increment the refcount, we chase the request->engine
	 * pointer. We must not call kmem_cache_zalloc() or else we set
	 * that pointer to NULL and cause a crash during the lookup. If
	 * we see the request is completed (based on the value of the
	 * old engine and seqno), the lookup is complete and reports NULL.
	 * If we decide the request is not completed (new engine or seqno),
	 * then we grab a reference and double check that it is still the
	 * active request - which it won't be, and restart the lookup.
	 *
	 * Do not use kmem_cache_zalloc() here!
	 */
	req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto err_unreserve;
	}

	req->timeline = i915_gem_context_lookup_timeline(ctx, engine);
	GEM_BUG_ON(req->timeline == engine->timeline);

	lockinit(&req->lock, "i915_rl", 0, 0);
	dma_fence_init(&req->fence,
		       &i915_fence_ops,
		       &req->lock,
		       req->timeline->fence_context,
		       __timeline_get_seqno(req->timeline->common));

	/* We bump the ref for the fence chain */
	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
	i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify);

	/* Ensure that the execute fence completes after the submit fence -
	 * as we complete the execute fence from within the submit fence
	 * callback, its completion would otherwise be visible first.
	 */
	i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq);

	i915_priotree_init(&req->priotree);

	INIT_LIST_HEAD(&req->active_list);
	req->i915 = dev_priv;
	req->engine = engine;
	req->ctx = i915_gem_context_get(ctx);

	/* No zalloc, must clear what we need by hand */
	req->global_seqno = 0;
	req->previous_context = NULL;
	req->file_priv = NULL;
	req->batch = NULL;

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_add_request() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 */
	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
	GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz);

	if (i915.enable_execlists)
		ret = intel_logical_ring_alloc_request_extras(req);
	else
		ret = intel_ring_alloc_request_extras(req);
	if (ret)
		goto err_ctx;

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	req->head = req->ring->tail;

	return req;

err_ctx:
	i915_gem_context_put(ctx);
	kmem_cache_free(dev_priv->requests, req);
err_unreserve:
	dev_priv->gt.active_requests--;
	return ERR_PTR(ret);
}

static int
i915_gem_request_await_request(struct drm_i915_gem_request *to,
			       struct drm_i915_gem_request *from)
{
	int ret;

	GEM_BUG_ON(to == from);

	if (to->engine->schedule) {
		ret = i915_priotree_add_dependency(to->i915,
						   &to->priotree,
						   &from->priotree);
		if (ret < 0)
			return ret;
	}

	if (to->timeline == from->timeline)
		return 0;

	if (to->engine == from->engine) {
		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
						       &from->submit,
						       GFP_KERNEL);
		return ret < 0 ? ret : 0;
	}

	if (!from->global_seqno) {
		ret = i915_sw_fence_await_dma_fence(&to->submit,
						    &from->fence, 0,
						    GFP_KERNEL);
		return ret < 0 ? ret : 0;
	}

	if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
		return 0;

	trace_i915_gem_ring_sync_to(to, from);
	if (!i915.semaphores) {
		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
			ret = i915_sw_fence_await_dma_fence(&to->submit,
							    &from->fence, 0,
							    GFP_KERNEL);
			if (ret < 0)
				return ret;
		}
	} else {
		ret = to->engine->semaphore.sync_to(to, from);
		if (ret)
			return ret;
	}

	to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
	return 0;
}

int
i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
				 struct dma_fence *fence)
{
	struct dma_fence_array *array;
	int ret;
	int i;

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return 0;

	if (dma_fence_is_i915(fence))
		return i915_gem_request_await_request(req, to_request(fence));

	if (!dma_fence_is_array(fence)) {
		ret = i915_sw_fence_await_dma_fence(&req->submit,
						    fence, I915_FENCE_TIMEOUT,
						    GFP_KERNEL);
		return ret < 0 ? ret : 0;
	}

	/* Note that if the fence-array was created in signal-on-any mode,
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */

	array = to_dma_fence_array(fence);
	for (i = 0; i < array->num_fences; i++) {
		struct dma_fence *child = array->fences[i];

		if (dma_fence_is_i915(child))
			ret = i915_gem_request_await_request(req,
							     to_request(child));
		else
			ret = i915_sw_fence_await_dma_fence(&req->submit,
							    child,
							    I915_FENCE_TIMEOUT,
							    GFP_KERNEL);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/**
 * i915_gem_request_await_object - set this request to (async) wait upon a bo
 *
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
 * @write: whether the new request is a writer (and so must wait for all
 *         outstanding reads as well as the last write)
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers.
 * To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (pending_write_domain is set), the new
 *   request must wait for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
			      struct drm_i915_gem_object *obj,
			      bool write)
{
	struct dma_fence *excl;
	int ret = 0;

	if (write) {
		struct dma_fence **shared;
		unsigned int count, i;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			ret = i915_gem_request_await_dma_fence(to, shared[i]);
			if (ret)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		if (ret == 0)
			ret = i915_gem_request_await_dma_fence(to, excl);

		dma_fence_put(excl);
	}

	return ret;
}

static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (dev_priv->gt.awake)
		return;

	GEM_BUG_ON(!dev_priv->gt.active_requests);

	intel_runtime_pm_get_noresume(dev_priv);
	dev_priv->gt.awake = true;

	intel_enable_gt_powersave(dev_priv);
	i915_update_gfx_val(dev_priv);
	if (INTEL_GEN(dev_priv) >= 6)
		gen6_rps_busy(dev_priv);

	queue_delayed_work(dev_priv->wq,
			   &dev_priv->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}

/*
 * NB: This function is not allowed to fail. Doing so would mean the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_ring *ring = request->ring;
	struct intel_timeline *timeline = request->timeline;
	struct drm_i915_gem_request *prev;
	int err;

	lockdep_assert_held(&request->i915->drm.struct_mutex);
	trace_i915_gem_request_add(request);

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	request->reserved_space = 0;

	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	if (flush_caches) {
		err = engine->emit_flush(request, EMIT_FLUSH);

		/* Not allowed to fail! */
		WARN(err, "engine->emit_flush() failed: %d!\n", err);
	}

	/* Record the position of the start of the breadcrumb so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the ring's HEAD.
	 */
	err = intel_ring_begin(request, engine->emit_breadcrumb_sz);
	GEM_BUG_ON(err);
	request->postfix = ring->tail;
	ring->tail += engine->emit_breadcrumb_sz * sizeof(u32);

	/* Seal the request and mark it as pending execution. Note that
	 * we may inspect this state, without holding any locks, during
	 * hangcheck. Hence we apply the barrier to ensure that we do not
	 * see a more recent value in the hws than we are tracking.
	 */

	prev = i915_gem_active_raw(&timeline->last_request,
				   &request->i915->drm.struct_mutex);
	if (prev) {
		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
					     &request->submitq);
		if (engine->schedule)
			__i915_priotree_add_dependency(&request->priotree,
						       &prev->priotree,
						       &request->dep,
						       0);
	}

	spin_lock_irq(&timeline->lock);
	list_add_tail(&request->link, &timeline->requests);
	spin_unlock_irq(&timeline->lock);

	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
				     request->fence.seqno));

	timeline->last_submitted_seqno = request->fence.seqno;
	i915_gem_active_set(&timeline->last_request, request);

	list_add_tail(&request->ring_link, &ring->request_list);
	request->emitted_jiffies = jiffies;

	i915_gem_mark_busy(engine);

	/* Let the backend know a new request has arrived that may need
	 * to adjust the existing execution schedule due to a high priority
	 * request - i.e. we may want to preempt the current request in order
	 * to run a high priority dependency chain *before* we can execute this
	 * request.
	 *
	 * This is called before the request is ready to run so that we can
	 * decide whether to preempt the entire chain so that it is ready to
	 * run at the earliest possible convenience.
	 */
	if (engine->schedule)
		engine->schedule(request, request->ctx->priority);

	local_bh_disable();
	i915_sw_fence_commit(&request->submit);
	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
}

static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}

static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/* Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined with respect to the current
	 * CPU; the comparisons are no longer valid if we switch CPUs. Instead
	 * of blocking preemption for the entire busywait, we can detect the
	 * CPU switch and use that as an indicator of system load and a reason
	 * to stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

bool __i915_spin_request(const struct drm_i915_gem_request *req,
			 int state, unsigned long timeout_us)
{
	unsigned int cpu;

	/* When waiting for high frequency requests, e.g.
	 * during synchronous rendering split between the CPU and GPU, the
	 * finite amount of time required to set up the irq and wait upon it
	 * limits the response rate. By busywaiting on the request completion
	 * for a short while we can service the high frequency waits as
	 * quickly as possible. However, if it is a slow request, we want to
	 * sleep as quickly as possible. The tradeoff between waiting and
	 * sleeping is roughly the time it takes to sleep on a request, on
	 * the order of a microsecond.
	 */

	timeout_us += local_clock_us(&cpu);
	do {
		if (__i915_gem_request_completed(req))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax();
	} while (!need_resched());

	return false;
}

static long
__i915_request_wait_for_execute(struct drm_i915_gem_request *request,
				unsigned int flags,
				long timeout)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	wait_queue_head_t *q = &request->i915->gpu_error.wait_queue;
	DEFINE_WAIT(reset);
	DEFINE_WAIT(wait);

	if (flags & I915_WAIT_LOCKED)
		add_wait_queue(q, &reset);

	do {
		prepare_to_wait(&request->execute.wait, &wait, state);

		if (i915_sw_fence_done(&request->execute))
			break;

		if (flags & I915_WAIT_LOCKED &&
		    i915_reset_in_progress(&request->i915->gpu_error)) {
			__set_current_state(TASK_RUNNING);
			i915_reset(request->i915);
			reset_wait_queue(q, &reset);
			continue;
		}

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	} while (1);
	finish_wait(&request->execute.wait, &wait);

	if (flags & I915_WAIT_LOCKED)
		remove_wait_queue(q, &reset);

	return timeout;
}

/**
 * i915_wait_request - wait until execution of request has finished
 * @req: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_wait_request() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
 * in via the flags; conversely, if the struct_mutex is not held, the caller
 * must not specify that the wait is locked.
 *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero or -ETIME if the request is unfinished after the timeout expires.
 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
 */
long i915_wait_request(struct drm_i915_gem_request *req,
		       unsigned int flags,
		       long timeout)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(reset);
	struct intel_wait wait;

	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	if (i915_gem_request_completed(req))
		return timeout;

	if (!timeout)
		return -ETIME;

	trace_i915_gem_request_wait_begin(req);

	if (!i915_sw_fence_done(&req->execute)) {
		timeout = __i915_request_wait_for_execute(req, flags, timeout);
		if (timeout < 0)
			goto complete;

		GEM_BUG_ON(!i915_sw_fence_done(&req->execute));
	}
	GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
	GEM_BUG_ON(!req->global_seqno);

	/* Optimistic short spin before touching IRQs */
	if (i915_spin_request(req, state, 5))
		goto complete;

	set_current_state(state);
	if (flags & I915_WAIT_LOCKED)
		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

	intel_wait_init(&wait, req->global_seqno);
	if (intel_engine_add_wait(req->engine, &wait))
		/* In order to check that we haven't missed the interrupt
		 * as we enabled it, we need to kick ourselves to do a
		 * coherent check on the seqno before we sleep.
		 */
		goto wakeup;

	for (;;) {
		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);

		if (intel_wait_complete(&wait))
			break;

		set_current_state(state);

wakeup:
		/* Carefully check if the request is complete, giving time
		 * for the seqno to be visible following the interrupt.
		 * We also have to check in case we are kicked by the GPU
		 * reset in order to drop the struct_mutex.
		 */
		if (__i915_request_irq_complete(req))
			break;

		/* If the GPU is hung, and we hold the lock, reset the GPU
		 * and then check for completion. On a full reset, the engine's
		 * HW seqno will be advanced past us and we are complete.
		 * If we do a partial reset, we have to wait for the GPU to
		 * resume and update the breadcrumb.
		 *
		 * If we don't hold the mutex, we can just wait for the worker
		 * to come along and update the breadcrumb (either directly
		 * itself, or indirectly by recovering the GPU).
		 */
		if (flags & I915_WAIT_LOCKED &&
		    i915_reset_in_progress(&req->i915->gpu_error)) {
			__set_current_state(TASK_RUNNING);
			i915_reset(req->i915);
			reset_wait_queue(&req->i915->gpu_error.wait_queue,
					 &reset);
			continue;
		}

		/* Only spin if we know the GPU is processing this request */
		if (i915_spin_request(req, state, 2))
			break;
	}

	intel_engine_remove_wait(req->engine, &wait);
	if (flags & I915_WAIT_LOCKED)
		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
	__set_current_state(TASK_RUNNING);

complete:
	trace_i915_gem_request_wait_end(req);

	return timeout;
}

static void engine_retire_requests(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *request, *next;

	list_for_each_entry_safe(request, next,
				 &engine->timeline->requests, link) {
		if (!__i915_gem_request_completed(request))
			return;

		i915_gem_request_retire(request);
	}
}

void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	if (!dev_priv->gt.active_requests)
		return;

	for_each_engine(engine, dev_priv, id)
		engine_retire_requests(engine);
}
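
/*
 * Illustrative sketch (not part of the driver): the typical request
 * lifecycle built from the helpers in this file. Error handling is
 * elided and struct_mutex is assumed to be held throughout, as the
 * lockdep assertions above require.
 *
 *	req = i915_gem_request_alloc(engine, ctx);
 *	i915_gem_request_await_object(req, obj, true);
 *	... emit commands into req->ring ...
 *	__i915_add_request(req, true);
 *	i915_wait_request(req, I915_WAIT_LOCKED | I915_WAIT_INTERRUPTIBLE,
 *			  MAX_SCHEDULE_TIMEOUT);
 *	i915_gem_retire_requests(req->i915);
 *
 * i915_gem_request_alloc() reserves ring space and a seqno,
 * i915_gem_request_await_object() orders the request against prior work
 * on the object, __i915_add_request() seals the request and queues it for
 * submission, i915_wait_request() optionally waits on the CPU, and
 * i915_gem_retire_requests() reaps whatever has since completed.
 */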