xref: /dflybsd-src/sys/dev/drm/i915/i915_gem_request.c (revision 4758d649ae1bd804db6736d6e84b9589b414834e)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/prefetch.h>
26 #include <linux/dma-fence-array.h>
27 
28 #include "i915_drv.h"
29 
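/* Each request is exposed to the rest of the kernel as a struct dma_fence;
 * the operations below report the fence as signaled once the GPU has
 * completed the request (i915_gem_request_completed()) and hook external
 * waiters into the engine's breadcrumb signaling machinery.
 */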
30 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
31 {
32 	return "i915";
33 }
34 
35 static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
36 {
37 	return to_request(fence)->timeline->common->name;
38 }
39 
40 static bool i915_fence_signaled(struct dma_fence *fence)
41 {
42 	return i915_gem_request_completed(to_request(fence));
43 }
44 
45 static bool i915_fence_enable_signaling(struct dma_fence *fence)
46 {
47 	if (i915_fence_signaled(fence))
48 		return false;
49 
50 	intel_engine_enable_signaling(to_request(fence));
51 	return true;
52 }
53 
54 static signed long i915_fence_wait(struct dma_fence *fence,
55 				   bool interruptible,
56 				   signed long timeout)
57 {
58 	return i915_wait_request(to_request(fence), interruptible, timeout);
59 }
60 
61 static void i915_fence_release(struct dma_fence *fence)
62 {
63 	struct drm_i915_gem_request *req = to_request(fence);
64 
65 	kmem_cache_free(req->i915->requests, req);
66 }
67 
68 const struct dma_fence_ops i915_fence_ops = {
69 	.get_driver_name = i915_fence_get_driver_name,
70 	.get_timeline_name = i915_fence_get_timeline_name,
71 	.enable_signaling = i915_fence_enable_signaling,
72 	.signaled = i915_fence_signaled,
73 	.wait = i915_fence_wait,
74 	.release = i915_fence_release,
75 };
76 
77 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
78 				   struct drm_file *file)
79 {
80 	struct drm_i915_private *dev_private;
81 	struct drm_i915_file_private *file_priv;
82 
83 	WARN_ON(!req || !file || req->file_priv);
84 
85 	if (!req || !file)
86 		return -EINVAL;
87 
88 	if (req->file_priv)
89 		return -EINVAL;
90 
91 	dev_private = req->i915;
92 	file_priv = file->driver_priv;
93 
94 	lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
95 	req->file_priv = file_priv;
96 	list_add_tail(&req->client_list, &file_priv->mm.request_list);
97 	lockmgr(&file_priv->mm.lock, LK_RELEASE);
98 
99 	return 0;
100 }
101 
102 static inline void
103 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
104 {
105 	struct drm_i915_file_private *file_priv = request->file_priv;
106 
107 	if (!file_priv)
108 		return;
109 
110 	lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
111 	list_del(&request->client_list);
112 	request->file_priv = NULL;
113 	lockmgr(&file_priv->mm.lock, LK_RELEASE);
114 }
115 
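/* The i915_priotree embedded in each request records the scheduler's view
 * of the dependency graph: signalers_list holds the requests this one must
 * wait upon, waiters_list the requests waiting upon it, with each edge
 * described by an i915_dependency.
 */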
116 static struct i915_dependency *
117 i915_dependency_alloc(struct drm_i915_private *i915)
118 {
119 	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
120 }
121 
122 static void
123 i915_dependency_free(struct drm_i915_private *i915,
124 		     struct i915_dependency *dep)
125 {
126 	kmem_cache_free(i915->dependencies, dep);
127 }
128 
129 static void
130 __i915_priotree_add_dependency(struct i915_priotree *pt,
131 			       struct i915_priotree *signal,
132 			       struct i915_dependency *dep,
133 			       unsigned long flags)
134 {
135 	INIT_LIST_HEAD(&dep->dfs_link);
136 	list_add(&dep->wait_link, &signal->waiters_list);
137 	list_add(&dep->signal_link, &pt->signalers_list);
138 	dep->signaler = signal;
139 	dep->flags = flags;
140 }
141 
142 static int
143 i915_priotree_add_dependency(struct drm_i915_private *i915,
144 			     struct i915_priotree *pt,
145 			     struct i915_priotree *signal)
146 {
147 	struct i915_dependency *dep;
148 
149 	dep = i915_dependency_alloc(i915);
150 	if (!dep)
151 		return -ENOMEM;
152 
153 	__i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC);
154 	return 0;
155 }
156 
157 static void
158 i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
159 {
160 	struct i915_dependency *dep, *next;
161 
162 	GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
163 
164 	/* Everyone we depended upon (the fences we wait on to be signaled)
165 	 * should retire before us and remove themselves from our list.
166 	 * However, retirement is run independently on each timeline and
167 	 * so we may be called out-of-order.
168 	 */
169 	list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) {
170 		list_del(&dep->wait_link);
171 		if (dep->flags & I915_DEPENDENCY_ALLOC)
172 			i915_dependency_free(i915, dep);
173 	}
174 
175 	/* Remove ourselves from everyone who depends upon us */
176 	list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) {
177 		list_del(&dep->signal_link);
178 		if (dep->flags & I915_DEPENDENCY_ALLOC)
179 			i915_dependency_free(i915, dep);
180 	}
181 }
182 
183 static void
184 i915_priotree_init(struct i915_priotree *pt)
185 {
186 	INIT_LIST_HEAD(&pt->signalers_list);
187 	INIT_LIST_HEAD(&pt->waiters_list);
188 	RB_CLEAR_NODE(&pt->node);
189 	pt->priority = INT_MIN;
190 }
191 
192 void i915_gem_retire_noop(struct i915_gem_active *active,
193 			  struct drm_i915_gem_request *request)
194 {
195 	/* Space left intentionally blank */
196 }
197 
198 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
199 {
200 	struct i915_gem_active *active, *next;
201 
202 	lockdep_assert_held(&request->i915->drm.struct_mutex);
203 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
204 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
205 	GEM_BUG_ON(!i915_gem_request_completed(request));
206 	GEM_BUG_ON(!request->i915->gt.active_requests);
207 
208 	trace_i915_gem_request_retire(request);
209 
210 	spin_lock_irq(&request->engine->timeline->lock);
211 	list_del_init(&request->link);
212 	spin_unlock_irq(&request->engine->timeline->lock);
213 
214 	/* We know the GPU must have read the request to have
215 	 * sent us the seqno + interrupt, so use the position
216 	 * of the tail of the request to update the last known position
217 	 * of the GPU head.
218 	 *
219 	 * Note this requires that we are always called in request
220 	 * completion order.
221 	 */
222 	list_del(&request->ring_link);
223 	request->ring->last_retired_head = request->postfix;
224 	if (!--request->i915->gt.active_requests) {
225 		GEM_BUG_ON(!request->i915->gt.awake);
226 		mod_delayed_work(request->i915->wq,
227 				 &request->i915->gt.idle_work,
228 				 msecs_to_jiffies(100));
229 	}
230 
231 	/* Walk through the active list, calling retire on each. This allows
232 	 * objects to track their GPU activity and mark themselves as idle
233 	 * when their *last* active request is completed (updating state
234 	 * tracking lists for eviction, active references for GEM, etc).
235 	 *
236 	 * As the ->retire() may free the node, we decouple it first and
237 	 * pass along the auxiliary information (to avoid dereferencing
238 	 * the node after the callback).
239 	 */
240 	list_for_each_entry_safe(active, next, &request->active_list, link) {
241 		/* In microbenchmarks, or when focusing upon time spent inside the
242 		 * kernel, we may spend an inordinate amount of time simply handling
243 		 * the retirement of requests and processing their callbacks. This
244 		 * loop in particular is hot due to the cache misses incurred when
245 		 * jumping around the list of i915_gem_active nodes.
246 		 * So we try to keep this loop as streamlined as possible and
247 		 * also prefetch the next i915_gem_active to try and hide
248 		 * the likely cache miss.
249 		 */
250 		prefetchw(next);
251 
252 		INIT_LIST_HEAD(&active->link);
253 		RCU_INIT_POINTER(active->request, NULL);
254 
255 		active->retire(active, request);
256 	}
257 
258 	i915_gem_request_remove_from_client(request);
259 
260 	if (request->previous_context) {
261 		if (i915.enable_execlists)
262 			intel_lr_context_unpin(request->previous_context,
263 					       request->engine);
264 	}
265 
266 	i915_gem_context_put(request->ctx);
267 
268 	dma_fence_signal(&request->fence);
269 
270 	i915_priotree_fini(request->i915, &request->priotree);
271 	i915_gem_request_put(request);
272 }
273 
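/* Retire all requests on this engine's timeline up to and including @req.
 * Requests are retired strictly in submission order, so every predecessor
 * of @req is retired first.
 */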
274 void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
275 {
276 	struct intel_engine_cs *engine = req->engine;
277 	struct drm_i915_gem_request *tmp;
278 
279 	lockdep_assert_held(&req->i915->drm.struct_mutex);
280 	if (list_empty(&req->link))
281 		return;
282 
283 	do {
284 		tmp = list_first_entry(&engine->timeline->requests,
285 				       typeof(*tmp), link);
286 
287 		i915_gem_request_retire(tmp);
288 	} while (tmp != req);
289 }
290 
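/* Check the GPU error state before accepting new work: -EIO if the GPU is
 * terminally wedged, -EAGAIN (or -EIO for non-interruptible callers) while
 * a reset is still in progress, 0 otherwise.
 */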
291 static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
292 {
293 	struct i915_gpu_error *error = &dev_priv->gpu_error;
294 
295 	if (i915_terminally_wedged(error))
296 		return -EIO;
297 
298 	if (i915_reset_in_progress(error)) {
299 		/* Non-interruptible callers can't handle -EAGAIN, hence return
300 		 * -EIO unconditionally for these.
301 		 */
302 		if (!dev_priv->mm.interruptible)
303 			return -EIO;
304 
305 		return -EAGAIN;
306 	}
307 
308 	return 0;
309 }
310 
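/* Idle the GPU, retire all outstanding requests and restart the global
 * seqno from @seqno, programming each engine with the new value and
 * clearing the inter-engine semaphore sync state on every timeline.
 */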
311 static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
312 {
313 	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
314 	struct intel_engine_cs *engine;
315 	enum intel_engine_id id;
316 	int ret;
317 
318 	/* Carefully retire all requests without writing to the rings */
319 	ret = i915_gem_wait_for_idle(i915,
320 				     I915_WAIT_INTERRUPTIBLE |
321 				     I915_WAIT_LOCKED);
322 	if (ret)
323 		return ret;
324 
325 	i915_gem_retire_requests(i915);
326 	GEM_BUG_ON(i915->gt.active_requests > 1);
327 
328 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
329 	if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) {
330 		while (intel_breadcrumbs_busy(i915))
331 			cond_resched(); /* spin until threads are complete */
332 	}
333 	atomic_set(&timeline->next_seqno, seqno);
334 
335 	/* Finally reset hw state */
336 	for_each_engine(engine, i915, id)
337 		intel_engine_init_global_seqno(engine, seqno);
338 
339 	list_for_each_entry(timeline, &i915->gt.timelines, link) {
340 		for_each_engine(engine, i915, id) {
341 			struct intel_timeline *tl = &timeline->engine[id];
342 
343 			memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno));
344 		}
345 	}
346 
347 	return 0;
348 }
349 
350 int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
351 {
352 	struct drm_i915_private *dev_priv = to_i915(dev);
353 
354 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
355 
356 	if (seqno == 0)
357 		return -EINVAL;
358 
359 	/* The value stored in the HWS page needs to be one less than the
360 	 * seqno we will next inject into the ring.
361 	 */
362 	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
363 }
364 
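/* Reserve room in the global seqno space for one more request. The
 * unsigned comparison detects an imminent u32 wraparound (next_seqno +
 * active_requests overflowing), in which case the GPU is idled and the
 * global seqno restarted from zero before the reservation is granted.
 */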
365 static int reserve_global_seqno(struct drm_i915_private *i915)
366 {
367 	u32 active_requests = ++i915->gt.active_requests;
368 	u32 next_seqno = atomic_read(&i915->gt.global_timeline.next_seqno);
369 	int ret;
370 
371 	/* Reservation is fine until we need to wrap around */
372 	if (likely(next_seqno + active_requests > next_seqno))
373 		return 0;
374 
375 	ret = i915_gem_init_global_seqno(i915, 0);
376 	if (ret) {
377 		i915->gt.active_requests--;
378 		return ret;
379 	}
380 
381 	return 0;
382 }
383 
384 static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
385 {
386 	/* next_seqno only incremented under a mutex */
387 	return ++tl->next_seqno.counter;
388 }
389 
390 static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
391 {
392 	return atomic_inc_return(&tl->next_seqno);
393 }
394 
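/* Called with the engine timeline lock held: move the request from its
 * per-context timeline onto the engine's global timeline, assign it a
 * global seqno and emit the breadcrumb into the ring.
 */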
395 void __i915_gem_request_submit(struct drm_i915_gem_request *request)
396 {
397 	struct intel_engine_cs *engine = request->engine;
398 	struct intel_timeline *timeline;
399 	u32 seqno;
400 
401 	/* Transfer from per-context onto the global per-engine timeline */
402 	timeline = engine->timeline;
403 	GEM_BUG_ON(timeline == request->timeline);
404 	assert_spin_locked(&timeline->lock);
405 
406 	seqno = timeline_get_seqno(timeline->common);
407 	GEM_BUG_ON(!seqno);
408 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
409 
410 	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
411 	request->previous_seqno = timeline->last_submitted_seqno;
412 	timeline->last_submitted_seqno = seqno;
413 
414 	/* We may be recursing from the signal callback of another i915 fence */
415 	lockmgr(&request->lock, LK_EXCLUSIVE);
416 	request->global_seqno = seqno;
417 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
418 		intel_engine_enable_signaling(request);
419 	lockmgr(&request->lock, LK_RELEASE);
420 
421 	GEM_BUG_ON(!request->global_seqno);
422 	engine->emit_breadcrumb(request,
423 				request->ring->vaddr + request->postfix);
424 
425 	lockmgr(&request->timeline->lock, LK_EXCLUSIVE);
426 	list_move_tail(&request->link, &timeline->requests);
427 	lockmgr(&request->timeline->lock, LK_RELEASE);
428 
429 	i915_sw_fence_commit(&request->execute);
430 }
431 
432 void i915_gem_request_submit(struct drm_i915_gem_request *request)
433 {
434 	struct intel_engine_cs *engine = request->engine;
435 	unsigned long flags;
436 
437 	/* Will be called from irq-context when using foreign fences. */
438 	spin_lock_irqsave(&engine->timeline->lock, flags);
439 
440 	__i915_gem_request_submit(request);
441 
442 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
443 }
444 
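/* Notification callbacks for the two i915_sw_fences embedded in each
 * request. The submit fence hands the request to the engine backend once
 * all of its dependencies have signaled; the execute fence is completed by
 * __i915_gem_request_submit() once the request has been assigned a global
 * seqno. Both drop their request reference when the fence is freed.
 */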
445 static int __i915_sw_fence_call
446 submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
447 {
448 	struct drm_i915_gem_request *request =
449 		container_of(fence, typeof(*request), submit);
450 
451 	switch (state) {
452 	case FENCE_COMPLETE:
453 		request->engine->submit_request(request);
454 		break;
455 
456 	case FENCE_FREE:
457 		i915_gem_request_put(request);
458 		break;
459 	}
460 
461 	return NOTIFY_DONE;
462 }
463 
464 static int __i915_sw_fence_call
465 execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
466 {
467 	struct drm_i915_gem_request *request =
468 		container_of(fence, typeof(*request), execute);
469 
470 	switch (state) {
471 	case FENCE_COMPLETE:
472 		break;
473 
474 	case FENCE_FREE:
475 		i915_gem_request_put(request);
476 		break;
477 	}
478 
479 	return NOTIFY_DONE;
480 }
481 
482 /**
483  * i915_gem_request_alloc - allocate a request structure
484  *
485  * @engine: engine that we wish to issue the request on.
486  * @ctx: context that the request will be associated with.
487  *       This can be NULL if the request is not directly related to
488  *       any specific user context, in which case this function will
489  *       choose an appropriate context to use.
490  *
491  * Returns a pointer to the allocated request if successful,
492  * or an error code if not.
493  */
494 struct drm_i915_gem_request *
495 i915_gem_request_alloc(struct intel_engine_cs *engine,
496 		       struct i915_gem_context *ctx)
497 {
498 	struct drm_i915_private *dev_priv = engine->i915;
499 	struct drm_i915_gem_request *req;
500 	int ret;
501 
502 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
503 
504 	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
505 	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
506 	 * and restart.
507 	 */
508 	ret = i915_gem_check_wedge(dev_priv);
509 	if (ret)
510 		return ERR_PTR(ret);
511 
512 	ret = reserve_global_seqno(dev_priv);
513 	if (ret)
514 		return ERR_PTR(ret);
515 
516 	/* Move the oldest request to the slab-cache (if not in use!) */
517 	req = list_first_entry_or_null(&engine->timeline->requests,
518 				       typeof(*req), link);
519 	if (req && __i915_gem_request_completed(req))
520 		i915_gem_request_retire(req);
521 
522 	/* Beware: Dragons be flying overhead.
523 	 *
524 	 * We use RCU to look up requests in flight. The lookups may
525 	 * race with the request being allocated from the slab freelist.
526 	 * That is, the request we are writing to here may be in the process
527 	 * of being read by __i915_gem_active_get_rcu(). As such,
528 	 * we have to be very careful when overwriting the contents. During
529 	 * the RCU lookup, we chase the request->engine pointer,
530 	 * read the request->global_seqno and increment the reference count.
531 	 *
532 	 * The reference count is incremented atomically. If it is zero,
533 	 * the lookup knows the request is unallocated and complete. Otherwise,
534 	 * it is either still in use, or has been reallocated and reset
535 	 * with dma_fence_init(). This increment is safe for release as we
536 	 * check that the request we have a reference to matches the active
537 	 * request.
538 	 *
539 	 * Before we increment the refcount, we chase the request->engine
540 	 * pointer. We must not call kmem_cache_zalloc() or else we set
541 	 * that pointer to NULL and cause a crash during the lookup. If
542 	 * we see the request is completed (based on the value of the
543 	 * old engine and seqno), the lookup is complete and reports NULL.
544 	 * If we decide the request is not completed (new engine or seqno),
545 	 * then we grab a reference and double check that it is still the
546 	 * active request - which it will not be - and restart the lookup.
547 	 *
548 	 * Do not use kmem_cache_zalloc() here!
549 	 */
550 	req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
551 	if (!req) {
552 		ret = -ENOMEM;
553 		goto err_unreserve;
554 	}
555 
556 	req->timeline = i915_gem_context_lookup_timeline(ctx, engine);
557 	GEM_BUG_ON(req->timeline == engine->timeline);
558 
559 	lockinit(&req->lock, "i915_rl", 0, 0);
560 	dma_fence_init(&req->fence,
561 		       &i915_fence_ops,
562 		       &req->lock,
563 		       req->timeline->fence_context,
564 		       __timeline_get_seqno(req->timeline->common));
565 
566 	/* We bump the ref for the fence chain */
567 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
568 	i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify);
569 
570 	/* Ensure that the execute fence completes after the submit fence -
571 	 * as we complete the execute fence from within the submit fence
572 	 * callback, its completion would otherwise be visible first.
573 	 */
574 	i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq);
575 
576 	i915_priotree_init(&req->priotree);
577 
578 	INIT_LIST_HEAD(&req->active_list);
579 	req->i915 = dev_priv;
580 	req->engine = engine;
581 	req->ctx = i915_gem_context_get(ctx);
582 
583 	/* No zalloc, must clear what we need by hand */
584 	req->global_seqno = 0;
585 	req->previous_context = NULL;
586 	req->file_priv = NULL;
587 	req->batch = NULL;
588 
589 	/*
590 	 * Reserve space in the ring buffer for all the commands required to
591 	 * eventually emit this request. This is to guarantee that the
592 	 * i915_add_request() call can't fail. Note that the reserve may need
593 	 * to be redone if the request is not actually submitted straight
594 	 * away, e.g. because a GPU scheduler has deferred it.
595 	 */
596 	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
597 	GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz);
598 
599 	if (i915.enable_execlists)
600 		ret = intel_logical_ring_alloc_request_extras(req);
601 	else
602 		ret = intel_ring_alloc_request_extras(req);
603 	if (ret)
604 		goto err_ctx;
605 
606 	/* Record the position of the start of the request so that
607 	 * should we detect the updated seqno part-way through the
608 	 * GPU processing the request, we never over-estimate the
609 	 * position of the head.
610 	 */
611 	req->head = req->ring->tail;
612 
613 	return req;
614 
615 err_ctx:
616 	i915_gem_context_put(ctx);
617 	kmem_cache_free(dev_priv->requests, req);
618 err_unreserve:
619 	dev_priv->gt.active_requests--;
620 	return ERR_PTR(ret);
621 }
622 
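/* Order the execution of @to after @from. A scheduler dependency is
 * recorded when the backend supports it; execution ordering itself is then
 * enforced with the cheapest mechanism available: nothing if both requests
 * share a timeline, a software fence between requests on the same engine,
 * and either a CPU-side fence wait or a hardware semaphore when the
 * requests cross engines.
 */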
623 static int
624 i915_gem_request_await_request(struct drm_i915_gem_request *to,
625 			       struct drm_i915_gem_request *from)
626 {
627 	int ret;
628 
629 	GEM_BUG_ON(to == from);
630 
631 	if (to->engine->schedule) {
632 		ret = i915_priotree_add_dependency(to->i915,
633 						   &to->priotree,
634 						   &from->priotree);
635 		if (ret < 0)
636 			return ret;
637 	}
638 
639 	if (to->timeline == from->timeline)
640 		return 0;
641 
642 	if (to->engine == from->engine) {
643 		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
644 						       &from->submit,
645 						       GFP_KERNEL);
646 		return ret < 0 ? ret : 0;
647 	}
648 
649 	if (!from->global_seqno) {
650 		ret = i915_sw_fence_await_dma_fence(&to->submit,
651 						    &from->fence, 0,
652 						    GFP_KERNEL);
653 		return ret < 0 ? ret : 0;
654 	}
655 
656 	if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
657 		return 0;
658 
659 	trace_i915_gem_ring_sync_to(to, from);
660 	if (!i915.semaphores) {
661 		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
662 			ret = i915_sw_fence_await_dma_fence(&to->submit,
663 							    &from->fence, 0,
664 							    GFP_KERNEL);
665 			if (ret < 0)
666 				return ret;
667 		}
668 	} else {
669 		ret = to->engine->semaphore.sync_to(to, from);
670 		if (ret)
671 			return ret;
672 	}
673 
674 	to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
675 	return 0;
676 }
677 
678 int
679 i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
680 				 struct dma_fence *fence)
681 {
682 	struct dma_fence_array *array;
683 	int ret;
684 	int i;
685 
686 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
687 		return 0;
688 
689 	if (dma_fence_is_i915(fence))
690 		return i915_gem_request_await_request(req, to_request(fence));
691 
692 	if (!dma_fence_is_array(fence)) {
693 		ret = i915_sw_fence_await_dma_fence(&req->submit,
694 						    fence, I915_FENCE_TIMEOUT,
695 						    GFP_KERNEL);
696 		return ret < 0 ? ret : 0;
697 	}
698 
699 	/* Note that if the fence-array was created in signal-on-any mode,
700 	 * we should *not* decompose it into its individual fences. However,
701 	 * we don't currently store which mode the fence-array is operating
702 	 * in. Fortunately, the only user of signal-on-any is private to
703 	 * amdgpu and we should not see any incoming fence-array from
704 	 * sync-file being in signal-on-any mode.
705 	 */
706 
707 	array = to_dma_fence_array(fence);
708 	for (i = 0; i < array->num_fences; i++) {
709 		struct dma_fence *child = array->fences[i];
710 
711 		if (dma_fence_is_i915(child))
712 			ret = i915_gem_request_await_request(req,
713 							     to_request(child));
714 		else
715 			ret = i915_sw_fence_await_dma_fence(&req->submit,
716 							    child, I915_FENCE_TIMEOUT,
717 							    GFP_KERNEL);
718 		if (ret < 0)
719 			return ret;
720 	}
721 
722 	return 0;
723 }
724 
725 /**
726  * i915_gem_request_await_object - set this request to (async) wait upon a bo
727  *
728  * @to: request we are wishing to use
729  * @obj: object which may be in use on another ring.
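 * @write: true if @to will write to the object, in which case it must wait
 *         upon all outstanding fences and not only the exclusive one.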
730  *
731  * This code is meant to abstract object synchronization with the GPU.
732  * Conceptually we serialise writes between engines inside the GPU.
733  * We only allow one engine to write into a buffer at any time, but
734  * multiple readers. To ensure each has a coherent view of memory, we must:
735  *
736  * - If there is an outstanding write request to the object, the new
737  *   request must wait for it to complete (either CPU or in hw, requests
738  *   on the same ring will be naturally ordered).
739  *
740  * - If we are a write request (pending_write_domain is set), the new
741  *   request must wait for outstanding read requests to complete.
742  *
743  * Returns 0 if successful, else propagates up the lower layer error.
744  */
745 int
746 i915_gem_request_await_object(struct drm_i915_gem_request *to,
747 			      struct drm_i915_gem_object *obj,
748 			      bool write)
749 {
750 	struct dma_fence *excl;
751 	int ret = 0;
752 
753 	if (write) {
754 		struct dma_fence **shared;
755 		unsigned int count, i;
756 
757 		ret = reservation_object_get_fences_rcu(obj->resv,
758 							&excl, &count, &shared);
759 		if (ret)
760 			return ret;
761 
762 		for (i = 0; i < count; i++) {
763 			ret = i915_gem_request_await_dma_fence(to, shared[i]);
764 			if (ret)
765 				break;
766 
767 			dma_fence_put(shared[i]);
768 		}
769 
770 		for (; i < count; i++)
771 			dma_fence_put(shared[i]);
772 		kfree(shared);
773 	} else {
774 		excl = reservation_object_get_excl_rcu(obj->resv);
775 	}
776 
777 	if (excl) {
778 		if (ret == 0)
779 			ret = i915_gem_request_await_dma_fence(to, excl);
780 
781 		dma_fence_put(excl);
782 	}
783 
784 	return ret;
785 }
786 
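/* Mark the GT as busy on behalf of a new request: take a runtime-pm
 * wakeref, re-enable GT powersaving/RPS and schedule the periodic retire
 * worker.
 */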
787 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
788 {
789 	struct drm_i915_private *dev_priv = engine->i915;
790 
791 	if (dev_priv->gt.awake)
792 		return;
793 
794 	GEM_BUG_ON(!dev_priv->gt.active_requests);
795 
796 	intel_runtime_pm_get_noresume(dev_priv);
797 	dev_priv->gt.awake = true;
798 
799 	intel_enable_gt_powersave(dev_priv);
800 	i915_update_gfx_val(dev_priv);
801 	if (INTEL_GEN(dev_priv) >= 6)
802 		gen6_rps_busy(dev_priv);
803 
804 	queue_delayed_work(dev_priv->wq,
805 			   &dev_priv->gt.retire_work,
806 			   round_jiffies_up_relative(HZ));
807 }
808 
809 /*
810  * NB: This function is not allowed to fail. Doing so would mean the
811  * request is not being tracked for completion but the work itself is
812  * going to happen on the hardware. This would be a Bad Thing(tm).
813  */
814 void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
815 {
816 	struct intel_engine_cs *engine = request->engine;
817 	struct intel_ring *ring = request->ring;
818 	struct intel_timeline *timeline = request->timeline;
819 	struct drm_i915_gem_request *prev;
820 	int err;
821 
822 	lockdep_assert_held(&request->i915->drm.struct_mutex);
823 	trace_i915_gem_request_add(request);
824 
825 	/*
826 	 * To ensure that this call will not fail, space for its emissions
827 	 * should already have been reserved in the ring buffer. Let the ring
828 	 * know that it is time to use that space up.
829 	 */
830 	request->reserved_space = 0;
831 
832 	/*
833 	 * Emit any outstanding flushes - execbuf can fail to emit the flush
834 	 * after having emitted the batchbuffer command. Hence we need to fix
835 	 * things up similar to emitting the lazy request. The difference here
836 	 * is that the flush _must_ happen before the next request, no matter
837 	 * what.
838 	 */
839 	if (flush_caches) {
840 		err = engine->emit_flush(request, EMIT_FLUSH);
841 
842 		/* Not allowed to fail! */
843 		WARN(err, "engine->emit_flush() failed: %d!\n", err);
844 	}
845 
846 	/* Record the position of the start of the breadcrumb so that
847 	 * should we detect the updated seqno part-way through the
848 	 * GPU processing the request, we never over-estimate the
849 	 * position of the ring's HEAD.
850 	 */
851 	err = intel_ring_begin(request, engine->emit_breadcrumb_sz);
852 	GEM_BUG_ON(err);
853 	request->postfix = ring->tail;
854 	ring->tail += engine->emit_breadcrumb_sz * sizeof(u32);
855 
856 	/* Seal the request and mark it as pending execution. Note that
857 	 * we may inspect this state, without holding any locks, during
858 	 * hangcheck. Hence we apply the barrier to ensure that we do not
859 	 * see a more recent value in the hws than we are tracking.
860 	 */
861 
862 	prev = i915_gem_active_raw(&timeline->last_request,
863 				   &request->i915->drm.struct_mutex);
864 	if (prev) {
865 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
866 					     &request->submitq);
867 		if (engine->schedule)
868 			__i915_priotree_add_dependency(&request->priotree,
869 						       &prev->priotree,
870 						       &request->dep,
871 						       0);
872 	}
873 
874 	spin_lock_irq(&timeline->lock);
875 	list_add_tail(&request->link, &timeline->requests);
876 	spin_unlock_irq(&timeline->lock);
877 
878 	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
879 				     request->fence.seqno));
880 
881 	timeline->last_submitted_seqno = request->fence.seqno;
882 	i915_gem_active_set(&timeline->last_request, request);
883 
884 	list_add_tail(&request->ring_link, &ring->request_list);
885 	request->emitted_jiffies = jiffies;
886 
887 	i915_gem_mark_busy(engine);
888 
889 	/* Let the backend know a new request has arrived that may need
890 	 * to adjust the existing execution schedule due to a high priority
891 	 * request - i.e. we may want to preempt the current request in order
892 	 * to run a high priority dependency chain *before* we can execute this
893 	 * request.
894 	 *
895 	 * This is called before the request is ready to run so that we can
896 	 * decide whether to preempt the entire chain so that it is ready to
897 	 * run at the earliest possible convenience.
898 	 */
899 	if (engine->schedule)
900 		engine->schedule(request, request->ctx->priority);
901 
902 	local_bh_disable();
903 	i915_sw_fence_commit(&request->submit);
904 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
905 }
906 
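/* DEFINE_WAIT() entries remove themselves from the queue when woken, so
 * re-add @wait to @q in order to keep receiving wakeups (e.g. further
 * global reset notifications) while we continue to wait.
 */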
907 static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
908 {
909 	unsigned long flags;
910 
911 	spin_lock_irqsave(&q->lock, flags);
912 	if (list_empty(&wait->task_list))
913 		__add_wait_queue(q, wait);
914 	spin_unlock_irqrestore(&q->lock, flags);
915 }
916 
917 static unsigned long local_clock_us(unsigned int *cpu)
918 {
919 	unsigned long t;
920 
921 	/* Cheaply and approximately convert from nanoseconds to microseconds.
922 	 * The result and subsequent calculations are also defined in the same
923 	 * approximate microseconds units. The principal source of timing
924 	 * error here is from the simple truncation.
925 	 *
926 	 * Note that local_clock() is only defined wrt the current CPU;
927 	 * the comparisons are no longer valid if we switch CPUs. Instead of
928 	 * blocking preemption for the entire busywait, we can detect the CPU
929 	 * switch and use that as an indicator of system load and a reason to
930 	 * stop busywaiting, see busywait_stop().
931 	 */
932 	*cpu = get_cpu();
933 	t = local_clock() >> 10;
934 	put_cpu();
935 
936 	return t;
937 }
938 
939 static bool busywait_stop(unsigned long timeout, unsigned int cpu)
940 {
941 	unsigned int this_cpu;
942 
943 	if (time_after(local_clock_us(&this_cpu), timeout))
944 		return true;
945 
946 	return this_cpu != cpu;
947 }
948 
949 bool __i915_spin_request(const struct drm_i915_gem_request *req,
950 			 int state, unsigned long timeout_us)
951 {
952 	unsigned int cpu;
953 
954 	/* When waiting for high frequency requests, e.g. during synchronous
955 	 * rendering split between the CPU and GPU, the finite amount of time
956 	 * required to set up the irq and wait upon it limits the response
957 	 * rate. By busywaiting on the request completion for a short while we
958 	 * can service the high frequency waits as quickly as possible. However,
959 	 * if it is a slow request, we want to sleep as quickly as possible.
960 	 * The tradeoff between waiting and sleeping is roughly the time it
961 	 * takes to sleep on a request, on the order of a microsecond.
962 	 */
963 
964 	timeout_us += local_clock_us(&cpu);
965 	do {
966 		if (__i915_gem_request_completed(req))
967 			return true;
968 
969 		if (signal_pending_state(state, current))
970 			break;
971 
972 		if (busywait_stop(timeout_us, cpu))
973 			break;
974 
975 		cpu_relax();
976 	} while (!need_resched());
977 
978 	return false;
979 }
980 
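/* Sleep until the request's execute fence has signaled, i.e. until the
 * request has been submitted to hardware and holds a global seqno, while
 * honouring the timeout, pending signals and any GPU reset that occurs in
 * the meantime.
 */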
981 static long
982 __i915_request_wait_for_execute(struct drm_i915_gem_request *request,
983 				unsigned int flags,
984 				long timeout)
985 {
986 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
987 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
988 	wait_queue_head_t *q = &request->i915->gpu_error.wait_queue;
989 	DEFINE_WAIT(reset);
990 	DEFINE_WAIT(wait);
991 
992 	if (flags & I915_WAIT_LOCKED)
993 		add_wait_queue(q, &reset);
994 
995 	do {
996 		prepare_to_wait(&request->execute.wait, &wait, state);
997 
998 		if (i915_sw_fence_done(&request->execute))
999 			break;
1000 
1001 		if (flags & I915_WAIT_LOCKED &&
1002 		    i915_reset_in_progress(&request->i915->gpu_error)) {
1003 			__set_current_state(TASK_RUNNING);
1004 			i915_reset(request->i915);
1005 			reset_wait_queue(q, &reset);
1006 			continue;
1007 		}
1008 
1009 		if (signal_pending_state(state, current)) {
1010 			timeout = -ERESTARTSYS;
1011 			break;
1012 		}
1013 
1014 		if (!timeout) {
1015 			timeout = -ETIME;
1016 			break;
1017 		}
1018 
1019 		timeout = io_schedule_timeout(timeout);
1020 	} while (1);
1021 	finish_wait(&request->execute.wait, &wait);
1022 
1023 	if (flags & I915_WAIT_LOCKED)
1024 		remove_wait_queue(q, &reset);
1025 
1026 	return timeout;
1027 }
1028 
1029 /**
1030  * i915_wait_request - wait until execution of request has finished
1031  * @req: the request to wait upon
1032  * @flags: how to wait
1033  * @timeout: how long to wait in jiffies
1034  *
1035  * i915_wait_request() waits for the request to be completed, for a
1036  * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
1037  * unbounded wait).
1038  *
1039  * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
1040  * in via the flags; conversely, if the struct_mutex is not held, the caller
1041  * must not specify that the wait is locked.
1042  *
1043  * Returns the remaining time (in jiffies) if the request completed, which may
1044  * be zero or -ETIME if the request is unfinished after the timeout expires.
1045  * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
1046  * pending before the request completes.
1047  */
1048 long i915_wait_request(struct drm_i915_gem_request *req,
1049 		       unsigned int flags,
1050 		       long timeout)
1051 {
1052 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
1053 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
1054 	DEFINE_WAIT(reset);
1055 	struct intel_wait wait;
1056 
1057 	might_sleep();
1058 #if IS_ENABLED(CONFIG_LOCKDEP)
1059 	GEM_BUG_ON(debug_locks &&
1060 		   !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
1061 		   !!(flags & I915_WAIT_LOCKED));
1062 #endif
1063 	GEM_BUG_ON(timeout < 0);
1064 
1065 	if (i915_gem_request_completed(req))
1066 		return timeout;
1067 
1068 	if (!timeout)
1069 		return -ETIME;
1070 
1071 	trace_i915_gem_request_wait_begin(req);
1072 
1073 	if (!i915_sw_fence_done(&req->execute)) {
1074 		timeout = __i915_request_wait_for_execute(req, flags, timeout);
1075 		if (timeout < 0)
1076 			goto complete;
1077 
1078 		GEM_BUG_ON(!i915_sw_fence_done(&req->execute));
1079 	}
1080 	GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
1081 	GEM_BUG_ON(!req->global_seqno);
1082 
1083 	/* Optimistic short spin before touching IRQs */
1084 	if (i915_spin_request(req, state, 5))
1085 		goto complete;
1086 
1087 	set_current_state(state);
1088 	if (flags & I915_WAIT_LOCKED)
1089 		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
1090 
1091 	intel_wait_init(&wait, req->global_seqno);
1092 	if (intel_engine_add_wait(req->engine, &wait))
1093 		/* In order to check that we haven't missed the interrupt
1094 		 * as we enabled it, we need to kick ourselves to do a
1095 		 * coherent check on the seqno before we sleep.
1096 		 */
1097 		goto wakeup;
1098 
1099 	for (;;) {
1100 		if (signal_pending_state(state, current)) {
1101 			timeout = -ERESTARTSYS;
1102 			break;
1103 		}
1104 
1105 		if (!timeout) {
1106 			timeout = -ETIME;
1107 			break;
1108 		}
1109 
1110 		timeout = io_schedule_timeout(timeout);
1111 
1112 		if (intel_wait_complete(&wait))
1113 			break;
1114 
1115 		set_current_state(state);
1116 
1117 wakeup:
1118 		/* Carefully check if the request is complete, giving time
1119 		 * for the seqno to be visible following the interrupt.
1120 		 * We also have to check in case we are kicked by the GPU
1121 		 * reset in order to drop the struct_mutex.
1122 		 */
1123 		if (__i915_request_irq_complete(req))
1124 			break;
1125 
1126 		/* If the GPU is hung, and we hold the lock, reset the GPU
1127 		 * and then check for completion. On a full reset, the engine's
1128 	 * HW seqno will be advanced past us and we are complete.
1129 		 * If we do a partial reset, we have to wait for the GPU to
1130 		 * resume and update the breadcrumb.
1131 		 *
1132 		 * If we don't hold the mutex, we can just wait for the worker
1133 		 * to come along and update the breadcrumb (either directly
1134 		 * itself, or indirectly by recovering the GPU).
1135 		 */
1136 		if (flags & I915_WAIT_LOCKED &&
1137 		    i915_reset_in_progress(&req->i915->gpu_error)) {
1138 			__set_current_state(TASK_RUNNING);
1139 			i915_reset(req->i915);
1140 			reset_wait_queue(&req->i915->gpu_error.wait_queue,
1141 					 &reset);
1142 			continue;
1143 		}
1144 
1145 		/* Only spin if we know the GPU is processing this request */
1146 		if (i915_spin_request(req, state, 2))
1147 			break;
1148 	}
1149 
1150 	intel_engine_remove_wait(req->engine, &wait);
1151 	if (flags & I915_WAIT_LOCKED)
1152 		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
1153 	__set_current_state(TASK_RUNNING);
1154 
1155 complete:
1156 	trace_i915_gem_request_wait_end(req);
1157 
1158 	return timeout;
1159 }
1160 
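/* Retire completed requests on this engine's timeline, in order, stopping
 * at the first request that the GPU has yet to finish.
 */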
1161 static void engine_retire_requests(struct intel_engine_cs *engine)
1162 {
1163 	struct drm_i915_gem_request *request, *next;
1164 
1165 	list_for_each_entry_safe(request, next,
1166 				 &engine->timeline->requests, link) {
1167 		if (!__i915_gem_request_completed(request))
1168 			return;
1169 
1170 		i915_gem_request_retire(request);
1171 	}
1172 }
1173 
1174 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
1175 {
1176 	struct intel_engine_cs *engine;
1177 	enum intel_engine_id id;
1178 
1179 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
1180 
1181 	if (!dev_priv->gt.active_requests)
1182 		return;
1183 
1184 	for_each_engine(engine, dev_priv, id)
1185 		engine_retire_requests(engine);
1186 }
1187