// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#ifdef notyet
#include <uapi/linux/sched/types.h>
#endif

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}

static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, true);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

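/*
 * Release one irq-enable reference, disabling the interrupt once the last
 * reference is dropped, then disarm and return the GT wakeref taken in
 * __intel_breadcrumbs_arm_irq(). Caller must hold b->irq_lock.
 */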
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, false);
	intel_gt_pm_put_async(b->irq_engine->gt);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

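/*
 * Drop @ce from the signalers list once its signal list is empty. Returns
 * true if the context was removed, in which case the caller must release
 * the context reference taken when its first breadcrumb was added.
 */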
static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

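/*
 * Debug aid: verify that @rq belongs to @ce and sits in seqno order within
 * the context's signal list, i.e. ce->signals stays sorted by breadcrumb.
 */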
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

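/*
 * Invoke the dma-fence callbacks that were moved onto a private list,
 * reinitialising each callback node before calling it. The fence lock
 * must be held (asserted via lockdep).
 */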
static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

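/*
 * Bottom half run from irq_work context: pick up requests parked on the
 * signaled_requests llist, walk the active signalers for completed
 * breadcrumbs, signal their fences with a single shared timestamp, and
 * lazily arm/disarm the breadcrumb interrupt as listeners come and go.
 */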
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__i915_request_is_complete(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	/* Lazy irq enabling after HW submission */
	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);

	/* And confirm that we still want irqs enabled before we yield */
	if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
		intel_breadcrumbs_disarm_irq(b);
}

struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	mtx_init(&b->signalers_lock, IPL_TTY);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	mtx_init(&b->irq_lock, IPL_TTY);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}

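/*
 * On reset, reapply the current interrupt enable/disable state so that the
 * hardware interrupt mask matches our b->irq_enabled bookkeeping.
 */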
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_queue(&b->irq_work);
}

void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

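/*
 * Mark the fence signaled (if not already) and hand the request to the irq
 * worker via the lock-free signaled_requests list, so that timestamping and
 * the callback notifications happen in signal_irq_work().
 */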
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

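/*
 * Queue @rq for signaling from the breadcrumb interrupt: insert it into its
 * context's signal list in seqno order, holding references on the request
 * and (for the first signaler) on the context until the breadcrumb is
 * signaled or cancelled. Called with ce->signal_lock held.
 */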
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
		irq_work_queue(&b->irq_work);
}

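/*
 * Ask for @rq's fence to be signaled from the breadcrumb interrupt. If the
 * request is not yet active (and not signaled), the breadcrumb is attached
 * later, once the request has been submitted and marked active. Always
 * returns true.
 */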
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

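/*
 * Detach @rq from its context's signal list. If the request managed to
 * complete in the meantime, its fence is still flushed through the irq
 * worker; finally the reference taken by insert_breadcrumb() is dropped.
 */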
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

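/*
 * Flush all remaining (already completed) breadcrumbs for @ce through the
 * irq worker and drop the context from the signalers list, then busy-wait
 * until any concurrent signal_irq_work() has stopped iterating over it.
 */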
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}