/*	$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $");

#include "i915_drv.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"

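/*
 * Power up the engine: take a wakeref on the owning GT, pin the default
 * state for use by resets from atomic context, discard any stale kernel
 * context state left over from idling, and restart the heartbeat.
 */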
static int __engine_unpark(struct intel_wakeref *wf)
{
        struct intel_engine_cs *engine =
                container_of(wf, typeof(*engine), wakeref);
        struct intel_context *ce;
        void *map;

        ENGINE_TRACE(engine, "\n");

        intel_gt_pm_get(engine->gt);

        /* Pin the default state for fast resets from atomic context. */
        map = NULL;
        if (engine->default_state)
                map = i915_gem_object_pin_map(engine->default_state,
                                              I915_MAP_WB);
        if (!IS_ERR_OR_NULL(map))
                engine->pinned_default_state = map;

        /* Discard stale context state from across idling */
        ce = engine->kernel_context;
        if (ce) {
                GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));

                /* First poison the image to verify we never fully trust it */
                if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
                        struct drm_i915_gem_object *obj = ce->state->obj;
                        int type = i915_coherent_map_type(engine->i915);

                        map = i915_gem_object_pin_map(obj, type);
                        if (!IS_ERR(map)) {
                                memset(map, CONTEXT_REDZONE, obj->base.size);
                                i915_gem_object_flush_map(obj);
                                i915_gem_object_unpin_map(obj);
                        }
                }

                ce->ops->reset(ce);
        }

        if (engine->unpark)
                engine->unpark(engine);

        intel_engine_unpark_heartbeat(engine);
        return 0;
}

#if IS_ENABLED(CONFIG_LOCKDEP)

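/*
 * Lockdep-only bookkeeping: annotate an acquire/release of the kernel
 * context timeline->mutex so lockdep tracks the implicit exclusion relied
 * upon by switch_to_kernel_context(), without actually taking the mutex.
 */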
static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
        unsigned long flags;

        local_irq_save(flags);
        mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

        return flags;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
                                          unsigned long flags)
{
        mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
        local_irq_restore(flags);
}

#else

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
        return 0;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
                                          unsigned long flags)
{
}

#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */

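/*
 * Fence callback attached to the final kernel context request: fold the
 * time from emission to completion into the engine's latency estimate.
 */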
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
        struct i915_request *rq = to_request(fence);

        ewma__engine_latency_add(&rq->engine->latency,
                                 ktime_us_delta(rq->fence.timestamp,
                                                rq->duration.emitted));
}

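/*
 * Queue the final request and release the engine-pm reference as a single
 * step under the timelines lock, serialising against concurrent retirement
 * and new submissions.
 */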
static void
__queue_and_release_pm(struct i915_request *rq,
                       struct intel_timeline *tl,
                       struct intel_engine_cs *engine)
{
        struct intel_gt_timelines *timelines = &engine->gt->timelines;

        ENGINE_TRACE(engine, "\n");

        /*
         * We have to serialise all potential retirement paths with our
         * submission, as we don't want to underflow either the
         * engine->wakeref.counter or our timeline->active_count.
         *
         * Equally, we cannot allow a new submission to start until
         * after we finish queueing, nor could we allow that submitter
         * to retire us before we are ready!
         */
        spin_lock(&timelines->lock);

        /* Let intel_gt_retire_requests() retire us (acquired under lock) */
        if (!atomic_fetch_inc(&tl->active_count))
                list_add_tail(&tl->link, &timelines->active_list);

        /* Hand the request over to HW and so engine_retire() */
        __i915_request_queue(rq, NULL);

        /* Let new submissions commence (and maybe retire this timeline) */
        __intel_wakeref_defer_park(&engine->wakeref);

        spin_unlock(&timelines->lock);
}

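/*
 * Before parking, switch the engine back onto its kernel context by
 * submitting one final barrier request, so that no user context state
 * remains pinned across the powerdown. Returns true when it is already
 * safe to park, false once the switch request has been queued.
 */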
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
        struct intel_context *ce = engine->kernel_context;
        struct i915_request *rq;
        unsigned long flags;
        bool result = true;

        /* GPU is pointing to the void, as good as in the kernel context. */
        if (intel_gt_is_wedged(engine->gt))
                return true;

        GEM_BUG_ON(!intel_context_is_barrier(ce));

        /* Already inside the kernel context, safe to power down. */
        if (engine->wakeref_serial == engine->serial)
                return true;

        /*
         * Note, we do this without taking the timeline->mutex. We cannot
         * as we may be called while retiring the kernel context and so
         * already underneath the timeline->mutex. Instead we rely on the
         * exclusive property of the __engine_park that prevents anyone
         * else from creating a request on this engine. This also requires
         * that the ring is empty and we avoid any waits while constructing
         * the context, as they assume protection by the timeline->mutex.
         * This should hold true as we can only park the engine after
         * retiring the last request, thus all rings should be empty and
         * all timelines idle.
         *
         * For unlocking, there are 2 other parties and the GPU who have a
         * stake here.
         *
         * A new gpu user will be waiting on the engine-pm to start their
         * engine_unpark. New waiters are predicated on engine->wakeref.count
         * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
         * engine->wakeref.
         *
         * The other party is intel_gt_retire_requests(), which is walking the
         * list of active timelines looking for completions. Meanwhile as soon
         * as we call __i915_request_queue(), the GPU may complete our request.
         * Ergo, if we put ourselves on the timelines.active_list
         * (see intel_timeline_enter()) before we increment the
         * engine->wakeref.count, we may see the request completion and retire
         * it, causing an underflow of the engine->wakeref.
         */
        flags = __timeline_mark_lock(ce);
        GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

        rq = __i915_request_create(ce, GFP_NOWAIT);
        if (IS_ERR(rq))
                /* Context switch failed, hope for the best! Maybe reset? */
                goto out_unlock;

        /* Check again on the next retirement. */
        engine->wakeref_serial = engine->serial + 1;
        i915_request_add_active_barriers(rq);

        /* Install ourselves as a preemption barrier */
        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
        if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
                /*
                 * Use an interrupt for precise measurement of duration,
                 * otherwise we rely on someone else retiring all the requests
                 * which may delay the signaling (i.e. we will likely wait
                 * until the background request retirement running every
                 * second or two).
                 */
                dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
                rq->duration.emitted = ktime_get();
        }

        /* Expose ourselves to the world */
        __queue_and_release_pm(rq, ce->timeline, engine);

        result = false;
out_unlock:
        __timeline_mark_unlock(ce, flags);
        return result;
}

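/*
 * Drain any idle barrier tasks left pending on the engine, signalling
 * their callbacks with -EAGAIN (e.g. to clean up after wedging).
 */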
static void call_idle_barriers(struct intel_engine_cs *engine)
{
        struct llist_node *node, *next;

        llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
                struct i915_active_fence *fence =
                        container_of(node, struct i915_active_fence, llist);

                fence->cb.func(ERR_PTR(-EAGAIN), &fence->cb);
        }
}

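/*
 * Power down the engine: once the GPU has been switched back to the kernel
 * context, park the heartbeat and breadcrumbs, release the pinned default
 * state and finally drop our reference on the GT.
 */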
static int __engine_park(struct intel_wakeref *wf)
{
        struct intel_engine_cs *engine =
                container_of(wf, typeof(*engine), wakeref);

        engine->saturated = 0;

        /*
         * If one and only one request is completed between pm events,
         * we know that we are inside the kernel context and it is
         * safe to power down. (We are paranoid in case that runtime
         * suspend causes corruption to the active context image, and
         * want to avoid that impacting userspace.)
         */
        if (!switch_to_kernel_context(engine))
                return -EBUSY;

        ENGINE_TRACE(engine, "\n");

        call_idle_barriers(engine); /* cleanup after wedging */

        intel_engine_park_heartbeat(engine);
        intel_engine_disarm_breadcrumbs(engine);
        intel_engine_pool_park(&engine->pool);

        /* Must be reset upon idling, or we may miss the busy wakeup. */
        GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

        if (engine->park)
                engine->park(engine);

        if (engine->pinned_default_state) {
                i915_gem_object_unpin_map(engine->default_state);
                engine->pinned_default_state = NULL;
        }

        engine->execlists.no_priolist = false;

        /* While gt calls i915_vma_parked(), we have to break the lock cycle */
        intel_gt_pm_put_async(engine->gt);
        return 0;
}

static const struct intel_wakeref_ops wf_ops = {
        .get = __engine_unpark,
        .put = __engine_park,
};

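/* Hook the engine wakeref up to runtime pm and initialise its heartbeat. */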
void intel_engine_init__pm(struct intel_engine_cs *engine)
{
        struct intel_runtime_pm *rpm = engine->uncore->rpm;

        intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
        intel_engine_init_heartbeat(engine);
}

void
intel_engine_fini__pm(struct intel_engine_cs *engine)
{

        intel_wakeref_fini(&engine->wakeref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif