/*	$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $");

#include "i915_drv.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"

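/*
 * __engine_unpark() is the engine wakeref "get" hook (wired up via wf_ops
 * below): it takes a power reference on the owning GT, re-pins the default
 * state for use by resets from atomic context, discards (and, under
 * CONFIG_DRM_I915_DEBUG_GEM, poisons) the stale kernel context image left
 * over from idling, and restarts the heartbeat.
 */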
static int __engine_unpark(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);
	struct intel_context *ce;
	void *map;

	ENGINE_TRACE(engine, "\n");

	intel_gt_pm_get(engine->gt);

	/* Pin the default state for fast resets from atomic context. */
	map = NULL;
	if (engine->default_state)
		map = i915_gem_object_pin_map(engine->default_state,
					      I915_MAP_WB);
	if (!IS_ERR_OR_NULL(map))
		engine->pinned_default_state = map;

	/* Discard stale context state from across idling */
	ce = engine->kernel_context;
	if (ce) {
		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));

		/* First poison the image to verify we never fully trust it */
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
			struct drm_i915_gem_object *obj = ce->state->obj;
			int type = i915_coherent_map_type(engine->i915);

			map = i915_gem_object_pin_map(obj, type);
			if (!IS_ERR(map)) {
				memset(map, CONTEXT_REDZONE, obj->base.size);
				i915_gem_object_flush_map(obj);
				i915_gem_object_unpin_map(obj);
			}
		}

		ce->ops->reset(ce);
	}

	if (engine->unpark)
		engine->unpark(engine);

	intel_engine_unpark_heartbeat(engine);
	return 0;
}

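/*
 * Under lockdep, __timeline_mark_lock()/__timeline_mark_unlock() annotate
 * the kernel context's timeline->mutex as held around the final request
 * built in switch_to_kernel_context(), without actually taking the mutex
 * (we may already be underneath it while retiring).  Without lockdep they
 * compile away to nothing.
 */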
#if IS_ENABLED(CONFIG_LOCKDEP)

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

	return flags;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
	local_irq_restore(flags);
}

#else

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	return 0;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
}

#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */

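/*
 * duration() is the dma-fence callback installed on the final
 * kernel-context request below: once that request signals, the time from
 * emission to completion is fed into the engine's latency estimate (an
 * exponentially weighted moving average).
 */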
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_request *rq = to_request(fence);

	ewma__engine_latency_add(&rq->engine->latency,
				 ktime_us_delta(rq->fence.timestamp,
						rq->duration.emitted));
}

static void
__queue_and_release_pm(struct i915_request *rq,
		       struct intel_timeline *tl,
		       struct intel_engine_cs *engine)
{
	struct intel_gt_timelines *timelines = &engine->gt->timelines;

	ENGINE_TRACE(engine, "\n");

	/*
	 * We have to serialise all potential retirement paths with our
	 * submission, as we don't want to underflow either the
	 * engine->wakeref.counter or our timeline->active_count.
	 *
	 * Equally, we cannot allow a new submission to start until
	 * after we finish queueing, nor could we allow that submitter
	 * to retire us before we are ready!
	 */
	spin_lock(&timelines->lock);

	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);

	/* Hand the request over to HW (and so to engine_retire()) */
	__i915_request_queue(rq, NULL);

	/* Let new submissions commence (and maybe retire this timeline) */
	__intel_wakeref_defer_park(&engine->wakeref);

	spin_unlock(&timelines->lock);
}

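/*
 * Before the engine is allowed to park we must flush the GPU out of any
 * user context and into the neutral kernel context, so that powering down
 * cannot corrupt a user's context image.  switch_to_kernel_context() emits
 * that final barrier request; it returns true if the engine may park now
 * (already idle in the kernel context, or wedged) and false if parking
 * must wait for the barrier request to be retired.
 */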
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long flags;
	bool result = true;

	/* GPU is pointing to the void, as good as in the kernel context. */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	GEM_BUG_ON(!intel_context_is_barrier(ce));

	/* Already inside the kernel context, safe to power down. */
	if (engine->wakeref_serial == engine->serial)
		return true;

	/*
	 * Note, we do this without taking the timeline->mutex. We cannot
	 * as we may be called while retiring the kernel context and so
	 * already underneath the timeline->mutex. Instead we rely on the
	 * exclusive property of the __engine_park that prevents anyone
	 * else from creating a request on this engine. This also requires
	 * that the ring is empty and we avoid any waits while constructing
	 * the context, as they assume protection by the timeline->mutex.
	 * This should hold true as we can only park the engine after
	 * retiring the last request, thus all rings should be empty and
	 * all timelines idle.
	 *
	 * For unlocking, there are 2 other parties and the GPU who have a
	 * stake here.
	 *
	 * A new gpu user will be waiting on the engine-pm to start their
	 * engine_unpark. New waiters are predicated on engine->wakeref.count
	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
	 * engine->wakeref.
	 *
	 * The other party is intel_gt_retire_requests(), which is walking the
	 * list of active timelines looking for completions. Meanwhile as soon
	 * as we call __i915_request_queue(), the GPU may complete our request.
	 * Ergo, if we put ourselves on the timelines.active_list
	 * (see intel_timeline_enter()) before we increment the
	 * engine->wakeref.count, we may see the request completion and retire
	 * it causing an underflow of the engine->wakeref.
	 */
	flags = __timeline_mark_lock(ce);
	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

	rq = __i915_request_create(ce, GFP_NOWAIT);
	if (IS_ERR(rq))
		/* Context switch failed, hope for the best! Maybe reset? */
		goto out_unlock;

	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
		/*
		 * Use an interrupt for precise measurement of duration,
		 * otherwise we rely on someone else retiring all the requests
		 * which may delay the signaling (i.e. we will likely wait
		 * until the background request retirement running every
		 * second or two).
		 */
		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
		rq->duration.emitted = ktime_get();
	}

	/* Expose ourselves to the world */
	__queue_and_release_pm(rq, ce->timeline, engine);

	result = false;
out_unlock:
	__timeline_mark_unlock(ce, flags);
	return result;
}

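/*
 * Any idle barriers still queued on the engine are completed here with
 * -EAGAIN so their owners can clean up even though no request will ever
 * carry them (e.g. after the GPU has been wedged).
 */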
static void call_idle_barriers(struct intel_engine_cs *engine)
{
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		struct i915_active_fence *fence =
		    container_of(node, struct i915_active_fence, llist);

		fence->cb.func(ERR_PTR(-EAGAIN), &fence->cb);
	}
}

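/*
 * __engine_park() is the engine wakeref "put" hook, run once the last
 * reference has been dropped.  It may refuse to park (returning -EBUSY)
 * until the engine has switched back into the kernel context; otherwise it
 * quiesces the engine (heartbeat, breadcrumbs, buffer pool, pinned default
 * state) and releases the GT power reference taken in __engine_unpark().
 */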
static int __engine_park(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

	engine->saturated = 0;

	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
	 * safe to power down. (We are paranoid in case that runtime
	 * suspend causes corruption to the active context image, and
	 * want to avoid that impacting userspace.)
	 */
	if (!switch_to_kernel_context(engine))
		return -EBUSY;

	ENGINE_TRACE(engine, "\n");

	call_idle_barriers(engine); /* cleanup after wedging */

	intel_engine_park_heartbeat(engine);
	intel_engine_disarm_breadcrumbs(engine);
	intel_engine_pool_park(&engine->pool);

	/* Must be reset upon idling, or we may miss the busy wakeup. */
	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

	if (engine->park)
		engine->park(engine);

	if (engine->pinned_default_state) {
		i915_gem_object_unpin_map(engine->default_state);
		engine->pinned_default_state = NULL;
	}

	engine->execlists.no_priolist = false;

	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
	intel_gt_pm_put_async(engine->gt);
	return 0;
}

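/*
 * wf_ops binds the callbacks above to engine->wakeref: taking the first
 * reference runs __engine_unpark(), dropping the last runs __engine_park().
 */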
static const struct intel_wakeref_ops wf_ops = {
	.get = __engine_unpark,
	.put = __engine_park,
};

void intel_engine_init__pm(struct intel_engine_cs *engine)
{
	struct intel_runtime_pm *rpm = engine->uncore->rpm;

	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
	intel_engine_init_heartbeat(engine);
}

void
intel_engine_fini__pm(struct intel_engine_cs *engine)
{

	intel_wakeref_fini(&engine->wakeref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif