xref: /netbsd-src/sys/external/bsd/drm2/dist/drm/i915/gt/intel_engine_heartbeat.c (revision 39a22047b841baa5cf026d864cb972f191501a6d)
1 /*	$NetBSD: intel_engine_heartbeat.c,v 1.3 2021/12/19 11:38:37 riastradh Exp $	*/
2 
3 /*
4  * SPDX-License-Identifier: MIT
5  *
6  * Copyright © 2019 Intel Corporation
7  */
8 
9 #include <sys/cdefs.h>
10 __KERNEL_RCSID(0, "$NetBSD: intel_engine_heartbeat.c,v 1.3 2021/12/19 11:38:37 riastradh Exp $");
11 
12 #include "i915_request.h"
13 
14 #include "intel_context.h"
15 #include "intel_engine_heartbeat.h"
16 #include "intel_engine_pm.h"
17 #include "intel_engine.h"
18 #include "intel_gt.h"
19 #include "intel_reset.h"
20 
21 /*
22  * While the engine is active, we send a periodic pulse along the engine
23  * to check on its health and to flush any idle-barriers. If that request
24  * is stuck, and we fail to preempt it, we declare the engine hung and
25  * issue a reset -- in the hope that restores progress.
26  */
27 
next_heartbeat(struct intel_engine_cs * engine)28 static bool next_heartbeat(struct intel_engine_cs *engine)
29 {
30 	long delay;
31 
32 	delay = READ_ONCE(engine->props.heartbeat_interval_ms);
33 	if (!delay)
34 		return false;
35 
36 	delay = msecs_to_jiffies_timeout(delay);
37 	if (delay >= HZ)
38 		delay = round_jiffies_up_relative(delay);
39 	schedule_delayed_work(&engine->heartbeat.work, delay);
40 
41 	return true;
42 }
43 
idle_pulse(struct intel_engine_cs * engine,struct i915_request * rq)44 static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
45 {
46 	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
47 	i915_request_add_active_barriers(rq);
48 }
49 
show_heartbeat(const struct i915_request * rq,struct intel_engine_cs * engine)50 static void show_heartbeat(const struct i915_request *rq,
51 			   struct intel_engine_cs *engine)
52 {
53 	struct drm_printer p = drm_debug_printer("heartbeat");
54 
55 	intel_engine_dump(engine, &p,
56 			  "%s heartbeat {prio:%d} not ticking\n",
57 			  engine->name,
58 			  rq->sched.attr.priority);
59 }
60 
heartbeat(struct work_struct * wrk)61 static void heartbeat(struct work_struct *wrk)
62 {
63 	struct i915_sched_attr attr = {
64 		.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
65 	};
66 	struct intel_engine_cs *engine =
67 		container_of(wrk, typeof(*engine), heartbeat.work.work);
68 	struct intel_context *ce = engine->kernel_context;
69 	struct i915_request *rq;
70 
71 	rq = engine->heartbeat.systole;
72 	if (rq && i915_request_completed(rq)) {
73 		i915_request_put(rq);
74 		engine->heartbeat.systole = NULL;
75 	}
76 
77 	if (!intel_engine_pm_get_if_awake(engine))
78 		return;
79 
80 	if (intel_gt_is_wedged(engine->gt))
81 		goto out;
82 
83 	if (engine->heartbeat.systole) {
84 		if (engine->schedule &&
85 		    rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
86 			/*
87 			 * Gradually raise the priority of the heartbeat to
88 			 * give high priority work [which presumably desires
89 			 * low latency and no jitter] the chance to naturally
90 			 * complete before being preempted.
91 			 */
92 			attr.priority = I915_PRIORITY_MASK;
93 			if (rq->sched.attr.priority >= attr.priority)
94 				attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
95 			if (rq->sched.attr.priority >= attr.priority)
96 				attr.priority = I915_PRIORITY_BARRIER;
97 
98 #ifdef __NetBSD__
99 			int s = splsoftserial();
100 #else
101 			local_bh_disable();
102 #endif
103 			engine->schedule(rq, &attr);
104 #ifdef __NetBSD__
105 			splx(s);
106 #else
107 			local_bh_enable();
108 #endif
109 		} else {
110 			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
111 				show_heartbeat(rq, engine);
112 
113 			intel_gt_handle_error(engine->gt, engine->mask,
114 					      I915_ERROR_CAPTURE,
115 					      "stopped heartbeat on %s",
116 					      engine->name);
117 		}
118 		goto out;
119 	}
120 
121 	if (engine->wakeref_serial == engine->serial)
122 		goto out;
123 
124 	mutex_lock(&ce->timeline->mutex);
125 
126 	intel_context_enter(ce);
127 	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
128 	intel_context_exit(ce);
129 	if (IS_ERR(rq))
130 		goto unlock;
131 
132 	idle_pulse(engine, rq);
133 	if (i915_modparams.enable_hangcheck)
134 		engine->heartbeat.systole = i915_request_get(rq);
135 
136 	__i915_request_commit(rq);
137 	__i915_request_queue(rq, &attr);
138 
139 unlock:
140 	mutex_unlock(&ce->timeline->mutex);
141 out:
142 	if (!next_heartbeat(engine))
143 		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
144 	intel_engine_pm_put(engine);
145 }
146 
intel_engine_unpark_heartbeat(struct intel_engine_cs * engine)147 void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
148 {
149 	if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
150 		return;
151 
152 	next_heartbeat(engine);
153 }
154 
intel_engine_park_heartbeat(struct intel_engine_cs * engine)155 void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
156 {
157 	if (cancel_delayed_work(&engine->heartbeat.work))
158 		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
159 }
160 
intel_engine_init_heartbeat(struct intel_engine_cs * engine)161 void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
162 {
163 	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
164 }
165 
intel_engine_set_heartbeat(struct intel_engine_cs * engine,unsigned long delay)166 int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
167 			       unsigned long delay)
168 {
169 	int err;
170 
171 	/* Send one last pulse before to cleanup persistent hogs */
172 	if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
173 		err = intel_engine_pulse(engine);
174 		if (err)
175 			return err;
176 	}
177 
178 	WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
179 
180 	if (intel_engine_pm_get_if_awake(engine)) {
181 		if (delay)
182 			intel_engine_unpark_heartbeat(engine);
183 		else
184 			intel_engine_park_heartbeat(engine);
185 		intel_engine_pm_put(engine);
186 	}
187 
188 	return 0;
189 }
190 
intel_engine_pulse(struct intel_engine_cs * engine)191 int intel_engine_pulse(struct intel_engine_cs *engine)
192 {
193 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
194 	struct intel_context *ce = engine->kernel_context;
195 	struct i915_request *rq;
196 	int err = 0;
197 
198 	if (!intel_engine_has_preemption(engine))
199 		return -ENODEV;
200 
201 	if (!intel_engine_pm_get_if_awake(engine))
202 		return 0;
203 
204 	if (mutex_lock_interruptible(&ce->timeline->mutex))
205 		goto out_rpm;
206 
207 	intel_context_enter(ce);
208 	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
209 	intel_context_exit(ce);
210 	if (IS_ERR(rq)) {
211 		err = PTR_ERR(rq);
212 		goto out_unlock;
213 	}
214 
215 	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
216 	idle_pulse(engine, rq);
217 
218 	__i915_request_commit(rq);
219 	__i915_request_queue(rq, &attr);
220 
221 out_unlock:
222 	mutex_unlock(&ce->timeline->mutex);
223 out_rpm:
224 	intel_engine_pm_put(engine);
225 	return err;
226 }
227 
intel_engine_flush_barriers(struct intel_engine_cs * engine)228 int intel_engine_flush_barriers(struct intel_engine_cs *engine)
229 {
230 	struct i915_request *rq;
231 	int err = 0;
232 
233 	if (llist_empty(&engine->barrier_tasks))
234 		return 0;
235 
236 	if (!intel_engine_pm_get_if_awake(engine))
237 		return 0;
238 
239 	rq = i915_request_create(engine->kernel_context);
240 	if (IS_ERR(rq)) {
241 		err = PTR_ERR(rq);
242 		goto out_rpm;
243 	}
244 
245 	idle_pulse(engine, rq);
246 	i915_request_add(rq);
247 
248 out_rpm:
249 	intel_engine_pm_put(engine);
250 	return err;
251 }
252 
253 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
254 #include "selftest_engine_heartbeat.c"
255 #endif
256