1 /* $NetBSD: selftest_engine_heartbeat.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $ */
2
3 /*
4 * SPDX-License-Identifier: MIT
5 *
6 * Copyright © 2018 Intel Corporation
7 */
8
9 #include <sys/cdefs.h>
10 __KERNEL_RCSID(0, "$NetBSD: selftest_engine_heartbeat.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");
11
12 #include <linux/sort.h>
13
14 #include "i915_drv.h"
15
16 #include "intel_gt_requests.h"
17 #include "i915_selftest.h"
18
timeline_sync(struct intel_timeline * tl)19 static int timeline_sync(struct intel_timeline *tl)
20 {
21 struct dma_fence *fence;
22 long timeout;
23
24 fence = i915_active_fence_get(&tl->last_request);
25 if (!fence)
26 return 0;
27
28 timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
29 dma_fence_put(fence);
30 if (timeout < 0)
31 return timeout;
32
33 return 0;
34 }
35
engine_sync_barrier(struct intel_engine_cs * engine)36 static int engine_sync_barrier(struct intel_engine_cs *engine)
37 {
38 return timeline_sync(engine->kernel_context->timeline);
39 }
40
41 struct pulse {
42 struct i915_active active;
43 struct kref kref;
44 };
45
pulse_active(struct i915_active * active)46 static int pulse_active(struct i915_active *active)
47 {
48 kref_get(&container_of(active, struct pulse, active)->kref);
49 return 0;
50 }
51
pulse_free(struct kref * kref)52 static void pulse_free(struct kref *kref)
53 {
54 kfree(container_of(kref, struct pulse, kref));
55 }
56
pulse_put(struct pulse * p)57 static void pulse_put(struct pulse *p)
58 {
59 kref_put(&p->kref, pulse_free);
60 }
61
pulse_retire(struct i915_active * active)62 static void pulse_retire(struct i915_active *active)
63 {
64 pulse_put(container_of(active, struct pulse, active));
65 }
66
pulse_create(void)67 static struct pulse *pulse_create(void)
68 {
69 struct pulse *p;
70
71 p = kmalloc(sizeof(*p), GFP_KERNEL);
72 if (!p)
73 return p;
74
75 kref_init(&p->kref);
76 i915_active_init(&p->active, pulse_active, pulse_retire);
77
78 return p;
79 }
80
pulse_unlock_wait(struct pulse * p)81 static void pulse_unlock_wait(struct pulse *p)
82 {
83 i915_active_unlock_wait(&p->active);
84 }
85
__live_idle_pulse(struct intel_engine_cs * engine,int (* fn)(struct intel_engine_cs * cs))86 static int __live_idle_pulse(struct intel_engine_cs *engine,
87 int (*fn)(struct intel_engine_cs *cs))
88 {
89 struct pulse *p;
90 int err;
91
92 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
93
94 p = pulse_create();
95 if (!p)
96 return -ENOMEM;
97
98 err = i915_active_acquire(&p->active);
99 if (err)
100 goto out;
101
102 err = i915_active_acquire_preallocate_barrier(&p->active, engine);
103 if (err) {
104 i915_active_release(&p->active);
105 goto out;
106 }
107
108 i915_active_acquire_barrier(&p->active);
109 i915_active_release(&p->active);
110
111 GEM_BUG_ON(i915_active_is_idle(&p->active));
112 GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
113
114 err = fn(engine);
115 if (err)
116 goto out;
117
118 GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
119
120 if (engine_sync_barrier(engine)) {
121 struct drm_printer m = drm_err_printer("pulse");
122
123 pr_err("%s: no heartbeat pulse?\n", engine->name);
124 intel_engine_dump(engine, &m, "%s", engine->name);
125
126 err = -ETIME;
127 goto out;
128 }
129
130 GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
131
132 pulse_unlock_wait(p); /* synchronize with the retirement callback */
133
134 if (!i915_active_is_idle(&p->active)) {
135 struct drm_printer m = drm_err_printer("pulse");
136
137 pr_err("%s: heartbeat pulse did not flush idle tasks\n",
138 engine->name);
139 i915_active_print(&p->active, &m);
140
141 err = -EINVAL;
142 goto out;
143 }
144
145 out:
146 pulse_put(p);
147 return err;
148 }
149
live_idle_flush(void * arg)150 static int live_idle_flush(void *arg)
151 {
152 struct intel_gt *gt = arg;
153 struct intel_engine_cs *engine;
154 enum intel_engine_id id;
155 int err = 0;
156
157 /* Check that we can flush the idle barriers */
158
159 for_each_engine(engine, gt, id) {
160 intel_engine_pm_get(engine);
161 err = __live_idle_pulse(engine, intel_engine_flush_barriers);
162 intel_engine_pm_put(engine);
163 if (err)
164 break;
165 }
166
167 return err;
168 }
169
live_idle_pulse(void * arg)170 static int live_idle_pulse(void *arg)
171 {
172 struct intel_gt *gt = arg;
173 struct intel_engine_cs *engine;
174 enum intel_engine_id id;
175 int err = 0;
176
177 /* Check that heartbeat pulses flush the idle barriers */
178
179 for_each_engine(engine, gt, id) {
180 intel_engine_pm_get(engine);
181 err = __live_idle_pulse(engine, intel_engine_pulse);
182 intel_engine_pm_put(engine);
183 if (err && err != -ENODEV)
184 break;
185
186 err = 0;
187 }
188
189 return err;
190 }
191
/*
 * sort() comparator for arrays of unsigned 32-bit values, ascending.
 *
 * Returns a negative, zero or positive value when *_a is respectively less
 * than, equal to, or greater than *_b.
 *
 * The result is built from two comparisons rather than a subtraction:
 * "*a - *b" is computed in unsigned arithmetic and then converted to int,
 * which yields the wrong sign once the operands differ by more than INT_MAX.
 * uint32_t is the same type as the u32 elements being sorted.
 */
static int cmp_u32(const void *_a, const void *_b)
{
	const uint32_t *a = _a, *b = _b;

	return (*a > *b) - (*a < *b);
}
198
__live_heartbeat_fast(struct intel_engine_cs * engine)199 static int __live_heartbeat_fast(struct intel_engine_cs *engine)
200 {
201 struct intel_context *ce;
202 struct i915_request *rq;
203 ktime_t t0, t1;
204 u32 times[5];
205 int err;
206 int i;
207
208 ce = intel_context_create(engine);
209 if (IS_ERR(ce))
210 return PTR_ERR(ce);
211
212 intel_engine_pm_get(engine);
213
214 err = intel_engine_set_heartbeat(engine, 1);
215 if (err)
216 goto err_pm;
217
218 for (i = 0; i < ARRAY_SIZE(times); i++) {
219 /* Manufacture a tick */
220 do {
221 while (READ_ONCE(engine->heartbeat.systole))
222 flush_delayed_work(&engine->heartbeat.work);
223
224 engine->serial++; /* quick, pretend we are not idle! */
225 flush_delayed_work(&engine->heartbeat.work);
226 if (!delayed_work_pending(&engine->heartbeat.work)) {
227 pr_err("%s: heartbeat did not start\n",
228 engine->name);
229 err = -EINVAL;
230 goto err_pm;
231 }
232
233 rcu_read_lock();
234 rq = READ_ONCE(engine->heartbeat.systole);
235 if (rq)
236 rq = i915_request_get_rcu(rq);
237 rcu_read_unlock();
238 } while (!rq);
239
240 t0 = ktime_get();
241 while (rq == READ_ONCE(engine->heartbeat.systole))
242 yield(); /* work is on the local cpu! */
243 t1 = ktime_get();
244
245 i915_request_put(rq);
246 times[i] = ktime_us_delta(t1, t0);
247 }
248
249 sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
250
251 pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
252 engine->name,
253 times[ARRAY_SIZE(times) / 2],
254 times[0],
255 times[ARRAY_SIZE(times) - 1]);
256
257 /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
258 if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
259 pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
260 engine->name,
261 times[ARRAY_SIZE(times) / 2],
262 jiffies_to_usecs(6));
263 err = -EINVAL;
264 }
265
266 intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
267 err_pm:
268 intel_engine_pm_put(engine);
269 intel_context_put(ce);
270 return err;
271 }
272
live_heartbeat_fast(void * arg)273 static int live_heartbeat_fast(void *arg)
274 {
275 struct intel_gt *gt = arg;
276 struct intel_engine_cs *engine;
277 enum intel_engine_id id;
278 int err = 0;
279
280 /* Check that the heartbeat ticks at the desired rate. */
281 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
282 return 0;
283
284 for_each_engine(engine, gt, id) {
285 err = __live_heartbeat_fast(engine);
286 if (err)
287 break;
288 }
289
290 return err;
291 }
292
__live_heartbeat_off(struct intel_engine_cs * engine)293 static int __live_heartbeat_off(struct intel_engine_cs *engine)
294 {
295 int err;
296
297 intel_engine_pm_get(engine);
298
299 engine->serial++;
300 flush_delayed_work(&engine->heartbeat.work);
301 if (!delayed_work_pending(&engine->heartbeat.work)) {
302 pr_err("%s: heartbeat not running\n",
303 engine->name);
304 err = -EINVAL;
305 goto err_pm;
306 }
307
308 err = intel_engine_set_heartbeat(engine, 0);
309 if (err)
310 goto err_pm;
311
312 engine->serial++;
313 flush_delayed_work(&engine->heartbeat.work);
314 if (delayed_work_pending(&engine->heartbeat.work)) {
315 pr_err("%s: heartbeat still running\n",
316 engine->name);
317 err = -EINVAL;
318 goto err_beat;
319 }
320
321 if (READ_ONCE(engine->heartbeat.systole)) {
322 pr_err("%s: heartbeat still allocated\n",
323 engine->name);
324 err = -EINVAL;
325 goto err_beat;
326 }
327
328 err_beat:
329 intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
330 err_pm:
331 intel_engine_pm_put(engine);
332 return err;
333 }
334
live_heartbeat_off(void * arg)335 static int live_heartbeat_off(void *arg)
336 {
337 struct intel_gt *gt = arg;
338 struct intel_engine_cs *engine;
339 enum intel_engine_id id;
340 int err = 0;
341
342 /* Check that we can turn off heartbeat and not interrupt VIP */
343 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
344 return 0;
345
346 for_each_engine(engine, gt, id) {
347 if (!intel_engine_has_preemption(engine))
348 continue;
349
350 err = __live_heartbeat_off(engine);
351 if (err)
352 break;
353 }
354
355 return err;
356 }
357
intel_heartbeat_live_selftests(struct drm_i915_private * i915)358 int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
359 {
360 static const struct i915_subtest tests[] = {
361 SUBTEST(live_idle_flush),
362 SUBTEST(live_idle_pulse),
363 SUBTEST(live_heartbeat_fast),
364 SUBTEST(live_heartbeat_off),
365 };
366 int saved_hangcheck;
367 int err;
368
369 if (intel_gt_is_wedged(&i915->gt))
370 return 0;
371
372 saved_hangcheck = i915_modparams.enable_hangcheck;
373 i915_modparams.enable_hangcheck = INT_MAX;
374
375 err = intel_gt_live_subtests(tests, &i915->gt);
376
377 i915_modparams.enable_hangcheck = saved_hangcheck;
378 return err;
379 }
380