/*	$NetBSD: sched_policy.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $	*/

/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Anhua Xu
 *    Kevin Tian <kevin.tian@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sched_policy.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $");

#include "i915_drv.h"
#include "gvt.h"

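/*
 * Return true if any engine of this vGPU still has workloads queued,
 * i.e. the vGPU is worth scheduling.
 */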
static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
{
	enum intel_engine_id i;
	struct intel_engine_cs *engine;

	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
		if (!list_empty(workload_q_head(vgpu, i)))
			return true;
	}

	return false;
}

/* Give a newly started vGPU elevated scheduling priority for 2 seconds */
#define GVT_SCHED_VGPU_PRI_TIME  2

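/* Per-vGPU scheduling state, linked into the scheduler's LRU run queue */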
struct vgpu_sched_data {
	struct list_head lru_list;
	struct intel_vgpu *vgpu;
	bool active;
	bool pri_sched;
	ktime_t pri_time;
	ktime_t sched_in_time;
	ktime_t sched_time;
	ktime_t left_ts;
	ktime_t allocated_ts;

	struct vgpu_sched_ctl sched_ctl;
};

struct gvt_sched_data {
	struct intel_gvt *gvt;
	struct hrtimer timer;
	unsigned long period;
	struct list_head lru_runq_head;
	ktime_t expire_time;
};

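/*
 * Account the time this vGPU has run since it was last switched in:
 * add it to the accumulated sched_time and subtract it from the
 * remaining time slice (left_ts).  The idle vGPU is never charged.
 */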
static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time)
{
	ktime_t delta_ts;
	struct vgpu_sched_data *vgpu_data;

	if (!vgpu || vgpu == vgpu->gvt->idle_vgpu)
		return;

	vgpu_data = vgpu->sched_data;
	delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time);
	vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts);
	vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts);
	vgpu_data->sched_in_time = cur_time;
}

#define GVT_TS_BALANCE_PERIOD_MS 100
#define GVT_TS_BALANCE_STAGE_NUM 10

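/*
 * Rebalance the time slices of all vGPUs on the run queue.  Every
 * GVT_TS_BALANCE_STAGE_NUM stages the budgets are recomputed from
 * scratch in proportion to each vGPU's weight; in the intermediate
 * stages each vGPU's remaining budget is simply topped up by its
 * allocated share, so leftover time or debt carries over.
 */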
static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
{
	struct vgpu_sched_data *vgpu_data;
	struct list_head *pos;
	static u64 stage_check;
	int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;

	/* At stage 0 the timeslice accumulation is reset: each vGPU is
	 * allocated a fresh budget without carrying over previous debt.
	 */
	if (stage == 0) {
		int total_weight = 0;
		ktime_t fair_timeslice;

		list_for_each(pos, &sched_data->lru_runq_head) {
			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
			total_weight += vgpu_data->sched_ctl.weight;
		}

		list_for_each(pos, &sched_data->lru_runq_head) {
			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
			fair_timeslice = ktime_divns(ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS),
						     total_weight) * vgpu_data->sched_ctl.weight;

			vgpu_data->allocated_ts = fair_timeslice;
			vgpu_data->left_ts = vgpu_data->allocated_ts;
		}
	} else {
		list_for_each(pos, &sched_data->lru_runq_head) {
			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);

			/* The timeslice for the next 100ms adds the leftover
			 * (or debt) slice of the previous stages.
			 */
			vgpu_data->left_ts += vgpu_data->allocated_ts;
		}
	}
}

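/*
 * Try to perform the actual vGPU switch.  The switch only completes
 * once every engine has finished its current workload; until then the
 * need_reschedule flag keeps the dispatch thread from issuing new work
 * for the outgoing vGPU.
 */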
static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	enum intel_engine_id i;
	struct intel_engine_cs *engine;
	struct vgpu_sched_data *vgpu_data;
	ktime_t cur_time;

	/* No need to schedule if next_vgpu is the same as current_vgpu;
	 * let the scheduler choose next_vgpu again by setting it to NULL.
	 */
	if (scheduler->next_vgpu == scheduler->current_vgpu) {
		scheduler->next_vgpu = NULL;
		return;
	}

	/*
	 * After this flag is set, the workload dispatch thread will
	 * stop dispatching workloads for the current vGPU.
	 */
	scheduler->need_reschedule = true;

	/* still have uncompleted workloads? */
	for_each_engine(engine, gvt->dev_priv, i) {
		if (scheduler->current_workload[i])
			return;
	}

	cur_time = ktime_get();
	vgpu_update_timeslice(scheduler->current_vgpu, cur_time);
	vgpu_data = scheduler->next_vgpu->sched_data;
	vgpu_data->sched_in_time = cur_time;

	/* switch the current vGPU */
	scheduler->current_vgpu = scheduler->next_vgpu;
	scheduler->next_vgpu = NULL;

	scheduler->need_reschedule = false;

	/* wake up the workload dispatch threads */
	for_each_engine(engine, gvt->dev_priv, i)
		wake_up(&scheduler->waitq[i]);
}

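/*
 * Walk the LRU run queue and pick the first vGPU that has pending
 * workloads and is either still within its start-up priority window
 * or has time slice budget left.
 */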
static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
{
	struct vgpu_sched_data *vgpu_data;
	struct intel_vgpu *vgpu = NULL;
	struct list_head *head = &sched_data->lru_runq_head;
	struct list_head *pos;

	/* search for a vGPU with pending workloads */
	list_for_each(pos, head) {

		vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
		if (!vgpu_has_pending_workload(vgpu_data->vgpu))
			continue;

		if (vgpu_data->pri_sched) {
			if (ktime_before(ktime_get(), vgpu_data->pri_time)) {
				vgpu = vgpu_data->vgpu;
				break;
			} else
				vgpu_data->pri_sched = false;
		}

		/* Return the vGPU only if it has time slice left */
		if (vgpu_data->left_ts > 0) {
			vgpu = vgpu_data->vgpu;
			break;
		}
	}

	return vgpu;
}

/* in nanoseconds */
#define GVT_DEFAULT_TIME_SLICE 1000000

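/*
 * Core of the time-based scheduler (TBS): pick the next vGPU to run.
 * A busy vGPU from the LRU run queue is preferred; if none qualifies,
 * fall back to the idle vGPU.
 */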
static void tbs_sched_func(struct gvt_sched_data *sched_data)
{
	struct intel_gvt *gvt = sched_data->gvt;
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct vgpu_sched_data *vgpu_data;
	struct intel_vgpu *vgpu = NULL;

	/* no active vGPU, or a next vGPU has already been chosen */
	if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
		goto out;

	vgpu = find_busy_vgpu(sched_data);
	if (vgpu) {
		scheduler->next_vgpu = vgpu;
		vgpu_data = vgpu->sched_data;
		if (!vgpu_data->pri_sched) {
			/* Move the last used vGPU to the tail of lru_list */
			list_del_init(&vgpu_data->lru_list);
			list_add_tail(&vgpu_data->lru_list,
				      &sched_data->lru_runq_head);
		}
	} else {
		scheduler->next_vgpu = gvt->idle_vgpu;
	}
out:
	if (scheduler->next_vgpu)
		try_to_schedule_next_vgpu(gvt);
}

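/*
 * Scheduler entry point, invoked when a scheduling request has been
 * raised.  Rebalances the time slices once per balance period, charges
 * the current vGPU for its run time, and runs the TBS policy.
 */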
void intel_gvt_schedule(struct intel_gvt *gvt)
{
	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
	ktime_t cur_time;

	mutex_lock(&gvt->sched_lock);
	cur_time = ktime_get();

	if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
				(void *)&gvt->service_request)) {
		if (cur_time >= sched_data->expire_time) {
			gvt_balance_timeslice(sched_data);
			sched_data->expire_time = ktime_add_ms(
				cur_time, GVT_TS_BALANCE_PERIOD_MS);
		}
	}
	clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request);

	vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time);
	tbs_sched_func(sched_data);

	mutex_unlock(&gvt->sched_lock);
}

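/*
 * Periodic hrtimer callback: raise an INTEL_GVT_REQUEST_SCHED service
 * request and re-arm the timer for the next period.
 */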
static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
{
	struct gvt_sched_data *data;

	data = container_of(timer_data, struct gvt_sched_data, timer);

	intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);

	hrtimer_add_expires_ns(&data->timer, data->period);

	return HRTIMER_RESTART;
}

static int tbs_sched_init(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler =
		&gvt->scheduler;

	struct gvt_sched_data *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	INIT_LIST_HEAD(&data->lru_runq_head);
	hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	data->timer.function = tbs_timer_fn;
	data->period = GVT_DEFAULT_TIME_SLICE;
	data->gvt = gvt;

	scheduler->sched_data = data;

	return 0;
}

static void tbs_sched_clean(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler =
		&gvt->scheduler;
	struct gvt_sched_data *data = scheduler->sched_data;

	hrtimer_cancel(&data->timer);

	kfree(data);
	scheduler->sched_data = NULL;
}

static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
{
	struct vgpu_sched_data *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	data->sched_ctl.weight = vgpu->sched_ctl.weight;
	data->vgpu = vgpu;
	INIT_LIST_HEAD(&data->lru_list);

	vgpu->sched_data = data;

	return 0;
}

static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;

	kfree(vgpu->sched_data);
	vgpu->sched_data = NULL;

	/* This vGPU's id has already been removed from the idr;
	 * if no vGPUs remain, stop the scheduler timer.
	 */
	if (idr_is_empty(&gvt->vgpu_idr))
		hrtimer_cancel(&sched_data->timer);
}

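/*
 * Put the vGPU on the run queue with a temporary priority boost of
 * GVT_SCHED_VGPU_PRI_TIME seconds and start the scheduler timer if it
 * is not already running.
 */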
static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
{
	struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
	ktime_t now;

	if (!list_empty(&vgpu_data->lru_list))
		return;

	now = ktime_get();
	vgpu_data->pri_time = ktime_add(now,
					ktime_set(GVT_SCHED_VGPU_PRI_TIME, 0));
	vgpu_data->pri_sched = true;

	list_add(&vgpu_data->lru_list, &sched_data->lru_runq_head);

	if (!hrtimer_active(&sched_data->timer))
		hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
			sched_data->period), HRTIMER_MODE_ABS);
	vgpu_data->active = true;
}

static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
{
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

	list_del_init(&vgpu_data->lru_list);
	vgpu_data->active = false;
}

static struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
	.init = tbs_sched_init,
	.clean = tbs_sched_clean,
	.init_vgpu = tbs_sched_init_vgpu,
	.clean_vgpu = tbs_sched_clean_vgpu,
	.start_schedule = tbs_sched_start_schedule,
	.stop_schedule = tbs_sched_stop_schedule,
};

int intel_gvt_init_sched_policy(struct intel_gvt *gvt)
{
	int ret;

	mutex_lock(&gvt->sched_lock);
	gvt->scheduler.sched_ops = &tbs_schedule_ops;
	ret = gvt->scheduler.sched_ops->init(gvt);
	mutex_unlock(&gvt->sched_lock);

	return ret;
}

void intel_gvt_clean_sched_policy(struct intel_gvt *gvt)
{
	mutex_lock(&gvt->sched_lock);
	gvt->scheduler.sched_ops->clean(gvt);
	mutex_unlock(&gvt->sched_lock);
}

/* For the per-vGPU scheduler policy there are two pieces of per-vGPU
 * data: sched_data and sched_ctl.  We treat both as part of the global
 * scheduler, protected by gvt->sched_lock.  Callers must decide for
 * themselves whether vgpu_lock should also be held outside.
 */

int intel_vgpu_init_sched_policy(struct intel_vgpu *vgpu)
{
	int ret;

	mutex_lock(&vgpu->gvt->sched_lock);
	ret = vgpu->gvt->scheduler.sched_ops->init_vgpu(vgpu);
	mutex_unlock(&vgpu->gvt->sched_lock);

	return ret;
}

void intel_vgpu_clean_sched_policy(struct intel_vgpu *vgpu)
{
	mutex_lock(&vgpu->gvt->sched_lock);
	vgpu->gvt->scheduler.sched_ops->clean_vgpu(vgpu);
	mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_vgpu_start_schedule(struct intel_vgpu *vgpu)
{
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

	mutex_lock(&vgpu->gvt->sched_lock);
	if (!vgpu_data->active) {
		gvt_dbg_core("vgpu%d: start schedule\n", vgpu->id);
		vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu);
	}
	mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_gvt_kick_schedule(struct intel_gvt *gvt)
{
	mutex_lock(&gvt->sched_lock);
	intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
	mutex_unlock(&gvt->sched_lock);
}

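/*
 * Remove the vGPU from scheduling: take it off the run queue, make
 * sure it is neither the current nor the next vGPU, and switch any
 * engine MMIO context it still owns back to the host.
 */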
void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
{
	struct intel_gvt_workload_scheduler *scheduler =
		&vgpu->gvt->scheduler;
	int ring_id;
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

	if (!vgpu_data->active)
		return;

	gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id);

	mutex_lock(&vgpu->gvt->sched_lock);
	scheduler->sched_ops->stop_schedule(vgpu);

	if (scheduler->next_vgpu == vgpu)
		scheduler->next_vgpu = NULL;

	if (scheduler->current_vgpu == vgpu) {
		/* stop workload dispatching */
		scheduler->need_reschedule = true;
		scheduler->current_vgpu = NULL;
	}

	intel_runtime_pm_get(&dev_priv->runtime_pm);
	spin_lock_bh(&scheduler->mmio_context_lock);
	for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
		if (scheduler->engine_owner[ring_id] == vgpu) {
			intel_gvt_switch_mmio(vgpu, NULL, ring_id);
			scheduler->engine_owner[ring_id] = NULL;
		}
	}
	spin_unlock_bh(&scheduler->mmio_context_lock);
	intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
	mutex_unlock(&vgpu->gvt->sched_lock);
}
485