/* $NetBSD: sched_policy.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $ */

/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Anhua Xu
 *    Kevin Tian <kevin.tian@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sched_policy.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $");

#include "i915_drv.h"
#include "gvt.h"

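/*
 * vgpu_has_pending_workload - return true if any engine's workload queue
 * for this vGPU is non-empty.
 */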
static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
{
        enum intel_engine_id i;
        struct intel_engine_cs *engine;

        for_each_engine(engine, vgpu->gvt->dev_priv, i) {
                if (!list_empty(workload_q_head(vgpu, i)))
                        return true;
        }

        return false;
}

/* Give a vGPU higher priority for the first 2 seconds after it starts */
#define GVT_SCHED_VGPU_PRI_TIME 2

struct vgpu_sched_data {
        struct list_head lru_list;
        struct intel_vgpu *vgpu;
        bool active;
        bool pri_sched;
        ktime_t pri_time;
        ktime_t sched_in_time;
        ktime_t sched_time;
        ktime_t left_ts;
        ktime_t allocated_ts;

        struct vgpu_sched_ctl sched_ctl;
};

struct gvt_sched_data {
        struct intel_gvt *gvt;
        struct hrtimer timer;
        unsigned long period;
        struct list_head lru_runq_head;
        ktime_t expire_time;
};

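/*
 * vgpu_update_timeslice - charge the time elapsed since the vGPU was last
 * scheduled in: add it to the accumulated sched_time, subtract it from the
 * remaining timeslice (left_ts) and restart accounting from cur_time.
 * The idle vGPU is never charged.
 */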
static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time)
{
        ktime_t delta_ts;
        struct vgpu_sched_data *vgpu_data;

        if (!vgpu || vgpu == vgpu->gvt->idle_vgpu)
                return;

        vgpu_data = vgpu->sched_data;
        delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time);
        vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts);
        vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts);
        vgpu_data->sched_in_time = cur_time;
}

#define GVT_TS_BALANCE_PERIOD_MS 100
#define GVT_TS_BALANCE_STAGE_NUM 10

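/*
 * gvt_balance_timeslice - rebalance timeslices across the run queue.
 * Each GVT_TS_BALANCE_PERIOD_MS call advances one stage out of
 * GVT_TS_BALANCE_STAGE_NUM.  At stage 0 every vGPU receives a fresh,
 * weight-proportional share of the 100ms period; in the remaining stages
 * the same share is added on top of whatever is left over (which may be
 * negative if the vGPU overran its slice).
 */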
static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
{
        struct vgpu_sched_data *vgpu_data;
        struct list_head *pos;
        static u64 stage_check;
        int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;

        /* The timeslice accumulation is reset at stage 0: each vGPU is
         * allocated a fresh share without carrying over the debt from
         * previous stages.
         */
        if (stage == 0) {
                int total_weight = 0;
                ktime_t fair_timeslice;

                list_for_each(pos, &sched_data->lru_runq_head) {
                        vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
                        total_weight += vgpu_data->sched_ctl.weight;
                }

                list_for_each(pos, &sched_data->lru_runq_head) {
                        vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
                        fair_timeslice = ktime_divns(ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS),
                                                     total_weight) * vgpu_data->sched_ctl.weight;

                        vgpu_data->allocated_ts = fair_timeslice;
                        vgpu_data->left_ts = vgpu_data->allocated_ts;
                }
        } else {
                list_for_each(pos, &sched_data->lru_runq_head) {
                        vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);

                        /* The timeslice for the next 100ms should include the
                         * leftover (or debt) slice from previous stages.
                         */
                        vgpu_data->left_ts += vgpu_data->allocated_ts;
                }
        }
}

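/*
 * try_to_schedule_next_vgpu - perform the actual vGPU switch.  If next_vgpu
 * differs from current_vgpu, workload dispatching is stopped first; the
 * switch itself only happens once no engine has an in-flight workload left
 * (otherwise the function returns and the switch is retried on a later
 * scheduling pass).  On the switch, the outgoing vGPU's timeslice is charged,
 * next_vgpu becomes current_vgpu and the per-engine dispatch threads are
 * woken up.
 */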
static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
{
        struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
        enum intel_engine_id i;
        struct intel_engine_cs *engine;
        struct vgpu_sched_data *vgpu_data;
        ktime_t cur_time;

        /* No need to schedule if next_vgpu is the same as current_vgpu;
         * let the scheduler choose next_vgpu again by setting it to NULL.
         */
        if (scheduler->next_vgpu == scheduler->current_vgpu) {
                scheduler->next_vgpu = NULL;
                return;
        }

        /*
         * After the flag is set, the workload dispatch thread will
         * stop dispatching workloads for the current vgpu.
         */
        scheduler->need_reschedule = true;

        /* still have uncompleted workloads? */
        for_each_engine(engine, gvt->dev_priv, i) {
                if (scheduler->current_workload[i])
                        return;
        }

        cur_time = ktime_get();
        vgpu_update_timeslice(scheduler->current_vgpu, cur_time);
        vgpu_data = scheduler->next_vgpu->sched_data;
        vgpu_data->sched_in_time = cur_time;

        /* switch current vgpu */
        scheduler->current_vgpu = scheduler->next_vgpu;
        scheduler->next_vgpu = NULL;

        scheduler->need_reschedule = false;

        /* wake up the workload dispatch threads */
        for_each_engine(engine, gvt->dev_priv, i)
                wake_up(&scheduler->waitq[i]);
}

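/*
 * find_busy_vgpu - walk the LRU run queue and return the first vGPU that has
 * pending workloads and either is still inside its start-up priority window
 * or has timeslice budget left; return NULL if no such vGPU exists.
 */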
static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
{
        struct vgpu_sched_data *vgpu_data;
        struct intel_vgpu *vgpu = NULL;
        struct list_head *head = &sched_data->lru_runq_head;
        struct list_head *pos;

        /* search for a vGPU with pending workload */
        list_for_each(pos, head) {

                vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
                if (!vgpu_has_pending_workload(vgpu_data->vgpu))
                        continue;

                if (vgpu_data->pri_sched) {
                        if (ktime_before(ktime_get(), vgpu_data->pri_time)) {
                                vgpu = vgpu_data->vgpu;
                                break;
                        } else
                                vgpu_data->pri_sched = false;
                }

                /* Return the vGPU only if it has timeslice left */
                if (vgpu_data->left_ts > 0) {
                        vgpu = vgpu_data->vgpu;
                        break;
                }
        }

        return vgpu;
}

/* in nanoseconds */
#define GVT_DEFAULT_TIME_SLICE 1000000

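/*
 * tbs_sched_func - core of the time-based scheduler (TBS): pick the next
 * busy vGPU from the run queue, move it to the tail of the LRU list (unless
 * it is running on start-up priority), fall back to the idle vGPU when
 * nothing is runnable, and then try to switch to the chosen target.
 */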
static void tbs_sched_func(struct gvt_sched_data *sched_data)
{
        struct intel_gvt *gvt = sched_data->gvt;
        struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
        struct vgpu_sched_data *vgpu_data;
        struct intel_vgpu *vgpu = NULL;

        /* no active vgpu, or a target has already been chosen */
        if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
                goto out;

        vgpu = find_busy_vgpu(sched_data);
        if (vgpu) {
                scheduler->next_vgpu = vgpu;
                vgpu_data = vgpu->sched_data;
                if (!vgpu_data->pri_sched) {
                        /* Move the last used vGPU to the tail of lru_list */
                        list_del_init(&vgpu_data->lru_list);
                        list_add_tail(&vgpu_data->lru_list,
                                      &sched_data->lru_runq_head);
                }
        } else {
                scheduler->next_vgpu = gvt->idle_vgpu;
        }
out:
        if (scheduler->next_vgpu)
                try_to_schedule_next_vgpu(gvt);
}

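/*
 * intel_gvt_schedule - one scheduling pass under gvt->sched_lock: when a
 * pending INTEL_GVT_REQUEST_SCHED request finds the current balance period
 * expired, rebalance the timeslices; then update the current vGPU's
 * accounting and run the TBS scheduler.
 */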
void intel_gvt_schedule(struct intel_gvt *gvt)
{
        struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
        ktime_t cur_time;

        mutex_lock(&gvt->sched_lock);
        cur_time = ktime_get();

        if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
                                (void *)&gvt->service_request)) {
                if (cur_time >= sched_data->expire_time) {
                        gvt_balance_timeslice(sched_data);
                        sched_data->expire_time = ktime_add_ms(
                                cur_time, GVT_TS_BALANCE_PERIOD_MS);
                }
        }
        clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request);

        vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time);
        tbs_sched_func(sched_data);

        mutex_unlock(&gvt->sched_lock);
}

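/*
 * tbs_timer_fn - periodic hrtimer callback: request a scheduling service
 * pass and re-arm the timer for the next period.
 */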
static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
{
        struct gvt_sched_data *data;

        data = container_of(timer_data, struct gvt_sched_data, timer);

        intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);

        hrtimer_add_expires_ns(&data->timer, data->period);

        return HRTIMER_RESTART;
}

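/*
 * tbs_sched_init - allocate the global TBS scheduler data, initialize the
 * LRU run queue and set up (but do not yet start) the periodic timer.
 */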
static int tbs_sched_init(struct intel_gvt *gvt)
{
        struct intel_gvt_workload_scheduler *scheduler =
                &gvt->scheduler;

        struct gvt_sched_data *data;

        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        INIT_LIST_HEAD(&data->lru_runq_head);
        hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        data->timer.function = tbs_timer_fn;
        data->period = GVT_DEFAULT_TIME_SLICE;
        data->gvt = gvt;

        scheduler->sched_data = data;

        return 0;
}

static void tbs_sched_clean(struct intel_gvt *gvt)
{
        struct intel_gvt_workload_scheduler *scheduler =
                &gvt->scheduler;
        struct gvt_sched_data *data = scheduler->sched_data;

        hrtimer_cancel(&data->timer);

        kfree(data);
        scheduler->sched_data = NULL;
}

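/*
 * tbs_sched_init_vgpu - allocate and initialize the per-vGPU scheduler data
 * (scheduling weight and LRU list node).
 */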
static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
{
        struct vgpu_sched_data *data;

        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        data->sched_ctl.weight = vgpu->sched_ctl.weight;
        data->vgpu = vgpu;
        INIT_LIST_HEAD(&data->lru_list);

        vgpu->sched_data = data;

        return 0;
}

static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
{
        struct intel_gvt *gvt = vgpu->gvt;
        struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;

        kfree(vgpu->sched_data);
        vgpu->sched_data = NULL;

        /* stop the scheduling timer once the last vGPU has been removed */
        if (idr_is_empty(&gvt->vgpu_idr))
                hrtimer_cancel(&sched_data->timer);
}

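/*
 * tbs_sched_start_schedule - put the vGPU on the run queue with a start-up
 * priority window of GVT_SCHED_VGPU_PRI_TIME seconds and start the
 * scheduling timer if it is not already running.
 */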
static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
{
        struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
        struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
        ktime_t now;

        if (!list_empty(&vgpu_data->lru_list))
                return;

        now = ktime_get();
        vgpu_data->pri_time = ktime_add(now,
                                        ktime_set(GVT_SCHED_VGPU_PRI_TIME, 0));
        vgpu_data->pri_sched = true;

        list_add(&vgpu_data->lru_list, &sched_data->lru_runq_head);

        if (!hrtimer_active(&sched_data->timer))
                hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
                              sched_data->period), HRTIMER_MODE_ABS);
        vgpu_data->active = true;
}

static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
{
        struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

        list_del_init(&vgpu_data->lru_list);
        vgpu_data->active = false;
}

static struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
        .init = tbs_sched_init,
        .clean = tbs_sched_clean,
        .init_vgpu = tbs_sched_init_vgpu,
        .clean_vgpu = tbs_sched_clean_vgpu,
        .start_schedule = tbs_sched_start_schedule,
        .stop_schedule = tbs_sched_stop_schedule,
};

int intel_gvt_init_sched_policy(struct intel_gvt *gvt)
{
        int ret;

        mutex_lock(&gvt->sched_lock);
        gvt->scheduler.sched_ops = &tbs_schedule_ops;
        ret = gvt->scheduler.sched_ops->init(gvt);
        mutex_unlock(&gvt->sched_lock);

        return ret;
}

void intel_gvt_clean_sched_policy(struct intel_gvt *gvt)
{
        mutex_lock(&gvt->sched_lock);
        gvt->scheduler.sched_ops->clean(gvt);
        mutex_unlock(&gvt->sched_lock);
}

/* For the per-vgpu scheduler policy there are two pieces of per-vgpu data,
 * sched_data and sched_ctl. We treat both as part of the global scheduler,
 * protected by gvt->sched_lock. Callers should decide for themselves whether
 * the vgpu_lock needs to be held around these calls.
 */

int intel_vgpu_init_sched_policy(struct intel_vgpu *vgpu)
{
        int ret;

        mutex_lock(&vgpu->gvt->sched_lock);
        ret = vgpu->gvt->scheduler.sched_ops->init_vgpu(vgpu);
        mutex_unlock(&vgpu->gvt->sched_lock);

        return ret;
}

void intel_vgpu_clean_sched_policy(struct intel_vgpu *vgpu)
{
        mutex_lock(&vgpu->gvt->sched_lock);
        vgpu->gvt->scheduler.sched_ops->clean_vgpu(vgpu);
        mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_vgpu_start_schedule(struct intel_vgpu *vgpu)
{
        struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

        mutex_lock(&vgpu->gvt->sched_lock);
        if (!vgpu_data->active) {
                gvt_dbg_core("vgpu%d: start schedule\n", vgpu->id);
                vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu);
        }
        mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_gvt_kick_schedule(struct intel_gvt *gvt)
{
        mutex_lock(&gvt->sched_lock);
        intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
        mutex_unlock(&gvt->sched_lock);
}

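/*
 * intel_vgpu_stop_schedule - take the vGPU out of scheduling: remove it from
 * the run queue, clear it as the current/next scheduling target and hand back
 * any engines whose MMIO context it still owns.
 */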
void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
{
        struct intel_gvt_workload_scheduler *scheduler =
                &vgpu->gvt->scheduler;
        int ring_id;
        struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

        if (!vgpu_data->active)
                return;

        gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id);

        mutex_lock(&vgpu->gvt->sched_lock);
        scheduler->sched_ops->stop_schedule(vgpu);

        if (scheduler->next_vgpu == vgpu)
                scheduler->next_vgpu = NULL;

        if (scheduler->current_vgpu == vgpu) {
                /* stop workload dispatching */
                scheduler->need_reschedule = true;
                scheduler->current_vgpu = NULL;
        }

        intel_runtime_pm_get(&dev_priv->runtime_pm);
        spin_lock_bh(&scheduler->mmio_context_lock);
        for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
                if (scheduler->engine_owner[ring_id] == vgpu) {
                        intel_gvt_switch_mmio(vgpu, NULL, ring_id);
                        scheduler->engine_owner[ring_id] = NULL;
                }
        }
        spin_unlock_bh(&scheduler->mmio_context_lock);
        intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
        mutex_unlock(&vgpu->gvt->sched_lock);
}