/*	$NetBSD: intel_bw.c,v 1.2 2021/12/18 23:45:29 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_bw.c,v 1.2 2021/12/18 23:45:29 riastradh Exp $");

#include <drm/drm_atomic_state_helper.h>

#include "intel_bw.h"
#include "intel_display_types.h"
#include "intel_sideband.h"

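/*
 * Display memory bandwidth checks: query the memory subsystem
 * parameters (DRAM type, QGV points) from pcode at init time, and
 * use them during atomic check to reject display configurations
 * whose total data rate exceeds the available memory bandwidth.
 * Only gen11+ is handled so far.
 */
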
/* Parameters for Qclk Geyserville (QGV) */
struct intel_qgv_point {
	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
};

struct intel_qgv_info {
	struct intel_qgv_point points[I915_NUM_QGV_POINTS];
	u8 num_points;
	u8 num_channels;
	u8 t_bl;
	enum intel_dram_type dram_type;
};

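/*
 * Read the global memory subsystem info from pcode: DRAM type,
 * number of channels and number of QGV points, and derive the
 * burst length t_bl from the DRAM type.
 */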
static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
					  struct intel_qgv_info *qi)
{
	u32 val = 0;
	int ret;

	ret = sandybridge_pcode_read(dev_priv,
				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
				     &val, NULL);
	if (ret)
		return ret;

	if (IS_GEN(dev_priv, 12)) {
		switch (val & 0xf) {
		case 0:
			qi->dram_type = INTEL_DRAM_DDR4;
			break;
		case 3:
			qi->dram_type = INTEL_DRAM_LPDDR4;
			break;
		case 4:
			qi->dram_type = INTEL_DRAM_DDR3;
			break;
		case 5:
			qi->dram_type = INTEL_DRAM_LPDDR3;
			break;
		default:
			MISSING_CASE(val & 0xf);
			break;
		}
	} else if (IS_GEN(dev_priv, 11)) {
		switch (val & 0xf) {
		case 0:
			qi->dram_type = INTEL_DRAM_DDR4;
			break;
		case 1:
			qi->dram_type = INTEL_DRAM_DDR3;
			break;
		case 2:
			qi->dram_type = INTEL_DRAM_LPDDR3;
			break;
		case 3:
			qi->dram_type = INTEL_DRAM_LPDDR4;
			break;
		default:
			MISSING_CASE(val & 0xf);
			break;
		}
	} else {
		MISSING_CASE(INTEL_GEN(dev_priv));
		qi->dram_type = INTEL_DRAM_LPDDR3; /* Conservative default */
	}

	qi->num_channels = (val & 0xf0) >> 4;
	qi->num_points = (val & 0xf00) >> 8;

	if (IS_GEN(dev_priv, 12))
		qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 16;
	else if (IS_GEN(dev_priv, 11))
		qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;

	return 0;
}

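/*
 * Read a single QGV point from pcode: the DRAM clock (dclk) and the
 * DRAM timing parameters at that operating point.
 */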
static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
					 struct intel_qgv_point *sp,
					 int point)
{
	u32 val = 0, val2 = 0;
	int ret;

	ret = sandybridge_pcode_read(dev_priv,
				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
				     &val, &val2);
	if (ret)
		return ret;

	sp->dclk = val & 0xffff;
	sp->t_rp = (val & 0xff0000) >> 16;
	sp->t_rcd = (val & 0xff000000) >> 24;

	sp->t_rdpre = val2 & 0xff;
	sp->t_ras = (val2 & 0xff00) >> 8;

	sp->t_rc = sp->t_rp + sp->t_ras;

	return 0;
}

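/* Read the global memory info and each advertised QGV point. */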
static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
			      struct intel_qgv_info *qi)
{
	int i, ret;

	ret = icl_pcode_read_mem_global_info(dev_priv, qi);
	if (ret)
		return ret;

	if (WARN_ON(qi->num_points > ARRAY_SIZE(qi->points)))
		qi->num_points = ARRAY_SIZE(qi->points);

	for (i = 0; i < qi->num_points; i++) {
		struct intel_qgv_point *sp = &qi->points[i];

		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
		if (ret)
			return ret;

		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
			      sp->t_rcd, sp->t_rc);
	}

	return 0;
}

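/*
 * Bandwidth for transferring num bytes every den dclk cycles, with
 * dclk given in multiples of 16.666 MHz (100/6).
 */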
static int icl_calc_bw(int dclk, int num, int den)
{
	/* multiples of 16.666MHz (100/6) */
	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
}

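/* Highest DRAM clock among the QGV points, i.e. the fastest SAGV point. */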
static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
{
	u16 dclk = 0;
	int i;

	for (i = 0; i < qi->num_points; i++)
		dclk = max(dclk, qi->points[i].dclk);

	return dclk;
}

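/*
 * Per-platform system agent parameters: deburst is the DE burst
 * size, deprogbwlimit the programmed DE bandwidth limit (GB/s), and
 * displayrtids the display RT ID budget that bounds the in-progress
 * queue depth per channel.
 */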
struct intel_sa_info {
	u16 displayrtids;
	u8 deburst, deprogbwlimit;
};

static const struct intel_sa_info icl_sa_info = {
	.deburst = 8,
	.deprogbwlimit = 25, /* GB/s */
	.displayrtids = 128,
};

static const struct intel_sa_info tgl_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 34, /* GB/s */
	.displayrtids = 256,
};

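/*
 * Precompute the bandwidth limits: for each group of plane counts
 * (max_bw[i]) and each QGV point, 90% of the theoretical bandwidth,
 * capped at the programmed DE bandwidth limit.
 */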
static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
	struct intel_qgv_info qi = {};
	bool is_y_tile = true; /* assume y tile may be used */
	int num_channels;
	int deinterleave;
	int ipqdepth, ipqdepthpch;
	int dclk_max;
	int maxdebw;
	int i, ret;

	ret = icl_get_qgv_points(dev_priv, &qi);
	if (ret) {
		DRM_DEBUG_KMS("Failed to get memory subsystem information, ignoring bandwidth limits\n");
		return ret;
	}
	num_channels = qi.num_channels;

	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
	dclk_max = icl_sagv_max_dclk(&qi);

	ipqdepthpch = 16;

	maxdebw = min(sa->deprogbwlimit * 1000,
		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);

	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
		struct intel_bw_info *bi = &dev_priv->max_bw[i];
		int clpchgroup;
		int j;

		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;

		bi->num_qgv_points = qi.num_points;

		for (j = 0; j < qi.num_points; j++) {
			const struct intel_qgv_point *sp = &qi.points[j];
			int ct, bw;

			/*
			 * Max row cycle time
			 *
			 * FIXME what is the logic behind the
			 * assumed burst length?
			 */
			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);

			bi->deratedbw[j] = min(maxdebw,
					       bw * 9 / 10); /* 90% */

			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
				      i, j, bi->num_planes, bi->deratedbw[j]);
		}

		if (bi->num_planes == 1)
			break;
	}

	return 0;
}

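/*
 * Look up the derated bandwidth of the given QGV point in the first
 * group that accommodates num_planes active planes.
 */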
static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
			       int num_planes, int qgv_point)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
		const struct intel_bw_info *bi =
			&dev_priv->max_bw[i];

		/*
		 * Pcode will not expose all QGV points when
		 * SAGV is forced to off/min/med/max.
		 */
		if (qgv_point >= bi->num_qgv_points)
			return UINT_MAX;

		if (num_planes >= bi->num_planes)
			return bi->deratedbw[qgv_point];
	}

	return 0;
}

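/* Populate the bandwidth limits at init time; gen11+ only for now. */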
void intel_bw_init_hw(struct drm_i915_private *dev_priv)
{
	if (!HAS_DISPLAY(dev_priv))
		return;

	if (IS_GEN(dev_priv, 12))
		icl_get_bw_info(dev_priv, &tgl_sa_info);
	else if (IS_GEN(dev_priv, 11))
		icl_get_bw_info(dev_priv, &icl_sa_info);
}

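/*
 * Max data rate the memory subsystem can sustain with num_planes
 * active planes: the worst case (minimum) over all QGV points.
 */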
static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
					int num_planes)
{
	if (INTEL_GEN(dev_priv) >= 11) {
		/* Every bw group has the same number of QGV points */
		const struct intel_bw_info *bi =
			&dev_priv->max_bw[0];
		unsigned int min_bw = UINT_MAX;
		int i;

		/*
		 * FIXME with SAGV disabled maybe we can assume
		 * point 1 will always be used? Seems to match
		 * the behaviour observed in the wild.
		 */
		for (i = 0; i < bi->num_qgv_points; i++) {
			unsigned int bw = icl_max_bw(dev_priv, num_planes, i);

			min_bw = min(bw, min_bw);
		}
		return min_bw;
	} else {
		return UINT_MAX;
	}
}

static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
{
	/*
	 * We assume cursors are small enough
	 * to not cause bandwidth problems.
	 */
	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
}

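/* Aggregate data rate of all non-cursor planes on the crtc. */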
static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	unsigned int data_rate = 0;
	enum plane_id plane_id;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		/*
		 * We assume cursors are small enough
		 * to not cause bandwidth problems.
		 */
		if (plane_id == PLANE_CURSOR)
			continue;

		data_rate += crtc_state->data_rate[plane_id];
	}

	return data_rate;
}

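/*
 * Record the crtc's data rate and active plane count in the bw
 * state, for callers updating the state outside of atomic check.
 */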
void intel_bw_crtc_update(struct intel_bw_state *bw_state,
			  const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);

	bw_state->data_rate[crtc->pipe] =
		intel_bw_crtc_data_rate(crtc_state);
	bw_state->num_active_planes[crtc->pipe] =
		intel_bw_crtc_num_active_planes(crtc_state);

	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
		      pipe_name(crtc->pipe),
		      bw_state->data_rate[crtc->pipe],
		      bw_state->num_active_planes[crtc->pipe]);
}

static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
					       const struct intel_bw_state *bw_state)
{
	unsigned int num_active_planes = 0;
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		num_active_planes += bw_state->num_active_planes[pipe];

	return num_active_planes;
}

static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
				       const struct intel_bw_state *bw_state)
{
	unsigned int data_rate = 0;
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		data_rate += bw_state->data_rate[pipe];

	return data_rate;
}

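/* Get (and lock) the bw private object state in the atomic state. */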
static struct intel_bw_state *
intel_atomic_get_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct drm_private_state *bw_state;

	bw_state = drm_atomic_get_private_obj_state(&state->base,
						    &dev_priv->bw_obj);
	if (IS_ERR(bw_state))
		return ERR_CAST(bw_state);

	return to_intel_bw_state(bw_state);
}

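/*
 * Atomic check hook: recompute the per-pipe data rates and active
 * plane counts for the crtcs in the state, then reject the update if
 * the total data rate exceeds what the memory subsystem can sustain.
 */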
int intel_bw_atomic_check(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
	struct intel_bw_state *bw_state = NULL;
	unsigned int data_rate, max_data_rate;
	unsigned int num_active_planes;
	struct intel_crtc *crtc;
	int i;

	/* FIXME earlier gens need some checks too */
	if (INTEL_GEN(dev_priv) < 11)
		return 0;

	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
					    new_crtc_state, i) {
		unsigned int old_data_rate =
			intel_bw_crtc_data_rate(old_crtc_state);
		unsigned int new_data_rate =
			intel_bw_crtc_data_rate(new_crtc_state);
		unsigned int old_active_planes =
			intel_bw_crtc_num_active_planes(old_crtc_state);
		unsigned int new_active_planes =
			intel_bw_crtc_num_active_planes(new_crtc_state);

		/*
		 * Avoid locking the bw state when
		 * nothing significant has changed.
		 */
		if (old_data_rate == new_data_rate &&
		    old_active_planes == new_active_planes)
			continue;

		bw_state = intel_atomic_get_bw_state(state);
		if (IS_ERR(bw_state))
			return PTR_ERR(bw_state);

		bw_state->data_rate[crtc->pipe] = new_data_rate;
		bw_state->num_active_planes[crtc->pipe] = new_active_planes;

		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
			      pipe_name(crtc->pipe),
			      bw_state->data_rate[crtc->pipe],
			      bw_state->num_active_planes[crtc->pipe]);
	}

	if (!bw_state)
		return 0;

	data_rate = intel_bw_data_rate(dev_priv, bw_state);
	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);

	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);

	data_rate = DIV_ROUND_UP(data_rate, 1000);

	if (data_rate > max_data_rate) {
		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %u MB/s (%u active planes)\n",
			      data_rate, max_data_rate, num_active_planes);
		return -EINVAL;
	}

	return 0;
}

static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
{
	struct intel_bw_state *state;

	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
	if (!state)
		return NULL;

	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);

	return &state->base;
}

static void intel_bw_destroy_state(struct drm_private_obj *obj,
				   struct drm_private_state *state)
{
	kfree(state);
}

static const struct drm_private_state_funcs intel_bw_funcs = {
	.atomic_duplicate_state = intel_bw_duplicate_state,
	.atomic_destroy_state = intel_bw_destroy_state,
};

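/*
 * Create the global bw state private object;
 * intel_bw_cleanup() tears it down.
 */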
int intel_bw_init(struct drm_i915_private *dev_priv)
{
	struct intel_bw_state *state;

	state = kzalloc(sizeof(*state), GFP_KERNEL);
	if (!state)
		return -ENOMEM;

	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
				    &state->base, &intel_bw_funcs);

	return 0;
}

void intel_bw_cleanup(struct drm_i915_private *dev_priv)
{
	drm_atomic_private_obj_fini(&dev_priv->bw_obj);
}