1 /* $NetBSD: amdgpu_display_rq_dlg_calc_21.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $ */
2
3 /*
4 * Copyright 2017 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: AMD
25 *
26 */
27
28
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: amdgpu_display_rq_dlg_calc_21.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $");
31
32 #include "../display_mode_lib.h"
33 #include "../display_mode_vba.h"
34 #include "../dml_inline_defs.h"
35 #include "display_rq_dlg_calc_21.h"
36
37 /*
38 * NOTE:
39 * This file is gcc-parseable HW gospel, coming straight from HW engineers.
40 *
41 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
42 * ways. Unless there is something clearly wrong with it the code should
43 * remain as-is as it provides us with a guarantee from HW that it is correct.
44 */
45
/*
 * Forward declaration of calculate_ttu_cursor() so that it can be referenced
 * before its definition.  The definition is not visible in this part of the
 * file; presumably it follows later -- TODO confirm.
 *
 * NOTE(review): the two leading double pointers look like output parameters
 * (per-request delivery times for the cursor, prefetch and active) -- confirm
 * against the definition.
 */
46 static void calculate_ttu_cursor(
47 struct display_mode_lib *mode_lib,
48 double *refcyc_per_req_delivery_pre_cur,
49 double *refcyc_per_req_delivery_cur,
50 double refclk_freq_in_mhz,
51 double ref_freq_to_pix_freq,
52 double hscale_pixel_rate_l,
53 double hscl_ratio,
54 double vratio_pre_l,
55 double vratio_l,
56 unsigned int cur_width,
57 enum cursor_bpp cur_bpp);
58
get_bytes_per_element(enum source_format_class source_format,bool is_chroma)59 static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
60 {
61 unsigned int ret_val = 0;
62
63 if (source_format == dm_444_16) {
64 if (!is_chroma)
65 ret_val = 2;
66 } else if (source_format == dm_444_32) {
67 if (!is_chroma)
68 ret_val = 4;
69 } else if (source_format == dm_444_64) {
70 if (!is_chroma)
71 ret_val = 8;
72 } else if (source_format == dm_420_8) {
73 if (is_chroma)
74 ret_val = 2;
75 else
76 ret_val = 1;
77 } else if (source_format == dm_420_10) {
78 if (is_chroma)
79 ret_val = 4;
80 else
81 ret_val = 2;
82 } else if (source_format == dm_444_8) {
83 ret_val = 1;
84 }
85 return ret_val;
86 }
87
is_dual_plane(enum source_format_class source_format)88 static bool is_dual_plane(enum source_format_class source_format)
89 {
90 bool ret_val = false;
91
92 if ((source_format == dm_420_8) || (source_format == dm_420_10))
93 ret_val = true;
94
95 return ret_val;
96 }
97
/*
 * Compute the number of reference clock cycles available per request
 * delivery.
 *
 * For vratio <= 1.0 the result is
 *     refclk * width / pclk / req_per_swath_ub
 * where width is recout_width, or min(recout_width, hactive / 2) when
 * odm_combine is set.  For vratio > 1.0 the result is
 *     refclk * delivery_width / hscale_pixel_rate / req_per_swath_ub.
 *
 * The inputs and the result are also written to the DML log.
 */
static double get_refcyc_per_delivery(
		struct display_mode_lib *mode_lib,
		double refclk_freq_in_mhz,
		double pclk_freq_in_mhz,
		bool odm_combine,
		unsigned int recout_width,
		unsigned int hactive,
		double vratio,
		double hscale_pixel_rate,
		unsigned int delivery_width,
		unsigned int req_per_swath_ub)
{
	double width;
	double rate;
	double refcyc_per_delivery;

	/* pick the width/rate pair first, then apply the common formula */
	if (vratio <= 1.0) {
		if (odm_combine)
			width = dml_min((double) recout_width, (double) hactive / 2.0);
		else
			width = (double) recout_width;
		rate = pclk_freq_in_mhz;
	} else {
		width = (double) delivery_width;
		rate = (double) hscale_pixel_rate;
	}

	refcyc_per_delivery = (double) refclk_freq_in_mhz * width
			/ rate / (double) req_per_swath_ub;

	dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
	dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
	dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width);
	dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio);
	dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub);
	dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery);

	return refcyc_per_delivery;
}
135
get_blk_size_bytes(const enum source_macro_tile_size tile_size)136 static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size)
137 {
138 if (tile_size == dm_256k_tile)
139 return (256 * 1024);
140 else if (tile_size == dm_64k_tile)
141 return (64 * 1024);
142 else
143 return (4 * 1024);
144 }
145
/*
 * Convert the byte-based request sizing parameters in rq_sizing into the
 * log2-encoded fields of rq_regs.
 *
 * Each field is log2(bytes) minus a fixed offset.  The two "min" fields are
 * 0 when the corresponding byte count is 0 and otherwise carry an extra +1.
 * The sizing parameters are logged before conversion.
 */
static void extract_rq_sizing_regs(
		struct display_mode_lib *mode_lib,
		display_data_rq_regs_st *rq_regs,
		const display_data_rq_sizing_params_st rq_sizing)
{
	dml_print("DML_DLG: %s: rq_sizing param\n", __func__);
	print__data_rq_sizing_params_st(mode_lib, rq_sizing);

	/* data chunks */
	rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10;
	rq_regs->min_chunk_size = 0;
	if (rq_sizing.min_chunk_bytes != 0)
		rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1;

	/* meta chunks */
	rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10;
	rq_regs->min_meta_chunk_size = 0;
	if (rq_sizing.min_meta_chunk_bytes != 0)
		rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1;

	/* pte groups */
	rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6;
	rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6;
}
170
/*
 * Fill the request (RQ) register image rq_regs from the computed request
 * parameters rq_param.
 *
 * Luma sizing registers are always derived; chroma sizing registers only
 * when rq_param.yuv420 is set.  Swath heights are stored as log2 for both
 * planes.  plane1_base_address stays 0 unless yuv420 is set, in which case
 * it is placed at 1/2 or 2/3 of the detile buffer (in units of 64 bytes)
 * depending on the luma/chroma stored swath ratio.
 */
static void extract_rq_regs(
		struct display_mode_lib *mode_lib,
		display_rq_regs_st *rq_regs,
		const display_rq_params_st rq_param)
{
	unsigned int det_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
	unsigned int plane1_addr = 0;

	extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l);
	rq_regs->rq_regs_l.pte_row_height_linear =
			dml_floor(dml_log2(rq_param.dlg.rq_l.dpte_row_height), 1) - 3;

	if (rq_param.yuv420) {
		extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c);
		rq_regs->rq_regs_c.pte_row_height_linear =
				dml_floor(dml_log2(rq_param.dlg.rq_c.dpte_row_height), 1) - 3;
	}

	rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height);
	rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height);

	/*
	 * FIXME: should this look at the larger of the luma and chroma chunk
	 * sizes?  Harmless for now since chunk_bytes is set to 8 KiB anyway.
	 */
	rq_regs->drq_expansion_mode =
			(rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) ? 0 : 2;
	rq_regs->prq_expansion_mode = 1;
	rq_regs->mrq_expansion_mode = 1;
	rq_regs->crq_expansion_mode = 1;

	if (rq_param.yuv420) {
		if ((double) rq_param.misc.rq_l.stored_swath_bytes
				/ (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) {
			/* chroma plane starts at half of the buffer */
			plane1_addr = (det_bytes / 2.0 / 64.0);
		} else {
			/* chroma plane starts at 2/3 of the buffer, 256-byte aligned */
			plane1_addr = dml_round_to_multiple(
					(unsigned int) ((2.0 * det_bytes) / 3.0),
					256,
					0) / 64.0;
		}
	}
	rq_regs->plane1_base_address = plane1_addr;
}
219
/*
 * Decide how swaths are stored in the detile buffer and derive the swath
 * heights for the luma and chroma planes.
 *
 * Reads  rq_param->misc.rq_{l,c}.full_swath_bytes, blk256_height/width and
 *        the yuv420 / yuv420_10bpc flags.
 * Writes rq_param->misc.rq_{l,c}.stored_swath_bytes and
 *        rq_param->dlg.rq_{l,c}.swath_height.
 *
 * When two full swaths (per plane) do not fit into the detile buffer the
 * luma plane is flagged for 128-byte requests (req128_l), which lowers the
 * log2 luma swath height by one on non-linear surfaces.  The chroma plane
 * is never flagged.
 */
static void handle_det_buf_split(
		struct display_mode_lib *mode_lib,
		display_rq_params_st *rq_param,
		const display_pipe_source_params_st pipe_src_param)
{
	unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
	unsigned int full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes;
	unsigned int full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes;
	unsigned int total_swath_bytes;
	unsigned int swath_bytes_l;
	unsigned int swath_bytes_c;
	unsigned int log2_swath_height_l;
	unsigned int log2_swath_height_c;
	bool req128_l;
	bool req128_c = false;	/* never raised by the logic below */

	if (rq_param->yuv420_10bpc) {
		full_swath_bytes_packed_l = dml_round_to_multiple(
				rq_param->misc.rq_l.full_swath_bytes * 2 / 3,
				256,
				1) + 256;
		full_swath_bytes_packed_c = dml_round_to_multiple(
				rq_param->misc.rq_c.full_swath_bytes * 2 / 3,
				256,
				1) + 256;
	}

	if (rq_param->yuv420) {
		total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c;
		/* does not fit: 128b requests, for luma only */
		req128_l = total_swath_bytes > detile_buf_size_in_bytes;
		swath_bytes_l = req128_l ? full_swath_bytes_packed_l / 2 : full_swath_bytes_packed_l;
		swath_bytes_c = full_swath_bytes_packed_c;
		/* assumption: the configuration passed in fits into the detile buffer */
	} else {
		total_swath_bytes = 2 * full_swath_bytes_packed_l;
		req128_l = total_swath_bytes > detile_buf_size_in_bytes;
		swath_bytes_l = total_swath_bytes;
		swath_bytes_c = 0;
	}
	rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l;
	rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c;

	if (pipe_src_param.sw_mode == dm_sw_linear) {
		log2_swath_height_l = 0;
		log2_swath_height_c = 0;
	} else if (pipe_src_param.source_scan == dm_vert) {
		/* vertical scan: the swath runs along the block width */
		log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l;
		log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c;
	} else {
		log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l;
		log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c;
	}
	rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
	rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c;

	dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l);
	dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c);
	dml_print(
			"DML_DLG: %s: full_swath_bytes_packed_l = %0d\n",
			__func__,
			full_swath_bytes_packed_l);
	dml_print(
			"DML_DLG: %s: full_swath_bytes_packed_c = %0d\n",
			__func__,
			full_swath_bytes_packed_c);
}
306
/*
 * get_meta_and_pte_attr - derive swath, meta and data-PTE (dpte) request
 * attributes for one plane of a surface.
 *
 * Inputs:
 *   mode_lib        read for soc.vmm_page_size_bytes,
 *                   ip.dpte_buffer_size_in_pte_reqs_luma/_chroma and
 *                   ip.pde_proc_buffer_size_64k_reqs.
 *   vp_width,
 *   vp_height       viewport size used for the upper-bound roundings.
 *   data_pitch      used only in the linear-surface dpte row calculation.
 *   meta_pitch      used only to compute meta_surface_bytes.
 *   source_format   cast to enum source_format_class; selects the bytes per
 *                   element and the local yuv420 flag.
 *   tiling          compared against dm_sw_linear and cast to
 *                   enum dm_swizzle_mode.
 *   macro_tile_size cast to enum source_macro_tile_size; selects the block
 *                   size for non-linear surfaces.
 *   source_scan     compared against dm_vert.
 *   hostvm_enable   non-zero forces dpte_group_bytes to 512.
 *   is_chroma       zero selects the luma block dimensions and bytes per
 *                   element, non-zero the chroma ones.
 *
 * Outputs:
 *   rq_dlg_param    swath_width_ub, req_per_swath_ub, meta_req_per_row_ub,
 *                   meta_bytes_per_row_ub, meta_row_height,
 *                   meta_pte_bytes_per_frame_ub, meta_chunks_per_row_ub,
 *                   dpte_req_per_row_ub, dpte_bytes_per_row_ub,
 *                   dpte_row_height and dpte_groups_per_row_ub are written.
 *   rq_misc_param   full_swath_bytes, blk256_height and blk256_width are
 *                   written.
 *   rq_sizing_param meta_chunk_bytes and min_meta_chunk_bytes are read;
 *                   dpte_group_bytes is written.
 *
 * Intermediate values are also written to the DML log through dml_print().
 */
get_meta_and_pte_attr(struct display_mode_lib * mode_lib,display_data_rq_dlg_params_st * rq_dlg_param,display_data_rq_misc_params_st * rq_misc_param,display_data_rq_sizing_params_st * rq_sizing_param,unsigned int vp_width,unsigned int vp_height,unsigned int data_pitch,unsigned int meta_pitch,unsigned int source_format,unsigned int tiling,unsigned int macro_tile_size,unsigned int source_scan,unsigned int hostvm_enable,unsigned int is_chroma)307 static void get_meta_and_pte_attr(
308 struct display_mode_lib *mode_lib,
309 display_data_rq_dlg_params_st *rq_dlg_param,
310 display_data_rq_misc_params_st *rq_misc_param,
311 display_data_rq_sizing_params_st *rq_sizing_param,
312 unsigned int vp_width,
313 unsigned int vp_height,
314 unsigned int data_pitch,
315 unsigned int meta_pitch,
316 unsigned int source_format,
317 unsigned int tiling,
318 unsigned int macro_tile_size,
319 unsigned int source_scan,
320 unsigned int hostvm_enable,
321 unsigned int is_chroma)
322 {
323 bool surf_linear = (tiling == dm_sw_linear);
324 bool surf_vert = (source_scan == dm_vert);
325
326 unsigned int bytes_per_element;
327 unsigned int bytes_per_element_y = get_bytes_per_element(
328 (enum source_format_class) (source_format),
329 false);
330 unsigned int bytes_per_element_c = get_bytes_per_element(
331 (enum source_format_class) (source_format),
332 true);
333
334 unsigned int blk256_width = 0;
335 unsigned int blk256_height = 0;
336
337 unsigned int blk256_width_y = 0;
338 unsigned int blk256_height_y = 0;
339 unsigned int blk256_width_c = 0;
340 unsigned int blk256_height_c = 0;
341 unsigned int log2_bytes_per_element;
342 unsigned int log2_blk256_width;
343 unsigned int log2_blk256_height;
344 unsigned int blk_bytes;
345 unsigned int log2_blk_bytes;
346 unsigned int log2_blk_height;
347 unsigned int log2_blk_width;
348 unsigned int log2_meta_req_bytes;
349 unsigned int log2_meta_req_height;
350 unsigned int log2_meta_req_width;
351 unsigned int meta_req_width;
352 unsigned int meta_req_height;
353 unsigned int log2_meta_row_height;
354 unsigned int meta_row_width_ub;
355 unsigned int log2_meta_chunk_bytes;
356 unsigned int log2_meta_chunk_height;
357
358 //full sized meta chunk width in unit of data elements
359 unsigned int log2_meta_chunk_width;
360 unsigned int log2_min_meta_chunk_bytes;
361 unsigned int min_meta_chunk_width;
362 unsigned int meta_chunk_width;
363 unsigned int meta_chunk_per_row_int;
364 unsigned int meta_row_remainder;
365 unsigned int meta_chunk_threshold;
366 unsigned int meta_blk_bytes;
367 unsigned int meta_blk_height;
368 unsigned int meta_blk_width;
369 unsigned int meta_surface_bytes;
370 unsigned int vmpg_bytes;
371 unsigned int meta_pte_req_per_frame_ub;
372 unsigned int meta_pte_bytes_per_frame_ub;
373 const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes);
// combined luma + chroma dpte buffer size; used only in the linear-surface path below
374 const unsigned int dpte_buf_in_pte_reqs =
375 mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
376 const unsigned int pde_proc_buffer_size_64k_reqs =
377 mode_lib->ip.pde_proc_buffer_size_64k_reqs;
378
379 unsigned int log2_vmpg_height = 0;
380 unsigned int log2_vmpg_width = 0;
381 unsigned int log2_dpte_req_height_ptes = 0;
382 unsigned int log2_dpte_req_height = 0;
383 unsigned int log2_dpte_req_width = 0;
384 unsigned int log2_dpte_row_height_linear = 0;
385 unsigned int log2_dpte_row_height = 0;
386 unsigned int log2_dpte_group_width = 0;
387 unsigned int dpte_row_width_ub = 0;
388 unsigned int dpte_req_height = 0;
389 unsigned int dpte_req_width = 0;
390 unsigned int dpte_group_width = 0;
391 unsigned int log2_dpte_group_bytes = 0;
392 unsigned int log2_dpte_group_length = 0;
393 unsigned int pde_buf_entries;
394 bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10);
395
// 256-byte block dimensions for both planes are returned through the four
// pointer arguments (helper is defined outside this file)
396 Calculate256BBlockSizes(
397 (enum source_format_class) (source_format),
398 (enum dm_swizzle_mode) (tiling),
399 bytes_per_element_y,
400 bytes_per_element_c,
401 &blk256_height_y,
402 &blk256_height_c,
403 &blk256_width_y,
404 &blk256_width_c);
405
// from here on only the values of the plane selected by is_chroma are used
406 if (!is_chroma) {
407 blk256_width = blk256_width_y;
408 blk256_height = blk256_height_y;
409 bytes_per_element = bytes_per_element_y;
410 } else {
411 blk256_width = blk256_width_c;
412 blk256_height = blk256_height_c;
413 bytes_per_element = bytes_per_element_c;
414 }
415
// NOTE(review): get_bytes_per_element() returns 0 for combinations it does
// not list; bytes_per_element is used as an integer divisor further down
// (meta_blk_width and the linear dpte row height) -- confirm callers never
// reach this point with such a format/plane.
416 log2_bytes_per_element = dml_log2(bytes_per_element);
417
418 dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear);
419 dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert);
420 dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width);
421 dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height);
422
423 log2_blk256_width = dml_log2((double) blk256_width);
424 log2_blk256_height = dml_log2((double) blk256_height);
425 blk_bytes = surf_linear ?
426 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
427 log2_blk_bytes = dml_log2((double) blk_bytes);
428 log2_blk_height = 0;
429 log2_blk_width = 0;
430
431 // remember log rule
432 // "+" in log is multiply
433 // "-" in log is divide
434 // "/2" is like square root
435 // blk is vertical biased
436 if (tiling != dm_sw_linear)
437 log2_blk_height = log2_blk256_height
438 + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
439 else
440 log2_blk_height = 0; // blk height of 1
441
442 log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;
443
// swath upper bound: (vp dimension - 1) rounded up to a whole block, plus one more block
444 if (!surf_vert) {
445 rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1)
446 + blk256_width;
447 rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width;
448 } else {
449 rq_dlg_param->swath_width_ub = dml_round_to_multiple(
450 vp_height - 1,
451 blk256_height,
452 1) + blk256_height;
453 rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height;
454 }
455
456 if (!surf_vert)
457 rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height
458 * bytes_per_element;
459 else
460 rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width
461 * bytes_per_element;
462
463 rq_misc_param->blk256_height = blk256_height;
464 rq_misc_param->blk256_width = blk256_width;
465
466 // -------
467 // meta
468 // -------
469 log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element
470
471 // each 64b meta request for dcn is 8x8 meta elements and
472 // a meta element covers one 256b block of the the data surface.
473 log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256
474 log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element
475 - log2_meta_req_height;
476 meta_req_width = 1 << log2_meta_req_width;
477 meta_req_height = 1 << log2_meta_req_height;
478 log2_meta_row_height = 0;
479 meta_row_width_ub = 0;
480
481 // the dimensions of a meta row are meta_row_width x meta_row_height in elements.
482 // calculate upper bound of the meta_row_width
483 if (!surf_vert) {
484 log2_meta_row_height = log2_meta_req_height;
485 meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1)
486 + meta_req_width;
487 rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width;
488 } else {
489 log2_meta_row_height = log2_meta_req_width;
490 meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1)
491 + meta_req_height;
492 rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height;
493 }
494 rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64;
495
496 rq_dlg_param->meta_row_height = 1 << log2_meta_row_height;
497
498 log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes);
499 log2_meta_chunk_height = log2_meta_row_height;
500
501 //full sized meta chunk width in unit of data elements
502 log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element
503 - log2_meta_chunk_height;
504 log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes);
505 min_meta_chunk_width = 1
506 << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element
507 - log2_meta_chunk_height);
508 meta_chunk_width = 1 << log2_meta_chunk_width;
509 meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width);
510 meta_row_remainder = meta_row_width_ub % meta_chunk_width;
511 meta_chunk_threshold = 0;
512 meta_blk_bytes = 4096;
513 meta_blk_height = blk256_height * 64;
// meta_blk_width is only reported through dml_print() below
514 meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height;
515 meta_surface_bytes = meta_pitch
516 * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1)
517 + meta_blk_height) * bytes_per_element / 256;
518 vmpg_bytes = mode_lib->soc.vmm_page_size_bytes;
519 meta_pte_req_per_frame_ub = (dml_round_to_multiple(
520 meta_surface_bytes - vmpg_bytes,
521 8 * vmpg_bytes,
522 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes);
523 meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request
524 rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub;
525
526 dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height);
527 dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width);
528 dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes);
529 dml_print(
530 "DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n",
531 __func__,
532 meta_pte_req_per_frame_ub);
533 dml_print(
534 "DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n",
535 __func__,
536 meta_pte_bytes_per_frame_ub);
537
538 if (!surf_vert)
539 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width;
540 else
541 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height;
542
// a row remainder above the threshold is counted as one extra chunk
543 if (meta_row_remainder <= meta_chunk_threshold)
544 rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
545 else
546 rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
547
548 // ------
549 // dpte
550 // ------
551 if (surf_linear) {
552 log2_vmpg_height = 0; // one line high
553 } else {
554 log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
555 }
556 log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;
557
558 // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4.
559 if (surf_linear) { //one 64B PTE request returns 8 PTEs
560 log2_dpte_req_height_ptes = 0;
561 log2_dpte_req_width = log2_vmpg_width + 3;
562 log2_dpte_req_height = 0;
563 } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size
564 //one 64B req gives 8x1 PTEs for 4KB tile
565 log2_dpte_req_height_ptes = 0;
566 log2_dpte_req_width = log2_blk_width + 3;
567 log2_dpte_req_height = log2_blk_height + 0;
568 } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB
569 //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB
570 log2_dpte_req_height_ptes = 4;
571 log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width
572 log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height
573 } else { //64KB page size and must 64KB tile block
574 //one 64B req gives 8x1 PTEs for 64KB tile
575 log2_dpte_req_height_ptes = 0;
576 log2_dpte_req_width = log2_blk_width + 3;
577 log2_dpte_req_height = log2_blk_height + 0;
578 }
579
580 // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height
581 // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent
582 // That depends on the pte shape (i.e. 8x1, 4x2, 2x4)
583 //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes;
584 //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes;
585 dpte_req_height = 1 << log2_dpte_req_height;
586 dpte_req_width = 1 << log2_dpte_req_width;
587
588 // calculate pitch dpte row buffer can hold
589 // round the result down to a power of two.
// for the two 4:2:0 formats only half of the pde buffer entries are used here
590 pde_buf_entries =
591 yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs;
592 if (surf_linear) {
593 unsigned int dpte_row_height;
594
595 log2_dpte_row_height_linear = dml_floor(
596 dml_log2(
597 dml_min(
598 64 * 1024 * pde_buf_entries
599 / bytes_per_element,
600 dpte_buf_in_pte_reqs
601 * dpte_req_width)
602 / data_pitch),
603 1);
604
605 ASSERT(log2_dpte_row_height_linear >= 3);
606
// clamp the linear dpte row height to at most 2^7 lines
607 if (log2_dpte_row_height_linear > 7)
608 log2_dpte_row_height_linear = 7;
609
610 log2_dpte_row_height = log2_dpte_row_height_linear;
611 // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary.
612 // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering.
613 dpte_row_height = 1 << log2_dpte_row_height;
614 dpte_row_width_ub = dml_round_to_multiple(
615 data_pitch * dpte_row_height - 1,
616 dpte_req_width,
617 1) + dpte_req_width;
618 rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
619 } else {
620 // the upper bound of the dpte_row_width without dependency on viewport position follows.
621 // for tiled mode, row height is the same as req height and row store up to vp size upper bound
622 if (!surf_vert) {
623 log2_dpte_row_height = log2_dpte_req_height;
624 dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1)
625 + dpte_req_width;
626 rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
627 } else {
628 log2_dpte_row_height =
629 (log2_blk_width < log2_dpte_req_width) ?
630 log2_blk_width : log2_dpte_req_width;
631 dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1)
632 + dpte_req_height;
633 rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height;
634 }
635 }
636 if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB
637 rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request
638 else
639 rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request
640
641 rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;
642
643 // the dpte_group_bytes is reduced for the specific case of vertical
644 // access of a tile surface that has dpte request of 8x1 ptes.
645
646 if (hostvm_enable)
647 rq_sizing_param->dpte_group_bytes = 512;
648 else {
// NOTE(review): bitwise '&' is applied to boolean operands here; every
// operand evaluates to 0 or 1 and has no side effects, so the result matches
// what '&&' would give.
649 if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
650 rq_sizing_param->dpte_group_bytes = 512;
651 else
652 //full size
653 rq_sizing_param->dpte_group_bytes = 2048;
654 }
655
656 //since pte request size is 64byte, the number of data pte requests per full sized group is as follows.
657 log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes);
658 log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests
659
660 // full sized data pte group width in elements
661 if (!surf_vert)
662 log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width;
663 else
664 log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height;
665
666 //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B
667 if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB
668 log2_dpte_group_width = log2_dpte_group_width - 1;
669
670 dpte_group_width = 1 << log2_dpte_group_width;
671
672 // since dpte groups are only aligned to dpte_req_width and not dpte_group_width,
673 // the upper bound for the dpte groups per row is as follows.
674 rq_dlg_param->dpte_groups_per_row_ub = dml_ceil(
675 (double) dpte_row_width_ub / dpte_group_width,
676 1);
677 }
678
/*
 * Collect the request parameters for one plane of a pipe.
 *
 * Picks the luma or chroma viewport and pitch values from pipe_param
 * (is_chroma selects the plane), narrows the viewport along the scan
 * direction when ODM combine is active, fills the fixed chunk / group byte
 * sizes in rq_sizing_param and finally hands everything to
 * get_meta_and_pte_attr(), which fills rq_dlg_param and rq_misc_param.
 */
static void get_surf_rq_param(
		struct display_mode_lib *mode_lib,
		display_data_rq_sizing_params_st *rq_sizing_param,
		display_data_rq_dlg_params_st *rq_dlg_param,
		display_data_rq_misc_params_st *rq_misc_param,
		const display_pipe_params_st pipe_param,
		bool is_chroma)
{
	bool mode_422 = false;
	unsigned int ppe = mode_422 ? 2 : 1;
	unsigned int vp_width;
	unsigned int vp_height;
	unsigned int data_pitch;
	unsigned int meta_pitch;

	// FIXME check if ppe apply for both luma and chroma in 422 case
	if (!is_chroma) {
		vp_width = pipe_param.src.viewport_width / ppe;
		vp_height = pipe_param.src.viewport_height;
		data_pitch = pipe_param.src.data_pitch;
		meta_pitch = pipe_param.src.meta_pitch;
	} else {
		vp_width = pipe_param.src.viewport_width_c / ppe;
		vp_height = pipe_param.src.viewport_height_c;
		data_pitch = pipe_param.src.data_pitch_c;
		meta_pitch = pipe_param.src.meta_pitch_c;
	}

	if (pipe_param.dest.odm_combine) {
		unsigned int hactive_half = pipe_param.dest.hactive / 2;
		unsigned int full_src_vp_width;
		unsigned int src_hactive_half;

		if (is_chroma) {
			full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio_c * pipe_param.dest.full_recout_width;
			src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio_c * hactive_half;
		} else {
			full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio * pipe_param.dest.full_recout_width;
			src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio * hactive_half;
		}

		/* the limit applies along the access direction of the viewport */
		if (pipe_param.src.source_scan == dm_vert) {
			vp_height = dml_min(full_src_vp_width, src_hactive_half);
			dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height);
		} else {
			vp_width = dml_min(full_src_vp_width, src_hactive_half);
			dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width);
		}
		dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width);
		dml_print("DML_DLG: %s: hactive_half = %d\n", __func__, hactive_half);
		dml_print("DML_DLG: %s: src_hactive_half = %d\n", __func__, src_hactive_half);
	}

	rq_sizing_param->chunk_bytes = 8192;
	rq_sizing_param->min_chunk_bytes =
			(rq_sizing_param->chunk_bytes == 64 * 1024) ? 0 : 1024;

	rq_sizing_param->meta_chunk_bytes = 2048;
	rq_sizing_param->min_meta_chunk_bytes = 256;

	rq_sizing_param->mpte_group_bytes = pipe_param.src.hostvm ? 512 : 2048;

	get_meta_and_pte_attr(
			mode_lib,
			rq_dlg_param,
			rq_misc_param,
			rq_sizing_param,
			vp_width,
			vp_height,
			data_pitch,
			meta_pitch,
			pipe_param.src.source_format,
			pipe_param.src.sw_mode,
			pipe_param.src.macro_tile_size,
			pipe_param.src.source_scan,
			pipe_param.src.hostvm,
			is_chroma);
}
765
/*
 * Compute all request parameters of a pipe into rq_param.
 *
 * Sets the yuv420 / yuv420_10bpc flags from the source format, gathers the
 * luma surface parameters, gathers the chroma surface parameters for
 * dual-plane formats, then splits the detile buffer between the planes and
 * logs the result.
 */
static void dml_rq_dlg_get_rq_params(
		struct display_mode_lib *mode_lib,
		display_rq_params_st *rq_param,
		const display_pipe_params_st pipe_param)
{
	rq_param->yuv420 = pipe_param.src.source_format == dm_420_8
			|| pipe_param.src.source_format == dm_420_10;
	rq_param->yuv420_10bpc = pipe_param.src.source_format == dm_420_10;

	/* luma plane is always present */
	get_surf_rq_param(
			mode_lib,
			&rq_param->sizing.rq_l,
			&rq_param->dlg.rq_l,
			&rq_param->misc.rq_l,
			pipe_param,
			false);

	/* chroma plane only for dual-plane formats */
	if (is_dual_plane((enum source_format_class) (pipe_param.src.source_format)))
		get_surf_rq_param(
				mode_lib,
				&rq_param->sizing.rq_c,
				&rq_param->dlg.rq_c,
				&rq_param->misc.rq_c,
				pipe_param,
				true);

	/* decide how the det buffer space is shared between luma and chroma */
	handle_det_buf_split(mode_lib, rq_param, pipe_param.src);
	print__rq_params_st(mode_lib, *rq_param);
}
799
/*
 * Compute the request (RQ) register values for one pipe.
 *
 * rq_regs is cleared first, the request parameters are derived from
 * pipe_param into a zero-initialised local, and the registers are then
 * extracted from those parameters and logged.
 */
void dml21_rq_dlg_get_rq_reg(
		struct display_mode_lib *mode_lib,
		display_rq_regs_st *rq_regs,
		const display_pipe_params_st pipe_param)
{
	display_rq_params_st params = {0};

	memset(rq_regs, 0, sizeof *rq_regs);

	dml_rq_dlg_get_rq_params(mode_lib, &params, pipe_param);
	extract_rq_regs(mode_lib, rq_regs, params);

	print__rq_regs_st(mode_lib, *rq_regs);
}
813
814 // Note: currently taken in as is.
815 // It would be nice to decouple this code from the hw register implementation and to factor out the code that is repeated for luma and chroma.
/*
 * dml_rq_dlg_get_dlg_params - compute the DLG and TTU register values for
 * the pipe at index pipe_idx.
 *
 * Both output structures are cleared with memset() first and then filled in
 * field by field.
 *
 * mode_lib:        provides the ip and soc values read here and is
 *                  forwarded to every get_...() helper.
 * e2e_pipe_param:  array of pipe descriptions; only element [pipe_idx] is
 *                  read directly (src, dest, dout, clks_cfg, scaling).
 * num_pipes:       forwarded to the get_...() helpers together with pipe_idx.
 * pipe_idx:        index of the pipe whose registers are computed.
 * disp_dlg_regs:   output, DLG register values.
 * disp_ttu_regs:   output, TTU register values.
 * rq_dlg_param:    per-plane (rq_l and rq_c) request parameters: swath width,
 *                  dpte groups and meta chunks per row, requests per swath,
 *                  meta and dpte row heights.
 * dlg_sys_param:   only deepsleep_dcfclk_mhz is read, and only for logging.
 * cstate_en,
 * pstate_en:       logged, and used to pick line_wait; line_wait is not
 *                  stored in any register by this function.
 *
 * Several results are multiplied by dml_pow(2, n) and truncated to unsigned
 * int before being stored; range checks are done with ASSERT() or, for a few
 * fields, by clamping to dml_pow(2, 23) - 1.
 */
dml_rq_dlg_get_dlg_params(struct display_mode_lib * mode_lib,const display_e2e_pipe_params_st * e2e_pipe_param,const unsigned int num_pipes,const unsigned int pipe_idx,display_dlg_regs_st * disp_dlg_regs,display_ttu_regs_st * disp_ttu_regs,const display_rq_dlg_params_st rq_dlg_param,const display_dlg_sys_params_st dlg_sys_param,const bool cstate_en,const bool pstate_en)816 static void dml_rq_dlg_get_dlg_params(
817 struct display_mode_lib *mode_lib,
818 const display_e2e_pipe_params_st *e2e_pipe_param,
819 const unsigned int num_pipes,
820 const unsigned int pipe_idx,
821 display_dlg_regs_st *disp_dlg_regs,
822 display_ttu_regs_st *disp_ttu_regs,
823 const display_rq_dlg_params_st rq_dlg_param,
824 const display_dlg_sys_params_st dlg_sys_param,
825 const bool cstate_en,
826 const bool pstate_en)
827 {
// Shorthand pointers into the description of the pipe being processed.
828 const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
829 const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
830 const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout;
831 const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
832 const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
833 const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
834
835 // -------------------------
836 // Section 1.15.2.1: OTG dependent Params
837 // -------------------------
838 // Timing
839 unsigned int htotal = dst->htotal;
840 // unsigned int hblank_start = dst.hblank_start; // TODO: Remove
841 unsigned int hblank_end = dst->hblank_end;
842 unsigned int vblank_start = dst->vblank_start;
843 unsigned int vblank_end = dst->vblank_end;
844 unsigned int min_vblank = mode_lib->ip.min_vblank_lines;
845
846 double dppclk_freq_in_mhz = clks->dppclk_mhz;
847 double dispclk_freq_in_mhz = clks->dispclk_mhz;
848 double refclk_freq_in_mhz = clks->refclk_mhz;
849 double pclk_freq_in_mhz = dst->pixel_rate_mhz;
850 bool interlaced = dst->interlaced;
851
// Reference clock / pixel clock ratio; multiplying a pixel-clock count by
// this value converts it to reference-clock cycles (see refcyc_per_htotal).
852 double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
853
854 double min_dcfclk_mhz;
855 double t_calc_us;
856 double min_ttu_vblank;
857
858 double min_dst_y_ttu_vblank;
859 unsigned int dlg_vblank_start;
860 bool dual_plane;
861 bool mode_422;
862 unsigned int access_dir;
863 unsigned int vp_height_l;
864 unsigned int vp_width_l;
865 unsigned int vp_height_c;
866 unsigned int vp_width_c;
867
868 // Scaling
869 unsigned int htaps_l;
870 unsigned int htaps_c;
871 double hratio_l;
872 double hratio_c;
873 double vratio_l;
874 double vratio_c;
875 bool scl_enable;
876
877 double line_time_in_us;
878 // double vinit_l;
879 // double vinit_c;
880 // double vinit_bot_l;
881 // double vinit_bot_c;
882
883 // unsigned int swath_height_l;
884 unsigned int swath_width_ub_l;
885 // unsigned int dpte_bytes_per_row_ub_l;
886 unsigned int dpte_groups_per_row_ub_l;
887 // unsigned int meta_pte_bytes_per_frame_ub_l;
888 // unsigned int meta_bytes_per_row_ub_l;
889
890 // unsigned int swath_height_c;
891 unsigned int swath_width_ub_c;
892 // unsigned int dpte_bytes_per_row_ub_c;
893 unsigned int dpte_groups_per_row_ub_c;
894
895 unsigned int meta_chunks_per_row_ub_l;
896 unsigned int meta_chunks_per_row_ub_c;
897 unsigned int vupdate_offset;
898 unsigned int vupdate_width;
899 unsigned int vready_offset;
900
901 unsigned int dppclk_delay_subtotal;
902 unsigned int dispclk_delay_subtotal;
903 unsigned int pixel_rate_delay_subtotal;
904
905 unsigned int vstartup_start;
906 unsigned int dst_x_after_scaler;
907 unsigned int dst_y_after_scaler;
908 double line_wait;
909 double dst_y_prefetch;
910 double dst_y_per_vm_vblank;
911 double dst_y_per_row_vblank;
912 double dst_y_per_vm_flip;
913 double dst_y_per_row_flip;
914 double max_dst_y_per_vm_vblank;
915 double max_dst_y_per_row_vblank;
916 double lsw;
917 double vratio_pre_l;
918 double vratio_pre_c;
919 unsigned int req_per_swath_ub_l;
920 unsigned int req_per_swath_ub_c;
921 unsigned int meta_row_height_l;
922 unsigned int meta_row_height_c;
923 unsigned int swath_width_pixels_ub_l;
924 unsigned int swath_width_pixels_ub_c;
925 unsigned int scaler_rec_in_width_l;
926 unsigned int scaler_rec_in_width_c;
927 unsigned int dpte_row_height_l;
928 unsigned int dpte_row_height_c;
929 double hscale_pixel_rate_l;
930 double hscale_pixel_rate_c;
931 double min_hratio_fact_l;
932 double min_hratio_fact_c;
933 double refcyc_per_line_delivery_pre_l;
934 double refcyc_per_line_delivery_pre_c;
935 double refcyc_per_line_delivery_l;
936 double refcyc_per_line_delivery_c;
937
938 double refcyc_per_req_delivery_pre_l;
939 double refcyc_per_req_delivery_pre_c;
940 double refcyc_per_req_delivery_l;
941 double refcyc_per_req_delivery_c;
942
943 unsigned int full_recout_width;
944 double xfc_transfer_delay;
945 double xfc_precharge_delay;
946 double xfc_remote_surface_flip_latency;
947 double xfc_dst_y_delta_drq_limit;
948 double xfc_prefetch_margin;
949 double refcyc_per_req_delivery_pre_cur0;
950 double refcyc_per_req_delivery_cur0;
951 double refcyc_per_req_delivery_pre_cur1;
952 double refcyc_per_req_delivery_cur1;
953
// Start from all-zero output registers; fields not assigned below stay 0.
954 memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
955 memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
956
957 dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en);
958 dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en);
959
960 dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz);
961 dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz);
962 dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
963 dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
964 dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced);
965 ASSERT(ref_freq_to_pix_freq < 4.0);
966
// The ratio is stored scaled by 2^19; refcyc_per_htotal is scaled by 2^8.
967 disp_dlg_regs->ref_freq_to_pix_freq =
968 (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
969 disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal
970 * dml_pow(2, 8));
// For interlaced timing the vblank end line is halved.
971 disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
972 disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end
973 * (double) ref_freq_to_pix_freq);
974 ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13));
975
// min_dcfclk_mhz and t_calc_us are only logged below.
976 min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz;
977 t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes);
978 min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
979
// min_dst_y_ttu_vblank is min_ttu_vblank divided by the line time
// (htotal / pclk); it is only logged below.
980 min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal;
981 dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
982
983 disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2));
984 ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
985
986 dml_print(
987 "DML_DLG: %s: min_dcfclk_mhz = %3.2f\n",
988 __func__,
989 min_dcfclk_mhz);
990 dml_print(
991 "DML_DLG: %s: min_ttu_vblank = %3.2f\n",
992 __func__,
993 min_ttu_vblank);
994 dml_print(
995 "DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n",
996 __func__,
997 min_dst_y_ttu_vblank);
998 dml_print(
999 "DML_DLG: %s: t_calc_us = %3.2f\n",
1000 __func__,
1001 t_calc_us);
1002 dml_print(
1003 "DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n",
1004 __func__,
1005 disp_dlg_regs->min_dst_y_next_start);
1006 dml_print(
1007 "DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n",
1008 __func__,
1009 ref_freq_to_pix_freq);
1010
1011 // -------------------------
1012 // Section 1.15.2.2: Prefetch, Active and TTU
1013 // -------------------------
1014 // Prefetch Calc
1015 // Source
1016 // dcc_en = src.dcc;
1017 dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
// mode_422 is hard-wired to false, so the "* 2" swath-width branch further
// down is never taken.
1018 mode_422 = false; // FIXME
1019 access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
1020 // bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
1021 // bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
1022 vp_height_l = src->viewport_height;
1023 vp_width_l = src->viewport_width;
1024 vp_height_c = src->viewport_height_c;
1025 vp_width_c = src->viewport_width_c;
1026
1027 // Scaling
1028 htaps_l = taps->htaps;
1029 htaps_c = taps->htaps_c;
1030 hratio_l = scl->hscl_ratio;
1031 hratio_c = scl->hscl_ratio_c;
1032 vratio_l = scl->vscl_ratio;
1033 vratio_c = scl->vscl_ratio_c;
1034 scl_enable = scl->scl_enable;
1035
1036 line_time_in_us = (htotal / pclk_freq_in_mhz);
1037 swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub;
1038 dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub;
1039 swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub;
1040 dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub;
1041
1042 meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub;
1043 meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub;
1044 vupdate_offset = dst->vupdate_offset;
1045 vupdate_width = dst->vupdate_width;
1046 vready_offset = dst->vready_offset;
1047
// Accumulate the pipeline delays counted in dppclk and dispclk cycles.
1048 dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
1049 dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
1050
1051 if (scl_enable)
1052 dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
1053 else
1054 dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
1055
1056 dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter
1057 + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
1058
1059 if (dout->dsc_enable) {
1060 double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1061
1062 dispclk_delay_subtotal += dsc_delay;
1063 }
1064
// Both subtotals rescaled to pixel-clock cycles. The result is only logged
// below; it is not written to a register in this function.
1065 pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz
1066 + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz;
1067
// vready_after_vcount0 is 1 when vstartup_start, minus the
// (vready_offset + vupdate_width + vupdate_offset) pixel count expressed in
// lines, is at or before vblank_end. The interlaced case halves both line
// numbers before comparing.
1068 vstartup_start = dst->vstartup_start;
1069 if (interlaced) {
1070 if (vstartup_start / 2.0
1071 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal
1072 <= vblank_end / 2.0)
1073 disp_dlg_regs->vready_after_vcount0 = 1;
1074 else
1075 disp_dlg_regs->vready_after_vcount0 = 0;
1076 } else {
1077 if (vstartup_start
1078 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal
1079 <= vblank_end)
1080 disp_dlg_regs->vready_after_vcount0 = 1;
1081 else
1082 disp_dlg_regs->vready_after_vcount0 = 0;
1083 }
1084
1085 // TODO: Where is this coming from?
1086 if (interlaced)
1087 vstartup_start = vstartup_start / 2;
1088
// When vstartup_start is not below min_vblank, warn and raise the local
// min_vblank to vstartup_start + 1. The same warning text is printed twice:
// once with the old min_vblank and once with the adjusted value.
// min_vblank is not written to any register in this function.
1089 // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp?
1090 if (vstartup_start >= min_vblank) {
1091 dml_print(
1092 "WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n",
1093 __func__,
1094 vblank_start,
1095 vblank_end);
1096 dml_print(
1097 "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n",
1098 __func__,
1099 vstartup_start,
1100 min_vblank);
1101 min_vblank = vstartup_start + 1;
1102 dml_print(
1103 "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n",
1104 __func__,
1105 vstartup_start,
1106 min_vblank);
1107 }
1108
1109 dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1110 dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1111
1112 dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal);
1113 dml_print(
1114 "DML_DLG: %s: pixel_rate_delay_subtotal = %d\n",
1115 __func__,
1116 pixel_rate_delay_subtotal);
1117 dml_print(
1118 "DML_DLG: %s: dst_x_after_scaler = %d\n",
1119 __func__,
1120 dst_x_after_scaler);
1121 dml_print(
1122 "DML_DLG: %s: dst_y_after_scaler = %d\n",
1123 __func__,
1124 dst_y_after_scaler);
1125
// line_wait: the largest applicable latency (urgent latency, self-refresh
// enter+exit when cstate_en, DRAM clock change + urgent latency when
// pstate_en), converted from microseconds to lines. It is not read again
// after being computed here.
1126 // Lwait
1127 // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us?
1128 line_wait = mode_lib->soc.urgent_latency_pixel_data_only_us;
1129 if (cstate_en)
1130 line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait);
1131 if (pstate_en)
1132 line_wait = dml_max(
1133 mode_lib->soc.dram_clock_change_latency_us
1134 + mode_lib->soc.urgent_latency_pixel_data_only_us, // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us?
1135 line_wait);
1136 line_wait = line_wait / line_time_in_us;
1137
1138 dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1139 dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
1140
1141 dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(
1142 mode_lib,
1143 e2e_pipe_param,
1144 num_pipes,
1145 pipe_idx);
1146 dst_y_per_row_vblank = get_dst_y_per_row_vblank(
1147 mode_lib,
1148 e2e_pipe_param,
1149 num_pipes,
1150 pipe_idx);
1151 dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1152 dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1153
// Upper limits used only by the ASSERT()s below.
1154 max_dst_y_per_vm_vblank = 32.0;
1155 max_dst_y_per_row_vblank = 16.0;
1156
// For very small htotal (<= 75) the assert limits are relaxed to 100 lines.
1157 // magic!
1158 if (htotal <= 75) {
1159 min_vblank = 300;
1160 max_dst_y_per_vm_vblank = 100.0;
1161 max_dst_y_per_row_vblank = 100.0;
1162 }
1163
1164 dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
1165 dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
1166 dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
1167 dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
1168
1169 ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank);
1170 ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
1171
// lsw: prefetch lines left after the VM and row portions; only logged.
1172 ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
1173 lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank);
1174
1175 dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw);
1176
1177 vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1178 vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1179
1180 dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l);
1181 dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c);
1182
1183 // Active
1184 req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub;
1185 req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub;
1186 meta_row_height_l = rq_dlg_param.rq_l.meta_row_height;
1187 meta_row_height_c = rq_dlg_param.rq_c.meta_row_height;
1188 swath_width_pixels_ub_l = 0;
1189 swath_width_pixels_ub_c = 0;
1190 scaler_rec_in_width_l = 0;
1191 scaler_rec_in_width_c = 0;
1192 dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height;
1193 dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height;
1194
1195 if (mode_422) {
1196 swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
1197 swath_width_pixels_ub_c = swath_width_ub_c * 2;
1198 } else {
1199 swath_width_pixels_ub_l = swath_width_ub_l * 1;
1200 swath_width_pixels_ub_c = swath_width_ub_c * 1;
1201 }
1202
1203 hscale_pixel_rate_l = 0.;
1204 hscale_pixel_rate_c = 0.;
1205 min_hratio_fact_l = 1.0;
1206 min_hratio_fact_c = 1.0;
1207
// min_hratio_fact selection (same rule for luma here and chroma below):
//   htaps <= 1      -> 2.0
//   1 < htaps <= 6  -> hratio * 2.0, capped at 4.0
//   htaps > 6       -> hratio, capped at 4.0
1208 if (htaps_l <= 1)
1209 min_hratio_fact_l = 2.0;
1210 else if (htaps_l <= 6) {
1211 if ((hratio_l * 2.0) > 4.0)
1212 min_hratio_fact_l = 4.0;
1213 else
1214 min_hratio_fact_l = hratio_l * 2.0;
1215 } else {
1216 if (hratio_l > 4.0)
1217 min_hratio_fact_l = 4.0;
1218 else
1219 min_hratio_fact_l = hratio_l;
1220 }
1221
1222 hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
1223
1224 if (htaps_c <= 1)
1225 min_hratio_fact_c = 2.0;
1226 else if (htaps_c <= 6) {
1227 if ((hratio_c * 2.0) > 4.0)
1228 min_hratio_fact_c = 4.0;
1229 else
1230 min_hratio_fact_c = hratio_c * 2.0;
1231 } else {
1232 if (hratio_c > 4.0)
1233 min_hratio_fact_c = 4.0;
1234 else
1235 min_hratio_fact_c = hratio_c;
1236 }
1237
1238 hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz;
1239
// The chroma (_c) delivery values keep these 0 defaults unless dual_plane.
1240 refcyc_per_line_delivery_pre_l = 0.;
1241 refcyc_per_line_delivery_pre_c = 0.;
1242 refcyc_per_line_delivery_l = 0.;
1243 refcyc_per_line_delivery_c = 0.;
1244
1245 refcyc_per_req_delivery_pre_l = 0.;
1246 refcyc_per_req_delivery_pre_c = 0.;
1247 refcyc_per_req_delivery_l = 0.;
1248 refcyc_per_req_delivery_c = 0.;
1249
// full_recout_width: dst->recout_width when not hsplit; when hsplit, use
// dst->full_recout_width, falling back to 2 * recout_width only if it is 0
// and odm_combine is off.
1250 full_recout_width = 0;
1251 // In ODM
1252 if (src->is_hsplit) {
1253 // This "hack" is only allowed (and valid) for MPC combine. In ODM
1254 // combine, you MUST specify the full_recout_width...according to Oswin
1255 if (dst->full_recout_width == 0 && !dst->odm_combine) {
1256 dml_print(
1257 "DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n",
1258 __func__);
1259 full_recout_width = dst->recout_width * 2; // assume half split for dcn1
1260 } else
1261 full_recout_width = dst->full_recout_width;
1262 } else
1263 full_recout_width = dst->recout_width;
1264
// Per-line delivery time, luma: once with the prefetch vratio (_pre) and
// once with the active vratio. The last argument (1) selects "per line".
1265 // As of DCN2, mpc_combine and odm_combine are mutually exclusive
1266 refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(
1267 mode_lib,
1268 refclk_freq_in_mhz,
1269 pclk_freq_in_mhz,
1270 dst->odm_combine,
1271 full_recout_width,
1272 dst->hactive,
1273 vratio_pre_l,
1274 hscale_pixel_rate_l,
1275 swath_width_pixels_ub_l,
1276 1); // per line
1277
1278 refcyc_per_line_delivery_l = get_refcyc_per_delivery(
1279 mode_lib,
1280 refclk_freq_in_mhz,
1281 pclk_freq_in_mhz,
1282 dst->odm_combine,
1283 full_recout_width,
1284 dst->hactive,
1285 vratio_l,
1286 hscale_pixel_rate_l,
1287 swath_width_pixels_ub_l,
1288 1); // per line
1289
1290 dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width);
1291 dml_print(
1292 "DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n",
1293 __func__,
1294 hscale_pixel_rate_l);
1295 dml_print(
1296 "DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n",
1297 __func__,
1298 refcyc_per_line_delivery_pre_l);
1299 dml_print(
1300 "DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n",
1301 __func__,
1302 refcyc_per_line_delivery_l);
1303
// Same per-line computation for chroma, only for dual-plane formats.
1304 if (dual_plane) {
1305 refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(
1306 mode_lib,
1307 refclk_freq_in_mhz,
1308 pclk_freq_in_mhz,
1309 dst->odm_combine,
1310 full_recout_width,
1311 dst->hactive,
1312 vratio_pre_c,
1313 hscale_pixel_rate_c,
1314 swath_width_pixels_ub_c,
1315 1); // per line
1316
1317 refcyc_per_line_delivery_c = get_refcyc_per_delivery(
1318 mode_lib,
1319 refclk_freq_in_mhz,
1320 pclk_freq_in_mhz,
1321 dst->odm_combine,
1322 full_recout_width,
1323 dst->hactive,
1324 vratio_c,
1325 hscale_pixel_rate_c,
1326 swath_width_pixels_ub_c,
1327 1); // per line
1328
1329 dml_print(
1330 "DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n",
1331 __func__,
1332 refcyc_per_line_delivery_pre_c);
1333 dml_print(
1334 "DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n",
1335 __func__,
1336 refcyc_per_line_delivery_c);
1337 }
1338
// For vertical scan the viewport height plays the role of the scaler input
// width; otherwise the viewport width is used.
1339 // TTU - Luma / Chroma
1340 if (access_dir) { // vertical access
1341 scaler_rec_in_width_l = vp_height_l;
1342 scaler_rec_in_width_c = vp_height_c;
1343 } else {
1344 scaler_rec_in_width_l = vp_width_l;
1345 scaler_rec_in_width_c = vp_width_c;
1346 }
1347
// Per-request delivery time: same helper as above, but with the scaler input
// width and with req_per_swath_ub as the last argument.
1348 refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(
1349 mode_lib,
1350 refclk_freq_in_mhz,
1351 pclk_freq_in_mhz,
1352 dst->odm_combine,
1353 full_recout_width,
1354 dst->hactive,
1355 vratio_pre_l,
1356 hscale_pixel_rate_l,
1357 scaler_rec_in_width_l,
1358 req_per_swath_ub_l); // per req
1359 refcyc_per_req_delivery_l = get_refcyc_per_delivery(
1360 mode_lib,
1361 refclk_freq_in_mhz,
1362 pclk_freq_in_mhz,
1363 dst->odm_combine,
1364 full_recout_width,
1365 dst->hactive,
1366 vratio_l,
1367 hscale_pixel_rate_l,
1368 scaler_rec_in_width_l,
1369 req_per_swath_ub_l); // per req
1370
1371 dml_print(
1372 "DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n",
1373 __func__,
1374 refcyc_per_req_delivery_pre_l);
1375 dml_print(
1376 "DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n",
1377 __func__,
1378 refcyc_per_req_delivery_l);
1379
1380 ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
1381 ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
1382
1383 if (dual_plane) {
1384 refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(
1385 mode_lib,
1386 refclk_freq_in_mhz,
1387 pclk_freq_in_mhz,
1388 dst->odm_combine,
1389 full_recout_width,
1390 dst->hactive,
1391 vratio_pre_c,
1392 hscale_pixel_rate_c,
1393 scaler_rec_in_width_c,
1394 req_per_swath_ub_c); // per req
1395 refcyc_per_req_delivery_c = get_refcyc_per_delivery(
1396 mode_lib,
1397 refclk_freq_in_mhz,
1398 pclk_freq_in_mhz,
1399 dst->odm_combine,
1400 full_recout_width,
1401 dst->hactive,
1402 vratio_c,
1403 hscale_pixel_rate_c,
1404 scaler_rec_in_width_c,
1405 req_per_swath_ub_c); // per req
1406
1407 dml_print(
1408 "DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n",
1409 __func__,
1410 refcyc_per_req_delivery_pre_c);
1411 dml_print(
1412 "DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n",
1413 __func__,
1414 refcyc_per_req_delivery_c);
1415
1416 ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
1417 ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
1418 }
1419
1420 // XFC
1421 xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1422 xfc_precharge_delay = get_xfc_precharge_delay(
1423 mode_lib,
1424 e2e_pipe_param,
1425 num_pipes,
1426 pipe_idx);
1427 xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency(
1428 mode_lib,
1429 e2e_pipe_param,
1430 num_pipes,
1431 pipe_idx);
1432 xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency;
1433 xfc_prefetch_margin = get_xfc_prefetch_margin(
1434 mode_lib,
1435 e2e_pipe_param,
1436 num_pipes,
1437 pipe_idx);
1438
// Cursor TTU values stay 0.0 unless the corresponding cursor is present
// (num_cursors > 0 for cur0, > 1 for cur1). Both cursors use the luma
// hscale pixel rate, scl->hscl_ratio and the luma vratios.
1439 // TTU - Cursor
1440 refcyc_per_req_delivery_pre_cur0 = 0.0;
1441 refcyc_per_req_delivery_cur0 = 0.0;
1442 if (src->num_cursors > 0) {
1443 calculate_ttu_cursor(
1444 mode_lib,
1445 &refcyc_per_req_delivery_pre_cur0,
1446 &refcyc_per_req_delivery_cur0,
1447 refclk_freq_in_mhz,
1448 ref_freq_to_pix_freq,
1449 hscale_pixel_rate_l,
1450 scl->hscl_ratio,
1451 vratio_pre_l,
1452 vratio_l,
1453 src->cur0_src_width,
1454 (enum cursor_bpp) (src->cur0_bpp));
1455 }
1456
1457 refcyc_per_req_delivery_pre_cur1 = 0.0;
1458 refcyc_per_req_delivery_cur1 = 0.0;
1459 if (src->num_cursors > 1) {
1460 calculate_ttu_cursor(
1461 mode_lib,
1462 &refcyc_per_req_delivery_pre_cur1,
1463 &refcyc_per_req_delivery_cur1,
1464 refclk_freq_in_mhz,
1465 ref_freq_to_pix_freq,
1466 hscale_pixel_rate_l,
1467 scl->hscl_ratio,
1468 vratio_pre_l,
1469 vratio_l,
1470 src->cur1_src_width,
1471 (enum cursor_bpp) (src->cur1_bpp));
1472 }
1473
1474 // TTU - Misc
1475 // all hard-coded
1476
1477 // Assignment to register structures
1478 disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
1479 disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
1480 ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13));
// Line counts are stored scaled by 2^2; prefetch vratios scaled by 2^19.
1481 disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
1482 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
1483 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
1484 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
1485 disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
1486
1487 disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
1488 disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
1489
1490 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
1491 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
1492 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
1493 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
1494
// vblank variants: the row time in refclk cycles
// (dst_y_per_row_vblank * htotal * ref_freq_to_pix_freq) divided, in double,
// by the number of PTE groups / meta chunks per row, then truncated.
1495 disp_dlg_regs->refcyc_per_pte_group_vblank_l =
1496 (unsigned int) (dst_y_per_row_vblank * (double) htotal
1497 * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
1498 ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
1499
1500 if (dual_plane) {
1501 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank
1502 * (double) htotal * ref_freq_to_pix_freq
1503 / (double) dpte_groups_per_row_ub_c);
1504 ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c
1505 < (unsigned int)dml_pow(2, 13));
1506 }
1507
1508 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l =
1509 (unsigned int) (dst_y_per_row_vblank * (double) htotal
1510 * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l);
1511 ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
1512
1513 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c =
1514 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
1515
// flip variants: unlike the vblank variants above, the (unsigned int) cast
// applies to the product only, so the division that follows is an integer
// division by the per-row count.
// NOTE(review): a zero dpte_groups_per_row_ub / meta_chunks_per_row_ub would
// be an integer division by zero here - confirm the callers guarantee
// non-zero values.
1516 disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal
1517 * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l;
1518 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal
1519 * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l;
1520
1521 if (dual_plane) {
1522 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip
1523 * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c;
1524 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip
1525 * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c;
1526 }
1527
// The helper results are multiplied by the reference clock frequency in MHz
// before being stored.
1528 disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
1529 disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
1530 disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
1531 disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
1532
// Values of 2^23 or more are clamped to 2^23 - 1.
1533 // Clamp to max for now
1534 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23))
1535 disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1;
1536
1537 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)dml_pow(2, 23))
1538 disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1;
1539
1540 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)dml_pow(2, 23))
1541 disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1;
1542
1543 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)dml_pow(2, 23))
1544 disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1;
// Nominal (active) lines per PTE row: row height / vratio, scaled by 2^2.
1545 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l
1546 / (double) vratio_l * dml_pow(2, 2));
1547 ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17));
1548
// For chroma an out-of-range value only produces a warning; the luma value
// above is checked with ASSERT() instead.
1549 if (dual_plane) {
1550 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c
1551 / (double) vratio_c * dml_pow(2, 2));
1552 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) {
1553 dml_print(
1554 "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n",
1555 __func__,
1556 disp_dlg_regs->dst_y_per_pte_row_nom_c,
1557 (unsigned int)dml_pow(2, 17) - 1);
1558 }
1559 }
1560
1561 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l
1562 / (double) vratio_l * dml_pow(2, 2));
1563 ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17));
1564
1565 disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
1566
1567 dml_print(
1568 "DML: Trow: %fus\n",
1569 line_time_in_us * (double)dpte_row_height_l / (double)vratio_l);
1570
// Nominal refclk cycles per PTE group / meta chunk:
// (row height / vratio) lines * htotal * ref_freq_to_pix_freq, divided by
// the per-row count; clamped to 2^23 - 1.
1571 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l
1572 / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
1573 / (double) dpte_groups_per_row_ub_l);
1574 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
1575 disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
1576 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l
1577 / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
1578 / (double) meta_chunks_per_row_ub_l);
1579 if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
1580 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
1581
1582 if (dual_plane) {
1583 disp_dlg_regs->refcyc_per_pte_group_nom_c =
1584 (unsigned int) ((double) dpte_row_height_c / (double) vratio_c
1585 * (double) htotal * ref_freq_to_pix_freq
1586 / (double) dpte_groups_per_row_ub_c);
1587 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
1588 disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
1589
1590 // TODO: Is this the right calculation? Does htotal need to be halved?
1591 disp_dlg_regs->refcyc_per_meta_chunk_nom_c =
1592 (unsigned int) ((double) meta_row_height_c / (double) vratio_c
1593 * (double) htotal * ref_freq_to_pix_freq
1594 / (double) meta_chunks_per_row_ub_c);
1595 if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
1596 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
1597 }
1598
// Per-line delivery values are stored as whole refclk cycles (floor).
1599 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(
1600 refcyc_per_line_delivery_pre_l, 1);
1601 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(
1602 refcyc_per_line_delivery_l, 1);
1603 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13));
1604 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13));
1605
// Written unconditionally; for single-plane formats the chroma inputs are
// still the 0. defaults set above.
1606 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(
1607 refcyc_per_line_delivery_pre_c, 1);
1608 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(
1609 refcyc_per_line_delivery_c, 1);
1610 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13));
1611 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13));
1612
// Cursor chunk/offset fields are fixed constants.
1613 disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
1614 disp_dlg_regs->dst_y_offset_cur0 = 0;
1615 disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
1616 disp_dlg_regs->dst_y_offset_cur1 = 0;
1617
1618 disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay;
1619 disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay;
1620 disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency;
1621 disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil(
1622 xfc_prefetch_margin * refclk_freq_in_mhz, 1);
1623
// dst_y_delta_drq_limit is a real limit only for an XFC-enabled, non-slave
// source; otherwise it is set to 0x7fff ("off").
1624 // slave has to have this value also set to off
1625 if (src->xfc_enable && !src->xfc_slave)
1626 disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1);
1627 else
1628 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
1629
// TTU per-request delivery values are stored scaled by 2^10.
1630 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l
1631 * dml_pow(2, 10));
1632 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l
1633 * dml_pow(2, 10));
1634 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c
1635 * dml_pow(2, 10));
1636 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c
1637 * dml_pow(2, 10));
1638 disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 =
1639 (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
1640 disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0
1641 * dml_pow(2, 10));
1642 disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 =
1643 (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10));
1644 disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1
1645 * dml_pow(2, 10));
// Low watermark is fixed at 0; high watermark is four line times expressed
// in refclk cycles (4 * htotal * ref_freq_to_pix_freq).
1646 disp_ttu_regs->qos_level_low_wm = 0;
1647 ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14));
1648 disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal
1649 * ref_freq_to_pix_freq);
1650 ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));
1651
// Remaining QoS fields are hard-coded constants.
1652 disp_ttu_regs->qos_level_flip = 14;
1653 disp_ttu_regs->qos_level_fixed_l = 8;
1654 disp_ttu_regs->qos_level_fixed_c = 8;
1655 disp_ttu_regs->qos_level_fixed_cur0 = 8;
1656 disp_ttu_regs->qos_ramp_disable_l = 0;
1657 disp_ttu_regs->qos_ramp_disable_c = 0;
1658 disp_ttu_regs->qos_ramp_disable_cur0 = 0;
1659
1660 disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
1661 ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));
1662
1663 print__ttu_regs_st(mode_lib, *disp_ttu_regs);
1664 print__dlg_regs_st(mode_lib, *disp_dlg_regs);
1665 }
1666
/*
 * Compute the DLG and TTU register values for the pipe at pipe_idx.
 *
 * The system-level inputs (watermarks, deep-sleep DCFCLK, latencies and
 * immediate-flip totals) are read from mode_lib through its getters, the
 * request parameters are derived for e2e_pipe_param[pipe_idx].pipe, and
 * both are then handed to dml_rq_dlg_get_dlg_params() together with
 * dlg_regs and ttu_regs.
 *
 * vm_en, ignore_viewport_pos and immediate_flip_support are part of the
 * interface but are not referenced in this function's body.
 */
void dml21_rq_dlg_get_dlg_reg(
		struct display_mode_lib *mode_lib,
		display_dlg_regs_st *dlg_regs,
		display_ttu_regs_st *ttu_regs,
		display_e2e_pipe_params_st *e2e_pipe_param,
		const unsigned int num_pipes,
		const unsigned int pipe_idx,
		const bool cstate_en,
		const bool pstate_en,
		const bool vm_en,
		const bool ignore_viewport_pos,
		const bool immediate_flip_support)
{
	display_rq_params_st rq = {0};
	display_dlg_sys_params_st sys = {0};

	// System-level inputs. The getters are called in a fixed order.
	sys.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes);
	sys.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, e2e_pipe_param, num_pipes);
	sys.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes);
	sys.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes);
	sys.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes);
	sys.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes);
	sys.total_flip_bw = get_total_immediate_flip_bw(mode_lib, e2e_pipe_param, num_pipes);
	sys.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes);

	// TODO: Deprecated
	sys.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency / sys.deepsleep_dcfclk_mhz;

	print__dlg_sys_params_st(mode_lib, sys);

	// Per-pipe calculation: request parameters first, then DLG/TTU registers.
	dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);

	dml_rq_dlg_get_rq_params(mode_lib, &rq, e2e_pipe_param[pipe_idx].pipe);
	dml_rq_dlg_get_dlg_params(mode_lib, e2e_pipe_param, num_pipes, pipe_idx,
			dlg_regs, ttu_regs, rq.dlg, sys, cstate_en, pstate_en);

	dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx);
}
1723
/*
 * Fill *arb_param with the arbiter parameters.
 *
 * The whole structure is zeroed first; three fields are then set to fixed
 * values. mode_lib is not referenced by this function.
 */
void dml_rq_dlg_get_arb_params(
		struct display_mode_lib *mode_lib,
		display_arb_params_st *arb_param)
{
	// Clear every byte so that any field not set below reads as zero.
	memset(arb_param, 0, sizeof(*arb_param));

	arb_param->sat_level_us = 60;
	arb_param->min_req_outstanding = 68;
	arb_param->max_req_outstanding = 256;
}
1731
calculate_ttu_cursor(struct display_mode_lib * mode_lib,double * refcyc_per_req_delivery_pre_cur,double * refcyc_per_req_delivery_cur,double refclk_freq_in_mhz,double ref_freq_to_pix_freq,double hscale_pixel_rate_l,double hscl_ratio,double vratio_pre_l,double vratio_l,unsigned int cur_width,enum cursor_bpp cur_bpp)1732 static void calculate_ttu_cursor(
1733 struct display_mode_lib *mode_lib,
1734 double *refcyc_per_req_delivery_pre_cur,
1735 double *refcyc_per_req_delivery_cur,
1736 double refclk_freq_in_mhz,
1737 double ref_freq_to_pix_freq,
1738 double hscale_pixel_rate_l,
1739 double hscl_ratio,
1740 double vratio_pre_l,
1741 double vratio_l,
1742 unsigned int cur_width,
1743 enum cursor_bpp cur_bpp)
1744 {
1745 unsigned int cur_src_width = cur_width;
1746 unsigned int cur_req_size = 0;
1747 unsigned int cur_req_width = 0;
1748 double cur_width_ub = 0.0;
1749 double cur_req_per_width = 0.0;
1750 double hactive_cur = 0.0;
1751
1752 ASSERT(cur_src_width <= 256);
1753
1754 *refcyc_per_req_delivery_pre_cur = 0.0;
1755 *refcyc_per_req_delivery_cur = 0.0;
1756 if (cur_src_width > 0) {
1757 unsigned int cur_bit_per_pixel = 0;
1758
1759 if (cur_bpp == dm_cur_2bit) {
1760 cur_req_size = 64; // byte
1761 cur_bit_per_pixel = 2;
1762 } else { // 32bit
1763 cur_bit_per_pixel = 32;
1764 if (cur_src_width >= 1 && cur_src_width <= 16)
1765 cur_req_size = 64;
1766 else if (cur_src_width >= 17 && cur_src_width <= 31)
1767 cur_req_size = 128;
1768 else
1769 cur_req_size = 256;
1770 }
1771
1772 cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0);
1773 cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1)
1774 * (double) cur_req_width;
1775 cur_req_per_width = cur_width_ub / (double) cur_req_width;
1776 hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor
1777
1778 if (vratio_pre_l <= 1.0) {
1779 *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq
1780 / (double) cur_req_per_width;
1781 } else {
1782 *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz
1783 * (double) cur_src_width / hscale_pixel_rate_l
1784 / (double) cur_req_per_width;
1785 }
1786
1787 ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13));
1788
1789 if (vratio_l <= 1.0) {
1790 *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq
1791 / (double) cur_req_per_width;
1792 } else {
1793 *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz
1794 * (double) cur_src_width / hscale_pixel_rate_l
1795 / (double) cur_req_per_width;
1796 }
1797
1798 dml_print(
1799 "DML_DLG: %s: cur_req_width = %d\n",
1800 __func__,
1801 cur_req_width);
1802 dml_print(
1803 "DML_DLG: %s: cur_width_ub = %3.2f\n",
1804 __func__,
1805 cur_width_ub);
1806 dml_print(
1807 "DML_DLG: %s: cur_req_per_width = %3.2f\n",
1808 __func__,
1809 cur_req_per_width);
1810 dml_print(
1811 "DML_DLG: %s: hactive_cur = %3.2f\n",
1812 __func__,
1813 hactive_cur);
1814 dml_print(
1815 "DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n",
1816 __func__,
1817 *refcyc_per_req_delivery_pre_cur);
1818 dml_print(
1819 "DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n",
1820 __func__,
1821 *refcyc_per_req_delivery_cur);
1822
1823 ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13));
1824 }
1825 }
1826
1827