1 /* $NetBSD: amdgpu_rc_calc.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $ */
2
3
4 /*
5 * Copyright 2017 Advanced Micro Devices, Inc.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors: AMD
26 *
27 */
28
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: amdgpu_rc_calc.c,v 1.2 2021/12/18 23:45:04 riastradh Exp $");
31
32 #include "os_types.h"
33 #include "rc_calc.h"
34 #include "qp_tables.h"
35
/*
 * Pack a (mode, bpc, max_min) triple into one integer so that a single
 * switch statement can select a table.  Every argument is parenthesized so
 * that compound expressions (e.g. "a | b") are shifted as a whole.
 */
#define table_hash(mode, bpc, max_min) \
	(((mode) << 16) | ((bpc) << 8) | (max_min))

/*
 * Select one of three values according to the colour mode.  The macro reads
 * a variable named `cm` from the scope in which it is expanded: the first
 * value is chosen for CM_444 and CM_RGB, the second for CM_422 and the third
 * for everything else.  The whole expansion is parenthesized so it can be
 * embedded safely in a larger expression.
 */
#define MODE_SELECT(val444, val422, val420) \
	((cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420)))

/*
 * Emit one `case` of the table-selection switch.  The arguments are both
 * hashed (as values) and token-pasted into the qp_table_<mode>_<bpc>bpc_<max>
 * identifier, so the expansion site must provide `table`, `table_size` and
 * constants whose names match the third argument.
 */
#define TABLE_CASE(mode, bpc, max)	case (table_hash(mode, BPC_##bpc, max)): \
	table = qp_table_##mode##_##bpc##bpc_##max; \
	table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
	break
46
47
/*
 * get_qp_set - copy one row of a qp_table_* table into @qps.
 *
 * @qps:     destination; sizeof(qp_set) bytes are written on success
 * @cm:      colour mode, mapped to the 444/422/420 table family
 * @bpc:     bits-per-component selector, part of the table key
 * @max_min: MM_MIN or MM_MAX, part of the table key
 * @bpp:     bits per pixel used to pick the row
 *
 * The row index is (bpp - table[0].bpp) * 2, i.e. the lookup assumes the
 * rows of a table are spaced 0.5 bpp apart starting at the first row's bpp.
 *
 * If no table matches the key the function returns silently; if the row
 * index falls outside the table an error is logged.  In both cases @qps is
 * left untouched.
 */
void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp)
{
	int mode = MODE_SELECT(444, 422, 420);
	int sel = table_hash(mode, bpc, max_min);
	int table_size = 0;
	int index;
	const struct qp_entry *table = 0;

	/*
	 * TABLE_CASE uses its third argument both as a value for table_hash
	 * and as a suffix pasted into the table name, so `min` and `max` have
	 * to exist as constants here.
	 */
	enum { min = MM_MIN, max = MM_MAX };

	switch (sel) {
		TABLE_CASE(444,  8, max);
		TABLE_CASE(444,  8, min);
		TABLE_CASE(444, 10, max);
		TABLE_CASE(444, 10, min);
		TABLE_CASE(444, 12, max);
		TABLE_CASE(444, 12, min);
		TABLE_CASE(422,  8, max);
		TABLE_CASE(422,  8, min);
		TABLE_CASE(422, 10, max);
		TABLE_CASE(422, 10, min);
		TABLE_CASE(422, 12, max);
		TABLE_CASE(422, 12, min);
		TABLE_CASE(420,  8, max);
		TABLE_CASE(420,  8, min);
		TABLE_CASE(420, 10, max);
		TABLE_CASE(420, 10, min);
		TABLE_CASE(420, 12, max);
		TABLE_CASE(420, 12, min);
	default:
		/* unknown combination: no table, handled below */
		break;
	}

	if (table == 0)
		return;

	index = (bpp - table[0].bpp) * 2;

	/*
	 * A bpp sufficiently below the first row yields a negative index,
	 * which would read memory in front of the table.
	 */
	if (index < 0) {
		dm_error("ERROR: Requested rc_calc to find a bpp entry below the start of the table\n");
		return;
	}

	/* requested size is bigger than the table */
	if (index >= table_size) {
		dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
		return;
	}

	memcpy(qps, table[index].qps, sizeof(qp_set));
}
92
/*
 * Round @num to the nearest integer, with halves rounded away from zero.
 *
 * The value is biased by 0.5 towards its own sign and then truncated by a
 * conversion to int, so the result is only meaningful while the biased
 * value fits in an int.  The integer is returned as a double.
 */
double dsc_roundf(double num)
{
	const double bias = (num < 0.0) ? -0.5 : 0.5;

	return (int)(num + bias);
}
102
/*
 * Return the smallest integer that is not less than @num, as a double.
 *
 * Truncation towards zero already is the ceiling for negative values and
 * for exact integers; only a positive value with a fractional part has to
 * be bumped up by one.  The computation goes through int, so it is only
 * meaningful for values in int range.
 */
double dsc_ceil(double num)
{
	const int whole = (int)num;

	if (num > 0 && (double)whole != num)
		return (int)(num + 1);

	return whole;
}
112
/*
 * Clamped linear ramp: @ofs_lo for bpp <= @bpp_lo, @ofs_hi for
 * bpp >= @bpp_hi, and in between @ofs_lo plus the rounded product of the
 * distance from @bpp_lo and @slope.
 */
static int ofs_ramp(float bpp, float bpp_lo, float bpp_hi, int ofs_lo, int ofs_hi, double slope)
{
	if (bpp <= bpp_lo)
		return ofs_lo;
	if (bpp >= bpp_hi)
		return ofs_hi;

	return ofs_lo + dsc_roundf((bpp - bpp_lo) * slope);
}

/*
 * Two consecutive ramps: from @ofs_6 to @ofs_8_12 over bpp 6..8 (slope 1),
 * flat at @ofs_8_12 over bpp 8..12, then up to @ofs_15 over bpp 12..15 with
 * @slope_hi.
 */
static int ofs_ramp2(float bpp, int ofs_6, int ofs_8_12, int ofs_15, double slope_hi)
{
	if (bpp < 8)
		return ofs_ramp(bpp, 6, 8, ofs_6, ofs_8_12, 2 / 2.0);

	return ofs_ramp(bpp, 12, 15, ofs_8_12, ofs_15, slope_hi);
}

/*
 * Entries for the modes other than 444/RGB.  Both share the same offsets
 * and slopes; only the bpp interval of the main ramps (@lo..@hi) and of
 * entry 11 (@tail_lo..@tail_hi) differs.
 */
static void ofs_fill_subsampled(int *out, float bpp, float lo, float hi, float tail_lo, float tail_hi)
{
	out[0] = ofs_ramp(bpp, lo, hi, 2, 10, 8 / 2.0);
	out[1] = ofs_ramp(bpp, lo, hi, 0, 8, 8 / 2.0);
	out[2] = ofs_ramp(bpp, lo, hi, 0, 6, 6 / 2.0);
	out[3] = ofs_ramp(bpp, lo, hi, -2, 4, 6 / 2.0);
	out[4] = ofs_ramp(bpp, lo, hi, -4, 2, 6 / 2.0);
	out[5] = ofs_ramp(bpp, lo, hi, -6, 0, 6 / 2.0);
	out[6] = ofs_ramp(bpp, lo, hi, -8, -2, 6 / 2.0);
	out[7] = ofs_ramp(bpp, lo, hi, -8, -4, 4 / 2.0);
	out[8] = ofs_ramp(bpp, lo, hi, -8, -6, 2 / 2.0);
	out[9] = ofs_ramp(bpp, lo, hi, -10, -8, 2 / 2.0);
	out[10] = -10;
	out[11] = ofs_ramp(bpp, tail_lo, tail_hi, -12, -10, 2 / 1.0);
	out[12] = -12;
	out[13] = -12;
	out[14] = -12;
}

/*
 * get_ofs_set - fill @ofs with 15 offsets derived from @mode and @bpp.
 *
 * @ofs:  destination; exactly 15 ints are written
 * @mode: CM_444 and CM_RGB share one set of ramps, CM_422 has its own and
 *        every other value uses the third set
 * @bpp:  bits per pixel; each entry is constant outside its bpp interval
 *        and interpolated (with rounding) inside it
 */
void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
{
	int *out = ofs;

	if (mode == CM_444 || mode == CM_RGB) {
		out[0] = ofs_ramp2(bpp, 0, 2, 10, 8 / 3.0);
		out[1] = ofs_ramp2(bpp, -2, 0, 8, 8 / 3.0);
		out[2] = ofs_ramp2(bpp, -2, 0, 6, 6 / 3.0);
		out[3] = ofs_ramp2(bpp, -4, -2, 4, 6 / 3.0);
		out[4] = ofs_ramp2(bpp, -6, -4, 2, 6 / 3.0);
		out[5] = ofs_ramp(bpp, 12, 15, -6, 0, 6 / 3.0);
		out[6] = ofs_ramp(bpp, 12, 15, -8, -2, 6 / 3.0);
		out[7] = ofs_ramp(bpp, 12, 15, -8, -4, 4 / 3.0);
		out[8] = ofs_ramp(bpp, 12, 15, -8, -6, 2 / 3.0);
		out[9] = ofs_ramp(bpp, 12, 15, -10, -8, 2 / 3.0);
		out[10] = -10;
		out[11] = ofs_ramp(bpp, 6, 8, -12, -10, 2 / 2.0);
		out[12] = -12;
		out[13] = -12;
		out[14] = -12;
	} else if (mode == CM_422) {
		ofs_fill_subsampled(out, bpp, 8, 10, 6, 7);
	} else {
		ofs_fill_subsampled(out, bpp, 6, 8, 4, 5);
	}
}
167
/*
 * Return the median (middle value) of @a, @b and @c.
 *
 * Implemented as a clamp of @c into the range spanned by @a and @b, which
 * needs no swapping.  The earlier three-step compare-and-swap exchanged the
 * wrong pair in its last step and returned the maximum for inputs such as
 * (2, 3, 1).
 */
int median3(int a, int b, int c)
{
	const int lo = (a < b) ? a : b;
	const int hi = (a < b) ? b : a;

	if (c <= lo)
		return lo;
	if (c >= hi)
		return hi;

	return c;
}
179
calc_rc_params(struct rc_params * rc,enum colour_mode cm,enum bits_per_comp bpc,float bpp,int slice_width,int slice_height,int minor_version)180 void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version)
181 {
182 float bpp_group;
183 float initial_xmit_delay_factor;
184 int padding_pixels;
185 int i;
186
187 rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
188 rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
189
190 bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0);
191
192 switch (cm) {
193 case CM_420:
194 rc->initial_fullness_offset = (bpp >= 6) ? (2048) : ((bpp <= 4) ? (6144) : ((((bpp > 4) && (bpp <= 5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp - 5) * (3584)))));
195 rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group)));
196 rc->second_line_bpg_offset = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group)));
197 break;
198 case CM_422:
199 rc->initial_fullness_offset = (bpp >= 8) ? (2048) : ((bpp <= 7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584))));
200 rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group)));
201 rc->second_line_bpg_offset = 0;
202 break;
203 case CM_444:
204 case CM_RGB:
205 rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <= 8) ? (6144) : ((((bpp > 8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2)))));
206 rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
207 rc->second_line_bpg_offset = 0;
208 break;
209 }
210
211 initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0;
212 rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor);
213
214 if (cm == CM_422 || cm == CM_420)
215 slice_width /= 2;
216
217 padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0;
218 if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) {
219 if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1)
220 rc->initial_xmit_delay++;
221 }
222
223 rc->flatness_min_qp = ((bpc == BPC_8) ? (3) : ((bpc == BPC_10) ? (7) : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
224 rc->flatness_max_qp = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
225 rc->flatness_det_thresh = 2 << (bpc - 8);
226
227 get_qp_set(rc->qp_min, cm, bpc, MM_MIN, bpp);
228 get_qp_set(rc->qp_max, cm, bpc, MM_MAX, bpp);
229 if (cm == CM_444 && minor_version == 1) {
230 for (i = 0; i < QP_SET_SIZE; ++i) {
231 rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0;
232 rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0;
233 }
234 }
235 get_ofs_set(rc->ofs, cm, bpp);
236
237 /* fixed parameters */
238 rc->rc_model_size = 8192;
239 rc->rc_edge_factor = 6;
240 rc->rc_tgt_offset_hi = 3;
241 rc->rc_tgt_offset_lo = 3;
242
243 rc->rc_buf_thresh[0] = 896;
244 rc->rc_buf_thresh[1] = 1792;
245 rc->rc_buf_thresh[2] = 2688;
246 rc->rc_buf_thresh[3] = 3584;
247 rc->rc_buf_thresh[4] = 4480;
248 rc->rc_buf_thresh[5] = 5376;
249 rc->rc_buf_thresh[6] = 6272;
250 rc->rc_buf_thresh[7] = 6720;
251 rc->rc_buf_thresh[8] = 7168;
252 rc->rc_buf_thresh[9] = 7616;
253 rc->rc_buf_thresh[10] = 7744;
254 rc->rc_buf_thresh[11] = 7872;
255 rc->rc_buf_thresh[12] = 8000;
256 rc->rc_buf_thresh[13] = 8064;
257 }
258
259