xref: /netbsd-src/sys/external/bsd/drm2/dist/drm/radeon/radeon_rs690.c (revision 41ec02673d281bbb3d38e6c78504ce6e30c228c1)
1 /*	$NetBSD: radeon_rs690.c,v 1.2 2021/12/18 23:45:43 riastradh Exp $	*/
2 
3 /*
4  * Copyright 2008 Advanced Micro Devices, Inc.
5  * Copyright 2008 Red Hat Inc.
6  * Copyright 2009 Jerome Glisse.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
22  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  *
26  * Authors: Dave Airlie
27  *          Alex Deucher
28  *          Jerome Glisse
29  */
30 
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: radeon_rs690.c,v 1.2 2021/12/18 23:45:43 riastradh Exp $");
33 
34 #include <linux/pci.h>
35 
36 #include "atom.h"
37 #include "radeon.h"
38 #include "radeon_asic.h"
39 #include "radeon_audio.h"
40 #include "rs690d.h"
41 
42 int rs690_mc_wait_for_idle(struct radeon_device *rdev)
43 {
44 	unsigned i;
45 	uint32_t tmp;
46 
47 	for (i = 0; i < rdev->usec_timeout; i++) {
48 		/* read MC_STATUS */
49 		tmp = RREG32_MC(R_000090_MC_SYSTEM_STATUS);
50 		if (G_000090_MC_SYSTEM_IDLE(tmp))
51 			return 0;
52 		udelay(1);
53 	}
54 	return -1;
55 }
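/*
 * The poll above busy-waits in 1 us steps, so the failure return is reached
 * after roughly rdev->usec_timeout microseconds.  Callers in this file
 * (rs690_gpu_init() and rs690_mc_program()) treat any non-zero return as
 * "MC never went idle" and only warn.
 */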
56 
57 static void rs690_gpu_init(struct radeon_device *rdev)
58 {
59 	/* FIXME: is this correct? */
60 	r420_pipes_init(rdev);
61 	if (rs690_mc_wait_for_idle(rdev)) {
62 		pr_warn("Failed to wait MC idle while programming pipes. Bad things might happen.\n");
63 	}
64 }
65 
66 union igp_info {
67 	struct _ATOM_INTEGRATED_SYSTEM_INFO info;
68 	struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_v2;
69 };
70 
71 void rs690_pm_info(struct radeon_device *rdev)
72 {
73 	int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
74 	union igp_info *info;
75 	uint16_t data_offset;
76 	uint8_t frev, crev;
77 	fixed20_12 tmp;
78 
79 	if (atom_parse_data_header(rdev->mode_info.atom_context, index, NULL,
80 				   &frev, &crev, &data_offset)) {
81 		info = (union igp_info *)(rdev->mode_info.atom_context->bios + data_offset);
82 
83 		/* Get various system information from the BIOS */
84 		switch (crev) {
85 		case 1:
86 			tmp.full = dfixed_const(100);
87 			rdev->pm.igp_sideport_mclk.full = dfixed_const(le32_to_cpu(info->info.ulBootUpMemoryClock));
88 			rdev->pm.igp_sideport_mclk.full = dfixed_div(rdev->pm.igp_sideport_mclk, tmp);
89 			if (le16_to_cpu(info->info.usK8MemoryClock))
90 				rdev->pm.igp_system_mclk.full = dfixed_const(le16_to_cpu(info->info.usK8MemoryClock));
91 			else if (rdev->clock.default_mclk) {
92 				rdev->pm.igp_system_mclk.full = dfixed_const(rdev->clock.default_mclk);
93 				rdev->pm.igp_system_mclk.full = dfixed_div(rdev->pm.igp_system_mclk, tmp);
94 			} else
95 				rdev->pm.igp_system_mclk.full = dfixed_const(400);
96 			rdev->pm.igp_ht_link_clk.full = dfixed_const(le16_to_cpu(info->info.usFSBClock));
97 			rdev->pm.igp_ht_link_width.full = dfixed_const(info->info.ucHTLinkWidth);
98 			break;
99 		case 2:
100 			tmp.full = dfixed_const(100);
101 			rdev->pm.igp_sideport_mclk.full = dfixed_const(le32_to_cpu(info->info_v2.ulBootUpSidePortClock));
102 			rdev->pm.igp_sideport_mclk.full = dfixed_div(rdev->pm.igp_sideport_mclk, tmp);
103 			if (le32_to_cpu(info->info_v2.ulBootUpUMAClock))
104 				rdev->pm.igp_system_mclk.full = dfixed_const(le32_to_cpu(info->info_v2.ulBootUpUMAClock));
105 			else if (rdev->clock.default_mclk)
106 				rdev->pm.igp_system_mclk.full = dfixed_const(rdev->clock.default_mclk);
107 			else
108 				rdev->pm.igp_system_mclk.full = dfixed_const(66700);
109 			rdev->pm.igp_system_mclk.full = dfixed_div(rdev->pm.igp_system_mclk, tmp);
110 			rdev->pm.igp_ht_link_clk.full = dfixed_const(le32_to_cpu(info->info_v2.ulHTLinkFreq));
111 			rdev->pm.igp_ht_link_clk.full = dfixed_div(rdev->pm.igp_ht_link_clk, tmp);
112 			rdev->pm.igp_ht_link_width.full = dfixed_const(le16_to_cpu(info->info_v2.usMinHTLinkWidth));
113 			break;
114 		default:
115 			/* We assume the slowest possible clock, i.e. worst case */
116 			rdev->pm.igp_sideport_mclk.full = dfixed_const(200);
117 			rdev->pm.igp_system_mclk.full = dfixed_const(200);
118 			rdev->pm.igp_ht_link_clk.full = dfixed_const(1000);
119 			rdev->pm.igp_ht_link_width.full = dfixed_const(8);
120 			DRM_ERROR("No integrated system info for your GPU, using safe default\n");
121 			break;
122 		}
123 	} else {
124 		/* We assume the slowest possible clock, i.e. worst case */
125 		rdev->pm.igp_sideport_mclk.full = dfixed_const(200);
126 		rdev->pm.igp_system_mclk.full = dfixed_const(200);
127 		rdev->pm.igp_ht_link_clk.full = dfixed_const(1000);
128 		rdev->pm.igp_ht_link_width.full = dfixed_const(8);
129 		DRM_ERROR("No integrated system info for your GPU, using safe default\n");
130 	}
131 	/* Compute various bandwidths */
132 	/* k8_bandwidth = (memory_clk / 2) * 2 * 8 * 0.5 = memory_clk * 4  */
133 	tmp.full = dfixed_const(4);
134 	rdev->pm.k8_bandwidth.full = dfixed_mul(rdev->pm.igp_system_mclk, tmp);
135 	/* ht_bandwidth = ht_clk * 2 * ht_width / 8 * 0.8
136 	 *              = ht_clk * ht_width / 5
137 	 */
138 	tmp.full = dfixed_const(5);
139 	rdev->pm.ht_bandwidth.full = dfixed_mul(rdev->pm.igp_ht_link_clk,
140 						rdev->pm.igp_ht_link_width);
141 	rdev->pm.ht_bandwidth.full = dfixed_div(rdev->pm.ht_bandwidth, tmp);
142 	if (tmp.full < rdev->pm.max_bandwidth.full) {
143 		/* HT link is a limiting factor */
144 		rdev->pm.max_bandwidth.full = tmp.full;
145 	}
146 	/* sideport_bandwidth = (sideport_clk / 2) * 2 * 2 * 0.7
147 	 *                    = (sideport_clk * 14) / 10
148 	 */
149 	tmp.full = dfixed_const(14);
150 	rdev->pm.sideport_bandwidth.full = dfixed_mul(rdev->pm.igp_sideport_mclk, tmp);
151 	tmp.full = dfixed_const(10);
152 	rdev->pm.sideport_bandwidth.full = dfixed_div(rdev->pm.sideport_bandwidth, tmp);
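	/*
	 * Rough worked example (illustrative numbers, not read from any BIOS):
	 * with the safe defaults above -- igp_system_mclk = 200,
	 * igp_ht_link_clk = 1000, igp_ht_link_width = 8,
	 * igp_sideport_mclk = 200 -- the fixed-point math yields
	 *   k8_bandwidth       = 200 * 4        = 800
	 *   ht_bandwidth       = 1000 * 8 / 5   = 1600
	 *   sideport_bandwidth = 200 * 14 / 10  = 280
	 * in the same units as the clocks; these feed the max_bandwidth
	 * selection in rs690_crtc_bandwidth_compute() below.
	 */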
153 }
154 
155 static void rs690_mc_init(struct radeon_device *rdev)
156 {
157 	u64 base;
158 	uint32_t h_addr, l_addr;
159 	unsigned long long k8_addr;
160 
161 	rs400_gart_adjust_size(rdev);
162 	rdev->mc.vram_is_ddr = true;
163 	rdev->mc.vram_width = 128;
164 	rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
165 	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
166 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
167 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
168 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
169 	base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
170 	base = G_000100_MC_FB_START(base) << 16;
171 	rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
172 	/* Some boards seem to be configured for 128MB of sideport memory,
173 	 * but really only have 64MB.  Just skip the sideport and use
174 	 * UMA memory.
175 	 */
176 	if (rdev->mc.igp_sideport_enabled &&
177 	    (rdev->mc.real_vram_size == (384 * 1024 * 1024))) {
178 		base += 128 * 1024 * 1024;
179 		rdev->mc.real_vram_size -= 128 * 1024 * 1024;
180 		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
181 	}
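	/*
	 * For example (numbers implied by the quirk above): a board reporting
	 * 384 MiB with sideport enabled ends up with base moved up by 128 MiB
	 * and real_vram_size/mc_vram_size reduced to 256 MiB, so only the UMA
	 * portion is used.
	 */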
182 
183 	/* Use K8 direct mapping for fast fb access. */
184 	rdev->fastfb_working = false;
185 	h_addr = G_00005F_K8_ADDR_EXT(RREG32_MC(R_00005F_MC_MISC_UMA_CNTL));
186 	l_addr = RREG32_MC(R_00001E_K8_FB_LOCATION);
187 	k8_addr = ((unsigned long long)h_addr) << 32 | l_addr;
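	/*
	 * Illustration only: h_addr carries the address bits above 32, so e.g.
	 * h_addr = 0x1 and l_addr = 0xe0000000 combine to k8_addr 0x1e0000000.
	 */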
188 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
189 	if (k8_addr + rdev->mc.visible_vram_size < 0x100000000ULL)
190 #endif
191 	{
192 		/* FastFB shall be used with UMA memory. Here it is simply disabled when sideport
193 		 * memory is present.
194 		 */
195 		if (rdev->mc.igp_sideport_enabled == false && radeon_fastfb == 1) {
196 			DRM_INFO("Direct mapping: aper base at 0x%llx, replaced by direct mapping base 0x%llx.\n",
197 					(unsigned long long)rdev->mc.aper_base, k8_addr);
198 			rdev->mc.aper_base = (resource_size_t)k8_addr;
199 			rdev->fastfb_working = true;
200 		}
201 	}
202 
203 	rs690_pm_info(rdev);
204 	radeon_vram_location(rdev, &rdev->mc, base);
205 	rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;
206 	radeon_gtt_location(rdev, &rdev->mc);
207 	radeon_update_bandwidth_info(rdev);
208 }
209 
210 void rs690_line_buffer_adjust(struct radeon_device *rdev,
211 			      struct drm_display_mode *mode1,
212 			      struct drm_display_mode *mode2)
213 {
214 	u32 tmp;
215 
216 	/* Guess line buffer size to be 8192 pixels */
217 	u32 lb_size = 8192;
218 
219 	/*
220 	 * Line Buffer Setup
221 	 * There is a single line buffer shared by both display controllers.
222 	 * R_006520_DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
223 	 * the display controllers.  The partitioning can either be done
224 	 * manually or via one of four preset allocations specified in bits 1:0:
225 	 *  0 - line buffer is divided in half and shared between the crtcs
226 	 *  1 - D1 gets 3/4 of the line buffer, D2 gets 1/4
227 	 *  2 - D1 gets the whole buffer
228 	 *  3 - D1 gets 1/4 of the line buffer, D2 gets 3/4
229 	 * Setting bit 2 of R_006520_DC_LB_MEMORY_SPLIT switches to manual
230 	 * allocation mode. In manual allocation mode, D1 always starts at 0,
231 	 * D1 end/2 is specified in bits 14:4; D2 allocation follows D1.
232 	 */
233 	tmp = RREG32(R_006520_DC_LB_MEMORY_SPLIT) & C_006520_DC_LB_MEMORY_SPLIT;
234 	tmp &= ~C_006520_DC_LB_MEMORY_SPLIT_MODE;
235 	/* auto */
236 	if (mode1 && mode2) {
237 		if (mode1->hdisplay > mode2->hdisplay) {
238 			if (mode1->hdisplay > 2560)
239 				tmp |= V_006520_DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q;
240 			else
241 				tmp |= V_006520_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
242 		} else if (mode2->hdisplay > mode1->hdisplay) {
243 			if (mode2->hdisplay > 2560)
244 				tmp |= V_006520_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
245 			else
246 				tmp |= V_006520_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
247 		} else
248 			tmp |= V_006520_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
249 	} else if (mode1) {
250 		tmp |= V_006520_DC_LB_MEMORY_SPLIT_D1_ONLY;
251 	} else if (mode2) {
252 		tmp |= V_006520_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
253 	}
254 	WREG32(R_006520_DC_LB_MEMORY_SPLIT, tmp);
255 
256 	/* Save number of lines the linebuffer leads before the scanout */
257 	if (mode1)
258 		rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
259 
260 	if (mode2)
261 		rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
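	/*
	 * With the 8192-pixel line buffer guess above, a 1920-pixel wide mode
	 * gives DIV_ROUND_UP(8192, 1920) = 5 lines of lead, for example.
	 */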
262 }
263 
264 struct rs690_watermark {
265 	u32        lb_request_fifo_depth;
266 	fixed20_12 num_line_pair;
267 	fixed20_12 estimated_width;
268 	fixed20_12 worst_case_latency;
269 	fixed20_12 consumption_rate;
270 	fixed20_12 active_time;
271 	fixed20_12 dbpp;
272 	fixed20_12 priority_mark_max;
273 	fixed20_12 priority_mark;
274 	fixed20_12 sclk;
275 };
276 
277 static void rs690_crtc_bandwidth_compute(struct radeon_device *rdev,
278 					 struct radeon_crtc *crtc,
279 					 struct rs690_watermark *wm,
280 					 bool low)
281 {
282 	struct drm_display_mode *mode = &crtc->base.mode;
283 	fixed20_12 a, b, c;
284 	fixed20_12 pclk, request_fifo_depth, tolerable_latency, estimated_width;
285 	fixed20_12 consumption_time, line_time, chunk_time, read_delay_latency;
286 	fixed20_12 sclk, core_bandwidth, max_bandwidth;
287 	u32 selected_sclk;
288 
289 	if (!crtc->base.enabled) {
290 		/* FIXME: wouldn't it be better to set the priority mark to maximum? */
291 		wm->lb_request_fifo_depth = 4;
292 		return;
293 	}
294 
295 	if (((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880)) &&
296 	    (rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
297 		selected_sclk = radeon_dpm_get_sclk(rdev, low);
298 	else
299 		selected_sclk = rdev->pm.current_sclk;
300 
301 	/* sclk in MHz */
302 	a.full = dfixed_const(100);
303 	sclk.full = dfixed_const(selected_sclk);
304 	sclk.full = dfixed_div(sclk, a);
305 
306 	/* core_bandwidth = sclk(MHz) * 16 */
307 	a.full = dfixed_const(16);
308 	core_bandwidth.full = dfixed_div(rdev->pm.sclk, a);
309 
310 	if (crtc->vsc.full > dfixed_const(2))
311 		wm->num_line_pair.full = dfixed_const(2);
312 	else
313 		wm->num_line_pair.full = dfixed_const(1);
314 
315 	b.full = dfixed_const(mode->crtc_hdisplay);
316 	c.full = dfixed_const(256);
317 	a.full = dfixed_div(b, c);
318 	request_fifo_depth.full = dfixed_mul(a, wm->num_line_pair);
319 	request_fifo_depth.full = dfixed_ceil(request_fifo_depth);
320 	if (a.full < dfixed_const(4)) {
321 		wm->lb_request_fifo_depth = 4;
322 	} else {
323 		wm->lb_request_fifo_depth = dfixed_trunc(request_fifo_depth);
324 	}
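	/*
	 * Example of the request FIFO sizing (assuming a plain 1920-wide mode
	 * and a single line pair): 1920 / 256 = 7.5, so request_fifo_depth =
	 * ceil(7.5 * 1) = 8 and lb_request_fifo_depth becomes 8; anything
	 * narrower than 1024 pixels falls back to the minimum depth of 4.
	 */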
325 
326 	/* Determine consumption rate
327 	 *  pclk = pixel clock period(ns) = 1000 / (mode.clock / 1000)
328 	 *  vtaps = number of vertical taps,
329 	 *  vsc = vertical scaling ratio, defined as source/destination
330 	 *  hsc = horizontal scaling ratio, defined as source/destination
331 	 */
332 	a.full = dfixed_const(mode->clock);
333 	b.full = dfixed_const(1000);
334 	a.full = dfixed_div(a, b);
335 	pclk.full = dfixed_div(b, a);
336 	if (crtc->rmx_type != RMX_OFF) {
337 		b.full = dfixed_const(2);
338 		if (crtc->vsc.full > b.full)
339 			b.full = crtc->vsc.full;
340 		b.full = dfixed_mul(b, crtc->hsc);
341 		c.full = dfixed_const(2);
342 		b.full = dfixed_div(b, c);
343 		consumption_time.full = dfixed_div(pclk, b);
344 	} else {
345 		consumption_time.full = pclk.full;
346 	}
347 	a.full = dfixed_const(1);
348 	wm->consumption_rate.full = dfixed_div(a, consumption_time);
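	/*
	 * Rough example, assuming a 1080p mode with mode->clock = 148500 (kHz)
	 * and no scaling: pclk = 1000 / 148.5 ~= 6.73 ns per pixel, so
	 * consumption_rate ~= 1 / 6.73 ~= 0.149 pixels per ns.
	 */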
349 
350 
351 	/* Determine line time
352 	 *  LineTime = total time for one line of display
353 	 *  htotal = total number of horizontal pixels
354 	 *  pclk = pixel clock period(ns)
355 	 */
356 	a.full = dfixed_const(crtc->base.mode.crtc_htotal);
357 	line_time.full = dfixed_mul(a, pclk);
358 
359 	/* Determine active time
360 	 *  ActiveTime = time of active region of display within one line,
361 	 *  hactive = total number of horizontal active pixels
362 	 *  htotal = total number of horizontal pixels
363 	 */
364 	a.full = dfixed_const(crtc->base.mode.crtc_htotal);
365 	b.full = dfixed_const(crtc->base.mode.crtc_hdisplay);
366 	wm->active_time.full = dfixed_mul(line_time, b);
367 	wm->active_time.full = dfixed_div(wm->active_time, a);
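	/*
	 * Continuing the 1080p example (htotal = 2200, hdisplay = 1920):
	 * line_time ~= 2200 * 6.73 ~= 14.8 us and
	 * active_time ~= 14.8 us * 1920 / 2200 ~= 12.9 us.
	 */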
368 
369 	/* Maximum bandwidth is the minimum bandwidth of all components */
370 	max_bandwidth = core_bandwidth;
371 	if (rdev->mc.igp_sideport_enabled) {
372 		if (max_bandwidth.full > rdev->pm.sideport_bandwidth.full &&
373 			rdev->pm.sideport_bandwidth.full)
374 			max_bandwidth = rdev->pm.sideport_bandwidth;
375 		read_delay_latency.full = dfixed_const(370 * 800);
376 		a.full = dfixed_const(1000);
377 		b.full = dfixed_div(rdev->pm.igp_sideport_mclk, a);
378 		read_delay_latency.full = dfixed_div(read_delay_latency, b);
379 		read_delay_latency.full = dfixed_mul(read_delay_latency, a);
380 	} else {
381 		if (max_bandwidth.full > rdev->pm.k8_bandwidth.full &&
382 			rdev->pm.k8_bandwidth.full)
383 			max_bandwidth = rdev->pm.k8_bandwidth;
384 		if (max_bandwidth.full > rdev->pm.ht_bandwidth.full &&
385 			rdev->pm.ht_bandwidth.full)
386 			max_bandwidth = rdev->pm.ht_bandwidth;
387 		read_delay_latency.full = dfixed_const(5000);
388 	}
389 
390 	/* sclk = system clocks(ns) = 1000 / max_bandwidth / 16 */
391 	a.full = dfixed_const(16);
392 	sclk.full = dfixed_mul(max_bandwidth, a);
393 	a.full = dfixed_const(1000);
394 	sclk.full = dfixed_div(a, sclk);
395 	/* Determine chunk time
396 	 * ChunkTime = the time it takes the DCP to send one chunk of data
397 	 * to the LB which consists of pipeline delay and inter chunk gap
398 	 * sclk = system clock(ns)
399 	 */
400 	a.full = dfixed_const(256 * 13);
401 	chunk_time.full = dfixed_mul(sclk, a);
402 	a.full = dfixed_const(10);
403 	chunk_time.full = dfixed_div(chunk_time, a);
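	/*
	 * Example (assuming max_bandwidth = 1600, as in the HT default case):
	 * sclk = 1000 / (1600 * 16) ~= 0.039 ns, so
	 * chunk_time = 0.039 * 256 * 13 / 10 ~= 13 ns.
	 */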
404 
405 	/* Determine the worst case latency
406 	 * NumLinePair = Number of line pairs to request(1=2 lines, 2=4 lines)
407 	 * WorstCaseLatency = worst case time from urgent to when the MC starts
408 	 *                    to return data
409 	 * READ_DELAY_IDLE_MAX = constant of 1us
410 	 * ChunkTime = time it takes the DCP to send one chunk of data to the LB
411 	 *             which consists of pipeline delay and inter chunk gap
412 	 */
413 	if (dfixed_trunc(wm->num_line_pair) > 1) {
414 		a.full = dfixed_const(3);
415 		wm->worst_case_latency.full = dfixed_mul(a, chunk_time);
416 		wm->worst_case_latency.full += read_delay_latency.full;
417 	} else {
418 		a.full = dfixed_const(2);
419 		wm->worst_case_latency.full = dfixed_mul(a, chunk_time);
420 		wm->worst_case_latency.full += read_delay_latency.full;
421 	}
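	/*
	 * With one line pair, the ~13 ns chunk_time above and the non-sideport
	 * read_delay_latency of 5000, this gives roughly 2 * 13 + 5000 = 5026
	 * as the worst case latency (illustrative values only).
	 */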
422 
423 	/* Determine the tolerable latency
424 	 * TolerableLatency = Any given request has only 1 line time
425 	 *                    for the data to be returned
426 	 * LBRequestFifoDepth = Number of chunk requests the LB can
427 	 *                      put into the request FIFO for a display
428 	 *  LineTime = total time for one line of display
429 	 *  ChunkTime = the time it takes the DCP to send one chunk
430 	 *              of data to the LB which consists of
431 	 *  pipeline delay and inter chunk gap
432 	 */
433 	if ((2+wm->lb_request_fifo_depth) >= dfixed_trunc(request_fifo_depth)) {
434 		tolerable_latency.full = line_time.full;
435 	} else {
436 		tolerable_latency.full = dfixed_const(wm->lb_request_fifo_depth - 2);
437 		tolerable_latency.full = request_fifo_depth.full - tolerable_latency.full;
438 		tolerable_latency.full = dfixed_mul(tolerable_latency, chunk_time);
439 		tolerable_latency.full = line_time.full - tolerable_latency.full;
440 	}
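	/*
	 * In the 1920-wide example, lb_request_fifo_depth and
	 * request_fifo_depth are both 8, so 2 + 8 >= 8 and the tolerable
	 * latency is simply one full line time (~14.8 us above).
	 */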
441 	/* We assume worst case 32bits (4 bytes) */
442 	wm->dbpp.full = dfixed_const(4 * 8);
443 
444 	/* Determine the maximum priority mark
445 	 *  width = viewport width in pixels
446 	 */
447 	a.full = dfixed_const(16);
448 	wm->priority_mark_max.full = dfixed_const(crtc->base.mode.crtc_hdisplay);
449 	wm->priority_mark_max.full = dfixed_div(wm->priority_mark_max, a);
450 	wm->priority_mark_max.full = dfixed_ceil(wm->priority_mark_max);
451 
452 	/* Determine estimated width */
453 	estimated_width.full = tolerable_latency.full - wm->worst_case_latency.full;
454 	estimated_width.full = dfixed_div(estimated_width, consumption_time);
455 	if (dfixed_trunc(estimated_width) > crtc->base.mode.crtc_hdisplay) {
456 		wm->priority_mark.full = dfixed_const(10);
457 	} else {
458 		a.full = dfixed_const(16);
459 		wm->priority_mark.full = dfixed_div(estimated_width, a);
460 		wm->priority_mark.full = dfixed_ceil(wm->priority_mark);
461 		wm->priority_mark.full = wm->priority_mark_max.full - wm->priority_mark.full;
462 	}
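	/*
	 * In the running 1080p example the maximum mark is
	 * ceil(1920 / 16) = 120; the estimated width works out well under
	 * 1920 pixels, so the final priority mark ends up as 120 minus the
	 * width-derived mark (approximate, illustrative values only).
	 */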
463 }
464 
465 static void rs690_compute_mode_priority(struct radeon_device *rdev,
466 					struct rs690_watermark *wm0,
467 					struct rs690_watermark *wm1,
468 					struct drm_display_mode *mode0,
469 					struct drm_display_mode *mode1,
470 					u32 *d1mode_priority_a_cnt,
471 					u32 *d2mode_priority_a_cnt)
472 {
473 	fixed20_12 priority_mark02, priority_mark12, fill_rate;
474 	fixed20_12 a, b;
475 
476 	*d1mode_priority_a_cnt = S_006548_D1MODE_PRIORITY_A_OFF(1);
477 	*d2mode_priority_a_cnt = S_006548_D1MODE_PRIORITY_A_OFF(1);
478 
479 	if (mode0 && mode1) {
480 		if (dfixed_trunc(wm0->dbpp) > 64)
481 			a.full = dfixed_mul(wm0->dbpp, wm0->num_line_pair);
482 		else
483 			a.full = wm0->num_line_pair.full;
484 		if (dfixed_trunc(wm1->dbpp) > 64)
485 			b.full = dfixed_mul(wm1->dbpp, wm1->num_line_pair);
486 		else
487 			b.full = wm1->num_line_pair.full;
488 		a.full += b.full;
489 		fill_rate.full = dfixed_div(wm0->sclk, a);
490 		if (wm0->consumption_rate.full > fill_rate.full) {
491 			b.full = wm0->consumption_rate.full - fill_rate.full;
492 			b.full = dfixed_mul(b, wm0->active_time);
493 			a.full = dfixed_mul(wm0->worst_case_latency,
494 						wm0->consumption_rate);
495 			a.full = a.full + b.full;
496 			b.full = dfixed_const(16 * 1000);
497 			priority_mark02.full = dfixed_div(a, b);
498 		} else {
499 			a.full = dfixed_mul(wm0->worst_case_latency,
500 						wm0->consumption_rate);
501 			b.full = dfixed_const(16 * 1000);
502 			priority_mark02.full = dfixed_div(a, b);
503 		}
504 		if (wm1->consumption_rate.full > fill_rate.full) {
505 			b.full = wm1->consumption_rate.full - fill_rate.full;
506 			b.full = dfixed_mul(b, wm1->active_time);
507 			a.full = dfixed_mul(wm1->worst_case_latency,
508 						wm1->consumption_rate);
509 			a.full = a.full + b.full;
510 			b.full = dfixed_const(16 * 1000);
511 			priority_mark12.full = dfixed_div(a, b);
512 		} else {
513 			a.full = dfixed_mul(wm1->worst_case_latency,
514 						wm1->consumption_rate);
515 			b.full = dfixed_const(16 * 1000);
516 			priority_mark12.full = dfixed_div(a, b);
517 		}
518 		if (wm0->priority_mark.full > priority_mark02.full)
519 			priority_mark02.full = wm0->priority_mark.full;
520 		if (wm0->priority_mark_max.full > priority_mark02.full)
521 			priority_mark02.full = wm0->priority_mark_max.full;
522 		if (wm1->priority_mark.full > priority_mark12.full)
523 			priority_mark12.full = wm1->priority_mark.full;
524 		if (wm1->priority_mark_max.full > priority_mark12.full)
525 			priority_mark12.full = wm1->priority_mark_max.full;
526 		*d1mode_priority_a_cnt = dfixed_trunc(priority_mark02);
527 		*d2mode_priority_a_cnt = dfixed_trunc(priority_mark12);
528 		if (rdev->disp_priority == 2) {
529 			*d1mode_priority_a_cnt |= S_006548_D1MODE_PRIORITY_A_ALWAYS_ON(1);
530 			*d2mode_priority_a_cnt |= S_006D48_D2MODE_PRIORITY_A_ALWAYS_ON(1);
531 		}
532 	} else if (mode0) {
533 		if (dfixed_trunc(wm0->dbpp) > 64)
534 			a.full = dfixed_mul(wm0->dbpp, wm0->num_line_pair);
535 		else
536 			a.full = wm0->num_line_pair.full;
537 		fill_rate.full = dfixed_div(wm0->sclk, a);
538 		if (wm0->consumption_rate.full > fill_rate.full) {
539 			b.full = wm0->consumption_rate.full - fill_rate.full;
540 			b.full = dfixed_mul(b, wm0->active_time);
541 			a.full = dfixed_mul(wm0->worst_case_latency,
542 						wm0->consumption_rate);
543 			a.full = a.full + b.full;
544 			b.full = dfixed_const(16 * 1000);
545 			priority_mark02.full = dfixed_div(a, b);
546 		} else {
547 			a.full = dfixed_mul(wm0->worst_case_latency,
548 						wm0->consumption_rate);
549 			b.full = dfixed_const(16 * 1000);
550 			priority_mark02.full = dfixed_div(a, b);
551 		}
552 		if (wm0->priority_mark.full > priority_mark02.full)
553 			priority_mark02.full = wm0->priority_mark.full;
554 		if (wm0->priority_mark_max.full > priority_mark02.full)
555 			priority_mark02.full = wm0->priority_mark_max.full;
556 		*d1mode_priority_a_cnt = dfixed_trunc(priority_mark02);
557 		if (rdev->disp_priority == 2)
558 			*d1mode_priority_a_cnt |= S_006548_D1MODE_PRIORITY_A_ALWAYS_ON(1);
559 	} else if (mode1) {
560 		if (dfixed_trunc(wm1->dbpp) > 64)
561 			a.full = dfixed_mul(wm1->dbpp, wm1->num_line_pair);
562 		else
563 			a.full = wm1->num_line_pair.full;
564 		fill_rate.full = dfixed_div(wm1->sclk, a);
565 		if (wm1->consumption_rate.full > fill_rate.full) {
566 			b.full = wm1->consumption_rate.full - fill_rate.full;
567 			b.full = dfixed_mul(b, wm1->active_time);
568 			a.full = dfixed_mul(wm1->worst_case_latency,
569 						wm1->consumption_rate);
570 			a.full = a.full + b.full;
571 			b.full = dfixed_const(16 * 1000);
572 			priority_mark12.full = dfixed_div(a, b);
573 		} else {
574 			a.full = dfixed_mul(wm1->worst_case_latency,
575 						wm1->consumption_rate);
576 			b.full = dfixed_const(16 * 1000);
577 			priority_mark12.full = dfixed_div(a, b);
578 		}
579 		if (wm1->priority_mark.full > priority_mark12.full)
580 			priority_mark12.full = wm1->priority_mark.full;
581 		if (wm1->priority_mark_max.full > priority_mark12.full)
582 			priority_mark12.full = wm1->priority_mark_max.full;
583 		*d2mode_priority_a_cnt = dfixed_trunc(priority_mark12);
584 		if (rdev->disp_priority == 2)
585 			*d2mode_priority_a_cnt |= S_006D48_D2MODE_PRIORITY_A_ALWAYS_ON(1);
586 	}
587 }
588 
589 void rs690_bandwidth_update(struct radeon_device *rdev)
590 {
591 	struct drm_display_mode *mode0 = NULL;
592 	struct drm_display_mode *mode1 = NULL;
593 	struct rs690_watermark wm0_high, wm0_low;
594 	struct rs690_watermark wm1_high, wm1_low;
595 	u32 tmp;
596 	u32 d1mode_priority_a_cnt, d1mode_priority_b_cnt;
597 	u32 d2mode_priority_a_cnt, d2mode_priority_b_cnt;
598 
599 	if (!rdev->mode_info.mode_config_initialized)
600 		return;
601 
602 	radeon_update_display_priority(rdev);
603 
604 	if (rdev->mode_info.crtcs[0]->base.enabled)
605 		mode0 = &rdev->mode_info.crtcs[0]->base.mode;
606 	if (rdev->mode_info.crtcs[1]->base.enabled)
607 		mode1 = &rdev->mode_info.crtcs[1]->base.mode;
608 	/*
609 	 * Set display0/1 priority up in the memory controller for
610 	 * modes if the user specifies HIGH for the displaypriority
611 	 * option.
612 	 */
613 	if ((rdev->disp_priority == 2) &&
614 	    ((rdev->family == CHIP_RS690) || (rdev->family == CHIP_RS740))) {
615 		tmp = RREG32_MC(R_000104_MC_INIT_MISC_LAT_TIMER);
616 		tmp &= C_000104_MC_DISP0R_INIT_LAT;
617 		tmp &= C_000104_MC_DISP1R_INIT_LAT;
618 		if (mode0)
619 			tmp |= S_000104_MC_DISP0R_INIT_LAT(1);
620 		if (mode1)
621 			tmp |= S_000104_MC_DISP1R_INIT_LAT(1);
622 		WREG32_MC(R_000104_MC_INIT_MISC_LAT_TIMER, tmp);
623 	}
624 	rs690_line_buffer_adjust(rdev, mode0, mode1);
625 
626 	if ((rdev->family == CHIP_RS690) || (rdev->family == CHIP_RS740))
627 		WREG32(R_006C9C_DCP_CONTROL, 0);
628 	if ((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
629 		WREG32(R_006C9C_DCP_CONTROL, 2);
630 
631 	rs690_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[0], &wm0_high, false);
632 	rs690_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[1], &wm1_high, false);
633 
634 	rs690_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[0], &wm0_low, true);
635 	rs690_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[1], &wm1_low, true);
636 
637 	tmp = (wm0_high.lb_request_fifo_depth - 1);
638 	tmp |= (wm1_high.lb_request_fifo_depth - 1) << 16;
639 	WREG32(R_006D58_LB_MAX_REQ_OUTSTANDING, tmp);
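	/*
	 * The two FIFO depths are packed minus one into the low and high
	 * halves of the register; e.g. a depth of 8 on both crtcs writes
	 * 0x00070007.
	 */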
640 
641 	rs690_compute_mode_priority(rdev,
642 				    &wm0_high, &wm1_high,
643 				    mode0, mode1,
644 				    &d1mode_priority_a_cnt, &d2mode_priority_a_cnt);
645 	rs690_compute_mode_priority(rdev,
646 				    &wm0_low, &wm1_low,
647 				    mode0, mode1,
648 				    &d1mode_priority_b_cnt, &d2mode_priority_b_cnt);
649 
650 	WREG32(R_006548_D1MODE_PRIORITY_A_CNT, d1mode_priority_a_cnt);
651 	WREG32(R_00654C_D1MODE_PRIORITY_B_CNT, d1mode_priority_b_cnt);
652 	WREG32(R_006D48_D2MODE_PRIORITY_A_CNT, d2mode_priority_a_cnt);
653 	WREG32(R_006D4C_D2MODE_PRIORITY_B_CNT, d2mode_priority_b_cnt);
654 }
655 
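/*
 * The next two helpers implement the indexed MC register protocol: write the
 * register number into MC_INDEX (with the write-enable bit set for stores),
 * then move the payload through MC_DATA.  The RREG32_MC()/WREG32_MC() macros
 * used throughout this file are expected to reach these helpers through the
 * asic callback table, though that wiring lives outside this file.
 */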
656 uint32_t rs690_mc_rreg(struct radeon_device *rdev, uint32_t reg)
657 {
658 	unsigned long flags;
659 	uint32_t r;
660 
661 	spin_lock_irqsave(&rdev->mc_idx_lock, flags);
662 	WREG32(R_000078_MC_INDEX, S_000078_MC_IND_ADDR(reg));
663 	r = RREG32(R_00007C_MC_DATA);
664 	WREG32(R_000078_MC_INDEX, ~C_000078_MC_IND_ADDR);
665 	spin_unlock_irqrestore(&rdev->mc_idx_lock, flags);
666 	return r;
667 }
668 
669 void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
670 {
671 	unsigned long flags;
672 
673 	spin_lock_irqsave(&rdev->mc_idx_lock, flags);
674 	WREG32(R_000078_MC_INDEX, S_000078_MC_IND_ADDR(reg) |
675 		S_000078_MC_IND_WR_EN(1));
676 	WREG32(R_00007C_MC_DATA, v);
677 	WREG32(R_000078_MC_INDEX, 0x7F);
678 	spin_unlock_irqrestore(&rdev->mc_idx_lock, flags);
679 }
680 
681 static void rs690_mc_program(struct radeon_device *rdev)
682 {
683 	struct rv515_mc_save save;
684 
685 	/* Stops all mc clients */
686 	rv515_mc_stop(rdev, &save);
687 
688 	/* Wait for mc idle */
689 	if (rs690_mc_wait_for_idle(rdev))
690 		dev_warn(rdev->dev, "Wait MC idle timeout before updating MC.\n");
691 	/* Program the MC; it should be a 32-bit limited address space */
692 	WREG32_MC(R_000100_MCCFG_FB_LOCATION,
693 			S_000100_MC_FB_START(rdev->mc.vram_start >> 16) |
694 			S_000100_MC_FB_TOP(rdev->mc.vram_end >> 16));
695 	WREG32(R_000134_HDP_FB_LOCATION,
696 		S_000134_HDP_FB_START(rdev->mc.vram_start >> 16));
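	/*
	 * The start/top fields hold bits 31:16 of the addresses; for instance
	 * a VRAM window of 0x00000000 - 0x0fffffff would program a start
	 * field of 0x0000 and a top field of 0x0fff, which is why the range
	 * has to fit in a 32-bit address space.
	 */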
697 
698 	rv515_mc_resume(rdev, &save);
699 }
700 
701 static int rs690_startup(struct radeon_device *rdev)
702 {
703 	int r;
704 
705 	rs690_mc_program(rdev);
706 	/* Resume clock */
707 	rv515_clock_startup(rdev);
708 	/* Initialize GPU configuration (# pipes, ...) */
709 	rs690_gpu_init(rdev);
710 	/* Initialize GART (initialize after TTM so we can allocate
711 	 * memory through TTM but finalize after TTM) */
712 	r = rs400_gart_enable(rdev);
713 	if (r)
714 		return r;
715 
716 	/* allocate wb buffer */
717 	r = radeon_wb_init(rdev);
718 	if (r)
719 		return r;
720 
721 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
722 	if (r) {
723 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
724 		return r;
725 	}
726 
727 	/* Enable IRQ */
728 	if (!rdev->irq.installed) {
729 		r = radeon_irq_kms_init(rdev);
730 		if (r)
731 			return r;
732 	}
733 
734 	rs600_irq_set(rdev);
735 	rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
736 	/* 1M ring buffer */
737 	r = r100_cp_init(rdev, 1024 * 1024);
738 	if (r) {
739 		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
740 		return r;
741 	}
742 
743 	r = radeon_ib_pool_init(rdev);
744 	if (r) {
745 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
746 		return r;
747 	}
748 
749 	r = radeon_audio_init(rdev);
750 	if (r) {
751 		dev_err(rdev->dev, "failed initializing audio\n");
752 		return r;
753 	}
754 
755 	return 0;
756 }
757 
758 int rs690_resume(struct radeon_device *rdev)
759 {
760 	int r;
761 
762 	/* Make sure the GART is not working */
763 	rs400_gart_disable(rdev);
764 	/* Resume clock before doing reset */
765 	rv515_clock_startup(rdev);
766 	/* Reset the GPU before posting, otherwise ATOM will enter an infinite loop */
767 	if (radeon_asic_reset(rdev)) {
768 		dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
769 			RREG32(R_000E40_RBBM_STATUS),
770 			RREG32(R_0007C0_CP_STAT));
771 	}
772 	/* post */
773 	atom_asic_init(rdev->mode_info.atom_context);
774 	/* Resume clock after posting */
775 	rv515_clock_startup(rdev);
776 	/* Initialize surface registers */
777 	radeon_surface_init(rdev);
778 
779 	rdev->accel_working = true;
780 	r = rs690_startup(rdev);
781 	if (r) {
782 		rdev->accel_working = false;
783 	}
784 	return r;
785 }
786 
787 int rs690_suspend(struct radeon_device *rdev)
788 {
789 	radeon_pm_suspend(rdev);
790 	radeon_audio_fini(rdev);
791 	r100_cp_disable(rdev);
792 	radeon_wb_disable(rdev);
793 	rs600_irq_disable(rdev);
794 	rs400_gart_disable(rdev);
795 	return 0;
796 }
797 
798 void rs690_fini(struct radeon_device *rdev)
799 {
800 	radeon_pm_fini(rdev);
801 	radeon_audio_fini(rdev);
802 	r100_cp_fini(rdev);
803 	radeon_wb_fini(rdev);
804 	radeon_ib_pool_fini(rdev);
805 	radeon_gem_fini(rdev);
806 	rs400_gart_fini(rdev);
807 	radeon_irq_kms_fini(rdev);
808 	radeon_fence_driver_fini(rdev);
809 	radeon_bo_fini(rdev);
810 	radeon_atombios_fini(rdev);
811 	kfree(rdev->bios);
812 	rdev->bios = NULL;
813 }
814 
815 int rs690_init(struct radeon_device *rdev)
816 {
817 	int r;
818 
819 	/* Disable VGA */
820 	rv515_vga_render_disable(rdev);
821 	/* Initialize scratch registers */
822 	radeon_scratch_init(rdev);
823 	/* Initialize surface registers */
824 	radeon_surface_init(rdev);
825 	/* restore some registers to sane defaults */
826 	r100_restore_sanity(rdev);
827 	/* TODO: disabling VGA needs to use a VGA request */
828 	/* BIOS*/
829 	if (!radeon_get_bios(rdev)) {
830 		if (ASIC_IS_AVIVO(rdev))
831 			return -EINVAL;
832 	}
833 	if (rdev->is_atom_bios) {
834 		r = radeon_atombios_init(rdev);
835 		if (r)
836 			return r;
837 	} else {
838 		dev_err(rdev->dev, "Expecting atombios for RV515 GPU\n");
839 		return -EINVAL;
840 	}
841 	/* Reset the GPU before posting, otherwise ATOM will enter an infinite loop */
842 	if (radeon_asic_reset(rdev)) {
843 		dev_warn(rdev->dev,
844 			"GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
845 			RREG32(R_000E40_RBBM_STATUS),
846 			RREG32(R_0007C0_CP_STAT));
847 	}
848 	/* check if the card is posted or not */
849 	if (radeon_boot_test_post_card(rdev) == false)
850 		return -EINVAL;
851 
852 	/* Initialize clocks */
853 	radeon_get_clock_info(rdev->ddev);
854 	/* initialize memory controller */
855 	rs690_mc_init(rdev);
856 	rv515_debugfs(rdev);
857 	/* Fence driver */
858 	r = radeon_fence_driver_init(rdev);
859 	if (r)
860 		return r;
861 	/* Memory manager */
862 	r = radeon_bo_init(rdev);
863 	if (r)
864 		return r;
865 	r = rs400_gart_init(rdev);
866 	if (r)
867 		return r;
868 	rs600_set_safe_registers(rdev);
869 
870 	/* Initialize power management */
871 	radeon_pm_init(rdev);
872 
873 	rdev->accel_working = true;
874 	r = rs690_startup(rdev);
875 	if (r) {
876 		/* Something went wrong with the accel init, stop accel */
877 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
878 		r100_cp_fini(rdev);
879 		radeon_wb_fini(rdev);
880 		radeon_ib_pool_fini(rdev);
881 		rs400_gart_fini(rdev);
882 		radeon_irq_kms_fini(rdev);
883 		rdev->accel_working = false;
884 	}
885 	return 0;
886 }
887