/*	$NetBSD: amdgpu_ring.c,v 1.4 2020/02/14 14:34:58 maya Exp $	*/

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 *          Christian König
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_ring.c,v 1.4 2020/02/14 14:34:58 maya Exp $");

#include <linux/seq_file.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"

/*
 * Rings
 * Most engines on the GPU are fed via ring buffers.  Ring
 * buffers are areas of GPU accessible memory that the host
 * writes commands into and the GPU reads commands out of.
 * There is a rptr (read pointer) that determines where the
 * GPU is currently reading, and a wptr (write pointer)
 * which determines where the host has written.  When the
 * pointers are equal, the ring is idle.  When the host
 * writes commands to the ring buffer, it increments the
 * wptr.  The GPU then starts fetching commands and executes
 * them until the pointers are equal again.
 */
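/*
 * Illustrative example (numbers are made up, not tied to any ASIC):
 * with a 1024-dword ring, rptr == wptr == 0 means the ring is idle.
 * If the host writes 8 command dwords and bumps wptr to 8, the GPU
 * fetches dwords 0..7 and advances rptr until rptr == wptr == 8,
 * at which point the ring is idle again.  Both pointers wrap modulo
 * the ring size, which is why the ring size is kept a power of two.
 */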
static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);

/**
 * amdgpu_ring_free_size - update the free size
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the free dw slots in the ring buffer (all asics).
 */
void amdgpu_ring_free_size(struct amdgpu_ring *ring)
{
	uint32_t rptr = amdgpu_ring_get_rptr(ring);

	/* This works because ring_size is a power of 2 */
	ring->ring_free_dw = rptr + (ring->ring_size / 4);
	ring->ring_free_dw -= ring->wptr;
	ring->ring_free_dw &= ring->ptr_mask;
	if (!ring->ring_free_dw) {
		/* this is an empty ring */
		ring->ring_free_dw = ring->ring_size / 4;
	}
}
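
/*
 * Worked example of the computation above (illustrative numbers only):
 * a 4096-byte ring holds 1024 dwords, so ptr_mask is 1023.  If
 * rptr == 100 and wptr == 260, then
 *
 *	ring_free_dw = (100 + 1024 - 260) & 1023 = 864
 *
 * dwords can still be written before the host catches up with the GPU.
 * When the two pointers are equal the masked result is 0, which is why
 * that case is special-cased to "completely free" above.
 */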

/**
 * amdgpu_ring_alloc - allocate space on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Allocate @ndw dwords in the ring buffer (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
{
	int r;

	/* make sure we aren't trying to allocate more space than there is on the ring */
	if (ndw > (ring->ring_size / 4))
		return -ENOMEM;
	/* Align requested size with padding so unlock_commit can
	 * pad safely */
	amdgpu_ring_free_size(ring);
	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
	while (ndw > (ring->ring_free_dw - 1)) {
		amdgpu_ring_free_size(ring);
		if (ndw < ring->ring_free_dw) {
			break;
		}
		r = amdgpu_fence_wait_next(ring);
		if (r)
			return r;
	}
	ring->count_dw = ndw;
	ring->wptr_old = ring->wptr;
	return 0;
}

/**
 * amdgpu_ring_lock - lock the ring and allocate space on it
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Lock the ring and allocate @ndw dwords in the ring buffer
 * (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
{
	int r;

	mutex_lock(ring->ring_lock);
	r = amdgpu_ring_alloc(ring, ndw);
	if (r) {
		mutex_unlock(ring->ring_lock);
		return r;
	}
	return 0;
}

/**
 * amdgpu_ring_insert_nop - insert NOP packets
 *
 * @ring: amdgpu_ring structure holding ring information
 * @count: the number of NOP packets to insert
 *
 * This is the generic insert_nop function for rings except SDMA
 */
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	for (i = 0; i < count; i++)
		amdgpu_ring_write(ring, ring->nop);
}

/**
 * amdgpu_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the wptr (write pointer) to tell the GPU to
 * execute new commands on the ring buffer (all asics).
 */
void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
	uint32_t count;

	/* We pad to match fetch size */
	count = ring->align_mask + 1 - (ring->wptr & ring->align_mask);
	count %= ring->align_mask + 1;
	ring->funcs->insert_nop(ring, count);

	mb();
	amdgpu_ring_set_wptr(ring);
}
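
/*
 * Worked example of the padding above (illustrative values only):
 * with align_mask == 0xf the GPU fetches in 16-dword chunks.  If
 * (wptr & align_mask) == 5 at commit time, then
 *
 *	count = (16 - 5) % 16 = 11
 *
 * NOP dwords are written so the new wptr is a multiple of 16.  When
 * wptr is already aligned, the modulo reduces count to 0 and nothing
 * is padded.
 */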

/**
 * amdgpu_ring_unlock_commit - tell the GPU to execute the new
 * commands on the ring buffer and unlock it
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Call amdgpu_ring_commit() then unlock the ring (all asics).
 */
void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring)
{
	amdgpu_ring_commit(ring);
	mutex_unlock(ring->ring_lock);
}
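
/*
 * Typical submission pattern using the helpers above (a minimal
 * sketch only; the packet contents and the 16-dword request size are
 * made up for illustration and do not correspond to a real packet
 * format):
 *
 *	r = amdgpu_ring_lock(ring, 16);
 *	if (r)
 *		return r;
 *	amdgpu_ring_write(ring, some_packet_header);
 *	amdgpu_ring_write(ring, some_packet_payload);
 *	amdgpu_ring_unlock_commit(ring);
 *
 * On an error after a successful lock, amdgpu_ring_unlock_undo()
 * rolls wptr back to wptr_old and drops the lock instead.
 */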

/**
 * amdgpu_ring_undo - reset the wptr
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Reset the driver's copy of the wptr (all asics).
 */
void amdgpu_ring_undo(struct amdgpu_ring *ring)
{
	ring->wptr = ring->wptr_old;
}

/**
 * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Call amdgpu_ring_undo() then unlock the ring (all asics).
 */
void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
{
	amdgpu_ring_undo(ring);
	mutex_unlock(ring->ring_lock);
}

/**
 * amdgpu_ring_backup - Back up the content of a ring
 *
 * @ring: the ring we want to back up
 *
 * Saves all unprocessed commits from a ring; returns the number of dwords saved.
 */
unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
			    uint32_t **data)
{
	unsigned size, ptr, i;

	/* just in case lock the ring */
	mutex_lock(ring->ring_lock);
	*data = NULL;

	if (ring->ring_obj == NULL) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}

	/* it doesn't make sense to save anything if all fences are signaled */
	if (!amdgpu_fence_count_emitted(ring)) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}

	ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);

	size = ring->wptr + (ring->ring_size / 4);
	size -= ptr;
	size &= ring->ptr_mask;
	if (size == 0) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}

	/* and then save the content of the ring */
	*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
	if (!*data) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}
	for (i = 0; i < size; ++i) {
		(*data)[i] = ring->ring[ptr++];
		ptr &= ring->ptr_mask;
	}

	mutex_unlock(ring->ring_lock);
	return size;
}

/**
 * amdgpu_ring_restore - append saved commands to the ring again
 *
 * @ring: ring to append commands to
 * @size: number of dwords we want to write
 * @data: saved commands
 *
 * Allocates space on the ring and restores the previously saved commands.
 */
int amdgpu_ring_restore(struct amdgpu_ring *ring,
			unsigned size, uint32_t *data)
{
	int i, r;

	if (!size || !data)
		return 0;

	/* restore the saved ring content */
	r = amdgpu_ring_lock(ring, size);
	if (r)
		return r;

	for (i = 0; i < size; ++i) {
		amdgpu_ring_write(ring, data[i]);
	}

	amdgpu_ring_unlock_commit(ring);
	kfree(data);
	return 0;
}

/**
 * amdgpu_ring_init - init driver ring struct.
 *
 * @adev: amdgpu_device pointer
 * @ring: amdgpu_ring structure holding ring information
 * @ring_size: size of the ring in bytes
 * @nop: nop packet for this ring
 * @align_mask: alignment mask used when padding command submissions
 * @irq_src: interrupt source to use for this ring
 * @irq_type: interrupt type to use for this ring
 * @ring_type: type of ring (gfx, compute, sdma, uvd or vce)
 *
 * Initialize the driver information for the selected ring (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
		     unsigned ring_size, u32 nop, u32 align_mask,
		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
		     enum amdgpu_ring_type ring_type)
{
	u32 rb_bufsz;
	int r;

	if (ring->adev == NULL) {
		if (adev->num_rings >= AMDGPU_MAX_RINGS)
			return -EINVAL;

		ring->adev = adev;
		ring->idx = adev->num_rings++;
		adev->rings[ring->idx] = ring;
		r = amdgpu_fence_driver_init_ring(ring);
		if (r)
			return r;
	}

	r = amdgpu_wb_get(adev, &ring->rptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_wb_get(adev, &ring->wptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_wb_get(adev, &ring->fence_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_wb_get(adev, &ring->next_rptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r);
		return r;
	}
	ring->next_rptr_gpu_addr = adev->wb.gpu_addr + (ring->next_rptr_offs * 4);
	ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
	spin_lock_init(&ring->fence_lock);
	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
	if (r) {
		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
		return r;
	}

	ring->ring_lock = &adev->ring_lock;
	/* Align ring size */
	rb_bufsz = order_base_2(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
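	/*
	 * Example of the rounding above (illustrative input only): a
	 * requested ring_size of 1048576 bytes gives
	 * order_base_2(1048576 / 8) == 17, so the ring is sized at
	 * (1 << 18) * 4 == 1048576 bytes.  Power-of-two requests are
	 * preserved, while other values are rounded up to a power of
	 * two so that the ptr_mask arithmetic works.
	 */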
	ring->ring_size = ring_size;
	ring->align_mask = align_mask;
	ring->nop = nop;
	ring->type = ring_type;

	/* Allocate ring buffer */
	if (ring->ring_obj == NULL) {
		r = amdgpu_bo_create(adev, ring->ring_size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0,
				     NULL, NULL, &ring->ring_obj);
		if (r) {
			dev_err(adev->dev, "(%d) ring create failed\n", r);
			return r;
		}
		r = amdgpu_bo_reserve(ring->ring_obj, false);
		if (unlikely(r != 0))
			return r;
		r = amdgpu_bo_pin(ring->ring_obj, AMDGPU_GEM_DOMAIN_GTT,
					&ring->gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(ring->ring_obj);
			dev_err(adev->dev, "(%d) ring pin failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(ring->ring_obj,
				       (void **)__UNVOLATILE(&ring->ring));
		amdgpu_bo_unreserve(ring->ring_obj);
		if (r) {
			dev_err(adev->dev, "(%d) ring map failed\n", r);
			return r;
		}
	}
	ring->ptr_mask = (ring->ring_size / 4) - 1;
	ring->ring_free_dw = ring->ring_size / 4;

	if (amdgpu_debugfs_ring_init(adev, ring)) {
		DRM_ERROR("Failed to register debugfs file for rings !\n");
	}
	return 0;
}

/**
 * amdgpu_ring_fini - tear down the driver ring struct.
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Tear down the driver information for the selected ring (all asics).
 */
void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
	int r;
	struct amdgpu_bo *ring_obj;

	if (ring->ring_lock == NULL)
		return;

	mutex_lock(ring->ring_lock);
	ring_obj = ring->ring_obj;
	ring->ready = false;
	ring->ring = NULL;
	ring->ring_obj = NULL;
	mutex_unlock(ring->ring_lock);

	amdgpu_wb_free(ring->adev, ring->fence_offs);
	amdgpu_wb_free(ring->adev, ring->rptr_offs);
	amdgpu_wb_free(ring->adev, ring->wptr_offs);
	amdgpu_wb_free(ring->adev, ring->next_rptr_offs);

	if (ring_obj) {
		r = amdgpu_bo_reserve(ring_obj, false);
		if (likely(r == 0)) {
			amdgpu_bo_kunmap(ring_obj);
			amdgpu_bo_unpin(ring_obj);
			amdgpu_bo_unreserve(ring_obj);
		}
		amdgpu_bo_unref(&ring_obj);
	}
}

/**
 * amdgpu_ring_from_fence - get ring from fence
 *
 * @f: fence structure
 *
 * Extract the ring a fence belongs to.  Handles both scheduler and
 * hardware fences.
 */
struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
{
	struct amdgpu_fence *a_fence;
	struct amd_sched_fence *s_fence;

	s_fence = to_amd_sched_fence(f);
	if (s_fence)
		return container_of(s_fence->sched, struct amdgpu_ring, sched);

	a_fence = to_amdgpu_fence(f);
	if (a_fence)
		return a_fence->ring;

	return NULL;
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)

static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int roffset = *(int*)node->info_ent->data;
	struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);

	uint32_t rptr, wptr, rptr_next;
	unsigned count, i, j;

	amdgpu_ring_free_size(ring);
	count = (ring->ring_size / 4) - ring->ring_free_dw;

	wptr = amdgpu_ring_get_wptr(ring);
	seq_printf(m, "wptr: 0x%08x [%5d]\n",
		   wptr, wptr);

	rptr = amdgpu_ring_get_rptr(ring);
	seq_printf(m, "rptr: 0x%08x [%5d]\n",
		   rptr, rptr);

	rptr_next = ~0;

	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
		   ring->wptr, ring->wptr);
	seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
		   ring->last_semaphore_signal_addr);
	seq_printf(m, "last semaphore wait addr   : 0x%016llx\n",
		   ring->last_semaphore_wait_addr);
	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);

	if (!ring->ready)
		return 0;

	/* print 32 dw before current rptr as often it's the last executed
	 * packet that is the root issue
	 */
	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
	for (j = 0; j <= (count + 32); j++) {
		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
		if (rptr == i)
			seq_puts(m, " *");
		if (rptr_next == i)
			seq_puts(m, " #");
		seq_puts(m, "\n");
		i = (i + 1) & ring->ptr_mask;
	}
	return 0;
}

/* TODO: clean this up !*/
static int amdgpu_gfx_index = offsetof(struct amdgpu_device, gfx.gfx_ring[0]);
static int cayman_cp1_index = offsetof(struct amdgpu_device, gfx.compute_ring[0]);
static int cayman_cp2_index = offsetof(struct amdgpu_device, gfx.compute_ring[1]);
static int amdgpu_dma1_index = offsetof(struct amdgpu_device, sdma.instance[0].ring);
static int amdgpu_dma2_index = offsetof(struct amdgpu_device, sdma.instance[1].ring);
static int r600_uvd_index = offsetof(struct amdgpu_device, uvd.ring);
static int si_vce1_index = offsetof(struct amdgpu_device, vce.ring[0]);
static int si_vce2_index = offsetof(struct amdgpu_device, vce.ring[1]);

static struct drm_info_list amdgpu_debugfs_ring_info_list[] = {
	{"amdgpu_ring_gfx", amdgpu_debugfs_ring_info, 0, &amdgpu_gfx_index},
	{"amdgpu_ring_cp1", amdgpu_debugfs_ring_info, 0, &cayman_cp1_index},
	{"amdgpu_ring_cp2", amdgpu_debugfs_ring_info, 0, &cayman_cp2_index},
	{"amdgpu_ring_dma1", amdgpu_debugfs_ring_info, 0, &amdgpu_dma1_index},
	{"amdgpu_ring_dma2", amdgpu_debugfs_ring_info, 0, &amdgpu_dma2_index},
	{"amdgpu_ring_uvd", amdgpu_debugfs_ring_info, 0, &r600_uvd_index},
	{"amdgpu_ring_vce1", amdgpu_debugfs_ring_info, 0, &si_vce1_index},
	{"amdgpu_ring_vce2", amdgpu_debugfs_ring_info, 0, &si_vce2_index},
};

#endif

static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
	unsigned i;
	for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) {
		struct drm_info_list *info = &amdgpu_debugfs_ring_info_list[i];
		int roffset = *(int*)amdgpu_debugfs_ring_info_list[i].data;
		struct amdgpu_ring *other = (void *)(((uint8_t*)adev) + roffset);
		unsigned r;

		if (other != ring)
			continue;

		r = amdgpu_debugfs_add_files(adev, info, 1);
		if (r)
			return r;
	}
#endif
	return 0;
}