/*	$NetBSD: amdgpu_test.c,v 1.4 2020/02/14 04:30:04 riastradh Exp $	*/

/*
 * Copyright 2009 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Michel Dänzer
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_test.c,v 1.4 2020/02/14 04:30:04 riastradh Exp $");

#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"

/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
static void amdgpu_do_test_moves(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_bo *vram_obj = NULL;
	struct amdgpu_bo **gtt_obj = NULL;
	uint64_t gtt_addr, vram_addr;
	unsigned n, size;
	int i, r;

	size = 1024 * 1024;

	/* Number of tests =
	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
	 */
	n = adev->mc.gtt_size - AMDGPU_IB_POOL_SIZE*64*1024;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		if (adev->rings[i])
			n -= adev->rings[i]->ring_size;
	if (adev->wb.wb_obj)
		n -= AMDGPU_GPU_PAGE_SIZE;
	if (adev->irq.ih.ring_obj)
		n -= adev->irq.ih.ring_size;
	n /= size;

	gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
	if (!gtt_obj) {
		DRM_ERROR("Failed to allocate %d pointers\n", n);
		r = 1;
		goto out_cleanup;
	}

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM, 0,
			     NULL, NULL, &vram_obj);
	if (r) {
		DRM_ERROR("Failed to create VRAM object\n");
		goto out_cleanup;
	}
	r = amdgpu_bo_reserve(vram_obj, false);
	if (unlikely(r != 0))
		goto out_unref;
	r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr);
	if (r) {
		DRM_ERROR("Failed to pin VRAM object\n");
		goto out_unres;
	}
	for (i = 0; i < n; i++) {
		void *gtt_map, *vram_map;
		void **gtt_start, **gtt_end;
		void **vram_start, **vram_end;
		struct fence *fence = NULL;

		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
				     NULL, gtt_obj + i);
		if (r) {
			DRM_ERROR("Failed to create GTT object %d\n", i);
			goto out_lclean;
		}

		r = amdgpu_bo_reserve(gtt_obj[i], false);
		if (unlikely(r != 0))
			goto out_lclean_unref;
		r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gtt_addr);
		if (r) {
			DRM_ERROR("Failed to pin GTT object %d\n", i);
			goto out_lclean_unres;
		}

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object %d\n", i);
			goto out_lclean_unpin;
		}

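		/* Fill the GTT BO with a position-dependent pattern: each
		 * pointer-sized slot receives its own kernel VA, so any
		 * misplaced data after the copy is detectable. */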
		for (gtt_start = gtt_map, gtt_end = gtt_map + size;
		     gtt_start < gtt_end;
		     gtt_start++)
			*gtt_start = gtt_start;

		amdgpu_bo_kunmap(gtt_obj[i]);

		r = amdgpu_copy_buffer(ring, gtt_addr, vram_addr,
				       size, NULL, &fence);

		if (r) {
			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
			goto out_lclean_unpin;
		}

		fence_put(fence);
		fence = NULL;	/* guard against a double put in the error path */

		r = amdgpu_bo_kmap(vram_obj, &vram_map);
		if (r) {
			DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
			goto out_lclean_unpin;
		}

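		/* Verify that VRAM now holds the GTT pattern; on mismatch,
		 * report the value found and the offending offsets within
		 * both apertures. */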
		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     vram_start < vram_end;
		     gtt_start++, vram_start++) {
			if (*vram_start != gtt_start) {
				DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
					  "expected 0x%p (GTT/VRAM offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *vram_start, gtt_start,
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void*)gtt_start - gtt_map),
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void*)gtt_start - gtt_map));
				amdgpu_bo_kunmap(vram_obj);
				goto out_lclean_unpin;
			}
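			/* Seed VRAM with its own slot addresses for the
			 * return trip below. */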
			*vram_start = vram_start;
		}

		amdgpu_bo_kunmap(vram_obj);

		r = amdgpu_copy_buffer(ring, vram_addr, gtt_addr,
				       size, NULL, &fence);

		if (r) {
			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
			goto out_lclean_unpin;
		}

		fence_put(fence);
		fence = NULL;	/* guard against a double put in the error path */

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object after copy %d\n", i);
			goto out_lclean_unpin;
		}

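		/* Verify the round trip: the GTT BO must now hold the
		 * addresses written into VRAM above. */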
		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     gtt_start < gtt_end;
		     gtt_start++, vram_start++) {
			if (*gtt_start != vram_start) {
				DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
					  "expected 0x%p (VRAM/GTT offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *gtt_start, vram_start,
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void*)vram_start - vram_map),
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void*)vram_start - vram_map));
				amdgpu_bo_kunmap(gtt_obj[i]);
				goto out_lclean_unpin;
			}
		}

		amdgpu_bo_kunmap(gtt_obj[i]);

		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%"PRIx64"\n",
			 gtt_addr - adev->mc.gtt_start);
		continue;

out_lclean_unpin:
		amdgpu_bo_unpin(gtt_obj[i]);
out_lclean_unres:
		amdgpu_bo_unreserve(gtt_obj[i]);
out_lclean_unref:
		amdgpu_bo_unref(&gtt_obj[i]);
out_lclean:
		for (--i; i >= 0; --i) {
			amdgpu_bo_unpin(gtt_obj[i]);
			amdgpu_bo_unreserve(gtt_obj[i]);
			amdgpu_bo_unref(&gtt_obj[i]);
		}
		if (fence)
			fence_put(fence);
		break;
	}

	amdgpu_bo_unpin(vram_obj);
out_unres:
	amdgpu_bo_unreserve(vram_obj);
out_unref:
	amdgpu_bo_unref(&vram_obj);
out_cleanup:
	kfree(gtt_obj);
	if (r) {
		printk(KERN_WARNING "Error while testing BO move.\n");
	}
}

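/* Entry point, presumably wired up from device init when the driver's test
 * module parameter is set (an assumption; not visible in this file).  A
 * no-op when the copy engine has not registered buffer_funcs. */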
void amdgpu_test_moves(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs)
		amdgpu_do_test_moves(adev);
}

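/* Emit a fence on the given ring.  UVD and VCE cannot emit a bare fence:
 * they expect well-formed create/destroy messages in the command stream,
 * so dummy session messages are submitted and the fence of the destroy
 * message is returned. */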
static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
					     struct amdgpu_ring *ring,
					     struct fence **fence)
{
	uint32_t handle = ring->idx ^ 0xdeafbeef;
	int r;

	if (ring == &adev->uvd.ring) {
		r = amdgpu_uvd_get_create_msg(ring, handle, NULL);
		if (r) {
			DRM_ERROR("Failed to get dummy create msg\n");
			return r;
		}

		r = amdgpu_uvd_get_destroy_msg(ring, handle, fence);
		if (r) {
			DRM_ERROR("Failed to get dummy destroy msg\n");
			return r;
		}

	} else if (ring == &adev->vce.ring[0] ||
		   ring == &adev->vce.ring[1]) {
		r = amdgpu_vce_get_create_msg(ring, handle, NULL);
		if (r) {
			DRM_ERROR("Failed to get dummy create msg\n");
			return r;
		}

		r = amdgpu_vce_get_destroy_msg(ring, handle, fence);
		if (r) {
			DRM_ERROR("Failed to get dummy destroy msg\n");
			return r;
		}
	} else {
		struct amdgpu_fence *a_fence = NULL;
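		/* Any other ring can emit a fence directly from the ring
		 * buffer. */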
		r = amdgpu_ring_lock(ring, 64);
		if (r) {
			DRM_ERROR("Failed to lock ring %d\n", ring->idx);
			return r;
		}
		amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
		amdgpu_ring_unlock_commit(ring);
		*fence = &a_fence->base;
	}
	return 0;
}

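/* Check that a fence emitted behind a semaphore wait on ringA does not
 * signal until ringB signals the semaphore.  The wait/signal pair is
 * exercised twice to make sure the semaphore can be reused. */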
void amdgpu_test_ring_sync(struct amdgpu_device *adev,
			   struct amdgpu_ring *ringA,
			   struct amdgpu_ring *ringB)
{
	struct fence *fence1 = NULL, *fence2 = NULL;
	struct amdgpu_semaphore *semaphore = NULL;
	int r;

	r = amdgpu_semaphore_create(adev, &semaphore);
	if (r) {
		DRM_ERROR("Failed to create semaphore\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
	if (r)
		goto out_cleanup;

	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
	if (r)
		goto out_cleanup;

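	/* Give the GPU a second to run; the fence must still be blocked on
	 * the semaphore. */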
	mdelay(1000);

	if (fence_is_signaled(fence1)) {
		DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %p\n", ringB);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);

	r = fence_wait(fence1, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 1\n");
		goto out_cleanup;
	}

	mdelay(1000);

	if (fence_is_signaled(fence2)) {
		DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %p\n", ringB);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);

	r = fence_wait(fence2, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 2\n");
		goto out_cleanup;
	}

out_cleanup:
	amdgpu_semaphore_free(adev, &semaphore, NULL);

	if (fence1)
		fence_put(fence1);

	if (fence2)
		fence_put(fence2);

	if (r)
		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}

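/* Same idea with three rings: ringA and ringB both wait on the semaphore,
 * ringC signals it twice, and exactly one waiter may proceed per signal. */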
static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
			    struct amdgpu_ring *ringA,
			    struct amdgpu_ring *ringB,
			    struct amdgpu_ring *ringC)
{
	struct fence *fenceA = NULL, *fenceB = NULL;
	struct amdgpu_semaphore *semaphore = NULL;
	bool sigA, sigB;
	int i, r;

	r = amdgpu_semaphore_create(adev, &semaphore);
	if (r) {
		DRM_ERROR("Failed to create semaphore\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
	if (r)
		goto out_cleanup;

	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);
	r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
	if (r)
		goto out_cleanup;

	mdelay(1000);

	if (fence_is_signaled(fenceA)) {
		DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}
	if (fence_is_signaled(fenceB)) {
		DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringC, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring C %p\n", ringC);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringC, semaphore);
	amdgpu_ring_unlock_commit(ringC);

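	/* Poll for up to three seconds: exactly one of the two waiters
	 * should get past the semaphore. */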
	for (i = 0; i < 30; ++i) {
		mdelay(100);
		sigA = fence_is_signaled(fenceA);
		sigB = fence_is_signaled(fenceB);
		if (sigA || sigB)
			break;
	}

	if (!sigA && !sigB) {
		DRM_ERROR("Neither fence A nor B has been signaled\n");
		goto out_cleanup;
	} else if (sigA && sigB) {
		DRM_ERROR("Both fence A and B have been signaled\n");
		goto out_cleanup;
	}

	DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');

	r = amdgpu_ring_lock(ringC, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring C %p\n", ringC);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringC, semaphore);
	amdgpu_ring_unlock_commit(ringC);

	mdelay(1000);

	r = fence_wait(fenceA, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence A\n");
		goto out_cleanup;
	}
	r = fence_wait(fenceB, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence B\n");
		goto out_cleanup;
	}

out_cleanup:
	amdgpu_semaphore_free(adev, &semaphore, NULL);

	if (fenceA)
		fence_put(fenceA);

	if (fenceB)
		fence_put(fenceB);

	if (r)
		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}

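/* Skip the pair of VCE rings: they belong to the same VCE block and are
 * not expected to synchronize against each other here. */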
static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
				      struct amdgpu_ring *ringB)
{
	if (ringA == &ringA->adev->vce.ring[0] &&
	    ringB == &ringB->adev->vce.ring[1])
		return false;

	return true;
}

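/* Run the semaphore tests over every ordered pair, and every ordered
 * triple, of rings that are ready and allowed to sync. */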
void amdgpu_test_syncing(struct amdgpu_device *adev)
{
	int i, j, k;

	for (i = 1; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ringA = adev->rings[i];
		if (!ringA || !ringA->ready)
			continue;

		for (j = 0; j < i; ++j) {
			struct amdgpu_ring *ringB = adev->rings[j];
			if (!ringB || !ringB->ready)
				continue;

			if (!amdgpu_test_sync_possible(ringA, ringB))
				continue;

			DRM_INFO("Testing syncing between rings %d and %d...\n", i, j);
			amdgpu_test_ring_sync(adev, ringA, ringB);

			DRM_INFO("Testing syncing between rings %d and %d...\n", j, i);
			amdgpu_test_ring_sync(adev, ringB, ringA);

			for (k = 0; k < j; ++k) {
				struct amdgpu_ring *ringC = adev->rings[k];
				if (!ringC || !ringC->ready)
					continue;

				if (!amdgpu_test_sync_possible(ringA, ringC))
					continue;

				if (!amdgpu_test_sync_possible(ringB, ringC))
					continue;

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k);
				amdgpu_test_ring_sync2(adev, ringA, ringB, ringC);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j);
				amdgpu_test_ring_sync2(adev, ringA, ringC, ringB);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k);
				amdgpu_test_ring_sync2(adev, ringB, ringA, ringC);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i);
				amdgpu_test_ring_sync2(adev, ringB, ringC, ringA);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j);
				amdgpu_test_ring_sync2(adev, ringC, ringA, ringB);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i);
				amdgpu_test_ring_sync2(adev, ringC, ringB, ringA);
			}
		}
	}
}
561