/*	$NetBSD: amdgpu_sync.c,v 1.2 2018/08/27 04:58:19 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_sync.c,v 1.2 2018/08/27 04:58:19 riastradh Exp $");

#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

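/*
 * One entry per fence context in the sync object's hash table; it holds the
 * latest fence seen from that context.
 */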
struct amdgpu_sync_entry {
	struct hlist_node	node;
	struct fence		*fence;
};

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
	unsigned i;

	for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
		sync->semaphores[i] = NULL;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		sync->sync_to[i] = NULL;

	hash_init(sync->fences);
	sync->last_vm_update = NULL;
}

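/**
 * amdgpu_sync_same_dev - check if a fence was emitted by this device
 *
 * @adev: amdgpu device to compare against
 * @f: fence to test
 *
 * Returns true if the fence comes from a ring of @adev, either directly as an
 * amdgpu fence or wrapped in a scheduler fence, false otherwise.
 */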
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
{
	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

	if (a_fence)
		return a_fence->ring->adev == adev;

	if (s_fence) {
		struct amdgpu_ring *ring;

		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
		return ring->adev == adev;
	}

	return false;
}

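/**
 * amdgpu_sync_test_owner - check if a fence belongs to an owner
 *
 * @f: fence to test
 * @owner: owner to compare against
 *
 * Returns true if the fence was created on behalf of @owner, checking the
 * scheduler fence first and falling back to the amdgpu fence.
 */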
static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
{
	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

	if (s_fence)
		return s_fence->owner == owner;
	if (a_fence)
		return a_fence->owner == owner;
	return false;
}

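/**
 * amdgpu_sync_keep_later - keep the later of two fences
 *
 * @keep: currently stored fence, may be NULL
 * @fence: candidate fence
 *
 * Unless the stored fence is already later than @fence, drop the old
 * reference and store a new reference to @fence in its place.
 */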
static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
{
	if (*keep && fence_is_later(*keep, fence))
		return;

	fence_put(*keep);
	*keep = fence_get(fence);
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @adev: amdgpu device the sync object belongs to
 * @sync: sync object to add fence to
 * @f: fence to sync to
 *
 * Returns 0 on success, -ENOMEM if a new hash entry could not be allocated.
 */
int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
		      struct fence *f)
{
	struct amdgpu_sync_entry *e;
	struct amdgpu_fence *fence;

	if (!f)
		return 0;

	if (amdgpu_sync_same_dev(adev, f) &&
	    amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
		amdgpu_sync_keep_later(&sync->last_vm_update, f);

	fence = to_amdgpu_fence(f);
	if (!fence || fence->ring->adev != adev) {
		/* foreign fence: track it per fence context in the hash table */
		hash_for_each_possible(sync->fences, e, node, f->context) {
			if (unlikely(e->fence->context != f->context))
				continue;

			amdgpu_sync_keep_later(&e->fence, f);
			return 0;
		}

		e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
		if (!e)
			return -ENOMEM;

		hash_add(sync->fences, &e->node, f->context);
		e->fence = fence_get(f);
		return 0;
	}

	/* fence from one of our own rings: keep the latest one per ring */
	amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);

	return 0;
}

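/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to inspect
 *
 * Returns the owner recorded in the scheduler or amdgpu fence, or
 * AMDGPU_FENCE_OWNER_UNDEFINED if the fence is of neither type.
 */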
static void *amdgpu_sync_get_owner(struct fence *f)
{
	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

	if (s_fence)
		return s_fence->owner;
	else if (a_fence)
		return a_fence->owner;
	return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_resv - sync to all fences of a reservation object
 *
 * @adev: amdgpu device the sync object belongs to
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @owner: originator of the request, used to filter out uninteresting fences
 *
 * Add the exclusive fence and any relevant shared fences to the sync object.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev,
		     struct amdgpu_sync *sync,
		     struct reservation_object *resv,
		     void *owner)
{
	struct reservation_object_list *flist;
	struct fence *f;
	void *fence_owner;
	unsigned i;
	int r = 0;

	if (resv == NULL)
		return -EINVAL;

	/* always sync to the exclusive fence */
	f = reservation_object_get_excl(resv);
	r = amdgpu_sync_fence(adev, sync, f);

	flist = reservation_object_get_list(resv);
	if (!flist || r)
		return r;

	for (i = 0; i < flist->shared_count; ++i) {
		f = rcu_dereference_protected(flist->shared[i],
					      reservation_object_held(resv));
		if (amdgpu_sync_same_dev(adev, f)) {
			/* VM updates are only interesting
			 * for other VM updates and moves.
			 */
			fence_owner = amdgpu_sync_get_owner(f);
			if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
			    (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
			    ((owner == AMDGPU_FENCE_OWNER_VM) !=
			     (fence_owner == AMDGPU_FENCE_OWNER_VM)))
				continue;

			/* Ignore fences from the same owner as
			 * long as it isn't undefined.
			 */
			if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
			    fence_owner == owner)
				continue;
		}

		r = amdgpu_sync_fence(adev, sync, f);
		if (r)
			break;
	}
	return r;
}

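/**
 * amdgpu_sync_get_fence - get the next unsignaled fence
 *
 * @sync: sync object to consume fences from
 *
 * Remove entries from the hash table one by one, dropping fences that have
 * already signaled, and return the first unsignaled fence (the caller owns
 * the returned reference). Returns NULL once the table is empty.
 */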
struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct fence *f;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {

		f = e->fence;

		hash_del(&e->node);
		kfree(e);

		if (!fence_is_signaled(f))
			return f;

		fence_put(f);
	}
	return NULL;
}

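/**
 * amdgpu_sync_wait - block until all collected fences have signaled
 *
 * @sync: sync object to wait on
 *
 * Wait for every fence in the hash table and, when semaphores are disabled,
 * for the per-ring fences as well. Returns 0 on success or the first error
 * from fence_wait().
 */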
int amdgpu_sync_wait(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		r = fence_wait(e->fence, false);
		if (r)
			return r;

		hash_del(&e->node);
		fence_put(e->fence);
		kfree(e);
	}

	/* with semaphores enabled the per-ring fences are left to amdgpu_sync_rings() */
	if (amdgpu_enable_semaphores)
		return 0;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct fence *fence = sync->sync_to[i];
		if (!fence)
			continue;

		r = fence_wait(fence, false);
		if (r)
			return r;
	}

	return 0;
}

/**
 * amdgpu_sync_rings - sync ring to all registered fences
 *
 * @sync: sync object to use
 * @ring: ring that needs sync
 *
 * Ensure that all registered fences are signaled before letting
 * the ring continue. The caller must hold the ring lock.
 */
int amdgpu_sync_rings(struct amdgpu_sync *sync,
		      struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned count = 0;
	int i, r;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *other = adev->rings[i];
		struct amdgpu_semaphore *semaphore;
		struct amdgpu_fence *fence;

		if (!sync->sync_to[i])
			continue;

		fence = to_amdgpu_fence(sync->sync_to[i]);

		/* check if we really need to sync */
		if (!amdgpu_enable_scheduler &&
		    !amdgpu_fence_need_sync(fence, ring))
			continue;

		/* prevent GPU deadlocks */
		if (!other->ready) {
			dev_err(adev->dev, "Syncing to a disabled ring!\n");
			return -EINVAL;
		}

		if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
			r = fence_wait(sync->sync_to[i], true);
			if (r)
				return r;
			continue;
		}

		if (count >= AMDGPU_NUM_SYNCS) {
			/* not enough room, wait manually */
			r = fence_wait(&fence->base, false);
			if (r)
				return r;
			continue;
		}
		r = amdgpu_semaphore_create(adev, &semaphore);
		if (r)
			return r;

		sync->semaphores[count++] = semaphore;

		/* allocate enough space for sync command */
		r = amdgpu_ring_alloc(other, 16);
		if (r)
			return r;

		/* emit the signal semaphore */
		if (!amdgpu_semaphore_emit_signal(other, semaphore)) {
			/* signaling wasn't successful, wait manually */
			amdgpu_ring_undo(other);
			r = fence_wait(&fence->base, false);
			if (r)
				return r;
			continue;
		}

		/* we assume the caller has already allocated space on the waiter's ring */
		if (!amdgpu_semaphore_emit_wait(ring, semaphore)) {
			/* waiting wasn't successful, wait manually */
			amdgpu_ring_undo(other);
			r = fence_wait(&fence->base, false);
			if (r)
				return r;
			continue;
		}

		amdgpu_ring_commit(other);
		amdgpu_fence_note_sync(fence, ring);
	}

	return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @adev: amdgpu_device pointer
 * @sync: sync object to free
 * @fence: fence passed along when freeing the semaphores
 *
 * Drop all fences collected in the sync object and free its semaphores.
 */
void amdgpu_sync_free(struct amdgpu_device *adev,
		      struct amdgpu_sync *sync,
		      struct fence *fence)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	unsigned i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		hash_del(&e->node);
		fence_put(e->fence);
		kfree(e);
	}

	for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
		amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		fence_put(sync->sync_to[i]);

	fence_put(sync->last_vm_update);
}
394