xref: /openbsd-src/sys/dev/pci/drm/amd/amdgpu/amdgpu_sync.c (revision 24bb5fcea3ed904bc467217bdaadb5dfc618d5bf)
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
	struct hlist_node	node;
	struct dma_fence	*fence;
};

static struct pool amdgpu_sync_slab;

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
	hash_init(sync->fences);
	sync->last_vm_update = NULL;
}
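
/*
 * Typical lifecycle of a sync object (illustrative sketch only; "adev",
 * "resv" and "owner" stand for caller state and are not defined in this
 * file; the reservation object must already be locked by the caller):
 *
 *	struct amdgpu_sync sync;
 *	int r;
 *
 *	amdgpu_sync_create(&sync);
 *	r = amdgpu_sync_resv(adev, &sync, resv, AMDGPU_SYNC_NE_OWNER, owner);
 *	if (!r)
 *		r = amdgpu_sync_wait(&sync, true);
 *	amdgpu_sync_free(&sync);
 */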

/**
 * amdgpu_sync_same_dev - test if a fence belongs to us
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by us.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
				 struct dma_fence *f)
{
	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

	if (s_fence) {
		struct amdgpu_ring *ring;

		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
		return ring->adev == adev;
	}

	return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
	struct drm_sched_fence *s_fence;
	struct amdgpu_amdkfd_fence *kfd_fence;

	if (!f)
		return AMDGPU_FENCE_OWNER_UNDEFINED;

	s_fence = to_drm_sched_fence(f);
	if (s_fence)
		return s_fence->owner;

	kfd_fence = to_amdgpu_amdkfd_fence(f);
	if (kfd_fence)
		return AMDGPU_FENCE_OWNER_KFD;

	return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - Keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one is
 * later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
				   struct dma_fence *fence)
{
	if (*keep && dma_fence_is_later(*keep, fence))
		return;

	dma_fence_put(*keep);
	*keep = dma_fence_get(fence);
}

/**
 * amdgpu_sync_add_later - add the fence to the hash
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 *
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
	struct amdgpu_sync_entry *e;

	hash_for_each_possible(sync->fences, e, node, f->context) {
		if (unlikely(e->fence->context != f->context))
			continue;

		amdgpu_sync_keep_later(&e->fence, f);
		return true;
	}
	return false;
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add fence to
 * @f: fence to sync to
 *
 * Add the fence to the sync object. Returns 0 on success or -ENOMEM when
 * no entry could be allocated.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
{
	struct amdgpu_sync_entry *e;

	if (!f)
		return 0;

	if (amdgpu_sync_add_later(sync, f))
		return 0;

#ifdef __linux__
	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
#else
	e = pool_get(&amdgpu_sync_slab, PR_WAITOK);
#endif
	if (!e)
		return -ENOMEM;

	hash_add(sync->fences, &e->node, f->context);
	e->fence = dma_fence_get(f);
	return 0;
}

/**
 * amdgpu_sync_vm_fence - remember to sync to this VM fence
 *
 * @sync: sync object to add fence to
 * @fence: the VM fence to add
 *
 * Add the fence to the sync object and remember it as VM update.
 */
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
{
	if (!fence)
		return 0;

	amdgpu_sync_keep_later(&sync->last_vm_update, fence);
	return amdgpu_sync_fence(sync, fence);
}

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @mode: how owner affects which fences we sync to
 * @owner: owner of the planned job submission
 *
 * Sync to all fences in the reservation object that are relevant for the
 * given owner and sync mode.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
		     void *owner)
{
	struct dma_resv_list *flist;
	struct dma_fence *f;
	unsigned i;
	int r = 0;

	if (resv == NULL)
		return -EINVAL;

	/* always sync to the exclusive fence */
	f = dma_resv_get_excl(resv);
	r = amdgpu_sync_fence(sync, f);

	flist = dma_resv_get_list(resv);
	if (!flist || r)
		return r;

	for (i = 0; i < flist->shared_count; ++i) {
		void *fence_owner;

		f = rcu_dereference_protected(flist->shared[i],
					      dma_resv_held(resv));

		fence_owner = amdgpu_sync_get_owner(f);

		/* Always sync to moves, no matter what */
		if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
			r = amdgpu_sync_fence(sync, f);
			if (r)
				break;
		}

		/* We only want to trigger KFD eviction fences on
		 * evict or move jobs. Skip KFD fences otherwise.
		 */
		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
			continue;

		/* Never sync to VM updates either. */
		if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
			continue;

		/* Ignore fences depending on the sync mode */
		switch (mode) {
		case AMDGPU_SYNC_ALWAYS:
			break;

		case AMDGPU_SYNC_NE_OWNER:
			if (amdgpu_sync_same_dev(adev, f) &&
			    fence_owner == owner)
				continue;
			break;

		case AMDGPU_SYNC_EQ_OWNER:
			if (amdgpu_sync_same_dev(adev, f) &&
			    fence_owner != owner)
				continue;
			break;

		case AMDGPU_SYNC_EXPLICIT:
			continue;
		}

		WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
		     "Adding eviction fence to sync obj");
		r = amdgpu_sync_fence(sync, f);
		if (r)
			break;
	}
	return r;
}
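
/*
 * Summary of how the sync modes above filter the shared fences (the
 * exclusive fence is always added first, and move, KFD and VM fences
 * are special-cased before the switch):
 *
 *	AMDGPU_SYNC_ALWAYS	sync to every remaining shared fence
 *	AMDGPU_SYNC_NE_OWNER	skip fences from the same owner on this device
 *	AMDGPU_SYNC_EQ_OWNER	skip fences from a different owner on this device
 *	AMDGPU_SYNC_EXPLICIT	skip all shared fences
 */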

/**
 * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
 * @ring: optional ring to use for test
 *
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
					 struct amdgpu_ring *ring)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		struct dma_fence *f = e->fence;
		struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

		if (dma_fence_is_signaled(f)) {
			hash_del(&e->node);
			dma_fence_put(f);
#ifdef __linux__
			kmem_cache_free(amdgpu_sync_slab, e);
#else
			pool_put(&amdgpu_sync_slab, e);
#endif
			continue;
		}
		if (ring && s_fence) {
			/* For fences from the same ring it is sufficient
			 * when they are scheduled.
			 */
			if (s_fence->sched == &ring->sched) {
				if (dma_fence_is_signaled(&s_fence->scheduled))
					continue;

				return &s_fence->scheduled;
			}
		}

		return f;
	}

	return NULL;
}

/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to use
 *
 * Gets and removes the next fence from the sync object that is not signaled
 * yet.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		f = e->fence;

		hash_del(&e->node);
#ifdef __linux__
		kmem_cache_free(amdgpu_sync_slab, e);
#else
		pool_put(&amdgpu_sync_slab, e);
#endif

		if (!dma_fence_is_signaled(f))
			return f;

		dma_fence_put(f);
	}
	return NULL;
}

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i, r;

	hash_for_each_safe(source->fences, i, tmp, e, node) {
		f = e->fence;
		if (!dma_fence_is_signaled(f)) {
			r = amdgpu_sync_fence(clone, f);
			if (r)
				return r;
		} else {
			hash_del(&e->node);
			dma_fence_put(f);
#ifdef __linux__
			kmem_cache_free(amdgpu_sync_slab, e);
#else
			pool_put(&amdgpu_sync_slab, e);
#endif
		}
	}

	dma_fence_put(clone->last_vm_update);
	clone->last_vm_update = dma_fence_get(source->last_vm_update);

	return 0;
}
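
/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: sync object to wait for
 * @intr: if true, the wait is interruptible
 *
 * Waits for every fence in the sync object to signal and drops the entries
 * as they complete. Returns 0 on success or the error returned by
 * dma_fence_wait().
 */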
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		r = dma_fence_wait(e->fence, intr);
		if (r)
			return r;

		hash_del(&e->node);
		dma_fence_put(e->fence);
#ifdef __linux__
		kmem_cache_free(amdgpu_sync_slab, e);
#else
		pool_put(&amdgpu_sync_slab, e);
#endif
	}

	return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Free the sync object.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	unsigned i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		hash_del(&e->node);
		dma_fence_put(e->fence);
#ifdef __linux__
		kmem_cache_free(amdgpu_sync_slab, e);
#else
		pool_put(&amdgpu_sync_slab, e);
#endif
	}

	dma_fence_put(sync->last_vm_update);
}

/**
 * amdgpu_sync_init - init sync object subsystem
 *
 * Allocate the slab allocator.
 */
int amdgpu_sync_init(void)
{
#ifdef __linux__
	amdgpu_sync_slab = kmem_cache_create(
		"amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
		SLAB_HWCACHE_ALIGN, NULL);
	if (!amdgpu_sync_slab)
		return -ENOMEM;
#else
	pool_init(&amdgpu_sync_slab, sizeof(struct amdgpu_sync_entry),
	    CACHELINESIZE, IPL_TTY, 0, "amdgpu_sync", NULL);
#endif

	return 0;
}

/**
 * amdgpu_sync_fini - fini sync object subsystem
 *
 * Free the slab allocator.
 */
void amdgpu_sync_fini(void)
{
#ifdef __linux__
	kmem_cache_destroy(amdgpu_sync_slab);
#else
	pool_destroy(&amdgpu_sync_slab);
#endif
}