/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>

#include <drm/gpu_scheduler.h>

#include <sys/pool.h>

static struct pool sched_fence_slab;

int __init drm_sched_fence_slab_init(void)
{
#ifdef __linux__
	sched_fence_slab = kmem_cache_create(
		"drm_sched_fence", sizeof(struct drm_sched_fence), 0,
		SLAB_HWCACHE_ALIGN, NULL);
	if (!sched_fence_slab)
		return -ENOMEM;
#else
	pool_init(&sched_fence_slab, sizeof(struct drm_sched_fence),
	    CACHELINESIZE, IPL_TTY, 0, "drm_sched_fence", NULL);
#endif

	return 0;
}
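
/*
 * Illustrative note (a sketch, not part of the upstream file): on Linux
 * the fences come from a dedicated slab cache, while this OpenBSD port
 * fills the same role with a pool(9) of cache-line-aligned items.  The
 * #ifdef __linux__ blocks throughout this file simply select between
 * the kmem_cache_* and pool_* calls on the shared sched_fence_slab.
 */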

void __exit drm_sched_fence_slab_fini(void)
{
	/* wait for any drm_sched_fence_free_rcu() callbacks still in flight */
	rcu_barrier();
#ifdef __linux__
	kmem_cache_destroy(sched_fence_slab);
#else
	pool_destroy(&sched_fence_slab);
#endif
}

static void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
				       struct dma_fence *fence)
{
	/*
	 * smp_store_release() to ensure another thread racing us
	 * in drm_sched_fence_set_deadline_finished() sees the
	 * fence's parent set before test_bit()
	 */
	smp_store_release(&s_fence->parent, dma_fence_get(fence));
	if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT,
		     &s_fence->finished.flags))
		dma_fence_set_deadline(fence, s_fence->deadline);
}
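
/*
 * Illustrative note (a sketch, not part of the upstream file): the
 * store-release above pairs with the load-acquire in
 * drm_sched_fence_set_deadline_finished() below.  Whichever side runs
 * second is guaranteed to see the other's write, so a deadline is never
 * lost: either this function sees HAS_DEADLINE_BIT and forwards the
 * deadline itself, or set_deadline_finished() sees the parent and
 * forwards the deadline there.
 */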

void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
			       struct dma_fence *parent)
{
	/* Set the parent before signaling the scheduled fence, so that
	 * any waiter expecting the parent to be filled after the job
	 * has been scheduled (which is the case for drivers delegating
	 * waits to some firmware) doesn't have to busy-wait for the
	 * parent to show up.
	 */
	if (!IS_ERR_OR_NULL(parent))
		drm_sched_fence_set_parent(fence, parent);

	dma_fence_signal(&fence->scheduled);
}

void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
{
	if (result)
		dma_fence_set_error(&fence->finished, result);
	dma_fence_signal(&fence->finished);
}
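
/*
 * Illustrative note (a sketch, not part of the upstream file): each
 * scheduler job carries one drm_sched_fence with two embedded
 * dma_fences, and the scheduler core drives them roughly like so,
 * where "parent" is the hardware fence returned by the driver:
 *
 *	parent = sched->ops->run_job(job);
 *	drm_sched_fence_scheduled(s_fence, parent);
 *	...
 *	drm_sched_fence_finished(s_fence, result);
 *
 * "scheduled" thus signals when the job is handed to the hardware and
 * "finished" when it has completed, with any error stored via "result".
 */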

static const char *drm_sched_fence_get_driver_name(struct dma_fence *fence)
{
	return "drm_sched";
}

static const char *drm_sched_fence_get_timeline_name(struct dma_fence *f)
{
	struct drm_sched_fence *fence = to_drm_sched_fence(f);
	return (const char *)fence->sched->name;
}

static void drm_sched_fence_free_rcu(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
	struct drm_sched_fence *fence = to_drm_sched_fence(f);

	if (!WARN_ON_ONCE(!fence)) {
#ifdef __linux__
		kmem_cache_free(sched_fence_slab, fence);
#else
		pool_put(&sched_fence_slab, fence);
#endif
	}
}

/**
 * drm_sched_fence_free - free up an uninitialized fence
 *
 * @fence: fence to free
 *
 * Free up the fence memory. Should only be used if drm_sched_fence_init()
 * has not been called yet.
 */
void drm_sched_fence_free(struct drm_sched_fence *fence)
{
	/* This function should not be called if the fence has been initialized. */
	if (!WARN_ON_ONCE(fence->sched)) {
#ifdef __linux__
		kmem_cache_free(sched_fence_slab, fence);
#else
		pool_put(&sched_fence_slab, fence);
#endif
	}
}

/**
 * drm_sched_fence_release_scheduled - callback that fence can be freed
 *
 * @f: fence
 *
 * This function is called when the reference count becomes zero.
 * It just RCU schedules freeing up the fence.
 */
static void drm_sched_fence_release_scheduled(struct dma_fence *f)
{
	struct drm_sched_fence *fence = to_drm_sched_fence(f);

	dma_fence_put(fence->parent);
	call_rcu(&fence->finished.rcu, drm_sched_fence_free_rcu);
}

/**
 * drm_sched_fence_release_finished - drop extra reference
 *
 * @f: fence
 *
 * Drop the extra reference from the scheduled fence to the base fence.
 */
static void drm_sched_fence_release_finished(struct dma_fence *f)
{
	struct drm_sched_fence *fence = to_drm_sched_fence(f);

	dma_fence_put(&fence->scheduled);
}

static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
						  ktime_t deadline)
{
	struct drm_sched_fence *fence = to_drm_sched_fence(f);
	struct dma_fence *parent;
	unsigned long flags;

	spin_lock_irqsave(&fence->lock, flags);

	/* If we already have an earlier deadline, keep it: */
	if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
	    ktime_before(fence->deadline, deadline)) {
		spin_unlock_irqrestore(&fence->lock, flags);
		return;
	}

	fence->deadline = deadline;
	set_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);

	spin_unlock_irqrestore(&fence->lock, flags);

	/*
	 * smp_load_acquire() to ensure that if we are racing another
	 * thread calling drm_sched_fence_set_parent(), we see the
	 * parent set before it calls test_bit(HAS_DEADLINE_BIT)
	 */
	parent = smp_load_acquire(&fence->parent);
	if (parent)
		dma_fence_set_deadline(parent, deadline);
}
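
/*
 * Illustrative note (a sketch, not part of the upstream file): a waiter
 * that needs the job done by a certain time, e.g. a compositor aiming
 * for the next vblank, can pass a hint through the generic dma_fence
 * API:
 *
 *	dma_fence_set_deadline(&s_fence->finished, next_vblank_time);
 *
 * which lands in drm_sched_fence_set_deadline_finished() above and is
 * forwarded to the hardware fence once a parent exists, where a driver
 * may react by e.g. raising GPU clocks.  "next_vblank_time" is a
 * hypothetical ktime_t value.
 */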

static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
	.get_driver_name = drm_sched_fence_get_driver_name,
	.get_timeline_name = drm_sched_fence_get_timeline_name,
	.release = drm_sched_fence_release_scheduled,
};

static const struct dma_fence_ops drm_sched_fence_ops_finished = {
	.get_driver_name = drm_sched_fence_get_driver_name,
	.get_timeline_name = drm_sched_fence_get_timeline_name,
	.release = drm_sched_fence_release_finished,
	.set_deadline = drm_sched_fence_set_deadline_finished,
};

struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
{
	if (f->ops == &drm_sched_fence_ops_scheduled)
		return container_of(f, struct drm_sched_fence, scheduled);

	if (f->ops == &drm_sched_fence_ops_finished)
		return container_of(f, struct drm_sched_fence, finished);

	return NULL;
}
EXPORT_SYMBOL(to_drm_sched_fence);

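/*
 * Illustrative note (a sketch, not part of the upstream file): since
 * to_drm_sched_fence() returns NULL for any other kind of dma_fence, a
 * driver can use it to pick out fences produced by its own jobs, e.g.:
 *
 *	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
 *
 *	if (s_fence && s_fence->owner == my_ctx)
 *		;	// fence comes from one of our own jobs
 *
 * where "my_ctx" stands for a hypothetical owner pointer as passed to
 * drm_sched_fence_alloc() below.
 */
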
struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
					      void *owner)
{
	struct drm_sched_fence *fence = NULL;

#ifdef __linux__
	fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
#else
	fence = pool_get(&sched_fence_slab, PR_WAITOK | PR_ZERO);
#endif
	if (fence == NULL)
		return NULL;

	fence->owner = owner;
	mtx_init(&fence->lock, IPL_TTY);

	return fence;
}

void drm_sched_fence_init(struct drm_sched_fence *fence,
			  struct drm_sched_entity *entity)
{
	unsigned seq;

	fence->sched = entity->rq->sched;
	seq = atomic_inc_return(&entity->fence_seq);
	dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
		       &fence->lock, entity->fence_context, seq);
	dma_fence_init(&fence->finished, &drm_sched_fence_ops_finished,
		       &fence->lock, entity->fence_context + 1, seq);
}
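
/*
 * Illustrative note (a sketch, not part of the upstream file):
 * allocation and initialization are deliberately split so memory can be
 * reserved early, while the fence sequence number is only consumed once
 * the job is really committed to an entity.  In the Linux scheduler the
 * pairing looks roughly like:
 *
 *	job->s_fence = drm_sched_fence_alloc(entity, owner);	// may fail
 *	...			// job setup, may still be aborted
 *	drm_sched_fence_init(job->s_fence, entity);		// commit
 *
 * A job aborted between the two steps frees the still-uninitialized
 * fence with drm_sched_fence_free().
 */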

module_init(drm_sched_fence_slab_init);
module_exit(drm_sched_fence_slab_fini);

MODULE_DESCRIPTION("DRM GPU scheduler");
MODULE_LICENSE("GPL and additional rights");