/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>

#include <drm/gpu_scheduler.h>

#include <sys/pool.h>
static struct pool sched_fence_slab;

int __init drm_sched_fence_slab_init(void)
{
#ifdef __linux__
	sched_fence_slab = kmem_cache_create(
		"drm_sched_fence", sizeof(struct drm_sched_fence), 0,
		SLAB_HWCACHE_ALIGN, NULL);
	if (!sched_fence_slab)
		return -ENOMEM;
#else
	pool_init(&sched_fence_slab, sizeof(struct drm_sched_fence),
	    CACHELINESIZE, IPL_TTY, 0, "drm_sched_fence", NULL);
#endif

	return 0;
}

void __exit drm_sched_fence_slab_fini(void)
{
	rcu_barrier();
#ifdef __linux__
	kmem_cache_destroy(sched_fence_slab);
#else
	pool_destroy(&sched_fence_slab);
#endif
}

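/**
 * drm_sched_fence_set_parent - publish the hardware fence backing @s_fence
 * @s_fence: scheduler fence
 * @fence: hardware fence produced by the driver for this job
 *
 * Stores the parent fence and, if a deadline was already requested via
 * dma_fence_set_deadline() on the finished fence, forwards that deadline
 * to the parent. The release store pairs with the acquire load in
 * drm_sched_fence_set_deadline_finished() so that at least one of the
 * two racing paths propagates the deadline.
 */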
static void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
				       struct dma_fence *fence)
{
	/*
	 * smp_store_release() to ensure another thread racing us
	 * in drm_sched_fence_set_deadline_finished() sees the
	 * fence's parent set before test_bit()
	 */
	smp_store_release(&s_fence->parent, dma_fence_get(fence));
	if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT,
		     &s_fence->finished.flags))
		dma_fence_set_deadline(fence, s_fence->deadline);
}

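/**
 * drm_sched_fence_scheduled - signal that the job left the queue
 * @fence: scheduler fence to signal
 * @parent: hardware fence the job was handed off to, may be NULL or an
 *          ERR_PTR when the driver produced no valid fence
 *
 * Records the parent fence (when valid) and signals the scheduled fence.
 */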
void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
			       struct dma_fence *parent)
{
	/* Set the parent before signaling the scheduled fence, such that
	 * any waiter expecting the parent to be filled after the job has
	 * been scheduled (which is the case for drivers delegating waits
	 * to some firmware) doesn't have to busy-wait for the parent to
	 * show up.
	 */
	if (!IS_ERR_OR_NULL(parent))
		drm_sched_fence_set_parent(fence, parent);

	dma_fence_signal(&fence->scheduled);
}

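/**
 * drm_sched_fence_finished - signal completion of the job
 * @fence: scheduler fence to signal
 * @result: job status, 0 on success or a negative error code
 *
 * Stores a non-zero @result as the fence error, then signals the
 * finished fence.
 */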
void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
{
	if (result)
		dma_fence_set_error(&fence->finished, result);
	dma_fence_signal(&fence->finished);
}

static const char *drm_sched_fence_get_driver_name(struct dma_fence *fence)
{
	return "drm_sched";
}

static const char *drm_sched_fence_get_timeline_name(struct dma_fence *f)
{
	struct drm_sched_fence *fence = to_drm_sched_fence(f);
	return (const char *)fence->sched->name;
}

static void drm_sched_fence_free_rcu(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
	struct drm_sched_fence *fence = to_drm_sched_fence(f);

	if (!WARN_ON_ONCE(!fence)) {
#ifdef __linux__
		kmem_cache_free(sched_fence_slab, fence);
#else
		pool_put(&sched_fence_slab, fence);
#endif
	}
}

/**
 * drm_sched_fence_free - free up an uninitialized fence
 *
 * @fence: fence to free
 *
 * Free up the fence memory. Should only be used if drm_sched_fence_init()
 * has not been called yet.
 */
void drm_sched_fence_free(struct drm_sched_fence *fence)
{
	/* This function should not be called if the fence has been initialized. */
	if (!WARN_ON_ONCE(fence->sched)) {
#ifdef __linux__
		kmem_cache_free(sched_fence_slab, fence);
#else
		pool_put(&sched_fence_slab, fence);
#endif
	}
}

/**
 * drm_sched_fence_release_scheduled - callback that fence can be freed
 *
 * @f: fence
 *
 * This function is called when the reference count becomes zero.
 * It just RCU schedules freeing up the fence.
 */
static void drm_sched_fence_release_scheduled(struct dma_fence *f)
{
	struct drm_sched_fence *fence = to_drm_sched_fence(f);

	dma_fence_put(fence->parent);
	call_rcu(&fence->finished.rcu, drm_sched_fence_free_rcu);
}

/**
 * drm_sched_fence_release_finished - drop extra reference
 *
 * @f: fence
 *
 * Drop the extra reference from the scheduled fence to the base fence.
 */
static void drm_sched_fence_release_finished(struct dma_fence *f)
{
	struct drm_sched_fence *fence = to_drm_sched_fence(f);

	dma_fence_put(&fence->scheduled);
}

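/**
 * drm_sched_fence_set_deadline_finished - set_deadline hook of the
 * finished fence
 * @f: the finished fence
 * @deadline: requested completion deadline
 *
 * Remembers the earliest deadline requested so far and, if the parent
 * hardware fence already exists, forwards the deadline to it.
 */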
static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
						  ktime_t deadline)
{
	struct drm_sched_fence *fence = to_drm_sched_fence(f);
	struct dma_fence *parent;
	unsigned long flags;

	spin_lock_irqsave(&fence->lock, flags);

	/* If we already have an earlier deadline, keep it: */
	if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
	    ktime_before(fence->deadline, deadline)) {
		spin_unlock_irqrestore(&fence->lock, flags);
		return;
	}

	fence->deadline = deadline;
	set_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);

	spin_unlock_irqrestore(&fence->lock, flags);

	/*
	 * smp_load_acquire() to ensure that, if we are racing another
	 * thread calling drm_sched_fence_set_parent(), we see the
	 * parent set before it calls test_bit(HAS_DEADLINE_BIT)
	 */
	parent = smp_load_acquire(&fence->parent);
	if (parent)
		dma_fence_set_deadline(parent, deadline);
}

static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
	.get_driver_name = drm_sched_fence_get_driver_name,
	.get_timeline_name = drm_sched_fence_get_timeline_name,
	.release = drm_sched_fence_release_scheduled,
};

static const struct dma_fence_ops drm_sched_fence_ops_finished = {
	.get_driver_name = drm_sched_fence_get_driver_name,
	.get_timeline_name = drm_sched_fence_get_timeline_name,
	.release = drm_sched_fence_release_finished,
	.set_deadline = drm_sched_fence_set_deadline_finished,
};

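/**
 * to_drm_sched_fence - cast a dma_fence back to its drm_sched_fence
 * @f: fence to cast
 *
 * Returns the containing drm_sched_fence if @f is one of the two fences
 * embedded in it, NULL for any other fence.
 */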
struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
{
	if (f->ops == &drm_sched_fence_ops_scheduled)
		return container_of(f, struct drm_sched_fence, scheduled);

	if (f->ops == &drm_sched_fence_ops_finished)
		return container_of(f, struct drm_sched_fence, finished);

	return NULL;
}
EXPORT_SYMBOL(to_drm_sched_fence);

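/**
 * drm_sched_fence_alloc - allocate an uninitialized scheduler fence
 * @entity: entity the fence will belong to (not dereferenced here)
 * @owner: opaque job owner, stored in the fence
 *
 * Returns a zeroed fence with its lock initialized, or NULL on
 * allocation failure. drm_sched_fence_init() must be called on it
 * before first use.
 */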
struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
					      void *owner)
{
	struct drm_sched_fence *fence = NULL;

#ifdef __linux__
	fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
#else
	fence = pool_get(&sched_fence_slab, PR_WAITOK | PR_ZERO);
#endif
	if (fence == NULL)
		return NULL;

	fence->owner = owner;
	mtx_init(&fence->lock, IPL_TTY);

	return fence;
}

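/**
 * drm_sched_fence_init - initialize the two fences embedded in @fence
 * @fence: fence obtained from drm_sched_fence_alloc()
 * @entity: entity this fence belongs to
 *
 * Initializes the scheduled and finished fences on two consecutive
 * fence contexts (entity->fence_context and entity->fence_context + 1)
 * with the same sequence number, both sharing the fence's lock.
 */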
void drm_sched_fence_init(struct drm_sched_fence *fence,
			  struct drm_sched_entity *entity)
{
	unsigned seq;

	fence->sched = entity->rq->sched;
	seq = atomic_inc_return(&entity->fence_seq);
	dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
		       &fence->lock, entity->fence_context, seq);
	dma_fence_init(&fence->finished, &drm_sched_fence_ops_finished,
		       &fence->lock, entity->fence_context + 1, seq);
}
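
/*
 * A minimal usage sketch (not the verbatim scheduler call sites): the
 * fence is allocated when the job is created and only initialized once
 * the job is armed on an entity, e.g.
 *
 *	s_fence = drm_sched_fence_alloc(entity, owner);
 *	if (!s_fence)
 *		return -ENOMEM;
 *	...
 *	drm_sched_fence_init(s_fence, entity);
 *
 * If initialization never happens, drm_sched_fence_free() releases the
 * allocation without going through the dma_fence release path.
 */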

module_init(drm_sched_fence_slab_init);
module_exit(drm_sched_fence_slab_fini);

MODULE_DESCRIPTION("DRM GPU scheduler");
MODULE_LICENSE("GPL and additional rights");