/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/dma-fence-array.h>

#include "gt/intel_engine.h"

#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"

static __always_inline u32 __busy_read_flag(u16 id)
{
	if (id == (u16)I915_ENGINE_CLASS_INVALID)
		return 0xffff0000u;

	GEM_BUG_ON(id >= 16);
	return 0x10000u << id;
}

static __always_inline u32 __busy_write_id(u16 id)
{
	/*
	 * The uABI guarantees an active writer is also amongst the read
	 * engines. This would be true if we accessed the activity tracking
	 * under the lock, but as we perform the lookup of the object and
	 * its activity locklessly we can not guarantee that the last_write
	 * being active implies that we have set the same engine flag from
	 * last_read - hence we always set both read and write busy for
	 * last_write.
	 */
	if (id == (u16)I915_ENGINE_CLASS_INVALID)
		return 0xffffffffu;

	return (id + 1) | __busy_read_flag(id);
}
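
/*
 * Taken together, the two helpers above encode the uAPI busy word returned
 * to userspace (a sketch of the layout implied by the flag functions, not a
 * new definition): the low 16 bits carry 1 + the engine class of the single
 * active writer (0 when there is no writer), while the high 16 bits form a
 * bitmask with bit (16 + class) set for every engine class still reading
 * from the object.
 */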

static __always_inline unsigned int
__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
{
	const struct i915_request *rq;

	/*
	 * We have to check the current hw status of the fence as the uABI
	 * guarantees forward progress. We could rely on the idle worker
	 * to eventually flush us, but to minimise latency just ask the
	 * hardware.
	 *
	 * Note we only report on the status of native fences and we currently
	 * have two native fences:
	 *
	 * 1. A composite fence (dma_fence_array) constructed of i915 requests
	 * created during a parallel submission. In this case we deconstruct the
	 * composite fence into individual i915 requests and check the status of
	 * each request.
	 *
	 * 2. A single i915 request.
	 */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		struct dma_fence **child = array->fences;
		unsigned int nchild = array->num_fences;

		do {
			struct dma_fence *current_fence = *child++;

			/* Not an i915 fence, can't be busy per above */
			if (!dma_fence_is_i915(current_fence) ||
			    !test_bit(I915_FENCE_FLAG_COMPOSITE,
				      &current_fence->flags)) {
				return 0;
			}

			rq = to_request(current_fence);
			if (!i915_request_completed(rq))
				return flag(rq->engine->uabi_class);
		} while (--nchild);

		/* All requests in array complete, not busy */
		return 0;
	} else {
		if (!dma_fence_is_i915(fence))
			return 0;

		rq = to_request(fence);
		if (i915_request_completed(rq))
			return 0;

		/* Beware type-expansion follies! */
		BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
		return flag(rq->engine->uabi_class);
	}
}

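/*
 * Note on the composite check above (a descriptive aside, not new behaviour):
 * an i915 request inside a dma_fence_array is only examined if it carries
 * I915_FENCE_FLAG_COMPOSITE, i.e. it was created as part of a parallel
 * submission; anything else found in the array is treated like a foreign
 * fence and the object is reported as idle.
 */
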
static __always_inline unsigned int
busy_check_reader(struct dma_fence *fence)
{
	return __busy_set_if_active(fence, __busy_read_flag);
}

static __always_inline unsigned int
busy_check_writer(struct dma_fence *fence)
{
	if (!fence)
		return 0;

	return __busy_set_if_active(fence, __busy_write_id);
}

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_i915_gem_object *obj;
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int err;

	err = -ENOENT;
	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj)
		goto out;

	/*
	 * A discrepancy here is that we do not report the status of
	 * non-i915 fences, i.e. even though we may report the object as idle,
	 * a call to set-domain may still stall waiting for foreign rendering.
	 * This also means that wait-ioctl may report an object as busy,
	 * where busy-ioctl considers it idle.
	 *
	 * We trade the ability to warn of foreign fences to report on which
	 * i915 engines are active for the object.
	 *
	 * Alternatively, we can trade that extra information on read/write
	 * activity with
	 *	args->busy =
	 *		!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ);
	 * to report the overall busyness. This is what the wait-ioctl does.
	 *
	 */
	args->busy = 0;
	dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_READ);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		if (dma_resv_iter_is_restarted(&cursor))
			args->busy = 0;

		if (dma_resv_iter_usage(&cursor) <= DMA_RESV_USAGE_WRITE)
			/* Translate the write fences to the READ *and* WRITE engine */
			args->busy |= busy_check_writer(fence);
		else
			/* Translate read fences to READ set of engines */
			args->busy |= busy_check_reader(fence);
	}
	dma_resv_iter_end(&cursor);

	err = 0;
out:
	rcu_read_unlock();
	return err;
}
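
/*
 * Illustrative userspace sketch (not part of this file), assuming libdrm's
 * drmIoctl() and the DRM_IOCTL_I915_GEM_BUSY uAPI with the busy word layout
 * described above:
 *
 *	struct drm_i915_gem_busy busy = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0) {
 *		uint16_t write_class = busy.busy & 0xffff;  // 1 + class, 0 if no writer
 *		uint16_t read_classes = busy.busy >> 16;    // bitmask of reading classes
 *	}
 */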