/* $NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $ */

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

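/*
 * Build a pooled batch buffer that fills @vma with the 32-bit @value
 * using the blitter: one XY_COLOR_BLT per 8MiB block, separated by
 * MI_ARB_CHECK so the request can be preempted between blocks.  The
 * returned batch vma carries the pool node in ->private and must be
 * released with intel_emit_vma_release().
 */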
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

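	/*
	 * Worst-case batch size: 8 dwords per block on gen8+ (7 for the
	 * blit plus MI_ARB_CHECK), plus one dword for MI_BATCH_BUFFER_END.
	 */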
	count = div_u64(round_up(vma->size, block_size), block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = vma->size;
	offset = vma->node.start;

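	/*
	 * Emit one fill per block: each blit covers size/PAGE_SIZE rows of
	 * one page (pitch PAGE_SIZE, width PAGE_SIZE/4 32bpp pixels).  On
	 * gen8+ the command is one dword longer to carry the upper 32 bits
	 * of the 48-bit GTT address.
	 */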
	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

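/*
 * Track the batch vma in @rq and mark its engine-pool node as active so
 * neither the mapping nor the pool buffer is reused before the request
 * completes.
 */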
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (unlikely(err))
		return err;

	return intel_engine_pool_mark_active(vma->private, rq);
}

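/*
 * Undo intel_emit_vma_*_blt(): unpin the batch, return its buffer to the
 * engine pool and release the engine wakeref.
 */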
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_engine_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}

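/*
 * Fill @obj with @value on the GPU: pin the object into @ce's address
 * space, flush any incoherent CPU cachelines, then build the fill batch
 * and submit it as a request on @ce.
 */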
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

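	/*
	 * Write back dirty, non-coherent CPU cachelines so a later
	 * eviction cannot overwrite what the blitter is about to write.
	 */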
	if (obj->cache_dirty & ~obj->cache_coherent) {
		i915_gem_object_lock(obj);
		i915_gem_clflush_object(obj, 0);
		i915_gem_object_unlock(obj);
	}

	batch = intel_emit_vma_fill_blt(ce, vma, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = i915_request_await_object(rq, obj, true);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb) {
		err = ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	err = ce->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin:
	i915_vma_unpin(vma);
	return err;
}

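/*
 * Build a pooled batch buffer that copies @src to @dst with the blitter,
 * one copy command per 8MiB block with MI_ARB_CHECK in between so the
 * request can be preempted.  As with the fill batch, the caller must
 * release the returned vma with intel_emit_vma_release().
 */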
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

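	/*
	 * Worst-case batch size: 11 dwords per block (10 for the gen9+
	 * copy plus MI_ARB_CHECK), plus one dword for MI_BATCH_BUFFER_END.
	 */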
	count = div_u64(round_up(dst->size, block_size), block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

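	/*
	 * Emit one copy per block: XY_FAST_COPY_BLT on gen9+, the 48-bit
	 * XY_SRC_COPY_BLT on gen8, and the legacy SRC_COPY_BLT with 32-bit
	 * addresses on older parts.
	 */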
	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

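/*
 * Flush incoherent CPU caches for the object and make @rq wait for its
 * outstanding work before the copy reads or writes it.
 */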
static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
{
	struct drm_i915_gem_object *obj = vma->obj;

	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}

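/*
 * Copy the contents of @src into @dst on the GPU: pin both objects into
 * @ce's address space, build the copy batch and submit it as a request
 * on @ce, tracking @src as a read and @dst as a write.
 */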
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct drm_gem_object *objs[] = { &src->base, &dst->base };
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct ww_acquire_ctx acquire;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1])) {
		err = PTR_ERR(vma[1]);
		goto out_unpin_src;
	}

	err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

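	/*
	 * Take both objects' reservation locks together (single ww acquire
	 * context) so we can flush their caches, wait for prior users and
	 * track them in the request without risking lock inversion.
	 */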
	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
	if (unlikely(err))
		goto out_request;

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_to_gpu(vma[i], rq, i);
		if (unlikely(err))
			goto out_unlock;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_unlock;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_unlock;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_unlock:
	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif