/*	$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

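/*
 * Build a batch buffer that fills @vma with the 32-bit @value: one
 * XY_COLOR_BLT per 8MiB block, with an MI_ARB_CHECK between blocks so the
 * fill can be preempted.  Takes an engine-pm wakeref and a buffer from the
 * engine pool; both are released by intel_emit_vma_release().  Returns the
 * pinned batch vma (pool node stashed in vma->private) or an ERR_PTR.
 */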
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(vma->size, block_size), block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = vma->size;
	offset = vma->node.start;

	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

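/*
 * Record the batch @vma as in use by @rq: serialise against the object's
 * fences, move the vma onto the active list, and keep the underlying engine
 * pool node alive until the request is retired.
 */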
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (unlikely(err))
		return err;

	return intel_engine_pool_mark_active(vma->private, rq);
}

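/*
 * Undo intel_emit_vma_fill_blt()/intel_emit_vma_copy_blt(): unpin the batch,
 * return its buffer to the engine pool and drop the engine-pm wakeref.
 */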
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_engine_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}

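/*
 * Fill the whole of @obj with @value on the GPU: bind it into the context's
 * address space, flush stale CPU cachelines if needed, then submit the batch
 * built by intel_emit_vma_fill_blt() as a request on @ce.  Returns 0 on
 * success or a negative error code.
 */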
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	if (obj->cache_dirty & ~obj->cache_coherent) {
		i915_gem_object_lock(obj);
		i915_gem_clflush_object(obj, 0);
		i915_gem_object_unlock(obj);
	}

	batch = intel_emit_vma_fill_blt(ce, vma, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = i915_request_await_object(rq, obj, true);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb) {
		err = ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	err = ce->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin:
	i915_vma_unpin(vma);
	return err;
}

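/*
 * Build a batch buffer that copies @src to @dst (which must be the same
 * size), one blit per 8MiB block with an MI_ARB_CHECK between blocks.  Uses
 * GEN9_XY_FAST_COPY_BLT on gen9+, XY_SRC_COPY_BLT on gen8 and SRC_COPY_BLT
 * on older hardware.  Resources are released by intel_emit_vma_release().
 */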
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(dst->size, block_size), block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

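/*
 * Flush any stale, non-coherent CPU cachelines for @vma's object and
 * serialise @rq against the object's outstanding work (everything when
 * writing, writers only when reading).
 */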
static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
{
	struct drm_i915_gem_object *obj = vma->obj;

	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}

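/*
 * Copy the contents of @src into @dst on the GPU: bind both objects into the
 * context's address space and submit the batch built by
 * intel_emit_vma_copy_blt() as a request on @ce, with @dst marked as written.
 */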
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct drm_gem_object *objs[] = { &src->base, &dst->base };
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct ww_acquire_ctx acquire;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1])) {
		err = PTR_ERR(vma[1]);	/* was missing: err would still be 0 here */
		goto out_unpin_src;
	}

	err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
	if (unlikely(err))
		goto out_request;

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_to_gpu(vma[i], rq, i);
		if (unlikely(err))
			goto out_unlock;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_unlock;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_unlock;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_unlock:
	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif