1 /* $NetBSD: amdgpu_cs.c,v 1.7 2021/12/19 12:02:39 riastradh Exp $ */
2
3 /*
4 * Copyright 2008 Jerome Glisse.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 *
26 * Authors:
27 * Jerome Glisse <glisse@freedesktop.org>
28 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: amdgpu_cs.c,v 1.7 2021/12/19 12:02:39 riastradh Exp $");
32
33 #include <linux/file.h>
34 #include <linux/pagemap.h>
35 #include <linux/sync_file.h>
36
37 #include <drm/amdgpu_drm.h>
38 #include <drm/drm_syncobj.h>
39 #include "amdgpu.h"
40 #include "amdgpu_trace.h"
41 #include "amdgpu_gmc.h"
42 #include "amdgpu_gem.h"
43 #include "amdgpu_ras.h"
44
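/*
 * Look up the buffer object that backs the user fence for this
 * submission, check that it is a single page (and not a userptr BO)
 * with the requested fence offset inside it, and remember the BO and
 * offset in the parser for later use by the job.
 */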
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
48 {
49 struct drm_gem_object *gobj;
50 struct amdgpu_bo *bo;
51 unsigned long size;
52 int r;
53
54 gobj = drm_gem_object_lookup(p->filp, data->handle);
55 if (gobj == NULL)
56 return -EINVAL;
57
58 bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
59 p->uf_entry.priority = 0;
60 p->uf_entry.tv.bo = &bo->tbo;
61 /* One for TTM and one for the CS job */
62 p->uf_entry.tv.num_shared = 2;
63
64 drm_gem_object_put_unlocked(gobj);
65
66 size = amdgpu_bo_size(bo);
67 if (size != PAGE_SIZE || (data->offset + 8) > size) {
68 r = -EINVAL;
69 goto error_unref;
70 }
71
72 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
73 r = -EINVAL;
74 goto error_unref;
75 }
76
77 *offset = data->offset;
78
79 return 0;
80
81 error_unref:
82 amdgpu_bo_unref(&bo);
83 return r;
84 }
85
static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_bo_list_in *data)
88 {
89 int r;
90 struct drm_amdgpu_bo_list_entry *info = NULL;
91
92 r = amdgpu_bo_create_list_entry_array(data, &info);
93 if (r)
94 return r;
95
96 r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
97 &p->bo_list);
98 if (r)
99 goto error_free;
100
101 kvfree(info);
102 return 0;
103
104 error_free:
105 if (info)
106 kvfree(info);
107
108 return r;
109 }
110
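/*
 * Copy the chunk array from userspace and translate each chunk into its
 * kernel copy: IB chunks are counted, FENCE and BO_HANDLES chunks are
 * processed immediately, and the dependency/syncobj chunks are handled
 * later in amdgpu_cs_dependencies().  A job with num_ibs IBs is then
 * allocated.
 */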
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
112 {
113 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
114 struct amdgpu_vm *vm = &fpriv->vm;
115 uint64_t *chunk_array_user;
116 uint64_t *chunk_array;
117 unsigned size, num_ibs = 0;
118 uint32_t uf_offset = 0;
119 int i;
120 int ret;
121
122 if (cs->in.num_chunks == 0)
123 return 0;
124
125 chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
126 if (!chunk_array)
127 return -ENOMEM;
128
129 p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
130 if (!p->ctx) {
131 ret = -EINVAL;
132 goto free_chunk;
133 }
134
135 mutex_lock(&p->ctx->lock);
136
137 /* skip guilty context job */
138 if (atomic_read(&p->ctx->guilty) == 1) {
139 ret = -ECANCELED;
140 goto free_chunk;
141 }
142
143 /* get chunks */
144 chunk_array_user = u64_to_user_ptr(cs->in.chunks);
145 if (copy_from_user(chunk_array, chunk_array_user,
146 sizeof(uint64_t)*cs->in.num_chunks)) {
147 ret = -EFAULT;
148 goto free_chunk;
149 }
150
151 p->nchunks = cs->in.num_chunks;
152 p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
153 GFP_KERNEL);
154 if (!p->chunks) {
155 ret = -ENOMEM;
156 goto free_chunk;
157 }
158
159 for (i = 0; i < p->nchunks; i++) {
160 struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
161 struct drm_amdgpu_cs_chunk user_chunk;
162 uint32_t __user *cdata;
163
164 chunk_ptr = u64_to_user_ptr(chunk_array[i]);
165 if (copy_from_user(&user_chunk, chunk_ptr,
166 sizeof(struct drm_amdgpu_cs_chunk))) {
167 ret = -EFAULT;
168 i--;
169 goto free_partial_kdata;
170 }
171 p->chunks[i].chunk_id = user_chunk.chunk_id;
172 p->chunks[i].length_dw = user_chunk.length_dw;
173
174 size = p->chunks[i].length_dw;
175 cdata = u64_to_user_ptr(user_chunk.chunk_data);
176
177 p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
178 if (p->chunks[i].kdata == NULL) {
179 ret = -ENOMEM;
180 i--;
181 goto free_partial_kdata;
182 }
183 size *= sizeof(uint32_t);
184 if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
185 ret = -EFAULT;
186 goto free_partial_kdata;
187 }
188
189 switch (p->chunks[i].chunk_id) {
190 case AMDGPU_CHUNK_ID_IB:
191 ++num_ibs;
192 break;
193
194 case AMDGPU_CHUNK_ID_FENCE:
195 size = sizeof(struct drm_amdgpu_cs_chunk_fence);
196 if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
197 ret = -EINVAL;
198 goto free_partial_kdata;
199 }
200
201 ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
202 &uf_offset);
203 if (ret)
204 goto free_partial_kdata;
205
206 break;
207
208 case AMDGPU_CHUNK_ID_BO_HANDLES:
209 size = sizeof(struct drm_amdgpu_bo_list_in);
210 if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
211 ret = -EINVAL;
212 goto free_partial_kdata;
213 }
214
215 ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
216 if (ret)
217 goto free_partial_kdata;
218
219 break;
220
221 case AMDGPU_CHUNK_ID_DEPENDENCIES:
222 case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
223 case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
224 case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
225 case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
226 case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
227 break;
228
229 default:
230 ret = -EINVAL;
231 goto free_partial_kdata;
232 }
233 }
234
235 ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
236 if (ret)
237 goto free_all_kdata;
238
239 if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
240 ret = -ECANCELED;
241 goto free_all_kdata;
242 }
243
244 if (p->uf_entry.tv.bo)
245 p->job->uf_addr = uf_offset;
246 kfree(chunk_array);
247
248 /* Use this opportunity to fill in task info for the vm */
249 amdgpu_vm_set_task_info(vm);
250
251 return 0;
252
253 free_all_kdata:
254 i = p->nchunks - 1;
255 free_partial_kdata:
256 for (; i >= 0; i--)
257 kvfree(p->chunks[i].kdata);
258 kfree(p->chunks);
259 p->chunks = NULL;
260 p->nchunks = 0;
261 free_chunk:
262 kfree(chunk_array);
263
264 return ret;
265 }
266
267 /* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
269 {
270 if (us <= 0 || !adev->mm_stats.log2_max_MBps)
271 return 0;
272
273 /* Since accum_us is incremented by a million per second, just
274 * multiply it by the number of MB/s to get the number of bytes.
275 */
276 return us << adev->mm_stats.log2_max_MBps;
277 }
278
static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
280 {
281 if (!adev->mm_stats.log2_max_MBps)
282 return 0;
283
284 return bytes >> adev->mm_stats.log2_max_MBps;
285 }
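
/*
 * Worked example (illustrative values): if log2_max_MBps == 6, the
 * throttle rate is 2^6 = 64 MB/s.  One second of accumulated budget
 * (1,000,000 us) then converts to 1,000,000 << 6 = 64,000,000 bytes of
 * allowed buffer moves, and bytes_to_us() maps those bytes back to the
 * same 1,000,000 us.
 */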
286
287 /* Returns how many bytes TTM can move right now. If no bytes can be moved,
288 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
289 * which means it can go over the threshold once. If that happens, the driver
290 * will be in debt and no other buffer migrations can be done until that debt
291 * is repaid.
292 *
293 * This approach allows moving a buffer of any size (it's important to allow
294 * that).
295 *
296 * The currency is simply time in microseconds and it increases as the clock
297 * ticks. The accumulated microseconds (us) are converted to bytes and
298 * returned.
299 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
303 {
304 s64 time_us, increment_us;
305 u64 free_vram, total_vram, used_vram;
306
307 /* Allow a maximum of 200 accumulated ms. This is basically per-IB
308 * throttling.
309 *
310 * It means that in order to get full max MBps, at least 5 IBs per
311 * second must be submitted and not more than 200ms apart from each
312 * other.
313 */
314 const s64 us_upper_bound = 200000;
315
316 if (!adev->mm_stats.log2_max_MBps) {
317 *max_bytes = 0;
318 *max_vis_bytes = 0;
319 return;
320 }
321
322 total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
323 used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
324 free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
325
326 spin_lock(&adev->mm_stats.lock);
327
328 /* Increase the amount of accumulated us. */
329 time_us = ktime_to_us(ktime_get());
330 increment_us = time_us - adev->mm_stats.last_update_us;
331 adev->mm_stats.last_update_us = time_us;
332 adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
333 us_upper_bound);
334
335 /* This prevents the short period of low performance when the VRAM
336 * usage is low and the driver is in debt or doesn't have enough
337 * accumulated us to fill VRAM quickly.
338 *
339 * The situation can occur in these cases:
340 * - a lot of VRAM is freed by userspace
341 * - the presence of a big buffer causes a lot of evictions
342 * (solution: split buffers into smaller ones)
343 *
344 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
345 * accum_us to a positive number.
346 */
347 if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
348 s64 min_us;
349
		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
353 if (!(adev->flags & AMD_IS_APU))
354 min_us = bytes_to_us(adev, free_vram / 4);
355 else
356 min_us = 0; /* Reset accum_us on APUs. */
357
358 adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
359 }
360
361 /* This is set to 0 if the driver is in debt to disallow (optional)
362 * buffer moves.
363 */
364 *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
365
366 /* Do the same for visible VRAM if half of it is free */
367 if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
368 u64 total_vis_vram = adev->gmc.visible_vram_size;
369 u64 used_vis_vram =
370 amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
371
372 if (used_vis_vram < total_vis_vram) {
373 u64 free_vis_vram = total_vis_vram - used_vis_vram;
374 adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
375 increment_us, us_upper_bound);
376
377 if (free_vis_vram >= total_vis_vram / 2)
378 adev->mm_stats.accum_us_vis =
379 max(bytes_to_us(adev, free_vis_vram / 2),
380 adev->mm_stats.accum_us_vis);
381 }
382
383 *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
384 } else {
385 *max_vis_bytes = 0;
386 }
387
388 spin_unlock(&adev->mm_stats.lock);
389 }
390
391 /* Report how many bytes have really been moved for the last command
392 * submission. This can result in a debt that can stop buffer migrations
393 * temporarily.
394 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
				  u64 num_vis_bytes)
397 {
398 spin_lock(&adev->mm_stats.lock);
399 adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
400 adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
401 spin_unlock(&adev->mm_stats.lock);
402 }
403
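/*
 * Validate a single BO, choosing between its preferred and allowed
 * domains depending on how much of the per-submission move budget
 * (and CPU-visible VRAM budget) is still available.
 */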
static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
				 struct amdgpu_bo *bo)
406 {
407 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
408 struct ttm_operation_ctx ctx = {
409 .interruptible = true,
410 .no_wait_gpu = false,
411 .resv = bo->tbo.base.resv,
412 .flags = 0
413 };
414 uint32_t domain;
415 int r;
416
417 if (bo->pin_count)
418 return 0;
419
420 /* Don't move this buffer if we have depleted our allowance
421 * to move it. Don't move anything if the threshold is zero.
422 */
423 if (p->bytes_moved < p->bytes_moved_threshold) {
424 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
425 (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
426 /* And don't move a CPU_ACCESS_REQUIRED BO to limited
427 * visible VRAM if we've depleted our allowance to do
428 * that.
429 */
430 if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
431 domain = bo->preferred_domains;
432 else
433 domain = bo->allowed_domains;
434 } else {
435 domain = bo->preferred_domains;
436 }
437 } else {
438 domain = bo->allowed_domains;
439 }
440
441 retry:
442 amdgpu_bo_placement_from_domain(bo, domain);
443 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
444
445 p->bytes_moved += ctx.bytes_moved;
446 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
447 amdgpu_bo_in_cpu_visible_vram(bo))
448 p->bytes_moved_vis += ctx.bytes_moved;
449
450 if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
451 domain = bo->allowed_domains;
452 goto retry;
453 }
454
455 return r;
456 }
457
static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
459 {
460 struct amdgpu_cs_parser *p = param;
461 int r;
462
463 r = amdgpu_cs_bo_validate(p, bo);
464 if (r)
465 return r;
466
467 if (bo->shadow)
468 r = amdgpu_cs_bo_validate(p, bo->shadow);
469
470 return r;
471 }
472
static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
				   struct list_head *validated)
475 {
476 struct ttm_operation_ctx ctx = { true, false };
477 struct amdgpu_bo_list_entry *lobj;
478 int r;
479
480 list_for_each_entry(lobj, validated, tv.head) {
481 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
482 #ifdef __NetBSD__
483 struct vmspace *usermm;
484 #else
485 struct mm_struct *usermm;
486 #endif
487
488 usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
489 #ifdef __NetBSD__
490 if (usermm && usermm != curproc->p_vmspace)
491 #else
492 if (usermm && usermm != current->mm)
493 #endif
494 return -EPERM;
495
496 if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
497 lobj->user_invalidated && lobj->user_pages) {
498 amdgpu_bo_placement_from_domain(bo,
499 AMDGPU_GEM_DOMAIN_CPU);
500 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
501 if (r)
502 return r;
503
504 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
505 lobj->user_pages);
506 }
507
508 r = amdgpu_cs_validate(p, bo);
509 if (r)
510 return r;
511
512 kvfree(lobj->user_pages);
513 lobj->user_pages = NULL;
514 }
515 return 0;
516 }
517
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
520 {
521 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
522 struct amdgpu_vm *vm = &fpriv->vm;
523 struct amdgpu_bo_list_entry *e;
524 struct list_head duplicates;
525 struct amdgpu_bo *gds;
526 struct amdgpu_bo *gws;
527 struct amdgpu_bo *oa;
528 int r;
529
530 INIT_LIST_HEAD(&p->validated);
531
532 /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
533 if (cs->in.bo_list_handle) {
534 if (p->bo_list)
535 return -EINVAL;
536
537 r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
538 &p->bo_list);
539 if (r)
540 return r;
541 } else if (!p->bo_list) {
		/* Create an empty bo_list when no handle is provided */
543 r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
544 &p->bo_list);
545 if (r)
546 return r;
547 }
548
549 /* One for TTM and one for the CS job */
550 amdgpu_bo_list_for_each_entry(e, p->bo_list)
551 e->tv.num_shared = 2;
552
553 amdgpu_bo_list_get_list(p->bo_list, &p->validated);
554
555 INIT_LIST_HEAD(&duplicates);
556 amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
557
558 if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
559 list_add(&p->uf_entry.tv.head, &p->validated);
560
	/* Get the userptr backing pages.  If the pages were updated after
	 * being registered in amdgpu_gem_userptr_ioctl(),
	 * amdgpu_cs_list_validate() will do amdgpu_ttm_backend_bind() to
	 * flush and invalidate the new pages.
	 */
565 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
566 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
567 bool userpage_invalidated = false;
568 int i;
569
570 e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
571 sizeof(struct page *),
572 GFP_KERNEL | __GFP_ZERO);
573 if (!e->user_pages) {
574 DRM_ERROR("calloc failure\n");
575 return -ENOMEM;
576 }
577
578 r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
579 if (r) {
580 kvfree(e->user_pages);
581 e->user_pages = NULL;
582 return r;
583 }
584
585 for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
586 if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
587 userpage_invalidated = true;
588 break;
589 }
590 }
591 e->user_invalidated = userpage_invalidated;
592 }
593
594 r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
595 &duplicates);
596 if (unlikely(r != 0)) {
597 if (r != -ERESTARTSYS)
598 DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
599 goto out;
600 }
601
602 amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
603 &p->bytes_moved_vis_threshold);
604 p->bytes_moved = 0;
605 p->bytes_moved_vis = 0;
606
607 r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
608 amdgpu_cs_validate, p);
609 if (r) {
610 DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
611 goto error_validate;
612 }
613
614 r = amdgpu_cs_list_validate(p, &duplicates);
615 if (r)
616 goto error_validate;
617
618 r = amdgpu_cs_list_validate(p, &p->validated);
619 if (r)
620 goto error_validate;
621
622 amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
623 p->bytes_moved_vis);
624
625 gds = p->bo_list->gds_obj;
626 gws = p->bo_list->gws_obj;
627 oa = p->bo_list->oa_obj;
628
629 amdgpu_bo_list_for_each_entry(e, p->bo_list) {
630 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
631
632 /* Make sure we use the exclusive slot for shared BOs */
633 if (bo->prime_shared_count)
634 e->tv.num_shared = 0;
635 e->bo_va = amdgpu_vm_bo_find(vm, bo);
636 }
637
638 if (gds) {
639 p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
640 p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
641 }
642 if (gws) {
643 p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
644 p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
645 }
646 if (oa) {
647 p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
648 p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
649 }
650
651 if (!r && p->uf_entry.tv.bo) {
652 struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
653
654 r = amdgpu_ttm_alloc_gart(&uf->tbo);
655 p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
656 }
657
658 error_validate:
659 if (r)
660 ttm_eu_backoff_reservation(&p->ticket, &p->validated);
661 out:
662 return r;
663 }
664
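/*
 * Make the job wait for the fences already attached to every validated
 * BO, honouring each BO's explicit-sync setting.
 */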
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
666 {
667 struct amdgpu_bo_list_entry *e;
668 int r;
669
670 list_for_each_entry(e, &p->validated, tv.head) {
671 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
672 struct dma_resv *resv = bo->tbo.base.resv;
673
674 r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
675 amdgpu_bo_explicit_sync(bo));
676
677 if (r)
678 return r;
679 }
680 return 0;
681 }
682
/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: indicator to back off the buffer reservations on error
 *
 * If error is set, the buffers are unvalidated (their reservations backed
 * off), otherwise just free the memory used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
				  bool backoff)
693 {
694 unsigned i;
695
696 if (error && backoff)
697 ttm_eu_backoff_reservation(&parser->ticket,
698 &parser->validated);
699
700 for (i = 0; i < parser->num_post_deps; i++) {
701 drm_syncobj_put(parser->post_deps[i].syncobj);
702 kfree(parser->post_deps[i].chain);
703 }
704 kfree(parser->post_deps);
705
706 dma_fence_put(parser->fence);
707
708 if (parser->ctx) {
709 mutex_unlock(&parser->ctx->lock);
710 amdgpu_ctx_put(parser->ctx);
711 }
712 if (parser->bo_list)
713 amdgpu_bo_list_put(parser->bo_list);
714
715 for (i = 0; i < parser->nchunks; i++)
716 kvfree(parser->chunks[i].kdata);
717 kfree(parser->chunks);
718 if (parser->job)
719 amdgpu_job_free(parser->job);
720 if (parser->uf_entry.tv.bo) {
721 struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
722
723 amdgpu_bo_unref(&uf);
724 }
725 }
726
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
728 {
729 struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
730 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
731 struct amdgpu_device *adev = p->adev;
732 struct amdgpu_vm *vm = &fpriv->vm;
733 struct amdgpu_bo_list_entry *e;
734 struct amdgpu_bo_va *bo_va;
735 struct amdgpu_bo *bo;
736 int r;
737
738 /* Only for UVD/VCE VM emulation */
739 if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
740 unsigned i, j;
741
742 for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
743 struct drm_amdgpu_cs_chunk_ib *chunk_ib;
744 struct amdgpu_bo_va_mapping *m;
745 struct amdgpu_bo *aobj = NULL;
746 struct amdgpu_cs_chunk *chunk;
747 uint64_t offset, va_start;
748 struct amdgpu_ib *ib;
749 uint8_t *kptr;
750
751 chunk = &p->chunks[i];
752 ib = &p->job->ibs[j];
753 chunk_ib = chunk->kdata;
754
755 if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
756 continue;
757
758 va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
759 r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
760 if (r) {
761 DRM_ERROR("IB va_start is invalid\n");
762 return r;
763 }
764
765 if ((va_start + chunk_ib->ib_bytes) >
766 (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
767 DRM_ERROR("IB va_start+ib_bytes is invalid\n");
768 return -EINVAL;
769 }
770
771 /* the IB should be reserved at this point */
772 r = amdgpu_bo_kmap(aobj, (void **)&kptr);
773 if (r) {
774 return r;
775 }
776
777 offset = m->start * AMDGPU_GPU_PAGE_SIZE;
778 kptr += va_start - offset;
779
780 if (ring->funcs->parse_cs) {
781 memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
782 amdgpu_bo_kunmap(aobj);
783
784 r = amdgpu_ring_parse_cs(ring, p, j);
785 if (r)
786 return r;
787 } else {
788 ib->ptr = (uint32_t *)kptr;
789 r = amdgpu_ring_patch_cs_in_place(ring, p, j);
790 amdgpu_bo_kunmap(aobj);
791 if (r)
792 return r;
793 }
794
795 j++;
796 }
797 }
798
799 if (!p->job->vm)
800 return amdgpu_cs_sync_rings(p);
801
802
803 r = amdgpu_vm_clear_freed(adev, vm, NULL);
804 if (r)
805 return r;
806
807 r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
808 if (r)
809 return r;
810
811 r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
812 if (r)
813 return r;
814
815 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
816 bo_va = fpriv->csa_va;
817 BUG_ON(!bo_va);
818 r = amdgpu_vm_bo_update(adev, bo_va, false);
819 if (r)
820 return r;
821
822 r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
823 if (r)
824 return r;
825 }
826
827 amdgpu_bo_list_for_each_entry(e, p->bo_list) {
828 /* ignore duplicates */
829 bo = ttm_to_amdgpu_bo(e->tv.bo);
830 if (!bo)
831 continue;
832
833 bo_va = e->bo_va;
834 if (bo_va == NULL)
835 continue;
836
837 r = amdgpu_vm_bo_update(adev, bo_va, false);
838 if (r)
839 return r;
840
841 r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
842 if (r)
843 return r;
844 }
845
846 r = amdgpu_vm_handle_moved(adev, vm);
847 if (r)
848 return r;
849
850 r = amdgpu_vm_update_pdes(adev, vm, false);
851 if (r)
852 return r;
853
854 r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
855 if (r)
856 return r;
857
858 p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
859
860 if (amdgpu_vm_debug) {
861 /* Invalidate all BOs to test for userspace bugs */
862 amdgpu_bo_list_for_each_entry(e, p->bo_list) {
863 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
864
865 /* ignore duplicates */
866 if (!bo)
867 continue;
868
869 amdgpu_vm_bo_invalidate(adev, bo, false);
870 }
871 }
872
873 return amdgpu_cs_sync_rings(p);
874 }
875
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
878 {
879 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
880 struct amdgpu_vm *vm = &fpriv->vm;
881 int r, ce_preempt = 0, de_preempt = 0;
882 struct amdgpu_ring *ring;
883 int i, j;
884
885 for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
886 struct amdgpu_cs_chunk *chunk;
887 struct amdgpu_ib *ib;
888 struct drm_amdgpu_cs_chunk_ib *chunk_ib;
889 struct drm_sched_entity *entity;
890
891 chunk = &parser->chunks[i];
892 ib = &parser->job->ibs[j];
893 chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
894
895 if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
896 continue;
897
898 if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
899 (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
900 if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
901 if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
902 ce_preempt++;
903 else
904 de_preempt++;
905 }
906
907 /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
908 if (ce_preempt > 1 || de_preempt > 1)
909 return -EINVAL;
910 }
911
912 r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
913 chunk_ib->ip_instance, chunk_ib->ring,
914 &entity);
915 if (r)
916 return r;
917
918 if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
919 parser->job->preamble_status |=
920 AMDGPU_PREAMBLE_IB_PRESENT;
921
922 if (parser->entity && parser->entity != entity)
923 return -EINVAL;
924
		/* Return -EINVAL if there is no run queue associated with this
		 * entity, possibly because of a disabled HW IP. */
927 if (entity->rq == NULL)
928 return -EINVAL;
929
930 parser->entity = entity;
931
932 ring = to_amdgpu_ring(entity->rq->sched);
933 r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
934 chunk_ib->ib_bytes : 0, ib);
935 if (r) {
936 DRM_ERROR("Failed to get ib !\n");
937 return r;
938 }
939
940 ib->gpu_addr = chunk_ib->va_start;
941 ib->length_dw = chunk_ib->ib_bytes / 4;
942 ib->flags = chunk_ib->flags;
943
944 j++;
945 }
946
947 /* MM engine doesn't support user fences */
948 ring = to_amdgpu_ring(parser->entity->rq->sched);
949 if (parser->job->uf_addr && ring->funcs->no_user_fence)
950 return -EINVAL;
951
952 return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
953 }
954
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
				       struct amdgpu_cs_chunk *chunk)
957 {
958 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
959 unsigned num_deps;
960 int i, r;
961 struct drm_amdgpu_cs_chunk_dep *deps;
962
963 deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
964 num_deps = chunk->length_dw * 4 /
965 sizeof(struct drm_amdgpu_cs_chunk_dep);
966
967 for (i = 0; i < num_deps; ++i) {
968 struct amdgpu_ctx *ctx;
969 struct drm_sched_entity *entity;
970 struct dma_fence *fence;
971
972 ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
973 if (ctx == NULL)
974 return -EINVAL;
975
976 r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
977 deps[i].ip_instance,
978 deps[i].ring, &entity);
979 if (r) {
980 amdgpu_ctx_put(ctx);
981 return r;
982 }
983
984 fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
985 amdgpu_ctx_put(ctx);
986
987 if (IS_ERR(fence))
988 return PTR_ERR(fence);
989 else if (!fence)
990 continue;
991
992 if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
993 struct drm_sched_fence *s_fence;
994 struct dma_fence *old = fence;
995
996 s_fence = to_drm_sched_fence(fence);
997 fence = dma_fence_get(&s_fence->scheduled);
998 dma_fence_put(old);
999 }
1000
1001 r = amdgpu_sync_fence(&p->job->sync, fence, true);
1002 dma_fence_put(fence);
1003 if (r)
1004 return r;
1005 }
1006 return 0;
1007 }
1008
static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
						 uint32_t handle, u64 point,
						 u64 flags)
1012 {
1013 struct dma_fence *fence;
1014 int r;
1015
1016 r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
1017 if (r) {
1018 DRM_ERROR("syncobj %u failed to find fence @ %"PRIu64" (%d)!\n",
1019 handle, point, r);
1020 return r;
1021 }
1022
1023 r = amdgpu_sync_fence(&p->job->sync, fence, true);
1024 dma_fence_put(fence);
1025
1026 return r;
1027 }
1028
static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
1031 {
1032 struct drm_amdgpu_cs_chunk_sem *deps;
1033 unsigned num_deps;
1034 int i, r;
1035
1036 deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1037 num_deps = chunk->length_dw * 4 /
1038 sizeof(struct drm_amdgpu_cs_chunk_sem);
1039 for (i = 0; i < num_deps; ++i) {
1040 r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
1041 0, 0);
1042 if (r)
1043 return r;
1044 }
1045
1046 return 0;
1047 }
1048
1049
static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
						     struct amdgpu_cs_chunk *chunk)
1052 {
1053 struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1054 unsigned num_deps;
1055 int i, r;
1056
1057 syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1058 num_deps = chunk->length_dw * 4 /
1059 sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1060 for (i = 0; i < num_deps; ++i) {
1061 r = amdgpu_syncobj_lookup_and_add_to_sync(p,
1062 syncobj_deps[i].handle,
1063 syncobj_deps[i].point,
1064 syncobj_deps[i].flags);
1065 if (r)
1066 return r;
1067 }
1068
1069 return 0;
1070 }
1071
static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
1074 {
1075 struct drm_amdgpu_cs_chunk_sem *deps;
1076 unsigned num_deps;
1077 int i;
1078
1079 deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1080 num_deps = chunk->length_dw * 4 /
1081 sizeof(struct drm_amdgpu_cs_chunk_sem);
1082
1083 if (p->post_deps)
1084 return -EINVAL;
1085
1086 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1087 GFP_KERNEL);
1088 p->num_post_deps = 0;
1089
1090 if (!p->post_deps)
1091 return -ENOMEM;
1092
1093
1094 for (i = 0; i < num_deps; ++i) {
1095 p->post_deps[i].syncobj =
1096 drm_syncobj_find(p->filp, deps[i].handle);
1097 if (!p->post_deps[i].syncobj)
1098 return -EINVAL;
1099 p->post_deps[i].chain = NULL;
1100 p->post_deps[i].point = 0;
1101 p->num_post_deps++;
1102 }
1103
1104 return 0;
1105 }
1106
1107
static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
						      struct amdgpu_cs_chunk *chunk)
1110 {
1111 struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1112 unsigned num_deps;
1113 int i;
1114
1115 syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1116 num_deps = chunk->length_dw * 4 /
1117 sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1118
1119 if (p->post_deps)
1120 return -EINVAL;
1121
1122 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1123 GFP_KERNEL);
1124 p->num_post_deps = 0;
1125
1126 if (!p->post_deps)
1127 return -ENOMEM;
1128
1129 for (i = 0; i < num_deps; ++i) {
1130 struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
1131
1132 dep->chain = NULL;
1133 if (syncobj_deps[i].point) {
1134 dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
1135 if (!dep->chain)
1136 return -ENOMEM;
1137 }
1138
1139 dep->syncobj = drm_syncobj_find(p->filp,
1140 syncobj_deps[i].handle);
1141 if (!dep->syncobj) {
1142 kfree(dep->chain);
1143 return -EINVAL;
1144 }
1145 dep->point = syncobj_deps[i].point;
1146 p->num_post_deps++;
1147 }
1148
1149 return 0;
1150 }
1151
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
1154 {
1155 int i, r;
1156
1157 for (i = 0; i < p->nchunks; ++i) {
1158 struct amdgpu_cs_chunk *chunk;
1159
1160 chunk = &p->chunks[i];
1161
1162 switch (chunk->chunk_id) {
1163 case AMDGPU_CHUNK_ID_DEPENDENCIES:
1164 case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
1165 r = amdgpu_cs_process_fence_dep(p, chunk);
1166 if (r)
1167 return r;
1168 break;
1169 case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
1170 r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
1171 if (r)
1172 return r;
1173 break;
1174 case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
1175 r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
1176 if (r)
1177 return r;
1178 break;
1179 case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
1180 r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
1181 if (r)
1182 return r;
1183 break;
1184 case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
1185 r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
1186 if (r)
1187 return r;
1188 break;
1189 }
1190 }
1191
1192 return 0;
1193 }
1194
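/*
 * Signal the syncobjs collected from the SYNCOBJ_OUT and
 * SYNCOBJ_TIMELINE_SIGNAL chunks with the submission's fence, either by
 * adding a new timeline point or by replacing the syncobj's fence.
 */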
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1196 {
1197 int i;
1198
1199 for (i = 0; i < p->num_post_deps; ++i) {
1200 if (p->post_deps[i].chain && p->post_deps[i].point) {
1201 drm_syncobj_add_point(p->post_deps[i].syncobj,
1202 p->post_deps[i].chain,
1203 p->fence, p->post_deps[i].point);
1204 p->post_deps[i].chain = NULL;
1205 } else {
1206 drm_syncobj_replace_fence(p->post_deps[i].syncobj,
1207 p->fence);
1208 }
1209 }
1210 }
1211
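/*
 * Hand the prepared job over to the scheduler: take the notifier lock,
 * re-check that no userptr pages were invalidated, publish the fence to
 * the context and the post-dependency syncobjs, and push the job to its
 * scheduler entity.
 */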
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
1214 {
1215 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1216 struct drm_sched_entity *entity = p->entity;
1217 enum drm_sched_priority priority;
1218 struct amdgpu_ring *ring;
1219 struct amdgpu_bo_list_entry *e;
1220 struct amdgpu_job *job;
1221 uint64_t seq;
1222 int r;
1223
1224 job = p->job;
1225 p->job = NULL;
1226
1227 r = drm_sched_job_init(&job->base, entity, p->filp);
1228 if (r)
1229 goto error_unlock;
1230
	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and the fence
	 * is added to the BOs.
	 */
1235 mutex_lock(&p->adev->notifier_lock);
1236
	/* If userptrs are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN; drmIoctl() in libdrm will then restart the amdgpu_cs_ioctl.
	 */
1240 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
1241 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
1242
1243 r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1244 }
1245 if (r) {
1246 r = -EAGAIN;
1247 goto error_abort;
1248 }
1249
1250 p->fence = dma_fence_get(&job->base.s_fence->finished);
1251
1252 amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
1253 amdgpu_cs_post_dependencies(p);
1254
1255 if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
1256 !p->ctx->preamble_presented) {
1257 job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
1258 p->ctx->preamble_presented = true;
1259 }
1260
1261 cs->out.handle = seq;
1262 job->uf_sequence = seq;
1263
1264 amdgpu_job_free_resources(job);
1265
1266 trace_amdgpu_cs_ioctl(job);
1267 amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
1268 priority = job->base.s_priority;
1269 drm_sched_entity_push_job(&job->base, entity);
1270
1271 ring = to_amdgpu_ring(entity->rq->sched);
1272 amdgpu_ring_priority_get(ring, priority);
1273
1274 amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
1275
1276 ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
1277 mutex_unlock(&p->adev->notifier_lock);
1278
1279 return 0;
1280
1281 error_abort:
1282 drm_sched_job_cleanup(&job->base);
1283 mutex_unlock(&p->adev->notifier_lock);
1284
1285 error_unlock:
1286 amdgpu_job_free(job);
1287 return r;
1288 }
1289
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1291 {
1292 struct amdgpu_device *adev = dev->dev_private;
1293 union drm_amdgpu_cs *cs = data;
1294 struct amdgpu_cs_parser parser = {};
1295 bool reserved_buffers = false;
1296 int i, r;
1297
1298 if (amdgpu_ras_intr_triggered())
1299 return -EHWPOISON;
1300
1301 if (!adev->accel_working)
1302 return -EBUSY;
1303
1304 parser.adev = adev;
1305 parser.filp = filp;
1306
1307 r = amdgpu_cs_parser_init(&parser, data);
1308 if (r) {
1309 DRM_ERROR("Failed to initialize parser %d!\n", r);
1310 goto out;
1311 }
1312
1313 r = amdgpu_cs_ib_fill(adev, &parser);
1314 if (r)
1315 goto out;
1316
1317 r = amdgpu_cs_dependencies(adev, &parser);
1318 if (r) {
1319 DRM_ERROR("Failed in the dependencies handling %d!\n", r);
1320 goto out;
1321 }
1322
1323 r = amdgpu_cs_parser_bos(&parser, data);
1324 if (r) {
1325 if (r == -ENOMEM)
1326 DRM_ERROR("Not enough memory for command submission!\n");
1327 else if (r != -ERESTARTSYS && r != -EAGAIN)
1328 DRM_ERROR("Failed to process the buffer list %d!\n", r);
1329 goto out;
1330 }
1331
1332 reserved_buffers = true;
1333
1334 for (i = 0; i < parser.job->num_ibs; i++)
1335 trace_amdgpu_cs(&parser, i);
1336
1337 r = amdgpu_cs_vm_handling(&parser);
1338 if (r)
1339 goto out;
1340
1341 r = amdgpu_cs_submit(&parser, cs);
1342
1343 out:
1344 amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1345
1346 return r;
1347 }
1348
1349 /**
1350 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1351 *
1352 * @dev: drm device
1353 * @data: data from userspace
1354 * @filp: file private
1355 *
1356 * Wait for the command submission identified by handle to finish.
1357 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
1360 {
1361 union drm_amdgpu_wait_cs *wait = data;
1362 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1363 struct drm_sched_entity *entity;
1364 struct amdgpu_ctx *ctx;
1365 struct dma_fence *fence;
1366 long r;
1367
1368 ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1369 if (ctx == NULL)
1370 return -EINVAL;
1371
1372 r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
1373 wait->in.ring, &entity);
1374 if (r) {
1375 amdgpu_ctx_put(ctx);
1376 return r;
1377 }
1378
1379 fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1380 if (IS_ERR(fence))
1381 r = PTR_ERR(fence);
1382 else if (fence) {
1383 r = dma_fence_wait_timeout(fence, true, timeout);
1384 if (r > 0 && fence->error)
1385 r = fence->error;
1386 dma_fence_put(fence);
1387 } else
1388 r = 1;
1389
1390 amdgpu_ctx_put(ctx);
1391 if (r < 0)
1392 return r;
1393
1394 memset(wait, 0, sizeof(*wait));
1395 wait->out.status = (r == 0);
1396
1397 return 0;
1398 }
1399
1400 /**
1401 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1402 *
1403 * @adev: amdgpu device
1404 * @filp: file private
1405 * @user: drm_amdgpu_fence copied from user space
1406 */
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
					     struct drm_file *filp,
					     struct drm_amdgpu_fence *user)
1410 {
1411 struct drm_sched_entity *entity;
1412 struct amdgpu_ctx *ctx;
1413 struct dma_fence *fence;
1414 int r;
1415
1416 ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1417 if (ctx == NULL)
1418 return ERR_PTR(-EINVAL);
1419
1420 r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
1421 user->ring, &entity);
1422 if (r) {
1423 amdgpu_ctx_put(ctx);
1424 return ERR_PTR(r);
1425 }
1426
1427 fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1428 amdgpu_ctx_put(ctx);
1429
1430 return fence;
1431 }
1432
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
1435 {
1436 struct amdgpu_device *adev = dev->dev_private;
1437 union drm_amdgpu_fence_to_handle *info = data;
1438 struct dma_fence *fence;
1439 struct drm_syncobj *syncobj;
1440 struct sync_file *sync_file;
1441 int fd, r;
1442
1443 fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1444 if (IS_ERR(fence))
1445 return PTR_ERR(fence);
1446
1447 if (!fence)
1448 fence = dma_fence_get_stub();
1449
1450 switch (info->in.what) {
1451 case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1452 r = drm_syncobj_create(&syncobj, 0, fence);
1453 dma_fence_put(fence);
1454 if (r)
1455 return r;
1456 r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1457 drm_syncobj_put(syncobj);
1458 return r;
1459
1460 case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1461 r = drm_syncobj_create(&syncobj, 0, fence);
1462 dma_fence_put(fence);
1463 if (r)
1464 return r;
1465 r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
1466 drm_syncobj_put(syncobj);
1467 return r;
1468
1469 case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1470 #ifdef __NetBSD__
1471 {
1472 struct file *fp = NULL;
1473
1474 /* XXX errno NetBSD->Linux */
1475 r = -fd_allocfile(&fp, &fd);
1476 if (r)
1477 goto out;
1478 sync_file = sync_file_create(fence, fp);
1479 if (sync_file == NULL)
1480 goto out;
1481 fd_affix(curproc, fp, fd);
1482 fp = NULL; /* consumed by sync_file */
1483
1484 out: if (fp) {
1485 fd_abort(curproc, fp, fd);
1486 fd = -1;
1487 }
1488 dma_fence_put(fence);
1489 }
1490 #else
1491 fd = get_unused_fd_flags(O_CLOEXEC);
1492 if (fd < 0) {
1493 dma_fence_put(fence);
1494 return fd;
1495 }
1496
1497 sync_file = sync_file_create(fence);
1498 dma_fence_put(fence);
1499 if (!sync_file) {
1500 put_unused_fd(fd);
1501 return -ENOMEM;
1502 }
1503
1504 fd_install(fd, sync_file->file);
1505 #endif
1506 info->out.handle = fd;
1507 return 0;
1508
1509 default:
1510 return -EINVAL;
1511 }
1512 }
1513
1514 /**
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
1516 *
1517 * @adev: amdgpu device
1518 * @filp: file private
1519 * @wait: wait parameters
1520 * @fences: array of drm_amdgpu_fence
1521 */
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
				     struct drm_file *filp,
				     union drm_amdgpu_wait_fences *wait,
				     struct drm_amdgpu_fence *fences)
1526 {
1527 uint32_t fence_count = wait->in.fence_count;
1528 unsigned int i;
1529 long r = 1;
1530
1531 for (i = 0; i < fence_count; i++) {
1532 struct dma_fence *fence;
1533 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1534
1535 fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1536 if (IS_ERR(fence))
1537 return PTR_ERR(fence);
1538 else if (!fence)
1539 continue;
1540
1541 r = dma_fence_wait_timeout(fence, true, timeout);
1542 dma_fence_put(fence);
1543 if (r < 0)
1544 return r;
1545
1546 if (r == 0)
1547 break;
1548
1549 if (fence->error)
1550 return fence->error;
1551 }
1552
1553 memset(wait, 0, sizeof(*wait));
1554 wait->out.status = (r > 0);
1555
1556 return 0;
1557 }
1558
1559 /**
1560 * amdgpu_cs_wait_any_fence - wait on any fence to signal
1561 *
1562 * @adev: amdgpu device
1563 * @filp: file private
1564 * @wait: wait parameters
1565 * @fences: array of drm_amdgpu_fence
1566 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
				    struct drm_file *filp,
				    union drm_amdgpu_wait_fences *wait,
				    struct drm_amdgpu_fence *fences)
1571 {
1572 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1573 uint32_t fence_count = wait->in.fence_count;
1574 uint32_t first = ~0;
1575 struct dma_fence **array;
1576 unsigned int i;
1577 long r;
1578
1579 /* Prepare the fence array */
1580 array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
1581
1582 if (array == NULL)
1583 return -ENOMEM;
1584
1585 for (i = 0; i < fence_count; i++) {
1586 struct dma_fence *fence;
1587
1588 fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1589 if (IS_ERR(fence)) {
1590 r = PTR_ERR(fence);
1591 goto err_free_fence_array;
1592 } else if (fence) {
1593 array[i] = fence;
		} else { /* NULL, the fence has already been signaled */
1595 r = 1;
1596 first = i;
1597 goto out;
1598 }
1599 }
1600
1601 r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
1602 &first);
1603 if (r < 0)
1604 goto err_free_fence_array;
1605
1606 out:
1607 memset(wait, 0, sizeof(*wait));
1608 wait->out.status = (r > 0);
1609 wait->out.first_signaled = first;
1610
1611 if (first < fence_count && array[first])
1612 r = array[first]->error;
1613 else
1614 r = 0;
1615
1616 err_free_fence_array:
1617 for (i = 0; i < fence_count; i++)
1618 dma_fence_put(array[i]);
1619 kfree(array);
1620
1621 return r;
1622 }
1623
1624 /**
1625 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1626 *
1627 * @dev: drm device
1628 * @data: data from userspace
1629 * @filp: file private
1630 */
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
1633 {
1634 struct amdgpu_device *adev = dev->dev_private;
1635 union drm_amdgpu_wait_fences *wait = data;
1636 uint32_t fence_count = wait->in.fence_count;
1637 struct drm_amdgpu_fence *fences_user;
1638 struct drm_amdgpu_fence *fences;
1639 int r;
1640
1641 /* Get the fences from userspace */
1642 fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1643 GFP_KERNEL);
1644 if (fences == NULL)
1645 return -ENOMEM;
1646
1647 fences_user = u64_to_user_ptr(wait->in.fences);
1648 if (copy_from_user(fences, fences_user,
1649 sizeof(struct drm_amdgpu_fence) * fence_count)) {
1650 r = -EFAULT;
1651 goto err_free_fences;
1652 }
1653
1654 if (wait->in.wait_all)
1655 r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1656 else
1657 r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1658
1659 err_free_fences:
1660 kfree(fences);
1661
1662 return r;
1663 }
1664
/**
 * amdgpu_cs_find_mapping - find the BO and VM mapping for a GPU address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 * @map: resulting mapping found
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns 0 and fills in @bo and @map when the
 * mapping is found, an error code otherwise.
 */
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
			   uint64_t addr, struct amdgpu_bo **bo,
			   struct amdgpu_bo_va_mapping **map)
1679 {
1680 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1681 struct ttm_operation_ctx ctx = { false, false };
1682 struct amdgpu_vm *vm = &fpriv->vm;
1683 struct amdgpu_bo_va_mapping *mapping;
1684 int r;
1685
1686 addr /= AMDGPU_GPU_PAGE_SIZE;
1687
1688 mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1689 if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1690 return -EINVAL;
1691
1692 *bo = mapping->bo_va->base.bo;
1693 *map = mapping;
1694
1695 /* Double check that the BO is reserved by this CS */
1696 if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
1697 return -EINVAL;
1698
1699 if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1700 (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1701 amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
1702 r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
1703 if (r)
1704 return r;
1705 }
1706
1707 return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1708 }
1709