/*	$NetBSD: amdgpu_cs.c,v 1.7 2021/12/19 12:02:39 riastradh Exp $	*/

/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_cs.c,v 1.7 2021/12/19 12:02:39 riastradh Exp $");

#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>

#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"

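/* Resolve the user fence chunk: look up the GEM handle, take a reference
 * to the BO for the CS job, and check that the 8-byte fence offset fits in
 * a single-page, non-userptr BO.
 */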
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
{
	struct drm_gem_object *gobj;
	struct amdgpu_bo *bo;
	unsigned long size;
	int r;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &bo->tbo;
	/* One for TTM and one for the CS job */
	p->uf_entry.tv.num_shared = 2;

	drm_gem_object_put_unlocked(gobj);

	size = amdgpu_bo_size(bo);
	if (size != PAGE_SIZE || (data->offset + 8) > size) {
		r = -EINVAL;
		goto error_unref;
	}

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		r = -EINVAL;
		goto error_unref;
	}

	*offset = data->offset;

	return 0;

error_unref:
	amdgpu_bo_unref(&bo);
	return r;
}

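/* Build p->bo_list from a BO_HANDLES chunk passed inline with the CS,
 * instead of a pre-created bo_list handle.
 */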
static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_bo_list_in *data)
{
	int r;
	struct drm_amdgpu_bo_list_entry *info = NULL;

	r = amdgpu_bo_create_list_entry_array(data, &info);
	if (r)
		return r;

	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
				  &p->bo_list);
	if (r)
		goto error_free;

	kvfree(info);
	return 0;

error_free:
	if (info)
		kvfree(info);

	return r;
}

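/* Copy the chunk descriptors in from userspace, classify each chunk,
 * allocate the job for the IB chunks, and record the user fence offset.
 * The context lock taken here is dropped in amdgpu_cs_parser_fini().
 */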
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	unsigned size, num_ibs = 0;
	uint32_t uf_offset = 0;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	mutex_lock(&p->ctx->lock);

	/* skip guilty context job */
	if (atomic_read(&p->ctx->guilty) == 1) {
		ret = -ECANCELED;
		goto free_chunk;
	}

	/* get chunks */
	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto free_chunk;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				  GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = u64_to_user_ptr(user_chunk.chunk_data);

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			++num_ibs;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
							 &uf_offset);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_BO_HANDLES:
			size = sizeof(struct drm_amdgpu_bo_list_in);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
	if (ret)
		goto free_all_kdata;

	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_entry.tv.bo)
		p->job->uf_addr = uf_offset;
	kfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
	amdgpu_vm_set_task_info(vm);

	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kfree(chunk_array);

	return ret;
}

/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
		return 0;

	/* Since accum_us is incremented by a million per second, just
	 * multiply it by the number of MB/s to get the number of bytes.
	 */
	return us << adev->mm_stats.log2_max_MBps;
}

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
	if (!adev->mm_stats.log2_max_MBps)
		return 0;

	return bytes >> adev->mm_stats.log2_max_MBps;
}

/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;

	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
				      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
		  amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);

		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;
			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}

/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
 * temporarily.
 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
				  u64 num_vis_bytes)
{
	spin_lock(&adev->mm_stats.lock);
	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
	spin_unlock(&adev->mm_stats.lock);
}

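/* Validate a single BO, preferring its preferred domains while the
 * per-submission move budget allows it, and falling back to the allowed
 * domains (retrying once on -ENOMEM) otherwise.
 */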
static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
				 struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.resv = bo->tbo.base.resv,
		.flags = 0
	};
	uint32_t domain;
	int r;

	if (bo->pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
	if (p->bytes_moved < p->bytes_moved_threshold) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_bo_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	p->bytes_moved += ctx.bytes_moved;
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
	    amdgpu_bo_in_cpu_visible_vram(bo))
		p->bytes_moved_vis += ctx.bytes_moved;

	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}

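/* Validation callback used for the BO list and page-table BOs: validate
 * the BO itself and, if it has one, its shadow BO.
 */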
static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_cs_parser *p = param;
	int r;

	r = amdgpu_cs_bo_validate(p, bo);
	if (r)
		return r;

	if (bo->shadow)
		r = amdgpu_cs_bo_validate(p, bo->shadow);

	return r;
}

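/* Validate every BO on a reservation list.  Userptr BOs belonging to
 * another process are rejected, and invalidated userptr BOs are rebound
 * to their freshly fetched user pages before validation.
 */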
static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
				   struct list_head *validated)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_bo_list_entry *lobj;
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
#ifdef __NetBSD__
		struct vmspace *usermm;
#else
		struct mm_struct *usermm;
#endif

		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
#ifdef __NetBSD__
		if (usermm && usermm != curproc->p_vmspace)
#else
		if (usermm && usermm != current->mm)
#endif
			return -EPERM;

		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
		    lobj->user_invalidated && lobj->user_pages) {
			amdgpu_bo_placement_from_domain(bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (r)
				return r;

			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
		}

		r = amdgpu_cs_validate(p, bo);
		if (r)
			return r;

		kvfree(lobj->user_pages);
		lobj->user_pages = NULL;
	}
	return 0;
}

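/* Gather and reserve every BO the submission touches (bo_list, VM page
 * directory, user fence BO, userptr BOs), validate them within the move
 * budget, and fill in the GDS/GWS/OA job parameters.
 */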
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	struct amdgpu_bo *gds;
	struct amdgpu_bo *gws;
	struct amdgpu_bo *oa;
	int r;

	INIT_LIST_HEAD(&p->validated);

	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
	if (cs->in.bo_list_handle) {
		if (p->bo_list)
			return -EINVAL;

		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
				       &p->bo_list);
		if (r)
			return r;
	} else if (!p->bo_list) {
		/* Create an empty bo_list when no handle is provided */
		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
					  &p->bo_list);
		if (r)
			return r;
	}

	/* One for TTM and one for the CS job */
	amdgpu_bo_list_for_each_entry(e, p->bo_list)
		e->tv.num_shared = 2;

	amdgpu_bo_list_get_list(p->bo_list, &p->validated);

	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
		list_add(&p->uf_entry.tv.head, &p->validated);

	/* Get userptr backing pages. If pages are updated after registered
	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
		bool userpage_invalidated = false;
		int i;

		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					       sizeof(struct page *),
					       GFP_KERNEL | __GFP_ZERO);
		if (!e->user_pages) {
			DRM_ERROR("calloc failure\n");
			return -ENOMEM;
		}

		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
		if (r) {
			kvfree(e->user_pages);
			e->user_pages = NULL;
			return r;
		}

		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
				userpage_invalidated = true;
				break;
			}
		}
		e->user_invalidated = userpage_invalidated;
	}

	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
				   &duplicates);
	if (unlikely(r != 0)) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
		goto out;
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;

	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
		goto error_validate;
	}

	r = amdgpu_cs_list_validate(p, &duplicates);
	if (r)
		goto error_validate;

	r = amdgpu_cs_list_validate(p, &p->validated);
	if (r)
		goto error_validate;

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	gds = p->bo_list->gds_obj;
	gws = p->bo_list->gws_obj;
	oa = p->bo_list->oa_obj;

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

		/* Make sure we use the exclusive slot for shared BOs */
		if (bo->prime_shared_count)
			e->tv.num_shared = 0;
		e->bo_va = amdgpu_vm_bo_find(vm, bo);
	}

	if (gds) {
		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
	}
	if (gws) {
		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
	}
	if (oa) {
		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
	}

	if (!r && p->uf_entry.tv.bo) {
		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);

		r = amdgpu_ttm_alloc_gart(&uf->tbo);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
	}

error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
out:
	return r;
}

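/* Make the job wait on the fences already attached to each validated
 * BO's reservation object.
 */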
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_bo_list_entry *e;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
		struct dma_resv *resv = bo->tbo.base.resv;

		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
				     amdgpu_bo_explicit_sync(bo));

		if (r)
			return r;
	}
	return 0;
}

/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: whether to back off the buffer reservations on error
 *
 * If error is set, unvalidate the buffers; otherwise just free the memory
 * used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
				  bool backoff)
{
	unsigned i;

	if (error && backoff)
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);

	for (i = 0; i < parser->num_post_deps; i++) {
		drm_syncobj_put(parser->post_deps[i].syncobj);
		kfree(parser->post_deps[i].chain);
	}
	kfree(parser->post_deps);

	dma_fence_put(parser->fence);

	if (parser->ctx) {
		mutex_unlock(&parser->ctx->lock);
		amdgpu_ctx_put(parser->ctx);
	}
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
	if (parser->uf_entry.tv.bo) {
		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);

		amdgpu_bo_unref(&uf);
	}
}

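/* Per-VM handling for the submission: parse or patch IBs for rings that
 * need VM emulation (UVD/VCE), update the page tables for every mapped BO,
 * and fold the resulting fences into the job's sync object.
 */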
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
		unsigned i, j;

		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			struct amdgpu_cs_chunk *chunk;
			uint64_t offset, va_start;
			struct amdgpu_ib *ib;
			uint8_t *kptr;

			chunk = &p->chunks[i];
			ib = &p->job->ibs[j];
			chunk_ib = chunk->kdata;

			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;

			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
				return r;
			}

			if ((va_start + chunk_ib->ib_bytes) >
			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r) {
				return r;
			}

			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
			kptr += va_start - offset;

			if (ring->funcs->parse_cs) {
				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
				amdgpu_bo_kunmap(aobj);

				r = amdgpu_ring_parse_cs(ring, p, j);
				if (r)
					return r;
			} else {
				ib->ptr = (uint32_t *)kptr;
				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
				amdgpu_bo_kunmap(aobj);
				if (r)
					return r;
			}

			j++;
		}
	}

	if (!p->job->vm)
		return amdgpu_cs_sync_rings(p);


	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
	if (r)
		return r;

	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		/* ignore duplicates */
		bo = ttm_to_amdgpu_bo(e->tv.bo);
		if (!bo)
			continue;

		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		return r;

	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
	if (r)
		return r;

	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);

	if (amdgpu_vm_debug) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

			/* ignore duplicates */
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo, false);
		}
	}

	return amdgpu_cs_sync_rings(p);
}

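/* Walk the IB chunks: enforce the CE/DE preemption limits for GFX,
 * resolve the scheduler entity the submission targets, and allocate and
 * fill the job's IBs.
 */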
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int r, ce_preempt = 0, de_preempt = 0;
	struct amdgpu_ring *ring;
	int i, j;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct drm_sched_entity *entity;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
				else
					de_preempt++;
			}

			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
			if (ce_preempt > 1 || de_preempt > 1)
				return -EINVAL;
		}

		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
					  chunk_ib->ip_instance, chunk_ib->ring,
					  &entity);
		if (r)
			return r;

		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
			parser->job->preamble_status |=
				AMDGPU_PREAMBLE_IB_PRESENT;

		if (parser->entity && parser->entity != entity)
			return -EINVAL;

		/* Return if there is no run queue associated with this entity.
		 * Possibly because of disabled HW IP */
		if (entity->rq == NULL)
			return -EINVAL;

		parser->entity = entity;

		ring = to_amdgpu_ring(entity->rq->sched);
		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
				  chunk_ib->ib_bytes : 0, ib);
		if (r) {
			DRM_ERROR("Failed to get ib !\n");
			return r;
		}

		ib->gpu_addr = chunk_ib->va_start;
		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;

		j++;
	}

	/* MM engine doesn't support user fences */
	ring = to_amdgpu_ring(parser->entity->rq->sched);
	if (parser->job->uf_addr && ring->funcs->no_user_fence)
		return -EINVAL;

	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
}

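/* Record the fences named by a DEPENDENCIES or SCHEDULED_DEPENDENCIES
 * chunk in the job's sync object.
 */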
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
				       struct amdgpu_cs_chunk *chunk)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned num_deps;
	int i, r;
	struct drm_amdgpu_cs_chunk_dep *deps;

	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ctx *ctx;
		struct drm_sched_entity *entity;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
		amdgpu_ctx_put(ctx);

		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
			struct drm_sched_fence *s_fence;
			struct dma_fence *old = fence;

			s_fence = to_drm_sched_fence(fence);
			fence = dma_fence_get(&s_fence->scheduled);
			dma_fence_put(old);
		}

		r = amdgpu_sync_fence(&p->job->sync, fence, true);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
						 uint32_t handle, u64 point,
						 u64 flags)
{
	struct dma_fence *fence;
	int r;

	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
	if (r) {
		DRM_ERROR("syncobj %u failed to find fence @ %"PRIu64" (%d)!\n",
			  handle, point, r);
		return r;
	}

	r = amdgpu_sync_fence(&p->job->sync, fence, true);
	dma_fence_put(fence);

	return r;
}

static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i, r;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
							  0, 0);
		if (r)
			return r;
	}

	return 0;
}


static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
						     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
	unsigned num_deps;
	int i, r;

	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
							  syncobj_deps[i].handle,
							  syncobj_deps[i].point,
							  syncobj_deps[i].flags);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;


	for (i = 0; i < num_deps; ++i) {
		p->post_deps[i].syncobj =
			drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_deps[i].syncobj)
			return -EINVAL;
		p->post_deps[i].chain = NULL;
		p->post_deps[i].point = 0;
		p->num_post_deps++;
	}

	return 0;
}


static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
						      struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
	unsigned num_deps;
	int i;

	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

		dep->chain = NULL;
		if (syncobj_deps[i].point) {
			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
			if (!dep->chain)
				return -ENOMEM;
		}

		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
		if (!dep->syncobj) {
			kfree(dep->chain);
			return -EINVAL;
		}
		dep->point = syncobj_deps[i].point;
		p->num_post_deps++;
	}

	return 0;
}

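/* Dispatch every dependency-style chunk to its handler so all requested
 * waits and signal targets are recorded before submission.
 */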
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct amdgpu_cs_chunk *chunk;

		chunk = &p->chunks[i];

		switch (chunk->chunk_id) {
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_process_fence_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
			if (r)
				return r;
			break;
		}
	}

	return 0;
}

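/* Once the scheduler fence exists, install it in every syncobj the
 * submission asked to signal, as a timeline point where one was given.
 */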
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
	int i;

	for (i = 0; i < p->num_post_deps; ++i) {
		if (p->post_deps[i].chain && p->post_deps[i].point) {
			drm_syncobj_add_point(p->post_deps[i].syncobj,
					      p->post_deps[i].chain,
					      p->fence, p->post_deps[i].point);
			p->post_deps[i].chain = NULL;
		} else {
			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
						  p->fence);
		}
	}
}

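/* Hand the prepared job to the scheduler: init the scheduler job, re-check
 * userptr validity under the notifier lock, push the job, fence the
 * reserved BOs, and return the sequence number to userspace.
 */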
amdgpu_cs_submit(struct amdgpu_cs_parser * p,union drm_amdgpu_cs * cs)121241ec0267Sriastradh static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
121341ec0267Sriastradh union drm_amdgpu_cs *cs)
121441ec0267Sriastradh {
121541ec0267Sriastradh struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
121641ec0267Sriastradh struct drm_sched_entity *entity = p->entity;
121741ec0267Sriastradh enum drm_sched_priority priority;
121841ec0267Sriastradh struct amdgpu_ring *ring;
121941ec0267Sriastradh struct amdgpu_bo_list_entry *e;
122041ec0267Sriastradh struct amdgpu_job *job;
122141ec0267Sriastradh uint64_t seq;
122241ec0267Sriastradh int r;
122341ec0267Sriastradh
122441ec0267Sriastradh job = p->job;
122541ec0267Sriastradh p->job = NULL;
122641ec0267Sriastradh
122741ec0267Sriastradh r = drm_sched_job_init(&job->base, entity, p->filp);
122841ec0267Sriastradh if (r)
122941ec0267Sriastradh goto error_unlock;
123041ec0267Sriastradh
123141ec0267Sriastradh /* No memory allocation is allowed while holding the notifier lock.
123241ec0267Sriastradh * The lock is held until amdgpu_cs_submit() has finished and the fence has
123341ec0267Sriastradh * been added to the BOs.
123441ec0267Sriastradh */
123541ec0267Sriastradh mutex_lock(&p->adev->notifier_lock);
123641ec0267Sriastradh
123741ec0267Sriastradh /* If userptrs were invalidated after amdgpu_cs_parser_bos(), return
123841ec0267Sriastradh * -EAGAIN; drmIoctl() in libdrm will then restart amdgpu_cs_ioctl().
123941ec0267Sriastradh */
124041ec0267Sriastradh amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
124141ec0267Sriastradh struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
124241ec0267Sriastradh
124341ec0267Sriastradh r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
124441ec0267Sriastradh }
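/*
 * Any userptr BO whose pages were invalidated since amdgpu_cs_parser_bos()
 * leaves a nonzero bit in r here, forcing the -EAGAIN restart below.
 */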
124541ec0267Sriastradh if (r) {
124641ec0267Sriastradh r = -EAGAIN;
124741ec0267Sriastradh goto error_abort;
124841ec0267Sriastradh }
124941ec0267Sriastradh
125041ec0267Sriastradh p->fence = dma_fence_get(&job->base.s_fence->finished);
125141ec0267Sriastradh
125241ec0267Sriastradh amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
125341ec0267Sriastradh amdgpu_cs_post_dependencies(p);
125441ec0267Sriastradh
125541ec0267Sriastradh if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
125641ec0267Sriastradh !p->ctx->preamble_presented) {
125741ec0267Sriastradh job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
125841ec0267Sriastradh p->ctx->preamble_presented = true;
125941ec0267Sriastradh }
126041ec0267Sriastradh
126141ec0267Sriastradh cs->out.handle = seq;
126241ec0267Sriastradh job->uf_sequence = seq;
126341ec0267Sriastradh
126441ec0267Sriastradh amdgpu_job_free_resources(job);
126541ec0267Sriastradh
126641ec0267Sriastradh trace_amdgpu_cs_ioctl(job);
126741ec0267Sriastradh amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
126841ec0267Sriastradh priority = job->base.s_priority;
126941ec0267Sriastradh drm_sched_entity_push_job(&job->base, entity);
127041ec0267Sriastradh
127141ec0267Sriastradh ring = to_amdgpu_ring(entity->rq->sched);
127241ec0267Sriastradh amdgpu_ring_priority_get(ring, priority);
127341ec0267Sriastradh
127441ec0267Sriastradh amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
127541ec0267Sriastradh
127641ec0267Sriastradh ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
127741ec0267Sriastradh mutex_unlock(&p->adev->notifier_lock);
127841ec0267Sriastradh
1279efa246c0Sriastradh return 0;
128041ec0267Sriastradh
128141ec0267Sriastradh error_abort:
128241ec0267Sriastradh drm_sched_job_cleanup(&job->base);
128341ec0267Sriastradh mutex_unlock(&p->adev->notifier_lock);
128441ec0267Sriastradh
128541ec0267Sriastradh error_unlock:
128641ec0267Sriastradh amdgpu_job_free(job);
128741ec0267Sriastradh return r;
1288efa246c0Sriastradh }
1289efa246c0Sriastradh
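/*
 * amdgpu_cs_ioctl: main command submission entry point.  Order of operations,
 * as implemented below: initialize the parser from the userspace chunks, copy
 * in the IBs, resolve the dependency chunks, reserve and validate the buffer
 * list, run the VM handling step, and finally hand the job to the scheduler
 * in amdgpu_cs_submit().
 */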
1290efa246c0Sriastradh int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1291efa246c0Sriastradh {
1292efa246c0Sriastradh struct amdgpu_device *adev = dev->dev_private;
1293efa246c0Sriastradh union drm_amdgpu_cs *cs = data;
1294efa246c0Sriastradh struct amdgpu_cs_parser parser = {};
1295efa246c0Sriastradh bool reserved_buffers = false;
1296efa246c0Sriastradh int i, r;
1297efa246c0Sriastradh
129841ec0267Sriastradh if (amdgpu_ras_intr_triggered())
129941ec0267Sriastradh return -EHWPOISON;
130041ec0267Sriastradh
1301efa246c0Sriastradh if (!adev->accel_working)
1302efa246c0Sriastradh return -EBUSY;
1303efa246c0Sriastradh
1304efa246c0Sriastradh parser.adev = adev;
1305efa246c0Sriastradh parser.filp = filp;
1306efa246c0Sriastradh
1307efa246c0Sriastradh r = amdgpu_cs_parser_init(&parser, data);
1308efa246c0Sriastradh if (r) {
130941ec0267Sriastradh DRM_ERROR("Failed to initialize parser %d!\n", r);
131041ec0267Sriastradh goto out;
1311efa246c0Sriastradh }
131241ec0267Sriastradh
131341ec0267Sriastradh r = amdgpu_cs_ib_fill(adev, &parser);
131441ec0267Sriastradh if (r)
131541ec0267Sriastradh goto out;
131641ec0267Sriastradh
131741ec0267Sriastradh r = amdgpu_cs_dependencies(adev, &parser);
131841ec0267Sriastradh if (r) {
131941ec0267Sriastradh DRM_ERROR("Failed in the dependencies handling %d!\n", r);
132041ec0267Sriastradh goto out;
132141ec0267Sriastradh }
132241ec0267Sriastradh
132341ec0267Sriastradh r = amdgpu_cs_parser_bos(&parser, data);
132441ec0267Sriastradh if (r) {
1325efa246c0Sriastradh if (r == -ENOMEM)
1326efa246c0Sriastradh DRM_ERROR("Not enough memory for command submission!\n");
132741ec0267Sriastradh else if (r != -ERESTARTSYS && r != -EAGAIN)
1328efa246c0Sriastradh DRM_ERROR("Failed to process the buffer list %d!\n", r);
1329efa246c0Sriastradh goto out;
133041ec0267Sriastradh }
1331efa246c0Sriastradh
133241ec0267Sriastradh reserved_buffers = true;
133341ec0267Sriastradh
133441ec0267Sriastradh for (i = 0; i < parser.job->num_ibs; i++)
1335efa246c0Sriastradh trace_amdgpu_cs(&parser, i);
1336efa246c0Sriastradh
133741ec0267Sriastradh r = amdgpu_cs_vm_handling(&parser);
1338efa246c0Sriastradh if (r)
1339efa246c0Sriastradh goto out;
1340efa246c0Sriastradh
134141ec0267Sriastradh r = amdgpu_cs_submit(&parser, cs);
1342efa246c0Sriastradh
1343efa246c0Sriastradh out:
1344efa246c0Sriastradh amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
134541ec0267Sriastradh
1346efa246c0Sriastradh return r;
1347efa246c0Sriastradh }
1348efa246c0Sriastradh
1349efa246c0Sriastradh /**
1350efa246c0Sriastradh * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1351efa246c0Sriastradh *
1352efa246c0Sriastradh * @dev: drm device
1353efa246c0Sriastradh * @data: data from userspace
1354efa246c0Sriastradh * @filp: file private
1355efa246c0Sriastradh *
1356efa246c0Sriastradh * Wait for the command submission identified by handle to finish.
1357efa246c0Sriastradh */
1358efa246c0Sriastradh int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1359efa246c0Sriastradh struct drm_file *filp)
1360efa246c0Sriastradh {
1361efa246c0Sriastradh union drm_amdgpu_wait_cs *wait = data;
1362efa246c0Sriastradh unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
136341ec0267Sriastradh struct drm_sched_entity *entity;
1364efa246c0Sriastradh struct amdgpu_ctx *ctx;
136541ec0267Sriastradh struct dma_fence *fence;
1366efa246c0Sriastradh long r;
1367efa246c0Sriastradh
1368efa246c0Sriastradh ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1369efa246c0Sriastradh if (ctx == NULL)
1370efa246c0Sriastradh return -EINVAL;
1371efa246c0Sriastradh
137241ec0267Sriastradh r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
137341ec0267Sriastradh wait->in.ring, &entity);
137441ec0267Sriastradh if (r) {
137541ec0267Sriastradh amdgpu_ctx_put(ctx);
137641ec0267Sriastradh return r;
137741ec0267Sriastradh }
137841ec0267Sriastradh
137941ec0267Sriastradh fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1380efa246c0Sriastradh if (IS_ERR(fence))
1381efa246c0Sriastradh r = PTR_ERR(fence);
1382efa246c0Sriastradh else if (fence) {
138341ec0267Sriastradh r = dma_fence_wait_timeout(fence, true, timeout);
138441ec0267Sriastradh if (r > 0 && fence->error)
138541ec0267Sriastradh r = fence->error;
138641ec0267Sriastradh dma_fence_put(fence);
1387efa246c0Sriastradh } else
1388efa246c0Sriastradh r = 1;
1389efa246c0Sriastradh
1390efa246c0Sriastradh amdgpu_ctx_put(ctx);
1391efa246c0Sriastradh if (r < 0)
1392efa246c0Sriastradh return r;
1393efa246c0Sriastradh
1394efa246c0Sriastradh memset(wait, 0, sizeof(*wait));
1395efa246c0Sriastradh wait->out.status = (r == 0);
1396efa246c0Sriastradh
1397efa246c0Sriastradh return 0;
1398efa246c0Sriastradh }
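/*
 * Illustrative userspace sketch (not part of this file): waiting on a prior
 * submission with the WAIT_CS ioctl, assuming the drm_amdgpu_wait_cs uAPI
 * layout from amdgpu_drm.h.  Here fd is the render node, ctx_id the context
 * and seq the out.handle returned by DRM_IOCTL_AMDGPU_CS.
 *
 *	union drm_amdgpu_wait_cs wait;
 *
 *	memset(&wait, 0, sizeof(wait));
 *	wait.in.handle = seq;
 *	wait.in.ctx_id = ctx_id;
 *	wait.in.ip_type = AMDGPU_HW_IP_GFX;
 *	wait.in.timeout = AMDGPU_TIMEOUT_INFINITE;
 *	if (drmIoctl(fd, DRM_IOCTL_AMDGPU_WAIT_CS, &wait) == 0 &&
 *	    wait.out.status == 0)
 *		the submission has finished;
 */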
1399efa246c0Sriastradh
1400efa246c0Sriastradh /**
140141ec0267Sriastradh * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
140241ec0267Sriastradh *
140341ec0267Sriastradh * @adev: amdgpu device
140441ec0267Sriastradh * @filp: file private
140541ec0267Sriastradh * @user: drm_amdgpu_fence copied from user space
140641ec0267Sriastradh */
140741ec0267Sriastradh static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
140841ec0267Sriastradh struct drm_file *filp,
140941ec0267Sriastradh struct drm_amdgpu_fence *user)
141041ec0267Sriastradh {
141141ec0267Sriastradh struct drm_sched_entity *entity;
141241ec0267Sriastradh struct amdgpu_ctx *ctx;
141341ec0267Sriastradh struct dma_fence *fence;
141441ec0267Sriastradh int r;
141541ec0267Sriastradh
141641ec0267Sriastradh ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
141741ec0267Sriastradh if (ctx == NULL)
141841ec0267Sriastradh return ERR_PTR(-EINVAL);
141941ec0267Sriastradh
142041ec0267Sriastradh r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
142141ec0267Sriastradh user->ring, &entity);
142241ec0267Sriastradh if (r) {
142341ec0267Sriastradh amdgpu_ctx_put(ctx);
142441ec0267Sriastradh return ERR_PTR(r);
142541ec0267Sriastradh }
142641ec0267Sriastradh
142741ec0267Sriastradh fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
142841ec0267Sriastradh amdgpu_ctx_put(ctx);
142941ec0267Sriastradh
143041ec0267Sriastradh return fence;
143141ec0267Sriastradh }
143241ec0267Sriastradh
143341ec0267Sriastradh int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
143441ec0267Sriastradh struct drm_file *filp)
143541ec0267Sriastradh {
143641ec0267Sriastradh struct amdgpu_device *adev = dev->dev_private;
143741ec0267Sriastradh union drm_amdgpu_fence_to_handle *info = data;
143841ec0267Sriastradh struct dma_fence *fence;
143941ec0267Sriastradh struct drm_syncobj *syncobj;
144041ec0267Sriastradh struct sync_file *sync_file;
144141ec0267Sriastradh int fd, r;
144241ec0267Sriastradh
144341ec0267Sriastradh fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
144441ec0267Sriastradh if (IS_ERR(fence))
144541ec0267Sriastradh return PTR_ERR(fence);
144641ec0267Sriastradh
144741ec0267Sriastradh if (!fence)
144841ec0267Sriastradh fence = dma_fence_get_stub();
144941ec0267Sriastradh
145041ec0267Sriastradh switch (info->in.what) {
145141ec0267Sriastradh case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
145241ec0267Sriastradh r = drm_syncobj_create(&syncobj, 0, fence);
145341ec0267Sriastradh dma_fence_put(fence);
145441ec0267Sriastradh if (r)
145541ec0267Sriastradh return r;
145641ec0267Sriastradh r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
145741ec0267Sriastradh drm_syncobj_put(syncobj);
145841ec0267Sriastradh return r;
145941ec0267Sriastradh
146041ec0267Sriastradh case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
146141ec0267Sriastradh r = drm_syncobj_create(&syncobj, 0, fence);
146241ec0267Sriastradh dma_fence_put(fence);
146341ec0267Sriastradh if (r)
146441ec0267Sriastradh return r;
146541ec0267Sriastradh r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
146641ec0267Sriastradh drm_syncobj_put(syncobj);
146741ec0267Sriastradh return r;
146841ec0267Sriastradh
146941ec0267Sriastradh case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1470e4a580baSriastradh #ifdef __NetBSD__
1471e4a580baSriastradh {
1472e4a580baSriastradh struct file *fp = NULL;
1473e4a580baSriastradh
1474e4a580baSriastradh /* XXX errno NetBSD->Linux */
1475e4a580baSriastradh r = -fd_allocfile(&fp, &fd);
1476e4a580baSriastradh if (r)
1477e4a580baSriastradh goto out;
1478e4a580baSriastradh sync_file = sync_file_create(fence, fp);
1479e4a580baSriastradh if (sync_file == NULL)
1480e4a580baSriastradh goto out;
1481e4a580baSriastradh fd_affix(curproc, fp, fd);
1482e4a580baSriastradh fp = NULL; /* consumed by sync_file */
1483e4a580baSriastradh
1484e4a580baSriastradh out: if (fp) {
1485e4a580baSriastradh fd_abort(curproc, fp, fd);
1486e4a580baSriastradh fd = -1;
1487e4a580baSriastradh }
1488e4a580baSriastradh dma_fence_put(fence);
1489e4a580baSriastradh }
1490e4a580baSriastradh #else
149141ec0267Sriastradh fd = get_unused_fd_flags(O_CLOEXEC);
149241ec0267Sriastradh if (fd < 0) {
149341ec0267Sriastradh dma_fence_put(fence);
149441ec0267Sriastradh return fd;
149541ec0267Sriastradh }
149641ec0267Sriastradh
149741ec0267Sriastradh sync_file = sync_file_create(fence);
149841ec0267Sriastradh dma_fence_put(fence);
149941ec0267Sriastradh if (!sync_file) {
150041ec0267Sriastradh put_unused_fd(fd);
150141ec0267Sriastradh return -ENOMEM;
150241ec0267Sriastradh }
150341ec0267Sriastradh
150441ec0267Sriastradh fd_install(fd, sync_file->file);
1505e4a580baSriastradh #endif
150641ec0267Sriastradh info->out.handle = fd;
150741ec0267Sriastradh return 0;
150841ec0267Sriastradh
150941ec0267Sriastradh default:
151041ec0267Sriastradh return -EINVAL;
151141ec0267Sriastradh }
151241ec0267Sriastradh }
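/*
 * Illustrative userspace sketch (not part of this file): exporting the fence
 * of a submission as a sync_file fd, assuming the drm_amdgpu_fence_to_handle
 * uAPI layout from amdgpu_drm.h.  ctx_id and seq identify the submission, as
 * in the wait example above.
 *
 *	union drm_amdgpu_fence_to_handle fth;
 *
 *	memset(&fth, 0, sizeof(fth));
 *	fth.in.fence.ctx_id = ctx_id;
 *	fth.in.fence.ip_type = AMDGPU_HW_IP_GFX;
 *	fth.in.fence.seq_no = seq;
 *	fth.in.what = AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD;
 *	if (drmIoctl(fd, DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE, &fth) == 0)
 *		sync_file_fd = fth.out.handle;
 */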
151341ec0267Sriastradh
151441ec0267Sriastradh /**
151541ec0267Sriastradh * amdgpu_cs_wait_all_fences - wait on all fences to signal
151641ec0267Sriastradh *
151741ec0267Sriastradh * @adev: amdgpu device
151841ec0267Sriastradh * @filp: file private
151941ec0267Sriastradh * @wait: wait parameters
152041ec0267Sriastradh * @fences: array of drm_amdgpu_fence
152141ec0267Sriastradh */
152241ec0267Sriastradh static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
152341ec0267Sriastradh struct drm_file *filp,
152441ec0267Sriastradh union drm_amdgpu_wait_fences *wait,
152541ec0267Sriastradh struct drm_amdgpu_fence *fences)
152641ec0267Sriastradh {
152741ec0267Sriastradh uint32_t fence_count = wait->in.fence_count;
152841ec0267Sriastradh unsigned int i;
152941ec0267Sriastradh long r = 1;
153041ec0267Sriastradh
153141ec0267Sriastradh for (i = 0; i < fence_count; i++) {
153241ec0267Sriastradh struct dma_fence *fence;
153341ec0267Sriastradh unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
153441ec0267Sriastradh
153541ec0267Sriastradh fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
153641ec0267Sriastradh if (IS_ERR(fence))
153741ec0267Sriastradh return PTR_ERR(fence);
153841ec0267Sriastradh else if (!fence)
153941ec0267Sriastradh continue;
154041ec0267Sriastradh
154141ec0267Sriastradh r = dma_fence_wait_timeout(fence, true, timeout);
154241ec0267Sriastradh dma_fence_put(fence);
154341ec0267Sriastradh if (r < 0)
154441ec0267Sriastradh return r;
154541ec0267Sriastradh
154641ec0267Sriastradh if (r == 0)
154741ec0267Sriastradh break;
154841ec0267Sriastradh
154941ec0267Sriastradh if (fence->error)
155041ec0267Sriastradh return fence->error;
155141ec0267Sriastradh }
155241ec0267Sriastradh
155341ec0267Sriastradh memset(wait, 0, sizeof(*wait));
155441ec0267Sriastradh wait->out.status = (r > 0);
155541ec0267Sriastradh
155641ec0267Sriastradh return 0;
155741ec0267Sriastradh }
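/*
 * Note: unlike amdgpu_cs_wait_ioctl() above, where out.status == 0 means the
 * submission has finished, the wait_fences paths report success directly:
 * out.status is nonzero when the wait succeeded within the timeout.
 */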
155841ec0267Sriastradh
155941ec0267Sriastradh /**
156041ec0267Sriastradh * amdgpu_cs_wait_any_fence - wait on any fence to signal
156141ec0267Sriastradh *
156241ec0267Sriastradh * @adev: amdgpu device
156341ec0267Sriastradh * @filp: file private
156441ec0267Sriastradh * @wait: wait parameters
156541ec0267Sriastradh * @fences: array of drm_amdgpu_fence
156641ec0267Sriastradh */
156741ec0267Sriastradh static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
156841ec0267Sriastradh struct drm_file *filp,
156941ec0267Sriastradh union drm_amdgpu_wait_fences *wait,
157041ec0267Sriastradh struct drm_amdgpu_fence *fences)
157141ec0267Sriastradh {
157241ec0267Sriastradh unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
157341ec0267Sriastradh uint32_t fence_count = wait->in.fence_count;
157441ec0267Sriastradh uint32_t first = ~0;
157541ec0267Sriastradh struct dma_fence **array;
157641ec0267Sriastradh unsigned int i;
157741ec0267Sriastradh long r;
157841ec0267Sriastradh
157941ec0267Sriastradh /* Prepare the fence array */
158041ec0267Sriastradh array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
158141ec0267Sriastradh
158241ec0267Sriastradh if (array == NULL)
158341ec0267Sriastradh return -ENOMEM;
158441ec0267Sriastradh
158541ec0267Sriastradh for (i = 0; i < fence_count; i++) {
158641ec0267Sriastradh struct dma_fence *fence;
158741ec0267Sriastradh
158841ec0267Sriastradh fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
158941ec0267Sriastradh if (IS_ERR(fence)) {
159041ec0267Sriastradh r = PTR_ERR(fence);
159141ec0267Sriastradh goto err_free_fence_array;
159241ec0267Sriastradh } else if (fence) {
159341ec0267Sriastradh array[i] = fence;
159441ec0267Sriastradh } else { /* NULL, the fence has been already signaled */
159541ec0267Sriastradh r = 1;
159641ec0267Sriastradh first = i;
159741ec0267Sriastradh goto out;
159841ec0267Sriastradh }
159941ec0267Sriastradh }
160041ec0267Sriastradh
160141ec0267Sriastradh r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
160241ec0267Sriastradh &first);
160341ec0267Sriastradh if (r < 0)
160441ec0267Sriastradh goto err_free_fence_array;
160541ec0267Sriastradh
160641ec0267Sriastradh out:
160741ec0267Sriastradh memset(wait, 0, sizeof(*wait));
160841ec0267Sriastradh wait->out.status = (r > 0);
160941ec0267Sriastradh wait->out.first_signaled = first;
161041ec0267Sriastradh
161141ec0267Sriastradh if (first < fence_count && array[first])
161241ec0267Sriastradh r = array[first]->error;
161341ec0267Sriastradh else
161441ec0267Sriastradh r = 0;
161541ec0267Sriastradh
161641ec0267Sriastradh err_free_fence_array:
161741ec0267Sriastradh for (i = 0; i < fence_count; i++)
161841ec0267Sriastradh dma_fence_put(array[i]);
161941ec0267Sriastradh kfree(array);
162041ec0267Sriastradh
162141ec0267Sriastradh return r;
162241ec0267Sriastradh }
162341ec0267Sriastradh
162441ec0267Sriastradh /**
162541ec0267Sriastradh * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
162641ec0267Sriastradh *
162741ec0267Sriastradh * @dev: drm device
162841ec0267Sriastradh * @data: data from userspace
162941ec0267Sriastradh * @filp: file private
163041ec0267Sriastradh */
163141ec0267Sriastradh int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
163241ec0267Sriastradh struct drm_file *filp)
163341ec0267Sriastradh {
163441ec0267Sriastradh struct amdgpu_device *adev = dev->dev_private;
163541ec0267Sriastradh union drm_amdgpu_wait_fences *wait = data;
163641ec0267Sriastradh uint32_t fence_count = wait->in.fence_count;
163741ec0267Sriastradh struct drm_amdgpu_fence *fences_user;
163841ec0267Sriastradh struct drm_amdgpu_fence *fences;
163941ec0267Sriastradh int r;
164041ec0267Sriastradh
164141ec0267Sriastradh /* Get the fences from userspace */
164241ec0267Sriastradh fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
164341ec0267Sriastradh GFP_KERNEL);
164441ec0267Sriastradh if (fences == NULL)
164541ec0267Sriastradh return -ENOMEM;
164641ec0267Sriastradh
164741ec0267Sriastradh fences_user = u64_to_user_ptr(wait->in.fences);
164841ec0267Sriastradh if (copy_from_user(fences, fences_user,
164941ec0267Sriastradh sizeof(struct drm_amdgpu_fence) * fence_count)) {
165041ec0267Sriastradh r = -EFAULT;
165141ec0267Sriastradh goto err_free_fences;
165241ec0267Sriastradh }
165341ec0267Sriastradh
165441ec0267Sriastradh if (wait->in.wait_all)
165541ec0267Sriastradh r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
165641ec0267Sriastradh else
165741ec0267Sriastradh r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
165841ec0267Sriastradh
165941ec0267Sriastradh err_free_fences:
166041ec0267Sriastradh kfree(fences);
166141ec0267Sriastradh
166241ec0267Sriastradh return r;
166341ec0267Sriastradh }
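/*
 * Illustrative userspace sketch (not part of this file): waiting for any one
 * of several submissions, assuming the drm_amdgpu_wait_fences uAPI layout
 * from amdgpu_drm.h.  The fences[] entries are filled like drm_amdgpu_fence
 * in the earlier examples.
 *
 *	struct drm_amdgpu_fence fences[2] = { ... };
 *	union drm_amdgpu_wait_fences wf;
 *
 *	memset(&wf, 0, sizeof(wf));
 *	wf.in.fences = (uintptr_t)fences;
 *	wf.in.fence_count = 2;
 *	wf.in.wait_all = 0;
 *	wf.in.timeout_ns = 1000000000;
 *	if (drmIoctl(fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &wf) == 0 &&
 *	    wf.out.status)
 *		first = wf.out.first_signaled;
 */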
166441ec0267Sriastradh
166541ec0267Sriastradh /**
1666efa246c0Sriastradh * amdgpu_cs_find_mapping - find the BO and mapping for a VM address
1667efa246c0Sriastradh *
1668efa246c0Sriastradh * @parser: command submission parser context
1669efa246c0Sriastradh * @addr: VM address
1670efa246c0Sriastradh * @bo: resulting BO of the mapping found
 * @map: resulting bo_va mapping that contains the address
1671efa246c0Sriastradh *
1672efa246c0Sriastradh * Search the buffer objects in the command submission context for a certain
1673efa246c0Sriastradh * virtual memory address. Returns 0 and fills in *bo and *map when the
1674efa246c0Sriastradh * mapping is found, a negative error code otherwise.
1675efa246c0Sriastradh */
167641ec0267Sriastradh int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
167741ec0267Sriastradh uint64_t addr, struct amdgpu_bo **bo,
167841ec0267Sriastradh struct amdgpu_bo_va_mapping **map)
1679efa246c0Sriastradh {
168041ec0267Sriastradh struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
168141ec0267Sriastradh struct ttm_operation_ctx ctx = { false, false };
168241ec0267Sriastradh struct amdgpu_vm *vm = &fpriv->vm;
1683efa246c0Sriastradh struct amdgpu_bo_va_mapping *mapping;
168441ec0267Sriastradh int r;
1685efa246c0Sriastradh
1686efa246c0Sriastradh addr /= AMDGPU_GPU_PAGE_SIZE;
1687efa246c0Sriastradh
168841ec0267Sriastradh mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
168941ec0267Sriastradh if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
169041ec0267Sriastradh return -EINVAL;
1691efa246c0Sriastradh
169241ec0267Sriastradh *bo = mapping->bo_va->base.bo;
169341ec0267Sriastradh *map = mapping;
1694efa246c0Sriastradh
169541ec0267Sriastradh /* Double check that the BO is reserved by this CS */
169641ec0267Sriastradh if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
169741ec0267Sriastradh return -EINVAL;
169841ec0267Sriastradh
169941ec0267Sriastradh if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
170041ec0267Sriastradh (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
170141ec0267Sriastradh amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
170241ec0267Sriastradh r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
170341ec0267Sriastradh if (r)
170441ec0267Sriastradh return r;
1705efa246c0Sriastradh }
1706efa246c0Sriastradh
170741ec0267Sriastradh return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1708efa246c0Sriastradh }
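/*
 * Minimal in-kernel usage sketch (matches the signature above; gpu_addr
 * stands for a VM address taken from an IB, and the caller is expected to
 * still hold the CS reservation, as checked via the ticket):
 *
 *	struct amdgpu_bo *bo;
 *	struct amdgpu_bo_va_mapping *map;
 *	int r;
 *
 *	r = amdgpu_cs_find_mapping(parser, gpu_addr, &bo, &map);
 *	if (r)
 *		return r;
 *	use bo and map to validate or patch the IB contents;
 */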
1709