/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */

#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>

#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include <drm/ttm/ttm_tt.h>

#include "amdgpu_cs.h"
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"

static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
				 struct amdgpu_device *adev,
				 struct drm_file *filp,
				 union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	if (cs->in.num_chunks == 0)
		return -EINVAL;

	memset(p, 0, sizeof(*p));
	p->adev = adev;
	p->filp = filp;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx)
		return -EINVAL;

	if (atomic_read(&p->ctx->guilty)) {
		amdgpu_ctx_put(p->ctx);
		return -ECANCELED;
	}

	amdgpu_sync_create(&p->sync);
	drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
		      DRM_EXEC_IGNORE_DUPLICATES);
	return 0;
}

static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
			     struct drm_amdgpu_cs_chunk_ib *chunk_ib)
{
	struct drm_sched_entity *entity;
	unsigned int i;
	int r;

	r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
				  chunk_ib->ip_instance,
				  chunk_ib->ring, &entity);
	if (r)
		return r;

	/*
	 * Abort if there is no run queue associated with this entity.
	 * Possibly because of disabled HW IP.
	 */
	if (entity->rq == NULL)
		return -EINVAL;

	/* Check if we can add this IB to some existing job */
	for (i = 0; i < p->gang_size; ++i)
		if (p->entities[i] == entity)
			return i;

	/* If not, increase the gang size if possible */
	if (i == AMDGPU_CS_GANG_SIZE)
		return -EINVAL;

	p->entities[i] = entity;
	p->gang_size = i + 1;
	return i;
}

static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
			   struct drm_amdgpu_cs_chunk_ib *chunk_ib,
			   unsigned int *num_ibs)
{
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
		return -EINVAL;

	++(num_ibs[r]);
	p->gang_leader_idx = r;
	return 0;
}

static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
				   struct drm_amdgpu_cs_chunk_fence *data,
				   uint32_t *offset)
{
	struct drm_gem_object *gobj;
	unsigned long size;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	drm_gem_object_put(gobj);

	size = amdgpu_bo_size(p->uf_bo);
	if (size != PAGE_SIZE || data->offset > (size - 8))
		return -EINVAL;

	if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
		return -EINVAL;

	*offset = data->offset;
	return 0;
}

static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
				   struct drm_amdgpu_bo_list_in *data)
{
	struct drm_amdgpu_bo_list_entry *info;
	int r;

	r = amdgpu_bo_create_list_entry_array(data, &info);
	if (r)
		return r;

	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
				  &p->bo_list);
	if (r)
		goto error_free;

	kvfree(info);
	return 0;

error_free:
	kvfree(info);

	return r;
}

/* Copy the data from userspace and go over it the first time */
static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
			   union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	uint32_t uf_offset = 0;
	size_t size;
	int ret;
	int i;

	chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
				     GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	/* get chunks */
	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto free_chunk;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				   GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = u64_to_user_ptr(user_chunk.chunk_data);

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
						    GFP_KERNEL);
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		/* Assume the worst on the following checks */
		ret = -EINVAL;
		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
						      &uf_offset);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_BO_HANDLES:
			if (size < sizeof(struct drm_amdgpu_bo_list_in))
				goto free_partial_kdata;

			/* Only a single BO list is allowed to simplify handling. */
			if (p->bo_list)
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			break;

		default:
			goto free_partial_kdata;
		}
	}

	if (!p->gang_size) {
		ret = -EINVAL;
		goto free_all_kdata;
	}

	for (i = 0; i < p->gang_size; ++i) {
		ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
				       num_ibs[i], &p->jobs[i]);
		if (ret)
			goto free_all_kdata;
	}
	p->gang_leader = p->jobs[p->gang_leader_idx];

	if (p->ctx->generation != p->gang_leader->generation) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_bo)
		p->gang_leader->uf_addr = uf_offset;
	kvfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
	amdgpu_vm_set_task_info(vm);

	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kvfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kvfree(chunk_array);

	return ret;
}

static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
			   struct amdgpu_cs_chunk *chunk,
			   unsigned int *ce_preempt,
			   unsigned int *de_preempt)
{
	struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	job = p->jobs[r];
	ring = amdgpu_job_ring(job);
	ib = &job->ibs[job->num_ibs++];

	/* MM engine doesn't support user fences */
	if (p->uf_bo && ring->funcs->no_user_fence)
		return -EINVAL;

	if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
	    chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
		if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
			(*ce_preempt)++;
		else
			(*de_preempt)++;

		/* Each GFX command submission allows at most one preemptible
		 * IB for CE and one for DE.
		 */
		if (*ce_preempt > 1 || *de_preempt > 1)
			return -EINVAL;
	}

	if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;

	r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
			  chunk_ib->ib_bytes : 0,
			  AMDGPU_IB_POOL_DELAYED, ib);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}

	ib->gpu_addr = chunk_ib->va_start;
	ib->length_dw = chunk_ib->ib_bytes / 4;
	ib->flags = chunk_ib->flags;
	return 0;
}

static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
				     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ctx *ctx;
		struct drm_sched_entity *entity;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
		amdgpu_ctx_put(ctx);

		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
			struct drm_sched_fence *s_fence;
			struct dma_fence *old = fence;

			s_fence = to_drm_sched_fence(fence);
			fence = dma_fence_get(&s_fence->scheduled);
			dma_fence_put(old);
		}

		r = amdgpu_sync_fence(&p->sync, fence);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
					 uint32_t handle, u64 point,
					 u64 flags)
{
	struct dma_fence *fence;
	int r;

	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
	if (r) {
		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
			  handle, point, r);
		return r;
	}

	r = amdgpu_sync_fence(&p->sync, fence);
	dma_fence_put(fence);
	return r;
}

static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
				   struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
					      struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
						  syncobj_deps[i].point,
						  syncobj_deps[i].flags);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
				    struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;


	for (i = 0; i < num_deps; ++i) {
		p->post_deps[i].syncobj =
			drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_deps[i].syncobj)
			return -EINVAL;
		p->post_deps[i].chain = NULL;
		p->post_deps[i].point = 0;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
						struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

		dep->chain = NULL;
		if (syncobj_deps[i].point) {
			dep->chain = dma_fence_chain_alloc();
			if (!dep->chain)
				return -ENOMEM;
		}

		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
		if (!dep->syncobj) {
			dma_fence_chain_free(dep->chain);
			return -EINVAL;
		}
		dep->point = syncobj_deps[i].point;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
			       struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
	int i;

	if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
		return -EINVAL;

	for (i = 0; i < p->gang_size; ++i) {
		p->jobs[i]->shadow_va = shadow->shadow_va;
		p->jobs[i]->csa_va = shadow->csa_va;
		p->jobs[i]->gds_va = shadow->gds_va;
		p->jobs[i]->init_shadow =
			shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
	}

	return 0;
}

static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
	unsigned int ce_preempt = 0, de_preempt = 0;
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct amdgpu_cs_chunk *chunk;

		chunk = &p->chunks[i];

		switch (chunk->chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_p2_dependencies(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_p2_syncobj_in(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_p2_syncobj_out(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			r = amdgpu_cs_p2_shadow(p, chunk);
			if (r)
				return r;
			break;
		}
	}

	return 0;
}

/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
		return 0;

	/* Since accum_us is incremented by a million per second, just
	 * multiply it by the number of MB/s to get the number of bytes.
	 */
	return us << adev->mm_stats.log2_max_MBps;
}

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
	if (!adev->mm_stats.log2_max_MBps)
		return 0;

	return bytes >> adev->mm_stats.log2_max_MBps;
}

/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;
	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
	used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
				      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
			amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);

		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;

			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}

/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
 * temporarily.
 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
				  u64 num_vis_bytes)
{
	spin_lock(&adev->mm_stats.lock);
	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
	spin_unlock(&adev->mm_stats.lock);
}

static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct amdgpu_cs_parser *p = param;
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.resv = bo->tbo.base.resv
	};
	uint32_t domain;
	int r;

	if (bo->tbo.pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
	if (p->bytes_moved < p->bytes_moved_threshold &&
	    (!bo->tbo.base.dma_buf ||
	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_bo_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	p->bytes_moved += ctx.bytes_moved;
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
	    amdgpu_res_cpu_visible(adev, bo->tbo.resource))
		p->bytes_moved_vis += ctx.bytes_moved;

	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}

static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *obj;
	unsigned long index;
	unsigned int i;
	int r;

	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
	if (cs->in.bo_list_handle) {
		if (p->bo_list)
			return -EINVAL;

		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
				       &p->bo_list);
		if (r)
			return r;
	} else if (!p->bo_list) {
		/* Create an empty bo_list when no handle is provided */
		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
					  &p->bo_list);
		if (r)
			return r;
	}

	mutex_lock(&p->bo_list->bo_list_mutex);

	/* Get userptr backing pages. If pages are updated after registered
	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		bool userpage_invalidated = false;
		struct amdgpu_bo *bo = e->bo;
		int i;

		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					       sizeof(struct vm_page *),
					       GFP_KERNEL | __GFP_ZERO);
		if (!e->user_pages) {
			DRM_ERROR("kvmalloc_array failure\n");
			r = -ENOMEM;
			goto out_free_user_pages;
		}

		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
		if (r) {
			kvfree(e->user_pages);
			e->user_pages = NULL;
			goto out_free_user_pages;
		}

		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
				userpage_invalidated = true;
				break;
			}
		}
		e->user_invalidated = userpage_invalidated;
	}

	drm_exec_until_all_locked(&p->exec) {
		r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
		drm_exec_retry_on_contention(&p->exec);
		if (unlikely(r))
			goto out_free_user_pages;

		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			/* One fence for TTM and one for each CS job */
			r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;

			e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
		}

		if (p->uf_bo) {
			r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;
		}
	}

	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
#ifdef notyet
		struct mm_struct *usermm;

		usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
		if (usermm && usermm != current->mm) {
			r = -EPERM;
			goto out_free_user_pages;
		}
#endif

		if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
		    e->user_invalidated && e->user_pages) {
			amdgpu_bo_placement_from_domain(e->bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
					    &ctx);
			if (r)
				goto out_free_user_pages;

			amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
						     e->user_pages);
		}

		kvfree(e->user_pages);
		e->user_pages = NULL;
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;

	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_bo_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
		goto out_free_user_pages;
	}

	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
		if (unlikely(r))
			goto out_free_user_pages;
	}

	if (p->uf_bo) {
		r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
		if (unlikely(r))
			goto out_free_user_pages;

		p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
	}

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	for (i = 0; i < p->gang_size; ++i)
		amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
					 p->bo_list->gws_obj,
					 p->bo_list->oa_obj);
	return 0;

out_free_user_pages:
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = e->bo;

		if (!e->user_pages)
			continue;
		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
		kvfree(e->user_pages);
		e->user_pages = NULL;
		e->range = NULL;
	}
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return r;
}

static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
{
	int i, j;

	if (!trace_amdgpu_cs_enabled())
		return;

	for (i = 0; i < p->gang_size; ++i) {
		struct amdgpu_job *job = p->jobs[i];

		for (j = 0; j < job->num_ibs; ++j)
			trace_amdgpu_cs(p, job, &job->ibs[j]);
	}
}

static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
			       struct amdgpu_job *job)
{
	struct amdgpu_ring *ring = amdgpu_job_ring(job);
	unsigned int i;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
		return 0;

	for (i = 0; i < job->num_ibs; ++i) {
		struct amdgpu_ib *ib = &job->ibs[i];
		struct amdgpu_bo_va_mapping *m;
		struct amdgpu_bo *aobj;
		uint64_t va_start;
		uint8_t *kptr;

		va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
		r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
		if (r) {
			DRM_ERROR("IB va_start is invalid\n");
			return r;
		}

		if ((va_start + ib->length_dw * 4) >
		    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
			DRM_ERROR("IB va_start+ib_bytes is invalid\n");
			return -EINVAL;
		}

		/* the IB should be reserved at this point */
		r = amdgpu_bo_kmap(aobj, (void **)&kptr);
		if (r)
			return r;

		kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);

		if (ring->funcs->parse_cs) {
			memcpy(ib->ptr, kptr, ib->length_dw * 4);
			amdgpu_bo_kunmap(aobj);

			r = amdgpu_ring_parse_cs(ring, p, job, ib);
			if (r)
				return r;

			if (ib->sa_bo)
				ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
		} else {
			ib->ptr = (uint32_t *)kptr;
			r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
			amdgpu_bo_kunmap(aobj);
			if (r)
				return r;
		}
	}

	return 0;
}

static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
{
	unsigned int i;
	int r;

	for (i = 0; i < p->gang_size; ++i) {
		r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *job = p->gang_leader;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	unsigned int i;
	int r;

	/*
	 * We can't use gang submit together with reserved VMIDs when the VM
	 * changes can't be invalidated by more than one engine at the same
	 * time.
	 */
	if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) {
		for (i = 0; i < p->gang_size; ++i) {
			struct drm_sched_entity *entity = p->entities[i];
			struct drm_gpu_scheduler *sched = entity->rq->sched;
			struct amdgpu_ring *ring = to_amdgpu_ring(sched);

			if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
				return -EINVAL;
		}
	}

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
	if (r)
		return r;

	if (fpriv->csa_va) {
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, vm->last_update);
	if (r)
		return r;

	for (i = 0; i < p->gang_size; ++i) {
		job = p->jobs[i];

		if (!job->vm)
			continue;

		job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
	}

	if (amdgpu_vm_debug) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			struct amdgpu_bo *bo = e->bo;

			/* ignore duplicates */
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo, false);
		}
	}

	return 0;
}

static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct drm_gpu_scheduler *sched;
	struct drm_gem_object *obj;
	struct dma_fence *fence;
	unsigned long index;
	unsigned int i;
	int r;

	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
		return r;
	}

	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

		struct dma_resv *resv = bo->tbo.base.resv;
		enum amdgpu_sync_mode sync_mode;

		sync_mode = amdgpu_bo_explicit_sync(bo) ?
			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
		r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
				     &fpriv->vm);
		if (r)
			return r;
	}

	for (i = 0; i < p->gang_size; ++i) {
		r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
		if (r)
			return r;
	}

	sched = p->gang_leader->base.entity->rq->sched;
	while ((fence = amdgpu_sync_get_fence(&p->sync))) {
		struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);

		/*
		 * When we have a dependency it might be necessary to insert a
		 * pipeline sync to make sure that all caches etc are flushed and the
		 * next job actually sees the results from the previous one
		 * before we start executing on the same scheduler ring.
		 */
		if (!s_fence || s_fence->sched != sched) {
			dma_fence_put(fence);
			continue;
		}

		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
	int i;

	for (i = 0; i < p->num_post_deps; ++i) {
		if (p->post_deps[i].chain && p->post_deps[i].point) {
			drm_syncobj_add_point(p->post_deps[i].syncobj,
					      p->post_deps[i].chain,
					      p->fence, p->post_deps[i].point);
			p->post_deps[i].chain = NULL;
		} else {
			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
						  p->fence);
		}
	}
}

static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *leader = p->gang_leader;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *gobj;
	unsigned long index;
	unsigned int i;
	uint64_t seq;
	int r;

	for (i = 0; i < p->gang_size; ++i)
		drm_sched_job_arm(&p->jobs[i]->base);

	for (i = 0; i < p->gang_size; ++i) {
		struct dma_fence *fence;

		if (p->jobs[i] == leader)
			continue;

		fence = &p->jobs[i]->base.s_fence->scheduled;
		dma_fence_get(fence);
		r = drm_sched_job_add_dependency(&leader->base, fence);
		if (r) {
			dma_fence_put(fence);
			return r;
		}
	}

	if (p->gang_size > 1) {
		for (i = 0; i < p->gang_size; ++i)
			amdgpu_job_set_gang_leader(p->jobs[i], leader);
	}

	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and fence is
	 * added to BOs.
	 */
	mutex_lock(&p->adev->notifier_lock);

	/* If userptrs are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN; drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
	 */
	r = 0;
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
							e->range);
		e->range = NULL;
	}
	if (r) {
		r = -EAGAIN;
		mutex_unlock(&p->adev->notifier_lock);
		return r;
	}

	p->fence = dma_fence_get(&leader->base.s_fence->finished);
	drm_exec_for_each_locked_object(&p->exec, index, gobj) {

		ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);

		/* Everybody except for the gang leader uses READ */
		for (i = 0; i < p->gang_size; ++i) {
			if (p->jobs[i] == leader)
				continue;

			dma_resv_add_fence(gobj->resv,
					   &p->jobs[i]->base.s_fence->finished,
					   DMA_RESV_USAGE_READ);
		}

		/* The gang leader is remembered as writer */
		dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
	}

	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
				   p->fence);
	amdgpu_cs_post_dependencies(p);

	if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
	    !p->ctx->preamble_presented) {
		leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		p->ctx->preamble_presented = true;
	}

	cs->out.handle = seq;
	leader->uf_sequence = seq;

	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
	for (i = 0; i < p->gang_size; ++i) {
		amdgpu_job_free_resources(p->jobs[i]);
		trace_amdgpu_cs_ioctl(p->jobs[i]);
		drm_sched_entity_push_job(&p->jobs[i]->base);
		p->jobs[i] = NULL;
	}

	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

	mutex_unlock(&p->adev->notifier_lock);
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return 0;
}

/* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
	unsigned int i;

	amdgpu_sync_free(&parser->sync);
	drm_exec_fini(&parser->exec);

	for (i = 0; i < parser->num_post_deps; i++) {
		drm_syncobj_put(parser->post_deps[i].syncobj);
		kfree(parser->post_deps[i].chain);
	}
	kfree(parser->post_deps);

	dma_fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kvfree(parser->chunks);
	for (i = 0; i < parser->gang_size; ++i) {
		if (parser->jobs[i])
			amdgpu_job_free(parser->jobs[i]);
	}
	amdgpu_bo_unref(&parser->uf_bo);
}

int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_cs_parser parser;
	int r;

	if (amdgpu_ras_intr_triggered())
		return -EHWPOISON;

	if (!adev->accel_working)
		return -EBUSY;

	r = amdgpu_cs_parser_init(&parser, adev, filp, data);
	if (r) {
		if (printk_ratelimit())
			DRM_ERROR("Failed to initialize parser %d!\n", r);
		return r;
	}

	r = amdgpu_cs_pass1(&parser, data);
	if (r)
		goto error_fini;

	r = amdgpu_cs_pass2(&parser);
	if (r)
		goto error_fini;

	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
		if (r == -ENOMEM)
			DRM_ERROR("Not enough memory for command submission!\n");
		else if (r != -ERESTARTSYS && r != -EAGAIN)
			DRM_DEBUG("Failed to process the buffer list %d!\n", r);
		goto error_fini;
	}

	r = amdgpu_cs_patch_jobs(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_sync_rings(&parser);
	if (r)
		goto error_backoff;

	trace_amdgpu_cs_ibs(&parser);

	r = amdgpu_cs_submit(&parser, data);
	if (r)
		goto error_backoff;

	amdgpu_cs_parser_fini(&parser);
	return 0;

error_backoff:
	mutex_unlock(&parser.bo_list->bo_list_mutex);

error_fini:
	amdgpu_cs_parser_fini(&parser);
	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
1471fb4d8502Sjsg /**
1472fb4d8502Sjsg  * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1473fb4d8502Sjsg  *
1474fb4d8502Sjsg  * @dev: drm device
1475fb4d8502Sjsg  * @data: data from userspace
1476fb4d8502Sjsg  * @filp: file private
1477fb4d8502Sjsg  *
1478fb4d8502Sjsg  * Wait for the command submission identified by handle to finish.
1479fb4d8502Sjsg  */
1480fb4d8502Sjsg int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1481fb4d8502Sjsg 			 struct drm_file *filp)
1482fb4d8502Sjsg {
1483fb4d8502Sjsg 	union drm_amdgpu_wait_cs *wait = data;
1484fb4d8502Sjsg 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1485c349dbc7Sjsg 	struct drm_sched_entity *entity;
1486fb4d8502Sjsg 	struct amdgpu_ctx *ctx;
1487fb4d8502Sjsg 	struct dma_fence *fence;
1488fb4d8502Sjsg 	long r;
1489fb4d8502Sjsg
1490fb4d8502Sjsg 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1491fb4d8502Sjsg 	if (ctx == NULL)
1492fb4d8502Sjsg 		return -EINVAL;
1493fb4d8502Sjsg
1494c349dbc7Sjsg 	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
1495c349dbc7Sjsg 				  wait->in.ring, &entity);
1496fb4d8502Sjsg 	if (r) {
1497fb4d8502Sjsg 		amdgpu_ctx_put(ctx);
1498fb4d8502Sjsg 		return r;
1499fb4d8502Sjsg 	}
1500fb4d8502Sjsg
1501c349dbc7Sjsg 	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1502fb4d8502Sjsg 	if (IS_ERR(fence))
1503fb4d8502Sjsg 		r = PTR_ERR(fence);
1504fb4d8502Sjsg 	else if (fence) {
1505fb4d8502Sjsg 		r = dma_fence_wait_timeout(fence, true, timeout);
1506fb4d8502Sjsg 		if (r > 0 && fence->error)
1507fb4d8502Sjsg 			r = fence->error;
1508fb4d8502Sjsg 		dma_fence_put(fence);
1509fb4d8502Sjsg 	} else
1510fb4d8502Sjsg 		r = 1;
1511fb4d8502Sjsg
1512fb4d8502Sjsg 	amdgpu_ctx_put(ctx);
1513fb4d8502Sjsg 	if (r < 0)
1514fb4d8502Sjsg 		return r;
1515fb4d8502Sjsg
1516fb4d8502Sjsg 	memset(wait, 0, sizeof(*wait));
1517fb4d8502Sjsg 	wait->out.status = (r == 0);
1518fb4d8502Sjsg
1519fb4d8502Sjsg 	return 0;
1520fb4d8502Sjsg }
1521fb4d8502Sjsg
1522fb4d8502Sjsg /**
1523fb4d8502Sjsg  * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1524fb4d8502Sjsg  *
1525fb4d8502Sjsg  * @adev: amdgpu device
1526fb4d8502Sjsg  * @filp: file private
1527fb4d8502Sjsg  * @user: drm_amdgpu_fence copied from user space
1528fb4d8502Sjsg  */
1529fb4d8502Sjsg static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1530fb4d8502Sjsg 					     struct drm_file *filp,
1531fb4d8502Sjsg 					     struct drm_amdgpu_fence *user)
1532fb4d8502Sjsg {
1533c349dbc7Sjsg 	struct drm_sched_entity *entity;
1534fb4d8502Sjsg 	struct amdgpu_ctx *ctx;
1535fb4d8502Sjsg 	struct dma_fence *fence;
1536fb4d8502Sjsg 	int r;
1537fb4d8502Sjsg
1538fb4d8502Sjsg 	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1539fb4d8502Sjsg 	if (ctx == NULL)
1540fb4d8502Sjsg 		return ERR_PTR(-EINVAL);
1541fb4d8502Sjsg
1542c349dbc7Sjsg 	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
1543c349dbc7Sjsg 				  user->ring, &entity);
1544fb4d8502Sjsg 	if (r) {
1545fb4d8502Sjsg 		amdgpu_ctx_put(ctx);
1546fb4d8502Sjsg 		return ERR_PTR(r);
1547fb4d8502Sjsg 	}
1548fb4d8502Sjsg
1549c349dbc7Sjsg 	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1550fb4d8502Sjsg 	amdgpu_ctx_put(ctx);
1551fb4d8502Sjsg
1552fb4d8502Sjsg 	return fence;
1553fb4d8502Sjsg }
1554fb4d8502Sjsg
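/*
 * Illustrative userspace sketch, not part of this file: waiting for the
 * sequence number returned by the CS ioctl via DRM_IOCTL_AMDGPU_WAIT_CS,
 * which is handled by amdgpu_cs_wait_ioctl() above.  fd, ctx_id and seq are
 * assumed to come from earlier setup and submission.
 *
 *	union drm_amdgpu_wait_cs wait = { 0 };
 *
 *	wait.in.handle      = seq;
 *	wait.in.ctx_id      = ctx_id;
 *	wait.in.ip_type     = AMDGPU_HW_IP_GFX;
 *	wait.in.ip_instance = 0;
 *	wait.in.ring        = 0;
 *	wait.in.timeout     = AMDGPU_TIMEOUT_INFINITE;
 *
 *	drmIoctl(fd, DRM_IOCTL_AMDGPU_WAIT_CS, &wait);
 *	bool busy = wait.out.status;	// non-zero: not finished within the timeout
 */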
1555fb4d8502Sjsg int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1556fb4d8502Sjsg 				    struct drm_file *filp)
1557fb4d8502Sjsg {
1558ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
1559fb4d8502Sjsg 	union drm_amdgpu_fence_to_handle *info = data;
1560fb4d8502Sjsg 	struct dma_fence *fence;
1561fb4d8502Sjsg 	struct drm_syncobj *syncobj;
1562fb4d8502Sjsg 	struct sync_file *sync_file;
1563fb4d8502Sjsg 	int fd, r;
1564fb4d8502Sjsg
1565fb4d8502Sjsg 	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1566fb4d8502Sjsg 	if (IS_ERR(fence))
1567fb4d8502Sjsg 		return PTR_ERR(fence);
1568fb4d8502Sjsg
1569c349dbc7Sjsg 	if (!fence)
1570c349dbc7Sjsg 		fence = dma_fence_get_stub();
1571c349dbc7Sjsg
1572fb4d8502Sjsg 	switch (info->in.what) {
1573fb4d8502Sjsg 	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1574fb4d8502Sjsg 		r = drm_syncobj_create(&syncobj, 0, fence);
1575fb4d8502Sjsg 		dma_fence_put(fence);
1576fb4d8502Sjsg 		if (r)
1577fb4d8502Sjsg 			return r;
1578fb4d8502Sjsg 		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1579fb4d8502Sjsg 		drm_syncobj_put(syncobj);
1580fb4d8502Sjsg 		return r;
1581fb4d8502Sjsg
1582fb4d8502Sjsg 	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1583fb4d8502Sjsg 		r = drm_syncobj_create(&syncobj, 0, fence);
1584fb4d8502Sjsg 		dma_fence_put(fence);
1585fb4d8502Sjsg 		if (r)
1586fb4d8502Sjsg 			return r;
1587fb4d8502Sjsg 		r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
1588fb4d8502Sjsg 		drm_syncobj_put(syncobj);
1589fb4d8502Sjsg 		return r;
1590fb4d8502Sjsg
1591fb4d8502Sjsg 	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1592fb4d8502Sjsg 		fd = get_unused_fd_flags(O_CLOEXEC);
1593fb4d8502Sjsg 		if (fd < 0) {
1594fb4d8502Sjsg 			dma_fence_put(fence);
1595fb4d8502Sjsg 			return fd;
1596fb4d8502Sjsg 		}
1597fb4d8502Sjsg
1598fb4d8502Sjsg 		sync_file = sync_file_create(fence);
1599fb4d8502Sjsg 		dma_fence_put(fence);
1600fb4d8502Sjsg 		if (!sync_file) {
1601fb4d8502Sjsg 			put_unused_fd(fd);
1602fb4d8502Sjsg 			return -ENOMEM;
1603fb4d8502Sjsg 		}
1604fb4d8502Sjsg
1605fb4d8502Sjsg 		fd_install(fd, sync_file->file);
1606fb4d8502Sjsg 		info->out.handle = fd;
1607fb4d8502Sjsg 		return 0;
1608fb4d8502Sjsg
1609fb4d8502Sjsg 	default:
161090f9b1caSjsg 		dma_fence_put(fence);
1611fb4d8502Sjsg 		return -EINVAL;
1612fb4d8502Sjsg 	}
1613fb4d8502Sjsg }
1614fb4d8502Sjsg
1615fb4d8502Sjsg /**
16165ca02815Sjsg  * amdgpu_cs_wait_all_fences - wait on all fences to signal
1617fb4d8502Sjsg  *
1618fb4d8502Sjsg  * @adev: amdgpu device
1619fb4d8502Sjsg  * @filp: file private
1620fb4d8502Sjsg  * @wait: wait parameters
1621fb4d8502Sjsg  * @fences: array of drm_amdgpu_fence
1622fb4d8502Sjsg  */
1623fb4d8502Sjsg static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1624fb4d8502Sjsg 				     struct drm_file *filp,
1625fb4d8502Sjsg 				     union drm_amdgpu_wait_fences *wait,
1626fb4d8502Sjsg 				     struct drm_amdgpu_fence *fences)
1627fb4d8502Sjsg {
1628fb4d8502Sjsg 	uint32_t fence_count = wait->in.fence_count;
1629fb4d8502Sjsg 	unsigned int i;
1630fb4d8502Sjsg 	long r = 1;
1631fb4d8502Sjsg
1632fb4d8502Sjsg 	for (i = 0; i < fence_count; i++) {
1633fb4d8502Sjsg 		struct dma_fence *fence;
1634fb4d8502Sjsg 		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1635fb4d8502Sjsg
1636fb4d8502Sjsg 		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1637fb4d8502Sjsg 		if (IS_ERR(fence))
1638fb4d8502Sjsg 			return PTR_ERR(fence);
1639fb4d8502Sjsg 		else if (!fence)
1640fb4d8502Sjsg 			continue;
1641fb4d8502Sjsg
1642fb4d8502Sjsg 		r = dma_fence_wait_timeout(fence, true, timeout);
1643d62e2f46Sjsg 		if (r > 0 && fence->error)
1644d62e2f46Sjsg 			r = fence->error;
1645d62e2f46Sjsg
1646fb4d8502Sjsg 		dma_fence_put(fence);
1647fb4d8502Sjsg 		if (r < 0)
1648fb4d8502Sjsg 			return r;
1649fb4d8502Sjsg
1650fb4d8502Sjsg 		if (r == 0)
1651fb4d8502Sjsg 			break;
1652fb4d8502Sjsg 	}
1653fb4d8502Sjsg
1654fb4d8502Sjsg 	memset(wait, 0, sizeof(*wait));
1655fb4d8502Sjsg 	wait->out.status = (r > 0);
1656fb4d8502Sjsg
1657fb4d8502Sjsg 	return 0;
1658fb4d8502Sjsg }
1659fb4d8502Sjsg
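/*
 * Illustrative userspace sketch, not part of this file: exporting a
 * submission fence as a sync_file fd through amdgpu_cs_fence_to_handle_ioctl()
 * above, e.g. to hand it to another driver or process.  fd, ctx_id and seq
 * are assumed to come from an earlier submission.
 *
 *	union drm_amdgpu_fence_to_handle fth = { 0 };
 *	int sync_file_fd;
 *
 *	fth.in.fence.ctx_id  = ctx_id;
 *	fth.in.fence.ip_type = AMDGPU_HW_IP_GFX;
 *	fth.in.fence.ring    = 0;
 *	fth.in.fence.seq_no  = seq;
 *	fth.in.what          = AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE, &fth) == 0)
 *		sync_file_fd = fth.out.handle;
 */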
1660fb4d8502Sjsg /**
1661fb4d8502Sjsg  * amdgpu_cs_wait_any_fence - wait on any fence to signal
1662fb4d8502Sjsg  *
1663fb4d8502Sjsg  * @adev: amdgpu device
1664fb4d8502Sjsg  * @filp: file private
1665fb4d8502Sjsg  * @wait: wait parameters
1666fb4d8502Sjsg  * @fences: array of drm_amdgpu_fence
1667fb4d8502Sjsg  */
1668fb4d8502Sjsg static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1669fb4d8502Sjsg 				    struct drm_file *filp,
1670fb4d8502Sjsg 				    union drm_amdgpu_wait_fences *wait,
1671fb4d8502Sjsg 				    struct drm_amdgpu_fence *fences)
1672fb4d8502Sjsg {
1673fb4d8502Sjsg 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1674fb4d8502Sjsg 	uint32_t fence_count = wait->in.fence_count;
1675fb4d8502Sjsg 	uint32_t first = ~0;
1676fb4d8502Sjsg 	struct dma_fence **array;
1677fb4d8502Sjsg 	unsigned int i;
1678fb4d8502Sjsg 	long r;
1679fb4d8502Sjsg
1680fb4d8502Sjsg 	/* Prepare the fence array */
1681fb4d8502Sjsg 	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
1682fb4d8502Sjsg
1683fb4d8502Sjsg 	if (array == NULL)
1684fb4d8502Sjsg 		return -ENOMEM;
1685fb4d8502Sjsg
1686fb4d8502Sjsg 	for (i = 0; i < fence_count; i++) {
1687fb4d8502Sjsg 		struct dma_fence *fence;
1688fb4d8502Sjsg
1689fb4d8502Sjsg 		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1690fb4d8502Sjsg 		if (IS_ERR(fence)) {
1691fb4d8502Sjsg 			r = PTR_ERR(fence);
1692fb4d8502Sjsg 			goto err_free_fence_array;
1693fb4d8502Sjsg 		} else if (fence) {
1694fb4d8502Sjsg 			array[i] = fence;
1695fb4d8502Sjsg 		} else { /* NULL, the fence has been already signaled */
1696fb4d8502Sjsg 			r = 1;
1697fb4d8502Sjsg 			first = i;
1698fb4d8502Sjsg 			goto out;
1699fb4d8502Sjsg 		}
1700fb4d8502Sjsg 	}
1701fb4d8502Sjsg
1702fb4d8502Sjsg 	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
1703fb4d8502Sjsg 				       &first);
1704fb4d8502Sjsg 	if (r < 0)
1705fb4d8502Sjsg 		goto err_free_fence_array;
1706fb4d8502Sjsg
1707fb4d8502Sjsg out:
1708fb4d8502Sjsg 	memset(wait, 0, sizeof(*wait));
1709fb4d8502Sjsg 	wait->out.status = (r > 0);
1710fb4d8502Sjsg 	wait->out.first_signaled = first;
1711fb4d8502Sjsg
1712fb4d8502Sjsg 	if (first < fence_count && array[first])
1713fb4d8502Sjsg 		r = array[first]->error;
1714fb4d8502Sjsg 	else
1715fb4d8502Sjsg 		r = 0;
1716fb4d8502Sjsg
1717fb4d8502Sjsg err_free_fence_array:
1718fb4d8502Sjsg 	for (i = 0; i < fence_count; i++)
1719fb4d8502Sjsg 		dma_fence_put(array[i]);
1720fb4d8502Sjsg 	kfree(array);
1721fb4d8502Sjsg
1722fb4d8502Sjsg 	return r;
1723fb4d8502Sjsg }
1724fb4d8502Sjsg
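/*
 * Illustrative userspace sketch, not part of this file: the two helpers
 * above back DRM_IOCTL_AMDGPU_WAIT_FENCES, handled below.  Waiting for the
 * first of several submissions to signal; fd, the fences array (filled with
 * drm_amdgpu_fence entries as in the earlier sketches) and abs_timeout_ns
 * are assumed to be set up by the caller.
 *
 *	union drm_amdgpu_wait_fences wf = { 0 };
 *	uint32_t first;
 *
 *	wf.in.fences      = (uintptr_t)fences;
 *	wf.in.fence_count = 2;
 *	wf.in.wait_all    = 0;			// 1: wait for all of them instead
 *	wf.in.timeout_ns  = abs_timeout_ns;	// absolute, see amdgpu_gem_timeout()
 *
 *	drmIoctl(fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &wf);
 *	if (wf.out.status)			// at least one fence signaled in time
 *		first = wf.out.first_signaled;
 */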
1725fb4d8502Sjsg /**
1726fb4d8502Sjsg  * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1727fb4d8502Sjsg  *
1728fb4d8502Sjsg  * @dev: drm device
1729fb4d8502Sjsg  * @data: data from userspace
1730fb4d8502Sjsg  * @filp: file private
1731fb4d8502Sjsg  */
1732fb4d8502Sjsg int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1733fb4d8502Sjsg 				struct drm_file *filp)
1734fb4d8502Sjsg {
1735ad8b1aafSjsg 	struct amdgpu_device *adev = drm_to_adev(dev);
1736fb4d8502Sjsg 	union drm_amdgpu_wait_fences *wait = data;
1737fb4d8502Sjsg 	uint32_t fence_count = wait->in.fence_count;
1738fb4d8502Sjsg 	struct drm_amdgpu_fence *fences_user;
1739fb4d8502Sjsg 	struct drm_amdgpu_fence *fences;
1740fb4d8502Sjsg 	int r;
1741fb4d8502Sjsg
1742fb4d8502Sjsg 	/* Get the fences from userspace */
1743fb4d8502Sjsg 	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1744fb4d8502Sjsg 			       GFP_KERNEL);
1745fb4d8502Sjsg 	if (fences == NULL)
1746fb4d8502Sjsg 		return -ENOMEM;
1747fb4d8502Sjsg
1748fb4d8502Sjsg 	fences_user = u64_to_user_ptr(wait->in.fences);
1749fb4d8502Sjsg 	if (copy_from_user(fences, fences_user,
1750fb4d8502Sjsg 			   sizeof(struct drm_amdgpu_fence) * fence_count)) {
1751fb4d8502Sjsg 		r = -EFAULT;
1752fb4d8502Sjsg 		goto err_free_fences;
1753fb4d8502Sjsg 	}
1754fb4d8502Sjsg
1755fb4d8502Sjsg 	if (wait->in.wait_all)
1756fb4d8502Sjsg 		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1757fb4d8502Sjsg 	else
1758fb4d8502Sjsg 		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1759fb4d8502Sjsg
1760fb4d8502Sjsg err_free_fences:
1761fb4d8502Sjsg 	kfree(fences);
1762fb4d8502Sjsg
1763fb4d8502Sjsg 	return r;
1764fb4d8502Sjsg }
1765fb4d8502Sjsg
1766fb4d8502Sjsg /**
17675ca02815Sjsg  * amdgpu_cs_find_mapping - find bo_va for VM address
1768fb4d8502Sjsg  *
1769fb4d8502Sjsg  * @parser: command submission parser context
1770fb4d8502Sjsg  * @addr: VM address
1771fb4d8502Sjsg  * @bo: resulting BO of the mapping found
17725ca02815Sjsg  * @map: Placeholder to return found BO mapping
1773fb4d8502Sjsg  *
1774fb4d8502Sjsg  * Search the buffer objects in the command submission context for a certain
1775fb4d8502Sjsg  * virtual memory address. Returns 0 and fills in @bo and @map when the
1776fb4d8502Sjsg  * mapping is found, -EINVAL otherwise.
1777fb4d8502Sjsg  */
1778fb4d8502Sjsg int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1779fb4d8502Sjsg 			   uint64_t addr, struct amdgpu_bo **bo,
1780fb4d8502Sjsg 			   struct amdgpu_bo_va_mapping **map)
1781fb4d8502Sjsg {
1782fb4d8502Sjsg 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1783fb4d8502Sjsg 	struct ttm_operation_ctx ctx = { false, false };
1784fb4d8502Sjsg 	struct amdgpu_vm *vm = &fpriv->vm;
1785fb4d8502Sjsg 	struct amdgpu_bo_va_mapping *mapping;
1786fb4d8502Sjsg 	int r;
1787fb4d8502Sjsg
1788fb4d8502Sjsg 	addr /= AMDGPU_GPU_PAGE_SIZE;
1789fb4d8502Sjsg
1790fb4d8502Sjsg 	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1791fb4d8502Sjsg 	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1792fb4d8502Sjsg 		return -EINVAL;
1793fb4d8502Sjsg
1794fb4d8502Sjsg 	*bo = mapping->bo_va->base.bo;
1795fb4d8502Sjsg 	*map = mapping;
1796fb4d8502Sjsg
1797fb4d8502Sjsg 	/* Double check that the BO is reserved by this CS */
1798f005ef32Sjsg 	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
1799fb4d8502Sjsg 		return -EINVAL;
1800fb4d8502Sjsg
1801fb4d8502Sjsg 	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1802fb4d8502Sjsg 		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1803fb4d8502Sjsg 		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
1804fb4d8502Sjsg 		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
1805fb4d8502Sjsg 		if (r)
1806fb4d8502Sjsg 			return r;
1807fb4d8502Sjsg 	}
1808fb4d8502Sjsg
1809fb4d8502Sjsg 	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1810fb4d8502Sjsg }
1811
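/*
 * Illustrative kernel-side sketch, not part of this file: how an IB patching
 * pass (for example the UVD/VCE command parsers) typically uses
 * amdgpu_cs_find_mapping() to translate a GPU virtual address found in an IB
 * into a BO plus byte offset.  The helper name below is hypothetical.
 *
 *	static int example_resolve_va(struct amdgpu_cs_parser *p, uint64_t addr,
 *				      struct amdgpu_bo **bo, uint64_t *offset)
 *	{
 *		struct amdgpu_bo_va_mapping *mapping;
 *		int r;
 *
 *		r = amdgpu_cs_find_mapping(p, addr, bo, &mapping);
 *		if (r)
 *			return r;
 *
 *		// Byte offset of addr inside the BO backing the mapping
 *		*offset = addr - mapping->start * AMDGPU_GPU_PAGE_SIZE;
 *		return 0;
 *	}
 */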