/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	=	1,
	[AMDGPU_HW_IP_COMPUTE]	=	4,
	[AMDGPU_HW_IP_DMA]	=	2,
	[AMDGPU_HW_IP_UVD]	=	1,
	[AMDGPU_HW_IP_VCE]	=	1,
	[AMDGPU_HW_IP_UVD_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_DEC]	=	1,
	[AMDGPU_HW_IP_VCN_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
};

bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_VERY_LOW:
	case AMDGPU_CTX_PRIORITY_LOW:
	case AMDGPU_CTX_PRIORITY_NORMAL:
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return true;
	default:
	case AMDGPU_CTX_PRIORITY_UNSET:
		/* UNSET priority is not valid and we don't carry that
		 * around, but set it to NORMAL in the only place this
		 * function is called, amdgpu_ctx_ioctl().
		 */
		return false;
	}
}

static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
		pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_VERY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_NORMAL:
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	/* This should not happen as we sanitized the userspace-provided
	 * priority already; WARN if it does.
	 */
	default:
		WARN(1, "Invalid context priority %d\n", ctx_prio);
		return DRM_SCHED_PRIORITY_NORMAL;
	}

}

static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      int32_t priority)
{
	/* NORMAL and below are accessible by everyone */
	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_GFX_PIPE_PRIO_HIGH;
	default:
		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
	}
}

static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
		return AMDGPU_RING_PRIO_1;
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}

static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	int32_t ctx_prio;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_GFX:
	case AMDGPU_HW_IP_COMPUTE:
		hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
		break;
	case AMDGPU_HW_IP_VCE:
	case AMDGPU_HW_IP_VCN_ENC:
		hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
		break;
	default:
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
		break;
	}

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;

	return hw_prio;
}

/* Calculate the time spent on the hw */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
	struct drm_sched_fence *s_fence;

	if (!fence)
		return ns_to_ktime(0);

	/* When the fence is not even scheduled it can't have spent time */
	s_fence = to_drm_sched_fence(fence);
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
		return ns_to_ktime(0);

	/* When it is still running account how much it has already spent */
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);

	return ktime_sub(s_fence->finished.timestamp,
			 s_fence->scheduled.timestamp);
}

static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
				      struct amdgpu_ctx_entity *centity)
{
	ktime_t res = ns_to_ktime(0);
	uint32_t i;

	spin_lock(&ctx->ring_lock);
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
	}
	spin_unlock(&ctx->ring_lock);
	return res;
}

static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	struct amdgpu_device *adev = ctx->mgr->adev;
	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority drm_prio;
	unsigned int hw_prio, num_scheds;
	int32_t ctx_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	entity->hw_ip = hw_ip;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);

	if (!(adev)->xcp_mgr) {
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
	} else {
		struct amdgpu_fpriv *fpriv;

		fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
		r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
					     &num_scheds, &scheds);
		if (r)
			goto cleanup_entity;
	}

	/* disable load balance if the hw engine retains context among dependent jobs */
	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
	    hw_ip == AMDGPU_HW_IP_UVD) {
		sched = drm_sched_pick_best(scheds, num_scheds);
		scheds = &sched;
		num_scheds = 1;
	}

	r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
				  &ctx->guilty);
	if (r)
		goto error_free_entity;

	/* It's not an error if we fail to install the new entity */
	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
		goto cleanup_entity;

	return 0;

cleanup_entity:
	drm_sched_entity_fini(&entity->entity);

error_free_entity:
	kfree(entity);

	return r;
}

static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
				      struct amdgpu_ctx_entity *entity)
{
	ktime_t res = ns_to_ktime(0);
	int i;

	if (!entity)
		return res;

	for (i = 0; i < amdgpu_sched_jobs; ++i) {
		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
		dma_fence_put(entity->fences[i]);
	}

	amdgpu_xcp_release_sched(adev, entity);

	kfree(entity);
	return res;
}

static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level current_level;

	current_level = amdgpu_dpm_get_performance_level(adev);

	switch (current_level) {
	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
		break;
	default:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
		break;
	}
	return 0;
}

static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
			   struct drm_file *filp, struct amdgpu_ctx *ctx)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	u32 current_stable_pstate;
	int r;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	kref_init(&ctx->refcount);
	ctx->mgr = mgr;
	mtx_init(&ctx->ring_lock, IPL_TTY);

	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
	ctx->init_priority = priority;
	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r)
		return r;

	if (mgr->adev->pm.stable_pstate_ctx)
		ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
	else
		ctx->stable_pstate = current_stable_pstate;

	ctx->ctx_mgr = &(fpriv->ctx_mgr);
	return 0;
}

static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
					u32 stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level level;
	u32 current_stable_pstate;
	int r;

	mutex_lock(&adev->pm.stable_pstate_ctx_lock);
	if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
		r = -EBUSY;
		goto done;
	}

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r || (stable_pstate == current_stable_pstate))
		goto done;

	switch (stable_pstate) {
	case AMDGPU_CTX_STABLE_PSTATE_NONE:
		level = AMD_DPM_FORCED_LEVEL_AUTO;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_PEAK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
		break;
	default:
		r = -EINVAL;
		goto done;
	}

	r = amdgpu_dpm_force_performance_level(adev, level);

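	/* Track which context currently forces the stable pstate: other
	 * contexts get -EBUSY above until this one selects AUTO again.
	 */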
	if (level == AMD_DPM_FORCED_LEVEL_AUTO)
		adev->pm.stable_pstate_ctx = NULL;
	else
		adev->pm.stable_pstate_ctx = ctx;
done:
	mutex_unlock(&adev->pm.stable_pstate_ctx_lock);

	return r;
}

static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
	struct amdgpu_device *adev = mgr->adev;
	unsigned i, j, idx;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			ktime_t spend;

			spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
	}

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
		drm_dev_exit(idx);
	}

	kfree(ctx);
}

int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	int r;
	struct drm_sched_entity *ctx_entity;

	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	ctx_entity = &ctx->entities[hw_ip][ring]->entity;
	r = drm_sched_entity_error(ctx_entity);
	if (r) {
		DRM_DEBUG("error entity %p\n", ctx_entity);
		return r;
	}

	*entity = ctx_entity;
	return 0;
}

static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    int32_t priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i, j;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
		}
	}

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

#define AMDGPU_RAS_COUNTE_DELAY_MS 3000

static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	if (amdgpu_in_reset(adev))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;

	if (adev->ras_enabled && con) {
		/* Return the cached values in O(1),
		 * and schedule delayed work to cache
		 * new values.
		 */
		int ce_count, ue_count;

		ce_count = atomic_read(&con->ras_ce_count);
		ue_count = atomic_read(&con->ras_ue_count);

		if (ce_count != ctx->ras_counter_ce) {
			ctx->ras_counter_ce = ce_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		}

		if (ue_count != ctx->ras_counter_ue) {
			ctx->ras_counter_ue = ue_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		}

		schedule_delayed_work(&con->ras_counte_delay_work,
				      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
	}

	mutex_unlock(&mgr->lock);
	return 0;
}

static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
				    struct amdgpu_fpriv *fpriv, uint32_t id,
				    bool set, u32 *stable_pstate)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	int r;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	if (set)
		r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
	else
		r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

	mutex_unlock(&mgr->lock);
	return r;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id, stable_pstate;
	int32_t priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	priority = args->in.priority;

	/* For backwards compatibility, we need to accept ioctls with garbage
	 * in the priority field. Garbage values in the priority field result
	 * in the priority being set to NORMAL.
	 */
	if (!amdgpu_ctx_priority_is_valid(priority))
		priority = AMDGPU_CTX_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
		if (!r)
			args->out.pstate.flags = stable_pstate;
		break;
	case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
		if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
			return -EINVAL;
		stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
		if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}

uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			      struct drm_sched_entity *entity,
			      struct dma_fence *fence)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	WARN_ON(other && !dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
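	/* centity->fences is a fixed-size ring of amdgpu_sched_jobs entries
	 * indexed by sequence number; the slot being recycled must already be
	 * signaled (see the WARN_ON above) before its reference is dropped.
	 */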
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		     &ctx->mgr->time_spend[centity->hw_ip]);

	dma_fence_put(other);
	return seq;
}

struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					   struct amdgpu_ctx_entity *aentity,
					   int hw_ip,
					   int32_t priority)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned num_scheds;

	/* set sw priority */
	drm_sched_entity_set_priority(&aentity->entity,
				      amdgpu_ctx_to_drm_sched_prio(priority));

	/* set hw priority */
	if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		drm_sched_entity_modify_sched(&aentity->entity, scheds,
					      num_scheds);
	}
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  int32_t priority)
{
	int32_t ctx_prio;
	unsigned i, j;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
						       i, ctx_prio);
		}
	}
}

int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
			 struct amdgpu_device *adev)
{
	unsigned int i;

	mgr->adev = adev;
	rw_init(&mgr->lock, "mgrlk");
	idr_init_base(&mgr->ctx_handles, 1);

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		atomic64_set(&mgr->time_spend[i], 0);
}

long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				timeout = drm_sched_entity_flush(entity, timeout);
			}
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				drm_sched_entity_fini(entity);
			}
		}
	}
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}

void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
			  ktime_t usage[AMDGPU_HW_IP_NUM])
{
	struct amdgpu_ctx *ctx;
	unsigned int hw_ip, i;
	uint32_t id;

	/*
	 * This is a little bit racy because it can happen that a ctx or a
	 * fence is destroyed just in the moment we try to account it. But
	 * that is ok since exactly that case is explicitly allowed by the
	 * interface.
	 */
	mutex_lock(&mgr->lock);
	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		usage[hw_ip] = ns_to_ktime(ns);
	}

	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
				struct amdgpu_ctx_entity *centity;
				ktime_t spend;

				centity = ctx->entities[hw_ip][i];
				if (!centity)
					continue;
				spend = amdgpu_ctx_entity_time(ctx, centity);
				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
			}
		}
	}
	mutex_unlock(&mgr->lock);
}