/*	$NetBSD: amdgpu_sa.c,v 1.4 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2011 Red Hat Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
/* Algorithm:
 *
 * We store the last allocated bo in "hole"; we always try to allocate
 * after the last allocated bo.  The principle is that in a linear GPU ring
 * progression, what comes after the last bo is the oldest bo we allocated
 * and thus the first one that should no longer be in use by the GPU.
 *
 * If that is not the case, we skip over the bo after last to the closest
 * done bo, if one exists.  If none exists and we are not asked to block,
 * we report failure to allocate.
 *
 * If we are asked to block, we wait on the oldest fence of each ring and
 * return as soon as any of those fences completes.
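 *
 * Illustrative example (numbers are assumptions, not taken from the code):
 * with a 256-byte manager, allocations A, B and C of 64 bytes each are
 * placed back to back at offsets 0, 64 and 128, and "hole" points at C.
 * The next request is tried in [192, 256); if it does not fit there, the
 * allocator skips to the oldest signaled bo (A, once its fence has
 * completed), frees it, and retries from the start of the buffer.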
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_sa.c,v 1.4 2021/12/18 23:44:58 riastradh Exp $");

#include "amdgpu.h"

static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager);

int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
			      struct amdgpu_sa_manager *sa_manager,
			      unsigned size, u32 align, u32 domain)
{
	int i, r;

#ifdef __NetBSD__
	spin_lock_init(&sa_manager->wq_lock);
	DRM_INIT_WAITQUEUE(&sa_manager->wq, "amdsabom");
#else
	init_waitqueue_head(&sa_manager->wq);
#endif
	sa_manager->bo = NULL;
	sa_manager->size = size;
	sa_manager->domain = domain;
	sa_manager->align = align;
	sa_manager->hole = &sa_manager->olist;
	INIT_LIST_HEAD(&sa_manager->olist);
	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
		INIT_LIST_HEAD(&sa_manager->flist[i]);

	r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
				    &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
		return r;
	}

	memset(sa_manager->cpu_ptr, 0, sa_manager->size);
	return r;
}

void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
			       struct amdgpu_sa_manager *sa_manager)
{
	struct amdgpu_sa_bo *sa_bo, *tmp;

	if (sa_manager->bo == NULL) {
		dev_err(adev->dev, "no bo for sa manager\n");
		return;
	}

	if (!list_empty(&sa_manager->olist)) {
		sa_manager->hole = &sa_manager->olist;
		amdgpu_sa_bo_try_free(sa_manager);
		if (!list_empty(&sa_manager->olist)) {
			dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n");
		}
	}
	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
		amdgpu_sa_bo_remove_locked(sa_bo);
	}

	amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
	sa_manager->size = 0;
#ifdef __NetBSD__
	DRM_DESTROY_WAITQUEUE(&sa_manager->wq);
	spin_lock_destroy(&sa_manager->wq_lock);
#endif
}

static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
{
	struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
	if (sa_manager->hole == &sa_bo->olist) {
		sa_manager->hole = sa_bo->olist.prev;
	}
	list_del_init(&sa_bo->olist);
	list_del_init(&sa_bo->flist);
	dma_fence_put(sa_bo->fence);
	kfree(sa_bo);
}

static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
{
	struct amdgpu_sa_bo *sa_bo, *tmp;

	if (sa_manager->hole->next == &sa_manager->olist)
		return;

	sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
		if (sa_bo->fence == NULL ||
		    !dma_fence_is_signaled(sa_bo->fence)) {
			return;
		}
		amdgpu_sa_bo_remove_locked(sa_bo);
	}
}

static inline unsigned amdgpu_sa_bo_hole_soffset(struct amdgpu_sa_manager *sa_manager)
{
	struct list_head *hole = sa_manager->hole;

	if (hole != &sa_manager->olist) {
		return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
	}
	return 0;
}

static inline unsigned amdgpu_sa_bo_hole_eoffset(struct amdgpu_sa_manager *sa_manager)
{
	struct list_head *hole = sa_manager->hole;

	if (hole->next != &sa_manager->olist) {
		return list_entry(hole->next, struct amdgpu_sa_bo, olist)->soffset;
	}
	return sa_manager->size;
}

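/*
 * Illustrative note on the two helpers above (the concrete numbers are an
 * assumption for the example): while the olist is empty, the hole spans
 * [0, size).  After a single 64-byte allocation at the head of an otherwise
 * empty manager, the hole becomes [64, size), since the hole always starts
 * at the eoffset of the bo that "hole" points to and ends at the soffset of
 * the next bo, or at the end of the buffer.
 */
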
static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager *sa_manager,
				   struct amdgpu_sa_bo *sa_bo,
				   unsigned size, unsigned align)
{
	unsigned soffset, eoffset, wasted;

	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
	eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
	wasted = (align - (soffset % align)) % align;

	if ((eoffset - soffset) >= (size + wasted)) {
		soffset += wasted;

		sa_bo->manager = sa_manager;
		sa_bo->soffset = soffset;
		sa_bo->eoffset = soffset + size;
		list_add(&sa_bo->olist, sa_manager->hole);
		INIT_LIST_HEAD(&sa_bo->flist);
		sa_manager->hole = &sa_bo->olist;
		return true;
	}
	return false;
}

/**
 * amdgpu_sa_event - Check if we can stop waiting
 *
 * @sa_manager: pointer to the sa_manager
 * @size: number of bytes we want to allocate
 * @align: alignment we need to match
 *
 * Check if either there is a fence we can wait for or
 * enough free memory to satisfy the allocation directly.
 */
static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
			    unsigned size, unsigned align)
{
	unsigned soffset, eoffset, wasted;
	int i;

	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
		if (!list_empty(&sa_manager->flist[i]))
			return true;

	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
	eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
	wasted = (align - (soffset % align)) % align;

	if ((eoffset - soffset) >= (size + wasted)) {
		return true;
	}

	return false;
}

static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
				   struct dma_fence **fences,
				   unsigned *tries)
{
	struct amdgpu_sa_bo *best_bo = NULL;
	unsigned i, soffset, best, tmp;

	/* if hole points to the end of the buffer */
	if (sa_manager->hole->next == &sa_manager->olist) {
		/* try again with its beginning */
		sa_manager->hole = &sa_manager->olist;
		return true;
	}

	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
	/* to handle wrap around we add sa_manager->size */
	best = sa_manager->size * 2;
	/* go over all fence lists and try to find the closest sa_bo
	 * after the current hole
	 */
	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
		struct amdgpu_sa_bo *sa_bo;

		fences[i] = NULL;

		if (list_empty(&sa_manager->flist[i]))
			continue;

		sa_bo = list_first_entry(&sa_manager->flist[i],
					 struct amdgpu_sa_bo, flist);

		if (!dma_fence_is_signaled(sa_bo->fence)) {
			fences[i] = sa_bo->fence;
			continue;
		}

		/* limit the number of tries each ring gets */
		if (tries[i] > 2) {
			continue;
		}

		tmp = sa_bo->soffset;
		if (tmp < soffset) {
			/* wrap around, pretend it's after */
			tmp += sa_manager->size;
		}
		tmp -= soffset;
		if (tmp < best) {
			/* this sa bo is the closest one */
			best = tmp;
			best_bo = sa_bo;
		}
	}

	if (best_bo) {
		uint32_t idx = best_bo->fence->context;

		idx %= AMDGPU_SA_NUM_FENCE_LISTS;
		++tries[idx];
		sa_manager->hole = best_bo->olist.prev;

		/* we knew that this one is signaled,
		   so it's safe to remove it */
		amdgpu_sa_bo_remove_locked(best_bo);
		return true;
	}
	return false;
}

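/*
 * A worked wrap-around case for the distance computation in
 * amdgpu_sa_bo_next_hole() above (the numbers are illustrative assumptions):
 * with size = 256, a hole starting at soffset = 192 and a signaled sa_bo at
 * soffset = 64, the candidate first becomes 64 + 256 = 320 and the distance
 * is 320 - 192 = 128, so a bo that wrapped past the end of the buffer still
 * compares as being "after" the hole rather than before it.
 */
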
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
		     struct amdgpu_sa_bo **sa_bo,
		     unsigned size, unsigned align)
{
	struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
	unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
	unsigned count;
	int i, r;
	signed long t;

	if (WARN_ON_ONCE(align > sa_manager->align))
		return -EINVAL;

	if (WARN_ON_ONCE(size > sa_manager->size))
		return -EINVAL;

	*sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
	if (!(*sa_bo))
		return -ENOMEM;
	(*sa_bo)->manager = sa_manager;
	(*sa_bo)->fence = NULL;
	INIT_LIST_HEAD(&(*sa_bo)->olist);
	INIT_LIST_HEAD(&(*sa_bo)->flist);

#ifdef __NetBSD__
	spin_lock(&sa_manager->wq_lock);
#else
	spin_lock(&sa_manager->wq.lock);
#endif
	do {
		for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
			tries[i] = 0;

		do {
			amdgpu_sa_bo_try_free(sa_manager);

			if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
						   size, align)) {
#ifdef __NetBSD__
				spin_unlock(&sa_manager->wq_lock);
#else
				spin_unlock(&sa_manager->wq.lock);
#endif
				return 0;
			}

			/* see if we can skip over some allocations */
		} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));

		for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
			if (fences[i])
				fences[count++] = dma_fence_get(fences[i]);

		if (count) {
#ifdef __NetBSD__
			spin_unlock(&sa_manager->wq_lock);
#else
			spin_unlock(&sa_manager->wq.lock);
#endif
			t = dma_fence_wait_any_timeout(fences, count, false,
						       MAX_SCHEDULE_TIMEOUT,
						       NULL);
			for (i = 0; i < count; ++i)
				dma_fence_put(fences[i]);

			r = (t > 0) ? 0 : t;
#ifdef __NetBSD__
			spin_lock(&sa_manager->wq_lock);
#else
			spin_lock(&sa_manager->wq.lock);
#endif
		} else {
			/* if we have nothing to wait for, block */
#ifdef __NetBSD__
			DRM_SPIN_WAIT_UNTIL(r, &sa_manager->wq,
			    &sa_manager->wq_lock,
			    amdgpu_sa_event(sa_manager, size, align));
#else
			r = wait_event_interruptible_locked(
				sa_manager->wq,
				amdgpu_sa_event(sa_manager, size, align)
			);
#endif
		}

	} while (!r);

#ifdef __NetBSD__
	spin_unlock(&sa_manager->wq_lock);
#else
	spin_unlock(&sa_manager->wq.lock);
#endif
	kfree(*sa_bo);
	*sa_bo = NULL;
	return r;
}

void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
		       struct dma_fence *fence)
{
	struct amdgpu_sa_manager *sa_manager;

	if (sa_bo == NULL || *sa_bo == NULL) {
		return;
	}

	sa_manager = (*sa_bo)->manager;
#ifdef __NetBSD__
	spin_lock(&sa_manager->wq_lock);
#else
	spin_lock(&sa_manager->wq.lock);
#endif
	if (fence && !dma_fence_is_signaled(fence)) {
		uint32_t idx;

		(*sa_bo)->fence = dma_fence_get(fence);
		idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
		list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
	} else {
		amdgpu_sa_bo_remove_locked(*sa_bo);
	}
#ifdef __NetBSD__
	DRM_SPIN_WAKEUP_ALL(&sa_manager->wq, &sa_manager->wq_lock);
	spin_unlock(&sa_manager->wq_lock);
#else
	wake_up_all_locked(&sa_manager->wq);
	spin_unlock(&sa_manager->wq.lock);
#endif
	*sa_bo = NULL;
}

#if defined(CONFIG_DEBUG_FS)

void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
				  struct seq_file *m)
{
	struct amdgpu_sa_bo *i;

	spin_lock(&sa_manager->wq.lock);
	list_for_each_entry(i, &sa_manager->olist, olist) {
		uint64_t soffset = i->soffset + sa_manager->gpu_addr;
		uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
		if (&i->olist == sa_manager->hole) {
			seq_printf(m, ">");
		} else {
			seq_printf(m, " ");
		}
		seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
			   soffset, eoffset, eoffset - soffset);

		if (i->fence)
			seq_printf(m, " protected by 0x%016llx on context %llu",
				   i->fence->seqno, i->fence->context);

		seq_printf(m, "\n");
	}
	spin_unlock(&sa_manager->wq.lock);
}
#endif
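
/*
 * The sketch below is illustrative only (hence #if 0): it shows how a
 * hypothetical caller might drive this suballocator.  The example_* names,
 * the 64 KiB pool size, the 256-byte alignment and the GTT domain are
 * assumptions made for the example, not requirements of this file.
 */
#if 0
static int example_sa_usage(struct amdgpu_device *adev, struct dma_fence *fence)
{
	struct amdgpu_sa_manager example_mgr;
	struct amdgpu_sa_bo *example_bo;
	int r;

	/* One backing bo, carved up into small suballocations. */
	r = amdgpu_sa_bo_manager_init(adev, &example_mgr, 64 * 1024, 256,
				      AMDGPU_GEM_DOMAIN_GTT);
	if (r)
		return r;

	/* Grab 1 KiB; may block until an older suballocation retires. */
	r = amdgpu_sa_bo_new(&example_mgr, &example_bo, 1024, 256);
	if (r)
		goto out_fini;

	/* The GPU address of the range is the manager's gpu_addr + soffset. */
	(void)(example_mgr.gpu_addr + example_bo->soffset);

	/* Hand the range back; it is reclaimed once @fence signals. */
	amdgpu_sa_bo_free(adev, &example_bo, fence);

out_fini:
	amdgpu_sa_bo_manager_fini(adev, &example_mgr);
	return r;
}
#endif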