/*	$NetBSD: amdgpu_sync.c,v 1.2 2018/08/27 04:58:19 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_sync.c,v 1.2 2018/08/27 04:58:19 riastradh Exp $");

#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

struct amdgpu_sync_entry {
	struct hlist_node	node;
	struct fence		*fence;
};

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
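 *
 * The object is later populated with amdgpu_sync_fence()/amdgpu_sync_resv()
 * and must eventually be released again with amdgpu_sync_free().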
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
	unsigned i;

	for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
		sync->semaphores[i] = NULL;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		sync->sync_to[i] = NULL;

	hash_init(sync->fences);
	sync->last_vm_update = NULL;
}

/* Check whether the fence @f was emitted by the device @adev. */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
{
	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

	if (a_fence)
		return a_fence->ring->adev == adev;

	if (s_fence) {
		struct amdgpu_ring *ring;

		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
		return ring->adev == adev;
	}

	return false;
}

/* Check whether the fence @f belongs to @owner. */
static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
{
	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

	if (s_fence)
		return s_fence->owner == owner;
	if (a_fence)
		return a_fence->owner == owner;
	return false;
}

/* Keep the later of *keep and @fence in *keep. */
static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
{
	if (*keep && fence_is_later(*keep, fence))
		return;

	fence_put(*keep);
	*keep = fence_get(fence);
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @adev: amdgpu device the sync object belongs to
 * @sync: sync object to add fence to
 * @f: fence to sync to
 *
 * Add the fence to the sync object. Returns 0 on success or -ENOMEM if
 * a new hash entry could not be allocated.
 */
int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
		      struct fence *f)
{
	struct amdgpu_sync_entry *e;
	struct amdgpu_fence *fence;

	if (!f)
		return 0;

	if (amdgpu_sync_same_dev(adev, f) &&
	    amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
		amdgpu_sync_keep_later(&sync->last_vm_update, f);

	fence = to_amdgpu_fence(f);
	if (!fence || fence->ring->adev != adev) {
		hash_for_each_possible(sync->fences, e, node, f->context) {
			if (unlikely(e->fence->context != f->context))
				continue;

			amdgpu_sync_keep_later(&e->fence, f);
			return 0;
		}

		e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
		if (!e)
			return -ENOMEM;

		hash_add(sync->fences, &e->node, f->context);
		e->fence = fence_get(f);
		return 0;
	}

	amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);

	return 0;
}

/* Return the owner of the fence @f, or AMDGPU_FENCE_OWNER_UNDEFINED. */
static void *amdgpu_sync_get_owner(struct fence *f)
{
	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

	if (s_fence)
		return s_fence->owner;
	else if (a_fence)
		return a_fence->owner;
	return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_resv - use the semaphores to sync to a reservation object
 *
 * @adev: amdgpu device the sync object belongs to
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @owner: owner whose fences may be skipped while syncing
 *
 * Sync to the exclusive fence and to the shared fences we cannot skip.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev,
		     struct amdgpu_sync *sync,
		     struct reservation_object *resv,
		     void *owner)
{
	struct reservation_object_list *flist;
	struct fence *f;
	void *fence_owner;
	unsigned i;
	int r = 0;

	if (resv == NULL)
		return -EINVAL;

	/* always sync to the exclusive fence */
	f = reservation_object_get_excl(resv);
	r = amdgpu_sync_fence(adev, sync, f);

	flist = reservation_object_get_list(resv);
	if (!flist || r)
		return r;

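	/* walk the shared fences and sync to the ones we cannot skip */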
	for (i = 0; i < flist->shared_count; ++i) {
		f = rcu_dereference_protected(flist->shared[i],
					      reservation_object_held(resv));
		if (amdgpu_sync_same_dev(adev, f)) {
			/* VM updates are only interesting
			 * for other VM updates and moves.
			 */
			fence_owner = amdgpu_sync_get_owner(f);
			if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
			    (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
			    ((owner == AMDGPU_FENCE_OWNER_VM) !=
			     (fence_owner == AMDGPU_FENCE_OWNER_VM)))
				continue;

			/* Ignore fences from the same owner as
			 * long as it isn't undefined.
			 */
			if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
			    fence_owner == owner)
				continue;
		}

		r = amdgpu_sync_fence(adev, sync, f);
		if (r)
			break;
	}
	return r;
}

/**
 * amdgpu_sync_get_fence - return the next unsignaled fence
 *
 * @sync: sync object to pull a fence from
 *
 * Remove entries from the hash until an unsignaled fence is found and
 * return it with a reference held, or NULL if all fences have signaled.
 */
struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct fence *f;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {

		f = e->fence;

		hash_del(&e->node);
		kfree(e);

		if (!fence_is_signaled(f))
			return f;

		fence_put(f);
	}
	return NULL;
}

/**
 * amdgpu_sync_wait - wait for all collected fences on the CPU
 *
 * @sync: sync object to wait on
 *
 * Wait for every fence in the hash and, unless semaphores are enabled,
 * for every remembered per-ring fence as well.
 */
int amdgpu_sync_wait(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		r = fence_wait(e->fence, false);
		if (r)
			return r;

		hash_del(&e->node);
		fence_put(e->fence);
		kfree(e);
	}

	if (amdgpu_enable_semaphores)
		return 0;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct fence *fence = sync->sync_to[i];
		if (!fence)
			continue;

		r = fence_wait(fence, false);
		if (r)
			return r;
	}

	return 0;
}

/**
 * amdgpu_sync_rings - sync ring to all registered fences
 *
 * @sync: sync object to use
 * @ring: ring that needs sync
 *
 * Ensure that all registered fences are signaled before letting
 * the ring continue. The caller must hold the ring lock.
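 *
 * Returns 0 on success or a negative error code if a wait or the
 * semaphore setup fails.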
 */
int amdgpu_sync_rings(struct amdgpu_sync *sync,
		      struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned count = 0;
	int i, r;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *other = adev->rings[i];
		struct amdgpu_semaphore *semaphore;
		struct amdgpu_fence *fence;

		if (!sync->sync_to[i])
			continue;

		fence = to_amdgpu_fence(sync->sync_to[i]);

		/* check if we really need to sync */
		if (!amdgpu_enable_scheduler &&
		    !amdgpu_fence_need_sync(fence, ring))
			continue;

		/* prevent GPU deadlocks */
		if (!other->ready) {
			dev_err(adev->dev, "Syncing to a disabled ring!\n");
			return -EINVAL;
		}

		if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
			r = fence_wait(sync->sync_to[i], true);
			if (r)
				return r;
			continue;
		}

		if (count >= AMDGPU_NUM_SYNCS) {
			/* not enough room, wait manually */
			r = fence_wait(&fence->base, false);
			if (r)
				return r;
			continue;
		}
		r = amdgpu_semaphore_create(adev, &semaphore);
		if (r)
			return r;

		sync->semaphores[count++] = semaphore;

		/* allocate enough space for sync command */
		r = amdgpu_ring_alloc(other, 16);
		if (r)
			return r;

		/* emit the signal semaphore */
		if (!amdgpu_semaphore_emit_signal(other, semaphore)) {
			/* signaling wasn't successful, wait manually */
			amdgpu_ring_undo(other);
			r = fence_wait(&fence->base, false);
			if (r)
				return r;
			continue;
		}

		/* we assume the caller has already allocated space on the waiter's ring */
		if (!amdgpu_semaphore_emit_wait(ring, semaphore)) {
			/* waiting wasn't successful, wait manually */
			amdgpu_ring_undo(other);
			r = fence_wait(&fence->base, false);
			if (r)
				return r;
			continue;
		}

		amdgpu_ring_commit(other);
		amdgpu_fence_note_sync(fence, ring);
	}

	return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @adev: amdgpu_device pointer
 * @sync: sync object to use
 * @fence: fence to use for the free
 *
 * Free the sync object by dropping all fence references and freeing
 * the semaphores in it.
 */
void amdgpu_sync_free(struct amdgpu_device *adev,
		      struct amdgpu_sync *sync,
		      struct fence *fence)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	unsigned i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		hash_del(&e->node);
		fence_put(e->fence);
		kfree(e);
	}

	for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
		amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		fence_put(sync->sync_to[i]);

	fence_put(sync->last_vm_update);
}
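
/*
 * Usage sketch (illustrative only, not compiled): the typical lifecycle of
 * an amdgpu_sync object as exposed by this file's API.  The variables adev,
 * ring, resv and job_fence stand in for whatever the caller has at hand,
 * and error handling is reduced to the bare minimum:
 *
 *	struct amdgpu_sync sync;
 *	int r;
 *
 *	amdgpu_sync_create(&sync);
 *	r = amdgpu_sync_resv(adev, &sync, resv, AMDGPU_FENCE_OWNER_UNDEFINED);
 *	if (!r)
 *		r = amdgpu_sync_rings(&sync, ring);
 *	amdgpu_sync_free(adev, &sync, job_fence);
 */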