// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/migrate.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_res_cursor.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
#include "kfd_smi_events.h"

#ifdef dev_fmt
#undef dev_fmt
#endif
#define dev_fmt(fmt) "kfd_migrate: " fmt

static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
	return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}

static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
		     dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	unsigned int num_dw, num_bytes;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
	uint64_t pte_flags;
	void *cpu_addr;
	int r;

	/* use gart window 0 */
	*gart_addr = adev->gmc.gart_start;

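	/*
	 * Each GART page-table entry is 8 bytes. The PTEs are built in the
	 * IB right after the copy commands and are copied into the GART
	 * table by that same IB.
	 */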
	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	num_bytes = npages * 8;

	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED,
				     &job);
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
	pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
	if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
		pte_flags |= AMDGPU_PTE_WRITEABLE;
	pte_flags |= adev->gart.gart_pte_flags;

	cpu_addr = &job->ibs[0].ptr[num_dw];

	amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
	fence = amdgpu_job_submit(job);
	dma_fence_put(fence);

	return r;
}

/**
 * svm_migrate_copy_memory_gart - use sdma to copy data between ram and vram
 *
 * @adev: amdgpu device the sdma ring is running on
 * @sys: system DMA pointer to be copied
 * @vram: vram destination DMA pointer
 * @npages: number of pages to copy
 * @direction: enum MIGRATION_COPY_DIR
 * @mfence: output, sdma fence to signal after sdma is done
 *
 * The ram address uses contiguous GART table entries that map the ram pages,
 * the vram address uses the direct mapping of the vram pages, which must be
 * npages contiguous pages.
 * The GART update and the sdma copy use the same buffer copy function and
 * ring. The copy is split into transfers of at most GTT_MAX_PAGES each and
 * all sdma operations are serialized; the fence of the last sdma operation
 * is returned so the caller can wait for the whole copy to finish.
 *
 * Context: Process context, takes and releases gtt_window_lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */

static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
			     uint64_t *vram, uint64_t npages,
			     enum MIGRATION_COPY_DIR direction,
			     struct dma_fence **mfence)
{
	const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	uint64_t gart_s, gart_d;
	struct dma_fence *next;
	uint64_t size;
	int r;

	mutex_lock(&adev->mman.gtt_window_lock);

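	/*
	 * Copy in chunks of at most GTT_MAX_PAGES; the GART window used by
	 * svm_migrate_gart_map() can only map that many pages at a time.
	 */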
	while (npages) {
		size = min(GTT_MAX_PAGES, npages);

		if (direction == FROM_VRAM_TO_RAM) {
			gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
			r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);

		} else if (direction == FROM_RAM_TO_VRAM) {
			r = svm_migrate_gart_map(ring, size, sys, &gart_s,
						 KFD_IOCTL_SVM_FLAG_GPU_RO);
			gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
		}
		if (r) {
			dev_err(adev->dev, "fail %d create gart mapping\n", r);
			goto out_unlock;
		}

		r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
				       NULL, &next, false, true, false);
		if (r) {
			dev_err(adev->dev, "fail %d to copy memory\n", r);
			goto out_unlock;
		}

		dma_fence_put(*mfence);
		*mfence = next;
		npages -= size;
		if (npages) {
			sys += size;
			vram += size;
		}
	}

out_unlock:
	mutex_unlock(&adev->mman.gtt_window_lock);

	return r;
}

/**
 * svm_migrate_copy_done - wait for the sdma memory copy to complete
 *
 * @adev: amdgpu device the sdma memory copy is executing on
 * @mfence: migrate fence
 *
 * Wait for the dma fence to be signaled. If the copy was split into multiple
 * sdma operations, this is the fence of the last sdma operation.
 *
 * Context: called after svm_migrate_copy_memory
 *
 * Return:
 * 0 - success
 * otherwise - error code from dma fence signal
 */
static int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
	int r = 0;

	if (mfence) {
		r = dma_fence_wait(mfence, false);
		dma_fence_put(mfence);
		pr_debug("sdma copy memory fence done\n");
	}

	return r;
}

unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
{
	return (addr + adev->kfd.pgmap.range.start) >> PAGE_SHIFT;
}

static void
svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
{
	struct page *page;

	page = pfn_to_page(pfn);
	svm_range_bo_ref(prange->svm_bo);
	page->zone_device_data = prange->svm_bo;
	zone_device_page_init(page);
}

static void
svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
	unlock_page(page);
	put_page(page);
}

static unsigned long
svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
{
	unsigned long addr;

	addr = page_to_pfn(page) << PAGE_SHIFT;
	return (addr - adev->kfd.pgmap.range.start);
}

static struct page *
svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;

	page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
	if (page)
		lock_page(page);

	return page;
}

static void svm_migrate_put_sys_page(unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(addr >> PAGE_SHIFT);
	unlock_page(page);
	put_page(page);
}

static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
{
	unsigned long cpages = 0;
	unsigned long i;

	for (i = 0; i < migrate->npages; i++) {
		if (migrate->src[i] & MIGRATE_PFN_VALID &&
		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
			cpages++;
	}
	return cpages;
}

static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
{
	unsigned long upages = 0;
	unsigned long i;

	for (i = 0; i < migrate->npages; i++) {
		if (migrate->src[i] & MIGRATE_PFN_VALID &&
		    !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
			upages++;
	}
	return upages;
}

static int
svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
			 struct migrate_vma *migrate, struct dma_fence **mfence,
			 dma_addr_t *scratch, uint64_t ttm_res_offset)
{
	uint64_t npages = migrate->cpages;
	struct amdgpu_device *adev = node->adev;
	struct device *dev = adev->dev;
	struct amdgpu_res_cursor cursor;
	dma_addr_t *src;
	uint64_t *dst;
	uint64_t i, j;
	int r;

	pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start,
		 prange->last, ttm_res_offset);

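	/*
	 * scratch holds npages DMA addresses of the system pages (src)
	 * followed by npages VRAM offsets (dst).
	 */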
	src = scratch;
	dst = (uint64_t *)(scratch + npages);

	amdgpu_res_first(prange->ttm_res, ttm_res_offset,
			 npages << PAGE_SHIFT, &cursor);
	for (i = j = 0; i < npages; i++) {
		struct page *spage;

		dst[i] = cursor.start + (j << PAGE_SHIFT);
		migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
		svm_migrate_get_vram_page(prange, migrate->dst[i]);
		migrate->dst[i] = migrate_pfn(migrate->dst[i]);

		spage = migrate_pfn_to_page(migrate->src[i]);
		if (spage && !is_zone_device_page(spage)) {
			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
					      DMA_TO_DEVICE);
			r = dma_mapping_error(dev, src[i]);
			if (r) {
				dev_err(dev, "%s: fail %d dma_map_page\n",
					__func__, r);
				goto out_free_vram_pages;
			}
		} else {
			if (j) {
				r = svm_migrate_copy_memory_gart(
						adev, src + i - j,
						dst + i - j, j,
						FROM_RAM_TO_VRAM,
						mfence);
				if (r)
					goto out_free_vram_pages;
				amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
				j = 0;
			} else {
				amdgpu_res_next(&cursor, PAGE_SIZE);
			}
			continue;
		}

		pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
				     src[i] >> PAGE_SHIFT, page_to_pfn(spage));

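		/*
		 * Flush the pending copy when the current contiguous VRAM
		 * block (cursor segment) is used up, then advance the cursor
		 * to the next block.
		 */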
		if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
			r = svm_migrate_copy_memory_gart(adev, src + i - j,
							 dst + i - j, j + 1,
							 FROM_RAM_TO_VRAM,
							 mfence);
			if (r)
				goto out_free_vram_pages;
			amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
			j = 0;
		} else {
			j++;
		}
	}

	r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
					 FROM_RAM_TO_VRAM, mfence);

out_free_vram_pages:
	if (r) {
		pr_debug("failed %d to copy memory to vram\n", r);
		while (i--) {
			svm_migrate_put_vram_page(adev, dst[i]);
			migrate->dst[i] = 0;
		}
	}

#ifdef DEBUG_FORCE_MIXED_DOMAINS
	for (i = 0, j = 0; i < npages; i += 4, j++) {
		if (j & 1)
			continue;
		svm_migrate_put_vram_page(adev, dst[i]);
		migrate->dst[i] = 0;
		svm_migrate_put_vram_page(adev, dst[i + 1]);
		migrate->dst[i + 1] = 0;
		svm_migrate_put_vram_page(adev, dst[i + 2]);
		migrate->dst[i + 2] = 0;
		svm_migrate_put_vram_page(adev, dst[i + 3]);
		migrate->dst[i + 3] = 0;
	}
#endif

	return r;
}

static long
svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
			struct vm_area_struct *vma, uint64_t start,
			uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
{
	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct amdgpu_device *adev = node->adev;
	struct kfd_process_device *pdd;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate = { 0 };
	unsigned long cpages = 0;
	dma_addr_t *scratch;
	void *buf;
	int r = -ENOMEM;

	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
	migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);

	buf = kvcalloc(npages,
		       2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
		       GFP_KERNEL);
	if (!buf)
		goto out;

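	/*
	 * buf layout: migrate.src[npages], migrate.dst[npages], then the
	 * scratch area holding one dma_addr_t and one uint64_t per page
	 * used by svm_migrate_copy_to_vram().
	 */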
	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

	kfd_smi_event_migration_start(node, p->lead_thread->pid,
				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				      0, node->id, prange->prefetch_loc,
				      prange->preferred_loc, trigger);

	r = migrate_vma_setup(&migrate);
	if (r) {
		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
			__func__, r, prange->start, prange->last);
		goto out_free;
	}

	cpages = migrate.cpages;
	if (!cpages) {
		pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
			 prange->start, prange->last);
		goto out_free;
	}
	if (cpages != npages)
		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
			 cpages, npages);
	else
		pr_debug("0x%lx pages migrated\n", cpages);

	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
	migrate_vma_pages(&migrate);

	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
		 svm_migrate_successful_pages(&migrate), cpages, migrate.npages);

	svm_migrate_copy_done(adev, mfence);
	migrate_vma_finalize(&migrate);

	kfd_smi_event_migration_end(node, p->lead_thread->pid,
				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				    0, node->id, trigger);

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);

out_free:
	kvfree(buf);
out:
	if (!r && cpages) {
		pdd = svm_range_get_pdd_by_node(prange, node);
		if (pdd)
			WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);

		return cpages;
	}
	return r;
}

/**
 * svm_migrate_ram_to_vram - migrate svm range from system to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: the process mm structure
 * @trigger: reason for the migration
 *
 * Context: Process context, caller hold mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
			struct mm_struct *mm, uint32_t trigger)
{
	unsigned long addr, start, end;
	struct vm_area_struct *vma;
	uint64_t ttm_res_offset;
	struct kfd_node *node;
	unsigned long cpages = 0;
	long r = 0;

	if (prange->actual_loc == best_loc) {
		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
			 prange->svms, prange->start, prange->last, best_loc);
		return 0;
	}

	node = svm_range_get_node_by_id(prange, best_loc);
	if (!node) {
		pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
		 prange->start, prange->last, best_loc);

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
					prange->npages * PAGE_SIZE,
					KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
					node->xcp ? node->xcp->id : 0);
	if (r) {
		dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r);
		return -ENOSPC;
	}

	r = svm_range_vram_node_new(node, prange, true);
	if (r) {
		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
		goto out;
	}
	ttm_res_offset = prange->offset << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = vma_lookup(mm, addr);
		if (!vma)
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_vram(node, prange, vma, addr, next, trigger, ttm_res_offset);
		if (r < 0) {
			pr_debug("failed %ld to migrate\n", r);
			break;
		} else {
			cpages += r;
		}
		ttm_res_offset += next - addr;
		addr = next;
	}

	if (cpages) {
		prange->actual_loc = best_loc;
		svm_range_free_dma_mappings(prange, true);
	} else {
		svm_range_vram_node_free(prange);
	}

out:
	amdgpu_amdkfd_unreserve_mem_limit(node->adev,
					prange->npages * PAGE_SIZE,
					KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
					node->xcp ? node->xcp->id : 0);
	return r < 0 ? r : 0;
}

static void svm_migrate_page_free(struct page *page)
{
	struct svm_range_bo *svm_bo = page->zone_device_data;

	if (svm_bo) {
		pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
		svm_range_bo_unref_async(svm_bo);
	}
}

static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
			struct migrate_vma *migrate, struct dma_fence **mfence,
			dma_addr_t *scratch, uint64_t npages)
{
	struct device *dev = adev->dev;
	uint64_t *src;
	dma_addr_t *dst;
	struct page *dpage;
	uint64_t i = 0, j;
	uint64_t addr;
	int r = 0;

	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
		 prange->last);

	addr = prange->start << PAGE_SHIFT;

	src = (uint64_t *)(scratch + npages);
	dst = scratch;

	for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
		struct page *spage;

		spage = migrate_pfn_to_page(migrate->src[i]);
		if (!spage || !is_zone_device_page(spage)) {
			pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			if (j) {
				r = svm_migrate_copy_memory_gart(adev, dst + i - j,
								 src + i - j, j,
								 FROM_VRAM_TO_RAM,
								 mfence);
				if (r)
					goto out_oom;
				j = 0;
			}
			continue;
		}
		src[i] = svm_migrate_addr(adev, spage);
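		/*
		 * Flush the pending copy whenever the VRAM source addresses
		 * stop being contiguous.
		 */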
		if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
			r = svm_migrate_copy_memory_gart(adev, dst + i - j,
							 src + i - j, j,
							 FROM_VRAM_TO_RAM,
							 mfence);
			if (r)
				goto out_oom;
			j = 0;
		}

		dpage = svm_migrate_get_sys_page(migrate->vma, addr);
		if (!dpage) {
			pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			r = -ENOMEM;
			goto out_oom;
		}

		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
		r = dma_mapping_error(dev, dst[i]);
		if (r) {
			dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
			goto out_oom;
		}

		pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
				     dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));

		migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
		j++;
	}

	r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
					 FROM_VRAM_TO_RAM, mfence);

out_oom:
	if (r) {
		pr_debug("failed %d copy to ram\n", r);
		while (i--) {
			svm_migrate_put_sys_page(dst[i]);
			migrate->dst[i] = 0;
		}
	}

	return r;
}

/**
 * svm_migrate_vma_to_ram - migrate range inside one vma from device to system
 *
 * @prange: svm range structure
 * @vma: vm_area_struct that range [start, end] belongs to
 * @start: range start virtual address in pages
 * @end: range end virtual address in pages
 * @node: kfd node device to migrate from
 * @trigger: reason for the migration
 * @fault_page: fault page (vmf->page) when called from svm_migrate_to_ram(),
 *              the CPU page fault callback
 *
 * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
 *
 * Return:
 * 0 - success with all pages migrated
 * negative values - indicate error
 * positive values - partial migration, number of pages not migrated
 */
static long
svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
		       struct vm_area_struct *vma, uint64_t start, uint64_t end,
		       uint32_t trigger, struct page *fault_page)
{
	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	unsigned long upages = npages;
	unsigned long cpages = 0;
	struct amdgpu_device *adev = node->adev;
	struct kfd_process_device *pdd;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate = { 0 };
	dma_addr_t *scratch;
	void *buf;
	int r = -ENOMEM;

	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
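	/*
	 * VRAM of xgmi-connected (coherent) devices is exposed as
	 * DEVICE_COHERENT pages, otherwise as DEVICE_PRIVATE pages; select
	 * the matching source for migrate_vma_setup().
	 */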
	if (adev->gmc.xgmi.connected_to_cpu)
		migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT;
	else
		migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;

	buf = kvcalloc(npages,
		       2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
		       GFP_KERNEL);
	if (!buf)
		goto out;

	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	migrate.fault_page = fault_page;
	scratch = (dma_addr_t *)(migrate.dst + npages);

	kfd_smi_event_migration_start(node, p->lead_thread->pid,
				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				      node->id, 0, prange->prefetch_loc,
				      prange->preferred_loc, trigger);

	r = migrate_vma_setup(&migrate);
	if (r) {
		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
			__func__, r, prange->start, prange->last);
		goto out_free;
	}

	cpages = migrate.cpages;
	if (!cpages) {
		pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
			 prange->start, prange->last);
		upages = svm_migrate_unsuccessful_pages(&migrate);
		goto out_free;
	}
	if (cpages != npages)
		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
			 cpages, npages);
	else
		pr_debug("0x%lx pages migrated\n", cpages);

	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
				    scratch, npages);
	migrate_vma_pages(&migrate);

	upages = svm_migrate_unsuccessful_pages(&migrate);
	pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
		 upages, cpages, migrate.npages);

	svm_migrate_copy_done(adev, mfence);
	migrate_vma_finalize(&migrate);

	kfd_smi_event_migration_end(node, p->lead_thread->pid,
				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
				    node->id, 0, trigger);

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);

out_free:
	kvfree(buf);
out:
	if (!r && cpages) {
		pdd = svm_range_get_pdd_by_node(prange, node);
		if (pdd)
			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
	}
	return r ? r : upages;
}

/**
 * svm_migrate_vram_to_ram - migrate svm range from device to system
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
 * @trigger: reason for the migration
 * @fault_page: fault page (vmf->page) when called from svm_migrate_to_ram(),
 *              the CPU page fault callback
 *
 * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
 *
 * Return:
 * 0 - OK, otherwise error code
 */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
			    uint32_t trigger, struct page *fault_page)
{
	struct kfd_node *node;
	struct vm_area_struct *vma;
	unsigned long addr;
	unsigned long start;
	unsigned long end;
	unsigned long upages = 0;
	long r = 0;

	if (!prange->actual_loc) {
		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
			 prange->start, prange->last);
		return 0;
	}

	node = svm_range_get_node_by_id(prange, prange->actual_loc);
	if (!node) {
		pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
		return -ENODEV;
	}
	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
		 prange->svms, prange, prange->start, prange->last,
		 prange->actual_loc);

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = vma_lookup(mm, addr);
		if (!vma) {
			pr_debug("failed to find vma for prange %p\n", prange);
			r = -EFAULT;
			break;
		}

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_ram(node, prange, vma, addr, next, trigger,
					   fault_page);
		if (r < 0) {
			pr_debug("failed %ld to migrate prange %p\n", r, prange);
			break;
		} else {
			upages += r;
		}
		addr = next;
	}

	if (r >= 0 && !upages) {
		svm_range_vram_node_free(prange);
		prange->actual_loc = 0;
	}

	return r < 0 ? r : 0;
}

/**
 * svm_migrate_vram_to_vram - migrate svm range from device to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: process mm, use current->mm if NULL
 * @trigger: reason for the migration
 *
 * Context: Process context, caller hold mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
			 struct mm_struct *mm, uint32_t trigger)
{
	int r, retries = 3;

	/*
	 * TODO: for both devices with PCIe large bar or on same xgmi hive, skip
	 * system memory as migration bridge
	 */

	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

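	/*
	 * First migrate the whole range to system memory; retry a few times
	 * since the VRAM-to-RAM migration can be partial.
	 */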
	do {
		r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL);
		if (r)
			return r;
	} while (prange->actual_loc && --retries);

	if (prange->actual_loc)
		return -EDEADLK;

	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
}

int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
		    struct mm_struct *mm, uint32_t trigger)
{
	if (!prange->actual_loc)
		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
	else
		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);

}

/**
 * svm_migrate_to_ram - CPU page fault handler
 * @vmf: CPU vm fault vma, address
 *
 * Context: vm fault handler, caller holds the mmap read lock
 *
 * Return:
 * 0 - OK
 * VM_FAULT_SIGBUS - notify the application of a SIGBUS page fault
 */
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
	unsigned long addr = vmf->address;
	struct svm_range_bo *svm_bo;
	enum svm_work_list_ops op;
	struct svm_range *parent;
	struct svm_range *prange;
	struct kfd_process *p;
	struct mm_struct *mm;
	int r = 0;

	svm_bo = vmf->page->zone_device_data;
	if (!svm_bo) {
		pr_debug("failed get device page at addr 0x%lx\n", addr);
		return VM_FAULT_SIGBUS;
	}
	if (!mmget_not_zero(svm_bo->eviction_fence->mm)) {
		pr_debug("addr 0x%lx of process mm is destroyed\n", addr);
		return VM_FAULT_SIGBUS;
	}

	mm = svm_bo->eviction_fence->mm;
	if (mm != vmf->vma->vm_mm)
		pr_debug("addr 0x%lx is COW mapping in child process\n", addr);

	p = kfd_lookup_process_by_mm(mm);
	if (!p) {
		pr_debug("failed find process at fault address 0x%lx\n", addr);
		r = VM_FAULT_SIGBUS;
		goto out_mmput;
	}
	if (READ_ONCE(p->svms.faulting_task) == current) {
		pr_debug("skipping ram migration\n");
		r = 0;
		goto out_unref_process;
	}

	pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
	addr >>= PAGE_SHIFT;

	mutex_lock(&p->svms.lock);

	prange = svm_range_from_addr(&p->svms, addr, &parent);
	if (!prange) {
		pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
		r = -EFAULT;
		goto out_unlock_svms;
	}

	mutex_lock(&parent->migrate_mutex);
	if (prange != parent)
		mutex_lock_nested(&prange->migrate_mutex, 1);

	if (!prange->actual_loc)
		goto out_unlock_prange;

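	/*
	 * Split the range at migration granularity around the faulting
	 * address so only that part is migrated back to system memory.
	 */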
	svm_range_lock(parent);
	if (prange != parent)
		mutex_lock_nested(&prange->lock, 1);
	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
	if (prange != parent)
		mutex_unlock(&prange->lock);
	svm_range_unlock(parent);
	if (r) {
		pr_debug("failed %d to split range by granularity\n", r);
		goto out_unlock_prange;
	}

	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
				    vmf->page);
	if (r)
		pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
			 r, prange->svms, prange, prange->start, prange->last);

	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
	if (p->xnack_enabled && parent == prange)
		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
	else
		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
	svm_range_add_list_work(&p->svms, parent, mm, op);
	schedule_deferred_list_work(&p->svms);

out_unlock_prange:
	if (prange != parent)
		mutex_unlock(&prange->migrate_mutex);
	mutex_unlock(&parent->migrate_mutex);
out_unlock_svms:
	mutex_unlock(&p->svms.lock);
out_unref_process:
	pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
	kfd_unref_process(p);
out_mmput:
	mmput(mm);
	return r ? VM_FAULT_SIGBUS : 0;
}

static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
	.page_free = svm_migrate_page_free,
	.migrate_to_ram = svm_migrate_to_ram,
};

/* Each VRAM page uses sizeof(struct page) on system memory */
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))

int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
{
	struct amdgpu_kfd_dev *kfddev = &adev->kfd;
	struct dev_pagemap *pgmap;
	struct resource *res = NULL;
	unsigned long size;
	void *r;

	/* Page migration works on gfx9 or newer */
	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
		return -EINVAL;

	if (adev->gmc.is_app_apu)
		return 0;

	pgmap = &kfddev->pgmap;
	memset(pgmap, 0, sizeof(*pgmap));

	/* TODO: register all vram to HMM for now.
	 * should remove reserved size
	 */
	size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
	if (adev->gmc.xgmi.connected_to_cpu) {
		pgmap->range.start = adev->gmc.aper_base;
		pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1;
		pgmap->type = MEMORY_DEVICE_COHERENT;
	} else {
		res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
		if (IS_ERR(res))
			return PTR_ERR(res);
		pgmap->range.start = res->start;
		pgmap->range.end = res->end;
		pgmap->type = MEMORY_DEVICE_PRIVATE;
	}

	pgmap->nr_range = 1;
	pgmap->ops = &svm_migrate_pgmap_ops;
	pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
	pgmap->flags = 0;
	/* Device manager releases device-specific resources, memory region and
	 * pgmap when driver disconnects from device.
	 */
	r = devm_memremap_pages(adev->dev, pgmap);
	if (IS_ERR(r)) {
		pr_err("failed to register HMM device memory\n");
		if (pgmap->type == MEMORY_DEVICE_PRIVATE)
			devm_release_mem_region(adev->dev, res->start, resource_size(res));
		/* Disable SVM support capability */
		pgmap->type = 0;
		return PTR_ERR(r);
	}

	pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
		 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);

	amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));

	pr_info("HMM registered %ldMB device memory\n", size >> 20);

	return 0;
}