/*	$NetBSD: subr_physmap.c,v 1.5 2021/09/06 20:55:08 andvar Exp $	*/

/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_physmap.c,v 1.5 2021/09/06 20:55:08 andvar Exp $");

#include <sys/param.h>
#include <sys/physmap.h>
#include <sys/kmem.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_page.h>

#include <dev/mm.h>

/*
 * This file contains support routines used to create and destroy lists of
 * physical pages from lists of pages or ranges of virtual addresses.  By
 * using these physical maps, the kernel can avoid mapping physical I/O in
 * the kernel's address space in most cases.
 */
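
/*
 * Typical use, as a rough sketch (the buffer, vmspace and error handling
 * shown here are illustrative assumptions, not code from this file): lock
 * the user pages, build a physmap, then walk it with the physmap_map_init/
 * physmap_map/physmap_map_fini iterator.
 *
 *	physmap_t *map;
 *	void *cookie;
 *	vaddr_t kva;
 *	size_t seglen;
 *	int error;
 *
 *	error = uvm_vslock(vs, (void *)uva, len, VM_PROT_READ);
 *	if (error == 0) {
 *		error = physmap_create_linear(&map, vs, uva, len);
 *		if (error == 0) {
 *			cookie = physmap_map_init(map, 0, VM_PROT_READ);
 *			while ((seglen = physmap_map(cookie, &kva)) != 0) {
 *				// consume seglen bytes at kva
 *			}
 *			physmap_map_fini(cookie);
 *			physmap_destroy(map);
 *		}
 *		uvm_vsunlock(vs, (void *)uva, len);
 *	}
 */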

typedef struct {
	physmap_t *pc_physmap;
	physmap_segment_t *pc_segs;
	vsize_t pc_offset;
	vsize_t pc_klen;
	vaddr_t pc_kva;
	u_int pc_nsegs;
	vm_prot_t pc_prot;
	bool pc_direct_mapped;
} physmap_cookie_t;

/*
 * Allocate a physmap structure that requires "maxsegs" segments.
 */
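/*
 * physmap_t ends in a variable-length pm_segs[] array, so
 * offsetof(physmap_t, pm_segs[maxsegs]) is the size of the header plus
 * "maxsegs" trailing segments.  For instance (purely illustrative):
 *
 *	offsetof(physmap_t, pm_segs[4]) ==
 *	    offsetof(physmap_t, pm_segs[0]) + 4 * sizeof(physmap_segment_t)
 */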
static physmap_t *
physmap_alloc(size_t maxsegs)
{
	const size_t mapsize = offsetof(physmap_t, pm_segs[maxsegs]);

	KASSERT(maxsegs > 0);

	physmap_t * const map = kmem_zalloc(mapsize, KM_SLEEP);
	map->pm_maxsegs = maxsegs;

	return map;
}

static int
physmap_fill(physmap_t *map, pmap_t pmap, vaddr_t va, vsize_t len)
{
	size_t nsegs = map->pm_nsegs;
	physmap_segment_t *ps = &map->pm_segs[nsegs];
	vsize_t offset = va - trunc_page(va);

	if (nsegs == 0) {
		if (!pmap_extract(pmap, va, &ps->ps_addr)) {
			return EFAULT;
		}
		ps->ps_len = MIN(len, PAGE_SIZE - offset);
		if (ps->ps_len == len) {
			map->pm_nsegs = 1;
			return 0;
		}
		/* Advance past the partial first page just accounted for. */
		va += ps->ps_len;
		len -= ps->ps_len;
		offset = 0;
	} else {
		/*
		 * Back up to the last segment since we have to see if we can
		 * merge virtual addresses that are physically contiguous into
		 * as few segments as possible.
		 */
		ps--;
		nsegs--;
	}

	paddr_t lastaddr = ps->ps_addr + ps->ps_len;
	for (;;) {
		paddr_t curaddr;
		if (!pmap_extract(pmap, va, &curaddr)) {
			return EFAULT;
		}
		if (curaddr != lastaddr) {
			ps++;
			nsegs++;
			KASSERT(nsegs < map->pm_maxsegs);
			ps->ps_addr = curaddr;
			lastaddr = curaddr;
		}
		if (offset + len > PAGE_SIZE) {
			ps->ps_len += PAGE_SIZE - offset;
			lastaddr = ps->ps_addr + ps->ps_len;
			len -= PAGE_SIZE - offset;
			va += PAGE_SIZE - offset;
			offset = 0;
		} else {
			ps->ps_len += len;
			map->pm_nsegs = nsegs + 1;
			return 0;
		}
	}
}
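
/*
 * As an illustrative sketch (assuming 4 KB pages; the physical addresses
 * are made up): filling a three-page range whose pages live at physical
 * addresses 0x20000, 0x21000 and 0x9000 yields two segments, because the
 * first two pages are physically contiguous and get merged:
 *
 *	pm_segs[0] = { .ps_addr = 0x20000, .ps_len = 2 * PAGE_SIZE }
 *	pm_segs[1] = { .ps_addr = 0x09000, .ps_len = PAGE_SIZE }
 */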

/*
 * Create a physmap and populate it with the pages that are used to map a
 * linear range of virtual addresses.  It is assumed that uvm_vslock has
 * been called to lock these pages into memory.
 */
int
physmap_create_linear(physmap_t **map_p, const struct vmspace *vs, vaddr_t va,
    vsize_t len)
{
	const size_t maxsegs = atop(round_page(va + len) - trunc_page(va));
	physmap_t * const map = physmap_alloc(maxsegs);
	int error = physmap_fill(map, vs->vm_map.pmap, va, len);
	if (error) {
		physmap_destroy(map);
		*map_p = NULL;
		return error;
	}
	*map_p = map;
	return 0;
}

/*
 * Create a physmap and populate it with the pages that are contained in an
 * iovec array.  It is assumed that uvm_vslock has been called to lock these
 * pages into memory.
 */
int
physmap_create_iov(physmap_t **map_p, const struct vmspace *vs,
    struct iovec *iov, size_t iovlen)
{
	size_t maxsegs = 0;
	for (size_t i = 0; i < iovlen; i++) {
		const vaddr_t start = (vaddr_t) iov[i].iov_base;
		const vaddr_t end = start + iov[i].iov_len;
		maxsegs += atop(round_page(end) - trunc_page(start));
	}
	physmap_t * const map = physmap_alloc(maxsegs);

	for (size_t i = 0; i < iovlen; i++) {
		int error = physmap_fill(map, vs->vm_map.pmap,
		    (vaddr_t) iov[i].iov_base, iov[i].iov_len);
		if (error) {
			physmap_destroy(map);
			*map_p = NULL;
			return error;
		}
	}
	*map_p = map;
	return 0;
}
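
/*
 * A minimal sketch of building a physmap from an iovec array (the
 * two-element iovec, buffers and lengths below are assumptions for
 * illustration only):
 *
 *	struct iovec iov[2] = {
 *		{ .iov_base = buf0, .iov_len = len0 },
 *		{ .iov_base = buf1, .iov_len = len1 },
 *	};
 *	physmap_t *map;
 *	int error = physmap_create_iov(&map, vs, iov, 2);
 *
 * The resulting map covers both buffers back to back; pages that happen to
 * be physically contiguous are merged into one segment by physmap_fill().
 */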

/*
 * This uses a list of vm_page structures to create a physmap.
 */
physmap_t *
physmap_create_pagelist(struct vm_page **pgs, size_t npgs)
{
	physmap_t * const map = physmap_alloc(npgs);

	physmap_segment_t *ps = map->pm_segs;

	/*
	 * Initialize the first segment.
	 */
	paddr_t lastaddr = VM_PAGE_TO_PHYS(pgs[0]);
	ps->ps_addr = lastaddr;
	ps->ps_len = PAGE_SIZE;

	for (pgs++; npgs-- > 1; pgs++) {
		/*
		 * lastaddr needs to be increased by a page.
		 */
		lastaddr += PAGE_SIZE;
		paddr_t curaddr = VM_PAGE_TO_PHYS(*pgs);
		if (curaddr != lastaddr) {
			/*
			 * If the addresses are not the same, we need to use
			 * a new segment.  Set its address and update lastaddr.
			 */
			ps++;
			ps->ps_addr = curaddr;
			lastaddr = curaddr;
		}
		/*
		 * Increase this segment's length by a page.
		 */
		ps->ps_len += PAGE_SIZE;
	}

	map->pm_nsegs = ps + 1 - map->pm_segs;
	return map;
}
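
/*
 * Sketch of the page-list variant, e.g. for pages obtained via uvm_loan()
 * or a similar mechanism (the setup shown is hypothetical):
 *
 *	struct vm_page *pgs[npgs];
 *	// ... fill pgs[] with resident pages ...
 *	physmap_t *map = physmap_create_pagelist(pgs, npgs);
 *	// ... use the map ...
 *	physmap_destroy(map);
 *
 * Unlike the virtual-address based constructors, this variant cannot fail:
 * the pages are already known, so no pmap_extract() lookups are needed.
 */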

void
physmap_destroy(physmap_t *map)
{
	const size_t mapsize = offsetof(physmap_t, pm_segs[map->pm_maxsegs]);

	kmem_free(map, mapsize);
}

void *
physmap_map_init(physmap_t *map, size_t offset, vm_prot_t prot)
{
	physmap_cookie_t * const pc = kmem_zalloc(sizeof(*pc), KM_SLEEP);

	KASSERT(prot == VM_PROT_READ || prot == (VM_PROT_READ|VM_PROT_WRITE));

	pc->pc_physmap = map;
	pc->pc_segs = map->pm_segs;
	pc->pc_nsegs = map->pm_nsegs;
	pc->pc_prot = prot;
	pc->pc_klen = 0;
	pc->pc_kva = 0;
	pc->pc_direct_mapped = false;

	/*
	 * Skip to the first segment we are interested in.
	 */
	while (offset >= pc->pc_segs->ps_len) {
		offset -= pc->pc_segs->ps_len;
		pc->pc_segs++;
		pc->pc_nsegs--;
	}

	pc->pc_offset = offset;

	return pc;
}

size_t
physmap_map(void *cookie, vaddr_t *kvap)
{
	physmap_cookie_t * const pc = cookie;

	/*
	 * If there is currently a non-direct mapped KVA region allocated,
	 * free it now.
	 */
	if (pc->pc_kva != 0 && !pc->pc_direct_mapped) {
		pmap_kremove(pc->pc_kva, pc->pc_klen);
		pmap_update(pmap_kernel());
		uvm_km_free(kernel_map, pc->pc_kva, pc->pc_klen,
		    UVM_KMF_VAONLY);
		/* Forget it so physmap_map_fini() won't free it again. */
		pc->pc_kva = 0;
	}

	/*
	 * If there are no more segments to process, return 0 indicating
	 * we are done.
	 */
	if (pc->pc_nsegs == 0) {
		return 0;
	}

	/*
	 * Get starting physical address of this segment and its length.
	 */
	paddr_t pa = pc->pc_segs->ps_addr + pc->pc_offset;
	const size_t koff = pa & PAGE_MASK;
	const size_t len = pc->pc_segs->ps_len - pc->pc_offset;

	/*
	 * Now that we have the starting offset in the page, reset to the
	 * beginning of the page.
	 */
	pa = trunc_page(pa);

	/*
	 * We are now done with this segment; advance to the next one.
	 */
	pc->pc_segs++;
	pc->pc_nsegs--;
	pc->pc_offset = 0;

	/*
	 * Round up to a page boundary to find out how much KVA the mapping
	 * needs, including the segment's offset within its first page.
	 */
	pc->pc_klen = round_page(koff + len);
#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
	/*
	 * Always try to direct map it since that's nearly zero cost.
	 */
	pc->pc_direct_mapped = mm_md_direct_mapped_phys(pa, &pc->pc_kva);
#endif
	if (!pc->pc_direct_mapped) {
		/*
		 * If we can't direct map it, we have to allocate some KVA
		 * so we map it via the kernel_map.
		 */
		pc->pc_kva = uvm_km_alloc(kernel_map, pc->pc_klen,
		    atop(pa) & uvmexp.colormask,
		    UVM_KMF_VAONLY | UVM_KMF_WAITVA | UVM_KMF_COLORMATCH);
		KASSERT(pc->pc_kva != 0);

		/*
		 * Setup mappings for this segment.
		 */
		for (size_t poff = 0; poff < pc->pc_klen; poff += PAGE_SIZE) {
			pmap_kenter_pa(pc->pc_kva + poff, pa + poff,
			    pc->pc_prot, 0);
		}
		/*
		 * Make them real.
		 */
		pmap_update(pmap_kernel());
	}
	/*
	 * Return the starting KVA (including offset into the page) and
	 * the length of this segment.
	 */
	*kvap = pc->pc_kva + koff;
	return len;
}
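
/*
 * physmap_zero() below is the in-file consumer of this iterator; a
 * read-only walk follows the same shape (sketch only, consume_segment()
 * is a stand-in for whatever the caller does with each segment):
 *
 *	void *cookie = physmap_map_init(map, offset, VM_PROT_READ);
 *	vaddr_t kva;
 *	size_t seglen;
 *
 *	while ((seglen = physmap_map(cookie, &kva)) != 0)
 *		consume_segment((const void *)kva, seglen);
 *	physmap_map_fini(cookie);
 */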

void
physmap_map_fini(void *cookie)
{
	physmap_cookie_t * const pc = cookie;

	/*
	 * If there is currently a non-direct mapped KVA region allocated,
	 * free it now.
	 */
	if (pc->pc_kva != 0 && !pc->pc_direct_mapped) {
		pmap_kremove(pc->pc_kva, pc->pc_klen);
		pmap_update(pmap_kernel());
		uvm_km_free(kernel_map, pc->pc_kva, pc->pc_klen,
		    UVM_KMF_VAONLY);
	}

	/*
	 * Free the cookie.
	 */
	kmem_free(pc, sizeof(*pc));
}

/*
 * genio needs to zero pages past the EOF or without backing storage (think
 * sparse files).  But since we are using physmaps, there is no KVA to pass
 * to memset, so we need a helper to obtain a KVA and memset the desired
 * memory.
 */
void
physmap_zero(physmap_t *map, size_t offset, size_t len)
{
	void * const cookie = physmap_map_init(map, offset,
	    VM_PROT_READ|VM_PROT_WRITE);

	for (;;) {
		vaddr_t kva;
		size_t seglen = physmap_map(cookie, &kva);
		KASSERT(seglen != 0);
		if (seglen > len)
			seglen = len;
		memset((void *)kva, 0, seglen);
		if (seglen == len)
			break;
		/* Account for the bytes zeroed in this segment. */
		len -= seglen;
	}

	physmap_map_fini(cookie);
}