/*	$NetBSD: subr_physmap.c,v 1.5 2021/09/06 20:55:08 andvar Exp $	*/

/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: subr_physmap.c,v 1.5 2021/09/06 20:55:08 andvar Exp $");

#include <sys/param.h>
#include <sys/physmap.h>
#include <sys/kmem.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_page.h>

#include <dev/mm.h>

/*
 * This file contains support routines used to create and destroy lists of
 * physical pages from lists of pages or ranges of virtual addresses.  By
 * using these physical maps, the kernel can avoid mapping physical I/O in
 * the kernel's address space in most cases.
 */

typedef struct {
	physmap_t *pc_physmap;
	physmap_segment_t *pc_segs;
	vsize_t pc_offset;
	vsize_t pc_klen;
	vaddr_t pc_kva;
	u_int pc_nsegs;
	vm_prot_t pc_prot;
	bool pc_direct_mapped;
} physmap_cookie_t;

/*
 * Allocate a physmap structure that requires "maxsegs" segments.
 */
static physmap_t *
physmap_alloc(size_t maxsegs)
{
	const size_t mapsize = offsetof(physmap_t, pm_segs[maxsegs]);

	KASSERT(maxsegs > 0);

	physmap_t * const map = kmem_zalloc(mapsize, KM_SLEEP);
	map->pm_maxsegs = maxsegs;

	return map;
}

static int
physmap_fill(physmap_t *map, pmap_t pmap, vaddr_t va, vsize_t len)
{
	size_t nsegs = map->pm_nsegs;
	physmap_segment_t *ps = &map->pm_segs[nsegs];
	vsize_t offset = va - trunc_page(va);

	if (nsegs == 0) {
		if (!pmap_extract(pmap, va, &ps->ps_addr)) {
			return EFAULT;
		}
		ps->ps_len = MIN(len, PAGE_SIZE - offset);
		if (ps->ps_len == len) {
			map->pm_nsegs = 1;
			return 0;
		}
		/*
		 * The request spans more than one page, so consume the
		 * remainder of the first page and advance to the next one.
		 */
		va += PAGE_SIZE - offset;
		len -= PAGE_SIZE - offset;
		offset = 0;
	} else {
		/*
		 * Back up to the last segment since we have to see if we can
		 * merge virtual addresses that are physically contiguous into
		 * as few segments as possible.
		 */
		ps--;
		nsegs--;
	}

	paddr_t lastaddr = ps->ps_addr + ps->ps_len;
	for (;;) {
		paddr_t curaddr;
		if (!pmap_extract(pmap, va, &curaddr)) {
			return EFAULT;
		}
		if (curaddr != lastaddr) {
			/*
			 * Not physically contiguous with the current segment,
			 * so start a new one.
			 */
			ps++;
			nsegs++;
			KASSERT(nsegs < map->pm_maxsegs);
			ps->ps_addr = curaddr;
			lastaddr = curaddr;
		}
		if (offset + len > PAGE_SIZE) {
			/*
			 * Consume the rest of this page and move on to the
			 * next virtual page.
			 */
			ps->ps_len += PAGE_SIZE - offset;
			lastaddr = ps->ps_addr + ps->ps_len;
			len -= PAGE_SIZE - offset;
			va += PAGE_SIZE - offset;
			offset = 0;
		} else {
			ps->ps_len += len;
			map->pm_nsegs = nsegs + 1;
			return 0;
		}
	}
}

/*
 * Create a physmap and populate it with the pages that are used to map a
 * linear range of virtual addresses.  It is assumed that uvm_vslock has been
 * called to lock these pages into memory.
 */
int
physmap_create_linear(physmap_t **map_p, const struct vmspace *vs, vaddr_t va,
	vsize_t len)
{
	const size_t maxsegs = atop(round_page(va + len) - trunc_page(va));
	physmap_t * const map = physmap_alloc(maxsegs);
	int error = physmap_fill(map, vs->vm_map.pmap, va, len);
	if (error) {
		physmap_destroy(map);
		*map_p = NULL;
		return error;
	}
	*map_p = map;
	return 0;
}
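
/*
 * Illustrative sketch (not compiled in): one way a caller might wire a
 * user buffer and build a physmap over it.  The buffer "buf"/"len" and the
 * error handling are hypothetical; the only interfaces assumed beyond this
 * file are uvm_vslock()/uvm_vsunlock(), which wire the pages as required by
 * the comment above.
 *
 *	struct vmspace * const vs = curproc->p_vmspace;
 *	physmap_t *map;
 *	int error;
 *
 *	error = uvm_vslock(vs, buf, len, VM_PROT_READ | VM_PROT_WRITE);
 *	if (error)
 *		return error;
 *	error = physmap_create_linear(&map, vs, (vaddr_t)buf, len);
 *	if (error == 0) {
 *		physmap_zero(map, 0, len);	(or any other consumer)
 *		physmap_destroy(map);
 *	}
 *	uvm_vsunlock(vs, buf, len);
 *	return error;
 */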

/*
 * Create a physmap and populate it with the pages that are contained in an
 * iovec array.  It is assumed that uvm_vslock has been called to lock these
 * pages into memory.
 */
int
physmap_create_iov(physmap_t **map_p, const struct vmspace *vs,
	struct iovec *iov, size_t iovlen)
{
	size_t maxsegs = 0;
	for (size_t i = 0; i < iovlen; i++) {
		const vaddr_t start = (vaddr_t) iov[i].iov_base;
		const vaddr_t end = start + iov[i].iov_len;
		maxsegs += atop(round_page(end) - trunc_page(start));
	}
	physmap_t * const map = physmap_alloc(maxsegs);

	for (size_t i = 0; i < iovlen; i++) {
		int error = physmap_fill(map, vs->vm_map.pmap,
		    (vaddr_t) iov[i].iov_base, iov[i].iov_len);
		if (error) {
			physmap_destroy(map);
			*map_p = NULL;
			return error;
		}
	}
	*map_p = map;
	return 0;
}
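
/*
 * Illustrative sketch (not compiled in): building a physmap from the iovec
 * array carried by a struct uio, e.g. in a driver's read/write path.  The
 * uio itself and the prior wiring of its pages (uvm_vslock) are assumed,
 * not shown.
 *
 *	physmap_t *map;
 *	int error = physmap_create_iov(&map, uio->uio_vmspace,
 *	    uio->uio_iov, uio->uio_iovcnt);
 *	if (error)
 *		return error;
 *	(use the physmap, then physmap_destroy(map))
 */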

/*
 * This uses a list of vm_page structures to create a physmap.
 */
physmap_t *
physmap_create_pagelist(struct vm_page **pgs, size_t npgs)
{
	physmap_t * const map = physmap_alloc(npgs);

	physmap_segment_t *ps = map->pm_segs;

	/*
	 * Initialize the first segment.
	 */
	paddr_t lastaddr = VM_PAGE_TO_PHYS(pgs[0]);
	ps->ps_addr = lastaddr;
	ps->ps_len = PAGE_SIZE;

	for (pgs++; npgs-- > 1; pgs++) {
		/*
		 * lastaddr needs to be increased by a page.
		 */
		lastaddr += PAGE_SIZE;
		paddr_t curaddr = VM_PAGE_TO_PHYS(*pgs);
		if (curaddr != lastaddr) {
			/*
			 * If the addresses are not the same, we need to use
			 * a new segment.  Set its address and update lastaddr.
			 */
			ps++;
			ps->ps_addr = curaddr;
			lastaddr = curaddr;
		}
		/*
		 * Increase this segment's length by a page.
		 */
		ps->ps_len += PAGE_SIZE;
	}

	map->pm_nsegs = ps + 1 - map->pm_segs;
	return map;
}
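
/*
 * Illustrative sketch (not compiled in): a caller that already holds an
 * array of vm_page pointers (how the pages were obtained is not shown) can
 * build a physmap from them directly, without any pmap lookups.
 *
 *	physmap_t *map = physmap_create_pagelist(pgs, npgs);
 *	(use the physmap)
 *	physmap_destroy(map);
 */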

void
physmap_destroy(physmap_t *map)
{
	const size_t mapsize = offsetof(physmap_t, pm_segs[map->pm_maxsegs]);

	kmem_free(map, mapsize);
}

void *
physmap_map_init(physmap_t *map, size_t offset, vm_prot_t prot)
{
	physmap_cookie_t * const pc = kmem_zalloc(sizeof(*pc), KM_SLEEP);

	KASSERT(prot == VM_PROT_READ || prot == (VM_PROT_READ|VM_PROT_WRITE));

	pc->pc_physmap = map;
	pc->pc_segs = map->pm_segs;
	pc->pc_nsegs = map->pm_nsegs;
	pc->pc_prot = prot;
	pc->pc_klen = 0;
	pc->pc_kva = 0;
	pc->pc_direct_mapped = false;

	/*
	 * Skip to the first segment we are interested in.
	 */
	while (offset >= pc->pc_segs->ps_len) {
		offset -= pc->pc_segs->ps_len;
		pc->pc_segs++;
		pc->pc_nsegs--;
	}

	pc->pc_offset = offset;

	return pc;
}

size_t
physmap_map(void *cookie, vaddr_t *kvap)
{
	physmap_cookie_t * const pc = cookie;

	/*
	 * If there is currently a non-direct mapped KVA region allocated,
	 * free it now and forget about it so it cannot be freed twice.
	 */
	if (pc->pc_kva != 0 && !pc->pc_direct_mapped) {
		pmap_kremove(pc->pc_kva, pc->pc_klen);
		pmap_update(pmap_kernel());
		uvm_km_free(kernel_map, pc->pc_kva, pc->pc_klen,
		    UVM_KMF_VAONLY);
		pc->pc_kva = 0;
		pc->pc_klen = 0;
	}

	/*
	 * If there are no more segments to process, return 0 indicating
	 * we are done.
	 */
	if (pc->pc_nsegs == 0) {
		return 0;
	}

	/*
	 * Get starting physical address of this segment and its length.
	 */
	paddr_t pa = pc->pc_segs->ps_addr + pc->pc_offset;
	const size_t koff = pa & PAGE_MASK;
	const size_t len = pc->pc_segs->ps_len - pc->pc_offset;

	/*
	 * Now that we have the starting offset in the page, reset to the
	 * beginning of the page.
	 */
	pa = trunc_page(pa);

	/*
	 * We are now done with this segment; advance to the next one.
	 */
	pc->pc_segs++;
	pc->pc_nsegs--;
	pc->pc_offset = 0;

	/*
	 * Find out how many pages we are mapping; the offset into the first
	 * page has to be covered as well.
	 */
	pc->pc_klen = round_page(koff + len);
#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
	/*
	 * Always try to direct map it since that's nearly zero cost.
	 */
	pc->pc_direct_mapped = mm_md_direct_mapped_phys(pa, &pc->pc_kva);
#endif
	if (!pc->pc_direct_mapped) {
		/*
		 * If we can't direct map it, we have to allocate some KVA
		 * so we map it via the kernel_map.
		 */
		pc->pc_kva = uvm_km_alloc(kernel_map, pc->pc_klen,
		    atop(pa) & uvmexp.colormask,
		    UVM_KMF_VAONLY | UVM_KMF_WAITVA | UVM_KMF_COLORMATCH);
		KASSERT(pc->pc_kva != 0);

		/*
		 * Set up mappings for this segment.
		 */
		for (size_t poff = 0; poff < pc->pc_klen; poff += PAGE_SIZE) {
			pmap_kenter_pa(pc->pc_kva + poff, pa + poff,
			    pc->pc_prot, 0);
		}
		/*
		 * Make them real.
		 */
		pmap_update(pmap_kernel());
	}
	/*
	 * Return the starting KVA (including offset into the page) and
	 * the length of this segment.
	 */
	*kvap = pc->pc_kva + koff;
	return len;
}

void
physmap_map_fini(void *cookie)
{
	physmap_cookie_t * const pc = cookie;

	/*
	 * If there is currently a non-direct mapped KVA region allocated,
	 * free it now.
	 */
	if (pc->pc_kva != 0 && !pc->pc_direct_mapped) {
		pmap_kremove(pc->pc_kva, pc->pc_klen);
		pmap_update(pmap_kernel());
		uvm_km_free(kernel_map, pc->pc_kva, pc->pc_klen,
		    UVM_KMF_VAONLY);
	}

	/*
	 * Free the cookie.
	 */
	kmem_free(pc, sizeof(*pc));
}
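
/*
 * Illustrative sketch (not compiled in) of the mapping protocol above:
 * call physmap_map() repeatedly until it returns 0, treating each returned
 * KVA chunk as valid only until the next physmap_map() or
 * physmap_map_fini() call.  The checksum accumulator "accumulate" is
 * hypothetical.
 *
 *	void * const cookie = physmap_map_init(map, 0, VM_PROT_READ);
 *	vaddr_t kva;
 *	size_t seglen;
 *
 *	while ((seglen = physmap_map(cookie, &kva)) != 0)
 *		sum = accumulate(sum, (const void *)kva, seglen);
 *	physmap_map_fini(cookie);
 */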

/*
 * genio needs to zero pages past the EOF or without backing storage (think
 * sparse files).  But since we are using physmaps, there is no KVA to use
 * with memset, so we need a helper that obtains a KVA and memsets the
 * desired memory.
 */
void
physmap_zero(physmap_t *map, size_t offset, size_t len)
{
	void * const cookie = physmap_map_init(map, offset,
	    VM_PROT_READ|VM_PROT_WRITE);

	for (;;) {
		vaddr_t kva;
		size_t seglen = physmap_map(cookie, &kva);
		KASSERT(seglen != 0);
		if (seglen > len)
			seglen = len;
		memset((void *)kva, 0, seglen);
		len -= seglen;
		if (len == 0)
			break;
	}

	physmap_map_fini(cookie);
}