xref: /minix3/minix/kernel/arch/i386/memory.c (revision 7c48de6cc4c6d56f2277d378dba01dbac8a8c3b9)
1 
2 #include "kernel/kernel.h"
3 #include "kernel/vm.h"
4 
5 #include <machine/vm.h>
6 
7 #include <minix/type.h>
8 #include <minix/syslib.h>
9 #include <minix/cpufeature.h>
10 #include <string.h>
11 #include <assert.h>
12 #include <signal.h>
13 #include <stdlib.h>
14 
15 #include <machine/vm.h>
16 
17 #include "oxpcie.h"
18 #include "arch_proto.h"
19 
20 #ifdef USE_APIC
21 #include "apic.h"
22 #ifdef USE_WATCHDOG
23 #include "kernel/watchdog.h"
24 #endif
25 #endif
26 
27 phys_bytes video_mem_vaddr = 0;
28 
29 #define HASPT(procptr) ((procptr)->p_seg.p_cr3 != 0)
30 static int nfreepdes = 0;
31 #define MAXFREEPDES	2
32 static int freepdes[MAXFREEPDES];
33 
34 static u32_t phys_get32(phys_bytes v);
35 
36 void mem_clear_mapcache(void)
37 {
38 	int i;
39 	for(i = 0; i < nfreepdes; i++) {
40 		struct proc *ptproc = get_cpulocal_var(ptproc);
41 		int pde = freepdes[i];
42 		u32_t *ptv;
43 		assert(ptproc);
44 		ptv = ptproc->p_seg.p_cr3_v;
45 		assert(ptv);
46 		ptv[pde] = 0;
47 	}
48 }
49 
50 /* This function sets up a mapping from within the kernel's address
51  * space to any other area of memory, either straight physical
52  * memory (pr == NULL) or a process view of memory, in 4MB windows.
53  * I.e., it maps in 4MB chunks of virtual (or physical) address space
54  * to 4MB chunks of kernel virtual address space.
55  *
56  * It recognizes pr already being in memory as a special case (no
57  * mapping required).
58  *
59  * The target (i.e. in-kernel) mapping area is one of the freepdes[]
60  * VM has earlier already told the kernel about that is available. It is
61  * identified as the 'pde' parameter. This value can be chosen freely
62  * by the caller, as long as it is in range (i.e. 0 or higher and corresponds
63  * to a known freepde slot). It is up to the caller to keep track of which
64  * freepde's are in use, and to determine which ones are free to use.
65  *
66  * The logical number supplied by the caller is translated into an actual
67  * pde number to be used, and a pointer to it (linear address) is returned
68  * for actual use by phys_copy or memset.
69  */
70 static phys_bytes createpde(
71 	const struct proc *pr,	/* Requested process, NULL for physical. */
72 	const phys_bytes linaddr,/* Address after segment translation. */
73 	phys_bytes *bytes,	/* Size of chunk, function may truncate it. */
74 	int free_pde_idx,	/* index of the free slot to use */
75 	int *changed		/* If mapping is made, this is set to 1. */
76 	)
77 {
78 	u32_t pdeval;
79 	phys_bytes offset;
80 	int pde;
81 
82 	assert(free_pde_idx >= 0 && free_pde_idx < nfreepdes);
83 	pde = freepdes[free_pde_idx];
84 	assert(pde >= 0 && pde < 1024);
85 
86 	if(pr && ((pr == get_cpulocal_var(ptproc)) || iskernelp(pr))) {
87 		/* Process memory is requested, and
88 		 * it's a process that is already in current page table, or
89 		 * the kernel, which is always there.
90 		 * Therefore linaddr is valid directly, with the requested
91 		 * size.
92 		 */
93 		return linaddr;
94 	}
95 
96 	if(pr) {
97 		/* Requested address is in a process that is not currently
98 		 * accessible directly. Grab the PDE entry of that process'
99 		 * page table that corresponds to the requested address.
100 		 */
101 		assert(pr->p_seg.p_cr3_v);
102 		pdeval = pr->p_seg.p_cr3_v[I386_VM_PDE(linaddr)];
103 	} else {
104 		/* Requested address is physical. Make up the PDE entry. */
105 		pdeval = (linaddr & I386_VM_ADDR_MASK_4MB) |
106 			I386_VM_BIGPAGE | I386_VM_PRESENT |
107 			I386_VM_WRITE | I386_VM_USER;
108 	}
109 
110 	/* Write the pde value that we need into a pde that the kernel
111 	 * can access, into the currently loaded page table so it becomes
112 	 * visible.
113 	 */
114 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
115 	if(get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] != pdeval) {
116 		get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] = pdeval;
117 		*changed = 1;
118 	}
119 
120 	/* Memory is now available, but only the 4MB window of virtual
121 	 * address space that we have mapped; calculate how much of
122 	 * the requested range is visible and return that in *bytes,
123 	 * if that is less than the requested range.
124 	 */
125 	offset = linaddr & I386_VM_OFFSET_MASK_4MB; /* Offset in 4MB window. */
126 	*bytes = MIN(*bytes, I386_BIG_PAGE_SIZE - offset);
127 
128 	/* Return the linear address of the start of the new mapping. */
129 	return I386_BIG_PAGE_SIZE*pde + offset;
130 }
131 
132 
133 /*===========================================================================*
134  *                           check_resumed_caller                            *
135  *===========================================================================*/
136 static int check_resumed_caller(struct proc *caller)
137 {
138 	/* Returns the result from VM if caller was resumed, otherwise OK. */
139 	if (caller && (caller->p_misc_flags & MF_KCALL_RESUME)) {
140 		assert(caller->p_vmrequest.vmresult != VMSUSPEND);
141 		return caller->p_vmrequest.vmresult;
142 	}
143 
144 	return OK;
145 }
146 
147 /*===========================================================================*
148  *				lin_lin_copy				     *
149  *===========================================================================*/
150 static int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr,
151 	struct proc *dstproc, vir_bytes dstlinaddr, vir_bytes bytes)
152 {
153 	u32_t addr;
154 	proc_nr_t procslot;
155 
156 	assert(get_cpulocal_var(ptproc));
157 	assert(get_cpulocal_var(proc_ptr));
158 	assert(read_cr3() == get_cpulocal_var(ptproc)->p_seg.p_cr3);
159 
160 	procslot = get_cpulocal_var(ptproc)->p_nr;
161 
162 	assert(procslot >= 0 && procslot < I386_VM_DIR_ENTRIES);
163 
164 	if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
165 	if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
166 	assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
167 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
168 	if(srcproc) assert(!RTS_ISSET(srcproc, RTS_VMINHIBIT));
169 	if(dstproc) assert(!RTS_ISSET(dstproc, RTS_VMINHIBIT));
170 
171 	while(bytes > 0) {
172 		phys_bytes srcptr, dstptr;
173 		vir_bytes chunk = bytes;
174 		int changed = 0;
175 
176 #ifdef CONFIG_SMP
177 		unsigned cpu = cpuid;
178 
179 		if (srcproc && GET_BIT(srcproc->p_stale_tlb, cpu)) {
180 			changed = 1;
181 			UNSET_BIT(srcproc->p_stale_tlb, cpu);
182 		}
183 		if (dstproc && GET_BIT(dstproc->p_stale_tlb, cpu)) {
184 			changed = 1;
185 			UNSET_BIT(dstproc->p_stale_tlb, cpu);
186 		}
187 #endif
188 
189 		/* Set up 4MB ranges. */
190 		srcptr = createpde(srcproc, srclinaddr, &chunk, 0, &changed);
191 		dstptr = createpde(dstproc, dstlinaddr, &chunk, 1, &changed);
192 		if(changed)
193 			reload_cr3();
194 
195 		/* Check for overflow. */
196 		if (srcptr + chunk < srcptr) return EFAULT_SRC;
197 		if (dstptr + chunk < dstptr) return EFAULT_DST;
198 
199 		/* Copy pages. */
200 		PHYS_COPY_CATCH(srcptr, dstptr, chunk, addr);
201 
202 		if(addr) {
203 			/* If addr is nonzero, a page fault was caught. */
204 
205 			if(addr >= srcptr && addr < (srcptr + chunk)) {
206 				return EFAULT_SRC;
207 			}
208 			if(addr >= dstptr && addr < (dstptr + chunk)) {
209 				return EFAULT_DST;
210 			}
211 
212 			panic("lin_lin_copy fault out of range");
213 
214 			/* Not reached. */
215 			return EFAULT;
216 		}
217 
218 		/* Update counter and addresses for next iteration, if any. */
219 		bytes -= chunk;
220 		srclinaddr += chunk;
221 		dstlinaddr += chunk;
222 	}
223 
224 	if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
225 	if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
226 	assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
227 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
228 
229 	return OK;
230 }
231 
232 
233 static u32_t phys_get32(phys_bytes addr)
234 {
235 	u32_t v;
236 	int r;
237 
238 	if((r=lin_lin_copy(NULL, addr,
239 		proc_addr(SYSTEM), (phys_bytes) &v, sizeof(v))) != OK) {
240 		panic("lin_lin_copy for phys_get32 failed: %d",  r);
241 	}
242 
243 	return v;
244 }
245 
#if 0
/* Debug helpers, compiled out. Each turns a control register value into
 * a string of flag names; bits that are set but not known here are
 * summarized by a trailing " (++)".
 *
 * The result buffers are static, so every call overwrites the previous
 * result of the same function.
 */

/* Large enough for every name either helper can emit plus the " (++)"
 * marker and the terminator: cr0_str() needs at most 90 bytes and
 * cr4_str() at most 109, so the former 80 bytes could be overrun.
 */
#define CRSTR_LEN	128

/* Append the name of flag 'v' to the local 'str' if it is set in the
 * local 'e', and remove it from 'e'.
 */
#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); e &= ~(v); } } while(0)

static char *cr0_str(u32_t e)
{
	static char str[CRSTR_LEN];
	strcpy(str, "");
	FLAG(I386_CR0_PE);
	FLAG(I386_CR0_MP);
	FLAG(I386_CR0_EM);
	FLAG(I386_CR0_TS);
	FLAG(I386_CR0_ET);
	FLAG(I386_CR0_PG);
	FLAG(I386_CR0_WP);
	if(e) { strcat(str, " (++)"); }
	return str;
}

static char *cr4_str(u32_t e)
{
	static char str[CRSTR_LEN];
	strcpy(str, "");
	FLAG(I386_CR4_VME);
	FLAG(I386_CR4_PVI);
	FLAG(I386_CR4_TSD);
	FLAG(I386_CR4_DE);
	FLAG(I386_CR4_PSE);
	FLAG(I386_CR4_PAE);
	FLAG(I386_CR4_MCE);
	FLAG(I386_CR4_PGE);
	if(e) { strcat(str, " (++)"); }
	return str;
}
#endif
279 
280 /*===========================================================================*
281  *                              umap_virtual                                 *
282  *===========================================================================*/
283 phys_bytes umap_virtual(rp, seg, vir_addr, bytes)
284 register struct proc *rp;       /* pointer to proc table entry for process */
285 int seg;                        /* T, D, or S segment */
286 vir_bytes vir_addr;             /* virtual address in bytes within the seg */
287 vir_bytes bytes;                /* # of bytes to be copied */
288 {
289 	phys_bytes phys = 0;
290 
291 	if(vm_lookup(rp, vir_addr, &phys, NULL) != OK) {
292 		printf("SYSTEM:umap_virtual: vm_lookup of %s: seg 0x%x: 0x%lx failed\n", rp->p_name, seg, vir_addr);
293 		phys = 0;
294 	} else {
295 		if(phys == 0)
296 			panic("vm_lookup returned phys: 0x%lx",  phys);
297 	}
298 
299 	if(phys == 0) {
300 		printf("SYSTEM:umap_virtual: lookup failed\n");
301 		return 0;
302 	}
303 
304 	/* Now make sure addresses are contiguous in physical memory
305 	 * so that the umap makes sense.
306 	 */
307 	if(bytes > 0 && vm_lookup_range(rp, vir_addr, NULL, bytes) != bytes) {
308 		printf("umap_virtual: %s: %lu at 0x%lx (vir 0x%lx) not contiguous\n",
309 			rp->p_name, bytes, vir_addr, vir_addr);
310 		return 0;
311 	}
312 
313 	/* phys must be larger than 0 (or the caller will think the call
314 	 * failed), and address must not cross a page boundary.
315 	 */
316 	assert(phys);
317 
318 	return phys;
319 }
320 
321 
322 /*===========================================================================*
323  *                              vm_lookup                                    *
324  *===========================================================================*/
325 int vm_lookup(const struct proc *proc, const vir_bytes virtual,
326  phys_bytes *physical, u32_t *ptent)
327 {
328 	u32_t *root, *pt;
329 	int pde, pte;
330 	u32_t pde_v, pte_v;
331 
332 	assert(proc);
333 	assert(physical);
334 	assert(!isemptyp(proc));
335 	assert(HASPT(proc));
336 
337 	/* Retrieve page directory entry. */
338 	root = (u32_t *) proc->p_seg.p_cr3;
339 	assert(!((u32_t) root % I386_PAGE_SIZE));
340 	pde = I386_VM_PDE(virtual);
341 	assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
342 	pde_v = phys_get32((u32_t) (root + pde));
343 
344 	if(!(pde_v & I386_VM_PRESENT)) {
345 		return EFAULT;
346 	}
347 
348 	/* We don't expect to ever see this. */
349 	if(pde_v & I386_VM_BIGPAGE) {
350 		*physical = pde_v & I386_VM_ADDR_MASK_4MB;
351 		if(ptent) *ptent = pde_v;
352 		*physical += virtual & I386_VM_OFFSET_MASK_4MB;
353 	} else {
354 		/* Retrieve page table entry. */
355 		pt = (u32_t *) I386_VM_PFA(pde_v);
356 		assert(!((u32_t) pt % I386_PAGE_SIZE));
357 		pte = I386_VM_PTE(virtual);
358 		assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
359 		pte_v = phys_get32((u32_t) (pt + pte));
360 		if(!(pte_v & I386_VM_PRESENT)) {
361 			return EFAULT;
362 		}
363 
364 		if(ptent) *ptent = pte_v;
365 
366 		/* Actual address now known; retrieve it and add page offset. */
367 		*physical = I386_VM_PFA(pte_v);
368 		*physical += virtual % I386_PAGE_SIZE;
369 	}
370 
371 	return OK;
372 }
373 
374 /*===========================================================================*
375  *				vm_lookup_range				     *
376  *===========================================================================*/
377 size_t vm_lookup_range(const struct proc *proc, vir_bytes vir_addr,
378 	phys_bytes *phys_addr, size_t bytes)
379 {
380 	/* Look up the physical address corresponding to linear virtual address
381 	 * 'vir_addr' for process 'proc'. Return the size of the range covered
382 	 * by contiguous physical memory starting from that address; this may
383 	 * be anywhere between 0 and 'bytes' inclusive. If the return value is
384 	 * nonzero, and 'phys_addr' is non-NULL, 'phys_addr' will be set to the
385 	 * base physical address of the range. 'vir_addr' and 'bytes' need not
386 	 * be page-aligned, but the caller must have verified that the given
387 	 * linear range is valid for the given process at all.
388 	 */
389 	phys_bytes phys, next_phys;
390 	size_t len;
391 
392 	assert(proc);
393 	assert(bytes > 0);
394 	assert(HASPT(proc));
395 
396 	/* Look up the first page. */
397 	if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
398 		return 0;
399 
400 	if (phys_addr != NULL)
401 		*phys_addr = phys;
402 
403 	len = I386_PAGE_SIZE - (vir_addr % I386_PAGE_SIZE);
404 	vir_addr += len;
405 	next_phys = phys + len;
406 
407 	/* Look up any next pages and test physical contiguity. */
408 	while (len < bytes) {
409 		if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
410 			break;
411 
412 		if (next_phys != phys)
413 			break;
414 
415 		len += I386_PAGE_SIZE;
416 		vir_addr += I386_PAGE_SIZE;
417 		next_phys += I386_PAGE_SIZE;
418 	}
419 
420 	/* We might now have overshot the requested length somewhat. */
421 	return MIN(bytes, len);
422 }
423 
424 /*===========================================================================*
425  *				vm_check_range				     *
426  *===========================================================================*/
427 int vm_check_range(struct proc *caller, struct proc *target,
428 	vir_bytes vir_addr, size_t bytes, int writeflag)
429 {
430 	/* Public interface to vm_suspend(), for use by kernel calls. On behalf
431 	 * of 'caller', call into VM to check linear virtual address range of
432 	 * process 'target', starting at 'vir_addr', for 'bytes' bytes. This
433 	 * function assumes that it will called twice if VM returned an error
434 	 * the first time (since nothing has changed in that case), and will
435 	 * then return the error code resulting from the first call. Upon the
436 	 * first call, a non-success error code is returned as well.
437 	 */
438 	int r;
439 
440 	if ((caller->p_misc_flags & MF_KCALL_RESUME) &&
441 			(r = caller->p_vmrequest.vmresult) != OK)
442 		return r;
443 
444 	vm_suspend(caller, target, vir_addr, bytes, VMSTYPE_KERNELCALL,
445 		writeflag);
446 
447 	return VMSUSPEND;
448 }
449 
#if 0
/* Debug helpers, compiled out. They rely on the FLAG() macro that is
 * defined next to cr0_str() earlier in this file.
 */

/* Turn the flag bits of a page directory entry (dir != 0) or a page
 * table entry (dir == 0) into a string of flag names. The buffer is
 * static, so each call overwrites the previous result. The longest
 * possible result needs 99 bytes, which the former 80-byte buffer
 * could not hold.
 */
static char *flagstr(u32_t e, const int dir)
{
	static char str[128];
	strcpy(str, "");
	FLAG(I386_VM_PRESENT);
	FLAG(I386_VM_WRITE);
	FLAG(I386_VM_USER);
	FLAG(I386_VM_PWT);
	FLAG(I386_VM_PCD);
	FLAG(I386_VM_GLOBAL);
	if(dir)
		FLAG(I386_VM_BIGPAGE);	/* Page directory entry only */
	else
		FLAG(I386_VM_DIRTY);	/* Page table entry only */
	return str;
}

/* Print the present entries of the page table at physical address
 * 'pagetable', three per line; 'v' is the virtual address that entry 0
 * of this table maps.
 */
static void vm_pt_print(u32_t *pagetable, const u32_t v)
{
	int pte;
	int col = 0;

	assert(!((u32_t) pagetable % I386_PAGE_SIZE));

	for(pte = 0; pte < I386_VM_PT_ENTRIES; pte++) {
		u32_t pte_v, pfa;
		pte_v = phys_get32((u32_t) (pagetable + pte));
		if(!(pte_v & I386_VM_PRESENT))
			continue;
		pfa = I386_VM_PFA(pte_v);
		/* %lx takes an unsigned long, so convert explicitly. */
		printf("%4d:%08lx:%08lx %2s ",
			pte, (unsigned long) (v + I386_PAGE_SIZE*pte),
			(unsigned long) pfa,
			(pte_v & I386_VM_WRITE) ? "rw":"RO");
		col++;
		if(col == 3) { printf("\n"); col = 0; }
	}
	if(col > 0) printf("\n");

	return;
}

/* Print every present entry of the page directory at physical address
 * 'root', descending into page tables for entries that are not big
 * pages.
 */
static void vm_print(u32_t *root)
{
	int pde;

	assert(!((u32_t) root % I386_PAGE_SIZE));

	/* Pointers cannot be passed to %lx as they are; convert them. */
	printf("page table 0x%lx:\n", (unsigned long) root);

	for(pde = 0; pde < I386_VM_DIR_ENTRIES; pde++) {
		u32_t pde_v;
		u32_t *pte_a;
		pde_v = phys_get32((u32_t) (root + pde));
		if(!(pde_v & I386_VM_PRESENT))
			continue;
		if(pde_v & I386_VM_BIGPAGE) {
			printf("%4d: 0x%lx, flags %s\n",
				pde, (unsigned long) I386_VM_PFA(pde_v),
				flagstr(pde_v, 1));
		} else {
			pte_a = (u32_t *) I386_VM_PFA(pde_v);
			printf("%4d: pt %08lx %s\n",
				pde, (unsigned long) pte_a,
				flagstr(pde_v, 1));
			vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE);
			printf("\n");
		}
	}


	return;
}
#endif
522 
523 /*===========================================================================*
524  *                                 vmmemset                                  *
525  *===========================================================================*/
526 int vm_memset(struct proc* caller, endpoint_t who, phys_bytes ph, int c,
527 	phys_bytes count)
528 {
529 	u32_t pattern;
530 	struct proc *whoptr = NULL;
531 	phys_bytes cur_ph = ph;
532 	phys_bytes left = count;
533 	phys_bytes ptr, chunk, pfa = 0;
534 	int new_cr3, r = OK;
535 
536 	if ((r = check_resumed_caller(caller)) != OK)
537 		return r;
538 
539 	/* NONE for physical, otherwise virtual */
540 	if (who != NONE && !(whoptr = endpoint_lookup(who)))
541 		return ESRCH;
542 
543 	c &= 0xFF;
544 	pattern = c | (c << 8) | (c << 16) | (c << 24);
545 
546 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
547 	assert(!catch_pagefaults);
548 	catch_pagefaults = 1;
549 
550 	/* We can memset as many bytes as we have remaining,
551 	 * or as many as remain in the 4MB chunk we mapped in.
552 	 */
553 	while (left > 0) {
554 		new_cr3 = 0;
555 		chunk = left;
556 		ptr = createpde(whoptr, cur_ph, &chunk, 0, &new_cr3);
557 
558 		if (new_cr3)
559 			reload_cr3();
560 
561 		/* If a page fault happens, pfa is non-null */
562 		if ((pfa = phys_memset(ptr, pattern, chunk))) {
563 
564 			/* If a process pagefaults, VM may help out */
565 			if (whoptr) {
566 				vm_suspend(caller, whoptr, ph, count,
567 						   VMSTYPE_KERNELCALL, 1);
568 				assert(catch_pagefaults);
569 				catch_pagefaults = 0;
570 				return VMSUSPEND;
571 			}
572 
573 			/* Pagefault when phys copying ?! */
574 			panic("vm_memset: pf %lx addr=%lx len=%lu\n",
575 						pfa , ptr, chunk);
576 		}
577 
578 		cur_ph += chunk;
579 		left -= chunk;
580 	}
581 
582 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
583 	assert(catch_pagefaults);
584 	catch_pagefaults = 0;
585 
586 	return OK;
587 }
588 
589 /*===========================================================================*
590  *				virtual_copy_f				     *
591  *===========================================================================*/
592 int virtual_copy_f(caller, src_addr, dst_addr, bytes, vmcheck)
593 struct proc * caller;
594 struct vir_addr *src_addr;	/* source virtual address */
595 struct vir_addr *dst_addr;	/* destination virtual address */
596 vir_bytes bytes;		/* # of bytes to copy  */
597 int vmcheck;			/* if nonzero, can return VMSUSPEND */
598 {
599 /* Copy bytes from virtual address src_addr to virtual address dst_addr. */
600   struct vir_addr *vir_addr[2];	/* virtual source and destination address */
601   int i, r;
602   struct proc *procs[2];
603 
604   assert((vmcheck && caller) || (!vmcheck && !caller));
605 
606   /* Check copy count. */
607   if (bytes <= 0) return(EDOM);
608 
609   /* Do some more checks and map virtual addresses to physical addresses. */
610   vir_addr[_SRC_] = src_addr;
611   vir_addr[_DST_] = dst_addr;
612 
613   for (i=_SRC_; i<=_DST_; i++) {
614   	endpoint_t proc_e = vir_addr[i]->proc_nr_e;
615 	int proc_nr;
616 	struct proc *p;
617 
618 	if(proc_e == NONE) {
619 		p = NULL;
620 	} else {
621 		if(!isokendpt(proc_e, &proc_nr)) {
622 			printf("virtual_copy: no reasonable endpoint\n");
623 			return ESRCH;
624 		}
625 		p = proc_addr(proc_nr);
626 	}
627 
628 	procs[i] = p;
629   }
630 
631   if ((r = check_resumed_caller(caller)) != OK)
632 	return r;
633 
634   if((r=lin_lin_copy(procs[_SRC_], vir_addr[_SRC_]->offset,
635   	procs[_DST_], vir_addr[_DST_]->offset, bytes)) != OK) {
636 	int writeflag;
637   	struct proc *target = NULL;
638   	phys_bytes lin;
639   	if(r != EFAULT_SRC && r != EFAULT_DST)
640   		panic("lin_lin_copy failed: %d",  r);
641   	if(!vmcheck || !caller) {
642     		return r;
643   	}
644 
645   	if(r == EFAULT_SRC) {
646   		lin = vir_addr[_SRC_]->offset;
647   		target = procs[_SRC_];
648 		writeflag = 0;
649   	} else if(r == EFAULT_DST) {
650   		lin = vir_addr[_DST_]->offset;
651   		target = procs[_DST_];
652 		writeflag = 1;
653   	} else {
654   		panic("r strange: %d",  r);
655   	}
656 
657 	assert(caller);
658 	assert(target);
659 
660 	vm_suspend(caller, target, lin, bytes, VMSTYPE_KERNELCALL, writeflag);
661 	return VMSUSPEND;
662   }
663 
664   return OK;
665 }
666 
667 /*===========================================================================*
668  *				data_copy				     *
669  *===========================================================================*/
670 int data_copy(const endpoint_t from_proc, const vir_bytes from_addr,
671 	const endpoint_t to_proc, const vir_bytes to_addr,
672 	size_t bytes)
673 {
674   struct vir_addr src, dst;
675 
676   src.offset = from_addr;
677   dst.offset = to_addr;
678   src.proc_nr_e = from_proc;
679   dst.proc_nr_e = to_proc;
680   assert(src.proc_nr_e != NONE);
681   assert(dst.proc_nr_e != NONE);
682 
683   return virtual_copy(&src, &dst, bytes);
684 }
685 
686 /*===========================================================================*
687  *				data_copy_vmcheck			     *
688  *===========================================================================*/
689 int data_copy_vmcheck(struct proc * caller,
690 	const endpoint_t from_proc, const vir_bytes from_addr,
691 	const endpoint_t to_proc, const vir_bytes to_addr,
692 	size_t bytes)
693 {
694   struct vir_addr src, dst;
695 
696   src.offset = from_addr;
697   dst.offset = to_addr;
698   src.proc_nr_e = from_proc;
699   dst.proc_nr_e = to_proc;
700   assert(src.proc_nr_e != NONE);
701   assert(dst.proc_nr_e != NONE);
702 
703   return virtual_copy_vmcheck(caller, &src, &dst, bytes);
704 }
705 
706 void memory_init(void)
707 {
708 	assert(nfreepdes == 0);
709 
710 	freepdes[nfreepdes++] = kinfo.freepde_start++;
711 	freepdes[nfreepdes++] = kinfo.freepde_start++;
712 
713 	assert(kinfo.freepde_start < I386_VM_DIR_ENTRIES);
714 	assert(nfreepdes == 2);
715 	assert(nfreepdes <= MAXFREEPDES);
716 }
717 
718 /*===========================================================================*
719  *				arch_proc_init				     *
720  *===========================================================================*/
721 void arch_proc_init(struct proc *pr, const u32_t ip, const u32_t sp,
722 	const u32_t ps_str, char *name)
723 {
724 	arch_proc_reset(pr);
725 	strlcpy(pr->p_name, name, sizeof(pr->p_name));
726 
727 	/* set custom state we know */
728 	pr->p_reg.pc = ip;
729 	pr->p_reg.sp = sp;
730 	pr->p_reg.bx = ps_str;
731 }
732 
/* Index numbers that arch_phys_map() assigns to the individual mappings
 * on its first call. A value of -1 means that no index was assigned.
 */
static int video_mem_mapping_index = -1;
static int usermapped_glo_index = -1;
static int usermapped_index = -1;
static int first_um_idx = -1;	/* glo index if assigned, else usermapped */
static int lapic_mapping_index = -1;
static int ioapic_first_index = -1;
static int ioapic_last_index = -1;
static int oxpcie_mapping_index = -1;

extern char *video_mem;

/* Symbols that delimit the usermapped area. */
extern char usermapped_start, usermapped_end, usermapped_nonglo_start;
744 
745 int arch_phys_map(const int index,
746 			phys_bytes *addr,
747 			phys_bytes *len,
748 			int *flags)
749 {
750 	static int first = 1;
751 	int freeidx = 0;
752 	static char *ser_var = NULL;
753 	u32_t glo_len = (u32_t) &usermapped_nonglo_start -
754 			(u32_t) &usermapped_start;
755 
756 	if(first) {
757 		memset(&minix_kerninfo, 0, sizeof(minix_kerninfo));
758 		video_mem_mapping_index = freeidx++;
759 		if(glo_len > 0) {
760 			usermapped_glo_index = freeidx++;
761 		}
762 
763 		usermapped_index = freeidx++;
764 		first_um_idx = usermapped_index;
765 		if(usermapped_glo_index != -1)
766 			first_um_idx = usermapped_glo_index;
767 
768 #ifdef USE_APIC
769 		if(lapic_addr)
770 			lapic_mapping_index = freeidx++;
771 		if (ioapic_enabled) {
772 			ioapic_first_index = freeidx;
773 			assert(nioapics > 0);
774 			freeidx += nioapics;
775 			ioapic_last_index = freeidx-1;
776 		}
777 #endif
778 
779 #ifdef CONFIG_OXPCIE
780 		if((ser_var = env_get("oxpcie"))) {
781 			if(ser_var[0] != '0' || ser_var[1] != 'x') {
782 				printf("oxpcie address in hex please\n");
783 			} else {
784 				printf("oxpcie address is %s\n", ser_var);
785 				oxpcie_mapping_index = freeidx++;
786 			}
787 		}
788 #endif
789 
790 		first = 0;
791 	}
792 
793 	if(index == usermapped_glo_index) {
794 		*addr = vir2phys(&usermapped_start);
795 		*len = glo_len;
796 		*flags = VMMF_USER | VMMF_GLO;
797 		return OK;
798 	}
799 	else if(index == usermapped_index) {
800 		*addr = vir2phys(&usermapped_nonglo_start);
801 		*len = (u32_t) &usermapped_end -
802 			(u32_t) &usermapped_nonglo_start;
803 		*flags = VMMF_USER;
804 		return OK;
805 	}
806 	else if (index == video_mem_mapping_index) {
807 		/* map video memory in so we can print panic messages */
808 		*addr = MULTIBOOT_VIDEO_BUFFER;
809 		*len = I386_PAGE_SIZE;
810 		*flags = VMMF_WRITE;
811 		return OK;
812 	}
813 #ifdef USE_APIC
814 	else if (index == lapic_mapping_index) {
815 		/* map the local APIC if enabled */
816 		if (!lapic_addr)
817 			return EINVAL;
818 		*addr = lapic_addr;
819 		*len = 4 << 10 /* 4kB */;
820 		*flags = VMMF_UNCACHED | VMMF_WRITE;
821 		return OK;
822 	}
823 	else if (ioapic_enabled && index >= ioapic_first_index && index <= ioapic_last_index) {
824 		int ioapic_idx = index - ioapic_first_index;
825 		*addr = io_apic[ioapic_idx].paddr;
826 		assert(*addr);
827 		*len = 4 << 10 /* 4kB */;
828 		*flags = VMMF_UNCACHED | VMMF_WRITE;
829 		printf("ioapic map: addr 0x%lx\n", *addr);
830 		return OK;
831 	}
832 #endif
833 
834 #if CONFIG_OXPCIE
835 	if(index == oxpcie_mapping_index) {
836 		*addr = strtoul(ser_var+2, NULL, 16);
837 		*len = 0x4000;
838 		*flags = VMMF_UNCACHED | VMMF_WRITE;
839 		return OK;
840 	}
841 #endif
842 
843 	return EINVAL;
844 }
845 
846 int arch_phys_map_reply(const int index, const vir_bytes addr)
847 {
848 #ifdef USE_APIC
849 	/* if local APIC is enabled */
850 	if (index == lapic_mapping_index && lapic_addr) {
851 		lapic_addr_vaddr = addr;
852 		return OK;
853 	}
854 	else if (ioapic_enabled && index >= ioapic_first_index &&
855 		index <= ioapic_last_index) {
856 		int i = index - ioapic_first_index;
857 		io_apic[i].vaddr = addr;
858 		return OK;
859 	}
860 #endif
861 
862 #if CONFIG_OXPCIE
863 	if (index == oxpcie_mapping_index) {
864 		oxpcie_set_vaddr((unsigned char *) addr);
865 		return OK;
866 	}
867 #endif
868 	if(index == first_um_idx) {
869 		extern struct minix_ipcvecs minix_ipcvecs_sysenter,
870 			minix_ipcvecs_syscall,
871 			minix_ipcvecs_softint;
872 		extern u32_t usermapped_offset;
873 		assert(addr > (u32_t) &usermapped_start);
874 		usermapped_offset = addr - (u32_t) &usermapped_start;
875 #define FIXEDPTR(ptr) (void *) ((u32_t)ptr + usermapped_offset)
876 #define FIXPTR(ptr) ptr = FIXEDPTR(ptr)
877 #define ASSIGN(minixstruct) minix_kerninfo.minixstruct = FIXEDPTR(&minixstruct)
878 		ASSIGN(kinfo);
879 		ASSIGN(machine);
880 		ASSIGN(kmessages);
881 		ASSIGN(loadinfo);
882 		ASSIGN(kuserinfo);
883 		ASSIGN(arm_frclock); /* eh, why not. */
884 		ASSIGN(kclockinfo);
885 
886 		/* select the right set of IPC routines to map into processes */
887 		if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
888 			DEBUGBASIC(("kernel: selecting intel sysenter ipc style\n"));
889 			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_sysenter;
890 		} else  if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
891 			DEBUGBASIC(("kernel: selecting amd syscall ipc style\n"));
892 			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_syscall;
893 		} else	{
894 			DEBUGBASIC(("kernel: selecting fallback (int) ipc style\n"));
895 			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_softint;
896 		}
897 
898 		/* adjust the pointers of the functions and the struct
899 		 * itself to the user-accessible mapping
900 		 */
901 		FIXPTR(minix_kerninfo.minix_ipcvecs->send);
902 		FIXPTR(minix_kerninfo.minix_ipcvecs->receive);
903 		FIXPTR(minix_kerninfo.minix_ipcvecs->sendrec);
904 		FIXPTR(minix_kerninfo.minix_ipcvecs->senda);
905 		FIXPTR(minix_kerninfo.minix_ipcvecs->sendnb);
906 		FIXPTR(minix_kerninfo.minix_ipcvecs->notify);
907 		FIXPTR(minix_kerninfo.minix_ipcvecs->do_kernel_call);
908 		FIXPTR(minix_kerninfo.minix_ipcvecs);
909 
910 		minix_kerninfo.kerninfo_magic = KERNINFO_MAGIC;
911 		minix_kerninfo.minix_feature_flags = minix_feature_flags;
912 		minix_kerninfo_user = (vir_bytes) FIXEDPTR(&minix_kerninfo);
913 
914 		/* if libc_ipc is set, disable usermapped ipc functions
915 		 * and force binaries to use in-libc fallbacks.
916 		 */
917 		if(env_get("libc_ipc")) {
918 			printf("kernel: forcing in-libc fallback ipc style\n");
919 			minix_kerninfo.minix_ipcvecs = NULL;
920 		} else {
921 			minix_kerninfo.ki_flags |= MINIX_KIF_IPCVECS;
922 		}
923 
924 		minix_kerninfo.ki_flags |= MINIX_KIF_USERINFO;
925 
926 		return OK;
927 	}
928 
929 	if(index == usermapped_index) return OK;
930 
931 	if (index == video_mem_mapping_index) {
932 		video_mem_vaddr =  addr;
933 		return OK;
934 	}
935 
936 	return EINVAL;
937 }
938 
939 int arch_enable_paging(struct proc * caller)
940 {
941 	assert(caller->p_seg.p_cr3);
942 
943 	/* load caller's page table */
944 	switch_address_space(caller);
945 
946 	video_mem = (char *) video_mem_vaddr;
947 
948 #ifdef USE_APIC
949 	/* start using the virtual addresses */
950 
951 	/* if local APIC is enabled */
952 	if (lapic_addr) {
953 		lapic_addr = lapic_addr_vaddr;
954 		lapic_eoi_addr = LAPIC_EOI;
955 	}
956 	/* if IO apics are enabled */
957 	if (ioapic_enabled) {
958 		int i;
959 
960 		for (i = 0; i < nioapics; i++) {
961 			io_apic[i].addr = io_apic[i].vaddr;
962 		}
963 	}
964 #if CONFIG_SMP
965 	barrier();
966 
967 	wait_for_APs_to_finish_booting();
968 #endif
969 #endif
970 
971 #ifdef USE_WATCHDOG
972 	/*
973 	 * We make sure that we don't enable the watchdog until paging is turned
974 	 * on as we might get an NMI while switching and we might still use wrong
975 	 * lapic address. Bad things would happen. It is unfortunate but such is
976 	 * life
977 	 */
978 	if (watchdog_enabled)
979 		i386_watchdog_start();
980 #endif
981 
982 	return OK;
983 }
984 
985 void release_address_space(struct proc *pr)
986 {
987 	pr->p_seg.p_cr3_v = NULL;
988 }
989 
/* Add up the 'length' bytes at 'ptr', keeping only the low 8 bits of
 * the sum. Returns 1 if that sum is zero (which is also the case for a
 * length of 0) and 0 otherwise.
 */
int platform_tbl_checksum_ok(void *ptr, unsigned int length)
{
	const unsigned char *cursor = ptr;
	unsigned char sum = 0;

	while (length-- > 0)
		sum += *cursor++;

	return sum == 0;
}
999 
1000 int platform_tbl_ptr(phys_bytes start,
1001 					phys_bytes end,
1002 					unsigned increment,
1003 					void * buff,
1004 					unsigned size,
1005 					phys_bytes * phys_addr,
1006 					int ((* cmp_f)(void *)))
1007 {
1008 	phys_bytes addr;
1009 
1010 	for (addr = start; addr < end; addr += increment) {
1011 		phys_copy (addr, (phys_bytes) buff, size);
1012 		if (cmp_f(buff)) {
1013 			if (phys_addr)
1014 				*phys_addr = addr;
1015 			return 1;
1016 		}
1017 	}
1018 	return 0;
1019 }
1020