xref: /plan9-contrib/sys/src/9/pc/l.s (revision 5e4924093ecb86f7174bf23023955abc83fb6962)
1#include "mem.h"
2#include "/sys/src/boot/pc/x16.h"
3#undef DELAY
4
5#define PADDR(a)	((a) & ~KZERO)
6#define KADDR(a)	(KZERO|(a))
7
8/*
9 * Some machine instructions not handled by 8[al].
10 */
11#define OP16		BYTE $0x66
12#define DELAY		BYTE $0xEB; BYTE $0x00	/* JMP .+2 */
13#define CPUID		BYTE $0x0F; BYTE $0xA2	/* CPUID, argument in AX */
14#define WRMSR		BYTE $0x0F; BYTE $0x30	/* WRMSR, argument in AX/DX (lo/hi) */
15#define RDTSC 		BYTE $0x0F; BYTE $0x31	/* RDTSC, result in AX/DX (lo/hi) */
16#define RDMSR		BYTE $0x0F; BYTE $0x32	/* RDMSR, result in AX/DX (lo/hi) */
17#define HLT		BYTE $0xF4
18#define INVLPG	BYTE $0x0F; BYTE $0x01; BYTE $0x39	/* INVLPG (%ecx) */
19
20/*
21 * Macros for calculating offsets within the page directory base
22 * and page tables. Note that these are assembler-specific hence
23 * the '<<2'.
24 */
25#define PDO(a)		(((((a))>>22) & 0x03FF)<<2)
26#define PTO(a)		(((((a))>>12) & 0x03FF)<<2)
27
28/*
29 * For backwards compatibility with 9load - should go away when 9load is changed
30 * 9load currently sets up the mmu, however the first 16MB of memory is identity
31 * mapped, so behave as if the mmu was not setup
32 */
/*
 * Kernel entry at its linked (KZERO-based) address.  Masks KZERO out of the
 * address of _startPADDR - the same arithmetic as PADDR() above - and jumps
 * there, so the real startup code is always entered at its physical alias.
 * Clobbers AX.
 */
33TEXT _startKADDR(SB), $0
34	MOVL	$_startPADDR(SB), AX
35	ANDL	$~KZERO, AX
36	JMP*	AX
37
38/*
39 * Must be 4-byte aligned.
40 */
/*
 * Multiboot header (data, not code).  The checksum word is written as the
 * negated sum of magic and flags, so the first three words add up to zero.
 * Every address below has KZERO subtracted, i.e. it is the physical address
 * of the symbol.
 * NOTE(review): the meaning of the individual flag bits in 0x00010003 is
 * defined by the Multiboot specification, not by this file - confirm there.
 */
41TEXT _multibootheader(SB), $0
42	LONG	$0x1BADB002			/* magic */
43	LONG	$0x00010003			/* flags */
44	LONG	$-(0x1BADB002 + 0x00010003)	/* checksum */
45	LONG	$_multibootheader-KZERO(SB)	/* header_addr */
46	LONG	$_startKADDR-KZERO(SB)		/* load_addr */
47	LONG	$edata-KZERO(SB)		/* load_end_addr */
48	LONG	$end-KZERO(SB)			/* bss_end_addr */
49	LONG	$_startKADDR-KZERO(SB)		/* entry_addr */
50	LONG	$0				/* mode_type */
51	LONG	$0				/* width */
52	LONG	$0				/* height */
53	LONG	$0				/* depth */
54
55/*
56 * In protected mode with paging turned off and segment registers setup to linear map all memory.
57 * Entered via a jump to PADDR(entry), the physical address of the virtual kernel entry point of KADDR(entry)
58 * Make the basic page tables for processor 0. Four pages are needed for the basic set:
59 * a page directory, a page table for mapping the first 4MB of physical memory to KZERO,
60 * and virtual and physical pages for mapping the Mach structure.
61 * The remaining PTEs will be allocated later when memory is sized.
62 * An identity mmu map is also needed for the switch to virtual mode.  This
63 * identity mapping is removed once the MMU is going and the JMP has been made
64 * to virtual memory.
65 */
/*
 * Physical-address entry point.  Loads the temporary GDT (tgdt) through the
 * KZERO-stripped address of tgdtptr, writes 1 to MSW, reloads all data
 * segment registers with selector 1<<3 (the data descriptor in tgdt) and
 * far-jumps through selector 2<<3 (the exec descriptor) to the physical
 * address of mode32bit.  Does not return.  Clobbers AX.
 */
66TEXT _startPADDR(SB), $0
67	CLI					/* make sure interrupts are off */
68
69	/* set up the gdt so we have sane plan 9 style gdts. */
70	MOVL	$tgdtptr(SB), AX
71	ANDL	$~KZERO, AX
72	MOVL	(AX), GDTR
73	MOVW	$1, AX
74	MOVW	AX, MSW
75
76	/* clear prefetch queue (weird code to avoid optimizations) */
77	DELAY
78
79	/* set segs to something sane (avoid traps later) */
80	MOVW	$(1<<3), AX
81	MOVW	AX, DS
82	MOVW	AX, SS
83	MOVW	AX, ES
84	MOVW	AX, FS
85	MOVW	AX, GS
86
	/* hand-assembled far jump: opcode 0xEA, 32-bit offset, 16-bit selector */
87/*	JMP	$(2<<3):$mode32bit(SB) /**/
88	 BYTE	$0xEA
89	 LONG	$mode32bit-KZERO(SB)
90	 WORD	$(2<<3)
91
92/*
93 *  gdt to get us to 32-bit/segmented/unpaged mode
94 */
/*
 * Temporary boot GDT (data): three 8-byte descriptors.  Index 0 is the null
 * descriptor, index 1 (selector 1<<3) the data segment and index 2
 * (selector 2<<3) the exec segment; _startPADDR uses exactly those two
 * selectors.
 */
95TEXT tgdt(SB), $0
96
97	/* null descriptor */
98	LONG	$0
99	LONG	$0
100
101	/* data segment descriptor for 4 gigabytes (PL 0) */
102	LONG	$(0xFFFF)
103	LONG	$(SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(0)|SEGDATA|SEGW)
104
105	/* exec segment descriptor for 4 gigabytes (PL 0) */
106	LONG	$(0xFFFF)
107	LONG	$(SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(0)|SEGEXEC|SEGR)
108
109/*
110 *  pointer to initial gdt
111 *  Note the -KZERO which puts the physical address in the gdtptr.
112 *  that's needed as we start executing in physical addresses.
113 */
/*
 * Pseudo-descriptor for the temporary boot GDT: a 16-bit limit followed by
 * the 32-bit physical base (tgdt with KZERO subtracted).  The limit is the
 * offset of the last valid byte, so the three 8-byte descriptors in tgdt
 * give 3*8-1, the same size-minus-one form used by m0rgdtptr, m0gdtptr and
 * m0idtptr below.
 */
TEXT tgdtptr(SB), $0
	WORD	$(3*8-1)
	LONG	$tgdt-KZERO(SB)
117
/*
 * Pseudo-descriptor for processor 0's GDT using the physical
 * (KZERO-stripped) address of CPU0GDT.  physcode loads it just before
 * paging is turned off.
 */
118TEXT m0rgdtptr(SB), $0
119	WORD	$(NGDT*8-1)
120	LONG	$(CPU0GDT-KZERO)
121
/*
 * Pseudo-descriptor for processor 0's GDT at its virtual address CPU0GDT.
 * again32kzero reloads GDTR from it after the real-mode excursion.
 */
122TEXT m0gdtptr(SB), $0
123	WORD	$(NGDT*8-1)
124	LONG	$CPU0GDT
125
/*
 * Pseudo-descriptor for the IDT: 256 entries of 8 bytes at IDTADDR.
 * again32kzero reloads IDTR from it after the real-mode excursion.
 */
126TEXT m0idtptr(SB), $0
127	WORD $(256*8-1)
128	LONG $IDTADDR
129
/*
 * Runs at physical addresses with paging still off.  Zeroes four pages
 * starting at PADDR(CPU0PDB), installs one page-directory entry for KZERO
 * that points at CPU0PTE, fills that page table with 1024 consecutive
 * mappings starting at physical page 0, redirects the MACHADDR entry to
 * CPU0MACH, temporarily double-maps the KZERO table at virtual 0, then
 * loads CR3, enables paging and jumps to the virtual address of _startpg.
 * CX still holds PADDR(CPU0PDB) on exit; _startpg relies on that.
 */
130TEXT mode32bit(SB), $0
131	/* At this point, the GDT setup is done. */
132
133	MOVL	$PADDR(CPU0PDB), DI		/* clear 4 pages for the tables etc. */
134	XORL	AX, AX
135	MOVL	$(4*BY2PG), CX
136	SHRL	$2, CX
137
138	CLD
139	REP;	STOSL
140
141	MOVL	$PADDR(CPU0PDB), AX
142	ADDL	$PDO(KZERO), AX			/* page directory offset for KZERO */
143	MOVL	$PADDR(CPU0PTE), (AX)		/* PTE's for KZERO */
144	MOVL	$(PTEWRITE|PTEVALID), BX	/* page permissions */
145	ORL	BX, (AX)
146
	/* BX starts as the entry for physical page 0 and advances one page per slot */
147	MOVL	$PADDR(CPU0PTE), AX		/* first page of page table */
148	MOVL	$1024, CX			/* 1024 pages in 4MB */
149_setpte:
150	MOVL	BX, (AX)
151	ADDL	$(1<<PGSHIFT), BX
152	ADDL	$4, AX
153	LOOP	_setpte
154
	/* overwrite the slot for MACHADDR so it maps CPU0MACH instead */
155	MOVL	$PADDR(CPU0PTE), AX
156	ADDL	$PTO(MACHADDR), AX		/* page table entry offset for MACHADDR */
157	MOVL	$PADDR(CPU0MACH), (AX)		/* PTE for Mach */
158	MOVL	$(PTEWRITE|PTEVALID), BX	/* page permissions */
159	ORL	BX, (AX)
160
161/*
162 * Now ready to use the new map. Make sure the processor options are what is wanted.
163 * It is necessary on some processors to immediately follow mode switching with a JMP instruction
164 * to clear the prefetch queues.
165 */
166	MOVL	$PADDR(CPU0PDB), CX		/* load address of page directory */
167	MOVL	(PDO(KZERO))(CX), DX		/* double-map KZERO at 0 */
168	MOVL	DX, (PDO(0))(CX)
169	MOVL	CX, CR3
170	DELAY					/* JMP .+2 */
171
172	MOVL	CR0, DX
173	ORL	$0x80010000, DX			/* PG|WP */
174	ANDL	$~0x6000000A, DX		/* ~(CD|NW|TS|MP) */
175
	/* the jump target is loaded before paging is switched on */
176	MOVL	$_startpg(SB), AX		/* this is a virtual address */
177	MOVL	DX, CR0				/* turn on paging */
178	JMP*	AX				/* jump to the virtual nirvana */
179
180/*
181 * Basic machine environment set, can clear BSS and create a stack.
182 * The stack starts at the top of the page containing the Mach structure.
183 * The x86 architecture forces the use of the same virtual address for
184 * each processor's Mach structure, so the global Mach pointer 'm' can
185 * be initialised here.
186 */
/*
 * First code executed at virtual addresses.  Expects CX to still hold
 * PADDR(CPU0PDB) as left by mode32bit.  Removes the temporary mapping at
 * virtual 0, reloads CR3, zeroes edata..end, points SP and the global m
 * at MACHADDR, stores 0 at offset 0 of that page, moves SP to
 * MACHADDR+MACHSIZE-4, loads EFLAGS with 0 and calls main.
 * There is no instruction after the CALL: if main returns, execution runs
 * on into the next TEXT in the file.
 */
187TEXT _startpg(SB), $0
188	MOVL	$0, (PDO(0))(CX)		/* undo double-map of KZERO at 0 */
189	MOVL	CX, CR3				/* load and flush the mmu */
190
191_clearbss:
192	MOVL	$edata(SB), DI
193	XORL	AX, AX
194	MOVL	$end(SB), CX
195	SUBL	DI, CX				/* end-edata bytes */
196	SHRL	$2, CX				/* end-edata doublewords */
197
198	CLD
199	REP;	STOSL				/* clear BSS */
200
201	MOVL	$MACHADDR, SP
202	MOVL	SP, m(SB)			/* initialise global Mach pointer */
203	MOVL	$0, 0(SP)			/* initialise m->machno */
204
205
206	ADDL	$(MACHSIZE-4), SP		/* initialise stack */
207
208/*
209 * Need to do one final thing to ensure a clean machine environment,
210 * clear the EFLAGS register, which can only be done once there is a stack.
211 */
212	MOVL	$0, AX
213	PUSHL	AX
214	POPFL
215
216	CALL	main(SB)
217
218/*
219 * Park a processor. Should never fall through a return from main to here,
220 * should only be called by application processors when shutting down.
221 */
/*
 * Endless loop: enable interrupts, halt until the next one, repeat.
 * Never returns.
 */
222TEXT idle(SB), $0
223_idle:
224	STI
225	HLT
226	JMP	_idle
227
228/*
229 * Save registers.
230 */
/*
 * Pushes AX, BX, CX, DX, BP, DI, SI and EFLAGS onto the caller's stack and
 * returns with them still there (the caller's SP ends up 32 bytes lower).
 * restoreregs undoes this.  All register values are preserved.
 */
231TEXT saveregs(SB), $0
232	/* appease 8l */
	/*
	 * SUBL $32 followed by eight POPLs leaves SP unchanged.
	 * NOTE(review): presumably this balances 8l's push/pop accounting
	 * against the eight pushes below - confirm.
	 */
233	SUBL $32, SP
234	POPL AX
235	POPL AX
236	POPL AX
237	POPL AX
238	POPL AX
239	POPL AX
240	POPL AX
241	POPL AX
242
243	PUSHL	AX
244	PUSHL	BX
245	PUSHL	CX
246	PUSHL	DX
247	PUSHL	BP
248	PUSHL	DI
249	PUSHL	SI
250	PUSHFL
251
	/*
	 * Here 0(SP) is the saved flags and 32(SP) the return PC.  The three
	 * exchanges swap those two slots and leave AX with its original value,
	 * so RET finds the return PC on top and the flags stay in the save area.
	 */
252	XCHGL	32(SP), AX	/* swap return PC and saved flags */
253	XCHGL	0(SP), AX
254	XCHGL	32(SP), AX
255	RET
256
/*
 * Inverse of saveregs: expects the eight longs left by saveregs directly
 * above the return PC, restores EFLAGS, SI, DI, BP, DX, CX, BX and AX from
 * them and returns with those 32 bytes removed from the caller's stack.
 */
257TEXT restoreregs(SB), $0
258	/* appease 8l */
	/* eight PUSHLs followed by ADDL $32 leave SP unchanged */
259	PUSHL	AX
260	PUSHL	AX
261	PUSHL	AX
262	PUSHL	AX
263	PUSHL	AX
264	PUSHL	AX
265	PUSHL	AX
266	PUSHL	AX
267	ADDL	$32, SP
268
	/*
	 * Here 0(SP) is the return PC and 32(SP) the saved flags; swap them so
	 * the pops below see the save area in order and RET finds the PC last.
	 */
269	XCHGL	32(SP), AX	/* swap return PC and saved flags */
270	XCHGL	0(SP), AX
271	XCHGL	32(SP), AX
272
273	POPFL
274	POPL	SI
275	POPL	DI
276	POPL	BP
277	POPL	DX
278	POPL	CX
279	POPL	BX
280	POPL	AX
281	RET
282
283/*
284 * Assumed to be in protected mode at time of call.
285 * Switch to real mode, execute an interrupt, and
286 * then switch back to protected mode.
287 *
288 * Assumes:
289 *
290 *	- no device interrupts are going to come in
291 *	- 0-16MB is identity mapped in page tables
292 *	- realmode() has copied us down from 0x100000 to 0x8000
293 *	- can use code segment 0x0800 in real mode
294 *		to get at l.s code
295 *	- l.s code is less than 1 page
296 */
297#define RELOC	(RMCODE-KTZERO)
298
/*
 * Pseudo-descriptor loaded into IDTR by physcode before dropping to real
 * mode: base 0, limit 4*256-1 (256 entries of 4 bytes).
 */
299TEXT realmodeidtptr(SB), $0
300	WORD	$(4*256-1)
301	LONG	$0
302
/*
 * Start of the protected -> real -> protected round trip described above.
 * Saves the registers on the current stack with saveregs, then continues
 * at the KZERO-stripped (low) address of physcode.  The matching
 * restoreregs and RET are at the end of again32kzero.
 */
303TEXT realmode0(SB), $0
304	CALL	saveregs(SB)
305
306	/* switch to low code address */
307	LEAL	physcode-KZERO(SB), AX
308	JMP *AX
309
/*
 * Runs at low (KZERO-stripped) addresses.  Moves to a stack at 0x7C00 and
 * saves the old SP there, loads the physical GDT pointer and the real-mode
 * IDT pointer, patches the vector byte of the INT instruction in the
 * relocated copy of realmodeintrinst, turns paging off and far-jumps to
 * again16bit through the KESEG16 selector.  Clobbers AX.
 */
310TEXT physcode(SB), $0
311
312	/* switch to low stack */
313	MOVL	SP, AX
314	MOVL	$0x7C00, SP
315	PUSHL	AX
	/* the old SP now sits at 0x7BFC, where again32kzero pops it back */
316
317	/* change gdt to physical pointer */
318	MOVL	m0rgdtptr-KZERO(SB), GDTR
319
320	/* load IDT with real-mode version*/
321	MOVL	realmodeidtptr-KZERO(SB), IDTR
322
323	/* edit INT $0x00 instruction below */
	/* +1 addresses the immediate byte of the INT; RELOC selects the relocated copy */
324	MOVL	$(RMUADDR-KZERO+48), AX	/* &rmu.trap */
325	MOVL	(AX), AX
326	MOVB	AX, realmodeintrinst+(-KZERO+1+RELOC)(SB)
327
328	/* disable paging */
329	MOVL	CR0, AX
330	ANDL	$0x7FFFFFFF, AX
331	MOVL	AX, CR0
332	/* JMP .+2 to clear prefetch queue*/
333	BYTE $0xEB; BYTE $0x00
334
335	/* jump to 16-bit code segment */
336/*	JMPFAR	SELECTOR(KESEG16, SELGDT, 0):$again16bit(SB) /**/
337	 BYTE	$0xEA
338	 LONG	$again16bit-KZERO(SB)
339	 WORD	$SELECTOR(KESEG16, SELGDT, 0)
340
/*
 * Entered through the KESEG16 selector.  Flips bit 0 of CR0 (BX is built
 * as the constant 1 and XORed in) and far-jumps to now16real with code
 * segment 0x0800.
 * NOTE(review): OPSIZE is not defined in this file; presumably it comes
 * from x16.h and emits an operand-size prefix - confirm.
 */
341TEXT again16bit(SB), $0
342	/*
343	 * Now in 16-bit compatibility mode.
344	 * These are 32-bit instructions being interpreted
345	 * as 16-bit instructions.  I'm being lazy and
346	 * not using the macros because I know when
347	 * the 16- and 32-bit instructions look the same
348	 * or close enough.
349	 */
350
351	/* disable protected mode and jump to real mode cs */
352	OPSIZE; MOVL CR0, AX
353	OPSIZE; XORL BX, BX
354	OPSIZE; INCL BX
355	OPSIZE; XORL BX, AX
356	OPSIZE; MOVL AX, CR0
357
	/* hand-assembled: opcode 0xEA, 16-bit offset, 16-bit segment */
358	/* JMPFAR 0x0800:now16real */
359	 BYTE $0xEA
360	 WORD	$now16real-KZERO(SB)
361	 WORD	$0x0800
362
/*
 * Real-mode code.  Sets SS to 0, points BP at RMUADDR and loads DS, ES, DI,
 * SI, BX, DX, CX and AX from the offsets shown below (Ureg offsets, per
 * the comment), clears carry, and then runs straight on into
 * realmodeintrinst - there is no jump or return at the end of this block.
 * NOTE(review): LWI, LXW, rAX, rBP, xBP etc. are not defined in this file;
 * presumably they are the 16-bit helper macros from x16.h - confirm.
 */
363TEXT now16real(SB), $0
364	/* copy the registers for the bios call */
365	LWI(0x0000, rAX)
366	MOVW	AX,SS
367	LWI(RMUADDR, rBP)
368
369	/* offsets are in Ureg */
370	LXW(44, xBP, rAX)
371	MOVW	AX, DS
372	LXW(40, xBP, rAX)
373	MOVW	AX, ES
374
375	OPSIZE; LXW(0, xBP, rDI)
376	OPSIZE; LXW(4, xBP, rSI)
377	OPSIZE; LXW(16, xBP, rBX)
378	OPSIZE; LXW(20, xBP, rDX)
379	OPSIZE; LXW(24, xBP, rCX)
380	OPSIZE; LXW(28, xBP, rAX)
381
382	CLC
383
/*
 * The BIOS call itself, entered by falling through from now16real.
 * physcode patches the immediate byte of the INT below (at
 * realmodeintrinst+1 in the relocated copy), so the $0x00 written here is
 * only a placeholder.
 *
 * Interrupts are disabled again immediately after the call: the BIOS may
 * return with them enabled, and from here until again32kzero reloads the
 * IDT no usable protected-mode interrupt table is in place.  Because the
 * CLI comes before the PUSHFL, the flags word stored at offset 64 always
 * has the interrupt-enable bit clear; the carry flag is not affected.
 *
 * The results are written back to the structure at RMUADDR at the same
 * offsets now16real loaded them from, then protected mode is re-entered
 * and control passes to again32bit through the KESEG selector.
 */
TEXT realmodeintrinst(SB), $0
	INT $0x00
	CLI			/* the bios may have re-enabled interrupts */

	/* save the registers after the call */

	/* 0x7bfc holds the SP saved by physcode; these pushes land below it */
	LWI(0x7bfc, rSP)
	OPSIZE; PUSHFL
	OPSIZE; PUSHL AX

	LWI(0, rAX)
	MOVW	AX,SS
	LWI(RMUADDR, rBP)

	OPSIZE; SXW(rDI, 0, xBP)
	OPSIZE; SXW(rSI, 4, xBP)
	OPSIZE; SXW(rBX, 16, xBP)
	OPSIZE; SXW(rDX, 20, xBP)
	OPSIZE; SXW(rCX, 24, xBP)
	OPSIZE; POPL AX			/* AX as returned by the bios */
	OPSIZE; SXW(rAX, 28, xBP)

	MOVW	DS, AX
	OPSIZE; SXW(rAX, 44, xBP)
	MOVW	ES, AX
	OPSIZE; SXW(rAX, 40, xBP)

	OPSIZE; POPL AX
	OPSIZE; SXW(rAX, 64, xBP)	/* flags */

	/* re-enter protected mode and jump to 32-bit code */
	OPSIZE; MOVL $1, AX
	OPSIZE; MOVL AX, CR0

/*	JMPFAR	SELECTOR(KESEG, SELGDT, 0):$again32bit(SB) /**/
	 OPSIZE
	 BYTE $0xEA
	 LONG	$again32bit-KZERO(SB)
	 WORD	$SELECTOR(KESEG, SELGDT, 0)
422
/*
 * Back in 32-bit protected mode, still at low addresses with paging off.
 * Reloads every data segment register with the KDSEG selector, sets the
 * top bit of CR0 to turn paging back on and jumps to again32kzero at its
 * linked address.  Clobbers AX.
 */
423TEXT again32bit(SB), $0
424	MOVW	$SELECTOR(KDSEG, SELGDT, 0),AX
425	MOVW	AX,DS
426	MOVW	AX,SS
427	MOVW	AX,ES
428	MOVW	AX,FS
429	MOVW	AX,GS
430
431	/* enable paging and jump to kzero-address code */
432	MOVL	CR0, AX
433	ORL	$0x80000000, AX
434	MOVL	AX, CR0
435	LEAL	again32kzero(SB), AX
436	JMP*	AX
437
/*
 * Final step of the real-mode round trip.  Recovers the stack pointer that
 * physcode stored at 0x7BFC, reloads IDTR and GDTR from the virtual-address
 * pointers, restores the registers saved by realmode0's call to saveregs
 * and returns to realmode0's caller.
 */
438TEXT again32kzero(SB), $0
439	/* breathe a sigh of relief - back in 32-bit protected mode */
440
441	/* switch to old stack */
442	PUSHL	AX	/* match popl below for 8l */
443	MOVL	$0x7BFC, SP
444	POPL	SP
445
446	/* restore idt */
447	MOVL	m0idtptr(SB),IDTR
448
449	/* restore gdt */
450	MOVL	m0gdtptr(SB), GDTR
451
452	CALL	restoreregs(SB)
453	RET
454
455/*
457 * Port I/O.
458 *	in[bsl]		input a byte|short|long
459 *	ins[bsl]	input a string of bytes|shorts|longs
460 *	out[bsl]	output a byte|short|long
461 *	outs[bsl]	output a string of bytes|shorts|longs
462 */
/*
 * inb(port): read one byte from an I/O port.
 * AX is cleared first so that only its low byte is filled by the read.
 * Result in AX; clobbers DX.
 */
TEXT inb(SB), $0
	XORL	AX, AX
	MOVL	port+0(FP), DX
	INB
	RET
468
/*
 * insb(port, address, count): read count bytes from an I/O port into
 * memory starting at address, ascending.  Clobbers CX, DX and DI.
 */
TEXT insb(SB), $0
	CLD					/* string op must count upwards */
	MOVL	count+8(FP), CX
	MOVL	address+4(FP), DI
	MOVL	port+0(FP), DX
	REP;	INSB
	RET
476
/*
 * ins(port): read one 16-bit value from an I/O port.
 * The OP16 prefix turns the 32-bit INL into its 16-bit form; AX is cleared
 * beforehand so the upper half of the result is zero.
 * Result in AX; clobbers DX.
 */
TEXT ins(SB), $0
	XORL	AX, AX
	MOVL	port+0(FP), DX
	OP16;	INL
	RET
482
/*
 * inss(port, address, count): read count 16-bit values from an I/O port
 * into memory starting at address, ascending (OP16-prefixed INSL).
 * Clobbers CX, DX and DI.
 */
TEXT inss(SB), $0
	CLD					/* string op must count upwards */
	MOVL	count+8(FP), CX
	MOVL	address+4(FP), DI
	MOVL	port+0(FP), DX
	REP;	OP16; INSL
	RET
490
/*
 * inl(port): read one 32-bit value from an I/O port.
 * Result in AX; clobbers DX.
 */
491TEXT inl(SB), $0
492	MOVL	port+0(FP), DX
493	INL
494	RET
495
/*
 * insl(port, address, count): read count 32-bit values from an I/O port
 * into memory starting at address, ascending.  Clobbers CX, DX and DI.
 */
TEXT insl(SB), $0
	CLD					/* string op must count upwards */
	MOVL	count+8(FP), CX
	MOVL	address+4(FP), DI
	MOVL	port+0(FP), DX
	REP;	INSL
	RET
503
/*
 * outb(port, byte): write one byte to an I/O port.
 * The whole second argument is loaded into AX; OUTB sends its low byte.
 * Clobbers AX and DX.
 */
TEXT outb(SB), $0
	MOVL	byte+4(FP), AX
	MOVL	port+0(FP), DX
	OUTB
	RET
509
/*
 * outsb(port, address, count): write count bytes to an I/O port from
 * memory starting at address, ascending.  Clobbers CX, DX and SI.
 */
TEXT outsb(SB), $0
	CLD					/* string op must count upwards */
	MOVL	count+8(FP), CX
	MOVL	address+4(FP), SI
	MOVL	port+0(FP), DX
	REP;	OUTSB
	RET
517
/*
 * outs(port, short): write one 16-bit value to an I/O port
 * (OP16-prefixed OUTL).  Clobbers AX and DX.
 */
TEXT outs(SB), $0
	MOVL	short+4(FP), AX
	MOVL	port+0(FP), DX
	OP16;	OUTL
	RET
523
/*
 * outss(port, address, count): write count 16-bit values to an I/O port
 * from memory starting at address, ascending (OP16-prefixed OUTSL).
 * Clobbers CX, DX and SI.
 */
TEXT outss(SB), $0
	CLD					/* string op must count upwards */
	MOVL	count+8(FP), CX
	MOVL	address+4(FP), SI
	MOVL	port+0(FP), DX
	REP;	OP16; OUTSL
	RET
531
/*
 * outl(port, long): write one 32-bit value to an I/O port.
 * Clobbers AX and DX.
 */
TEXT outl(SB), $0
	MOVL	long+4(FP), AX
	MOVL	port+0(FP), DX
	OUTL
	RET
537
/*
 * outsl(port, address, count): write count 32-bit values to an I/O port
 * from memory starting at address, ascending.  Clobbers CX, DX and SI.
 */
TEXT outsl(SB), $0
	CLD					/* string op must count upwards */
	MOVL	count+8(FP), CX
	MOVL	address+4(FP), SI
	MOVL	port+0(FP), DX
	REP;	OUTSL
	RET
545
546/*
547 * Read/write various system registers.
548 * CR4 and the 'model specific registers' should only be read/written
549 * after it has been determined the processor supports them
550 */
/*
 * lgdt(gdtptr): load GDTR from the pseudo-descriptor (limit word followed
 * by base long, as in the *gdtptr tables above) that gdtptr points to.
 * Clobbers AX.
 */
551TEXT lgdt(SB), $0				/* GDTR - global descriptor table */
552	MOVL	gdtptr+0(FP), AX
553	MOVL	(AX), GDTR
554	RET
555
/*
 * lidt(idtptr): load IDTR from the pseudo-descriptor that idtptr points to.
 * Clobbers AX.
 */
556TEXT lidt(SB), $0				/* IDTR - interrupt descriptor table */
557	MOVL	idtptr+0(FP), AX
558	MOVL	(AX), IDTR
559	RET
560
/*
 * ltr(tptr): load the task register with the low 16 bits of the argument.
 * Clobbers AX.
 */
561TEXT ltr(SB), $0				/* TR - task register */
562	MOVL	tptr+0(FP), AX
563	MOVW	AX, TASK
564	RET
565
/* getcr0(): return the current value of CR0 in AX. */
566TEXT getcr0(SB), $0				/* CR0 - processor control */
567	MOVL	CR0, AX
568	RET
569
/* getcr2(): return the current value of CR2 in AX. */
570TEXT getcr2(SB), $0				/* CR2 - page fault linear address */
571	MOVL	CR2, AX
572	RET
573
/* getcr3(): return the current value of CR3 in AX. */
574TEXT getcr3(SB), $0				/* CR3 - page directory base */
575	MOVL	CR3, AX
576	RET
577
/*
 * putcr3(cr3): write the argument to CR3.  The startup code above notes
 * that loading CR3 also flushes the mmu.  Clobbers AX.
 */
578TEXT putcr3(SB), $0
579	MOVL	cr3+0(FP), AX
580	MOVL	AX, CR3
581	RET
582
/*
 * getcr4(): return the current value of CR4 in AX.  Per the note above,
 * only call this once the processor is known to have CR4.
 */
583TEXT getcr4(SB), $0				/* CR4 - extensions */
584	MOVL	CR4, AX
585	RET
586
/*
 * putcr4(cr4): write the argument to CR4.  Per the note above, only call
 * this once the processor is known to have CR4.  Clobbers AX.
 */
587TEXT putcr4(SB), $0
588	MOVL	cr4+0(FP), AX
589	MOVL	AX, CR4
590	RET
591
/*
 * invlpg(va): invalidate the TLB entry for the page containing va.
 * The INVLPG macro is a hand-encoded "INVLPG (%ecx)", so the address must
 * be in CX.  Clobbers CX.
 */
592TEXT invlpg(SB), $0
593	/* 486+ only */
594	MOVL	va+0(FP), CX
595	INVLPG
596	RET
597
/*
 * _cycles(&vlong): read the time stamp counter and store it through the
 * pointer argument, low long at offset 0 and high long at offset 4.
 * The counter is read before the pointer is fetched.  Clobbers AX, CX, DX.
 */
598TEXT _cycles(SB), $0				/* time stamp counter */
599	RDTSC
600	MOVL	vlong+0(FP), CX			/* &vlong */
601	MOVL	AX, 0(CX)			/* lo */
602	MOVL	DX, 4(CX)			/* hi */
603	RET
604
/*
 * rdmsr(index, &vlong): read model-specific register `index' and store the
 * 64-bit result through the second argument (low long at 0, high at 4).
 * CX is used first for the index and then reused for the pointer.
 * Clobbers AX, CX, DX.
 */
605TEXT rdmsr(SB), $0				/* model-specific register */
606	MOVL	index+0(FP), CX
607	RDMSR
608	MOVL	vlong+4(FP), CX			/* &vlong */
609	MOVL	AX, 0(CX)			/* lo */
610	MOVL	DX, 4(CX)			/* hi */
611	RET
612
/*
 * wrmsr(index, lo, hi): write the 64-bit value hi:lo to model-specific
 * register `index'.  WRMSR takes the index in CX and the value in DX:AX.
 * Clobbers AX, CX, DX.
 */
TEXT wrmsr(SB), $0
	MOVL	hi+8(FP), DX
	MOVL	lo+4(FP), AX
	MOVL	index+0(FP), CX
	WRMSR
	RET
619
620/*
621 * Try to determine the CPU type which requires fiddling with EFLAGS.
622 * If the Id bit can be toggled then the CPUID instruction can be used
623 * to determine CPU identity and features. First have to check if it's
624 * a 386 (Ac bit can't be set). If it's not a 386 and the Id bit can't be
625 * toggled then it's an older 486 of some kind.
626 *
627 *	cpuid(id[], &ax, &dx);
628 */
/*
 * cpuid(id, &ax, &dx): classify the processor.
 * Writes EFLAGS with Id|Ac set, reads it back, writes EFLAGS as 0, reads it
 * back again and XORs the two readings to see which of the two bits could
 * be changed.
 *   - Ac could not be changed: stores 0x300 through ax and 0 through dx.
 *   - Id could not be changed: stores 0x400 through ax and 0 through dx.
 *   - otherwise: runs CPUID with AX=0 and stores BX, DX, CX at id[0..2]
 *     (longs), then runs CPUID with AX=1 and stores the resulting AX and DX
 *     through ax and dx.
 * id is written only in the last case.  EFLAGS is left as loaded from the
 * value 0.  Clobbers AX, BX, CX, DX and BP.
 */
629TEXT cpuid(SB), $0
630	MOVL	$0x240000, AX
631	PUSHL	AX
632	POPFL					/* set Id|Ac */
633
634	PUSHFL
635	POPL	BX				/* retrieve value */
636
637	MOVL	$0, AX
638	PUSHL	AX
639	POPFL					/* clear Id|Ac, EFLAGS initialised */
640
641	PUSHFL
642	POPL	AX				/* retrieve value */
643	XORL	BX, AX
644	TESTL	$0x040000, AX			/* Ac */
645	JZ	_cpu386				/* can't set this bit on 386 */
646	TESTL	$0x200000, AX			/* Id */
647	JZ	_cpu486				/* can't toggle this bit on some 486 */
648
649	MOVL	$0, AX
650	CPUID
651	MOVL	id+0(FP), BP
652	MOVL	BX, 0(BP)			/* "Genu" "Auth" "Cyri" */
653	MOVL	DX, 4(BP)			/* "ineI" "enti" "xIns" */
654	MOVL	CX, 8(BP)			/* "ntel" "cAMD" "tead" */
655
656	MOVL	$1, AX
657	CPUID
658	JMP	_cpuid
659
660_cpu486:
661	MOVL	$0x400, AX
662	MOVL	$0, DX
663	JMP	_cpuid
664
665_cpu386:
666	MOVL	$0x300, AX
667	MOVL	$0, DX
668
669_cpuid:
670	MOVL	ax+4(FP), BP
671	MOVL	AX, 0(BP)
672	MOVL	dx+8(FP), BP
673	MOVL	DX, 0(BP)
674	RET
675
676/*
677 * Basic timing loop to determine CPU frequency.
678 */
/*
 * aamloop(count): execute AAM count times using LOOP (CX as the counter).
 * Used for timing, so the instruction sequence itself is the point.
 * NOTE(review): LOOP decrements CX before testing it, so a count of 0
 * would wrap around - callers presumably never pass 0; confirm.
 * Clobbers AX and CX.
 */
679TEXT aamloop(SB), $0
680	MOVL	count+0(FP), CX
681_aamloop:
682	AAM
683	LOOP	_aamloop
684	RET
685
686/*
687 * Floating point.
688 * Note: the encodings for the FCLEX, FINIT, FSAVE, FSTCW, FSTENV and FSTSW
689 * instructions do NOT have the WAIT prefix byte (i.e. they act like their
690 * FNxxx variations) so WAIT instructions must be explicitly placed in the
691 * code as necessary.
692 */
/*
 * FPOFF(l): disable the FPU.  Reads CR0 and isolates EM|TS; unless that is
 * exactly TS set with EM clear (0x8), a WAIT is executed first.  Then CR0
 * is rewritten with EM cleared and NE and TS set.  `l' must be a label
 * name unique to each expansion, because the macro defines it.
 *
 * FPON: enable the FPU by clearing both EM and TS in CR0.
 *
 * Both macros clobber AX.
 */
693#define	FPOFF(l)						 ;\
694	MOVL	CR0, AX 					 ;\
695	ANDL	$0xC, AX			/* EM, TS */	 ;\
696	CMPL	AX, $0x8					 ;\
697	JEQ 	l						 ;\
698	WAIT							 ;\
699l:								 ;\
700	MOVL	CR0, AX						 ;\
701	ANDL	$~0x4, AX			/* EM=0 */	 ;\
702	ORL	$0x28, AX			/* NE=1, TS=1 */ ;\
703	MOVL	AX, CR0
704
705#define	FPON							 ;\
706	MOVL	CR0, AX						 ;\
707	ANDL	$~0xC, AX			/* EM=0, TS=0 */ ;\
708	MOVL	AX, CR0
709
/* fpoff(): run the FPOFF sequence (see the macro) and return.  Clobbers AX. */
710TEXT fpoff(SB), $0				/* disable */
711	FPOFF(l1)
712	RET
713
/*
 * fpinit(): enable the FPU, initialise it with FINIT and load the control
 * word 0x0232.  The control word is passed through a temporary 16-bit
 * stack slot (PUSHW ... POPW).  The explicit WAITs are needed because, as
 * noted above, FINIT is encoded without a WAIT prefix.  Clobbers AX.
 */
714TEXT fpinit(SB), $0				/* enable and init */
715	FPON
716	FINIT
717	WAIT
718	/* setfcr(FPPDBL|FPRNR|FPINVAL|FPZDIV|FPOVFL) */
719	/* note that low 6 bits are masks, not enables, on this chip */
720	PUSHW	$0x0232
721	FLDCW	0(SP)
722	POPW	AX
723	WAIT
724	RET
725
/*
 * fpsave(p): store the FPU state at p with FSAVE, then disable the FPU
 * with FPOFF.  Clobbers AX.
 */
726TEXT fpsave(SB), $0				/* save state and disable */
727	MOVL	p+0(FP), AX
728	FSAVE	0(AX)				/* no WAIT */
729	FPOFF(l2)
730	RET
731
/*
 * fprestore(p): enable the FPU and reload its state from p with FRSTOR.
 * The argument is fetched after FPON because FPON uses AX as scratch.
 * Clobbers AX.
 */
732TEXT fprestore(SB), $0				/* enable and restore state */
733	FPON
734	MOVL	p+0(FP), AX
735	FRSTOR	0(AX)
736	WAIT
737	RET
738
/*
 * fpstatus(): return the FPU status word in AX.  Per the note above the
 * FSTSW encoding carries no WAIT prefix.
 */
739TEXT fpstatus(SB), $0				/* get floating point status */
740	FSTSW	AX
741	RET
742
/* fpenv(p): store the FPU environment at p with FSTENV.  Clobbers AX. */
743TEXT fpenv(SB), $0				/* save state without waiting */
744	MOVL	p+0(FP), AX
745	FSTENV	0(AX)
746	RET
747
/*
 * fpclear(): enable the FPU, clear its exception flags with FCLEX and
 * disable it again with FPOFF.  Clobbers AX.
 */
748TEXT fpclear(SB), $0				/* clear pending exceptions */
749	FPON
750	FCLEX					/* no WAIT */
751	FPOFF(l3)
752	RET
753
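754/*
 * Interrupt enable/disable primitives.  splhi disables interrupts and spllo
 * enables them, each returning the previous EFLAGS in AX; splx takes such a
 * value and sets the interrupt state according to its 0x200 bit.
755 */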
/*
 * splhi(): disable interrupts and return the previous EFLAGS in AX.
 * If interrupts were enabled on entry (bit 0x200 set), the return PC found
 * at (SP) is first stored at MACHADDR+4 (m->splpc).
 * The label shi is also a jump target of splx; because splx jumps rather
 * than calls, (SP) then holds the return PC of splx's caller.
 * Clobbers BX and CX.
 */
756TEXT splhi(SB), $0
757shi:
758	PUSHFL
759	POPL	AX
760	TESTL	$0x200, AX
761	JZ	alreadyhi
762	MOVL	$(MACHADDR+0x04), CX 		/* save PC in m->splpc */
763	MOVL	(SP), BX
764	MOVL	BX, (CX)
765alreadyhi:
766	CLI
767	RET
768
/*
 * spllo(): enable interrupts and return the previous EFLAGS in AX.
 * If interrupts were disabled on entry (bit 0x200 clear), the long at
 * MACHADDR+4 (m->splpc) is first set to 0.
 * The label slo is also a jump target of splx.  Clobbers CX.
 */
769TEXT spllo(SB), $0
770slo:
771	PUSHFL
772	POPL	AX
773	TESTL	$0x200, AX
774	JNZ	alreadylo
775	MOVL	$(MACHADDR+0x04), CX		/* clear m->splpc */
776	MOVL	$0, (CX)
777alreadylo:
778	STI
779	RET
780
/*
 * splx(s): set the interrupt state from a saved EFLAGS value s.
 * If bit 0x200 of s is set this continues in spllo (at slo), otherwise in
 * splhi (at shi).  Both are reached by JMP, so they return directly to
 * splx's caller with their own return value in AX.
 */
781TEXT splx(SB), $0
782	MOVL	s+0(FP), AX
783	TESTL	$0x200, AX
784	JNZ	slo
785	JMP	shi
786
/*
 * spldone(): does nothing but return.
 * NOTE(review): presumably its address marks the end of the spl routines
 * for code elsewhere - confirm against the callers.
 */
787TEXT spldone(SB), $0
788	RET
789
/*
 * islo(): return non-zero (0x200) in AX if interrupts are currently
 * enabled, 0 otherwise.  Does not change the interrupt state.
 */
790TEXT islo(SB), $0
791	PUSHFL
792	POPL	AX
793	ANDL	$0x200, AX			/* interrupt enable flag */
794	RET
795
796/*
797 * Test-And-Set
798 */
/*
 * tas(lock): exchange the long at *lock with 0xDEADDEAD in a single XCHGL
 * and return the previous contents in AX.  Clobbers BX.
 */
TEXT tas(SB), $0
	MOVL	lock+0(FP), BX
	MOVL	$0xDEADDEAD, AX
	XCHGL	AX, (BX)			/* lock->key */
	RET
804
/*
 * Increment the long at *l with a LOCK-prefixed INCL.  No value is
 * returned (AX is left holding the pointer).
 */
805TEXT _xinc(SB), $0				/* void _xinc(long*); */
806	MOVL	l+0(FP), AX
807	LOCK;	INCL 0(AX)
808	RET
809
/*
 * Decrement the long at *l with a LOCK-prefixed DECL and return -1, 0 or 1
 * according to whether the signed comparison flags left by the decrement
 * indicate a negative, zero or positive result.
 * AX is zeroed before the DECL because XORL would otherwise overwrite the
 * flags the branches test.  Clobbers BX.
 */
810TEXT _xdec(SB), $0				/* long _xdec(long*); */
811	MOVL	l+0(FP), BX
812	XORL	AX, AX
813	LOCK;	DECL 0(BX)
814	JLT	_xdeclt
815	JGT	_xdecgt
816	RET
817_xdecgt:
818	INCL	AX
819	RET
820_xdeclt:
821	DECL	AX
822	RET
823
/*
 * mb386(): return to the caller through IRETL instead of RET.  The return
 * PC is popped and an interrupt-style frame (EFLAGS, CS, PC) is pushed in
 * its place for IRETL to consume.
 * NOTE(review): presumably used as a memory barrier on processors without
 * CPUID (cf. mb586), relying on IRET being serialising - confirm.
 * Clobbers AX.
 */
824TEXT mb386(SB), $0
825	POPL	AX				/* return PC */
826	PUSHFL
827	PUSHL	CS
828	PUSHL	AX
829	IRETL
830
/*
 * mb586(): execute CPUID with AX=0 and return; the results are discarded.
 * NOTE(review): presumably used as a memory barrier, relying on CPUID
 * being serialising - confirm.
 * Clobbers AX, BX, CX and DX (CPUID writes all four, as cpuid() shows).
 */
831TEXT mb586(SB), $0
832	XORL	AX, AX
833	CPUID
834	RET
835
/*
 * xchgw(p, v): exchange the 16-bit value at *p with the low word of v in
 * a single XCHGW; the previous contents come back in the low word of AX.
 * Clobbers BX.
 */
TEXT xchgw(SB), $0
	MOVL	p+0(FP), BX
	MOVL	v+4(FP), AX
	XCHGW	AX, (BX)
	RET
841
/*
 * cmpswap486(addr, old, new): LOCK-prefixed compare-and-exchange.
 * If the long at *addr equals old it is replaced by new and 1 is returned;
 * otherwise memory is left alone and 0 is returned.
 * CMPXCHG is hand-encoded; it compares against AX and stores CX, so the
 * register assignment below is fixed.  Clobbers BX and CX.
 */
TEXT cmpswap486(SB), $0
	MOVL	new+8(FP), CX
	MOVL	old+4(FP), AX
	MOVL	addr+0(FP), BX
	LOCK
	BYTE $0x0F; BYTE $0xB1; BYTE $0x0B	/* CMPXCHGL CX, (BX) */
	JZ	_cmpswapped
	XORL	AX,AX				/* comparison failed */
	RET
_cmpswapped:
	MOVL	$1, AX
	RET
854
855TEXT mul64fract(SB), $0
856/*
857 * Multiply two 64-bit numbers and keep the middle 64 bits from the 128-bit result
858 * See ../port/tod.c for motivation.
859 */
/*
 * Arguments as read below: r+0(FP) is a pointer to the 64-bit result;
 * a occupies 4(FP) (low long, a0) and 8(FP) (high long, a1); b occupies
 * 12(FP) (b0) and 16(FP) (b1).  The result is stored as r1 at 0(r) and
 * r2 at 4(r).
 * The low long of a0*b0 and the high long of a1*b1 are never used, and a
 * carry out of r2 is dropped.  BX is kept at zero so the final ADCL adds
 * only the carry.  The order of the ADDL/ADCL pairs matters: each ADCL
 * consumes the carry of the ADDL immediately before it.
 * Clobbers AX, BX, CX and DX.
 */
860	MOVL	r+0(FP), CX
861	XORL	BX, BX				/* BX = 0 */
862
863	MOVL	a+8(FP), AX
864	MULL	b+16(FP)			/* a1*b1 */
865	MOVL	AX, 4(CX)			/* r2 = lo(a1*b1) */
866
867	MOVL	a+8(FP), AX
868	MULL	b+12(FP)			/* a1*b0 */
869	MOVL	AX, 0(CX)			/* r1 = lo(a1*b0) */
870	ADDL	DX, 4(CX)			/* r2 += hi(a1*b0) */
871
872	MOVL	a+4(FP), AX
873	MULL	b+16(FP)			/* a0*b1 */
874	ADDL	AX, 0(CX)			/* r1 += lo(a0*b1) */
875	ADCL	DX, 4(CX)			/* r2 += hi(a0*b1) + carry */
876
877	MOVL	a+4(FP), AX
878	MULL	b+12(FP)			/* a0*b0 */
879	ADDL	DX, 0(CX)			/* r1 += hi(a0*b0) */
880	ADCL	BX, 4(CX)			/* r2 += carry */
881	RET
882
883/*
884 *  label consists of a stack pointer and a PC
885 */
/*
 * gotolabel(label): resume at a label filled in by setlabel.
 * Loads SP from label[0], overwrites the return-address slot on that stack
 * with the PC from label[1] and returns 1, so execution continues as if
 * the original setlabel call had returned 1.
 */
886TEXT gotolabel(SB), $0
887	MOVL	label+0(FP), AX
888	MOVL	0(AX), SP			/* restore sp */
889	MOVL	4(AX), AX			/* put return pc on the stack */
890	MOVL	AX, 0(SP)
891	MOVL	$1, AX				/* return 1 */
892	RET
893
/*
 * setlabel(label): record the current stack pointer in label[0] and this
 * call's return PC in label[1], then return 0.  A later gotolabel on the
 * same label comes back to the same return PC with the value 1.
 * Clobbers BX.
 */
TEXT setlabel(SB), $0
	MOVL	label+0(FP), AX
	MOVL	0(SP), BX			/* return pc of this call */
	MOVL	SP, 0(AX)			/* label[0] = sp */
	MOVL	BX, 4(AX)			/* label[1] = pc */
	MOVL	$0, AX				/* return 0 */
	RET
901
902/*
903 * Attempt at power saving. -rsc
904 */
/*
 * halt(): if nrdy is non-zero, just re-enable interrupts and return;
 * otherwise re-enable interrupts and execute HLT, returning after the
 * processor is woken.  The nrdy test is made with interrupts disabled.
 * NOTE(review): the STI immediately followed by HLT presumably relies on
 * STI taking effect only after the next instruction, so no interrupt can
 * slip in between the test and the HLT - keep the two adjacent.
 */
905TEXT halt(SB), $0
906	CLI
907	CMPL	nrdy(SB), $0
908	JEQ	_nothingready
909	STI
910	RET
911
912_nothingready:
913	STI
914	HLT
915	RET
916
917/*
918 * Interrupt/exception handling.
919 * Each entry in the vector table calls either _strayintr or _strayintrx depending
920 * on whether an error code has been automatically pushed onto the stack
921 * (_strayintrx) or not, in which case a dummy entry must be pushed before retrieving
922 * the trap type from the vector table entry and placing it on the stack as part
923 * of the Ureg structure.
924 * The size of each entry in the vector table (6 bytes) is known in trapinit().
925 */
/*
 * Common entry for vectors where the processor pushed no error code.
 * On entry (SP) is the return PC of the CALL in vectortable, which points
 * at the vector-number byte following that CALL.  AX is pushed below it
 * and the PC is loaded into AX, giving the same stack shape that
 * _strayintrx produces: saved AX on top, one spare slot above it.  Here the
 * spare slot is the CALL's PC itself, standing in for the error code.
 */
926TEXT _strayintr(SB), $0
927	PUSHL	AX			/* save AX */
928	MOVL	4(SP), AX		/* return PC from vectortable(SB) */
929	JMP	intrcommon
930
/*
 * Entry for vectors where an error code is already on the stack, and (from
 * intrcommon on) the shared path for every vector.
 * At intrcommon AX holds the address of the vector-number byte in
 * vectortable and the stack holds the saved AX with the error-code slot
 * above it.  DS is saved and replaced with KDSEL before that byte is read,
 * then the trap type is exchanged into the slot that held the saved AX,
 * which also brings the original AX back into the register.  ES, FS, GS and the
 * general registers are pushed, and trap() is called with the stack
 * pointer as its argument.
 * There is no instruction after the CALL: when trap returns, execution
 * continues in forkret, which immediately follows.
 */
931TEXT _strayintrx(SB), $0
932	XCHGL	AX, (SP)		/* swap AX with vectortable CALL PC */
933intrcommon:
934	PUSHL	DS			/* save DS */
935	PUSHL	$(KDSEL)
936	POPL	DS			/* fix up DS */
937	MOVBLZX	(AX), AX		/* trap type -> AX */
938	XCHGL	AX, 4(SP)		/* exchange trap type with saved AX */
939
940	PUSHL	ES			/* save ES */
941	PUSHL	$(KDSEL)
942	POPL	ES			/* fix up ES */
943
944	PUSHL	FS			/* save the rest of the Ureg struct */
945	PUSHL	GS
946	PUSHAL
947
948	PUSHL	SP			/* Ureg* argument to trap */
949	CALL	trap(SB)
950
/*
 * Return from an interrupt or trap.  Undoes the frame built by intrcommon
 * in reverse order: discards the argument pushed for trap(), restores the
 * general registers, GS, FS, ES and DS, skips the trap type and error-code
 * slots and executes IRETL.  Also reached by falling through from the CALL
 * to trap() just above.
 */
951TEXT forkret(SB), $0
952	POPL	AX
953	POPAL
954	POPL	GS
955	POPL	FS
956	POPL	ES
957	POPL	DS
958	ADDL	$8, SP			/* pop error code and trap type */
959	IRETL
960
/*
 * One entry per interrupt vector, 0x00 through 0xFF.  Each entry is a CALL
 * followed by a single byte holding the vector number; the handlers never
 * return here but use the CALL's return address to read that byte (see
 * intrcommon).  Every entry must keep the same size, since the comment
 * above notes that trapinit() knows it to be 6 bytes.
 * Vectors 0x08, 0x0A-0x0E and 0x11 use _strayintrx because an error code is
 * already on the stack for them.  Vector 0x40 calls _syscallintr, which is
 * not defined in this file.
 */
961TEXT vectortable(SB), $0
962	CALL _strayintr(SB); BYTE $0x00		/* divide error */
963	CALL _strayintr(SB); BYTE $0x01		/* debug exception */
964	CALL _strayintr(SB); BYTE $0x02		/* NMI interrupt */
965	CALL _strayintr(SB); BYTE $0x03		/* breakpoint */
966	CALL _strayintr(SB); BYTE $0x04		/* overflow */
967	CALL _strayintr(SB); BYTE $0x05		/* bound */
968	CALL _strayintr(SB); BYTE $0x06		/* invalid opcode */
969	CALL _strayintr(SB); BYTE $0x07		/* no coprocessor available */
970	CALL _strayintrx(SB); BYTE $0x08	/* double fault */
971	CALL _strayintr(SB); BYTE $0x09		/* coprocessor segment overflow */
972	CALL _strayintrx(SB); BYTE $0x0A	/* invalid TSS */
973	CALL _strayintrx(SB); BYTE $0x0B	/* segment not available */
974	CALL _strayintrx(SB); BYTE $0x0C	/* stack exception */
975	CALL _strayintrx(SB); BYTE $0x0D	/* general protection error */
976	CALL _strayintrx(SB); BYTE $0x0E	/* page fault */
977	CALL _strayintr(SB); BYTE $0x0F		/*  */
978	CALL _strayintr(SB); BYTE $0x10		/* coprocessor error */
979	CALL _strayintrx(SB); BYTE $0x11	/* alignment check */
980	CALL _strayintr(SB); BYTE $0x12		/* machine check */
981	CALL _strayintr(SB); BYTE $0x13
982	CALL _strayintr(SB); BYTE $0x14
983	CALL _strayintr(SB); BYTE $0x15
984	CALL _strayintr(SB); BYTE $0x16
985	CALL _strayintr(SB); BYTE $0x17
986	CALL _strayintr(SB); BYTE $0x18
987	CALL _strayintr(SB); BYTE $0x19
988	CALL _strayintr(SB); BYTE $0x1A
989	CALL _strayintr(SB); BYTE $0x1B
990	CALL _strayintr(SB); BYTE $0x1C
991	CALL _strayintr(SB); BYTE $0x1D
992	CALL _strayintr(SB); BYTE $0x1E
993	CALL _strayintr(SB); BYTE $0x1F
994	CALL _strayintr(SB); BYTE $0x20		/* VectorLAPIC */
995	CALL _strayintr(SB); BYTE $0x21
996	CALL _strayintr(SB); BYTE $0x22
997	CALL _strayintr(SB); BYTE $0x23
998	CALL _strayintr(SB); BYTE $0x24
999	CALL _strayintr(SB); BYTE $0x25
1000	CALL _strayintr(SB); BYTE $0x26
1001	CALL _strayintr(SB); BYTE $0x27
1002	CALL _strayintr(SB); BYTE $0x28
1003	CALL _strayintr(SB); BYTE $0x29
1004	CALL _strayintr(SB); BYTE $0x2A
1005	CALL _strayintr(SB); BYTE $0x2B
1006	CALL _strayintr(SB); BYTE $0x2C
1007	CALL _strayintr(SB); BYTE $0x2D
1008	CALL _strayintr(SB); BYTE $0x2E
1009	CALL _strayintr(SB); BYTE $0x2F
1010	CALL _strayintr(SB); BYTE $0x30
1011	CALL _strayintr(SB); BYTE $0x31
1012	CALL _strayintr(SB); BYTE $0x32
1013	CALL _strayintr(SB); BYTE $0x33
1014	CALL _strayintr(SB); BYTE $0x34
1015	CALL _strayintr(SB); BYTE $0x35
1016	CALL _strayintr(SB); BYTE $0x36
1017	CALL _strayintr(SB); BYTE $0x37
1018	CALL _strayintr(SB); BYTE $0x38
1019	CALL _strayintr(SB); BYTE $0x39
1020	CALL _strayintr(SB); BYTE $0x3A
1021	CALL _strayintr(SB); BYTE $0x3B
1022	CALL _strayintr(SB); BYTE $0x3C
1023	CALL _strayintr(SB); BYTE $0x3D
1024	CALL _strayintr(SB); BYTE $0x3E
1025	CALL _strayintr(SB); BYTE $0x3F
1026	CALL _syscallintr(SB); BYTE $0x40	/* VectorSYSCALL */
1027	CALL _strayintr(SB); BYTE $0x41
1028	CALL _strayintr(SB); BYTE $0x42
1029	CALL _strayintr(SB); BYTE $0x43
1030	CALL _strayintr(SB); BYTE $0x44
1031	CALL _strayintr(SB); BYTE $0x45
1032	CALL _strayintr(SB); BYTE $0x46
1033	CALL _strayintr(SB); BYTE $0x47
1034	CALL _strayintr(SB); BYTE $0x48
1035	CALL _strayintr(SB); BYTE $0x49
1036	CALL _strayintr(SB); BYTE $0x4A
1037	CALL _strayintr(SB); BYTE $0x4B
1038	CALL _strayintr(SB); BYTE $0x4C
1039	CALL _strayintr(SB); BYTE $0x4D
1040	CALL _strayintr(SB); BYTE $0x4E
1041	CALL _strayintr(SB); BYTE $0x4F
1042	CALL _strayintr(SB); BYTE $0x50
1043	CALL _strayintr(SB); BYTE $0x51
1044	CALL _strayintr(SB); BYTE $0x52
1045	CALL _strayintr(SB); BYTE $0x53
1046	CALL _strayintr(SB); BYTE $0x54
1047	CALL _strayintr(SB); BYTE $0x55
1048	CALL _strayintr(SB); BYTE $0x56
1049	CALL _strayintr(SB); BYTE $0x57
1050	CALL _strayintr(SB); BYTE $0x58
1051	CALL _strayintr(SB); BYTE $0x59
1052	CALL _strayintr(SB); BYTE $0x5A
1053	CALL _strayintr(SB); BYTE $0x5B
1054	CALL _strayintr(SB); BYTE $0x5C
1055	CALL _strayintr(SB); BYTE $0x5D
1056	CALL _strayintr(SB); BYTE $0x5E
1057	CALL _strayintr(SB); BYTE $0x5F
1058	CALL _strayintr(SB); BYTE $0x60
1059	CALL _strayintr(SB); BYTE $0x61
1060	CALL _strayintr(SB); BYTE $0x62
1061	CALL _strayintr(SB); BYTE $0x63
1062	CALL _strayintr(SB); BYTE $0x64
1063	CALL _strayintr(SB); BYTE $0x65
1064	CALL _strayintr(SB); BYTE $0x66
1065	CALL _strayintr(SB); BYTE $0x67
1066	CALL _strayintr(SB); BYTE $0x68
1067	CALL _strayintr(SB); BYTE $0x69
1068	CALL _strayintr(SB); BYTE $0x6A
1069	CALL _strayintr(SB); BYTE $0x6B
1070	CALL _strayintr(SB); BYTE $0x6C
1071	CALL _strayintr(SB); BYTE $0x6D
1072	CALL _strayintr(SB); BYTE $0x6E
1073	CALL _strayintr(SB); BYTE $0x6F
1074	CALL _strayintr(SB); BYTE $0x70
1075	CALL _strayintr(SB); BYTE $0x71
1076	CALL _strayintr(SB); BYTE $0x72
1077	CALL _strayintr(SB); BYTE $0x73
1078	CALL _strayintr(SB); BYTE $0x74
1079	CALL _strayintr(SB); BYTE $0x75
1080	CALL _strayintr(SB); BYTE $0x76
1081	CALL _strayintr(SB); BYTE $0x77
1082	CALL _strayintr(SB); BYTE $0x78
1083	CALL _strayintr(SB); BYTE $0x79
1084	CALL _strayintr(SB); BYTE $0x7A
1085	CALL _strayintr(SB); BYTE $0x7B
1086	CALL _strayintr(SB); BYTE $0x7C
1087	CALL _strayintr(SB); BYTE $0x7D
1088	CALL _strayintr(SB); BYTE $0x7E
1089	CALL _strayintr(SB); BYTE $0x7F
1090	CALL _strayintr(SB); BYTE $0x80		/* Vector[A]PIC */
1091	CALL _strayintr(SB); BYTE $0x81
1092	CALL _strayintr(SB); BYTE $0x82
1093	CALL _strayintr(SB); BYTE $0x83
1094	CALL _strayintr(SB); BYTE $0x84
1095	CALL _strayintr(SB); BYTE $0x85
1096	CALL _strayintr(SB); BYTE $0x86
1097	CALL _strayintr(SB); BYTE $0x87
1098	CALL _strayintr(SB); BYTE $0x88
1099	CALL _strayintr(SB); BYTE $0x89
1100	CALL _strayintr(SB); BYTE $0x8A
1101	CALL _strayintr(SB); BYTE $0x8B
1102	CALL _strayintr(SB); BYTE $0x8C
1103	CALL _strayintr(SB); BYTE $0x8D
1104	CALL _strayintr(SB); BYTE $0x8E
1105	CALL _strayintr(SB); BYTE $0x8F
1106	CALL _strayintr(SB); BYTE $0x90
1107	CALL _strayintr(SB); BYTE $0x91
1108	CALL _strayintr(SB); BYTE $0x92
1109	CALL _strayintr(SB); BYTE $0x93
1110	CALL _strayintr(SB); BYTE $0x94
1111	CALL _strayintr(SB); BYTE $0x95
1112	CALL _strayintr(SB); BYTE $0x96
1113	CALL _strayintr(SB); BYTE $0x97
1114	CALL _strayintr(SB); BYTE $0x98
1115	CALL _strayintr(SB); BYTE $0x99
1116	CALL _strayintr(SB); BYTE $0x9A
1117	CALL _strayintr(SB); BYTE $0x9B
1118	CALL _strayintr(SB); BYTE $0x9C
1119	CALL _strayintr(SB); BYTE $0x9D
1120	CALL _strayintr(SB); BYTE $0x9E
1121	CALL _strayintr(SB); BYTE $0x9F
1122	CALL _strayintr(SB); BYTE $0xA0
1123	CALL _strayintr(SB); BYTE $0xA1
1124	CALL _strayintr(SB); BYTE $0xA2
1125	CALL _strayintr(SB); BYTE $0xA3
1126	CALL _strayintr(SB); BYTE $0xA4
1127	CALL _strayintr(SB); BYTE $0xA5
1128	CALL _strayintr(SB); BYTE $0xA6
1129	CALL _strayintr(SB); BYTE $0xA7
1130	CALL _strayintr(SB); BYTE $0xA8
1131	CALL _strayintr(SB); BYTE $0xA9
1132	CALL _strayintr(SB); BYTE $0xAA
1133	CALL _strayintr(SB); BYTE $0xAB
1134	CALL _strayintr(SB); BYTE $0xAC
1135	CALL _strayintr(SB); BYTE $0xAD
1136	CALL _strayintr(SB); BYTE $0xAE
1137	CALL _strayintr(SB); BYTE $0xAF
1138	CALL _strayintr(SB); BYTE $0xB0
1139	CALL _strayintr(SB); BYTE $0xB1
1140	CALL _strayintr(SB); BYTE $0xB2
1141	CALL _strayintr(SB); BYTE $0xB3
1142	CALL _strayintr(SB); BYTE $0xB4
1143	CALL _strayintr(SB); BYTE $0xB5
1144	CALL _strayintr(SB); BYTE $0xB6
1145	CALL _strayintr(SB); BYTE $0xB7
1146	CALL _strayintr(SB); BYTE $0xB8
1147	CALL _strayintr(SB); BYTE $0xB9
1148	CALL _strayintr(SB); BYTE $0xBA
1149	CALL _strayintr(SB); BYTE $0xBB
1150	CALL _strayintr(SB); BYTE $0xBC
1151	CALL _strayintr(SB); BYTE $0xBD
1152	CALL _strayintr(SB); BYTE $0xBE
1153	CALL _strayintr(SB); BYTE $0xBF
1154	CALL _strayintr(SB); BYTE $0xC0
1155	CALL _strayintr(SB); BYTE $0xC1
1156	CALL _strayintr(SB); BYTE $0xC2
1157	CALL _strayintr(SB); BYTE $0xC3
1158	CALL _strayintr(SB); BYTE $0xC4
1159	CALL _strayintr(SB); BYTE $0xC5
1160	CALL _strayintr(SB); BYTE $0xC6
1161	CALL _strayintr(SB); BYTE $0xC7
1162	CALL _strayintr(SB); BYTE $0xC8
1163	CALL _strayintr(SB); BYTE $0xC9
1164	CALL _strayintr(SB); BYTE $0xCA
1165	CALL _strayintr(SB); BYTE $0xCB
1166	CALL _strayintr(SB); BYTE $0xCC
1167	CALL _strayintr(SB); BYTE $0xCD
1168	CALL _strayintr(SB); BYTE $0xCE
1169	CALL _strayintr(SB); BYTE $0xCF
1170	CALL _strayintr(SB); BYTE $0xD0
1171	CALL _strayintr(SB); BYTE $0xD1
1172	CALL _strayintr(SB); BYTE $0xD2
1173	CALL _strayintr(SB); BYTE $0xD3
1174	CALL _strayintr(SB); BYTE $0xD4
1175	CALL _strayintr(SB); BYTE $0xD5
1176	CALL _strayintr(SB); BYTE $0xD6
1177	CALL _strayintr(SB); BYTE $0xD7
1178	CALL _strayintr(SB); BYTE $0xD8
1179	CALL _strayintr(SB); BYTE $0xD9
1180	CALL _strayintr(SB); BYTE $0xDA
1181	CALL _strayintr(SB); BYTE $0xDB
1182	CALL _strayintr(SB); BYTE $0xDC
1183	CALL _strayintr(SB); BYTE $0xDD
1184	CALL _strayintr(SB); BYTE $0xDE
1185	CALL _strayintr(SB); BYTE $0xDF
1186	CALL _strayintr(SB); BYTE $0xE0
1187	CALL _strayintr(SB); BYTE $0xE1
1188	CALL _strayintr(SB); BYTE $0xE2
1189	CALL _strayintr(SB); BYTE $0xE3
1190	CALL _strayintr(SB); BYTE $0xE4
1191	CALL _strayintr(SB); BYTE $0xE5
1192	CALL _strayintr(SB); BYTE $0xE6
1193	CALL _strayintr(SB); BYTE $0xE7
1194	CALL _strayintr(SB); BYTE $0xE8
1195	CALL _strayintr(SB); BYTE $0xE9
1196	CALL _strayintr(SB); BYTE $0xEA
1197	CALL _strayintr(SB); BYTE $0xEB
1198	CALL _strayintr(SB); BYTE $0xEC
1199	CALL _strayintr(SB); BYTE $0xED
1200	CALL _strayintr(SB); BYTE $0xEE
1201	CALL _strayintr(SB); BYTE $0xEF
1202	CALL _strayintr(SB); BYTE $0xF0
1203	CALL _strayintr(SB); BYTE $0xF1
1204	CALL _strayintr(SB); BYTE $0xF2
1205	CALL _strayintr(SB); BYTE $0xF3
1206	CALL _strayintr(SB); BYTE $0xF4
1207	CALL _strayintr(SB); BYTE $0xF5
1208	CALL _strayintr(SB); BYTE $0xF6
1209	CALL _strayintr(SB); BYTE $0xF7
1210	CALL _strayintr(SB); BYTE $0xF8
1211	CALL _strayintr(SB); BYTE $0xF9
1212	CALL _strayintr(SB); BYTE $0xFA
1213	CALL _strayintr(SB); BYTE $0xFB
1214	CALL _strayintr(SB); BYTE $0xFC
1215	CALL _strayintr(SB); BYTE $0xFD
1216	CALL _strayintr(SB); BYTE $0xFE
1217	CALL _strayintr(SB); BYTE $0xFF
1218