/*	$NetBSD: cpuswitch.S,v 1.41 2003/11/15 08:44:18 scw Exp $	*/

/*
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1994-1998 Mark Brinicombe.
 * Copyright (c) 1994 Brini.
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of the company nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * cpuswitch.S
 *
 * cpu switching functions
 *
 * Created      : 15/10/94
 */

#include "opt_armfpe.h"
#include "opt_arm32_pmap.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#include "assym.h"
#include <machine/param.h>
#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/asm.h>

/* LINTSTUB: include <sys/param.h> */

#undef IRQdisable
#undef IRQenable

/*
 * New experimental definitions of IRQdisable and IRQenable.
 * These keep FIQs enabled since FIQs are special.
 */

#define IRQdisable \
	mrs	r14, cpsr ; \
	orr	r14, r14, #(I32_bit) ; \
	msr	cpsr_c, r14 ; \

#define IRQenable \
	mrs	r14, cpsr ; \
	bic	r14, r14, #(I32_bit) ; \
	msr	cpsr_c, r14 ; \

/*
 * These are used for switching the translation table/DACR.
 * Since the vector page can be invalid for a short time, we must
 * disable both regular IRQs *and* FIQs.
 *
 * XXX: This is not necessary if the vector table is relocated.
 */
#define IRQdisableALL \
	mrs	r14, cpsr ; \
	orr	r14, r14, #(I32_bit | F32_bit) ; \
	msr	cpsr_c, r14

#define IRQenableALL \
	mrs	r14, cpsr ; \
	bic	r14, r14, #(I32_bit | F32_bit) ; \
	msr	cpsr_c, r14
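
/*
 * Illustrative only: in C-like pseudocode, a typical use of the pair
 * above is roughly
 *
 *	s = cpsr;
 *	cpsr_c = s | (I32_bit | F32_bit);	   // IRQdisableALL
 *	... switch TTB/DACR while the vector page may be invalid ...
 *	cpsr_c = s & ~(I32_bit | F32_bit);	   // IRQenableALL
 *
 * where 's' simply stands for the value shuffled through r14 by the
 * mrs/msr sequences; there is no real C helper behind it.
 */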

	.text

.Lwhichqs:
	.word	_C_LABEL(sched_whichqs)

.Lqs:
	.word	_C_LABEL(sched_qs)

/*
 * cpu_switch()
 *
 * Performs a process context switch.
 * This function has several entry points.
 */

#ifdef MULTIPROCESSOR
.Lcpu_info_store:
	.word	_C_LABEL(cpu_info_store)
.Lcurlwp:
	/* FIXME: This is bogus in the general case. */
	.word	_C_LABEL(cpu_info_store) + CI_CURLWP

.Lcurpcb:
	.word	_C_LABEL(cpu_info_store) + CI_CURPCB
#else
.Lcurlwp:
	.word	_C_LABEL(curlwp)

.Lcurpcb:
	.word	_C_LABEL(curpcb)
#endif

.Lwant_resched:
	.word	_C_LABEL(want_resched)

.Lcpufuncs:
	.word	_C_LABEL(cpufuncs)

#ifndef MULTIPROCESSOR
	.data
	.global	_C_LABEL(curpcb)
_C_LABEL(curpcb):
	.word	0x00000000
	.text
#endif

.Lblock_userspace_access:
	.word	_C_LABEL(block_userspace_access)

.Lcpu_do_powersave:
	.word	_C_LABEL(cpu_do_powersave)

.Lpmap_kernel_cstate:
	.word	(kernel_pmap_store + PMAP_CSTATE)

.Llast_cache_state_ptr:
	.word	_C_LABEL(pmap_cache_state)

/*
 * Idle loop, exercised while waiting for a process to wake up.
 *
 * NOTE: When we jump back to .Lswitch_search, we must have a
 * pointer to whichqs in r7, which is what it is when we arrive
 * here.
 */
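/*
 * Illustrative only: the assembly below behaves roughly like this C
 * sketch (helper names are descriptive stand-ins, not kernel symbols):
 *
 *	s = drop_to_spl0();
 *	for (;;) {
 *		if (sched_whichqs != 0)
 *			break;			// a run queue went non-empty
 *		if (!cpu_do_powersave)
 *			continue;		// plain spin
 *		disable_irq_and_fiq();
 *		if (sched_whichqs == 0)
 *			(*cpufuncs.cf_sleep)(0);	// powersave idle
 *		enable_irq_and_fiq();
 *	}
 *	splx(s);	// then rejoin cpu_switch at .Lswitch_search
 */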
/* LINTSTUB: Ignore */
ASENTRY_NP(idle)
	ldr	r6, .Lcpu_do_powersave
	IRQenable			/* Enable interrupts */
	ldr	r6, [r6]		/* r6 = cpu_do_powersave */

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	bl	_C_LABEL(sched_unlock_idle)
#endif

	/* Drop to spl0 (returns the current spl level in r0). */
#ifdef __NEWINTR
	mov	r0, #(IPL_NONE)
	bl	_C_LABEL(_spllower)
#else /* ! __NEWINTR */
	mov	r0, #(_SPL_0)
	bl	_C_LABEL(splx)
#endif /* __NEWINTR */

	teq	r6, #0			/* cpu_do_powersave non zero? */
	ldrne	r6, .Lcpufuncs
	mov	r4, r0			/* Old interrupt level to r4 */
	ldrne	r6, [r6, #(CF_SLEEP)]

	/*
	 * Main idle loop.
	 * r6 points to power-save idle function if required, else NULL.
	 */
1:	ldr	r3, [r7]		/* r3 = sched_whichqs */
	teq	r3, #0
	bne	2f			/* We have work to do */
	teq	r6, #0			/* Powersave idle? */
	beq	1b			/* Nope. Just sit-n-spin. */

	/*
	 * Before going into powersave idle mode, disable interrupts
	 * and check sched_whichqs one more time.
	 */
	IRQdisableALL
	ldr	r3, [r7]
	mov	r0, #0
	teq	r3, #0			/* sched_whichqs still zero? */
	moveq	lr, pc
	moveq	pc, r6			/* If so, do powersave idle */
	IRQenableALL
	b	1b			/* Back around */

	/*
	 * sched_whichqs indicates that at least one lwp is ready to run.
	 * Restore the original interrupt priority level, grab the
	 * scheduler lock if necessary, and jump back into cpu_switch.
	 */
2:	mov	r0, r4
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	bl	_C_LABEL(splx)
	adr	lr, .Lswitch_search
	b	_C_LABEL(sched_lock_idle)
#else
	adr	lr, .Lswitch_search
	b	_C_LABEL(splx)
#endif


/*
 * Find a new lwp to run, save the current context and
 * load the new context
 *
 * Arguments:
 *	r0	'struct lwp *' of the current LWP
 */

ENTRY(cpu_switch)
/*
 * Local register usage. Some of these registers are out of date.
 * r1 = oldlwp
 * r2 = spl level
 * r3 = whichqs
 * r4 = queue
 * r5 = &qs[queue]
 * r6 = newlwp
 * r7 = scratch
 */
	stmfd	sp!, {r4-r7, lr}

	/*
	 * Indicate that there is no longer a valid process (curlwp = 0).
	 * Zero the current PCB pointer while we're at it.
	 */
	ldr	r7, .Lcurlwp
	ldr	r6, .Lcurpcb
	mov	r2, #0x00000000
	str	r2, [r7]		/* curlwp = NULL */
	str	r2, [r6]		/* curpcb = NULL */

	/* stash the old proc while we call functions */
	mov	r5, r0

	/* First phase : find a new lwp */
	ldr	r7, .Lwhichqs

	/* rem: r5 = old lwp */
	/* rem: r7 = &whichqs */

.Lswitch_search:
	IRQdisable

	/* Do we have any active queues? */
	ldr	r3, [r7]

	/* If not, we must idle until we do. */
	teq	r3, #0x00000000
	beq	_ASM_LABEL(idle)

	/* put old proc back in r1 */
	mov	r1, r5

	/* rem: r1 = old lwp */
	/* rem: r3 = whichqs */
	/* rem: interrupts are disabled */

	/* used further down, saves SA stall */
	ldr	r6, .Lqs

	/*
	 * We have found an active queue. Currently we do not know which queue
	 * is active, just that one of them is.
	 */
	/* Non-Xscale version of the ffs algorithm devised by d.seal and
	 * posted to comp.sys.arm on 16 Feb 1994.
	 */
 	rsb	r5, r3, #0
 	ands	r0, r3, r5

#ifndef __XSCALE__
	adr	r5, .Lcpu_switch_ffs_table

				    /* X = R0 */
	orr	r4, r0, r0, lsl #4  /* r4 = X * 0x11 */
	orr	r4, r4, r4, lsl #6  /* r4 = X * 0x451 */
	rsb	r4, r4, r4, lsl #16 /* r4 = X * 0x0450fbaf */

	/* now lookup in table indexed on top 6 bits of r4 */
	ldrb	r4, [ r5, r4, lsr #26 ]

#else	/* __XSCALE__ */
	clz	r4, r0
	rsb	r4, r4, #31
#endif	/* __XSCALE__ */
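
/*
 * Illustrative only: both paths above compute the queue number
 * (ffs(x) - 1) of the isolated bit in r0, roughly
 *
 *	x = whichqs & -whichqs;				// lowest set bit
 *	queue = ffs_table[(x * 0x0450fbaf) >> 26];	// non-XScale
 *	queue = 31 - clz(x);				// XScale
 *
 * 'ffs_table' stands for .Lcpu_switch_ffs_table at the end of this
 * file; the multiply is built from the shift-and-add orr/rsb above.
 */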

	/* rem: r0 = bit mask of chosen queue (1 << r4) */
	/* rem: r1 = old lwp */
	/* rem: r3 = whichqs */
	/* rem: r4 = queue number */
	/* rem: interrupts are disabled */

	/* Get the address of the queue (&qs[queue]) */
	add	r5, r6, r4, lsl #3

	/*
	 * Get the lwp from the queue and place the next lwp at the head
	 * of the queue. This basically unlinks the lwp at the head of
	 * the queue.
	 */
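	/*
	 * Illustrative only: in C terms the unlink below is roughly
	 *
	 *	l = q->q_forw;				// lwp at head of queue
	 *	q->q_forw = l->l_forw;			// advance the head
	 *	l->l_forw->l_back = l->l_back;		// fix the back pointer
	 *	if (q->q_forw == q)			// queue now empty?
	 *		sched_whichqs &= ~(1 << queue);
	 *
	 * where 'q' and 'queue' stand for &sched_qs[queue] and the queue
	 * number in r4; the member names mirror the L_FORW/L_BACK offsets.
	 */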
	ldr	r6, [r5, #(L_FORW)]

#ifdef DIAGNOSTIC
	cmp	r6, r5
	beq	.Lswitch_bogons
#endif

	/* rem: r6 = new lwp */
	ldr	r7, [r6, #(L_FORW)]
	str	r7, [r5, #(L_FORW)]

	/*
	 * Test to see if the queue is now empty. If the head of the queue
	 * points to the queue itself then there are no more lwps in
	 * the queue. We can therefore clear the queue not empty flag held
	 * in r3.
	 */

	teq	r5, r7
	biceq	r3, r3, r0

	/* rem: r0 = bit mask of chosen queue (1 << r4) - NOT NEEDED ANY MORE */

	/* Fix the back pointer for the lwp now at the head of the queue. */
	ldr	r0, [r6, #(L_BACK)]
	str	r0, [r7, #(L_BACK)]

	/* Update the RAM copy of the queue not empty flags word. */
	ldreq	r7, .Lwhichqs
	streq	r3, [r7]

	/* rem: r1 = old lwp */
	/* rem: r3 = whichqs - NOT NEEDED ANY MORE */
	/* rem: r4 = queue number - NOT NEEDED ANY MORE */
	/* rem: r6 = new lwp */
	/* rem: interrupts are disabled */

	/* Clear the want_resched flag */
	ldr	r7, .Lwant_resched
	mov	r0, #0x00000000
	str	r0, [r7]

	/*
	 * Clear the back pointer of the lwp we have removed from
	 * the head of the queue. The new lwp is isolated now.
	 */
	str	r0, [r6, #(L_BACK)]

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	/*
	 * unlock the sched_lock, but leave interrupts off, for now.
	 */
	mov	r7, r1
	bl	_C_LABEL(sched_unlock_idle)
	mov	r1, r7
#endif


.Lswitch_resume:
	/* rem: r1 = old lwp */
	/* rem: r4 = return value [not used if we came from cpu_switchto()] */
	/* rem: r6 = new process */
	/* rem: interrupts are disabled */

#ifdef MULTIPROCESSOR
	/* XXX use curcpu() */
	ldr	r0, .Lcpu_info_store
	str	r0, [r6, #(L_CPU)]
#else
	/* l->l_cpu initialized in fork1() for single-processor */
#endif

	/* Process is now on a processor. */
	mov	r0, #LSONPROC			/* l->l_stat = LSONPROC */
	str	r0, [r6, #(L_STAT)]

	/* We have a new curlwp now so make a note of it */
	ldr	r7, .Lcurlwp
	str	r6, [r7]

	/* Hook in a new pcb */
	ldr	r7, .Lcurpcb
	ldr	r0, [r6, #(L_ADDR)]
	str	r0, [r7]

	/* At this point we can allow IRQs again. */
	IRQenable

	/* rem: r1 = old lwp */
	/* rem: r4 = return value */
	/* rem: r6 = new process */
	/* rem: interrupts are enabled */

	/*
	 * If the new process is the same as the process that called
	 * cpu_switch() then we do not need to save and restore any
	 * contexts. This means we can make a quick exit.
	 * The test is simple: if curlwp on entry (now in r1) is the
	 * same as the lwp removed from the queue, we can jump to the exit.
	 */
	teq	r1, r6
	moveq	r4, #0x00000000		/* default to "didn't switch" */
	beq	.Lswitch_return

	/*
	 * At this point, we are guaranteed to be switching to
	 * a new lwp.
	 */
	mov	r4, #0x00000001

	/* Remember the old lwp in r0 */
	mov	r0, r1

	/*
	 * If the old lwp on entry to cpu_switch was zero then the
	 * process that called it was exiting. This means that we do
	 * not need to save the current context. Instead we can jump
	 * straight to restoring the context for the new process.
	 */
	teq	r0, #0x00000000
	beq	.Lswitch_exited

	/* rem: r0 = old lwp */
	/* rem: r4 = return value */
	/* rem: r6 = new process */
	/* rem: interrupts are enabled */

	/* Stage two : Save old context */

	/* Get the user structure for the old lwp. */
	ldr	r1, [r0, #(L_ADDR)]

	/* Save all the registers in the old lwp's pcb */
#ifndef __XSCALE__
	add	r7, r1, #(PCB_R8)
	stmia	r7, {r8-r13}
#else
	strd	r8, [r1, #(PCB_R8)]
	strd	r10, [r1, #(PCB_R10)]
	strd	r12, [r1, #(PCB_R12)]
#endif

	/*
	 * NOTE: We can now use r8-r13 until it is time to restore
	 * them for the new process.
	 */

	/* Remember the old PCB. */
	mov	r8, r1

	/* r1 now free! */

	/* Get the user structure for the new process in r9 */
	ldr	r9, [r6, #(L_ADDR)]

	/*
	 * This can be optimised... We know we want to go from SVC32
	 * mode to UND32 mode
	 */
        mrs	r3, cpsr
	bic	r2, r3, #(PSR_MODE)
	orr	r2, r2, #(PSR_UND32_MODE | I32_bit)
        msr	cpsr_c, r2

	str	sp, [r8, #(PCB_UND_SP)]

        msr	cpsr_c, r3		/* Restore the old mode */

	/* rem: r0 = old lwp */
	/* rem: r4 = return value */
	/* rem: r6 = new process */
	/* rem: r8 = old PCB */
	/* rem: r9 = new PCB */
	/* rem: interrupts are enabled */

	/* What else needs to be saved? Only FPA stuff, when that is supported */

	/* Third phase : restore saved context */

	/* rem: r0 = old lwp */
	/* rem: r4 = return value */
	/* rem: r6 = new lwp */
	/* rem: r8 = old PCB */
	/* rem: r9 = new PCB */
	/* rem: interrupts are enabled */

	/*
	 * Get the new L1 table pointer into r11.  If we're switching to
	 * an LWP with the same address space as the outgoing one, we can
	 * skip the cache purge and the TTB load.
	 *
	 * To avoid data dep stalls that would happen anyway, we try
	 * to get some useful work done in the meantime.
	 */
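	/*
	 * Illustrative only: the test below amounts to
	 *
	 *	if (old_pcb->pcb_pagedir == new_pcb->pcb_pagedir &&
	 *	    old_pcb->pcb_dacr == new_pcb->pcb_dacr)
	 *		goto cs_context_switched;  // same L1/DACR: skip it all
	 *
	 * using the fields behind the PCB_PAGEDIR/PCB_DACR offsets loaded here.
	 */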
	ldr	r10, [r8, #(PCB_PAGEDIR)]	/* r10 = old L1 */
	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */

	ldr	r0, [r8, #(PCB_DACR)]		/* r0 = old DACR */
	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = &new_pmap->pm_cstate */
	ldr	r5, .Llast_cache_state_ptr	/* Previous thread's cstate */

	teq	r10, r11			/* Same L1? */
	ldr	r5, [r5]
	cmpeq	r0, r1				/* Same DACR? */
	beq	.Lcs_context_switched		/* yes! */

	ldr	r3, .Lblock_userspace_access
	mov	r12, #0
	cmp	r5, #0				/* No last vm? (switch_exit) */
	beq	.Lcs_cache_purge_skipped	/* If so, skip the cache flush */

	mov	r2, #DOMAIN_CLIENT
	cmp	r1, r2, lsl #(PMAP_DOMAIN_KERNEL * 2) /* Sw to kernel thread? */
	beq	.Lcs_cache_purge_skipped	/* Yup. Don't flush cache */

	cmp	r5, r8				/* Same userland VM space? */
	ldrneb	r12, [r5, #(CS_CACHE_ID)]	/* Last VM space cache state */

	/*
	 * We're definitely switching to a new userland VM space,
	 * and the previous userland VM space has yet to be flushed
	 * from the cache/tlb.
	 *
	 * r12 holds the previous VM space's cs_cache_id state
	 */
	tst	r12, #0xff			/* Test cs_cache_id */
	beq	.Lcs_cache_purge_skipped	/* VM space is not in cache */

	/*
	 * Definitely need to flush the cache.
	 * Mark the old VM space as NOT being resident in the cache.
	 */
	mov	r2, #0x00000000
	strb	r2, [r5, #(CS_CACHE_ID)]
	strb	r2, [r5, #(CS_CACHE_D)]

	/*
	 * Don't allow user space access between the purge and the switch.
	 */
	mov	r2, #0x00000001
	str	r2, [r3]

	stmfd	sp!, {r0-r3}
	ldr	r1, .Lcpufuncs
	mov	lr, pc
	ldr	pc, [r1, #CF_IDCACHE_WBINV_ALL]
	ldmfd	sp!, {r0-r3}

.Lcs_cache_purge_skipped:
	/* rem: r1 = new DACR */
	/* rem: r3 = &block_userspace_access */
	/* rem: r4 = return value */
	/* rem: r5 = &old_pmap->pm_cstate (or NULL) */
	/* rem: r6 = new lwp */
	/* rem: r8 = &new_pmap->pm_cstate */
	/* rem: r9 = new PCB */
	/* rem: r10 = old L1 */
	/* rem: r11 = new L1 */

	mov	r2, #0x00000000
	ldr	r7, [r9, #(PCB_PL1VEC)]

	/*
	 * At this point we need to kill IRQs again.
	 *
	 * XXXSCW: Don't need to block FIQs if vectors have been relocated
	 */
	IRQdisableALL

	/*
	 * Interrupts are disabled so we can allow user space accesses again
	 * as none will occur until interrupts are re-enabled after the
	 * switch.
	 */
	str	r2, [r3]

	/*
	 * Ensure the vector table is accessible by fixing up the L1
	 */
	cmp	r7, #0			/* No need to fixup vector table? */
	ldrne	r2, [r7]		/* But if yes, fetch current value */
	ldrne	r0, [r9, #(PCB_L1VEC)]	/* Fetch new vector_page value */
	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for new context */
	cmpne	r2, r0			/* Stuffing the same value? */
#ifndef PMAP_INCLUDE_PTE_SYNC
	strne	r0, [r7]		/* Nope, update it */
#else
	beq	.Lcs_same_vector
	str	r0, [r7]		/* Otherwise, update it */

	/*
	 * Need to sync the cache to make sure that last store is
	 * visible to the MMU.
	 */
	ldr	r2, .Lcpufuncs
	mov	r0, r7
	mov	r1, #4
	mov	lr, pc
	ldr	pc, [r2, #CF_DCACHE_WB_RANGE]

.Lcs_same_vector:
#endif /* PMAP_INCLUDE_PTE_SYNC */

	cmp	r10, r11		/* Switching to the same L1? */
	ldr	r10, .Lcpufuncs
	beq	.Lcs_same_l1		/* Yup. */

	/*
	 * Do a full context switch, including full TLB flush.
	 */
	mov	r0, r11
	mov	lr, pc
	ldr	pc, [r10, #CF_CONTEXT_SWITCH]

	/*
	 * Mark the old VM space as NOT being resident in the TLB
	 */
	mov	r2, #0x00000000
	cmp	r5, #0
	strneh	r2, [r5, #(CS_TLB_ID)]
	b	.Lcs_context_switched

	/*
	 * We're switching to a different process in the same L1.
	 * In this situation, we only need to flush the TLB for the
	 * vector_page mapping, and even then only if r7 is non-NULL.
	 */
.Lcs_same_l1:
	cmp	r7, #0
	movne	r0, #0			/* We *know* vector_page's VA is 0x0 */
	movne	lr, pc
	ldrne	pc, [r10, #CF_TLB_FLUSHID_SE]

.Lcs_context_switched:
	/* rem: r8 = &new_pmap->pm_cstate */

	/* XXXSCW: Safe to re-enable FIQs here */

	/*
	 * The new VM space is live in the cache and TLB.
	 * Update its cache/tlb state, and if it's not the kernel
	 * pmap, update the 'last cache state' pointer.
	 */
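	/*
	 * Illustrative only: roughly
	 *
	 *	new_pmap->pm_cstate.cs_all = ~0;   // resident in cache and TLB
	 *	if (new_pmap != pmap_kernel())
	 *		pmap_cache_state = &new_pmap->pm_cstate;
	 *
	 * with pmap_cache_state being the pointer referenced via
	 * .Llast_cache_state_ptr above.
	 */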
	mov	r2, #-1
	ldr	r5, .Lpmap_kernel_cstate
	ldr	r0, .Llast_cache_state_ptr
	str	r2, [r8, #(CS_ALL)]
	cmp	r5, r8
	strne	r8, [r0]

	/* rem: r4 = return value */
	/* rem: r6 = new lwp */
	/* rem: r9 = new PCB */

	/*
	 * This can be optimised... We know we want to go from SVC32
	 * mode to UND32 mode
	 */
        mrs	r3, cpsr
	bic	r2, r3, #(PSR_MODE)
	orr	r2, r2, #(PSR_UND32_MODE)
        msr	cpsr_c, r2

	ldr	sp, [r9, #(PCB_UND_SP)]

        msr	cpsr_c, r3		/* Restore the old mode */

	/* Restore all the saved registers */
#ifndef __XSCALE__
	add	r7, r9, #PCB_R8
	ldmia	r7, {r8-r13}

	sub	r7, r7, #PCB_R8		/* restore PCB pointer */
#else
	mov	r7, r9
	ldr	r8, [r7, #(PCB_R8)]
	ldr	r9, [r7, #(PCB_R9)]
	ldr	r10, [r7, #(PCB_R10)]
	ldr	r11, [r7, #(PCB_R11)]
	ldr	r12, [r7, #(PCB_R12)]
	ldr	r13, [r7, #(PCB_SP)]
#endif

	ldr	r5, [r6, #(L_PROC)]	/* fetch the proc for below */

	/* rem: r4 = return value */
	/* rem: r5 = new lwp's proc */
	/* rem: r6 = new lwp */
	/* rem: r7 = new pcb */

#ifdef ARMFPE
	add	r0, r7, #(USER_SIZE) & 0x00ff
	add	r0, r0, #(USER_SIZE) & 0xff00
	bl	_C_LABEL(arm_fpe_core_changecontext)
#endif

	/* We can enable interrupts again */
	IRQenableALL

	/* rem: r4 = return value */
	/* rem: r5 = new lwp's proc */
	/* rem: r6 = new lwp */
	/* rem: r7 = new PCB */

	/*
	 * Check for restartable atomic sequences (RAS).
	 */
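	/*
	 * Illustrative only: this is roughly
	 *
	 *	if (p->p_raslist != NULL) {
	 *		pc = ras_lookup(p, tf->tf_pc);
	 *		if (pc != (void *)-1)
	 *			tf->tf_pc = pc;		// restart the sequence
	 *	}
	 */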

	ldr	r2, [r5, #(P_RASLIST)]
	ldr	r1, [r7, #(PCB_TF)]	/* r1 = trapframe (used below) */
	teq	r2, #0			/* p->p_raslist == NULL? */
	bne	.Lswitch_do_ras		/* no, check for one */

.Lswitch_return:
	/* cpu_switch returns 1 == switched, 0 == didn't switch */
	mov	r0, r4

	/*
	 * Pull the registers that got pushed when either savectx() or
	 * cpu_switch() was called and return.
	 */
	ldmfd	sp!, {r4-r7, pc}

.Lswitch_do_ras:
	ldr	r1, [r1, #(TF_PC)]	/* second ras_lookup() arg */
	mov	r0, r5			/* first ras_lookup() arg */
	bl	_C_LABEL(ras_lookup)
	cmn	r0, #1			/* -1 means "not in a RAS" */
	ldrne	r1, [r7, #(PCB_TF)]
	strne	r0, [r1, #(TF_PC)]
	b	.Lswitch_return

.Lswitch_exited:
	/*
	 * We skip the cache purge because switch_exit() already did it.
	 * Load up registers the way .Lcs_cache_purge_skipped expects.
	 * Userspace access already blocked by switch_exit().
	 */
	ldr	r9, [r6, #(L_ADDR)]		/* r9 = new PCB */
	ldr	r3, .Lblock_userspace_access
	mrc	p15, 0, r10, c2, c0, 0		/* r10 = old L1 */
	mov	r5, #0				/* No previous cache state */
	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = new cache state */
	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */
	b	.Lcs_cache_purge_skipped


#ifdef DIAGNOSTIC
.Lswitch_bogons:
	adr	r0, .Lswitch_panic_str
	bl	_C_LABEL(panic)
1:	nop
	b	1b

.Lswitch_panic_str:
	.asciz	"cpu_switch: sched_qs empty with non-zero sched_whichqs!\n"
#endif

/*
 * cpu_switchto(struct lwp *current, struct lwp *next)
 * Switch to the specified next LWP
 * Arguments:
 *
 *	r0	'struct lwp *' of the current LWP
 *	r1	'struct lwp *' of the LWP to switch to
 */
ENTRY(cpu_switchto)
	stmfd	sp!, {r4-r7, lr}

	mov	r6, r1		/* save new lwp */

#if defined(LOCKDEBUG)
	mov	r5, r0		/* save old lwp */
	bl	_C_LABEL(sched_unlock_idle)
	mov	r1, r5
#else
	mov	r1, r0
#endif

	IRQdisable

	/*
	 * Okay, set up registers the way cpu_switch() wants them,
	 * and jump into the middle of it (where we bring up the
	 * new process).
	 *
	 * r1 = old lwp (r6 = new lwp)
	 */
	b	.Lswitch_resume

/*
 * void switch_exit(struct lwp *l, struct lwp *l0, void (*exit)(struct lwp *));
 * Switch to lwp0's saved context and deallocate the address space and kernel
 * stack for l.  Then jump into cpu_switch(), as if we were in lwp0 all along.
 */
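/*
 * Illustrative only: a typical caller (the port's cpu_exit()) does
 * something like
 *
 *	switch_exit(l, &lwp0, lwp_exit2);	// or exit2, see below
 *
 * so the final clean-up runs on lwp0's stack once this lwp's own
 * kernel stack can be freed.
 */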

/* LINTSTUB: Func: void switch_exit(struct lwp *l, struct lwp *l0, void (*func)(struct lwp *)) */
ENTRY(switch_exit)
	/*
	 * The process is going away, so we can use callee-saved
	 * registers here without having to save them.
	 */

	mov	r4, r0
	ldr	r0, .Lcurlwp

	mov	r5, r1
	ldr	r1, .Lblock_userspace_access

	mov	r6, r2

	/*
	 * r4 = lwp
	 * r5 = lwp0
	 * r6 = exit func
	 */

	mov	r2, #0x00000000		/* curlwp = NULL */
	str	r2, [r0]

	/*
	 * We're about to clear both the cache and the TLB.
	 * Make sure to zap the 'last cache state' pointer since the
	 * pmap might be about to go away. Also ensure the outgoing
	 * VM space's cache state is marked as NOT resident in the
	 * cache, and that lwp0's cache state IS resident.
	 */
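	/*
	 * Illustrative only: the stores below amount to
	 *
	 *	pmap_cache_state = NULL;	   // no previous cache state
	 *	old_pmap->pm_cstate.cs_all = 0;	   // outgoing lwp: not resident
	 *	lwp0_pmap->pm_cstate.cs_all = ~0;  // lwp0: fully resident
	 */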
	ldr	r7, [r4, #(L_ADDR)]		/* r7 = old lwp's PCB */
	ldr	r0, .Llast_cache_state_ptr	/* Last userland cache state */
	ldr	r9, [r7, #(PCB_CSTATE)]		/* Fetch cache state pointer */
	ldr	r3, [r5, #(L_ADDR)]		/* r3 = lwp0's PCB */
	str	r2, [r0]			/* No previous cache state */
	str	r2, [r9, #(CS_ALL)]		/* Zap old lwp's cache state */
	ldr	r3, [r3, #(PCB_CSTATE)]		/* lwp0's cache state */
	mov	r2, #-1
	str	r2, [r3, #(CS_ALL)]		/* lwp0 is in da cache! */

	/*
	 * Don't allow user space access between the purge and the switch.
	 */
	mov	r2, #0x00000001
	str	r2, [r1]

	/* Switch to lwp0 context */

	ldr	r9, .Lcpufuncs
	mov	lr, pc
	ldr	pc, [r9, #CF_IDCACHE_WBINV_ALL]

	ldr	r0, [r7, #(PCB_PL1VEC)]
	ldr	r1, [r7, #(PCB_DACR)]

	/*
	 * r0 = Pointer to L1 slot for vector_page (or NULL)
	 * r1 = lwp0's DACR
	 * r4 = lwp we're switching from
	 * r5 = lwp0
	 * r6 = exit func
	 * r7 = lwp0's PCB
	 * r9 = cpufuncs
	 */

	IRQdisableALL

	/*
	 * Ensure the vector table is accessible by fixing up lwp0's L1
	 */
	cmp	r0, #0			/* No need to fixup vector table? */
	ldrne	r3, [r0]		/* But if yes, fetch current value */
	ldrne	r2, [r7, #(PCB_L1VEC)]	/* Fetch new vector_page value */
	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for lwp0's context */
	cmpne	r3, r2			/* Stuffing the same value? */
	strne	r2, [r0]		/* Store if not. */

#ifdef PMAP_INCLUDE_PTE_SYNC
	/*
	 * Need to sync the cache to make sure that last store is
	 * visible to the MMU.
	 */
	movne	r1, #4
	movne	lr, pc
	ldrne	pc, [r9, #CF_DCACHE_WB_RANGE]
#endif /* PMAP_INCLUDE_PTE_SYNC */

	/*
	 * Note: We don't do the same optimisation as cpu_switch() with
	 * respect to avoiding flushing the TLB if we're switching to
	 * the same L1 since this process' VM space may be about to go
	 * away, so we don't want *any* turds left in the TLB.
	 */

	/* Switch the memory to the new process */
	ldr	r0, [r7, #(PCB_PAGEDIR)]
	mov	lr, pc
	ldr	pc, [r9, #CF_CONTEXT_SWITCH]

	ldr	r0, .Lcurpcb

	/* Restore all the saved registers */
#ifndef __XSCALE__
	add	r1, r7, #PCB_R8
	ldmia	r1, {r8-r13}
#else
	ldr	r8, [r7, #(PCB_R8)]
	ldr	r9, [r7, #(PCB_R9)]
	ldr	r10, [r7, #(PCB_R10)]
	ldr	r11, [r7, #(PCB_R11)]
	ldr	r12, [r7, #(PCB_R12)]
	ldr	r13, [r7, #(PCB_SP)]
#endif
	str	r7, [r0]	/* curpcb = lwp0's PCB */

	IRQenableALL

	/*
	 * Schedule the vmspace and stack to be freed.
	 */
	mov	r0, r4			/* {lwp_}exit2(l) */
	mov	lr, pc
	mov	pc, r6

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	bl	_C_LABEL(sched_lock_idle)
#endif

	ldr	r7, .Lwhichqs		/* r7 = &whichqs */
	mov	r5, #0x00000000		/* r5 = old lwp = NULL */
	b	.Lswitch_search

/* LINTSTUB: Func: void savectx(struct pcb *pcb) */
ENTRY(savectx)
	/*
	 * r0 = pcb
	 */

	/* Push registers.*/
	stmfd	sp!, {r4-r7, lr}

	/* Store all the registers in the process's pcb */
#ifndef __XSCALE__
	add	r2, r0, #(PCB_R8)
	stmia	r2, {r8-r13}
#else
	strd	r8, [r0, #(PCB_R8)]
	strd	r10, [r0, #(PCB_R10)]
	strd	r12, [r0, #(PCB_R12)]
#endif

	/* Pull the regs off the stack */
	ldmfd	sp!, {r4-r7, pc}

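/*
 * proc_trampoline()
 *
 * First-time entry point for a newly forked lwp: cpu_switch() "returns"
 * here, we drop to spl0, call the function set up in the switchframe
 * (r4), passing the argument from r5, then disable IRQs and return to
 * user mode via PULLFRAME.  (Descriptive comment only; the r4/r5
 * convention is established by this port's cpu_fork().)
 */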
ENTRY(proc_trampoline)
#ifdef __NEWINTR
	mov	r0, #(IPL_NONE)
	bl	_C_LABEL(_spllower)
#else /* ! __NEWINTR */
	mov	r0, #(_SPL_0)
	bl	_C_LABEL(splx)
#endif /* __NEWINTR */

#ifdef MULTIPROCESSOR
	bl	_C_LABEL(proc_trampoline_mp)
#endif
	mov	r0, r5
	mov	r1, sp
	mov	lr, pc
	mov	pc, r4

	/* Kill IRQs */
        mrs     r0, cpsr
        orr     r0, r0, #(I32_bit)
        msr     cpsr_c, r0

	PULLFRAME

	movs	pc, lr			/* Exit */

#ifndef __XSCALE__
	.type .Lcpu_switch_ffs_table, _ASM_TYPE_OBJECT;
.Lcpu_switch_ffs_table:
/* same as the ffs table but each entry is one less */
/*               0   1   2   3   4   5   6   7           */
	.byte	 0,  0,  1, 12,  2,  6,  0, 13  /*  0- 7 */
	.byte	 3,  0,  7,  0,  0,  0,  0, 14  /*  8-15 */
	.byte	10,  4,  0,  0,  8,  0,  0, 25  /* 16-23 */
	.byte	 0,  0,  0,  0,  0, 21, 27, 15  /* 24-31 */
	.byte	31, 11,  5,  0,  0,  0,  0,  0	/* 32-39 */
	.byte	 9,  0,  0, 24,  0,  0, 20, 26  /* 40-47 */
	.byte	30,  0,  0,  0,  0, 23,  0, 19  /* 48-55 */
	.byte   29,  0, 22, 18, 28, 17, 16,  0  /* 56-63 */
#endif	/* !__XSCALE__ */