/*-
 * Copyright (c) 2010 Per Odlund <per.odlund@armagedon.se>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/* ARMv7 assembly functions for manipulating caches and other core functions.
 * Based on cpufuncs for v6 and xscale.
 */

#include "assym.h"
#include <machine/asm.h>
#include <arm/locore.h>

	.arch	armv7a

ENTRY(armv7_cpu_sleep)
	dsb
	wfi				@ wait for an interrupt
	b	irq_idle_entry		@ assume we got an interrupt
END(armv7_cpu_sleep)

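/*
 * Pipeline drain.  The MRC result is not available until the CP15 read
 * completes, and the dependent add then stalls until the MRC retires;
 * presumably this idiom predates relying on an isb for the same effect.
 */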
ENTRY(armv7_wait)
	mrc	p15, 0, r0, c2, c0, 0	@ arbitrary read of CP15
	add	r0, r0, #0		@ a stall
	bx	lr
END(armv7_wait)

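/*
 * On an MP-capable core, MPIDR reads with bit 31 set (RAO), i.e. as a
 * negative value; a plain UP core reads zero.  The signed compare below
 * exploits this to pick the shareable (TTBR_MPATTR) or non-shareable
 * (TTBR_UPATTR) table-walk attributes.  In C terms, roughly:
 *
 *	ttb |= ((int32_t)read_mpidr() < 0) ? TTBR_MPATTR : TTBR_UPATTR;
 *
 * (read_mpidr() is a stand-in name for the MRC below, not a real
 * kernel helper.)
 */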
ENTRY(armv7_context_switch)
	dsb				@ data synchronization barrier
	mrc	p15, 0, ip, c0, c0, 5	@ get MPIDR
	cmp	ip, #0
	orrlt	r0, r0, #TTBR_MPATTR	@ MP, cacheable (Normal WB)
	orrge	r0, r0, #TTBR_UPATTR	@ Non-MP, cacheable, normal WB
	mcr	p15, 0, r0, c2, c0, 0	@ set the new TTBR 0
#ifdef ARM_MMU_EXTENDED
	cmp	r1, #0
	mcreq	p15, 0, r0, c2, c0, 1	@ set the new TTBR 1
#else
	mcr	p15, 0, r0, c8, c7, 0	@ flush the I+D TLBs
#endif
	dsb
	isb
	bx	lr
END(armv7_context_switch)

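/*
 * The ASID-specific flushes below are currently compiled out: the
 * conditional is spelled ARM_MMU_EXTENDED_XXX, which is never defined,
 * apparently to disable the code without deleting it.
 */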
#ifdef ARM_MMU_EXTENDED_XXX
ENTRY(armv7up_tlb_flushID_ASID)
	mcr	p15, 0, r0, c8, c7, 2	@ flush I+D tlb entries by ASID
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7up_tlb_flushID_ASID)

#ifdef MULTIPROCESSOR
ENTRY(armv7mp_tlb_flushID_ASID)
	mcr	p15, 0, r0, c8, c3, 2	@ flush I+D tlb entries by ASID, IS
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7mp_tlb_flushID_ASID)
#endif
#endif

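/*
 * Single-entry flushes.  The low 12 bits of the MVA-plus-ASID argument
 * are cleared first, so the flush always uses ASID 0 (KERNEL_PID).
 * When the kernel page size covers two small (4KiB) L2 pages, both
 * halves must be flushed.  In C terms, roughly:
 *
 *	va &= ~PAGE_MASK;
 *	tlbimva(va);			// one TLBIMVA(IS) per 4KiB page
 *	if (PAGE_SIZE == 2 * L2_S_SIZE)
 *		tlbimva(va + L2_S_SIZE);
 *
 * (tlbimva() is a stand-in for the MCRs below, not a real kernel
 * helper.)
 */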
STRONG_ALIAS(armv7up_tlb_flushD_SE, armv7up_tlb_flushID_SE)
STRONG_ALIAS(armv7up_tlb_flushI_SE, armv7up_tlb_flushID_SE)
ENTRY(armv7up_tlb_flushID_SE)
	bfc	r0, #0, #12		@ Always KERNEL_PID, i.e. 0
	mcr	p15, 0, r0, c8, c7, 1	@ flush I+D tlb single entry
#if PAGE_SIZE == 2*L2_S_SIZE
	add	r0, r0, #L2_S_SIZE
	mcr	p15, 0, r0, c8, c7, 1	@ flush I+D tlb single entry
#endif
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7up_tlb_flushID_SE)

#ifdef MULTIPROCESSOR
STRONG_ALIAS(armv7mp_tlb_flushD_SE, armv7mp_tlb_flushID_SE)
STRONG_ALIAS(armv7mp_tlb_flushI_SE, armv7mp_tlb_flushID_SE)
ENTRY(armv7mp_tlb_flushID_SE)
	bfc	r0, #0, #12		@ Always KERNEL_PID, i.e. 0
	mcr	p15, 0, r0, c8, c3, 1	@ flush I+D tlb single entry, IS
#if PAGE_SIZE == 2*L2_S_SIZE
	add	r0, r0, #L2_S_SIZE
	mcr	p15, 0, r0, c8, c3, 1	@ flush I+D tlb single entry, IS
#endif
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7mp_tlb_flushID_SE)
#endif

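/*
 * There is no inner-shareable encoding for a D-only "invalidate all"
 * (the ARMv7 broadcast TLB ops are unified), so the MP flushD simply
 * aliases the local UP version.
 */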
#ifdef MULTIPROCESSOR
STRONG_ALIAS(armv7mp_tlb_flushD, armv7up_tlb_flushD)
#endif
ENTRY(armv7up_tlb_flushD)
	mov	r0, #0
	mcr	p15, 0, r0, c8, c6, 0	@ flush entire D tlb
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7up_tlb_flushD)

STRONG_ALIAS(armv7up_tlb_flushI, armv7up_tlb_flushID)
ENTRY(armv7up_tlb_flushID)
	dsb
	mov	r0, #0
	mcr	p15, 0, r0, c8, c7, 0	@ flush entire I+D tlb
	mcr	p15, 0, r0, c7, c5, 6	@ branch predictor invalidate
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7up_tlb_flushID)

#ifdef MULTIPROCESSOR
STRONG_ALIAS(armv7mp_tlb_flushI, armv7mp_tlb_flushID)
ENTRY(armv7mp_tlb_flushID)
	dsb
	mov	r0, #0
	mcr	p15, 0, r0, c8, c3, 0	@ flush entire I+D tlb, IS
	mcr	p15, 0, r0, c7, c1, 6	@ branch predictor invalidate, IS
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7mp_tlb_flushID)
#endif

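/*
 * Install a new translation table base.  This is the same attribute
 * selection as armv7_context_switch above; without ARM_MMU_EXTENDED the
 * whole TLB is also invalidated, since the old translations are stale.
 */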
ENTRY_NP(armv7_setttb)
	mrc	p15, 0, ip, c0, c0, 5	@ get MPIDR
	cmp	ip, #0
	orrlt	r0, r0, #TTBR_MPATTR	@ MP, cacheable (Normal WB)
	orrge	r0, r0, #TTBR_UPATTR	@ Non-MP, cacheable, normal WB
	mcr	p15, 0, r0, c2, c0, 0	@ load new TTBR 0
#ifdef ARM_MMU_EXTENDED
	cmp	r1, #0
	mcreq	p15, 0, r0, c2, c0, 1	@ load new TTBR 1
#else
	mcr	p15, 0, r0, c8, c7, 0	@ invalidate all I+D TLBs
#endif
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7_setttb)

/* Other functions. */

ENTRY_NP(armv7_drain_writebuf)
	dsb				@ data synchronization barrier
	RET
END(armv7_drain_writebuf)

/* Cache operations. */

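/*
 * The loop stride must not skip any line of either cache, so it is the
 * smaller of the I- and D-cache line sizes from CCSIDR, whose LineSize
 * field encodes log2(line bytes) - 4 (0 means 16 bytes).  The start is
 * rounded down to a line boundary and the length grown to match.  In C
 * terms, roughly:
 *
 *	line = 16 << min(ilinesize_field, dlinesize_field);
 *	len += va & (line - 1);
 *	va &= ~(line - 1);
 *	for (; len > 0; va += line, len -= line) {
 *		dccmvac(va);		// clean D line to PoC
 *		icimvau(va);		// invalidate I line to PoU
 *	}
 *
 * (dccmvac()/icimvau() are stand-ins for the MCRs below.)
 */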
/* LINTSTUB: void armv7_icache_sync_range(vaddr_t, vsize_t); */
ENTRY_NP(armv7_icache_sync_range)
	mov	ip, #CPU_CSSR_InD
	mcr	p15, 2, ip, c0, c0, 0	@ set cache level to L1-I
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	mov	ip, #0
	mcr	p15, 2, ip, c0, c0, 0	@ set cache level to L1-D
	mrc	p15, 1, r3, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get I line size (log2(size)-4, 0=16)
	and	r3, r3, #7		@ get D line size (log2(size)-4, 0=16)
	cmp	r2, r3			@ compare ilinesize to dlinesize
	movgt	r2, r3			@ pick lesser of the two
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
1:
	mcr	p15, 0, r0, c7, c10, 1	@ wb the D-Cache line
	mcr	p15, 0, r0, c7, c5, 1	@ invalidate the I-Cache line
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b

	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7_icache_sync_range)

/* LINTSTUB: void armv7_icache_sync_all(void); */
ENTRY_NP(armv7_icache_sync_all)
	/*
	 * armv7_idcache_wbinv_all does the work here: it invalidates the
	 * whole Icache and then writes back and invalidates the Dcache,
	 * which brings the two into sync.
	 */
	stmdb	sp!, {r0, lr}
	bl	_C_LABEL(armv7_idcache_wbinv_all) @ wbinv the I+D caches
	ldmia	sp!, {r0, lr}
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7_icache_sync_all)

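/*
 * The D-cache range operations below all share the same prologue: read
 * the L1 D-cache line size from CCSIDR, round the start address down to
 * a line boundary, and extend the length by the amount rounded off, so
 * partial lines at either end are still covered.  In C terms, roughly:
 *
 *	line = 16 << (ccsidr & 7);
 *	len += va & (line - 1);
 *	va &= ~(line - 1);
 *
 * Only the per-line cache op in the loop body differs: clean (wb),
 * clean+invalidate (wbinv), or invalidate (inv).
 */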
ENTRY(armv7_dcache_wb_range)
	mov	ip, #0
	mcr	p15, 2, ip, c0, c0, 0	@ set cache level to L1
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get line size (log2(size)-4, 0=16)
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
	dsb
1:
	mcr	p15, 0, r0, c7, c10, 1	@ wb the D-Cache line to PoC
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b
	dsb				@ data synchronization barrier
	bx	lr
END(armv7_dcache_wb_range)

/* LINTSTUB: void armv7_dcache_wbinv_range(vaddr_t, vsize_t); */
ENTRY(armv7_dcache_wbinv_range)
	mov	ip, #0
	mcr	p15, 2, ip, c0, c0, 0	@ set cache level to L1
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get line size (log2(size)-4, 0=16)
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
	dsb
1:
	mcr	p15, 0, r0, c7, c14, 1	@ wb and inv the D-Cache line to PoC
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b
	dsb				@ data synchronization barrier
	bx	lr
END(armv7_dcache_wbinv_range)

/* LINTSTUB: void armv7_dcache_inv_range(vaddr_t, vsize_t); */
ENTRY(armv7_dcache_inv_range)
	mov	ip, #0
	mcr	p15, 2, ip, c0, c0, 0	@ set cache level to L1
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get line size (log2(size)-4, 0=16)
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
1:
	mcr	p15, 0, r0, c7, c6, 1	@ invalidate the D-Cache line
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b

	dsb				@ data synchronization barrier
	bx	lr
END(armv7_dcache_inv_range)

/* LINTSTUB: void armv7_idcache_wbinv_range(vaddr_t, vsize_t); */
ENTRY(armv7_idcache_wbinv_range)
	mov	ip, #0
	mcr	p15, 2, ip, c0, c0, 0	@ set cache level to L1
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get line size (log2(size)-4, 0=16)
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
	dsb
1:
	mcr	p15, 0, r0, c7, c5, 1	@ invalidate the I-Cache line
	mcr	p15, 0, r0, c7, c14, 1	@ wb and inv the D-Cache line
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b

	dsb				@ data synchronization barrier
	isb
	bx	lr
END(armv7_idcache_wbinv_range)

/* LINTSTUB: void armv7_idcache_wbinv_all(void); */
ENTRY_NP(armv7_idcache_wbinv_all)
	/*
	 * We assume that the code here can never be out of sync with the
	 * dcache, so we can safely invalidate the whole Icache here and
	 * then branch into the Dcache purging code.
	 */
	dmb
	mcr	p15, 0, r0, c7, c5, 0	@ invalidate entire I-cache (ICIALLU)
	b	_C_LABEL(armv7_dcache_wbinv_all)
END(armv7_idcache_wbinv_all)

/*
 * These work very hard to not push registers onto the stack
 * and to limit themselves to use r0-r3 and ip.
 */
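/*
 * The set/way loops below all use the same register-only walk.  The
 * operand of DCISW/DCCISW packs the level into bits [3:1], the set
 * index into bits starting at log2(linesize), and the way index into
 * the topmost bits.  Starting from the maximum way/set, the loop
 * subtracts a "set decrement" while the set field is non-zero; once it
 * reaches zero, subtracting the "way decrement" lowers the way by one
 * and simultaneously restores the set field to its maximum.  In C
 * terms the walk is roughly:
 *
 *	for (way = nways - 1; way >= 0; way--)
 *		for (set = nsets - 1; set >= 0; set--)
 *			dcisw((way << wshift) | (set << sshift) | (level << 1));
 *
 * (dcisw(), wshift, and sshift are stand-ins for the MCRs and shift
 * amounts computed below.)
 */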
/* LINTSTUB: void armv7_icache_inv_all(void); */
ENTRY_NP(armv7_icache_inv_all)
	mov	r0, #0
	mcr	p15, 2, r0, c0, c0, 0	@ set cache level to L1
	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR

	ubfx	r2, r0, #13, #15	@ get numsets - 1 from CCSIDR
	ubfx	r3, r0, #3, #10		@ get numways - 1 from CCSIDR
	clz	r1, r3			@ number of bits to MSB of way
	lsl	r3, r3, r1		@ shift into position
	mov	ip, #1
	lsl	ip, ip, r1		@ ip now contains the way decr

	ubfx	r0, r0, #0, #3		@ get linesize from CCSIDR
	add	r0, r0, #4		@ apply bias
	lsl	r2, r2, r0		@ shift sets by log2(linesize)
	add	r3, r3, r2		@ merge numsets - 1 with numways - 1
	sub	ip, ip, r2		@ subtract numsets - 1 from way decr
	mov	r1, #1
	lsl	r1, r1, r0		@ r1 now contains the set decr
	mov	r2, ip			@ r2 now contains the way decr

	/* r3 = ways/sets, r2 = way decr, r1 = set decr, r0 and ip are free */
1:	mcr	p15, 0, r3, c7, c6, 2	@ DCISW (data cache invalidate by set/way)
	movs	r0, r3			@ get current way/set
	beq	2f			@ at 0 means we are done.
	lsls	r0, r0, #10		@ clear way bits leaving only set bits
	subne	r3, r3, r1		@ non-zero?, decrement set #
	subeq	r3, r3, r2		@ zero?, decrement way # and restore set count
	b	1b

2:	dsb				@ wait for stores to finish
	mov	r0, #0			@ and ...
	mcr	p15, 0, r0, c7, c5, 0	@ invalidate the I-cache (ICIALLU)
	isb				@ instruction sync barrier
	bx	lr			@ return
END(armv7_icache_inv_all)

/* LINTSTUB: void armv7_dcache_l1inv_all(void); */
ENTRY_NP(armv7_dcache_l1inv_all)
	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	ands	r0, r0, #0x7		@ check L1 (sets flags for bxeq)
	bxeq	lr			@ return if no L1 cache
	mov	r3, #0			@ start with L1
	mcr	p15, 2, r3, c0, c0, 0	@ select cache level
	isb
	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR

	ubfx	ip, r0, #0, #3		@ get linesize from CCSIDR
	add	ip, ip, #4		@ apply bias
	ubfx	r2, r0, #13, #15	@ get numsets - 1 from CCSIDR
	lsl	r2, r2, ip		@ shift to set position
	orr	r3, r3, r2		@ merge set into way/set/level
	mov	r1, #1
	lsl	r1, r1, ip		@ r1 = set decr

	ubfx	ip, r0, #3, #10		@ get numways - 1 from CCSIDR (last use of r0)
	clz	r2, ip			@ number of bits to MSB of way
	lsl	ip, ip, r2		@ shift by that into way position
	mov	r0, #1
	lsl	r2, r0, r2		@ r2 now contains the way decr
	mov	r0, r3			@ get sets/level (no way yet)
	orr	r3, r3, ip		@ merge way into way/set/level
	bfc	r0, #0, #4		@ clear level bits, leaving the set field
	sub	r2, r2, r0		@ subtract from way decr

	/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
1:	mcr	p15, 0, r3, c7, c6, 2	@ DCISW (data cache invalidate by set/way)
	cmp	r3, #15			@ are we done with this level (way/set == 0)?
	bls	.Ldone_l1inv		@ yes, we've finished
	ubfx	r0, r3, #4, #18		@ extract set bits
	cmp	r0, #0			@ set field == 0?
	subne	r3, r3, r1		@ non-zero?, decrement set #
	subeq	r3, r3, r2		@ zero?, decrement way # and restore set count
	b	1b

.Ldone_l1inv:
	dsb
	mov	r0, #0			@ default back to cache level 0
	mcr	p15, 2, r0, c0, c0, 0	@ select cache level
	dsb
	isb
	bx	lr
END(armv7_dcache_l1inv_all)

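/*
 * Invalidate the data caches at every level of the hierarchy.  CLIDR
 * holds a 3-bit cache type (Ctype) per level; levels are walked up to
 * the Level of Coherence (LoC, CLIDR bits [26:24]), and levels with no
 * data or unified cache (Ctype < 2) are skipped.  In C terms, roughly:
 *
 *	for (level = 0; level < LoC; level++) {
 *		unsigned ctype = (clidr >> (3 * level)) & 7;
 *		if (ctype >= 2)		// data or unified cache here
 *			inv_set_way(level);
 *	}
 *
 * (inv_set_way() is a stand-in for the set/way loop below.)
 */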
/* LINTSTUB: void armv7_dcache_inv_all(void); */
ENTRY_NP(armv7_dcache_inv_all)
	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	tst	r0, #0x07000000		@ check LoC (any cache to invalidate?)
	beq	.Ldone_inv
	mov	r3, #0			@ start with L1

.Lstart_inv:
	add	r2, r3, r3, lsr #1	@ r2 = 3 * level (r3 = level << 1)
	mov	r1, r0, lsr r2		@ r1 = cache type
	tst	r1, #6			@ is it data or i&d?
	beq	.Lnext_level_inv	@ nope, skip level

	mcr	p15, 2, r3, c0, c0, 0	@ select cache level
	isb
	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR

	ubfx	ip, r0, #0, #3		@ get linesize from CCSIDR
	add	ip, ip, #4		@ apply bias
	ubfx	r2, r0, #13, #15	@ get numsets - 1 from CCSIDR
	lsl	r2, r2, ip		@ shift to set position
	orr	r3, r3, r2		@ merge set into way/set/level
	mov	r1, #1
	lsl	r1, r1, ip		@ r1 = set decr

	ubfx	ip, r0, #3, #10		@ get numways - 1 from CCSIDR (last use of r0)
	clz	r2, ip			@ number of bits to MSB of way
	lsl	ip, ip, r2		@ shift by that into way position
	mov	r0, #1
	lsl	r2, r0, r2		@ r2 now contains the way decr
	mov	r0, r3			@ get sets/level (no way yet)
	orr	r3, r3, ip		@ merge way into way/set/level
	bfc	r0, #0, #4		@ clear level bits, leaving the set field
	sub	r2, r2, r0		@ subtract from way decr

	/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
1:	mcr	p15, 0, r3, c7, c6, 2	@ DCISW (data cache invalidate by set/way)
	cmp	r3, #15			@ are we done with this level (way/set == 0)?
	bls	.Lnext_level_inv	@ yes, go to next level
	ubfx	r0, r3, #4, #18		@ extract set bits
	cmp	r0, #0			@ set field == 0?
	subne	r3, r3, r1		@ non-zero?, decrement set #
	subeq	r3, r3, r2		@ zero?, decrement way # and restore set count
	b	1b

.Lnext_level_inv:
	dsb
	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	ubfx	ip, r0, #24, #3		@ narrow to LoC
	add	r3, r3, #2		@ go to next level
	cmp	r3, ip, lsl #1		@ level < LoC?
	blt	.Lstart_inv		@ not done, next level (r0 == CLIDR)

.Ldone_inv:
	mov	r0, #0			@ default back to cache level 0
	mcr	p15, 2, r0, c0, c0, 0	@ select cache level
	dsb
	isb
	bx	lr
END(armv7_dcache_inv_all)

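/*
 * Same level-by-level walk as armv7_dcache_inv_all above, but using
 * DCCISW so each line is written back before it is invalidated.
 */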
/* LINTSTUB: void armv7_dcache_wbinv_all(void); */
ENTRY_NP(armv7_dcache_wbinv_all)
	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	tst	r0, #0x07000000		@ check LoC (any cache to write back?)
	bxeq	lr
	mov	r3, #0			@ start with L1

.Lstart_wbinv:
	add	r2, r3, r3, lsr #1	@ r2 = 3 * level (r3 = level << 1)
	mov	r1, r0, lsr r2		@ r1 = cache type
	tst	r1, #6			@ is it unified or data?
	beq	.Lnext_level_wbinv	@ nope, skip level

	mcr	p15, 2, r3, c0, c0, 0	@ select cache level
	isb
	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR

	ubfx	ip, r0, #0, #3		@ get linesize from CCSIDR
	add	ip, ip, #4		@ apply bias
	ubfx	r2, r0, #13, #15	@ get numsets - 1 from CCSIDR
	lsl	r2, r2, ip		@ shift to set position
	orr	r3, r3, r2		@ merge set into way/set/level
	mov	r1, #1
	lsl	r1, r1, ip		@ r1 = set decr

	ubfx	ip, r0, #3, #10		@ get numways - 1 from CCSIDR (last use of r0)
	clz	r2, ip			@ number of bits to MSB of way
	lsl	ip, ip, r2		@ shift by that into way position
	mov	r0, #1
	lsl	r2, r0, r2		@ r2 now contains the way decr
	mov	r0, r3			@ get sets/level (no way yet)
	orr	r3, r3, ip		@ merge way into way/set/level
	bfc	r0, #0, #4		@ clear level bits, leaving the set field
	sub	r2, r2, r0		@ subtract from way decr

	/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
1:	mcr	p15, 0, r3, c7, c14, 2	@ DCCISW (data cache clean and invalidate by set/way)
	cmp	r3, #15			@ are we done with this level (way/set == 0)?
	bls	.Lnext_level_wbinv	@ yes, go to next level
	ubfx	r0, r3, #4, #18		@ extract set bits
	cmp	r0, #0			@ set field == 0?
	subne	r3, r3, r1		@ non-zero?, decrement set #
	subeq	r3, r3, r2		@ zero?, decrement way # and restore set count
	b	1b

.Lnext_level_wbinv:
	dsb
	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	ubfx	ip, r0, #24, #3		@ narrow to LoC
	add	r3, r3, #2		@ go to next level
	cmp	r3, ip, lsl #1		@ level < LoC?
	blt	.Lstart_wbinv		@ not done, next level (r0 == CLIDR)

.Ldone_wbinv:
	mov	r0, #0			@ default back to cache level 0
	mcr	p15, 2, r0, c0, c0, 0	@ select cache level
	dsb
	isb
	bx	lr
END(armv7_dcache_wbinv_all)