xref: /netbsd-src/external/gpl3/gcc.old/dist/libgcc/config/sh/lib1funcs.S (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1/* Copyright (C) 1994-2020 Free Software Foundation, Inc.
2
3This file is free software; you can redistribute it and/or modify it
4under the terms of the GNU General Public License as published by the
5Free Software Foundation; either version 3, or (at your option) any
6later version.
7
8This file is distributed in the hope that it will be useful, but
9WITHOUT ANY WARRANTY; without even the implied warranty of
10MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11General Public License for more details.
12
13Under Section 7 of GPL version 3, you are granted additional
14permissions described in the GCC Runtime Library Exception, version
153.1, as published by the Free Software Foundation.
16
17You should have received a copy of the GNU General Public License and
18a copy of the GCC Runtime Library Exception along with this program;
19see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
20<http://www.gnu.org/licenses/>.  */
21
22
23!! libgcc routines for the Renesas / SuperH SH CPUs.
24!! Contributed by Steve Chamberlain.
25!! sac@cygnus.com
26
27!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
28!! recoded in assembly by Toshiyasu Morita
29!! tm@netcom.com
30
31#if defined(__ELF__) && defined(__linux__)
32.section .note.GNU-stack,"",%progbits
33.previous
34#endif
35
36/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37   ELF local label prefixes by J"orn Rennecke
38   amylaar@cygnus.com  */
39
40#include "lib1funcs.h"
41
42/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
43   so it is more convenient to define NO_FPSCR_VALUES here than to
44   define it on the command line.  */
45#if defined __vxworks && defined __PIC__
46#define NO_FPSCR_VALUES
47#endif
48
49#ifdef L_ashiftrt
50	.global	GLOBAL(ashiftrt_r4_0)
51	.global	GLOBAL(ashiftrt_r4_1)
52	.global	GLOBAL(ashiftrt_r4_2)
53	.global	GLOBAL(ashiftrt_r4_3)
54	.global	GLOBAL(ashiftrt_r4_4)
55	.global	GLOBAL(ashiftrt_r4_5)
56	.global	GLOBAL(ashiftrt_r4_6)
57	.global	GLOBAL(ashiftrt_r4_7)
58	.global	GLOBAL(ashiftrt_r4_8)
59	.global	GLOBAL(ashiftrt_r4_9)
60	.global	GLOBAL(ashiftrt_r4_10)
61	.global	GLOBAL(ashiftrt_r4_11)
62	.global	GLOBAL(ashiftrt_r4_12)
63	.global	GLOBAL(ashiftrt_r4_13)
64	.global	GLOBAL(ashiftrt_r4_14)
65	.global	GLOBAL(ashiftrt_r4_15)
66	.global	GLOBAL(ashiftrt_r4_16)
67	.global	GLOBAL(ashiftrt_r4_17)
68	.global	GLOBAL(ashiftrt_r4_18)
69	.global	GLOBAL(ashiftrt_r4_19)
70	.global	GLOBAL(ashiftrt_r4_20)
71	.global	GLOBAL(ashiftrt_r4_21)
72	.global	GLOBAL(ashiftrt_r4_22)
73	.global	GLOBAL(ashiftrt_r4_23)
74	.global	GLOBAL(ashiftrt_r4_24)
75	.global	GLOBAL(ashiftrt_r4_25)
76	.global	GLOBAL(ashiftrt_r4_26)
77	.global	GLOBAL(ashiftrt_r4_27)
78	.global	GLOBAL(ashiftrt_r4_28)
79	.global	GLOBAL(ashiftrt_r4_29)
80	.global	GLOBAL(ashiftrt_r4_30)
81	.global	GLOBAL(ashiftrt_r4_31)
82	.global	GLOBAL(ashiftrt_r4_32)
83
84	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
85	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
86	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
87	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
88	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
89	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
90	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
91	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
92	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
93	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
94	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
95	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
96	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
97	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
98	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
99	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
100	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
101	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
102	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
103	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
104	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
105	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
106	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
107	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
108	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
109	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
110	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
111	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
112	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
113	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
114	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
115	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
116	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
117
/* ashiftrt_r4_N: arithmetic right shift of r4 by the constant N (0..32).
   Entry: r4 = value.  Exit: r4 = value >> N (sign propagating).
   The T bit is modified by the shift instructions.

   All entry points share one chain of code.  Entering at label N executes
   single-bit "shar r4" steps until the next block-ending "rts" is reached.
   The instruction written after each "rts" runs in its branch delay slot.  */
118	.align	1
/* Shifts by 31 or 32 leave only copies of the sign bit: rotcl moves bit 31
   into T, then subc computes r4 - r4 - T, i.e. 0 or -1.  */
119GLOBAL(ashiftrt_r4_32):
120GLOBAL(ashiftrt_r4_31):
121	rotcl	r4
122	rts
123	subc	r4,r4
124
/* 30..25: one shar per step above 24, then fall into the 24-bit case.  */
125GLOBAL(ashiftrt_r4_30):
126	shar	r4
127GLOBAL(ashiftrt_r4_29):
128	shar	r4
129GLOBAL(ashiftrt_r4_28):
130	shar	r4
131GLOBAL(ashiftrt_r4_27):
132	shar	r4
133GLOBAL(ashiftrt_r4_26):
134	shar	r4
135GLOBAL(ashiftrt_r4_25):
136	shar	r4
/* 24: logical shift right by 16 + 8, then sign-extend the remaining byte.  */
137GLOBAL(ashiftrt_r4_24):
138	shlr16	r4
139	shlr8	r4
140	rts
141	exts.b	r4,r4
142
/* 23..17: one shar per step above 16, then fall into the 16-bit case.  */
143GLOBAL(ashiftrt_r4_23):
144	shar	r4
145GLOBAL(ashiftrt_r4_22):
146	shar	r4
147GLOBAL(ashiftrt_r4_21):
148	shar	r4
149GLOBAL(ashiftrt_r4_20):
150	shar	r4
151GLOBAL(ashiftrt_r4_19):
152	shar	r4
153GLOBAL(ashiftrt_r4_18):
154	shar	r4
155GLOBAL(ashiftrt_r4_17):
156	shar	r4
/* 16: logical shift right by 16, then sign-extend the remaining word.  */
157GLOBAL(ashiftrt_r4_16):
158	shlr16	r4
159	rts
160	exts.w	r4,r4
161
/* 15..2: plain chain of single-bit arithmetic shifts.  */
162GLOBAL(ashiftrt_r4_15):
163	shar	r4
164GLOBAL(ashiftrt_r4_14):
165	shar	r4
166GLOBAL(ashiftrt_r4_13):
167	shar	r4
168GLOBAL(ashiftrt_r4_12):
169	shar	r4
170GLOBAL(ashiftrt_r4_11):
171	shar	r4
172GLOBAL(ashiftrt_r4_10):
173	shar	r4
174GLOBAL(ashiftrt_r4_9):
175	shar	r4
176GLOBAL(ashiftrt_r4_8):
177	shar	r4
178GLOBAL(ashiftrt_r4_7):
179	shar	r4
180GLOBAL(ashiftrt_r4_6):
181	shar	r4
182GLOBAL(ashiftrt_r4_5):
183	shar	r4
184GLOBAL(ashiftrt_r4_4):
185	shar	r4
186GLOBAL(ashiftrt_r4_3):
187	shar	r4
188GLOBAL(ashiftrt_r4_2):
189	shar	r4
/* 1: the final shar is performed in the delay slot of the return.  */
190GLOBAL(ashiftrt_r4_1):
191	rts
192	shar	r4
193
/* 0: nothing to do, r4 is returned unchanged.  */
194GLOBAL(ashiftrt_r4_0):
195	rts
196	nop
197
198	ENDFUNC(GLOBAL(ashiftrt_r4_0))
199	ENDFUNC(GLOBAL(ashiftrt_r4_1))
200	ENDFUNC(GLOBAL(ashiftrt_r4_2))
201	ENDFUNC(GLOBAL(ashiftrt_r4_3))
202	ENDFUNC(GLOBAL(ashiftrt_r4_4))
203	ENDFUNC(GLOBAL(ashiftrt_r4_5))
204	ENDFUNC(GLOBAL(ashiftrt_r4_6))
205	ENDFUNC(GLOBAL(ashiftrt_r4_7))
206	ENDFUNC(GLOBAL(ashiftrt_r4_8))
207	ENDFUNC(GLOBAL(ashiftrt_r4_9))
208	ENDFUNC(GLOBAL(ashiftrt_r4_10))
209	ENDFUNC(GLOBAL(ashiftrt_r4_11))
210	ENDFUNC(GLOBAL(ashiftrt_r4_12))
211	ENDFUNC(GLOBAL(ashiftrt_r4_13))
212	ENDFUNC(GLOBAL(ashiftrt_r4_14))
213	ENDFUNC(GLOBAL(ashiftrt_r4_15))
214	ENDFUNC(GLOBAL(ashiftrt_r4_16))
215	ENDFUNC(GLOBAL(ashiftrt_r4_17))
216	ENDFUNC(GLOBAL(ashiftrt_r4_18))
217	ENDFUNC(GLOBAL(ashiftrt_r4_19))
218	ENDFUNC(GLOBAL(ashiftrt_r4_20))
219	ENDFUNC(GLOBAL(ashiftrt_r4_21))
220	ENDFUNC(GLOBAL(ashiftrt_r4_22))
221	ENDFUNC(GLOBAL(ashiftrt_r4_23))
222	ENDFUNC(GLOBAL(ashiftrt_r4_24))
223	ENDFUNC(GLOBAL(ashiftrt_r4_25))
224	ENDFUNC(GLOBAL(ashiftrt_r4_26))
225	ENDFUNC(GLOBAL(ashiftrt_r4_27))
226	ENDFUNC(GLOBAL(ashiftrt_r4_28))
227	ENDFUNC(GLOBAL(ashiftrt_r4_29))
228	ENDFUNC(GLOBAL(ashiftrt_r4_30))
229	ENDFUNC(GLOBAL(ashiftrt_r4_31))
230	ENDFUNC(GLOBAL(ashiftrt_r4_32))
231#endif
232
233#ifdef L_ashiftrt_n
234
235!
236! GLOBAL(ashrsi3)
237!
238! Entry:
239!
240! r4: Value to shift
241! r5: Shift count
242!
243! Exit:
244!
245! r0: Result
246!
247! Destroys:
248!
249! T bit, r5
250!
251
/* ashrsi3: arithmetic right shift by a variable count.
   r4 = value, r5 = count (only the low 5 bits are used), result in r0.
   Dispatch goes through a table of byte offsets, one per shift count,
   each giving the distance from ashrsi3_table to the matching entry in
   the shift chain that follows the table.  */
252	.global	GLOBAL(ashrsi3)
253	HIDDEN_FUNC(GLOBAL(ashrsi3))
254	.align	2
255GLOBAL(ashrsi3):
/* r5 &= 31, then r5 = table[r5] (a byte offset from the table start).  */
256	mov	#31,r0
257	and	r0,r5
258	mova	LOCAL(ashrsi3_table),r0
259	mov.b	@(r0,r5),r5
/* SH1 has no braf, so the absolute target (table address + offset) is
   formed in r0 and reached with jmp.  Elsewhere braf is used: it is
   relative to the address of braf + 4, which is exactly where the table
   starts because the routine is 4-byte aligned.  */
260#ifdef __sh1__
261	add	r5,r0
262	jmp	@r0
263#else
264	braf	r5
265#endif
/* Delay slot of the jump above: the value to shift is copied into r0.  */
266	mov	r4,r0
267
268	.align	2
/* One byte per shift count.  The largest offset (entry 0, at the end of
   the chain) stays below 128, so the sign extension done by mov.b does
   not change any entry.  */
269LOCAL(ashrsi3_table):
270	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
271	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
272	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
273	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
274	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
275	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
276	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
277	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
278	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
279	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
280	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
281	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
282	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
283	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
284	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
285	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
286	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
287	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
288	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
289	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
290	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
291	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
292	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
293	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
294	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
295	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
296	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
297	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
298	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
299	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
300	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
301	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
302
/* 31: bit 31 goes to T, then r0 = r0 - r0 - T, i.e. 0 or -1.  */
303LOCAL(ashrsi3_31):
304	rotcl	r0
305	rts
306	subc	r0,r0
307
/* 30..25: single-bit shifts, then fall into the 24-bit case.  */
308LOCAL(ashrsi3_30):
309	shar	r0
310LOCAL(ashrsi3_29):
311	shar	r0
312LOCAL(ashrsi3_28):
313	shar	r0
314LOCAL(ashrsi3_27):
315	shar	r0
316LOCAL(ashrsi3_26):
317	shar	r0
318LOCAL(ashrsi3_25):
319	shar	r0
/* 24: logical shift by 16 + 8, sign-extend the byte in the delay slot.  */
320LOCAL(ashrsi3_24):
321	shlr16	r0
322	shlr8	r0
323	rts
324	exts.b	r0,r0
325
/* 23..17: single-bit shifts, then fall into the 16-bit case.  */
326LOCAL(ashrsi3_23):
327	shar	r0
328LOCAL(ashrsi3_22):
329	shar	r0
330LOCAL(ashrsi3_21):
331	shar	r0
332LOCAL(ashrsi3_20):
333	shar	r0
334LOCAL(ashrsi3_19):
335	shar	r0
336LOCAL(ashrsi3_18):
337	shar	r0
338LOCAL(ashrsi3_17):
339	shar	r0
/* 16: logical shift by 16, sign-extend the word in the delay slot.  */
340LOCAL(ashrsi3_16):
341	shlr16	r0
342	rts
343	exts.w	r0,r0
344
/* 15..2: plain chain of single-bit arithmetic shifts.  */
345LOCAL(ashrsi3_15):
346	shar	r0
347LOCAL(ashrsi3_14):
348	shar	r0
349LOCAL(ashrsi3_13):
350	shar	r0
351LOCAL(ashrsi3_12):
352	shar	r0
353LOCAL(ashrsi3_11):
354	shar	r0
355LOCAL(ashrsi3_10):
356	shar	r0
357LOCAL(ashrsi3_9):
358	shar	r0
359LOCAL(ashrsi3_8):
360	shar	r0
361LOCAL(ashrsi3_7):
362	shar	r0
363LOCAL(ashrsi3_6):
364	shar	r0
365LOCAL(ashrsi3_5):
366	shar	r0
367LOCAL(ashrsi3_4):
368	shar	r0
369LOCAL(ashrsi3_3):
370	shar	r0
371LOCAL(ashrsi3_2):
372	shar	r0
/* 1: the last shift is done in the delay slot of the return.  */
373LOCAL(ashrsi3_1):
374	rts
375	shar	r0
376
/* 0: r0 already holds the unshifted value.  */
377LOCAL(ashrsi3_0):
378	rts
379	nop
380
381	ENDFUNC(GLOBAL(ashrsi3))
382#endif
383
384#ifdef L_ashiftlt
385
386!
387! GLOBAL(ashlsi3)
388! (For compatibility with older binaries, not used by compiler)
389!
390! Entry:
391!	r4: Value to shift
392!	r5: Shift count
393!
394! Exit:
395!	r0: Result
396!
397! Destroys:
398!	T bit
399!
400!
401! GLOBAL(ashlsi3_r0)
402!
403! Entry:
404!	r4: Value to shift
405!	r0: Shift count
406!
407! Exit:
408!	r0: Result
409!
410! Destroys:
411!	T bit
412
/* ashlsi3 / ashlsi3_r0: left shift by a variable count.
   The dispatch table below is made of code, not data: every entry is
   exactly two instructions (4 bytes), so the entry for count N lives at
   table + 4*N.  Do not insert or remove instructions inside the table.  */
413	.global	GLOBAL(ashlsi3)
414	.global GLOBAL(ashlsi3_r0)
415	HIDDEN_FUNC(GLOBAL(ashlsi3))
416	HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
/* Legacy entry: move the count from r5 to r0 and fall through into
   ashlsi3_r0.  NOTE(review): the .align may insert padding between the
   two entry points; it is presumably filled with nops - confirm.  */
417GLOBAL(ashlsi3):
418	mov	r5,r0
419	.align	2
420GLOBAL(ashlsi3_r0):
421
/* SH1 has no braf: the table address is formed with mova (which needs
   r0), so r4 is spilled to the stack and used as a temporary for the
   scaled count, then restored before the jump.  */
422#ifdef __sh1__
423	and	#31,r0
424	shll2	r0
425	mov.l	r4,@-r15
426	mov	r0,r4
427	mova	LOCAL(ashlsi3_table),r0
428	add	r4,r0
429	mov.l	@r15+,r4
430	jmp	@r0
431	mov	r4,r0
432	.align 2
433#else
/* r0 = (count & 31) * 4.  braf reads r0 before its delay slot replaces it
   with the value to shift; the branch base (braf + 4) is the table.  */
434	and	#31,r0
435	shll2	r0
436	braf	r0
437	mov	r4,r0
438#endif
439
/* Table entries.  Small counts finish in place; the others perform a
   first partial shift in the delay slot of a bra to a shared tail.  */
440LOCAL(ashlsi3_table):
441	rts				// << 0
442	nop
443LOCAL(ashlsi_1):
444	rts				// << 1
445	shll	r0
446LOCAL(ashlsi_2):			// << 2
447	rts
448	shll2	r0
449	bra	LOCAL(ashlsi_1)		// << 3
450	shll2	r0
451	bra	LOCAL(ashlsi_2)		// << 4
452	shll2	r0
453	bra	LOCAL(ashlsi_5)		// << 5
454	shll	r0
455	bra	LOCAL(ashlsi_6)		// << 6
456	shll2	r0
457	bra	LOCAL(ashlsi_7)		// << 7
458	shll	r0
459LOCAL(ashlsi_8):			// << 8
460	rts
461	shll8	r0
462	bra	LOCAL(ashlsi_8)		// << 9
463	shll	r0
464	bra	LOCAL(ashlsi_8)		// << 10
465	shll2	r0
466	bra	LOCAL(ashlsi_11)	// << 11
467	shll	r0
468	bra	LOCAL(ashlsi_12)	// << 12
469	shll2	r0
470	bra	LOCAL(ashlsi_13)	// << 13
471	shll	r0
472	bra	LOCAL(ashlsi_14)	// << 14
473	shll8	r0
474	bra	LOCAL(ashlsi_15)	// << 15
475	shll8	r0
476LOCAL(ashlsi_16):			// << 16
477	rts
478	shll16	r0
479	bra	LOCAL(ashlsi_16)	// << 17
480	shll	r0
481	bra	LOCAL(ashlsi_16)	// << 18
482	shll2	r0
483	bra	LOCAL(ashlsi_19)	// << 19
484	shll	r0
485	bra	LOCAL(ashlsi_20)	// << 20
486	shll2	r0
487	bra	LOCAL(ashlsi_21)	// << 21
488	shll	r0
489	bra	LOCAL(ashlsi_22)	// << 22
490	shll16	r0
491	bra	LOCAL(ashlsi_23)	// << 23
492	shll16	r0
493	bra	LOCAL(ashlsi_16)	// << 24
494	shll8	r0
495	bra	LOCAL(ashlsi_25)	// << 25
496	shll	r0
497	bra	LOCAL(ashlsi_26)	// << 26
498	shll2	r0
499	bra	LOCAL(ashlsi_27)	// << 27
500	shll	r0
501	bra	LOCAL(ashlsi_28)	// << 28
502	shll2	r0
503	bra	LOCAL(ashlsi_29)	// << 29
504	shll16	r0
505	bra	LOCAL(ashlsi_30)	// << 30
506	shll16	r0
/* Last entry, so it may be three instructions long: keep bit 0 only and
   rotate it into bit 31 in the delay slot of the return.  */
507	and	#1,r0			// << 31
508	rts
509	rotr	r0
510
/* Shared tails.  Several of the rts instructions below have no delay-slot
   instruction of their own: the first instruction of the NEXT tail runs
   in the delay slot and contributes a shll2 to the total.  The order of
   these tails is therefore part of the algorithm.  */
511LOCAL(ashlsi_7):
512	shll2	r0
513LOCAL(ashlsi_5):
514LOCAL(ashlsi_6):
515	shll2	r0
516	rts
517LOCAL(ashlsi_13):
518	shll2	r0
519LOCAL(ashlsi_12):
520LOCAL(ashlsi_11):
521	shll8	r0
522	rts
523LOCAL(ashlsi_21):
524	shll2	r0
525LOCAL(ashlsi_20):
526LOCAL(ashlsi_19):
527	shll16	r0
528	rts
529LOCAL(ashlsi_28):
530LOCAL(ashlsi_27):
531	shll2	r0
532LOCAL(ashlsi_26):
533LOCAL(ashlsi_25):
534	shll16	r0
535	rts
536	shll8	r0
537
/* 14 and 22: the table entry shifted left by 8 or 16; going right by 2
   and left by 8 gives the net amount.  */
538LOCAL(ashlsi_22):
539LOCAL(ashlsi_14):
540	shlr2	r0
541	rts
542	shll8	r0
543
/* 15 and 23: as above with a one-bit right shift.  */
544LOCAL(ashlsi_23):
545LOCAL(ashlsi_15):
546	shlr	r0
547	rts
548	shll8	r0
549
/* 29 and 30: 16 left, 3 or 2 right, then 16 left again.  */
550LOCAL(ashlsi_29):
551	shlr	r0
552LOCAL(ashlsi_30):
553	shlr2	r0
554	rts
555	shll16	r0
556
557	ENDFUNC(GLOBAL(ashlsi3))
558	ENDFUNC(GLOBAL(ashlsi3_r0))
559#endif
560
561#ifdef L_lshiftrt
562
563!
564! GLOBAL(lshrsi3)
565! (For compatibility with older binaries, not used by compiler)
566!
567! Entry:
568!	r4: Value to shift
569!	r5: Shift count
570!
571! Exit:
572!	r0: Result
573!
574! Destroys:
575!	T bit
576!
577!
578! GLOBAL(lshrsi3_r0)
579!
580! Entry:
581!	r4: Value to shift
582!	r0: Shift count
583!
584! Exit:
585!	r0: Result
586!
587! Destroys:
588!	T bit
589
/* lshrsi3 / lshrsi3_r0: logical right shift by a variable count.
   The dispatch table below is made of code, not data: every entry is
   exactly two instructions (4 bytes), so the entry for count N lives at
   table + 4*N.  Do not insert or remove instructions inside the table.  */
590	.global	GLOBAL(lshrsi3)
591	.global	GLOBAL(lshrsi3_r0)
592	HIDDEN_FUNC(GLOBAL(lshrsi3))
593	HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
/* Legacy entry: move the count from r5 to r0 and fall through into
   lshrsi3_r0.  NOTE(review): the .align may insert padding between the
   two entry points; it is presumably filled with nops - confirm.  */
594GLOBAL(lshrsi3):
595	mov	r5,r0
596	.align	2
597GLOBAL(lshrsi3_r0):
598
/* SH1 has no braf: the table address is formed with mova (which needs
   r0), so r4 is spilled to the stack and used as a temporary for the
   scaled count, then restored before the jump.  */
599#ifdef __sh1__
600	and	#31,r0
601	shll2	r0
602	mov.l	r4,@-r15
603	mov	r0,r4
604	mova	LOCAL(lshrsi3_table),r0
605	add	r4,r0
606	mov.l	@r15+,r4
607	jmp	@r0
608	mov	r4,r0
609	.align 2
610#else
/* r0 = (count & 31) * 4.  braf reads r0 before its delay slot replaces it
   with the value to shift; the branch base (braf + 4) is the table.  */
611	and	#31,r0
612	shll2	r0
613	braf	r0
614	mov	r4,r0
615#endif
/* Table entries.  Small counts finish in place; the others perform a
   first partial shift in the delay slot of a bra to a shared tail.  */
616LOCAL(lshrsi3_table):
617	rts				// >> 0
618	nop
619LOCAL(lshrsi_1):			// >> 1
620	rts
621	shlr	r0
622LOCAL(lshrsi_2):			// >> 2
623	rts
624	shlr2	r0
625	bra	LOCAL(lshrsi_1)		// >> 3
626	shlr2	r0
627	bra	LOCAL(lshrsi_2)		// >> 4
628	shlr2	r0
629	bra	LOCAL(lshrsi_5)		// >> 5
630	shlr	r0
631	bra	LOCAL(lshrsi_6)		// >> 6
632	shlr2	r0
633	bra	LOCAL(lshrsi_7)		// >> 7
634	shlr	r0
635LOCAL(lshrsi_8):			// >> 8
636	rts
637	shlr8	r0
638	bra	LOCAL(lshrsi_8)		// >> 9
639	shlr	r0
640	bra	LOCAL(lshrsi_8)		// >> 10
641	shlr2	r0
642	bra	LOCAL(lshrsi_11)	// >> 11
643	shlr	r0
644	bra	LOCAL(lshrsi_12)	// >> 12
645	shlr2	r0
646	bra	LOCAL(lshrsi_13)	// >> 13
647	shlr	r0
648	bra	LOCAL(lshrsi_14)	// >> 14
649	shlr8	r0
650	bra	LOCAL(lshrsi_15)	// >> 15
651	shlr8	r0
652LOCAL(lshrsi_16):			// >> 16
653	rts
654	shlr16	r0
655	bra	LOCAL(lshrsi_16)	// >> 17
656	shlr	r0
657	bra	LOCAL(lshrsi_16)	// >> 18
658	shlr2	r0
659	bra	LOCAL(lshrsi_19)	// >> 19
660	shlr	r0
661	bra	LOCAL(lshrsi_20)	// >> 20
662	shlr2	r0
663	bra	LOCAL(lshrsi_21)	// >> 21
664	shlr	r0
665	bra	LOCAL(lshrsi_22)	// >> 22
666	shlr16	r0
667	bra	LOCAL(lshrsi_23)	// >> 23
668	shlr16	r0
669	bra	LOCAL(lshrsi_16)	// >> 24
670	shlr8	r0
671	bra	LOCAL(lshrsi_25)	// >> 25
672	shlr	r0
673	bra	LOCAL(lshrsi_26)	// >> 26
674	shlr2	r0
675	bra	LOCAL(lshrsi_27)	// >> 27
676	shlr	r0
677	bra	LOCAL(lshrsi_28)	// >> 28
678	shlr2	r0
679	bra	LOCAL(lshrsi_29)	// >> 29
680	shlr16	r0
681	bra	LOCAL(lshrsi_30)	// >> 30
682	shlr16	r0
/* Last entry, so it may be three instructions long: shll moves bit 31
   into T and movt materialises T (0 or 1) as the result.  */
683	shll	r0			// >> 31
684	rts
685	movt	r0
686
/* Shared tails.  Several of the rts instructions below have no delay-slot
   instruction of their own: the first instruction of the NEXT tail runs
   in the delay slot and contributes a shlr2 to the total.  The order of
   these tails is therefore part of the algorithm.  */
687LOCAL(lshrsi_7):
688	shlr2	r0
689LOCAL(lshrsi_5):
690LOCAL(lshrsi_6):
691	shlr2	r0
692	rts
693LOCAL(lshrsi_13):
694	shlr2	r0
695LOCAL(lshrsi_12):
696LOCAL(lshrsi_11):
697	shlr8	r0
698	rts
699LOCAL(lshrsi_21):
700	shlr2	r0
701LOCAL(lshrsi_20):
702LOCAL(lshrsi_19):
703	shlr16	r0
704	rts
705LOCAL(lshrsi_28):
706LOCAL(lshrsi_27):
707	shlr2	r0
708LOCAL(lshrsi_26):
709LOCAL(lshrsi_25):
710	shlr16	r0
711	rts
712	shlr8	r0
713
/* 14 and 22: the table entry shifted right by 8 or 16; going left by 2
   and right by 8 gives the net amount.  */
714LOCAL(lshrsi_22):
715LOCAL(lshrsi_14):
716	shll2	r0
717	rts
718	shlr8	r0
719
/* 15 and 23: as above with a one-bit left shift.  */
720LOCAL(lshrsi_23):
721LOCAL(lshrsi_15):
722	shll	r0
723	rts
724	shlr8	r0
725
/* 29 and 30: 16 right, 3 or 2 left, then 16 right again.  */
726LOCAL(lshrsi_29):
727	shll	r0
728LOCAL(lshrsi_30):
729	shll2	r0
730	rts
731	shlr16	r0
732
733	ENDFUNC(GLOBAL(lshrsi3))
734	ENDFUNC(GLOBAL(lshrsi3_r0))
735#endif
736
737#ifdef L_movmem
738	.text
739	.balign	4
740	.global	GLOBAL(movmem)
741	HIDDEN_FUNC(GLOBAL(movmem))
742	HIDDEN_ALIAS(movstr,movmem)
743	/* This would be a lot simpler if r6 contained the byte count
744	   minus 64, and we wouldn't be called here for a byte count of 64.  */
745GLOBAL(movmem):
746	sts.l	pr,@-r15
747	shll2	r6
748	bsr	GLOBAL(movmemSI52+2)
749	mov.l	@(48,r5),r0
750	.balign	4
751LOCAL(movmem_loop): /* Reached with rts */
752	mov.l	@(60,r5),r0
753	add	#-64,r6
754	mov.l	r0,@(60,r4)
755	tst	r6,r6
756	mov.l	@(56,r5),r0
757	bt	LOCAL(movmem_done)
758	mov.l	r0,@(56,r4)
759	cmp/pl	r6
760	mov.l	@(52,r5),r0
761	add	#64,r5
762	mov.l	r0,@(52,r4)
763	add	#64,r4
764	bt	GLOBAL(movmemSI52)
765! done all the large groups, do the remainder
766! jump to movmem+
767	mova	GLOBAL(movmemSI4)+4,r0
768	add	r6,r0
769	jmp	@r0
770LOCAL(movmem_done): ! share slot insn, works out aligned.
771	lds.l	@r15+,pr
772	mov.l	r0,@(56,r4)
773	mov.l	@(52,r5),r0
774	rts
775	mov.l	r0,@(52,r4)
776	.balign	4
777! ??? We need aliases movstr* for movmem* for the older libraries.  These
778! aliases will be removed at the some point in the future.
779	.global	GLOBAL(movmemSI64)
780	HIDDEN_FUNC(GLOBAL(movmemSI64))
781	HIDDEN_ALIAS(movstrSI64,movmemSI64)
782GLOBAL(movmemSI64):
783	mov.l	@(60,r5),r0
784	mov.l	r0,@(60,r4)
785	.global	GLOBAL(movmemSI60)
786	HIDDEN_FUNC(GLOBAL(movmemSI60))
787	HIDDEN_ALIAS(movstrSI60,movmemSI60)
788GLOBAL(movmemSI60):
789	mov.l	@(56,r5),r0
790	mov.l	r0,@(56,r4)
791	.global	GLOBAL(movmemSI56)
792	HIDDEN_FUNC(GLOBAL(movmemSI56))
793	HIDDEN_ALIAS(movstrSI56,movmemSI56)
794GLOBAL(movmemSI56):
795	mov.l	@(52,r5),r0
796	mov.l	r0,@(52,r4)
797	.global	GLOBAL(movmemSI52)
798	HIDDEN_FUNC(GLOBAL(movmemSI52))
799	HIDDEN_ALIAS(movstrSI52,movmemSI52)
800GLOBAL(movmemSI52):
801	mov.l	@(48,r5),r0
802	mov.l	r0,@(48,r4)
803	.global	GLOBAL(movmemSI48)
804	HIDDEN_FUNC(GLOBAL(movmemSI48))
805	HIDDEN_ALIAS(movstrSI48,movmemSI48)
806GLOBAL(movmemSI48):
807	mov.l	@(44,r5),r0
808	mov.l	r0,@(44,r4)
809	.global	GLOBAL(movmemSI44)
810	HIDDEN_FUNC(GLOBAL(movmemSI44))
811	HIDDEN_ALIAS(movstrSI44,movmemSI44)
812GLOBAL(movmemSI44):
813	mov.l	@(40,r5),r0
814	mov.l	r0,@(40,r4)
815	.global	GLOBAL(movmemSI40)
816	HIDDEN_FUNC(GLOBAL(movmemSI40))
817	HIDDEN_ALIAS(movstrSI40,movmemSI40)
818GLOBAL(movmemSI40):
819	mov.l	@(36,r5),r0
820	mov.l	r0,@(36,r4)
821	.global	GLOBAL(movmemSI36)
822	HIDDEN_FUNC(GLOBAL(movmemSI36))
823	HIDDEN_ALIAS(movstrSI36,movmemSI36)
824GLOBAL(movmemSI36):
825	mov.l	@(32,r5),r0
826	mov.l	r0,@(32,r4)
827	.global	GLOBAL(movmemSI32)
828	HIDDEN_FUNC(GLOBAL(movmemSI32))
829	HIDDEN_ALIAS(movstrSI32,movmemSI32)
830GLOBAL(movmemSI32):
831	mov.l	@(28,r5),r0
832	mov.l	r0,@(28,r4)
833	.global	GLOBAL(movmemSI28)
834	HIDDEN_FUNC(GLOBAL(movmemSI28))
835	HIDDEN_ALIAS(movstrSI28,movmemSI28)
836GLOBAL(movmemSI28):
837	mov.l	@(24,r5),r0
838	mov.l	r0,@(24,r4)
839	.global	GLOBAL(movmemSI24)
840	HIDDEN_FUNC(GLOBAL(movmemSI24))
841	HIDDEN_ALIAS(movstrSI24,movmemSI24)
842GLOBAL(movmemSI24):
843	mov.l	@(20,r5),r0
844	mov.l	r0,@(20,r4)
845	.global	GLOBAL(movmemSI20)
846	HIDDEN_FUNC(GLOBAL(movmemSI20))
847	HIDDEN_ALIAS(movstrSI20,movmemSI20)
848GLOBAL(movmemSI20):
849	mov.l	@(16,r5),r0
850	mov.l	r0,@(16,r4)
851	.global	GLOBAL(movmemSI16)
852	HIDDEN_FUNC(GLOBAL(movmemSI16))
853	HIDDEN_ALIAS(movstrSI16,movmemSI16)
854GLOBAL(movmemSI16):
855	mov.l	@(12,r5),r0
856	mov.l	r0,@(12,r4)
857	.global	GLOBAL(movmemSI12)
858	HIDDEN_FUNC(GLOBAL(movmemSI12))
859	HIDDEN_ALIAS(movstrSI12,movmemSI12)
860GLOBAL(movmemSI12):
861	mov.l	@(8,r5),r0
862	mov.l	r0,@(8,r4)
863	.global	GLOBAL(movmemSI8)
864	HIDDEN_FUNC(GLOBAL(movmemSI8))
865	HIDDEN_ALIAS(movstrSI8,movmemSI8)
866GLOBAL(movmemSI8):
867	mov.l	@(4,r5),r0
868	mov.l	r0,@(4,r4)
869	.global	GLOBAL(movmemSI4)
870	HIDDEN_FUNC(GLOBAL(movmemSI4))
871	HIDDEN_ALIAS(movstrSI4,movmemSI4)
872GLOBAL(movmemSI4):
873	mov.l	@(0,r5),r0
874	rts
875	mov.l	r0,@(0,r4)
876
877	ENDFUNC(GLOBAL(movmemSI64))
878	ENDFUNC(GLOBAL(movmemSI60))
879	ENDFUNC(GLOBAL(movmemSI56))
880	ENDFUNC(GLOBAL(movmemSI52))
881	ENDFUNC(GLOBAL(movmemSI48))
882	ENDFUNC(GLOBAL(movmemSI44))
883	ENDFUNC(GLOBAL(movmemSI40))
884	ENDFUNC(GLOBAL(movmemSI36))
885	ENDFUNC(GLOBAL(movmemSI32))
886	ENDFUNC(GLOBAL(movmemSI28))
887	ENDFUNC(GLOBAL(movmemSI24))
888	ENDFUNC(GLOBAL(movmemSI20))
889	ENDFUNC(GLOBAL(movmemSI16))
890	ENDFUNC(GLOBAL(movmemSI12))
891	ENDFUNC(GLOBAL(movmemSI8))
892	ENDFUNC(GLOBAL(movmemSI4))
893	ENDFUNC(GLOBAL(movmem))
894#endif
895
896#ifdef L_movmem_i4
/* movmem_i4_even / movmem_i4_odd / movmemSI12_i4: longword block copies
   that keep several loads in flight before the matching stores.
   r4 = destination, r5 = source (post-incremented), r6 = loop counter
   decremented with dt; r0-r3 are used as transfer registers.
   NOTE(review): the precise meaning of "even"/"odd" and of the initial
   r6 value is defined by the compiler side and is not visible here.  */
897	.text
898	.global	GLOBAL(movmem_i4_even)
899	.global	GLOBAL(movmem_i4_odd)
900	.global	GLOBAL(movmemSI12_i4)
901
902	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
903	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
904	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
905
906	HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
907	HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
908	HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
909
910	.p2align	5
/* Exit taken from the middle of the loop: r0 and r1 were already loaded
   and go to offsets 16 and 20 of the not yet advanced r4.  */
911L_movmem_2mod4_end:
912	mov.l	r0,@(16,r4)
913	rts
914	mov.l	r1,@(20,r4)
915
916	.p2align	2
917
/* Even entry: preload two longwords and join the second half of the loop.  */
918GLOBAL(movmem_i4_even):
919	mov.l	@r5+,r0
920	bra	L_movmem_start_even
921	mov.l	@r5+,r1
922
/* Odd entry: load three longwords and bias r4 by -4 so that the stores at
   offsets 4, 8 and 12 land on the first three destination longwords.  */
923GLOBAL(movmem_i4_odd):
924	mov.l	@r5+,r1
925	add	#-4,r4
926	mov.l	@r5+,r2
927	mov.l	@r5+,r3
928	mov.l	r1,@(4,r4)
929	mov.l	r2,@(8,r4)
930
/* First half of an iteration: store the pending r3, count down, load the
   next two longwords and leave through L_movmem_2mod4_end when r6 hits
   zero (the second load is in the bt/s delay slot and always executes).  */
931L_movmem_loop:
932	mov.l	r3,@(12,r4)
933	dt	r6
934	mov.l	@r5+,r0
935	bt/s	L_movmem_2mod4_end
936	mov.l	@r5+,r1
937	add	#16,r4
/* Second half: load two more, store the first three of the group and loop
   while r6 is non-zero; otherwise store the fourth and return.  */
938L_movmem_start_even:
939	mov.l	@r5+,r2
940	mov.l	@r5+,r3
941	mov.l	r0,@r4
942	dt	r6
943	mov.l	r1,@(4,r4)
944	bf/s	L_movmem_loop
945	mov.l	r2,@(8,r4)
946	rts
947	mov.l	r3,@(12,r4)
948
949	ENDFUNC(GLOBAL(movmem_i4_even))
950	ENDFUNC(GLOBAL(movmem_i4_odd))
951
952	.p2align	4
/* Fixed 12-byte copy: three loads followed by three stores, the last one
   in the delay slot of the return.  r5 and r4 are not modified.  */
953GLOBAL(movmemSI12_i4):
954	mov.l	@r5,r0
955	mov.l	@(4,r5),r1
956	mov.l	@(8,r5),r2
957	mov.l	r0,@r4
958	mov.l	r1,@(4,r4)
959	rts
960	mov.l	r2,@(8,r4)
961
962	ENDFUNC(GLOBAL(movmemSI12_i4))
963#endif
964
965#ifdef L_mulsi3
966
967
/* mulsi3: 32 x 32 -> 32 bit multiply built from 16 x 16 mulu.w products.
   Entry: r4, r5 = operands.  Exit: r0 = low 32 bits of r4 * r5.
   Modifies r1, r2, r3, macl and the T bit.
   The aa*cc product is never formed: it would be scaled by 2^32 and so
   cannot affect the 32-bit result.  */
968	.global	GLOBAL(mulsi3)
969	HIDDEN_FUNC(GLOBAL(mulsi3))
970
971! r4 =       aabb
972! r5 =       ccdd
973! r0 = aabb*ccdd  via partial products
974!
975! if aa == 0 and cc == 0
976! r0 = bb*dd
977!
978! else
979! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
980!
981
982GLOBAL(mulsi3):
983	mulu.w  r4,r5		! multiply the lsws  macl=bb*dd
984	mov     r5,r3		! r3 = ccdd
985	swap.w  r4,r2		! r2 = bbaa
986	xtrct   r2,r3		! r3 = aacc
987	tst  	r3,r3		! msws zero ?
988	bf      LOCAL(hiset)
989	rts			! yes - then we have the answer
990	sts     macl,r0
991
/* At least one upper half is non-zero: add both cross products, shifted
   into the upper half, to bb*dd.  The label uses LOCAL() like the other
   internal labels of this file so that it does not end up as a stray
   symbol in the object file.  */
992LOCAL(hiset):	sts	macl,r0		! r0 = bb*dd
993	mulu.w	r2,r5		! brewing macl = aa*dd
994	sts	macl,r1
995	mulu.w	r3,r4		! brewing macl = cc*bb
996	sts	macl,r2
997	add	r1,r2
998	shll16	r2
999	rts
1000	add	r2,r0
1001
1002	ENDFUNC(GLOBAL(mulsi3))
1003#endif
1004
1005/*------------------------------------------------------------------------------
1006  32 bit signed integer division that uses FPU double precision division.  */
1007
/* sdivsi3_i4: signed 32-bit division done in double-precision floating
   point.  Both operands are converted to double, divided, and the
   quotient is truncated back to an integer that is returned in fpul.
   Two variants exist, selected by the default FPSCR.PR setting of the
   target; no code is emitted when neither #if branch matches.  */
1008#ifdef L_sdivsi3_i4
1009	.title "SH DIVIDE"
1010
1011#if defined (__SH4__) || defined (__SH2A__)
1012/* This variant is used when FPSCR.PR = 1 (double precision) is the default
1013   setting.
1014   Args in r4 and r5, result in fpul, clobber dr0, dr2.  */
1015
1016	.global	GLOBAL(sdivsi3_i4)
1017	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1018GLOBAL(sdivsi3_i4):
/* dr0 = (double) r4, dr2 = (double) r5, dr0 /= dr2; the truncating
   conversion back to integer runs in the delay slot of the return.  */
1019	lds r4,fpul
1020	float fpul,dr0
1021	lds r5,fpul
1022	float fpul,dr2
1023	fdiv dr2,dr0
1024	rts
1025	ftrc dr0,fpul
1026
1027	ENDFUNC(GLOBAL(sdivsi3_i4))
1028
1029#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1030/* This variant is used when FPSCR.PR = 0 (single precision) is the default
1031   setting.
1032   Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
1033   For this to work, we must temporarily switch the FPU to double precision,
1034   but we better do not touch FPSCR.FR.  See PR 6526.  */
1035
1036	.global	GLOBAL(sdivsi3_i4)
1037	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1038GLOBAL(sdivsi3_i4):
1039
1040#ifndef __SH4A__
/* r3 is only a scratch register for building the new FPSCR value, so it
   is saved on the stack and restored in the delay slot of the return.
   r2 keeps the original FPSCR, which is written back after the ftrc.  */
1041	mov.l	r3,@-r15
1042	sts	fpscr,r2
1043	mov	#8,r3
1044	swap.w	r3,r3		// r3 = 1 << 19 (FPSCR.PR bit)
1045	or	r2,r3
1046	lds	r3,fpscr	// Set FPSCR.PR = 1.
1047	lds	r4,fpul
1048	float	fpul,dr0
1049	lds	r5,fpul
1050	float	fpul,dr2
1051	fdiv	dr2,dr0
1052	ftrc	dr0,fpul
1053	lds	r2,fpscr
1054	rts
1055	mov.l	@r15+,r3
1056#else
1057/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.  */
/* The second fpchg, in the delay slot of the return, flips PR back.  */
1058	fpchg
1059	lds	r4,fpul
1060	float	fpul,dr0
1061	lds	r5,fpul
1062	float	fpul,dr2
1063	fdiv	dr2,dr0
1064	ftrc	dr0,fpul
1065	rts
1066	fpchg
1067
1068#endif /* __SH4A__  */
1069
1070	ENDFUNC(GLOBAL(sdivsi3_i4))
1071#endif /* ! __SH4__ || __SH2A__  */
1072#endif /* L_sdivsi3_i4  */
1073
1074//------------------------------------------------------------------------------
1075#ifdef L_sdivsi3
1076/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1077   sh2e/sh3e code.  */
1078!!
1079!! Steve Chamberlain
1080!! sac@cygnus.com
1081!!
1082!!
1083
1084!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1085
/* sdivsi3: signed 32-bit division using the div0s/div1 step instructions.
   Entry: r4 = dividend, r5 = divisor.  Exit: r0 = quotient.
   A zero divisor does not trap: the routine returns 0 (see div0 below).
   Working registers: r1 = dividend / quotient bits, r0 = divisor,
   r2 = constant 0, r3 = sign extension of the dividend (high word).  */
1086	.global	GLOBAL(sdivsi3)
1087	.align	2
1088
1089	FUNC(GLOBAL(sdivsi3))
1090GLOBAL(sdivsi3):
1091	mov	r4,r1
1092	mov	r5,r0
1093
1094	tst	r0,r0
1095	bt	div0
/* With r2 = 0, div0s leaves T equal to the sign bit of the dividend.
   The first subc turns that into r3 = 0 or -1 and keeps T; the second
   subtracts T from the dividend, i.e. a negative dividend is decremented
   by one before the division steps.  */
1096	mov	#0,r2
1097	div0s	r2,r1
1098	subc	r3,r3
1099	subc	r2,r1
/* Set up the division flags for divisor r0 and high word r3, then run 32
   rotcl/div1 pairs.  Each rotcl shifts the previous quotient bit (T) into
   r1 from the right and the next dividend bit out on the left.  */
1100	div0s	r0,r3
1101	rotcl	r1
1102	div1	r0,r3
1103	rotcl	r1
1104	div1	r0,r3
1105	rotcl	r1
1106	div1	r0,r3
1107	rotcl	r1
1108	div1	r0,r3
1109	rotcl	r1
1110	div1	r0,r3
1111	rotcl	r1
1112	div1	r0,r3
1113	rotcl	r1
1114	div1	r0,r3
1115	rotcl	r1
1116	div1	r0,r3
1117	rotcl	r1
1118	div1	r0,r3
1119	rotcl	r1
1120	div1	r0,r3
1121	rotcl	r1
1122	div1	r0,r3
1123	rotcl	r1
1124	div1	r0,r3
1125	rotcl	r1
1126	div1	r0,r3
1127	rotcl	r1
1128	div1	r0,r3
1129	rotcl	r1
1130	div1	r0,r3
1131	rotcl	r1
1132	div1	r0,r3
1133	rotcl	r1
1134	div1	r0,r3
1135	rotcl	r1
1136	div1	r0,r3
1137	rotcl	r1
1138	div1	r0,r3
1139	rotcl	r1
1140	div1	r0,r3
1141	rotcl	r1
1142	div1	r0,r3
1143	rotcl	r1
1144	div1	r0,r3
1145	rotcl	r1
1146	div1	r0,r3
1147	rotcl	r1
1148	div1	r0,r3
1149	rotcl	r1
1150	div1	r0,r3
1151	rotcl	r1
1152	div1	r0,r3
1153	rotcl	r1
1154	div1	r0,r3
1155	rotcl	r1
1156	div1	r0,r3
1157	rotcl	r1
1158	div1	r0,r3
1159	rotcl	r1
1160	div1	r0,r3
1161	rotcl	r1
1162	div1	r0,r3
1163	rotcl	r1
1164	div1	r0,r3
1165	rotcl	r1
/* The last rotcl brought in the final quotient bit; addc adds the T bit
   it left behind to the quotient as the final correction.  The result is
   moved to r0 in the delay slot of the return.  */
1166	addc	r2,r1
1167	rts
1168	mov	r1,r0
1169
1170
/* Division by zero: return 0.  */
1171div0:	rts
1172	mov	#0,r0
1173
1174	ENDFUNC(GLOBAL(sdivsi3))
1175#endif /* L_sdivsi3  */
1176
1177/*------------------------------------------------------------------------------
1178  32 bit unsigned integer division that uses FPU double precision division.  */
1179
/* udivsi3_i4: unsigned 32-bit division done in double-precision floating
   point, result in fpul.  The int-to-double conversion is signed, so each
   operand first has bit 31 flipped (which subtracts 2^31 when the value is
   read as signed) and the constant 2^31 at L1 is added back after the
   conversion.  A divisor of 0 or 1 takes the "trivial" path, which returns
   the dividend unchanged; for every other divisor the quotient is below
   2^31 and therefore fits the signed result of ftrc.
   NOTE(review): DR40 / DR41 come from lib1funcs.h and presumably name the
   two single-precision halves of dr4 - confirm there.  */
1180#ifdef L_udivsi3_i4
1181	.title "SH DIVIDE"
1182
1183#if defined (__SH4__) || defined (__SH2A__)
1184/* This variant is used when FPSCR.PR = 1 (double precision) is the default
1185   setting.
1186   Args in r4 and r5, result in fpul,
1187   clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit  */
1188
1189	.global	GLOBAL(udivsi3_i4)
1190	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1191GLOBAL(udivsi3_i4):
/* T = (r5 > 1) unsigned.  The rotr in the delay slot turns r1 = 1 into
   1 << 31, the mask used to flip the top bit of both operands.  */
1192	mov	#1,r1
1193	cmp/hi	r1,r5
1194	bf/s	trivial
1195	rotr	r1
1196	xor	r1,r4
1197	lds	r4,fpul
1198	mova	L1,r0
1199#ifdef FMOVD_WORKS
1200	fmov.d	@r0+,dr4
1201#else
1202	fmov.s	@r0+,DR40
1203	fmov.s	@r0,DR41
1204#endif
1205	float	fpul,dr0
1206	xor	r1,r5
1207	lds	r5,fpul
1208	float	fpul,dr2
/* Undo the bias: dr0 and dr2 now hold the unsigned operand values.  */
1209	fadd	dr4,dr0
1210	fadd	dr4,dr2
1211	fdiv	dr2,dr0
1212	rts
1213	ftrc	dr0,fpul
1214
/* Divisor 0 or 1: the result is the dividend itself.  */
1215trivial:
1216	rts
1217	lds	r4,fpul
1218
1219	.align 2
1220#ifdef FMOVD_WORKS
1221	.align 3	// Make the double below 8 byte aligned.
1222#endif
1223L1:
1224	.double 2147483648
1225
1226	ENDFUNC(GLOBAL(udivsi3_i4))
1227
1228#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1229/* This variant is used when FPSCR.PR = 0 (single precision) is the default
1230   setting.
1231   Args in r4 and r5, result in fpul,
1232   clobber r0, r1, r4, r5, dr0, dr2, dr4.
1233   For this to work, we must temporarily switch the FPU to double precision,
1234   but we better do not touch FPSCR.FR.  See PR 6526.  */
1235
1236	.global	GLOBAL(udivsi3_i4)
1237	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1238GLOBAL(udivsi3_i4):
1239
1240#ifndef __SH4A__
1241	mov	#1,r1
1242	cmp/hi	r1,r5
1243	bf/s	trivial
1244	rotr	r1		// r1 = 1 << 31
/* The original FPSCR is pushed on the stack and popped again in the delay
   slot of the return.  The value read back from the stack is ORed with
   the bits at L2 to form the temporary FPSCR setting.  */
1245	sts.l	fpscr,@-r15
1246	xor	r1,r4
1247	mov.l	@(0,r15),r0
1248	xor	r1,r5
1249	mov.l	L2,r1
1250	lds	r4,fpul
1251	or	r0,r1
1252	mova	L1,r0
1253	lds	r1,fpscr
1254#ifdef FMOVD_WORKS
1255	fmov.d	@r0+,dr4
1256#else
1257	fmov.s	@r0+,DR40
1258	fmov.s	@r0,DR41
1259#endif
1260	float	fpul,dr0
1261	lds	r5,fpul
1262	float	fpul,dr2
1263	fadd	dr4,dr0
1264	fadd	dr4,dr2
1265	fdiv	dr2,dr0
1266	ftrc	dr0,fpul
1267	rts
1268	lds.l	@r15+,fpscr
1269
/* The 8-byte alignment is applied before "trivial": its two instructions
   (4 bytes) plus the 4-byte word at L2 keep L1 8-byte aligned.  */
1270#ifdef FMOVD_WORKS
1271	.align 3	// Make the double below 8 byte aligned.
1272#endif
1273trivial:
1274	rts
1275	lds	r4,fpul
1276
1277	.align 2
1278L2:
1279#ifdef FMOVD_WORKS
1280	.long 0x180000	// FPSCR.PR = 1, FPSCR.SZ = 1
1281#else
1282	.long 0x80000	// FPSCR.PR = 1
1283#endif
1284L1:
1285	.double 2147483648
1286
1287#else
1288/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.
1289   Although on SH4A fmovd usually works, it would require either additional
1290   two fschg instructions or an FPSCR push + pop.  It's not worth the effort
1291   for loading only one double constant.  */
1292	mov	#1,r1
1293	cmp/hi	r1,r5
1294	bf/s	trivial
1295	rotr	r1		// r1 = 1 << 31
/* The fpchg below and the one in the delay slot of the return bracket the
   double-precision section.  The trivial path branches away before the
   first fpchg and so leaves FPSCR untouched.  */
1296	fpchg
1297	mova	L1,r0
1298	xor	r1,r4
1299	fmov.s	@r0+,DR40
1300	lds	r4,fpul
1301	fmov.s	@r0,DR41
1302	xor	r1,r5
1303	float	fpul,dr0
1304	lds	r5,fpul
1305	float	fpul,dr2
1306	fadd	dr4,dr0
1307	fadd	dr4,dr2
1308	fdiv	dr2,dr0
1309	ftrc	dr0,fpul
1310	rts
1311	fpchg
1312
1313trivial:
1314	rts
1315	lds	r4,fpul
1316
1317	.align 2
1318L1:
1319	.double 2147483648
1320
1321#endif /* __SH4A__  */
1322
1323
1324	ENDFUNC(GLOBAL(udivsi3_i4))
1325#endif /* ! __SH4__ */
1326#endif /* L_udivsi3_i4  */
1327
1328#ifdef L_udivsi3
1329/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1330   sh2e/sh3e code.  */
1331
1332!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
/* udivsi3: unsigned 32-bit division using div1 steps.
   Entry: r4 = dividend, r5 = divisor.  Exit: r0 = quotient.
   pr is saved on the stack because the routine calls the local div1
   helper sequences below with bsr.  */
1333	.global	GLOBAL(udivsi3)
1334	HIDDEN_FUNC(GLOBAL(udivsi3))
1335
/* Helper sequences, reached with bsr.  div8 runs 8 div1 steps, div7 runs
   7; in both the last step sits in the delay slot of the rts.  */
1336LOCAL(div8):
1337 div1 r5,r4
1338LOCAL(div7):
1339 div1 r5,r4; div1 r5,r4; div1 r5,r4
1340 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1341
/* divx4 runs 4 div1 steps, with a rotcl r0 after each of the first three
   to collect the quotient bit.  The callers supply one more rotcl r0 in
   the delay slot of each bsr (and of the final rts).  */
1342LOCAL(divx4):
1343 div1 r5,r4; rotcl r0
1344 div1 r5,r4; rotcl r0
1345 div1 r5,r4; rotcl r0
1346 rts; div1 r5,r4
1347
1348GLOBAL(udivsi3):
1349 sts.l pr,@-r15
/* T = 1 when the divisor fits in 16 bits (it equals its own zero-extended
   low half).  Otherwise go to large_divisor.  */
1350 extu.w r5,r0
1351 cmp/eq r5,r0
/* SH1 has no bf/s, so there div0u is not in a delay slot and only runs on
   the fall-through path; large_divisor repeats it for that case.  */
1352#ifdef __sh1__
1353 bf LOCAL(large_divisor)
1354#else
1355 bf/s LOCAL(large_divisor)
1356#endif
1357 div0u
/* Divisor below 2^16: the divisor is moved to the upper half of r5 and
   the dividend is processed in two rounds of 16 div1 steps each
   (div8 + one step in the bsr delay slot + div7).  The xtrct / swap.w
   instructions in between move the partial results between r0 and r4.  */
1358 swap.w r4,r0
1359 shlr16 r4
1360 bsr LOCAL(div8)
1361 shll16 r5
1362 bsr LOCAL(div7)
1363 div1 r5,r4
1364 xtrct r4,r0
1365 xtrct r0,r4
1366 bsr LOCAL(div8)
1367 swap.w r4,r4
1368 bsr LOCAL(div7)
1369 div1 r5,r4
1370 lds.l @r15+,pr
1371 xtrct r4,r0
1372 swap.w r0,r0
1373 rotcl r0
/* r5 is shifted back to its original value in the delay slot.  */
1374 rts
1375 shlr16 r5
1376
/* Divisor of 2^16 or more: the quotient is below 2^16, so 16 div1 steps
   (four calls of divx4) are enough.  The two xtrct instructions leave the
   upper half of the dividend in r4 and its lower half in the upper 16
   bits of r0, from where rotcl r0 feeds the bits into the division while
   collecting quotient bits at the bottom of r0.  */
1377LOCAL(large_divisor):
1378#ifdef __sh1__
1379 div0u
1380#endif
1381 mov #0,r0
1382 xtrct r4,r0
1383 xtrct r0,r4
1384 bsr LOCAL(divx4)
1385 rotcl r0
1386 bsr LOCAL(divx4)
1387 rotcl r0
1388 bsr LOCAL(divx4)
1389 rotcl r0
1390 bsr LOCAL(divx4)
1391 rotcl r0
1392 lds.l @r15+,pr
1393 rts
1394 rotcl r0
1395
1396	ENDFUNC(GLOBAL(udivsi3))
1397#endif /* L_udivsi3 */
1398
/* set_fpscr: load FPSCR from r4 and refresh the two-word fpscr_values
   array with two variants of that setting, one with the PR bit set and
   one with it clear.  Only built for targets with an FPU (see the #if
   conditions below).  Uses r0-r3; r12 is saved and restored under PIC.  */
1399#ifdef L_set_fpscr
1400#if !defined (__SH2A_NOFPU__)
1401#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
1402	.global GLOBAL(set_fpscr)
1403	HIDDEN_FUNC(GLOBAL(set_fpscr))
1404GLOBAL(set_fpscr):
1405	lds r4,fpscr
/* r1 = address of fpscr_values.  Under PIC it is fetched from the GOT,
   whose address is built in r12 (from the VxWorks GOTT tables, or by
   adding the literal at set_fpscr_L0 to that literal's own address).  */
1406#ifdef __PIC__
1407	mov.l	r12,@-r15
1408#ifdef __vxworks
1409	mov.l	LOCAL(set_fpscr_L0_base),r12
1410	mov.l	LOCAL(set_fpscr_L0_index),r0
1411	mov.l	@r12,r12
1412	mov.l	@(r0,r12),r12
1413#else
1414	mova	LOCAL(set_fpscr_L0),r0
1415	mov.l	LOCAL(set_fpscr_L0),r12
1416	add	r0,r12
1417#endif
1418	mov.l	LOCAL(set_fpscr_L1),r0
1419	mov.l	@(r0,r12),r1
1420	mov.l	@r15+,r12
1421#else
1422	mov.l LOCAL(set_fpscr_L1),r1
1423#endif
/* The halves of r4 are swapped so that FPSCR bits 19 (PR) and 20 (SZ)
   appear as bits 3 and 4 of r0, within reach of the 8-bit immediates:
   8 = PR, 16 = SZ.  First variant: PR set, and SZ set only when
   FMOVD_WORKS is defined (otherwise SZ is forced clear).  */
1424	swap.w r4,r0
1425	or #24,r0
1426#ifndef FMOVD_WORKS
1427	xor #16,r0
1428#endif
/* The PR = 1 variant goes to fpscr_values[1] on double-default targets
   and to fpscr_values[0] on the others.  */
1429#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1430	swap.w r0,r3
1431	mov.l r3,@(4,r1)
1432#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1433	swap.w r0,r2
1434	mov.l r2,@r1
1435#endif
/* Second variant: PR cleared (and SZ cleared as well when it was set).  */
1436#ifndef FMOVD_WORKS
1437	xor #8,r0
1438#else
1439	xor #24,r0
1440#endif
/* The PR = 0 variant goes to the other slot, stored in the delay slot of
   the return.  */
1441#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1442	swap.w r0,r2
1443	rts
1444	mov.l r2,@r1
1445#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1446	swap.w r0,r3
1447	rts
1448	mov.l r3,@(4,r1)
1449#endif
1450	.align 2
/* Literal pool used by the address computation above.  */
1451#ifdef __PIC__
1452#ifdef __vxworks
1453LOCAL(set_fpscr_L0_base):
1454	.long ___GOTT_BASE__
1455LOCAL(set_fpscr_L0_index):
1456	.long ___GOTT_INDEX__
1457#else
1458LOCAL(set_fpscr_L0):
1459	.long _GLOBAL_OFFSET_TABLE_
1460#endif
1461LOCAL(set_fpscr_L1):
1462	.long GLOBAL(fpscr_values@GOT)
1463#else
1464LOCAL(set_fpscr_L1):
1465	.long GLOBAL(fpscr_values)
1466#endif
1467
1468	ENDFUNC(GLOBAL(set_fpscr))
/* Storage for the two variants: 8 bytes, declared as a common symbol
   unless NO_FPSCR_VALUES is defined.  */
1469#ifndef NO_FPSCR_VALUES
1470#ifdef __ELF__
1471        .comm   GLOBAL(fpscr_values),8,4
1472#else
1473        .comm   GLOBAL(fpscr_values),8
1474#endif /* ELF */
1475#endif /* NO_FPSCR_VALUES */
1476#endif /* SH2E / SH3E / SH4 */
1477#endif /* __SH2A_NOFPU__ */
1478#endif /* L_set_fpscr */
1479#ifdef L_ic_invalidate
1480
1481#if defined(__SH4A__)
/* GLOBAL(ic_invalidate), SH4A variant: r4 holds the address to
   invalidate.  Writes back the operand cache block, synchronizes, and
   invalidates the instruction cache block directly.  */
1482	.global GLOBAL(ic_invalidate)
1483	HIDDEN_FUNC(GLOBAL(ic_invalidate))
1484GLOBAL(ic_invalidate):
1485	ocbwb	@r4
1486	synco
1487	icbi	@r4
1488	rts
1489	  nop
1490	ENDFUNC(GLOBAL(ic_invalidate))
1491#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)
1492	/* For system code, we use ic_invalidate_line_i, but user code
1493	   needs a different mechanism.  A kernel call is generally not
1494	   available, and it would also be slow.  Different SH4 variants use
1495	   different sizes and associativities of the Icache.  We use a small
1496	   bit of dispatch code that can be put hidden in every shared object,
1497	   which calls the actual processor-specific invalidation code in a
1498	   separate module.
1499	   Or if you have operating system support, the OS could mmap the
1500	   processor-specific code from a single page, since it is highly
1501	   repetitive.  */
/* GLOBAL(ic_invalidate), SH4 dispatch stub: r4 holds the address to
   invalidate.  Loads the address of GLOBAL(ic_invalidate_array) into
   r1, then jumps into that array at an offset derived from r4.
   Uses r0, r1 and (PIC only) r2 as scratch and modifies r4.  */
1502	.global GLOBAL(ic_invalidate)
1503	HIDDEN_FUNC(GLOBAL(ic_invalidate))
1504GLOBAL(ic_invalidate):
1505#ifdef __pic__
1506#ifdef __vxworks
/* VxWorks PIC: r0 = GOT base found via ___GOTT_BASE__ and
   ___GOTT_INDEX__; r2 = GOT offset of the array.  */
1507	mov.l	1f,r1
1508	mov.l	2f,r0
1509	mov.l	@r1,r1
1510	mov.l	0f,r2
1511	mov.l	@(r0,r1),r0
1512#else
/* Generic PIC: r0 = address of literal 1 + the _GLOBAL_OFFSET_TABLE_
   value stored there; r2 = GOT offset of the array.  */
1513	mov.l	1f,r1
1514	mova	1f,r0
1515	mov.l	0f,r2
1516	add	r1,r0
1517#endif
1518	mov.l	@(r0,r2),r1
1519#else
1520	mov.l	0f,r1
1521#endif
/* r0 = (r4 - array) & word at array+8, rebased onto the array; jump
   there with r0 reloaded from array+4 in the delay slot and r4 left as
   the offset of the original address from the array.  */
1522	ocbwb	@r4
1523	mov.l	@(8,r1),r0
1524	sub	r1,r4
1525	and	r4,r0
1526	add	r1,r0
1527	jmp	@r0
1528	mov.l	@(4,r1),r0
1529	.align	2
1530#ifndef __pic__
15310:	.long   GLOBAL(ic_invalidate_array)
1532#else /* __pic__ */
1533	.global GLOBAL(ic_invalidate_array)
15340:	.long   GLOBAL(ic_invalidate_array)@GOT
1535#ifdef __vxworks
15361:	.long	___GOTT_BASE__
15372:	.long	___GOTT_INDEX__
1538#else
15391:	.long   _GLOBAL_OFFSET_TABLE_
1540#endif
1541#endif /* __pic__ */
/* Close the function for both the PIC and the non-PIC literal pool;
   previously this marker was emitted only in the PIC arm.  */
1542	ENDFUNC(GLOBAL(ic_invalidate))
1543#endif /* SH4 */
1544#endif /* L_ic_invalidate */
1545
1546#ifdef L_ic_invalidate_array
1547#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)))
/* SH4A form of GLOBAL(ic_invalidate_array): a single routine.  It adds
   r1 back onto r4 (the SH4 dispatch stub in GLOBAL(ic_invalidate)
   subtracts r1 from r4 before jumping here) and invalidates that
   instruction cache block.
   NOTE(review): the dispatch stub reads words at offsets 4 and 8 from
   this label.  Five instructions precede the .align below, so if each
   occupies two bytes the .long 0 would land at offset 12 rather than
   8 -- confirm the intended layout.  */
1548	.global GLOBAL(ic_invalidate_array)
1549	/* This is needed when an SH4 dso with trampolines is used on SH4A.  */
1550	.global GLOBAL(ic_invalidate_array)
1551	FUNC(GLOBAL(ic_invalidate_array))
1552GLOBAL(ic_invalidate_array):
1553	add	r1,r4
1554	synco
1555	icbi	@r4
1556	rts
1557	  nop
1558	.align 2
1559	.long	0
1560	ENDFUNC(GLOBAL(ic_invalidate_array))
1561#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)
/* SH4 form: an array of 32-byte records, aligned to 32 bytes, covering
   WAY_SIZE * WAYS bytes in total.  WAYS and WAY_SIZE default to 4 and
   0x4000 unless WAYS is already defined.  Three layouts exist, chosen
   by WAYS.  */
1562	.global GLOBAL(ic_invalidate_array)
1563	.p2align 5
1564	FUNC(GLOBAL(ic_invalidate_array))
1565/* This must be aligned to the beginning of a cache line.  */
1566GLOBAL(ic_invalidate_array):
1567#ifndef WAYS
1568#define WAYS 4
1569#define WAY_SIZE 0x4000
1570#endif
1571#if WAYS == 1
/* One way: each record is rts/nop followed by seven words holding
   WAY_SIZE - 32 (4 + 28 = 32 bytes).  */
1572	.rept	WAY_SIZE * WAYS / 32
1573	rts
1574	nop
1575	.rept	7
1576	.long	WAY_SIZE - 32
1577	.endr
1578	.endr
1579#elif WAYS <= 6
/* Two to six ways: each record starts with braf r0 / add #-8,r0, then
   the words WAY_SIZE + 8 (offset 4) and WAY_SIZE - 32 (offset 8), then
   WAYS-2 further braf r0 / nop pairs and 7-WAYS rts / nop pairs, which
   again totals 32 bytes.  The dispatch stub loads the words at offsets
   4 and 8 of the first record.  */
1580	.rept	WAY_SIZE * WAYS / 32
1581	braf	r0
1582	add	#-8,r0
1583	.long	WAY_SIZE + 8
1584	.long	WAY_SIZE - 32
1585	.rept	WAYS-2
1586	braf	r0
1587	nop
1588	.endr
1589	.rept	7 - WAYS
1590	rts
1591	nop
1592	.endr
1593	.endr
1594#else /* WAYS > 6 */
1595	/* This variant needs two different pages for mmap-ing.  */
/* The first WAYS-1 ways use braf r0 / nop records carrying WAY_SIZE
   and six copies of WAY_SIZE - 32; the last way consists of records
   made of rts followed by fifteen nops.  */
1596 	.rept	WAYS-1
1597	.rept	WAY_SIZE / 32
1598	braf	r0
1599	nop
1600	.long	WAY_SIZE
1601	.rept 6
1602	.long	WAY_SIZE - 32
1603	.endr
1604	.endr
1605	.endr
1606	.rept	WAY_SIZE / 32
1607	rts
1608	.rept	15
1609	nop
1610	.endr
1611	.endr
1612#endif /* WAYS */
1613	ENDFUNC(GLOBAL(ic_invalidate_array))
1614#endif /* SH4 */
1615#endif /* L_ic_invalidate_array */
1616
1617
1618#ifdef L_div_table
1619
1620#if defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
1621/* This code uses shld, thus is not suitable for SH1 / SH2.  */
1622
1623/* Signed / unsigned division without use of FPU, optimized for SH4.
1624   Uses a lookup table for divisors in the range -128 .. +128, and
1625   div1 with case distinction for larger divisors in three more ranges.
1626   The code is lumped together with the table to allow the use of mova.  */
/* L_LSB, L_LSWMSB and L_MSWLSB are endian-dependent byte offsets within
   a 32-bit word; the code uses the latter two with mov.b to drop
   partial results into a word on the stack.  */
1627#ifdef __LITTLE_ENDIAN__
1628#define L_LSB 0
1629#define L_LSWMSB 1
1630#define L_MSWLSB 2
1631#else
1632#define L_LSB 3
1633#define L_LSWMSB 2
1634#define L_MSWLSB 1
1635#endif
1636
/* GLOBAL(udivsi3_i4i): dividend in r4, divisor in r5, result in r0.
   Every path pushes the dividend and the original divisor and pops
   them back into r4 / r5 before returning; r1 is scratch.
   Several labels in this function (LOCAL(div_le128), LOCAL(div_ge64k),
   LOCAL(div_r8), LOCAL(div_by_1_neg)) are entry points used by
   GLOBAL(sdivsi3_i4i), and two paths here finish in tails that live in
   that function (LOCAL(udiv_25), LOCAL(div_ge64k_end)).  */
1637	.balign 4
1638	.global	GLOBAL(udivsi3_i4i)
1639	FUNC(GLOBAL(udivsi3_i4i))
1640GLOBAL(udivsi3_i4i):
1641	mov.w LOCAL(c128_w), r1
1642	div0u
1643	mov r4,r0
1644	shlr8 r0
/* T = (r5 > 128), unsigned.  r0 now holds the dividend shifted right
   by 8.  */
1645	cmp/hi r1,r5
1646	extu.w r5,r1
/* Divisor <= 128: use the reciprocal tables.  */
1647	bf LOCAL(udiv_le128)
/* Divisor with bits above bit 15: separate div1 sequences.  */
1648	cmp/eq r5,r1
1649	bf LOCAL(udiv_ge64k)
/* Remaining case, 128 < divisor < 64K: keep a copy of the divisor in
   r1 for the stack, shift r5 up by 16, start the div1 chain and
   continue in LOCAL(udiv_25).  */
1650	shlr r0
1651	mov r5,r1
1652	shll16 r5
1653	mov.l r4,@-r15
1654	div1 r5,r0
1655	mov.l r1,@-r15
1656	div1 r5,r0
1657	div1 r5,r0
1658	bra LOCAL(udiv_25)
1659	div1 r5,r0
1660
/* Table path entry for callers that have already pushed r4 and r5.  */
1661LOCAL(div_le128):
1662	mova LOCAL(div_table_ix),r0
1663	bra LOCAL(div_le128_2)
1664	mov.b @(r0,r5),r1
/* Table path for the unsigned entry: push r4 / r5 and fetch the index
   byte for this divisor into r1.  */
1665LOCAL(udiv_le128):
1666	mov.l r4,@-r15
1667	mova LOCAL(div_table_ix),r0
1668	mov.b @(r0,r5),r1
1669	mov.l r5,@-r15
1670LOCAL(div_le128_2):
/* r1 = reciprocal word at byte offset r1 from LOCAL(div_table_inv).  */
1671	mova LOCAL(div_table_inv),r0
1672	mov.l @(r0,r1),r1
/* T = 1 when the divisor has no bit other than bit 0 set.  */
1673	mov r5,r0
1674	tst #0xfe,r0
1675	mova LOCAL(div_table_clz),r0
/* mach:macl = reciprocal * dividend; r1 = shift count for this
   divisor.  Neither instruction changes T.  */
1676	dmulu.l r1,r4
1677	mov.b @(r0,r5),r1
1678	bt/s LOCAL(div_by_1)
1679	mov r4,r0
1680	mov.l @r15+,r5
1681	sts mach,r0
/* T is already clear here because the bt/s above was not taken, which
   is why the clrt is commented out.  Adding r4 supplies the implicit
   leading 1 of the table entry; rotcr then shifts the carry of that
   addition back in from the top.  */
1682	/* clrt */
1683	addc r4,r0
1684	mov.l @r15+,r4
1685	rotcr r0
1686	rts
1687	shld r1,r0
1688
/* Entry used by the signed code: result is the negated dividend.  */
1689LOCAL(div_by_1_neg):
1690	neg r4,r0
/* r0 already holds the result; restore r5 and r4 and return.  */
1691LOCAL(div_by_1):
1692	mov.l @r15+,r5
1693	rts
1694	mov.l @r15+,r4
1695
/* Entry from the signed code for divisors with bits above bit 15.  T
   was set by the caller with cmp/hi r0,r5 and selects LOCAL(div_r8).  */
1696LOCAL(div_ge64k):
1697	bt/s LOCAL(div_r8)
1698	div0u
1699	shll8 r5
1700	bra LOCAL(div_ge64k_2)
1701	div1 r5,r0
/* Unsigned entry for divisors with bits above bit 15.  When the
   divisor exceeds dividend >> 8 (held in r0) go to LOCAL(udiv_r8);
   otherwise shift the divisor up by 8 and run div1 steps on r0.  */
1702LOCAL(udiv_ge64k):
1703	cmp/hi r0,r5
1704	mov r5,r1
1705	bt LOCAL(udiv_r8)
1706	shll8 r5
1707	mov.l r4,@-r15
1708	div1 r5,r0
1709	mov.l r1,@-r15
1710LOCAL(div_ge64k_2):
1711	div1 r5,r0
/* r1 = 0; pushed below as the word that receives partial result
   bytes.  */
1712	mov.l LOCAL(zero_l),r1
1713	.rept 4
1714	div1 r5,r0
1715	.endr
1716	mov.l r1,@-r15
1717	div1 r5,r0
1718	mov.w LOCAL(m256_w),r1
1719	div1 r5,r0
1720	mov.b r0,@(L_LSWMSB,r15)
/* xor / and / xor: take the bits selected by the mask in r1 from r0 and
   the remaining bits from r4; the final xor is the bra delay slot.  */
1721	xor r4,r0
1722	and r1,r0
1723	bra LOCAL(div_ge64k_end)
1724	xor r4,r0
1725
/* Entry from the signed code (r4 / r5 already pushed): shift the
   dividend left by 24 and join the common sequence.  */
1726LOCAL(div_r8):
1727	shll16 r4
1728	bra LOCAL(div_r8_2)
1729	shll8 r4
/* Unsigned entry: push r4 / r5, shift the dividend left by 24.  */
1730LOCAL(udiv_r8):
1731	mov.l r4,@-r15
1732	shll16 r4
1733	clrt
1734	shll8 r4
1735	mov.l r5,@-r15
/* Eight div1 steps on r1 (a copy of dividend >> 8), with the T bits
   rotated into r0.  r5 is copied to r4 so that the final div1 can
   still use it after the caller's r5 has been popped.  */
1736LOCAL(div_r8_2):
1737	rotcl r4
1738	mov r0,r1
1739	div1 r5,r1
1740	mov r4,r0
1741	rotcl r0
1742	mov r5,r4
1743	div1 r5,r1
1744	.rept 5
1745	rotcl r0; div1 r5,r1
1746	.endr
1747	rotcl r0
1748	mov.l @r15+,r5
1749	div1 r4,r1
1750	mov.l @r15+,r4
1751	rts
1752	rotcl r0
1753
1754	ENDFUNC(GLOBAL(udivsi3_i4i))
1755
/* GLOBAL(sdivsi3_i4i): signed counterpart of GLOBAL(udivsi3_i4i), with
   dividend in r4, divisor in r5 and result in r0.  The original r4 and
   r5 are pushed first and popped before returning.  Negative operands
   are negated, the division proceeds as in the unsigned code (sharing
   several of its labels), and the paths reached through
   LOCAL(neg_result) negate the result at the end.  */
1756	.global	GLOBAL(sdivsi3_i4i)
1757	FUNC(GLOBAL(sdivsi3_i4i))
1758	/* This is link-compatible with a GLOBAL(sdivsi3) call,
1759	   but we effectively clobber only r1.  */
1760GLOBAL(sdivsi3_i4i):
1761	mov.l r4,@-r15
/* T = (divisor >= 0); r1 = 128.  The delay slot then sets
   T = (dividend >= 0) for whichever path is taken.  */
1762	cmp/pz r5
1763	mov.w LOCAL(c128_w), r1
1764	bt/s LOCAL(pos_divisor)
1765	cmp/pz r4
/* Negative divisor: save the original and negate it.  A non-negative
   dividend means a negative result; otherwise negate the dividend too
   and fall into the positive-result code.  The delay slot sets
   T = (divisor magnitude > 128).  */
1766	mov.l r5,@-r15
1767	neg r5,r5
1768	bt/s LOCAL(neg_result)
1769	cmp/hi r1,r5
1770	neg r4,r4
/* Result is not negated.  Same three-way split on the divisor as in
   GLOBAL(udivsi3_i4i): <= 128, bits above bit 15, or in between.  */
1771LOCAL(pos_result):
1772	extu.w r5,r0
1773	bf LOCAL(div_le128)
1774	cmp/eq r5,r0
1775	mov r4,r0
1776	shlr8 r0
1777	bf/s LOCAL(div_ge64k)
1778	cmp/hi r0,r5
1779	div0u
1780	shll16 r5
1781	div1 r5,r0
1782	div1 r5,r0
1783	div1 r5,r0
/* Shared tail; the 16-bit-divisor path of GLOBAL(udivsi3_i4i) branches
   here as well.  A zero word is pushed and partial result bytes are
   stored into it with mov.b.  */
1784LOCAL(udiv_25):
1785	mov.l LOCAL(zero_l),r1
1786	div1 r5,r0
1787	div1 r5,r0
1788	mov.l r1,@-r15
1789	.rept 3
1790	div1 r5,r0
1791	.endr
1792	mov.b r0,@(L_MSWLSB,r15)
1793	xtrct r4,r0
1794	swap.w r0,r0
1795	.rept 8
1796	div1 r5,r0
1797	.endr
1798	mov.b r0,@(L_LSWMSB,r15)
/* Last eight div1 steps, then merge the low byte of r0 with the word
   assembled on the stack, restore r5 and r4, and shift in the final
   T bit in the rts delay slot.  */
1799LOCAL(div_ge64k_end):
1800	.rept 8
1801	div1 r5,r0
1802	.endr
1803	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
1804	extu.b r0,r0
1805	mov.l @r15+,r5
1806	or r4,r0
1807	mov.l @r15+,r4
1808	rts
1809	rotcl r0
1810
/* Table path with negated result.  r0 holds the zero-extended low half
   of the divisor on entry, so the tst checks the same bits as in
   LOCAL(div_le128_2).  */
1811LOCAL(div_le128_neg):
1812	tst #0xfe,r0
1813	mova LOCAL(div_table_ix),r0
1814	mov.b @(r0,r5),r1
1815	mova LOCAL(div_table_inv),r0
1816	bt/s LOCAL(div_by_1_neg)
1817	mov.l @(r0,r1),r1
1818	mova LOCAL(div_table_clz),r0
1819	dmulu.l r1,r4
1820	mov.b @(r0,r5),r1
1821	mov.l @r15+,r5
1822	sts mach,r0
/* T is clear here because the bt/s above was not taken.  */
1823	/* clrt */
1824	addc r4,r0
1825	mov.l @r15+,r4
1826	rotcr r0
1827	shld r1,r0
1828	rts
1829	neg r0,r0
1830
/* Non-negative divisor: save it; a non-negative dividend goes to the
   positive-result code, otherwise the dividend is negated and the
   result will be negated.  */
1831LOCAL(pos_divisor):
1832	mov.l r5,@-r15
1833	bt/s LOCAL(pos_result)
1834	cmp/hi r1,r5
1835	neg r4,r4
/* Mirror of LOCAL(pos_result) for results that must be negated.  */
1836LOCAL(neg_result):
1837	extu.w r5,r0
1838	bf LOCAL(div_le128_neg)
1839	cmp/eq r5,r0
1840	mov r4,r0
1841	shlr8 r0
1842	bf/s LOCAL(div_ge64k_neg)
1843	cmp/hi r0,r5
1844	div0u
1845	mov.l LOCAL(zero_l),r1
1846	shll16 r5
1847	div1 r5,r0
1848	mov.l r1,@-r15
1849	.rept 7
1850	div1 r5,r0
1851	.endr
1852	mov.b r0,@(L_MSWLSB,r15)
1853	xtrct r4,r0
1854	swap.w r0,r0
1855	.rept 8
1856	div1 r5,r0
1857	.endr
1858	mov.b r0,@(L_LSWMSB,r15)
/* As LOCAL(div_ge64k_end), but the merged value is built in r1 and
   negated into r0 in the rts delay slot.  */
1859LOCAL(div_ge64k_neg_end):
1860	.rept 8
1861	div1 r5,r0
1862	.endr
1863	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
1864	extu.b r0,r1
1865	mov.l @r15+,r5
1866	or r4,r1
1867LOCAL(div_r8_neg_end):
1868	mov.l @r15+,r4
1869	rotcl r1
1870	rts
1871	neg r1,r0
1872
/* Negated-result counterpart of LOCAL(div_ge64k); T from the caller's
   cmp/hi r0,r5 selects LOCAL(div_r8_neg).  */
1873LOCAL(div_ge64k_neg):
1874	bt/s LOCAL(div_r8_neg)
1875	div0u
1876	shll8 r5
1877	mov.l LOCAL(zero_l),r1
1878	.rept 6
1879	div1 r5,r0
1880	.endr
1881	mov.l r1,@-r15
1882	div1 r5,r0
1883	mov.w LOCAL(m256_w),r1
1884	div1 r5,r0
1885	mov.b r0,@(L_LSWMSB,r15)
1886	xor r4,r0
1887	and r1,r0
1888	bra LOCAL(div_ge64k_neg_end)
1889	xor r4,r0
1890
/* 16-bit constant 128, loaded with mov.w by both entry points.  It is
   placed after an unconditional bra and its delay slot, so it is never
   executed.  */
1891LOCAL(c128_w):
1892	.word 128
1893
/* Negated-result counterpart of LOCAL(div_r8_2): eight div1 steps on
   r0 with the T bits rotated into r1; r5 is copied to r4 so the last
   div1 (in the bra delay slot) can run after r5 has been popped.  */
1894LOCAL(div_r8_neg):
1895	clrt
1896	shll16 r4
1897	mov r4,r1
1898	shll8 r1
1899	mov r5,r4
1900	.rept 7
1901	rotcl r1; div1 r5,r0
1902	.endr
1903	mov.l @r15+,r5
1904	rotcl r1
1905	bra LOCAL(div_r8_neg_end)
1906	div1 r4,r0
1907
/* 16-bit mask constant loaded with mov.w in the >= 64K paths; like
   LOCAL(c128_w) it follows an unconditional bra and its delay slot.  */
1908LOCAL(m256_w):
1909	.word 0xff00
1910/* This table has been generated by divtab-sh4.c.  */
/* LOCAL(div_table_clz): one signed byte per divisor, indexed by the
   divisor itself (entries 0..127 are stored here).  The division code
   loads the entry into r1 and uses it as the shld count applied to the
   scaled product.  The entry for divisor 128 is the first byte of
   LOCAL(div_table_ix), which follows immediately.  */
1911	.balign 4
1912LOCAL(div_table_clz):
1913	.byte	0
1914	.byte	1
1915	.byte	0
1916	.byte	-1
1917	.byte	-1
1918	.byte	-2
1919	.byte	-2
1920	.byte	-2
1921	.byte	-2
1922	.byte	-3
1923	.byte	-3
1924	.byte	-3
1925	.byte	-3
1926	.byte	-3
1927	.byte	-3
1928	.byte	-3
1929	.byte	-3
1930	.byte	-4
1931	.byte	-4
1932	.byte	-4
1933	.byte	-4
1934	.byte	-4
1935	.byte	-4
1936	.byte	-4
1937	.byte	-4
1938	.byte	-4
1939	.byte	-4
1940	.byte	-4
1941	.byte	-4
1942	.byte	-4
1943	.byte	-4
1944	.byte	-4
1945	.byte	-4
1946	.byte	-5
1947	.byte	-5
1948	.byte	-5
1949	.byte	-5
1950	.byte	-5
1951	.byte	-5
1952	.byte	-5
1953	.byte	-5
1954	.byte	-5
1955	.byte	-5
1956	.byte	-5
1957	.byte	-5
1958	.byte	-5
1959	.byte	-5
1960	.byte	-5
1961	.byte	-5
1962	.byte	-5
1963	.byte	-5
1964	.byte	-5
1965	.byte	-5
1966	.byte	-5
1967	.byte	-5
1968	.byte	-5
1969	.byte	-5
1970	.byte	-5
1971	.byte	-5
1972	.byte	-5
1973	.byte	-5
1974	.byte	-5
1975	.byte	-5
1976	.byte	-5
1977	.byte	-5
1978	.byte	-6
1979	.byte	-6
1980	.byte	-6
1981	.byte	-6
1982	.byte	-6
1983	.byte	-6
1984	.byte	-6
1985	.byte	-6
1986	.byte	-6
1987	.byte	-6
1988	.byte	-6
1989	.byte	-6
1990	.byte	-6
1991	.byte	-6
1992	.byte	-6
1993	.byte	-6
1994	.byte	-6
1995	.byte	-6
1996	.byte	-6
1997	.byte	-6
1998	.byte	-6
1999	.byte	-6
2000	.byte	-6
2001	.byte	-6
2002	.byte	-6
2003	.byte	-6
2004	.byte	-6
2005	.byte	-6
2006	.byte	-6
2007	.byte	-6
2008	.byte	-6
2009	.byte	-6
2010	.byte	-6
2011	.byte	-6
2012	.byte	-6
2013	.byte	-6
2014	.byte	-6
2015	.byte	-6
2016	.byte	-6
2017	.byte	-6
2018	.byte	-6
2019	.byte	-6
2020	.byte	-6
2021	.byte	-6
2022	.byte	-6
2023	.byte	-6
2024	.byte	-6
2025	.byte	-6
2026	.byte	-6
2027	.byte	-6
2028	.byte	-6
2029	.byte	-6
2030	.byte	-6
2031	.byte	-6
2032	.byte	-6
2033	.byte	-6
2034	.byte	-6
2035	.byte	-6
2036	.byte	-6
2037	.byte	-6
2038	.byte	-6
2039	.byte	-6
2040	.byte	-6
2041/* Lookup table translating positive divisor to index into table of
2042   normalized inverse.  N.B. the '0' entry is also the last entry of the
2043 previous table, and causes an unaligned access for division by zero.  */
/* Each entry is a signed byte offset relative to LOCAL(div_table_inv),
   covering divisors 0..128.  Apart from entry 0 the offsets are
   multiples of 4 in the range -128..124, so they reach from
   LOCAL(zero_l) to the last word of the inverse table.  */
2044LOCAL(div_table_ix):
2045	.byte	-6
2046	.byte	-128
2047	.byte	-128
2048	.byte	0
2049	.byte	-128
2050	.byte	-64
2051	.byte	0
2052	.byte	64
2053	.byte	-128
2054	.byte	-96
2055	.byte	-64
2056	.byte	-32
2057	.byte	0
2058	.byte	32
2059	.byte	64
2060	.byte	96
2061	.byte	-128
2062	.byte	-112
2063	.byte	-96
2064	.byte	-80
2065	.byte	-64
2066	.byte	-48
2067	.byte	-32
2068	.byte	-16
2069	.byte	0
2070	.byte	16
2071	.byte	32
2072	.byte	48
2073	.byte	64
2074	.byte	80
2075	.byte	96
2076	.byte	112
2077	.byte	-128
2078	.byte	-120
2079	.byte	-112
2080	.byte	-104
2081	.byte	-96
2082	.byte	-88
2083	.byte	-80
2084	.byte	-72
2085	.byte	-64
2086	.byte	-56
2087	.byte	-48
2088	.byte	-40
2089	.byte	-32
2090	.byte	-24
2091	.byte	-16
2092	.byte	-8
2093	.byte	0
2094	.byte	8
2095	.byte	16
2096	.byte	24
2097	.byte	32
2098	.byte	40
2099	.byte	48
2100	.byte	56
2101	.byte	64
2102	.byte	72
2103	.byte	80
2104	.byte	88
2105	.byte	96
2106	.byte	104
2107	.byte	112
2108	.byte	120
2109	.byte	-128
2110	.byte	-124
2111	.byte	-120
2112	.byte	-116
2113	.byte	-112
2114	.byte	-108
2115	.byte	-104
2116	.byte	-100
2117	.byte	-96
2118	.byte	-92
2119	.byte	-88
2120	.byte	-84
2121	.byte	-80
2122	.byte	-76
2123	.byte	-72
2124	.byte	-68
2125	.byte	-64
2126	.byte	-60
2127	.byte	-56
2128	.byte	-52
2129	.byte	-48
2130	.byte	-44
2131	.byte	-40
2132	.byte	-36
2133	.byte	-32
2134	.byte	-28
2135	.byte	-24
2136	.byte	-20
2137	.byte	-16
2138	.byte	-12
2139	.byte	-8
2140	.byte	-4
2141	.byte	0
2142	.byte	4
2143	.byte	8
2144	.byte	12
2145	.byte	16
2146	.byte	20
2147	.byte	24
2148	.byte	28
2149	.byte	32
2150	.byte	36
2151	.byte	40
2152	.byte	44
2153	.byte	48
2154	.byte	52
2155	.byte	56
2156	.byte	60
2157	.byte	64
2158	.byte	68
2159	.byte	72
2160	.byte	76
2161	.byte	80
2162	.byte	84
2163	.byte	88
2164	.byte	92
2165	.byte	96
2166	.byte	100
2167	.byte	104
2168	.byte	108
2169	.byte	112
2170	.byte	116
2171	.byte	120
2172	.byte	124
2173	.byte	-128
2174/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
/* 64 words in total: 32 from LOCAL(zero_l) up to LOCAL(div_table_inv)
   (reached with negative offsets) and 32 from LOCAL(div_table_inv) on.
   LOCAL(zero_l) does double duty: it is the first table entry and also
   the zero constant the div1 paths load with mov.l LOCAL(zero_l),r1.  */
2175	.balign 4
2176LOCAL(zero_l):
2177	.long	0x0
2178	.long	0xF81F81F9
2179	.long	0xF07C1F08
2180	.long	0xE9131AC0
2181	.long	0xE1E1E1E2
2182	.long	0xDAE6076C
2183	.long	0xD41D41D5
2184	.long	0xCD856891
2185	.long	0xC71C71C8
2186	.long	0xC0E07039
2187	.long	0xBACF914D
2188	.long	0xB4E81B4F
2189	.long	0xAF286BCB
2190	.long	0xA98EF607
2191	.long	0xA41A41A5
2192	.long	0x9EC8E952
2193	.long	0x9999999A
2194	.long	0x948B0FCE
2195	.long	0x8F9C18FA
2196	.long	0x8ACB90F7
2197	.long	0x86186187
2198	.long	0x81818182
2199	.long	0x7D05F418
2200	.long	0x78A4C818
2201	.long	0x745D1746
2202	.long	0x702E05C1
2203	.long	0x6C16C16D
2204	.long	0x68168169
2205	.long	0x642C8591
2206	.long	0x60581606
2207	.long	0x5C9882BA
2208	.long	0x58ED2309
2209LOCAL(div_table_inv):
2210	.long	0x55555556
2211	.long	0x51D07EAF
2212	.long	0x4E5E0A73
2213	.long	0x4AFD6A06
2214	.long	0x47AE147B
2215	.long	0x446F8657
2216	.long	0x41414142
2217	.long	0x3E22CBCF
2218	.long	0x3B13B13C
2219	.long	0x38138139
2220	.long	0x3521CFB3
2221	.long	0x323E34A3
2222	.long	0x2F684BDB
2223	.long	0x2C9FB4D9
2224	.long	0x29E4129F
2225	.long	0x27350B89
2226	.long	0x24924925
2227	.long	0x21FB7813
2228	.long	0x1F7047DD
2229	.long	0x1CF06ADB
2230	.long	0x1A7B9612
2231	.long	0x18118119
2232	.long	0x15B1E5F8
2233	.long	0x135C8114
2234	.long	0x11111112
2235	.long	0xECF56BF
2236	.long	0xC9714FC
2237	.long	0xA6810A7
2238	.long	0x8421085
2239	.long	0x624DD30
2240	.long	0x4104105
2241	.long	0x2040811
2242	/* maximum error: 0.987342 scaled: 0.921875*/
2243
/* The tables above are emitted before this marker, so they lie inside
   the extent recorded for GLOBAL(sdivsi3_i4i).  */
2244	ENDFUNC(GLOBAL(sdivsi3_i4i))
2245#endif /* SH3 / SH4 */
2246
2247#endif /* L_div_table */
2248
2249#ifdef L_udiv_qrnnd_16
/* GLOBAL(udiv_qrnnd_16): one 16-bit quotient step.  Register usage
   follows the comment below: r0 = n1, r4 = n0, r5 = d and r6 = d1 on
   entry; r0 = rn and r1 = qn on return; r2 is scratch (__m).  */
2250	HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
2251	/* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
2252	/* n1 < d, but n1 might be larger than d1.  */
2253	.global GLOBAL(udiv_qrnnd_16)
2254	.balign 8
2255GLOBAL(udiv_qrnnd_16):
2256	div0u
/* r0 > r6 (unsigned) is handled out of line at .Lots.  */
2257	cmp/hi r6,r0
2258	bt .Lots
/* Sixteen div1 steps of r0 by r6.  */
2259	.rept 16
2260	div1 r6,r0
2261	.endr
/* r1 takes the low 16 bits of r0; r6 is added back to r0 only when the
   last div1 left T clear; the rotcl then shifts T into r1.  */
2262	extu.w r0,r1
2263	bt 0f
2264	add r6,r0
22650:	rotcl r1
/* r2 = r1 * r5 (16 x 16 multiply, result in macl), and r0 is combined
   with the upper half of r4 by xtrct / swap.w.  */
2266	mulu.w r1,r5
2267	xtrct r4,r0
2268	swap.w r0,r0
2269	sts macl,r2
/* If r0 >= r2 the estimate stands: subtract and return through the
   rts at the final 0: label.  Otherwise decrement r1 and add r5 back,
   once or twice depending on the carry of the first addition.  */
2270	cmp/hs r2,r0
2271	sub r2,r0
2272	bt 0f
2273	addc r5,r0
2274	add #-1,r1
2275	bt 0f
22761:	add #-1,r1
2277	rts
2278	add r5,r0
2279	.balign 8
/* Out-of-line path for r0 > r6 on entry.  SL1 is a macro defined
   outside this chunk; presumably it emits the bf to label 1 above with
   shlr16 r1 as its slot instruction -- confirm in lib1funcs.h.  */
2280.Lots:
2281	sub r5,r0
2282	swap.w r4,r1
2283	xtrct r0,r1
2284	clrt
2285	mov r1,r0
2286	addc r5,r0
2287	mov #-1,r1
2288	SL1(bf, 1b,
2289	shlr16 r1)
22900:	rts
2291	nop
2292	ENDFUNC(GLOBAL(udiv_qrnnd_16))
2293#endif /* L_udiv_qrnnd_16 */
2294