xref: /netbsd-src/external/gpl3/gcc/dist/libgcc/config/sh/lib1funcs.S (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1/* Copyright (C) 1994-2013 Free Software Foundation, Inc.
2
3This file is free software; you can redistribute it and/or modify it
4under the terms of the GNU General Public License as published by the
5Free Software Foundation; either version 3, or (at your option) any
6later version.
7
8This file is distributed in the hope that it will be useful, but
9WITHOUT ANY WARRANTY; without even the implied warranty of
10MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11General Public License for more details.
12
13Under Section 7 of GPL version 3, you are granted additional
14permissions described in the GCC Runtime Library Exception, version
153.1, as published by the Free Software Foundation.
16
17You should have received a copy of the GNU General Public License and
18a copy of the GCC Runtime Library Exception along with this program;
19see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
20<http://www.gnu.org/licenses/>.  */
21
22
23!! libgcc routines for the Renesas / SuperH SH CPUs.
24!! Contributed by Steve Chamberlain.
25!! sac@cygnus.com
26
27!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
28!! recoded in assembly by Toshiyasu Morita
29!! tm@netcom.com
30
31#if defined(__ELF__) && defined(__linux__)
/* Emit an empty .note.GNU-stack section, then switch back to whatever
   section was active before.  NOTE(review): by GNU toolchain
   convention this note marks the object as not needing an executable
   stack; that meaning comes from the linker, not from this file.  */
32.section .note.GNU-stack,"",%progbits
33.previous
34#endif
35
36/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37   ELF local label prefixes by J"orn Rennecke
38   amylaar@cygnus.com  */
39
40#include "lib1funcs.h"
41
42/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
43   so it is more convenient to define NO_FPSCR_VALUES here than to
44   define it on the command line.  */
45#if defined __vxworks && defined __PIC__
46#define NO_FPSCR_VALUES
47#endif
48
49#if ! __SH5__
50#ifdef L_ashiftrt
/* ashiftrt_r4_N (N = 0..32): arithmetic right shift of r4 by the
   constant N, with one global entry point per shift count.

   Entry:    r4 = value to shift
   Exit:     r4 = value shifted right by N with sign propagation
   Destroys: T bit on the paths that execute shar, rotcl or subc

   The entry points form fall-through chains of one-bit "shar" steps.
   The chains for 25..30 and 17..23 end at the entry points for 24 and
   16, which finish with shlr8/shlr16 plus a sign extension; the chain
   for 2..15 ends at the entry point for 1.  */
51	.global	GLOBAL(ashiftrt_r4_0)
52	.global	GLOBAL(ashiftrt_r4_1)
53	.global	GLOBAL(ashiftrt_r4_2)
54	.global	GLOBAL(ashiftrt_r4_3)
55	.global	GLOBAL(ashiftrt_r4_4)
56	.global	GLOBAL(ashiftrt_r4_5)
57	.global	GLOBAL(ashiftrt_r4_6)
58	.global	GLOBAL(ashiftrt_r4_7)
59	.global	GLOBAL(ashiftrt_r4_8)
60	.global	GLOBAL(ashiftrt_r4_9)
61	.global	GLOBAL(ashiftrt_r4_10)
62	.global	GLOBAL(ashiftrt_r4_11)
63	.global	GLOBAL(ashiftrt_r4_12)
64	.global	GLOBAL(ashiftrt_r4_13)
65	.global	GLOBAL(ashiftrt_r4_14)
66	.global	GLOBAL(ashiftrt_r4_15)
67	.global	GLOBAL(ashiftrt_r4_16)
68	.global	GLOBAL(ashiftrt_r4_17)
69	.global	GLOBAL(ashiftrt_r4_18)
70	.global	GLOBAL(ashiftrt_r4_19)
71	.global	GLOBAL(ashiftrt_r4_20)
72	.global	GLOBAL(ashiftrt_r4_21)
73	.global	GLOBAL(ashiftrt_r4_22)
74	.global	GLOBAL(ashiftrt_r4_23)
75	.global	GLOBAL(ashiftrt_r4_24)
76	.global	GLOBAL(ashiftrt_r4_25)
77	.global	GLOBAL(ashiftrt_r4_26)
78	.global	GLOBAL(ashiftrt_r4_27)
79	.global	GLOBAL(ashiftrt_r4_28)
80	.global	GLOBAL(ashiftrt_r4_29)
81	.global	GLOBAL(ashiftrt_r4_30)
82	.global	GLOBAL(ashiftrt_r4_31)
83	.global	GLOBAL(ashiftrt_r4_32)
84
85	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
86	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
87	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
88	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
89	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
90	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
91	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
92	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
93	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
94	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
95	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
96	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
97	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
98	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
99	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
100	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
101	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
102	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
103	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
104	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
105	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
106	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
107	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
108	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
109	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
110	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
111	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
112	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
113	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
114	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
115	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
116	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
117	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
118
119	.align	1
/* Counts 31 and 32 share one body: the result is only the sign, 0 or -1.
   rotcl moves bit 31 of r4 into T; subc r4,r4 (in the rts delay slot)
   then computes r4 - r4 - T = -T.  */
120GLOBAL(ashiftrt_r4_32):
121GLOBAL(ashiftrt_r4_31):
122	rotcl	r4
123	rts
124	subc	r4,r4
125
/* Counts 25..30: (N - 24) one-bit arithmetic shifts, then fall into the
   24-bit case.  */
126GLOBAL(ashiftrt_r4_30):
127	shar	r4
128GLOBAL(ashiftrt_r4_29):
129	shar	r4
130GLOBAL(ashiftrt_r4_28):
131	shar	r4
132GLOBAL(ashiftrt_r4_27):
133	shar	r4
134GLOBAL(ashiftrt_r4_26):
135	shar	r4
136GLOBAL(ashiftrt_r4_25):
137	shar	r4
/* Count 24: logical shift right by 16 + 8 leaves the former top byte in
   bits 0..7; exts.b in the rts delay slot sign-extends it.  */
138GLOBAL(ashiftrt_r4_24):
139	shlr16	r4
140	shlr8	r4
141	rts
142	exts.b	r4,r4
143
/* Counts 17..23: (N - 16) one-bit arithmetic shifts, then fall into the
   16-bit case.  */
144GLOBAL(ashiftrt_r4_23):
145	shar	r4
146GLOBAL(ashiftrt_r4_22):
147	shar	r4
148GLOBAL(ashiftrt_r4_21):
149	shar	r4
150GLOBAL(ashiftrt_r4_20):
151	shar	r4
152GLOBAL(ashiftrt_r4_19):
153	shar	r4
154GLOBAL(ashiftrt_r4_18):
155	shar	r4
156GLOBAL(ashiftrt_r4_17):
157	shar	r4
/* Count 16: logical shift right by 16, then sign-extend the low word in
   the rts delay slot.  */
158GLOBAL(ashiftrt_r4_16):
159	shlr16	r4
160	rts
161	exts.w	r4,r4
162
/* Counts 2..15: (N - 1) one-bit shifts here, plus the final one in the
   delay slot of the rts at ashiftrt_r4_1.  */
163GLOBAL(ashiftrt_r4_15):
164	shar	r4
165GLOBAL(ashiftrt_r4_14):
166	shar	r4
167GLOBAL(ashiftrt_r4_13):
168	shar	r4
169GLOBAL(ashiftrt_r4_12):
170	shar	r4
171GLOBAL(ashiftrt_r4_11):
172	shar	r4
173GLOBAL(ashiftrt_r4_10):
174	shar	r4
175GLOBAL(ashiftrt_r4_9):
176	shar	r4
177GLOBAL(ashiftrt_r4_8):
178	shar	r4
179GLOBAL(ashiftrt_r4_7):
180	shar	r4
181GLOBAL(ashiftrt_r4_6):
182	shar	r4
183GLOBAL(ashiftrt_r4_5):
184	shar	r4
185GLOBAL(ashiftrt_r4_4):
186	shar	r4
187GLOBAL(ashiftrt_r4_3):
188	shar	r4
189GLOBAL(ashiftrt_r4_2):
190	shar	r4
/* Count 1: the single shift executes in the rts delay slot.  */
191GLOBAL(ashiftrt_r4_1):
192	rts
193	shar	r4
194
/* Count 0: return with r4 unchanged.  */
195GLOBAL(ashiftrt_r4_0):
196	rts
197	nop
198
199	ENDFUNC(GLOBAL(ashiftrt_r4_0))
200	ENDFUNC(GLOBAL(ashiftrt_r4_1))
201	ENDFUNC(GLOBAL(ashiftrt_r4_2))
202	ENDFUNC(GLOBAL(ashiftrt_r4_3))
203	ENDFUNC(GLOBAL(ashiftrt_r4_4))
204	ENDFUNC(GLOBAL(ashiftrt_r4_5))
205	ENDFUNC(GLOBAL(ashiftrt_r4_6))
206	ENDFUNC(GLOBAL(ashiftrt_r4_7))
207	ENDFUNC(GLOBAL(ashiftrt_r4_8))
208	ENDFUNC(GLOBAL(ashiftrt_r4_9))
209	ENDFUNC(GLOBAL(ashiftrt_r4_10))
210	ENDFUNC(GLOBAL(ashiftrt_r4_11))
211	ENDFUNC(GLOBAL(ashiftrt_r4_12))
212	ENDFUNC(GLOBAL(ashiftrt_r4_13))
213	ENDFUNC(GLOBAL(ashiftrt_r4_14))
214	ENDFUNC(GLOBAL(ashiftrt_r4_15))
215	ENDFUNC(GLOBAL(ashiftrt_r4_16))
216	ENDFUNC(GLOBAL(ashiftrt_r4_17))
217	ENDFUNC(GLOBAL(ashiftrt_r4_18))
218	ENDFUNC(GLOBAL(ashiftrt_r4_19))
219	ENDFUNC(GLOBAL(ashiftrt_r4_20))
220	ENDFUNC(GLOBAL(ashiftrt_r4_21))
221	ENDFUNC(GLOBAL(ashiftrt_r4_22))
222	ENDFUNC(GLOBAL(ashiftrt_r4_23))
223	ENDFUNC(GLOBAL(ashiftrt_r4_24))
224	ENDFUNC(GLOBAL(ashiftrt_r4_25))
225	ENDFUNC(GLOBAL(ashiftrt_r4_26))
226	ENDFUNC(GLOBAL(ashiftrt_r4_27))
227	ENDFUNC(GLOBAL(ashiftrt_r4_28))
228	ENDFUNC(GLOBAL(ashiftrt_r4_29))
229	ENDFUNC(GLOBAL(ashiftrt_r4_30))
230	ENDFUNC(GLOBAL(ashiftrt_r4_31))
231	ENDFUNC(GLOBAL(ashiftrt_r4_32))
232#endif
233
234#ifdef L_ashiftrt_n
235
236!
237! GLOBAL(ashrsi3)
238!
239! Entry:
240!
241! r4: Value to shift
242! r5: Shift count
243!
244! Exit:
245!
246! r0: Result
247!
248! Destroys:
249!
250! T bit, r5
251!
252
/* ashrsi3: arithmetic right shift by a run-time count.  The count is
   reduced to 0..31 and used to index a table of byte offsets; control
   is then transferred to the matching point in a fall-through chain of
   shift instructions operating on r0.

   Layout dependency: in the non-SH1 build "braf r5" branches to
   (address of braf + 4) + r5.  The table offsets are relative to the
   table label, so this only works because the table starts exactly
   4 bytes after the braf (braf, its delay slot, then the table, with
   the .align 2 adding no padding).  The largest offset in the table is
   that of ashrsi3_0 (110 bytes), so every entry fits the sign-extending
   mov.b load.  */
253	.global	GLOBAL(ashrsi3)
254	HIDDEN_FUNC(GLOBAL(ashrsi3))
255	.align	2
256GLOBAL(ashrsi3):
/* r5 = count & 31 */
257	mov	#31,r0
258	and	r0,r5
/* r0 = address of the offset table; r5 = table[count] */
259	mova	LOCAL(ashrsi3_table),r0
260	mov.b	@(r0,r5),r5
261#ifdef __sh1__
/* __sh1__ build: form the absolute target (table address + offset) and
   jump through r0 instead of using braf.  */
262	add	r5,r0
263	jmp	@r0
264#else
265	braf	r5
266#endif
/* Delay slot of the jmp/braf: move the operand into the result register. */
267	mov	r4,r0
268
269	.align	2
270LOCAL(ashrsi3_table):
271	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
272	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
273	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
274	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
275	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
276	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
277	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
278	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
279	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
280	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
281	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
282	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
283	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
284	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
285	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
286	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
287	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
288	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
289	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
290	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
291	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
292	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
293	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
294	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
295	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
296	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
297	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
298	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
299	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
300	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
301	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
302	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
303
/* Count 31: result is the sign only.  rotcl puts bit 31 into T and
   subc r0,r0 yields r0 - r0 - T, i.e. 0 or -1.  */
304LOCAL(ashrsi3_31):
305	rotcl	r0
306	rts
307	subc	r0,r0
308
/* Counts 25..30: one-bit steps, falling into the 24-bit case.  */
309LOCAL(ashrsi3_30):
310	shar	r0
311LOCAL(ashrsi3_29):
312	shar	r0
313LOCAL(ashrsi3_28):
314	shar	r0
315LOCAL(ashrsi3_27):
316	shar	r0
317LOCAL(ashrsi3_26):
318	shar	r0
319LOCAL(ashrsi3_25):
320	shar	r0
/* Count 24: shift right logically by 16 + 8, then sign-extend the byte. */
321LOCAL(ashrsi3_24):
322	shlr16	r0
323	shlr8	r0
324	rts
325	exts.b	r0,r0
326
/* Counts 17..23: one-bit steps, falling into the 16-bit case.  */
327LOCAL(ashrsi3_23):
328	shar	r0
329LOCAL(ashrsi3_22):
330	shar	r0
331LOCAL(ashrsi3_21):
332	shar	r0
333LOCAL(ashrsi3_20):
334	shar	r0
335LOCAL(ashrsi3_19):
336	shar	r0
337LOCAL(ashrsi3_18):
338	shar	r0
339LOCAL(ashrsi3_17):
340	shar	r0
/* Count 16: shift right logically by 16, then sign-extend the word.  */
341LOCAL(ashrsi3_16):
342	shlr16	r0
343	rts
344	exts.w	r0,r0
345
/* Counts 2..15: one-bit steps; the last step is the shar in the delay
   slot of the rts at ashrsi3_1.  */
346LOCAL(ashrsi3_15):
347	shar	r0
348LOCAL(ashrsi3_14):
349	shar	r0
350LOCAL(ashrsi3_13):
351	shar	r0
352LOCAL(ashrsi3_12):
353	shar	r0
354LOCAL(ashrsi3_11):
355	shar	r0
356LOCAL(ashrsi3_10):
357	shar	r0
358LOCAL(ashrsi3_9):
359	shar	r0
360LOCAL(ashrsi3_8):
361	shar	r0
362LOCAL(ashrsi3_7):
363	shar	r0
364LOCAL(ashrsi3_6):
365	shar	r0
366LOCAL(ashrsi3_5):
367	shar	r0
368LOCAL(ashrsi3_4):
369	shar	r0
370LOCAL(ashrsi3_3):
371	shar	r0
372LOCAL(ashrsi3_2):
373	shar	r0
374LOCAL(ashrsi3_1):
375	rts
376	shar	r0
377
/* Count 0: r0 already holds the unshifted operand.  */
378LOCAL(ashrsi3_0):
379	rts
380	nop
381
382	ENDFUNC(GLOBAL(ashrsi3))
383#endif
384
385#ifdef L_ashiftlt
386
387!
388! GLOBAL(ashlsi3)
389! (For compatibility with older binaries, not used by compiler)
390!
391! Entry:
392!	r4: Value to shift
393!	r5: Shift count
394!
395! Exit:
396!	r0: Result
397!
398! Destroys:
399!	T bit
400!
401!
402! GLOBAL(ashlsi3_r0)
403!
404! Entry:
405!	r4: Value to shift
406!	r0: Shift count
407!
408! Exit:
409!	r0: Result
410!
411! Destroys:
412!	T bit
413
/* Implementation notes.

   ashlsi3 only copies the count from r5 to r0 and falls through into
   ashlsi3_r0 (any padding emitted by the .align 2 lies on that
   fall-through path).

   ashlsi3_r0 masks the count to 0..31 and multiplies it by 4, because
   every entry of ashlsi3_table is exactly two 2-byte instructions.
   It then branches into the table with the operand copied to r0 in the
   branch delay slot.  Table entries either finish the shift themselves
   or apply a first shift in a bra delay slot and continue in one of the
   shared tails after the table.  Do not insert, remove or reorder
   instructions in the table: the computed branch depends on the 4-byte
   entry size, and in the non-SH1 build also on the table starting
   4 bytes after the braf.

   The __sh1__ build computes an absolute address with mova/add and jumps
   with jmp; it needs r4 as scratch for that and saves/restores it on
   the stack around the address computation.  */
414	.global	GLOBAL(ashlsi3)
415	.global GLOBAL(ashlsi3_r0)
416	HIDDEN_FUNC(GLOBAL(ashlsi3))
417	HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
418GLOBAL(ashlsi3):
419	mov	r5,r0
420	.align	2
421GLOBAL(ashlsi3_r0):
422
423#ifdef __sh1__
424	and	#31,r0
425	shll2	r0
/* Park the operand on the stack so r4 can carry the scaled index.  */
426	mov.l	r4,@-r15
427	mov	r0,r4
428	mova	LOCAL(ashlsi3_table),r0
429	add	r4,r0
430	mov.l	@r15+,r4
431	jmp	@r0
432	mov	r4,r0
433	.align 2
434#else
435	and	#31,r0
436	shll2	r0
437	braf	r0
438	mov	r4,r0
439#endif
440
441LOCAL(ashlsi3_table):
442	rts				// << 0
443	nop
444LOCAL(ashlsi_1):
445	rts				// << 1
446	shll	r0
447LOCAL(ashlsi_2):			// << 2
448	rts
449	shll2	r0
450	bra	LOCAL(ashlsi_1)		// << 3
451	shll2	r0
452	bra	LOCAL(ashlsi_2)		// << 4
453	shll2	r0
454	bra	LOCAL(ashlsi_5)		// << 5
455	shll	r0
456	bra	LOCAL(ashlsi_6)		// << 6
457	shll2	r0
458	bra	LOCAL(ashlsi_7)		// << 7
459	shll	r0
460LOCAL(ashlsi_8):			// << 8
461	rts
462	shll8	r0
463	bra	LOCAL(ashlsi_8)		// << 9
464	shll	r0
465	bra	LOCAL(ashlsi_8)		// << 10
466	shll2	r0
467	bra	LOCAL(ashlsi_11)	// << 11
468	shll	r0
469	bra	LOCAL(ashlsi_12)	// << 12
470	shll2	r0
471	bra	LOCAL(ashlsi_13)	// << 13
472	shll	r0
473	bra	LOCAL(ashlsi_14)	// << 14
474	shll8	r0
475	bra	LOCAL(ashlsi_15)	// << 15
476	shll8	r0
477LOCAL(ashlsi_16):			// << 16
478	rts
479	shll16	r0
480	bra	LOCAL(ashlsi_16)	// << 17
481	shll	r0
482	bra	LOCAL(ashlsi_16)	// << 18
483	shll2	r0
484	bra	LOCAL(ashlsi_19)	// << 19
485	shll	r0
486	bra	LOCAL(ashlsi_20)	// << 20
487	shll2	r0
488	bra	LOCAL(ashlsi_21)	// << 21
489	shll	r0
490	bra	LOCAL(ashlsi_22)	// << 22
491	shll16	r0
492	bra	LOCAL(ashlsi_23)	// << 23
493	shll16	r0
494	bra	LOCAL(ashlsi_16)	// << 24
495	shll8	r0
496	bra	LOCAL(ashlsi_25)	// << 25
497	shll	r0
498	bra	LOCAL(ashlsi_26)	// << 26
499	shll2	r0
500	bra	LOCAL(ashlsi_27)	// << 27
501	shll	r0
502	bra	LOCAL(ashlsi_28)	// << 28
503	shll2	r0
504	bra	LOCAL(ashlsi_29)	// << 29
505	shll16	r0
506	bra	LOCAL(ashlsi_30)	// << 30
507	shll16	r0
/* << 31: keep only bit 0 and rotate it into bit 31.  This last entry is
   three instructions long, which is fine because nothing is indexed
   after it.  */
508	and	#1,r0			// << 31
509	rts
510	rotr	r0
511
/* Shared tails.  The table entry that branches here has already applied
   one shift in its bra delay slot.  The first three rts instructions
   below are not followed by a dedicated delay-slot instruction: their
   slot is the next instruction in memory, i.e. the first shll2 of the
   following tail, and that shift counts towards the total.
     << 7:  1 + 2 + 2 + 2      << 5: 1 + 2 + 2      << 6: 2 + 2 + 2  */
512LOCAL(ashlsi_7):
513	shll2	r0
514LOCAL(ashlsi_5):
515LOCAL(ashlsi_6):
516	shll2	r0
517	rts
/* << 13: 1 + 2 + 8 + 2    << 12: 2 + 8 + 2    << 11: 1 + 8 + 2
   (the shll2 below doubles as the delay slot of the rts above).  */
518LOCAL(ashlsi_13):
519	shll2	r0
520LOCAL(ashlsi_12):
521LOCAL(ashlsi_11):
522	shll8	r0
523	rts
/* << 21: 1 + 2 + 16 + 2   << 20: 2 + 16 + 2   << 19: 1 + 16 + 2  */
524LOCAL(ashlsi_21):
525	shll2	r0
526LOCAL(ashlsi_20):
527LOCAL(ashlsi_19):
528	shll16	r0
529	rts
/* << 28: 2 + 2 + 16 + 8   << 27: 1 + 2 + 16 + 8
   << 26: 2 + 16 + 8       << 25: 1 + 16 + 8
   This rts has its own delay-slot instruction (shll8).  */
530LOCAL(ashlsi_28):
531LOCAL(ashlsi_27):
532	shll2	r0
533LOCAL(ashlsi_26):
534LOCAL(ashlsi_25):
535	shll16	r0
536	rts
537	shll8	r0
538
/* << 22: 16 - 2 + 8   << 14: 8 - 2 + 8.  The right shift only discards
   zero bits that the preceding left shift brought in.  */
539LOCAL(ashlsi_22):
540LOCAL(ashlsi_14):
541	shlr2	r0
542	rts
543	shll8	r0
544
/* << 23: 16 - 1 + 8   << 15: 8 - 1 + 8  */
545LOCAL(ashlsi_23):
546LOCAL(ashlsi_15):
547	shlr	r0
548	rts
549	shll8	r0
550
/* << 29: 16 - 1 - 2 + 16   << 30: 16 - 2 + 16  */
551LOCAL(ashlsi_29):
552	shlr	r0
553LOCAL(ashlsi_30):
554	shlr2	r0
555	rts
556	shll16	r0
557
558	ENDFUNC(GLOBAL(ashlsi3))
559	ENDFUNC(GLOBAL(ashlsi3_r0))
560#endif
561
562#ifdef L_lshiftrt
563
564!
565! GLOBAL(lshrsi3)
566! (For compatibility with older binaries, not used by compiler)
567!
568! Entry:
569!	r4: Value to shift
570!	r5: Shift count
571!
572! Exit:
573!	r0: Result
574!
575! Destroys:
576!	T bit
577!
578!
579! GLOBAL(lshrsi3_r0)
580!
581! Entry:
582!	r4: Value to shift
583!	r0: Shift count
584!
585! Exit:
586!	r0: Result
587!
588! Destroys:
589!	T bit
590
/* Implementation notes.

   lshrsi3 copies the count from r5 to r0 and falls through into
   lshrsi3_r0 (any padding emitted by the .align 2 lies on that path).

   lshrsi3_r0 masks the count to 0..31, scales it by 4 (each entry of
   lshrsi3_table is two 2-byte instructions) and branches into the
   table, copying the operand to r0 in the branch delay slot.  Entries
   either complete the shift or do a first shift in a bra delay slot and
   continue in a shared tail after the table.  The computed branch
   depends on the 4-byte entry size and, in the non-SH1 build, on the
   table starting 4 bytes after the braf, so the layout must not change.

   The __sh1__ build forms an absolute address with mova/add and uses
   jmp, temporarily saving r4 on the stack to use it as scratch.  */
591	.global	GLOBAL(lshrsi3)
592	.global	GLOBAL(lshrsi3_r0)
593	HIDDEN_FUNC(GLOBAL(lshrsi3))
594	HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
595GLOBAL(lshrsi3):
596	mov	r5,r0
597	.align	2
598GLOBAL(lshrsi3_r0):
599
600#ifdef __sh1__
601	and	#31,r0
602	shll2	r0
/* Park the operand on the stack so r4 can carry the scaled index.  */
603	mov.l	r4,@-r15
604	mov	r0,r4
605	mova	LOCAL(lshrsi3_table),r0
606	add	r4,r0
607	mov.l	@r15+,r4
608	jmp	@r0
609	mov	r4,r0
610	.align 2
611#else
612	and	#31,r0
613	shll2	r0
614	braf	r0
615	mov	r4,r0
616#endif
617LOCAL(lshrsi3_table):
618	rts				// >> 0
619	nop
620LOCAL(lshrsi_1):			// >> 1
621	rts
622	shlr	r0
623LOCAL(lshrsi_2):			// >> 2
624	rts
625	shlr2	r0
626	bra	LOCAL(lshrsi_1)		// >> 3
627	shlr2	r0
628	bra	LOCAL(lshrsi_2)		// >> 4
629	shlr2	r0
630	bra	LOCAL(lshrsi_5)		// >> 5
631	shlr	r0
632	bra	LOCAL(lshrsi_6)		// >> 6
633	shlr2	r0
634	bra	LOCAL(lshrsi_7)		// >> 7
635	shlr	r0
636LOCAL(lshrsi_8):			// >> 8
637	rts
638	shlr8	r0
639	bra	LOCAL(lshrsi_8)		// >> 9
640	shlr	r0
641	bra	LOCAL(lshrsi_8)		// >> 10
642	shlr2	r0
643	bra	LOCAL(lshrsi_11)	// >> 11
644	shlr	r0
645	bra	LOCAL(lshrsi_12)	// >> 12
646	shlr2	r0
647	bra	LOCAL(lshrsi_13)	// >> 13
648	shlr	r0
649	bra	LOCAL(lshrsi_14)	// >> 14
650	shlr8	r0
651	bra	LOCAL(lshrsi_15)	// >> 15
652	shlr8	r0
653LOCAL(lshrsi_16):			// >> 16
654	rts
655	shlr16	r0
656	bra	LOCAL(lshrsi_16)	// >> 17
657	shlr	r0
658	bra	LOCAL(lshrsi_16)	// >> 18
659	shlr2	r0
660	bra	LOCAL(lshrsi_19)	// >> 19
661	shlr	r0
662	bra	LOCAL(lshrsi_20)	// >> 20
663	shlr2	r0
664	bra	LOCAL(lshrsi_21)	// >> 21
665	shlr	r0
666	bra	LOCAL(lshrsi_22)	// >> 22
667	shlr16	r0
668	bra	LOCAL(lshrsi_23)	// >> 23
669	shlr16	r0
670	bra	LOCAL(lshrsi_16)	// >> 24
671	shlr8	r0
672	bra	LOCAL(lshrsi_25)	// >> 25
673	shlr	r0
674	bra	LOCAL(lshrsi_26)	// >> 26
675	shlr2	r0
676	bra	LOCAL(lshrsi_27)	// >> 27
677	shlr	r0
678	bra	LOCAL(lshrsi_28)	// >> 28
679	shlr2	r0
680	bra	LOCAL(lshrsi_29)	// >> 29
681	shlr16	r0
682	bra	LOCAL(lshrsi_30)	// >> 30
683	shlr16	r0
/* >> 31: shll moves bit 31 into T and movt materialises T as 0 or 1.
   This last entry is three instructions long; nothing is indexed
   after it.  */
684	shll	r0			// >> 31
685	rts
686	movt	r0
687
/* Shared tails.  The table entry that branches here has already applied
   one shift in its bra delay slot.  The first three rts instructions
   below have no dedicated delay-slot instruction: their slot is the
   next instruction in memory (the first shlr2 of the following tail),
   and that shift is part of the total.
     >> 7:  1 + 2 + 2 + 2      >> 5: 1 + 2 + 2      >> 6: 2 + 2 + 2  */
688LOCAL(lshrsi_7):
689	shlr2	r0
690LOCAL(lshrsi_5):
691LOCAL(lshrsi_6):
692	shlr2	r0
693	rts
/* >> 13: 1 + 2 + 8 + 2    >> 12: 2 + 8 + 2    >> 11: 1 + 8 + 2  */
694LOCAL(lshrsi_13):
695	shlr2	r0
696LOCAL(lshrsi_12):
697LOCAL(lshrsi_11):
698	shlr8	r0
699	rts
/* >> 21: 1 + 2 + 16 + 2   >> 20: 2 + 16 + 2   >> 19: 1 + 16 + 2  */
700LOCAL(lshrsi_21):
701	shlr2	r0
702LOCAL(lshrsi_20):
703LOCAL(lshrsi_19):
704	shlr16	r0
705	rts
/* >> 28: 2 + 2 + 16 + 8   >> 27: 1 + 2 + 16 + 8
   >> 26: 2 + 16 + 8       >> 25: 1 + 16 + 8
   This rts has its own delay-slot instruction (shlr8).  */
706LOCAL(lshrsi_28):
707LOCAL(lshrsi_27):
708	shlr2	r0
709LOCAL(lshrsi_26):
710LOCAL(lshrsi_25):
711	shlr16	r0
712	rts
713	shlr8	r0
714
/* >> 22: 16 - 2 + 8   >> 14: 8 - 2 + 8.  The left shift only discards
   zero bits that the preceding right shift brought in.  */
715LOCAL(lshrsi_22):
716LOCAL(lshrsi_14):
717	shll2	r0
718	rts
719	shlr8	r0
720
/* >> 23: 16 - 1 + 8   >> 15: 8 - 1 + 8  */
721LOCAL(lshrsi_23):
722LOCAL(lshrsi_15):
723	shll	r0
724	rts
725	shlr8	r0
726
/* >> 29: 16 - 1 - 2 + 16   >> 30: 16 - 2 + 16  */
727LOCAL(lshrsi_29):
728	shll	r0
729LOCAL(lshrsi_30):
730	shll2	r0
731	rts
732	shlr16	r0
733
734	ENDFUNC(GLOBAL(lshrsi3))
735	ENDFUNC(GLOBAL(lshrsi3_r0))
736#endif
737
738#ifdef L_movmem
/* movmem: copy longwords from the buffer addressed by r5 to the buffer
   addressed by r4, 64 bytes per loop iteration plus a final partial
   chunk.  It reuses the movmemSI<n> straight-line copiers that follow
   as subroutines.

   Entry:    r4 = destination, r5 = source, r6 = size control value
   Destroys: r0, r4, r5, r6, T; PR is pushed on entry and popped again
             before the final return.

   r6 is multiplied by 4 on entry and then decremented by 64 for each
   chunk; once it goes negative it is used as a byte offset back from
   the end of the movmemSI<n> chain to pick the entry that copies the
   remaining words.  NOTE(review): the exact encoding the caller must
   use for r6 is not visible in this file; see the compiler's block-move
   expander.

   Control flow:
     1. bsr into movmemSI52+2 with the first load of that entry done in
        the bsr delay slot; this copies offsets 48 down to 0.  The rts
        at the end of the chain returns to movmem_loop, because the
        .balign 4 after the delay slot inserts no padding here.
     2. movmem_loop copies offsets 60, 56 and 52 of the current chunk.
        If r6 reached exactly zero it finishes via movmem_done.  If r6
        is still positive it advances both pointers by 64 and branches
        to movmemSI52 for the next chunk (again returning here by rts).
     3. Otherwise r6 is negative: jump to (movmemSI4 + 4) + r6.  Every
        movmemSI<n> step is 4 bytes of code, so r6 = -4 selects
        movmemSI4, r6 = -8 selects movmemSI8, and so on.  The delay slot
        of that jmp is the "lds.l @r15+,pr" at movmem_done, so the rts
        at the end of the chain returns to the original caller.  */
739	.text
740	.balign	4
741	.global	GLOBAL(movmem)
742	HIDDEN_FUNC(GLOBAL(movmem))
743	HIDDEN_ALIAS(movstr,movmem)
744	/* This would be a lot simpler if r6 contained the byte count
745	   minus 64, and we wouldn't be called here for a byte count of 64.  */
746GLOBAL(movmem):
747	sts.l	pr,@-r15
748	shll2	r6
749	bsr	GLOBAL(movmemSI52+2)
750	mov.l	@(48,r5),r0
751	.balign	4
752LOCAL(movmem_loop): /* Reached with rts */
753	mov.l	@(60,r5),r0
754	add	#-64,r6
755	mov.l	r0,@(60,r4)
756	tst	r6,r6
757	mov.l	@(56,r5),r0
758	bt	LOCAL(movmem_done)
759	mov.l	r0,@(56,r4)
760	cmp/pl	r6
761	mov.l	@(52,r5),r0
762	add	#64,r5
763	mov.l	r0,@(52,r4)
764	add	#64,r4
765	bt	GLOBAL(movmemSI52)
766! done all the large groups, do the remainder
767! jump to movmem+
768	mova	GLOBAL(movmemSI4)+4,r0
769	add	r6,r0
770	jmp	@r0
771LOCAL(movmem_done): ! share slot insn, works out aligned.
772	lds.l	@r15+,pr
773	mov.l	r0,@(56,r4)
774	mov.l	@(52,r5),r0
775	rts
776	mov.l	r0,@(52,r4)
777	.balign	4
778! ??? We need aliases movstr* for movmem* for the older libraries.  These
779! aliases will be removed at some point in the future.
780	.global	GLOBAL(movmemSI64)
/* movmemSI64 ... movmemSI4: fixed-size longword copies.

   Entry:    r4 = destination, r5 = source
   Destroys: r0

   The entry points form one fall-through chain.  movmemSI<n> copies the
   longword at offset n-4 and then continues with movmemSI<n-4>, so
   entering at movmemSI<n> copies n bytes, highest offset first.  Every
   step is exactly one load plus one store (4 bytes of code); movmem
   relies on that when it jumps to a computed address in the chain or to
   movmemSI52+2.  The chain ends at movmemSI4, whose store sits in the
   rts delay slot.

   Each entry also gets a movstrSI<n> alias through HIDDEN_ALIAS (macro
   defined outside this file; presumably it makes the old name refer to
   the same address).  */
781	HIDDEN_FUNC(GLOBAL(movmemSI64))
782	HIDDEN_ALIAS(movstrSI64,movmemSI64)
783GLOBAL(movmemSI64):
784	mov.l	@(60,r5),r0
785	mov.l	r0,@(60,r4)
786	.global	GLOBAL(movmemSI60)
787	HIDDEN_FUNC(GLOBAL(movmemSI60))
788	HIDDEN_ALIAS(movstrSI60,movmemSI60)
789GLOBAL(movmemSI60):
790	mov.l	@(56,r5),r0
791	mov.l	r0,@(56,r4)
792	.global	GLOBAL(movmemSI56)
793	HIDDEN_FUNC(GLOBAL(movmemSI56))
794	HIDDEN_ALIAS(movstrSI56,movmemSI56)
795GLOBAL(movmemSI56):
796	mov.l	@(52,r5),r0
797	mov.l	r0,@(52,r4)
798	.global	GLOBAL(movmemSI52)
799	HIDDEN_FUNC(GLOBAL(movmemSI52))
800	HIDDEN_ALIAS(movstrSI52,movmemSI52)
801GLOBAL(movmemSI52):
802	mov.l	@(48,r5),r0
803	mov.l	r0,@(48,r4)
804	.global	GLOBAL(movmemSI48)
805	HIDDEN_FUNC(GLOBAL(movmemSI48))
806	HIDDEN_ALIAS(movstrSI48,movmemSI48)
807GLOBAL(movmemSI48):
808	mov.l	@(44,r5),r0
809	mov.l	r0,@(44,r4)
810	.global	GLOBAL(movmemSI44)
811	HIDDEN_FUNC(GLOBAL(movmemSI44))
812	HIDDEN_ALIAS(movstrSI44,movmemSI44)
813GLOBAL(movmemSI44):
814	mov.l	@(40,r5),r0
815	mov.l	r0,@(40,r4)
816	.global	GLOBAL(movmemSI40)
817	HIDDEN_FUNC(GLOBAL(movmemSI40))
818	HIDDEN_ALIAS(movstrSI40,movmemSI40)
819GLOBAL(movmemSI40):
820	mov.l	@(36,r5),r0
821	mov.l	r0,@(36,r4)
822	.global	GLOBAL(movmemSI36)
823	HIDDEN_FUNC(GLOBAL(movmemSI36))
824	HIDDEN_ALIAS(movstrSI36,movmemSI36)
825GLOBAL(movmemSI36):
826	mov.l	@(32,r5),r0
827	mov.l	r0,@(32,r4)
828	.global	GLOBAL(movmemSI32)
829	HIDDEN_FUNC(GLOBAL(movmemSI32))
830	HIDDEN_ALIAS(movstrSI32,movmemSI32)
831GLOBAL(movmemSI32):
832	mov.l	@(28,r5),r0
833	mov.l	r0,@(28,r4)
834	.global	GLOBAL(movmemSI28)
835	HIDDEN_FUNC(GLOBAL(movmemSI28))
836	HIDDEN_ALIAS(movstrSI28,movmemSI28)
837GLOBAL(movmemSI28):
838	mov.l	@(24,r5),r0
839	mov.l	r0,@(24,r4)
840	.global	GLOBAL(movmemSI24)
841	HIDDEN_FUNC(GLOBAL(movmemSI24))
842	HIDDEN_ALIAS(movstrSI24,movmemSI24)
843GLOBAL(movmemSI24):
844	mov.l	@(20,r5),r0
845	mov.l	r0,@(20,r4)
846	.global	GLOBAL(movmemSI20)
847	HIDDEN_FUNC(GLOBAL(movmemSI20))
848	HIDDEN_ALIAS(movstrSI20,movmemSI20)
849GLOBAL(movmemSI20):
850	mov.l	@(16,r5),r0
851	mov.l	r0,@(16,r4)
852	.global	GLOBAL(movmemSI16)
853	HIDDEN_FUNC(GLOBAL(movmemSI16))
854	HIDDEN_ALIAS(movstrSI16,movmemSI16)
855GLOBAL(movmemSI16):
856	mov.l	@(12,r5),r0
857	mov.l	r0,@(12,r4)
858	.global	GLOBAL(movmemSI12)
859	HIDDEN_FUNC(GLOBAL(movmemSI12))
860	HIDDEN_ALIAS(movstrSI12,movmemSI12)
861GLOBAL(movmemSI12):
862	mov.l	@(8,r5),r0
863	mov.l	r0,@(8,r4)
864	.global	GLOBAL(movmemSI8)
865	HIDDEN_FUNC(GLOBAL(movmemSI8))
866	HIDDEN_ALIAS(movstrSI8,movmemSI8)
867GLOBAL(movmemSI8):
868	mov.l	@(4,r5),r0
869	mov.l	r0,@(4,r4)
870	.global	GLOBAL(movmemSI4)
871	HIDDEN_FUNC(GLOBAL(movmemSI4))
872	HIDDEN_ALIAS(movstrSI4,movmemSI4)
873GLOBAL(movmemSI4):
874	mov.l	@(0,r5),r0
875	rts
876	mov.l	r0,@(0,r4)
877
878	ENDFUNC(GLOBAL(movmemSI64))
879	ENDFUNC(GLOBAL(movmemSI60))
880	ENDFUNC(GLOBAL(movmemSI56))
881	ENDFUNC(GLOBAL(movmemSI52))
882	ENDFUNC(GLOBAL(movmemSI48))
883	ENDFUNC(GLOBAL(movmemSI44))
884	ENDFUNC(GLOBAL(movmemSI40))
885	ENDFUNC(GLOBAL(movmemSI36))
886	ENDFUNC(GLOBAL(movmemSI32))
887	ENDFUNC(GLOBAL(movmemSI28))
888	ENDFUNC(GLOBAL(movmemSI24))
889	ENDFUNC(GLOBAL(movmemSI20))
890	ENDFUNC(GLOBAL(movmemSI16))
891	ENDFUNC(GLOBAL(movmemSI12))
892	ENDFUNC(GLOBAL(movmemSI8))
893	ENDFUNC(GLOBAL(movmemSI4))
894	ENDFUNC(GLOBAL(movmem))
895#endif
896
897#ifdef L_movmem_i4
/* movmem_i4_even / movmem_i4_odd: software-pipelined longword copy.
   Loads use post-increment on r5 and run ahead of the stores, which
   use displacements from r4.

   Entry:    r4 = destination, r5 = source, r6 = loop count
   Destroys: r0-r3, r4, r5, r6, T

   r6 is decremented with dt before it is tested, so it must be at least
   1 on entry.  Tracing the code, each dt accounts for 8 bytes:
     movmem_i4_even copies 8 * (r6 + 1) bytes,
     movmem_i4_odd  copies 8 * (r6 + 1) + 4 bytes.
   The odd entry copies the extra leading longword by biasing r4 by -4
   and joining the loop half-way through.

   There are two exits: the rts after L_movmem_start_even (counter
   expired after storing a 16-byte group) and L_movmem_2mod4_end
   (counter expired at the top of the loop; two more longwords already
   loaded into r0/r1 are stored at offsets 16 and 20).

   movmemSI12_i4 is an independent routine that copies exactly three
   longwords from r5 to r4 using r0-r2; r4 and r5 are not modified.  */
898	.text
899	.global	GLOBAL(movmem_i4_even)
900	.global	GLOBAL(movmem_i4_odd)
901	.global	GLOBAL(movmemSI12_i4)
902
903	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
904	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
905	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
906
907	HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
908	HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
909	HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
910
911	.p2align	5
/* Exit taken from the top of the loop; r4 has not yet been advanced
   past the current 16-byte group, hence the offsets 16 and 20.  */
912L_movmem_2mod4_end:
913	mov.l	r0,@(16,r4)
914	rts
915	mov.l	r1,@(20,r4)
916
917	.p2align	2
918
/* Even entry: preload two longwords and join the loop at its midpoint. */
919GLOBAL(movmem_i4_even):
920	mov.l	@r5+,r0
921	bra	L_movmem_start_even
922	mov.l	@r5+,r1
923
/* Odd entry: preload three longwords, bias r4 by -4 so that the loop's
   offsets 4/8/12 address the first three destination longwords.  */
924GLOBAL(movmem_i4_odd):
925	mov.l	@r5+,r1
926	add	#-4,r4
927	mov.l	@r5+,r2
928	mov.l	@r5+,r3
929	mov.l	r1,@(4,r4)
930	mov.l	r2,@(8,r4)
931
932L_movmem_loop:
933	mov.l	r3,@(12,r4)
934	dt	r6
935	mov.l	@r5+,r0
/* bt/s: the load of r1 below is the delay slot and executes whether or
   not the branch is taken.  */
936	bt/s	L_movmem_2mod4_end
937	mov.l	@r5+,r1
938	add	#16,r4
939L_movmem_start_even:
940	mov.l	@r5+,r2
941	mov.l	@r5+,r3
942	mov.l	r0,@r4
943	dt	r6
944	mov.l	r1,@(4,r4)
/* bf/s: the store of r2 is the delay slot and always executes.  */
945	bf/s	L_movmem_loop
946	mov.l	r2,@(8,r4)
947	rts
948	mov.l	r3,@(12,r4)
949
950	ENDFUNC(GLOBAL(movmem_i4_even))
951	ENDFUNC(GLOBAL(movmem_i4_odd))
952
953	.p2align	4
/* Copy three longwords (12 bytes): all loads first, then the stores,
   with the last store in the rts delay slot.  */
954GLOBAL(movmemSI12_i4):
955	mov.l	@r5,r0
956	mov.l	@(4,r5),r1
957	mov.l	@(8,r5),r2
958	mov.l	r0,@r4
959	mov.l	r1,@(4,r4)
960	rts
961	mov.l	r2,@(8,r4)
962
963	ENDFUNC(GLOBAL(movmemSI12_i4))
964#endif
965
966#ifdef L_mulsi3
967
968
969	.global	GLOBAL(mulsi3)
970	HIDDEN_FUNC(GLOBAL(mulsi3))
971
972! r4 =       aabb
973! r5 =       ccdd
974! r0 = aabb*ccdd  via partial products
975!
976! if aa == 0 and cc == 0
977! r0 = bb*dd
978!
979! else
980! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
981!
982
983GLOBAL(mulsi3):
/* mulsi3: 32 x 32 -> low 32 bits multiply built from 16 x 16 unsigned
   multiplies (mulu.w).

   Entry:    r4, r5 = operands (written as aabb and ccdd in the comments,
             where each letter pair is one 16-bit half)
   Exit:     r0 = low 32 bits of r4 * r5
   Destroys: r1, r2, r3, MACL, T

   Fast path: when both upper halves are zero the first product bb*dd
   is already the answer.  Otherwise the two cross products aa*dd and
   cc*bb are added, shifted left by 16 and added to bb*dd; the aa*cc
   term would only affect bits 32 and above and is not computed.  */
984	mulu.w  r4,r5		! multiply the lsws  macl=bb*dd
985	mov     r5,r3		! r3 = ccdd
986	swap.w  r4,r2		! r2 = bbaa
987	xtrct   r2,r3		! r3 = aacc
988	tst  	r3,r3		! msws zero ?
989	bf      hiset
990	rts			! yes - then we have the answer
991	sts     macl,r0
992
/* mulu.w uses only the low 16 bits of each operand, so r2 (bbaa)
   contributes aa and r3 (aacc) contributes cc below.  */
993hiset:	sts	macl,r0		! r0 = bb*dd
994	mulu.w	r2,r5		! brewing macl = aa*dd
995	sts	macl,r1
996	mulu.w	r3,r4		! brewing macl = cc*bb
997	sts	macl,r2
998	add	r1,r2
999	shll16	r2
1000	rts
1001	add	r2,r0
1002
1003	ENDFUNC(GLOBAL(mulsi3))
1004#endif
1005#endif /* ! __SH5__ */
1006#ifdef L_sdivsi3_i4
/* sdivsi3_i4: signed 32-bit division done in the FPU.  Both operands
   are converted to double precision, divided, and the quotient is
   truncated back to an integer that is returned in FPUL.

   Two variants are provided:
     - __SH4__ / __SH2A__: uses FPSCR as it finds it.  NOTE(review):
       this presumably relies on the caller running with
       double-precision mode already selected; confirm against the ABI.
     - the single-precision / SH5 configurations: saves FPSCR on the
       stack, loads it with 0x00080000 (mov #8 followed by swap.w),
       performs the same sequence, and restores FPSCR in the rts delay
       slot.  NOTE(review): bit 19 is the PR (double precision) bit in
       the SH-4 FPSCR layout; confirm for the other targets.  */
1007	.title "SH DIVIDE"
1008!! 4 byte integer Divide code for the Renesas SH
1009#if defined (__SH4__) || defined (__SH2A__)
1010!! args in r4 and r5, result in fpul, clobber dr0, dr2
1011
1012	.global	GLOBAL(sdivsi3_i4)
1013	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1014GLOBAL(sdivsi3_i4):
/* dr0 = (double) r4, dr2 = (double) r5, dr0 /= dr2.  */
1015	lds r4,fpul
1016	float fpul,dr0
1017	lds r5,fpul
1018	float fpul,dr2
1019	fdiv dr2,dr0
1020	rts
/* Delay slot: convert the quotient to an integer in FPUL.  */
1021	ftrc dr0,fpul
1022
1023	ENDFUNC(GLOBAL(sdivsi3_i4))
1024#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1025!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
1026
1027#if ! __SH5__ || __SH5__ == 32
1028#if __SH5__
1029	.mode	SHcompact
1030#endif
1031	.global	GLOBAL(sdivsi3_i4)
1032	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1033GLOBAL(sdivsi3_i4):
/* Save the caller's FPSCR and install r2 = 8 << 16 = 0x00080000.  */
1034	sts.l fpscr,@-r15
1035	mov #8,r2
1036	swap.w r2,r2
1037	lds r2,fpscr
1038	lds r4,fpul
1039	float fpul,dr0
1040	lds r5,fpul
1041	float fpul,dr2
1042	fdiv dr2,dr0
1043	ftrc dr0,fpul
1044	rts
/* Delay slot: restore the caller's FPSCR.  */
1045	lds.l @r15+,fpscr
1046
1047	ENDFUNC(GLOBAL(sdivsi3_i4))
1048#endif /* ! __SH5__ || __SH5__ == 32 */
1049#endif /* __SH4__ || __SH2A__, #elif single-precision or SH5 FPU */
1050#endif /* L_sdivsi3_i4 */
1051
1052#ifdef L_sdivsi3
1053/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1054   sh2e/sh3e code.  */
1055!!
1056!! Steve Chamberlain
1057!! sac@cygnus.com
1058!!
1059!!
1060
1061!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1062
1063	.global	GLOBAL(sdivsi3)
1064#if __SHMEDIA__
1065#if __SH5__ == 32
1066	.section	.text..SHmedia32,"ax"
1067#else
1068	.text
1069#endif
1070	.align	2
1071#if 0
1072/* The assembly code that follows is a hand-optimized version of the C
1073   code that follows.  Note that the registers that are modified are
1074   exactly those listed as clobbered in the patterns divsi3_i1 and
1075   divsi3_i1_media.
1076
1077int __sdivsi3 (i, j)
1078     int i, j;
1079{
1080  register unsigned long long r18 asm ("r18");
1081  register unsigned long long r19 asm ("r19");
1082  register unsigned long long r0 asm ("r0") = 0;
1083  register unsigned long long r1 asm ("r1") = 1;
1084  register int r2 asm ("r2") = i >> 31;
1085  register int r3 asm ("r3") = j >> 31;
1086
1087  r2 = r2 ? r2 : r1;
1088  r3 = r3 ? r3 : r1;
1089  r18 = i * r2;
1090  r19 = j * r3;
1091  r2 *= r3;
1092
1093  r19 <<= 31;
1094  r1 <<= 31;
1095  do
1096    if (r18 >= r19)
1097      r0 |= r1, r18 -= r19;
1098  while (r19 >>= 1, r1 >>= 1);
1099
1100  return r2 * (int)r0;
1101}
1102*/
1103GLOBAL(sdivsi3):
/* Disabled variant: this code is in the "#if 0" arm of the enclosing
   conditional and is never assembled.  It is the hand-written form of
   the C reference in the comment directly above it:
     - r2 / r3 become +1 or -1 according to the signs of r4 / r5
       (shari.l gives 0 or -1; cmveq replaces a 0 by r1 = 1);
     - r18 / r19 are the operands multiplied by those signs, r2 becomes
       the sign of the quotient;
     - the loop compares the shifted divisor r19 against the running
       remainder r18, setting the current quotient bit (mask r1) in r0
       and subtracting when the divisor is not greater, then shifts
       r1 and r19 right until the mask r1 is zero;
     - the quotient is finally multiplied by the sign in r2.
   The return address is taken from r18 (ptabs/l r18, tr0) before r18 is
   reused as a temporary.  */
1104	pt/l	LOCAL(sdivsi3_dontadd), tr2
1105	pt/l	LOCAL(sdivsi3_loop), tr1
1106	ptabs/l	r18, tr0
1107	movi	0, r0
1108	movi	1, r1
1109	shari.l	r4, 31, r2
1110	shari.l	r5, 31, r3
1111	cmveq	r2, r1, r2
1112	cmveq	r3, r1, r3
1113	muls.l	r4, r2, r18
1114	muls.l	r5, r3, r19
1115	muls.l	r2, r3, r2
1116	shlli	r19, 31, r19
1117	shlli	r1, 31, r1
1118LOCAL(sdivsi3_loop):
1119	bgtu	r19, r18, tr2
1120	or	r0, r1, r0
1121	sub	r18, r19, r18
1122LOCAL(sdivsi3_dontadd):
1123	shlri	r1, 1, r1
1124	shlri	r19, 1, r19
1125	bnei	r1, 0, tr1
1126	muls.l	r0, r2, r0
1127	add.l	r0, r63, r0
1128	blink	tr0, r63
1129#elif 0 /* ! 0 */
1130 // inputs: r4,r5
1131 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1132 // result in r0
/* Disabled variant: this code is in the "#elif 0" arm of the enclosing
   conditional and is never assembled.

   What the visible code establishes:
     - r20 = |r5| and r21 = |r4|, each obtained by multiplying the
       operand by +1 or -1 (shari.l by 31 gives 0 or -1, ori with 1 turns
       that into +1 or -1);
     - r18 = (r4 ^ r5) >> 31 is 0 or -1, the sign of the quotient; it is
       added to r19 before the last partial quotient is accumulated and
       xor-ed into the result just before the return;
     - the return address is captured from r18 with ptabs before r18 is
       overwritten.
   NOTE(review): the multimedia multiply sequence in between appears to
   build a fixed-point reciprocal estimate of the divisor and refine
   the quotient in three multiply/subtract rounds; that reading is not
   verified here.  */
1133GLOBAL(sdivsi3):
1134 // can create absolute value without extra latency,
1135 // but dependent on proper sign extension of inputs:
1136 // shari.l r5,31,r2
1137 // xor r5,r2,r20
1138 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1139 shari.l r5,31,r2
1140 ori r2,1,r2
1141 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1142 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1143 shari.l r4,31,r3
1144 nsb r20,r0
1145 shlld r20,r0,r25
1146 shlri r25,48,r25
1147 sub r19,r25,r1
1148 mmulfx.w r1,r1,r2
1149 mshflo.w r1,r63,r1
1150 // If r4 was to be used in-place instead of r21, could use this sequence
1151 // to compute absolute:
1152 // sub r63,r4,r19 // compute absolute value of r4
1153 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1154 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1155 ori r3,1,r3
1156 mmulfx.w r25,r2,r2
1157 sub r19,r0,r0
1158 muls.l r4,r3,r21
1159 msub.w r1,r2,r2
1160 addi r2,-2,r1
1161 mulu.l r21,r1,r19
1162 mmulfx.w r2,r2,r2
1163 shlli r1,15,r1
1164 shlrd r19,r0,r19
1165 mulu.l r19,r20,r3
1166 mmacnfx.wl r25,r2,r1
1167 ptabs r18,tr0
1168 sub r21,r3,r25
1169
1170 mulu.l r25,r1,r2
1171 addi r0,14,r0
1172 xor r4,r5,r18
1173 shlrd r2,r0,r2
1174 mulu.l r2,r20,r3
1175 add r19,r2,r19
1176 shari.l r18,31,r18
1177 sub r25,r3,r25
1178
1179 mulu.l r25,r1,r2
1180 sub r25,r20,r25
1181 add r19,r18,r19
1182 shlrd r2,r0,r2
1183 mulu.l r2,r20,r3
1184 addi r25,1,r25
1185 add r19,r2,r19
1186
1187 cmpgt r25,r3,r25
1188 add.l r19,r25,r0
1189 xor r0,r18,r0
1190 blink tr0,r63
1191#else /* ! 0 && ! 0 */
1192
1193 // inputs: r4,r5
1194 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1195 // result in r0
/* Table-driven SHmedia signed divide (the arm that is assembled when
   __SHMEDIA__ is non-zero, since the two arms before it are "#if 0" and
   "#elif 0").

   Entry points:
     sdivsi3 / sdivsi3_1  (only when __pic__ is not defined): load the
                 address of div_table_internal into r20 with a
                 movi/shori pair (upper then lower 16 bits) and fall
                 through into sdivsi3_2.
     sdivsi3_2   expects the table address already in r20.

   The existing per-line comments track the fixed-point format of every
   intermediate (sN.M = signed, N integer and M fraction bits).  In
   outline: the divisor is normalised with nsb/shlld, its top bits index
   the table for an initial reciprocal estimate, the estimate is refined
   with multiplies, and the dividend is multiplied by it and shifted
   back by an amount derived from the normalisation count in r1.
   The return address is read from r18 by ptabs before r18 is reused as
   a temporary; the routine returns with blink tr0.  */
1196	HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1197#ifndef __pic__
1198	FUNC(GLOBAL(sdivsi3))
1199GLOBAL(sdivsi3): /* this is the shcompact entry point */
1200 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1201 // with the SHcompact implementation, which clobbers tr1 / tr2.
1202 .global GLOBAL(sdivsi3_1)
1203GLOBAL(sdivsi3_1):
1204 .global GLOBAL(div_table_internal)
1205 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1206 shori GLOBAL(div_table_internal) & 65535, r20
1207#endif
1208 .global GLOBAL(sdivsi3_2)
1209 // div_table in r20
1210 // clobbered: r1,r18,r19,r21,r25,tr0
1211GLOBAL(sdivsi3_2):
1212 nsb r5, r1
1213 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
1214 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
1215 ldx.ub r20, r21, r19 // u0.8
1216 shari r25, 32, r25   // normalize to s2.30
1217 shlli r21, 1, r21
1218 muls.l r25, r19, r19 // s2.38
1219 ldx.w r20, r21, r21  // s2.14
1220  ptabs r18, tr0
1221 shari r19, 24, r19   // truncate to s2.14
1222 sub r21, r19, r19    // some 11 bit inverse in s1.14
1223 muls.l r19, r19, r21 // u0.28
1224  sub r63, r1, r1
1225  addi r1, 92, r1
1226 muls.l r25, r21, r18 // s2.58
1227 shlli r19, 45, r19   // multiply by two and convert to s2.58
1228  /* bubble */
1229 sub r19, r18, r18
1230 shari r18, 28, r18   // some 22 bit inverse in s1.30
1231 muls.l r18, r25, r0  // s2.60
1232  muls.l r18, r4, r25 // s32.30
1233  /* bubble */
1234 shari r0, 16, r19   // s-16.44
1235 muls.l r19, r18, r19 // s-16.74
1236  shari r25, 63, r0
1237  shari r4, 14, r18   // s19.-14
1238 shari r19, 30, r19   // s-16.44
1239 muls.l r19, r18, r19 // s15.30
1240  xor r21, r0, r21    // You could also use the constant 1 << 27.
1241  add r21, r25, r21
1242 sub r21, r19, r21
1243 shard r21, r1, r21
1244 sub r21, r0, r0
1245 blink tr0, r63
1246#ifndef __pic__
1247	ENDFUNC(GLOBAL(sdivsi3))
1248#endif
1249	ENDFUNC(GLOBAL(sdivsi3_2))
1250#endif
1251#elif defined __SHMEDIA__
1252/* m5compact-nofpu */
/* This arm is selected by "#elif defined __SHMEDIA__" after the earlier
   "#if __SHMEDIA__" failed, i.e. when __SHMEDIA__ is defined but
   evaluates to zero.  The routine itself is still SHmedia code
   (.mode SHmedia) placed in section .text..SHmedia32.

   Algorithm (bit-serial, 32 iterations):
     - r18 / r19 = sign masks (0 or -1) of dividend / divisor; the
       xor-then-subtract pairs leave the absolute values in r20 / r21,
       and r19 becomes the sign mask of the quotient;
     - r25 = |divisor| << 32 and r21 = r25 - 1; r20 = |dividend|
       zero-extended to 64 bits;
     - each iteration shifts r20 left by one and, unless the bgeu/u
       branch skips it, subtracts r21.  Subtracting (divisor << 32) - 1
       takes the divisor off the upper half and adds 1 to the lower half
       at the same time, so quotient bits accumulate in the low word;
     - the low word of r25 doubles as the loop counter: it starts at 0,
       is decremented with addi.l, and the loop ends at -32;
     - the final xor / sub.l with r19 negates the quotient when the
       operand signs differ.
   The return address is captured from r18 by ptabs/l before r18 is
   overwritten.  */
1253 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1254	.mode	SHmedia
1255	.section	.text..SHmedia32,"ax"
1256	.align	2
1257	FUNC(GLOBAL(sdivsi3))
1258GLOBAL(sdivsi3):
1259	pt/l LOCAL(sdivsi3_dontsub), tr0
1260	pt/l LOCAL(sdivsi3_loop), tr1
1261	ptabs/l r18,tr2
1262	shari.l r4,31,r18
1263	shari.l r5,31,r19
1264	xor r4,r18,r20
1265	xor r5,r19,r21
1266	sub.l r20,r18,r20
1267	sub.l r21,r19,r21
1268	xor r18,r19,r19
1269	shlli r21,32,r25
1270	addi r25,-1,r21
1271	addz.l r20,r63,r20
1272LOCAL(sdivsi3_loop):
1273	shlli r20,1,r20
1274	bgeu/u r21,r20,tr0
1275	sub r20,r21,r20
1276LOCAL(sdivsi3_dontsub):
1277	addi.l r25,-1,r25
1278	bnei r25,-32,tr1
1279	xor r20,r19,r20
1280	sub.l r20,r19,r0
1281	blink tr2,r63
1282	ENDFUNC(GLOBAL(sdivsi3))
1283#else /* ! __SHMEDIA__ */
1284	FUNC(GLOBAL(sdivsi3))
/* sdivsi3 for the non-SHmedia targets: signed 32-bit division with the
   div0s / div1 step-division instructions.

   Entry:    r4 = dividend, r5 = divisor
   Exit:     r0 = quotient; 0 when the divisor is zero (see div0 below)
   Destroys: r1, r2, r3, T (as stated in the section header comment)

   Outline:
     - r1 = dividend, r0 = divisor, r2 = 0.
     - "div0s r2,r1" puts the dividend's sign in T; "subc r3,r3" then
       makes r3 = -T, the sign extension of the dividend (upper half of
       the 64-bit dividend), and "subc r2,r1" subtracts the borrow from
       r1, i.e. 1 when the dividend is negative.
     - "div0s r0,r3" initialises the divider flags from the divisor and
       the (extended) dividend signs.
     - 32 unrolled "rotcl r1 / div1 r0,r3" pairs each produce one
       quotient bit in T and shift it into r1.
     - the last rotcl shifts in the final bit and "addc r2,r1" adds the
       remaining T as a correction.
   NOTE(review): this follows the signed-division sequence given in the
   SuperH programming manuals; the per-step flag behaviour is defined
   there, not in this file.  */
1285GLOBAL(sdivsi3):
1286	mov	r4,r1
1287	mov	r5,r0
1288
1289	tst	r0,r0
1290	bt	div0
1291	mov	#0,r2
1292	div0s	r2,r1
1293	subc	r3,r3
1294	subc	r2,r1
1295	div0s	r0,r3
1296	rotcl	r1
1297	div1	r0,r3
1298	rotcl	r1
1299	div1	r0,r3
1300	rotcl	r1
1301	div1	r0,r3
1302	rotcl	r1
1303	div1	r0,r3
1304	rotcl	r1
1305	div1	r0,r3
1306	rotcl	r1
1307	div1	r0,r3
1308	rotcl	r1
1309	div1	r0,r3
1310	rotcl	r1
1311	div1	r0,r3
1312	rotcl	r1
1313	div1	r0,r3
1314	rotcl	r1
1315	div1	r0,r3
1316	rotcl	r1
1317	div1	r0,r3
1318	rotcl	r1
1319	div1	r0,r3
1320	rotcl	r1
1321	div1	r0,r3
1322	rotcl	r1
1323	div1	r0,r3
1324	rotcl	r1
1325	div1	r0,r3
1326	rotcl	r1
1327	div1	r0,r3
1328	rotcl	r1
1329	div1	r0,r3
1330	rotcl	r1
1331	div1	r0,r3
1332	rotcl	r1
1333	div1	r0,r3
1334	rotcl	r1
1335	div1	r0,r3
1336	rotcl	r1
1337	div1	r0,r3
1338	rotcl	r1
1339	div1	r0,r3
1340	rotcl	r1
1341	div1	r0,r3
1342	rotcl	r1
1343	div1	r0,r3
1344	rotcl	r1
1345	div1	r0,r3
1346	rotcl	r1
1347	div1	r0,r3
1348	rotcl	r1
1349	div1	r0,r3
1350	rotcl	r1
1351	div1	r0,r3
1352	rotcl	r1
1353	div1	r0,r3
1354	rotcl	r1
1355	div1	r0,r3
1356	rotcl	r1
1357	div1	r0,r3
1358	rotcl	r1
1359	div1	r0,r3
1360	rotcl	r1
1361	addc	r2,r1
1362	rts
1363	mov	r1,r0
1364
1365
/* Division by zero: return 0 (loaded in the rts delay slot) instead of
   trapping.  */
1366div0:	rts
1367	mov	#0,r0
1368
1369	ENDFUNC(GLOBAL(sdivsi3))
1370#endif /* ! __SHMEDIA__ */
1371#endif
1372#ifdef L_udivsi3_i4
1373
1374	.title "SH DIVIDE"
1375!! 4 byte integer Divide code for the Renesas SH
1376#if defined (__SH4__) || defined (__SH2A__)
/* Unsigned 32-bit division carried out in the FPU.
   In:  r4 = dividend, r5 = divisor.
   Out: fpul = quotient.
   Clobbered: r0, r1, r4, r5, dr0, dr2, dr4, and the T bit.

   Both operands have their top bit flipped (value - 2^31 when read as
   signed), are converted to double, and get 2^31 added back, so the
   signed int-to-double conversion can represent the full unsigned range.  */

	.global	GLOBAL(udivsi3_i4)
	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
	mov	#1,r1
	cmp/hi	r1,r5			/* T = (divisor > 1), unsigned compare */
	bf	LOCAL(udivsi3_i4_trivial)
	rotr	r1			/* r1 = 0x80000000 */
	xor	r1,r4			/* flip the top bit of the dividend */
	lds	r4,fpul
	mova	LOCAL(udivsi3_i4_bias),r0
#ifdef FMOVD_WORKS
	fmov.d	@r0+,dr4		/* dr4 = 2147483648.0 */
#else
	fmov.s	@r0+,DR40
	fmov.s	@r0,DR41
#endif
	float	fpul,dr0
	xor	r1,r5			/* same treatment for the divisor */
	lds	r5,fpul
	float	fpul,dr2
	fadd	dr4,dr0			/* add 2^31 back */
	fadd	dr4,dr2
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul		/* delay slot: truncate the quotient into fpul */

/* Divisor is 0 or 1: hand the dividend back unchanged.  */
LOCAL(udivsi3_i4_trivial):
	rts
	lds	r4,fpul

	.align 2
#ifdef FMOVD_WORKS
	.align 3	! make double below 8 byte aligned.
#endif
LOCAL(udivsi3_i4_bias):
	.double 2147483648

	ENDFUNC(GLOBAL(udivsi3_i4))
1418#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__)
1419#if ! __SH5__ || __SH5__ == 32
/* Unsigned 32-bit division carried out in the SHmedia FPU.
   In:  r4 = dividend, r5 = divisor, r18 = return address.
   Out: result in fpul (the final instruction writes fr32).
   Clobbered: r20, r21, dr0, fr33; tr0 is used for the return.  */
	.mode	SHmedia
	.global	GLOBAL(udivsi3_i4)
	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
	addz.l	r4,r63,r20	/* zero-extend both operands to 64 bits */
	addz.l	r5,r63,r21
	fmov.qd	r20,dr0
	fmov.qd	r21,dr32
	ptabs	r18,tr0
	float.qd dr0,dr0	/* 64-bit integer -> double */
	float.qd dr32,dr32
	fdiv.d	dr0,dr32,dr0
	ftrc.dq	dr0,dr32	/* truncate the quotient to a 64-bit integer in dr32 */
	fmov.s	fr33,fr32	/* move one 32-bit half of that result into fr32 */
	blink	tr0,r63

	ENDFUNC(GLOBAL(udivsi3_i4))
1438#endif /* ! __SH5__ || __SH5__ == 32 */
1439#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
/* Unsigned 32-bit division carried out in the FPU.  This variant saves
   FPSCR on the stack, loads a replacement value from the literal pool
   for the duration of the computation, and restores the saved value in
   the return delay slot.
   In:  r4 = dividend, r5 = divisor.
   Out: fpul = quotient.
   Clobbered: r0, r1, r4, r5, dr0, dr2, dr4  */

	.global	GLOBAL(udivsi3_i4)
	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
	mov	#1,r1
	cmp/hi	r1,r5			/* T = (divisor > 1), unsigned compare */
	bf	LOCAL(udivsi3_i4_trivial) /* taken before FPSCR is touched */
	sts.l	fpscr,@-r15		/* save the caller's FPSCR */
	mova	LOCAL(udivsi3_i4_pool),r0
	lds.l	@r0+,fpscr		/* temporary FPSCR; r0 now points at the double */
	rotr	r1			/* r1 = 0x80000000 */
	xor	r1,r4			/* flip the top bit of the dividend */
	lds	r4,fpul
#ifdef FMOVD_WORKS
	fmov.d	@r0+,dr4		/* dr4 = 2147483648.0 */
#else
	fmov.s	@r0+,DR40
	fmov.s	@r0,DR41
#endif
	float	fpul,dr0
	xor	r1,r5			/* same treatment for the divisor */
	lds	r5,fpul
	float	fpul,dr2
	fadd	dr4,dr0			/* add 2^31 back */
	fadd	dr4,dr2
	fdiv	dr2,dr0
	ftrc	dr0,fpul
	rts
	lds.l	@r15+,fpscr		/* delay slot: restore the caller's FPSCR */

/* The 8-byte alignment is applied here rather than at the pool: the two
   instructions below take 4 bytes and the pool starts with a 4-byte
   word, which leaves the double that follows on an 8-byte boundary.  */
#ifdef FMOVD_WORKS
	.align 3	! make double below 8 byte aligned.
#endif
/* Divisor is 0 or 1: hand the dividend back unchanged.  */
LOCAL(udivsi3_i4_trivial):
	rts
	lds	r4,fpul

	.align 2
LOCAL(udivsi3_i4_pool):
#ifndef FMOVD_WORKS
	.long 0x80000		/* FPSCR bit 19 (PR) */
#else
	.long 0x180000		/* FPSCR bits 19 and 20 (PR and SZ) */
#endif
	.double 2147483648

	ENDFUNC(GLOBAL(udivsi3_i4))
1488#endif /* ! __SH4__ */
1489#endif
1490
1491#ifdef L_udivsi3
1492/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1493   sh2e/sh3e code.  */
1494
1495!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1496	.global	GLOBAL(udivsi3)
1497	HIDDEN_FUNC(GLOBAL(udivsi3))
1498
1499#if __SHMEDIA__
1500#if __SH5__ == 32
1501	.section	.text..SHmedia32,"ax"
1502#else
1503	.text
1504#endif
1505	.align	2
1506#if 0
1507/* The assembly code that follows is a hand-optimized version of the C
1508   code that follows.  Note that the registers that are modified are
1509   exactly those listed as clobbered in the patterns udivsi3_i1 and
1510   udivsi3_i1_media.
1511
1512unsigned
1513__udivsi3 (i, j)
1514    unsigned i, j;
1515{
1516  register unsigned long long r0 asm ("r0") = 0;
1517  register unsigned long long r18 asm ("r18") = 1;
1518  register unsigned long long r4 asm ("r4") = i;
1519  register unsigned long long r19 asm ("r19") = j;
1520
1521  r19 <<= 31;
1522  r18 <<= 31;
1523  do
1524    if (r4 >= r19)
1525      r0 |= r18, r4 -= r19;
1526  while (r19 >>= 1, r18 >>= 1);
1527
1528  return r0;
1529}
1530*/
/* Bit-by-bit variant matching the C code in the comment above.
   r19 = divisor << 31, r18 = the quotient bit being tried (starts at
   1 << 31), r4 = running remainder, r0 = quotient.  */
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontadd), tr2
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18, tr0		/* return target, taken before r18 is reused */
	movi	0, r0
	movi	1, r18
	addz.l	r5, r63, r19
	addz.l	r4, r63, r4
	shlli	r19, 31, r19
	shlli	r18, 31, r18
LOCAL(udivsi3_loop):
	bgtu	r19, r4, tr2		/* shifted divisor > remainder: skip */
	or	r0, r18, r0		/* record this quotient bit */
	sub	r4, r19, r4
LOCAL(udivsi3_dontadd):
	shlri	r18, 1, r18
	shlri	r19, 1, r19
	bnei	r18, 0, tr1		/* done once the bit has been shifted out */
	blink	tr0, r63
1550#else
/* Unsigned 32-bit division using a fixed-point reciprocal of the
   normalized divisor followed by multiply/shift/subtract correction
   rounds.  */
GLOBAL(udivsi3):
 // inputs: r4,r5
 // clobbered: r18,r19,r20,r21,r22,r25,tr0
 // result in r0.
	addz.l	r5,r63,r22		/* r22 = zero-extended divisor */
	nsb	r22,r0			/* r0 = normalization shift count */
	shlld	r22,r0,r25
	shlri	r25,48,r25		/* r25 = top 16 bits of the normalized divisor */
	movi	0xffffffffffffbb0c,r20 // shift count equiv 76
	sub	r20,r25,r21
	mmulfx.w r21,r21,r19
	mshflo.w r21,r63,r21
	ptabs	r18,tr0			/* return target, taken before r18 is reused */
	mmulfx.w r25,r19,r19
	sub	r20,r0,r0
	/* bubble */
	msub.w	r21,r19,r19
	addi	r19,-2,r21 /* It would be nice for scheduling to do this add to r21
		    before the msub.w, but we need a different value for
		    r19 to keep errors under control.  */
	mulu.l	r4,r21,r18
	mmulfx.w r19,r19,r19
	shlli	r21,15,r21
	shlrd	r18,r0,r18		/* r18 = first quotient estimate */
	mulu.l	r18,r22,r20
	mmacnfx.wl r25,r19,r21
	/* bubble */
	sub	r4,r20,r25		/* r25 = remainder after the first estimate */

	mulu.l	r25,r21,r19
	addi	r0,14,r0
	/* bubble */
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	add	r18,r19,r18		/* fold the second estimate into the quotient */
	/* bubble */
	sub.l	r25,r20,r25

	mulu.l	r25,r21,r19
	addz.l	r25,r63,r25
	sub	r25,r22,r25
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	addi	r25,1,r25
	add	r18,r19,r18		/* fold the third estimate into the quotient */

	cmpgt	r25,r20,r25		/* 0 or 1: final adjustment */
	add.l	r18,r25,r0
	blink	tr0,r63
	/* Close the HIDDEN_FUNC opened ahead of the variants; the SHcompact
	   variant of this routine already does so.  */
	ENDFUNC(GLOBAL(udivsi3))
1600#endif
1601#elif defined (__SHMEDIA__)
/* m5compact-nofpu - more emphasis on code size than on speed, but don't
   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
   So use a short shmedia loop.  */
/* In:  r4 = dividend, r5 = divisor, r18 = return address.
   Out: r0 = quotient.  */
 // clobbered: r20,r21,r25,tr0,tr1,tr2
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontsub), tr0
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18,tr2
	shlli	r5,32,r25		/* r25 = divisor << 32; its low word is the loop counter */
	addi	r25,-1,r21		/* r21 = (divisor << 32) - 1 */
	addz.l	r4,r63,r20		/* zero-extend the dividend */
LOCAL(udivsi3_loop):
	shlli	r20,1,r20
	bgeu/u	r21,r20,tr0		/* r21 >= r20: nothing to subtract this round */
	/* Subtracting r21 takes the divisor off the high word and, because of
	   the -1 above, adds 1 to the low word, i.e. records a quotient bit.  */
	sub	r20,r21,r20
LOCAL(udivsi3_dontsub):
	addi.l	r25,-1,r25		/* 32-bit countdown: -1, -2, ...  */
	bnei	r25,-32,tr1		/* leave after 32 rounds */
	add.l	r20,r63,r0		/* low 32 bits hold the quotient */
	blink	tr2,r63
	/* Close the HIDDEN_FUNC opened ahead of the variants; the SHcompact
	   variant of this routine already does so.  */
	ENDFUNC(GLOBAL(udivsi3))
1625#else /* ! defined (__SHMEDIA__) */
/* Unsigned 32-bit division for SHcompact.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   The div1 steps are issued through the small helper routines below,
   called with bsr; pr is therefore saved on the stack on entry.  */

/* Eight div1 steps: one here, then the seven of div7.  */
LOCAL(div8):
	div1	r5,r4
/* Seven div1 steps; the last one sits in the rts delay slot.  */
LOCAL(div7):
	div1	r5,r4
	div1	r5,r4
	div1	r5,r4
	div1	r5,r4
	div1	r5,r4
	div1	r5,r4
	rts
	div1	r5,r4

/* Four div1 steps with the quotient bits rotated into r0 in between;
   the fourth div1 sits in the rts delay slot.  */
LOCAL(divx4):
	div1	r5,r4
	rotcl	r0
	div1	r5,r4
	rotcl	r0
	div1	r5,r4
	rotcl	r0
	rts
	div1	r5,r4

GLOBAL(udivsi3):
	sts.l	pr,@-r15
	extu.w	r5,r0
	cmp/eq	r5,r0			/* T = 1 when the divisor fits in 16 bits */
#ifdef __sh1__
	bf	LOCAL(large_divisor)
#else
	bf/s	LOCAL(large_divisor)
#endif
	div0u				/* delay slot of bf/s; plain fall-through on sh1 */
	/* 16-bit divisor: two rounds of 16 div1 steps each.  */
	swap.w	r4,r0
	shlr16	r4
	bsr	LOCAL(div8)
	shll16	r5			/* delay slot: divisor into the upper half */
	bsr	LOCAL(div7)
	div1	r5,r4			/* delay slot: 1 + 7 steps */
	xtrct	r4,r0
	xtrct	r0,r4
	bsr	LOCAL(div8)
	swap.w	r4,r4
	bsr	LOCAL(div7)
	div1	r5,r4
	lds.l	@r15+,pr
	xtrct	r4,r0
	swap.w	r0,r0
	rotcl	r0
	rts
	shlr16	r5			/* delay slot: undo the shll16 on the divisor */

LOCAL(large_divisor):
#ifdef __sh1__
	div0u				/* the sh1 path did not execute the one above */
#endif
	mov	#0,r0
	xtrct	r4,r0
	xtrct	r0,r4
	/* Four rounds of divx4, each with a rotcl in the bsr delay slot.  */
	bsr	LOCAL(divx4)
	rotcl	r0
	bsr	LOCAL(divx4)
	rotcl	r0
	bsr	LOCAL(divx4)
	rotcl	r0
	bsr	LOCAL(divx4)
	rotcl	r0
	lds.l	@r15+,pr
	rts
	rotcl	r0			/* delay slot: last quotient bit */

	ENDFUNC(GLOBAL(udivsi3))
1687#endif /* ! __SHMEDIA__ */
1688#endif /* L_udivsi3 */
1689
1690#ifdef L_udivdi3
1691#ifdef __SHMEDIA__
/* Unsigned 64-bit division.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = quotient.
   Registers written: r0, r1, r2, r4-r9, r19-r22, r25, tr0.
   The divisor is normalized (r22 = shift count, r6 = shifted divisor),
   a fixed-point reciprocal is built in r1, and the quotient is then
   assembled in multiply/shift/subtract steps.  The common start is
   interleaved with the first instructions of both paths, so the
   instruction order here is significant.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(udivdi3)
	FUNC(GLOBAL(udivdi3))
GLOBAL(udivdi3):
	HIDDEN_ALIAS(udivdi3_internal,udivdi3)
	shlri	r3,1,r4
	nsb	r4,r22
	shlld	r3,r22,r6
	shlri	r6,49,r5
	movi	0xffffffffffffbaf1,r21 /* .l shift count 17.  */
	sub	r21,r5,r1
	mmulfx.w r1,r1,r4
	mshflo.w r1,r63,r1
	sub	r63,r22,r20 // r63 == 64 % 64
	mmulfx.w r5,r4,r4
	pta	LOCAL(large_divisor),tr0
	addi	r20,32,r9		/* r9 = 32 - r22 */
	msub.w	r1,r4,r1
	madd.w	r1,r1,r1
	mmulfx.w r1,r1,r4
	shlri	r6,32,r7
	bgt/u	r9,r63,tr0 // large_divisor
	mmulfx.w r5,r4,r4
	shlri	r2,32+14,r19
	addi	r22,-31,r0
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r19,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                 the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	mulu.l	r5,r3,r8
	mshalds.l r1,r21,r1
	shari	r4,26,r4
	shlld	r8,r0,r8
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r2,r8,r2
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

	shlri	r2,22,r21
	mulu.l	r21,r1,r21
	shlld	r5,r0,r8
	addi	r20,30-22,r0
	shlrd	r21,r0,r21
	mulu.l	r21,r3,r5
	add	r8,r21,r8
	mcmpgt.l r21,r63,r21 // See Note 1
	addi	r20,30,r0
	mshfhi.l r63,r21,r21
	sub	r2,r5,r2
	andc	r2,r21,r2

	/* small divisor: need a third divide step */
	mulu.l	r2,r1,r7
	ptabs	r18,tr0
	addi	r2,1,r2
	shlrd	r7,r0,r7
	mulu.l	r7,r3,r5
	add	r8,r7,r8
	sub	r2,r3,r2
	cmpgt	r2,r5,r5
	add	r8,r5,r2
	/* could test r3 here to check for divide by zero.  */
	blink	tr0,r63

LOCAL(large_divisor):
	mmulfx.w r5,r4,r4
	shlrd	r2,r9,r25
	shlri	r25,32,r8
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r8,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                 the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	shlri	r5,14-1,r8
	mulu.l	r8,r7,r5
	mshalds.l r1,r21,r1
	shari	r4,26,r4
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r25,r5,r25
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

	shlri	r25,22,r21
	mulu.l	r21,r1,r21
	pta	LOCAL(no_lo_adj),tr0
	addi	r22,32,r0
	shlri	r21,40,r21
	mulu.l	r21,r7,r5
	add	r8,r21,r8
	shlld	r2,r0,r2
	sub	r25,r5,r25
	bgtu/u	r7,r25,tr0 // no_lo_adj
	addi	r8,1,r8
	sub	r25,r7,r25
LOCAL(no_lo_adj):
	mextr4	r2,r25,r2

	/* large_divisor: only needs a few adjustments.  */
	mulu.l	r8,r6,r5
	ptabs	r18,tr0
	/* bubble */
	cmpgtu	r5,r2,r5
	sub	r8,r5,r2
	blink	tr0,r63
	ENDFUNC(GLOBAL(udivdi3))
1806/* Note 1: To shift the result of the second divide stage so that the result
1807   always fits into 32 bits, yet we still reduce the rest sufficiently
1808   would require a lot of instructions to do the shifts just right.  Using
1809   the full 64 bit shift result to multiply with the divisor would require
1810   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1811   Fortunately, if the upper 32 bits of the shift result are nonzero, we
1812   know that the rest after taking this partial result into account will
1813   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1814   upper 32 bits of the partial result are nonzero.  */
1815#endif /* __SHMEDIA__ */
1816#endif /* L_udivdi3 */
1817
1818#ifdef L_divdi3
1819#ifdef __SHMEDIA__
/* Signed 64-bit division layered on udivdi3_internal.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = quotient.
   Writes r2, r3, r22, r23, tr0, tr1, plus whatever udivdi3_internal
   writes; r18 is overwritten on the path that negates the result.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(divdi3)
	FUNC(GLOBAL(divdi3))
GLOBAL(divdi3):
	pta	GLOBAL(udivdi3_internal),tr0
	shari	r2,63,r22		/* r22 = sign mask of the dividend (0 or -1) */
	shari	r3,63,r23		/* r23 = sign mask of the divisor */
	xor	r2,r22,r2
	xor	r3,r23,r3
	sub	r2,r22,r2		/* r2 = |dividend| */
	sub	r3,r23,r3		/* r3 = |divisor| */
	beq/u	r22,r23,tr0		/* equal signs: jump straight in, r18 is still
					   the caller's return address */
	ptabs	r18,tr1			/* keep our own return target */
	blink	tr0,r18			/* call; r18 receives the address to come back to */
	sub	r63,r2,r2		/* signs differed: negate the quotient */
	blink	tr1,r63
	ENDFUNC(GLOBAL(divdi3))
1839#endif /* __SHMEDIA__ */
1840#endif /* L_divdi3 */
1841
1842#ifdef L_umoddi3
1843#ifdef __SHMEDIA__
/* Unsigned 64-bit remainder.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = remainder.
   Registers written: r0, r1, r2, r4-r9, r19-r22, r25, tr0.
   The divisor is normalized (r22 = shift count, r6 = shifted divisor),
   a fixed-point reciprocal is built in r1, and quotient estimates times
   the divisor are subtracted from the running rest.  The common start is
   interleaved with the first instructions of both paths, so the
   instruction order here is significant.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(umoddi3)
	FUNC(GLOBAL(umoddi3))
GLOBAL(umoddi3):
	HIDDEN_ALIAS(umoddi3_internal,umoddi3)
	shlri	r3,1,r4
	nsb	r4,r22
	shlld	r3,r22,r6
	shlri	r6,49,r5
	movi	0xffffffffffffbaf1,r21 /* .l shift count 17.  */
	sub	r21,r5,r1
	mmulfx.w r1,r1,r4
	mshflo.w r1,r63,r1
	sub	r63,r22,r20 // r63 == 64 % 64
	mmulfx.w r5,r4,r4
	pta	LOCAL(large_divisor),tr0
	addi	r20,32,r9		/* r9 = 32 - r22 */
	msub.w	r1,r4,r1
	madd.w	r1,r1,r1
	mmulfx.w r1,r1,r4
	shlri	r6,32,r7
	bgt/u	r9,r63,tr0 // large_divisor
	mmulfx.w r5,r4,r4
	shlri	r2,32+14,r19
	addi	r22,-31,r0
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r19,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                 the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	mulu.l	r5,r3,r5
	mshalds.l r1,r21,r1
	shari	r4,26,r4
	shlld	r5,r0,r5
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r2,r5,r2
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

	shlri	r2,22,r21
	mulu.l	r21,r1,r21
	addi	r20,30-22,r0
	/* bubble */ /* could test r3 here to check for divide by zero.  */
	shlrd	r21,r0,r21
	mulu.l	r21,r3,r5
	mcmpgt.l r21,r63,r21 // See Note 1
	addi	r20,30,r0
	mshfhi.l r63,r21,r21
	sub	r2,r5,r2
	andc	r2,r21,r2

	/* small divisor: need a third divide step */
	mulu.l	r2,r1,r7
	ptabs	r18,tr0
	sub	r2,r3,r8 /* re-use r8 here for rest - r3 */
	shlrd	r7,r0,r7
	mulu.l	r7,r3,r5
	/* bubble */
	addi	r8,1,r7
	cmpgt	r7,r5,r7
	cmvne	r7,r8,r2
	sub	r2,r5,r2
	blink	tr0,r63

LOCAL(large_divisor):
	mmulfx.w r5,r4,r4
	shlrd	r2,r9,r25
	shlri	r25,32,r8
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r8,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                 the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	shlri	r5,14-1,r8
	mulu.l	r8,r7,r5
	mshalds.l r1,r21,r1
	shari	r4,26,r4
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r25,r5,r25
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

	shlri	r25,22,r21
	mulu.l	r21,r1,r21
	pta	LOCAL(no_lo_adj),tr0
	addi	r22,32,r0
	shlri	r21,40,r21
	mulu.l	r21,r7,r5
	add	r8,r21,r8
	shlld	r2,r0,r2
	sub	r25,r5,r25
	bgtu/u	r7,r25,tr0 // no_lo_adj
	addi	r8,1,r8
	sub	r25,r7,r25
LOCAL(no_lo_adj):
	mextr4	r2,r25,r2

	/* large_divisor: only needs a few adjustments.  */
	mulu.l	r8,r6,r5
	ptabs	r18,tr0
	add	r2,r6,r7
	cmpgtu	r5,r2,r8
	cmvne	r8,r7,r2
	sub	r2,r5,r2
	shlrd	r2,r22,r2		/* undo the normalization shift */
	blink	tr0,r63
	ENDFUNC(GLOBAL(umoddi3))
1959/* Note 1: To shift the result of the second divide stage so that the result
1960   always fits into 32 bits, yet we still reduce the rest sufficiently
1961   would require a lot of instructions to do the shifts just right.  Using
1962   the full 64 bit shift result to multiply with the divisor would require
1963   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1964   Fortunately, if the upper 32 bits of the shift result are nonzero, we
1965   know that the rest after taking this partial result into account will
1966   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1967   upper 32 bits of the partial result are nonzero.  */
1968#endif /* __SHMEDIA__ */
1969#endif /* L_umoddi3 */
1970
1971#ifdef L_moddi3
1972#ifdef __SHMEDIA__
/* Signed 64-bit remainder layered on umoddi3_internal.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = remainder, negated when the dividend was negative.
   Writes r2, r3, r22, r23, tr0, tr1, plus whatever umoddi3_internal
   writes; r18 is overwritten on the path that negates the result.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(moddi3)
	FUNC(GLOBAL(moddi3))
GLOBAL(moddi3):
	pta	GLOBAL(umoddi3_internal),tr0
	shari	r2,63,r22		/* r22 = sign mask of the dividend (0 or -1) */
	shari	r3,63,r23		/* r23 = sign mask of the divisor */
	xor	r2,r22,r2
	xor	r3,r23,r3
	sub	r2,r22,r2		/* r2 = |dividend| */
	sub	r3,r23,r3		/* r3 = |divisor| */
	beq/u	r22,r63,tr0		/* dividend not negative: jump straight in,
					   r18 is still the caller's return address */
	ptabs	r18,tr1			/* keep our own return target */
	blink	tr0,r18			/* call; r18 receives the address to come back to */
	sub	r63,r2,r2		/* negative dividend: negate the remainder */
	blink	tr1,r63
	ENDFUNC(GLOBAL(moddi3))
1992#endif /* __SHMEDIA__ */
1993#endif /* L_moddi3 */
1994
1995#ifdef L_set_fpscr
1996#if !defined (__SH2A_NOFPU__)
1997#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
/* set_fpscr: load FPSCR from r4, then store two derived FPSCR images
   into the two-word fpscr_values array.
   In:  r4 = new FPSCR value.
   Writes r0, r1 and either r2 or r3 per image; r12 is saved and restored
   on the PIC path.

   The images are built on the upper halfword of r4 (swap.w), where
   bits 3 and 4 correspond to FPSCR bits 19 and 20 (PR and SZ):
     first image:  both bits set (bit 20 cleared again without FMOVD_WORKS)
     second image: both bits clear
   Which array slot receives which image depends on the target macros
   tested below.  */
#ifdef __SH5__
	.mode	SHcompact
#endif
	.global GLOBAL(set_fpscr)
	HIDDEN_FUNC(GLOBAL(set_fpscr))
GLOBAL(set_fpscr):
	lds	r4,fpscr
	/* r1 = address of fpscr_values.  */
#ifdef __PIC__
	mov.l	r12,@-r15
#ifdef __vxworks
	mov.l	LOCAL(set_fpscr_L0_base),r12
	mov.l	LOCAL(set_fpscr_L0_index),r0
	mov.l	@r12,r12
	mov.l	@(r0,r12),r12
#else
	mova	LOCAL(set_fpscr_L0),r0
	mov.l	LOCAL(set_fpscr_L0),r12
	add	r0,r12			/* r12 = address of the GOT */
#endif
	mov.l	LOCAL(set_fpscr_L1),r0	/* GOT offset of fpscr_values */
	mov.l	@(r0,r12),r1
	mov.l	@r15+,r12
#else
	mov.l	LOCAL(set_fpscr_L1),r1
#endif
	swap.w	r4,r0			/* upper halfword into reach of the #imm forms */
	or	#24,r0			/* set bits 3 and 4 (FPSCR bits 19 and 20) */
#ifndef FMOVD_WORKS
	xor	#16,r0			/* clear bit 4 (FPSCR bit 20) again */
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
	swap.w	r0,r3
	mov.l	r3,@(4,r1)		/* first image -> fpscr_values[1] */
#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r2
	mov.l	r2,@r1			/* first image -> fpscr_values[0] */
#endif
	/* Clear whichever of the two bits is still set.  */
#ifndef FMOVD_WORKS
	xor	#8,r0
#else
	xor	#24,r0
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
	swap.w	r0,r2
	rts
	mov.l	r2,@r1			/* delay slot: second image -> fpscr_values[0] */
#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r3
	rts
	mov.l	r3,@(4,r1)		/* delay slot: second image -> fpscr_values[1] */
#endif
	.align 2
#ifdef __PIC__
#ifdef __vxworks
LOCAL(set_fpscr_L0_base):
	.long ___GOTT_BASE__
LOCAL(set_fpscr_L0_index):
	.long ___GOTT_INDEX__
#else
LOCAL(set_fpscr_L0):
	.long _GLOBAL_OFFSET_TABLE_
#endif
LOCAL(set_fpscr_L1):
	.long GLOBAL(fpscr_values@GOT)
#else
LOCAL(set_fpscr_L1):
	.long GLOBAL(fpscr_values)
#endif

	ENDFUNC(GLOBAL(set_fpscr))
	/* Storage for the two images: 8 bytes, 4-byte aligned under ELF.  */
#ifndef NO_FPSCR_VALUES
#ifdef __ELF__
        .comm   GLOBAL(fpscr_values),8,4
#else
        .comm   GLOBAL(fpscr_values),8
#endif /* ELF */
#endif /* NO_FPSCR_VALUES */
2075#endif /* SH2E / SH3E / SH4 */
2076#endif /* __SH2A_NOFPU__ */
2077#endif /* L_set_fpscr */
2078#ifdef L_ic_invalidate
2079#if __SH5__ == 32
/* init_trampoline: fill in a 16-byte trampoline at the address in r0,
   then fall through into ic_invalidate for that same address.
   In:  r0 = trampoline address, r2 and r3 = the two words stored at
        offsets 8 and 12, r18 = return address.
   Writes r20 and tr0.
   The 8 bytes stored at offset 0 are, in memory order on either
   endianness, the halfwords 0xd002 0xd101 0x402b 0x0009 -- presumably
   the SHcompact stub that loads and jumps through the two words.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(init_trampoline)
	HIDDEN_FUNC(GLOBAL(init_trampoline))
GLOBAL(init_trampoline):
	st.l	r0,8,r2
#ifdef __LITTLE_ENDIAN__
	movi	9,r20
	shori	0x402b,r20
	shori	0xd101,r20
	shori	0xd002,r20
#else
	movi	0xffffffffffffd002,r20
	shori	0xd101,r20
	shori	0x402b,r20
	shori	9,r20
#endif
	st.q	r0,0,r20
	st.l	r0,12,r3
	ENDFUNC(GLOBAL(init_trampoline))
	/* No return above: execution continues into ic_invalidate.  */
/* ic_invalidate: write back the data cache block containing the address
   in r0, invalidate the matching instruction cache block, and return
   through r18.  */
	.global	GLOBAL(ic_invalidate)
	HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
	ocbwb	r0,0
	synco
	icbi	r0, 0
	ptabs	r18, tr0
	synci
	blink	tr0, r63
	ENDFUNC(GLOBAL(ic_invalidate))
2111#elif defined(__SH4A__)
/* ic_invalidate for SH4A: write back the data cache block at the address
   in r4, then invalidate the instruction cache block for it.
   In:  r4 = address.  */
	.global GLOBAL(ic_invalidate)
	HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
	ocbwb	@r4
	synco
	icbi	@r4
	rts
	  nop			/* delay slot */
	ENDFUNC(GLOBAL(ic_invalidate))
2121#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2122	/* For system code, we use ic_invalidate_line_i, but user code
2123	   needs a different mechanism.  A kernel call is generally not
2124	   available, and it would also be slow.  Different SH4 variants use
2125	   different sizes and associativities of the Icache.  We use a small
2126	   bit of dispatch code that can be put hidden in every shared object,
2127	   which calls the actual processor-specific invalidation code in a
2128	   separate module.
2129	   Or if you have operating system support, the OS could mmap the
2130	   procesor-specific code from a single page, since it is highly
2131	   repetitive.  */
2132	.global GLOBAL(ic_invalidate)
2133	HIDDEN_FUNC(GLOBAL(ic_invalidate))
2134GLOBAL(ic_invalidate):
2135#ifdef __pic__
2136#ifdef __vxworks
2137	mov.l	1f,r1
2138	mov.l	2f,r0
2139	mov.l	@r1,r1
2140	mov.l	0f,r2
2141	mov.l	@(r0,r1),r0
2142#else
2143	mov.l	1f,r1
2144	mova	1f,r0
2145	mov.l	0f,r2
2146	add	r1,r0
2147#endif
2148	mov.l	@(r0,r2),r1
2149#else
2150	mov.l	0f,r1
2151#endif
2152	ocbwb	@r4
2153	mov.l	@(8,r1),r0
2154	sub	r1,r4
2155	and	r4,r0
2156	add	r1,r0
2157	jmp	@r0
2158	mov.l	@(4,r1),r0
2159	.align	2
2160#ifndef __pic__
21610:	.long   GLOBAL(ic_invalidate_array)
2162#else /* __pic__ */
2163	.global GLOBAL(ic_invalidate_array)
21640:	.long   GLOBAL(ic_invalidate_array)@GOT
2165#ifdef __vxworks
21661:	.long	___GOTT_BASE__
21672:	.long	___GOTT_INDEX__
2168#else
21691:	.long   _GLOBAL_OFFSET_TABLE_
2170#endif
2171	ENDFUNC(GLOBAL(ic_invalidate))
2172#endif /* __pic__ */
2173#endif /* SH4 */
2174#endif /* L_ic_invalidate */
2175
2176#ifdef L_ic_invalidate_array
2177#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
/* ic_invalidate_array for SH4A: a single invalidation stub rather than a
   table.  It adds r1 back onto r4 and invalidates the instruction cache
   block at the resulting address.
   NOTE(review): the SH4 ic_invalidate dispatcher reads a mask word at
   offset 8 from this symbol.  With five 2-byte instructions ahead of the
   alignment, the zero word below appears to land at offset 12 and offset
   8 to hold the nop plus padding -- confirm that this is intended.  */
	/* This is needed when an SH4 dso with trampolines is used on SH4A.  */
	.global GLOBAL(ic_invalidate_array)
	FUNC(GLOBAL(ic_invalidate_array))
GLOBAL(ic_invalidate_array):
	add	r1,r4
	synco
	icbi	@r4
	rts
	  nop			/* delay slot */
	.align 2
	.long	0
	ENDFUNC(GLOBAL(ic_invalidate_array))
2191#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* ic_invalidate_array for SH4: a block of code and data laid out in
   32-byte entries, generated with .rept.  WAYS and WAY_SIZE default to
   4 and 0x4000 and may be predefined by the build.
   In the first two layouts every entry carries WAY_SIZE - 32 at byte
   offset 8; in the middle layout it also carries WAY_SIZE + 8 at offset
   4 (the two words the SH4 ic_invalidate dispatcher loads from the start
   of the array).  The braf instructions hop forward using the value in
   r0.  */
	.global GLOBAL(ic_invalidate_array)
	.p2align 5
	FUNC(GLOBAL(ic_invalidate_array))
/* This must be aligned to the beginning of a cache line.  */
GLOBAL(ic_invalidate_array):
#ifndef WAYS
#define WAYS 4
#define WAY_SIZE 0x4000
#endif
#if WAYS == 1
	/* Entry: rts + nop, then seven copies of WAY_SIZE - 32 (32 bytes).  */
	.rept	WAY_SIZE * WAYS / 32
	rts
	nop
	.rept	7
	.long	WAY_SIZE - 32
	.endr
	.endr
#elif WAYS <= 6
	/* Entry: braf/add, two parameter words, WAYS-2 further braf slots,
	   then rts slots filling the rest of the 32 bytes.  */
	.rept	WAY_SIZE * WAYS / 32
	braf	r0
	add	#-8,r0
	.long	WAY_SIZE + 8
	.long	WAY_SIZE - 32
	.rept	WAYS-2
	braf	r0
	nop
	.endr
	.rept	7 - WAYS
	rts
	nop
	.endr
	.endr
#else /* WAYS > 6 */
	/* This variant needs two different pages for mmap-ing.  */
 	.rept	WAYS-1
	.rept	WAY_SIZE / 32
	braf	r0
	nop
	.long	WAY_SIZE
	.rept 6
	.long	WAY_SIZE - 32
	.endr
	.endr
	.endr
	/* Final way: rts followed by nops in every entry.  */
	.rept	WAY_SIZE / 32
	rts
	.rept	15
	nop
	.endr
	.endr
#endif /* WAYS */
	ENDFUNC(GLOBAL(ic_invalidate_array))
2244#endif /* SH4 */
2245#endif /* L_ic_invalidate_array */
2246
2247#if defined (__SH5__) && __SH5__ == 32
2248#ifdef L_shcompact_call_trampoline
/* Dispatch table for GCC_shcompact_call_trampoline.  Every entry is the
   16-bit offset of a handler from ct_main_label.  The trampoline forms
   its index as 2 * nsb (cookie) against a base of ct_main_table - 31 * 2,
   so entry order is significant.  */
	.section	.rodata
	.align	1
#define CT_ENTRY(X) .word LOCAL(X) - datalabel LOCAL(ct_main_label)
LOCAL(ct_main_table):
CT_ENTRY(ct_r2_fp)
CT_ENTRY(ct_r2_ld)
CT_ENTRY(ct_r2_pop)
CT_ENTRY(ct_r3_fp)
CT_ENTRY(ct_r3_ld)
CT_ENTRY(ct_r3_pop)
CT_ENTRY(ct_r4_fp)
CT_ENTRY(ct_r4_ld)
CT_ENTRY(ct_r4_pop)
CT_ENTRY(ct_r5_fp)
CT_ENTRY(ct_r5_ld)
CT_ENTRY(ct_r5_pop)
CT_ENTRY(ct_r6_fph)
CT_ENTRY(ct_r6_fpl)
CT_ENTRY(ct_r6_ld)
CT_ENTRY(ct_r6_pop)
CT_ENTRY(ct_r7_fph)
CT_ENTRY(ct_r7_fpl)
CT_ENTRY(ct_r7_ld)
CT_ENTRY(ct_r7_pop)
CT_ENTRY(ct_r8_fph)
CT_ENTRY(ct_r8_fpl)
CT_ENTRY(ct_r8_ld)
CT_ENTRY(ct_r8_pop)
CT_ENTRY(ct_r9_fph)
CT_ENTRY(ct_r9_fpl)
CT_ENTRY(ct_r9_ld)
CT_ENTRY(ct_r9_pop)
CT_ENTRY(ct_pop_seq)
CT_ENTRY(ct_pop_seq)
CT_ENTRY(ct_r9_pop)
CT_ENTRY(ct_ret_wide)
CT_ENTRY(ct_call_func)
#undef CT_ENTRY
2285	.mode	SHmedia
2286	.section	.text..SHmedia32, "ax"
2287	.align	2
2288
2289     /* This function loads 64-bit general-purpose registers from the
2290	stack, from a memory address contained in them or from an FP
2291	register, according to a cookie passed in r1.  Its execution
2292	time is linear on the number of registers that actually have
2293	to be copied.  See sh.h for details on the actual bit pattern.
2294
2295	The function to be called is passed in r0.  If a 32-bit return
2296	value is expected, the actual function will be tail-called,
2297	otherwise the return address will be stored in r10 (that the
2298	caller should expect to be clobbered) and the return value
2299	will be expanded into r2/r3 upon return.  */
2300
2301	.global	GLOBAL(GCC_shcompact_call_trampoline)
2302	FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2303GLOBAL(GCC_shcompact_call_trampoline):
2304	ptabs/l	r0, tr0	/* Prepare to call the actual function.  */
2305	movi	((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2306	pt/l	LOCAL(ct_loop), tr1
2307	addz.l	r1, r63, r1
2308	shori	((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2309LOCAL(ct_loop):
2310	nsb	r1, r28
2311	shlli	r28, 1, r29
2312	ldx.w	r0, r29, r30
2313LOCAL(ct_main_label):
2314	ptrel/l	r30, tr2
2315	blink	tr2, r63
2316LOCAL(ct_r2_fp):	/* Copy r2 from an FP register.  */
2317	/* It must be dr0, so just do it.  */
2318	fmov.dq	dr0, r2
2319	movi	7, r30
2320	shlli	r30, 29, r31
2321	andc	r1, r31, r1
2322	blink	tr1, r63
2323LOCAL(ct_r3_fp):	/* Copy r3 from an FP register.  */
2324	/* It is either dr0 or dr2.  */
2325	movi	7, r30
2326	shlri	r1, 26, r32
2327	shlli	r30, 26, r31
2328	andc	r1, r31, r1
2329	fmov.dq	dr0, r3
2330	beqi/l	r32, 4, tr1
2331	fmov.dq	dr2, r3
2332	blink	tr1, r63
2333LOCAL(ct_r4_fp):	/* Copy r4 from an FP register.  */
2334	shlri	r1, 23 - 3, r34
2335	andi	r34, 3 << 3, r33
2336	addi	r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2337LOCAL(ct_r4_fp_base):
2338	ptrel/l	r32, tr2
2339	movi	7, r30
2340	shlli	r30, 23, r31
2341	andc	r1, r31, r1
2342	blink	tr2, r63
2343LOCAL(ct_r4_fp_copy):
2344	fmov.dq	dr0, r4
2345	blink	tr1, r63
2346	fmov.dq	dr2, r4
2347	blink	tr1, r63
2348	fmov.dq	dr4, r4
2349	blink	tr1, r63
2350LOCAL(ct_r5_fp):	/* Copy r5 from an FP register.  */
2351	shlri	r1, 20 - 3, r34
2352	andi	r34, 3 << 3, r33
2353	addi	r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2354LOCAL(ct_r5_fp_base):
2355	ptrel/l	r32, tr2
2356	movi	7, r30
2357	shlli	r30, 20, r31
2358	andc	r1, r31, r1
2359	blink	tr2, r63
2360LOCAL(ct_r5_fp_copy):
2361	fmov.dq	dr0, r5
2362	blink	tr1, r63
2363	fmov.dq	dr2, r5
2364	blink	tr1, r63
2365	fmov.dq	dr4, r5
2366	blink	tr1, r63
2367	fmov.dq	dr6, r5
2368	blink	tr1, r63
2369LOCAL(ct_r6_fph):	/* Copy r6 from a high FP register.  */
2370	/* It must be dr8.  */
2371	fmov.dq	dr8, r6
2372	movi	15, r30
2373	shlli	r30, 16, r31
2374	andc	r1, r31, r1
2375	blink	tr1, r63
2376LOCAL(ct_r6_fpl):	/* Copy r6 from a low FP register.  */
2377	shlri	r1, 16 - 3, r34
2378	andi	r34, 3 << 3, r33
2379	addi	r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2380LOCAL(ct_r6_fp_base):
2381	ptrel/l	r32, tr2
2382	movi	7, r30
2383	shlli	r30, 16, r31
2384	andc	r1, r31, r1
2385	blink	tr2, r63
2386LOCAL(ct_r6_fp_copy):
2387	fmov.dq	dr0, r6
2388	blink	tr1, r63
2389	fmov.dq	dr2, r6
2390	blink	tr1, r63
2391	fmov.dq	dr4, r6
2392	blink	tr1, r63
2393	fmov.dq	dr6, r6
2394	blink	tr1, r63
2395LOCAL(ct_r7_fph):	/* Copy r7 from a high FP register.  */
2396	/* It is either dr8 or dr10.  */
2397	movi	15 << 12, r31
2398	shlri	r1, 12, r32
2399	andc	r1, r31, r1
2400	fmov.dq	dr8, r7
2401	beqi/l	r32, 8, tr1
2402	fmov.dq	dr10, r7
2403	blink	tr1, r63
2404LOCAL(ct_r7_fpl):	/* Copy r7 from a low FP register.  */
2405	shlri	r1, 12 - 3, r34
2406	andi	r34, 3 << 3, r33
2407	addi	r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2408LOCAL(ct_r7_fp_base):
2409	ptrel/l	r32, tr2
2410	movi	7 << 12, r31
2411	andc	r1, r31, r1
2412	blink	tr2, r63
2413LOCAL(ct_r7_fp_copy):
2414	fmov.dq	dr0, r7
2415	blink	tr1, r63
2416	fmov.dq	dr2, r7
2417	blink	tr1, r63
2418	fmov.dq	dr4, r7
2419	blink	tr1, r63
2420	fmov.dq	dr6, r7
2421	blink	tr1, r63
2422LOCAL(ct_r8_fph):	/* Copy r8 from a high FP register.  */
2423	/* It is either dr8 or dr10.  */
2424	movi	15 << 8, r31
2425	andi	r1, 1 << 8, r32
2426	andc	r1, r31, r1
2427	fmov.dq	dr8, r8
2428	beq/l	r32, r63, tr1
2429	fmov.dq	dr10, r8
2430	blink	tr1, r63
2431LOCAL(ct_r8_fpl):	/* Copy r8 from a low FP register.  */
2432	shlri	r1, 8 - 3, r34
2433	andi	r34, 3 << 3, r33
2434	addi	r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2435LOCAL(ct_r8_fp_base):
2436	ptrel/l	r32, tr2
2437	movi	7 << 8, r31
2438	andc	r1, r31, r1
2439	blink	tr2, r63
2440LOCAL(ct_r8_fp_copy):
2441	fmov.dq	dr0, r8
2442	blink	tr1, r63
2443	fmov.dq	dr2, r8
2444	blink	tr1, r63
2445	fmov.dq	dr4, r8
2446	blink	tr1, r63
2447	fmov.dq	dr6, r8
2448	blink	tr1, r63
2449LOCAL(ct_r9_fph):	/* Copy r9 from a high FP register.  */
2450	/* It is either dr8 or dr10.  */
2451	movi	15 << 4, r31
2452	andi	r1, 1 << 4, r32
2453	andc	r1, r31, r1
2454	fmov.dq	dr8, r9
2455	beq/l	r32, r63, tr1
2456	fmov.dq	dr10, r9
2457	blink	tr1, r63
2458LOCAL(ct_r9_fpl):	/* Copy r9 from a low FP register.  */
2459	shlri	r1, 4 - 3, r34
2460	andi	r34, 3 << 3, r33
2461	addi	r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2462LOCAL(ct_r9_fp_base):
2463	ptrel/l	r32, tr2
2464	movi	7 << 4, r31
2465	andc	r1, r31, r1
2466	blink	tr2, r63
2467LOCAL(ct_r9_fp_copy):
2468	fmov.dq	dr0, r9
2469	blink	tr1, r63
2470	fmov.dq	dr2, r9
2471	blink	tr1, r63
2472	fmov.dq	dr4, r9
2473	blink	tr1, r63
2474	fmov.dq	dr6, r9
2475	blink	tr1, r63
2476LOCAL(ct_r2_ld):	/* Copy r2 from a memory address.  */
2477	pt/l	LOCAL(ct_r2_load), tr2
2478	movi	3, r30
2479	shlli	r30, 29, r31
2480	and	r1, r31, r32
2481	andc	r1, r31, r1
2482	beq/l	r31, r32, tr2
2483	addi.l	r2, 8, r3
2484	ldx.q	r2, r63, r2
2485	/* Fall through.  */
2486LOCAL(ct_r3_ld):	/* Copy r3 from a memory address.  */
2487	pt/l	LOCAL(ct_r3_load), tr2
2488	movi	3, r30
2489	shlli	r30, 26, r31
2490	and	r1, r31, r32
2491	andc	r1, r31, r1
2492	beq/l	r31, r32, tr2
2493	addi.l	r3, 8, r4
2494	ldx.q	r3, r63, r3
2495LOCAL(ct_r4_ld):	/* Copy r4 from a memory address.  */
2496	pt/l	LOCAL(ct_r4_load), tr2
2497	movi	3, r30
2498	shlli	r30, 23, r31
2499	and	r1, r31, r32
2500	andc	r1, r31, r1
2501	beq/l	r31, r32, tr2
2502	addi.l	r4, 8, r5
2503	ldx.q	r4, r63, r4
2504LOCAL(ct_r5_ld):	/* Copy r5 from a memory address.  */
2505	pt/l	LOCAL(ct_r5_load), tr2
2506	movi	3, r30
2507	shlli	r30, 20, r31
2508	and	r1, r31, r32
2509	andc	r1, r31, r1
2510	beq/l	r31, r32, tr2
2511	addi.l	r5, 8, r6
2512	ldx.q	r5, r63, r5
2513LOCAL(ct_r6_ld):	/* Copy r6 from a memory address.  */
2514	pt/l	LOCAL(ct_r6_load), tr2
2515	movi	3 << 16, r31
2516	and	r1, r31, r32
2517	andc	r1, r31, r1
2518	beq/l	r31, r32, tr2
2519	addi.l	r6, 8, r7
2520	ldx.q	r6, r63, r6
2521LOCAL(ct_r7_ld):	/* Copy r7 from a memory address.  */
2522	pt/l	LOCAL(ct_r7_load), tr2
2523	movi	3 << 12, r31
2524	and	r1, r31, r32
2525	andc	r1, r31, r1
2526	beq/l	r31, r32, tr2
2527	addi.l	r7, 8, r8
2528	ldx.q	r7, r63, r7
2529LOCAL(ct_r8_ld):	/* Copy r8 from a memory address.  */
2530	pt/l	LOCAL(ct_r8_load), tr2
2531	movi	3 << 8, r31
2532	and	r1, r31, r32
2533	andc	r1, r31, r1
2534	beq/l	r31, r32, tr2
2535	addi.l	r8, 8, r9
2536	ldx.q	r8, r63, r8
2537LOCAL(ct_r9_ld):	/* Copy r9 from a memory address.  */
2538	pt/l	LOCAL(ct_check_tramp), tr2
2539	ldx.q	r9, r63, r9
2540	blink	tr2, r63
2541LOCAL(ct_r2_load):
2542	ldx.q	r2, r63, r2
2543	blink	tr1, r63
2544LOCAL(ct_r3_load):
2545	ldx.q	r3, r63, r3
2546	blink	tr1, r63
2547LOCAL(ct_r4_load):
2548	ldx.q	r4, r63, r4
2549	blink	tr1, r63
2550LOCAL(ct_r5_load):
2551	ldx.q	r5, r63, r5
2552	blink	tr1, r63
2553LOCAL(ct_r6_load):
2554	ldx.q	r6, r63, r6
2555	blink	tr1, r63
2556LOCAL(ct_r7_load):
2557	ldx.q	r7, r63, r7
2558	blink	tr1, r63
2559LOCAL(ct_r8_load):
2560	ldx.q	r8, r63, r8
2561	blink	tr1, r63
2562LOCAL(ct_r2_pop):	/* Pop r2 from the stack.  */
2563	movi	1, r30
2564	ldx.q	r15, r63, r2
2565	shlli	r30, 29, r31
2566	addi.l	r15, 8, r15
2567	andc	r1, r31, r1
2568	blink	tr1, r63
2569LOCAL(ct_r3_pop):	/* Pop r3 from the stack.  */
2570	movi	1, r30
2571	ldx.q	r15, r63, r3
2572	shlli	r30, 26, r31
2573	addi.l	r15, 8, r15
2574	andc	r1, r31, r1
2575	blink	tr1, r63
2576LOCAL(ct_r4_pop):	/* Pop r4 from the stack.  */
2577	movi	1, r30
2578	ldx.q	r15, r63, r4
2579	shlli	r30, 23, r31
2580	addi.l	r15, 8, r15
2581	andc	r1, r31, r1
2582	blink	tr1, r63
2583LOCAL(ct_r5_pop):	/* Pop r5 from the stack.  */
2584	movi	1, r30
2585	ldx.q	r15, r63, r5
2586	shlli	r30, 20, r31
2587	addi.l	r15, 8, r15
2588	andc	r1, r31, r1
2589	blink	tr1, r63
2590LOCAL(ct_r6_pop):	/* Pop r6 from the stack.  */
2591	movi	1, r30
2592	ldx.q	r15, r63, r6
2593	shlli	r30, 16, r31
2594	addi.l	r15, 8, r15
2595	andc	r1, r31, r1
2596	blink	tr1, r63
2597LOCAL(ct_r7_pop):	/* Pop r7 from the stack.  */
2598	ldx.q	r15, r63, r7
2599	movi	1 << 12, r31
2600	addi.l	r15, 8, r15
2601	andc	r1, r31, r1
2602	blink	tr1, r63
2603LOCAL(ct_r8_pop):	/* Pop r8 from the stack.  */
2604	ldx.q	r15, r63, r8
2605	movi	1 << 8, r31
2606	addi.l	r15, 8, r15
2607	andc	r1, r31, r1
2608	blink	tr1, r63
2609LOCAL(ct_pop_seq):	/* Pop a sequence of registers off the stack.  */
2610	andi	r1, 7 << 1, r30
2611	movi	(LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2612	shlli	r30, 2, r31
2613	shori	LOCAL(ct_end_of_pop_seq) & 65535, r32
2614	sub.l	r32, r31, r33
2615	ptabs/l	r33, tr2
2616	blink	tr2, r63
2617LOCAL(ct_start_of_pop_seq):	/* Beginning of pop sequence.  */
2618	ldx.q	r15, r63, r3
2619	addi.l	r15, 8, r15
2620	ldx.q	r15, r63, r4
2621	addi.l	r15, 8, r15
2622	ldx.q	r15, r63, r5
2623	addi.l	r15, 8, r15
2624	ldx.q	r15, r63, r6
2625	addi.l	r15, 8, r15
2626	ldx.q	r15, r63, r7
2627	addi.l	r15, 8, r15
2628	ldx.q	r15, r63, r8
2629	addi.l	r15, 8, r15
2630LOCAL(ct_r9_pop):	/* Pop r9 from the stack.  */
2631	ldx.q	r15, r63, r9
2632	addi.l	r15, 8, r15
2633LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
2634LOCAL(ct_check_tramp):	/* Check whether we need a trampoline.  */
2635	pt/u	LOCAL(ct_ret_wide), tr2
2636	andi	r1, 1, r1
2637	bne/u	r1, r63, tr2
2638LOCAL(ct_call_func):	/* Just branch to the function.  */
2639	blink	tr0, r63
2640LOCAL(ct_ret_wide):	/* Call the function, so that we can unpack its
2641			   64-bit return value.  */
2642	add.l	r18, r63, r10
2643	blink	tr0, r18
2644	ptabs	r10, tr0
2645#if __LITTLE_ENDIAN__
2646	shari	r2, 32, r3
2647	add.l	r2, r63, r2
2648#else
2649	add.l	r2, r63, r3
2650	shari	r2, 32, r2
2651#endif
2652	blink	tr0, r63
2653
2654	ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2655#endif /* L_shcompact_call_trampoline */
2656
2657#ifdef L_shcompact_return_trampoline
2658     /* This function does the converse of the code in `ret_wide'
2659	above.  It is tail-called by SHcompact functions returning
2660	64-bit non-floating-point values, to pack the 32-bit values in
2661	r2 and r3 into r2.  */
     /* In:   r2, r3 = the two 32-bit halves of the value;
	      r18 = address to return to.
	Out:  r2 = the packed 64-bit value.  Bits 63..32 come from r3
	      when __LITTLE_ENDIAN__ is set, and from r2 otherwise.
	Uses: r2, r3 and tr0.  */
2662
2663	.mode	SHmedia
2664	.section	.text..SHmedia32, "ax"
2665	.align	2
2666	.global	GLOBAL(GCC_shcompact_return_trampoline)
2667	HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2668GLOBAL(GCC_shcompact_return_trampoline):
2669	ptabs/l	r18, tr0	/* tr0 = return address taken from r18.  */
2670#if __LITTLE_ENDIAN__
2671	addz.l	r2, r63, r2	/* Low half: keep only bits 31..0 of r2.  */
2672	shlli	r3, 32, r3	/* High half: move r3 up to bits 63..32.  */
2673#else
2674	addz.l	r3, r63, r3	/* Low half: keep only bits 31..0 of r3.  */
2675	shlli	r2, 32, r2	/* High half: move r2 up to bits 63..32.  */
2676#endif
2677	or	r3, r2, r2	/* Merge the two halves into r2.  */
2678	blink	tr0, r63	/* Return through tr0.  */
2679
2680	ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2681#endif /* L_shcompact_return_trampoline */
2682
2683#ifdef L_shcompact_incoming_args
2684	.section	.rodata
2685	.align	1
/* Dispatch table for GCC_shcompact_incoming_args.  Each entry is a
   16-bit offset from ia_main_label to the handler to run next.  The
   function indexes the table with the `nsb' result for the remaining
   cookie bits, through a base pointer biased by -31 entries, so the
   first entry below is used for an nsb result of 31 and the last
   (33rd) entry for a result of 63.  */
2686LOCAL(ia_main_table):
2687.word	1 /* Invalid, just loop */
2688.word	LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2689.word	LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2690.word	1 /* Invalid, just loop */
2691.word	LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2692.word	LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2693.word	1 /* Invalid, just loop */
2694.word	LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2695.word	LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2696.word	1 /* Invalid, just loop */
2697.word	LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2698.word	LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2699.word	1 /* Invalid, just loop */
2700.word	1 /* Invalid, just loop */
2701.word	LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2702.word	LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2703.word	1 /* Invalid, just loop */
2704.word	1 /* Invalid, just loop */
2705.word	LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2706.word	LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2707.word	1 /* Invalid, just loop */
2708.word	1 /* Invalid, just loop */
2709.word	LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2710.word	LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2711.word	1 /* Invalid, just loop */
2712.word	1 /* Invalid, just loop */
2713.word	LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2714.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2715.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2716.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2717.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2718.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2719.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2720	.mode	SHmedia
2721	.section	.text..SHmedia32, "ax"
2722	.align	2
2723
2724     /* This function stores 64-bit general-purpose registers back in
2725	the stack, and loads the address in which each register
2726	was stored into itself.  The lower 32 bits of r17 hold the address
2727	to begin storing, and the upper 32 bits of r17 hold the cookie.
2728	Its execution time is linear on the
2729	number of registers that actually have to be copied, and it is
2730	optimized for structures larger than 64 bits, as opposed to
2731	individual `long long' arguments.  See sh.h for details on the
2732	actual bit pattern.  */
     /* Register usage in the code below:
	  r0	   cookie bits that still have to be processed
	  r17	   address at which the next register is stored
	  r43	   address of ia_main_table minus 31 entries
	  r36-r41  scratch
	  tr0	   return address (taken from r18)
	  tr1	   ia_loop
	  tr2	   handler selected for the current step  */
2733
2734	.global	GLOBAL(GCC_shcompact_incoming_args)
2735 	FUNC(GLOBAL(GCC_shcompact_incoming_args))
2736GLOBAL(GCC_shcompact_incoming_args):
2737	ptabs/l	r18, tr0	/* Prepare to return.  */
2738	shlri	r17, 32, r0	/* Load the cookie.  */
2739	movi	((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2740	pt/l	LOCAL(ia_loop), tr1
2741	add.l	r17, r63, r17	/* Keep only the 32-bit store address in r17.  */
2742	shori	((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
	/* Main loop: pick the handler for the most significant cookie bit
	   that is still set and branch to it.  */
2743LOCAL(ia_loop):
2744	nsb	r0, r36		/* r36 = nsb result for the remaining cookie.  */
2745	shlli	r36, 1, r37	/* Scale it to an offset into the 16-bit table.  */
2746	ldx.w	r43, r37, r38	/* r38 = handler offset from ia_main_label.  */
2747LOCAL(ia_main_label):
2748	ptrel/l	r38, tr2
2749	blink	tr2, r63
	/* The ia_rN_ld handlers form a chain.  Each one tests and clears a
	   two-bit field of the cookie, stores rN at r17, replaces rN with
	   the address it was stored at and advances r17 by 8.  If both
	   bits of the field were set it goes back to ia_loop; otherwise it
	   falls through into the handler for the next register.  */
2750LOCAL(ia_r2_ld):	/* Store r2 and load its address.  */
2751	movi	3, r38
2752	shlli	r38, 29, r39	/* r39 = mask of r2's two cookie bits.  */
2753	and	r0, r39, r40	/* r40 = those bits as found in the cookie.  */
2754	andc	r0, r39, r0	/* Clear them in the cookie.  */
2755	stx.q	r17, r63, r2
2756	add.l	r17, r63, r2
2757	addi.l	r17, 8, r17
2758	beq/u	r39, r40, tr1	/* Both bits were set: back to ia_loop.  */
2759LOCAL(ia_r3_ld):	/* Store r3 and load its address.  */
2760	movi	3, r38
2761	shlli	r38, 26, r39
2762	and	r0, r39, r40
2763	andc	r0, r39, r0
2764	stx.q	r17, r63, r3
2765	add.l	r17, r63, r3
2766	addi.l	r17, 8, r17
2767	beq/u	r39, r40, tr1
2768LOCAL(ia_r4_ld):	/* Store r4 and load its address.  */
2769	movi	3, r38
2770	shlli	r38, 23, r39
2771	and	r0, r39, r40
2772	andc	r0, r39, r0
2773	stx.q	r17, r63, r4
2774	add.l	r17, r63, r4
2775	addi.l	r17, 8, r17
2776	beq/u	r39, r40, tr1
2777LOCAL(ia_r5_ld):	/* Store r5 and load its address.  */
2778	movi	3, r38
2779	shlli	r38, 20, r39
2780	and	r0, r39, r40
2781	andc	r0, r39, r0
2782	stx.q	r17, r63, r5
2783	add.l	r17, r63, r5
2784	addi.l	r17, 8, r17
2785	beq/u	r39, r40, tr1
2786LOCAL(ia_r6_ld):	/* Store r6 and load its address.  */
2787	movi	3, r38
2788	shlli	r38, 16, r39
2789	and	r0, r39, r40
2790	andc	r0, r39, r0
2791	stx.q	r17, r63, r6
2792	add.l	r17, r63, r6
2793	addi.l	r17, 8, r17
2794	beq/u	r39, r40, tr1
2795LOCAL(ia_r7_ld):	/* Store r7 and load its address.  */
2796	movi	3 << 12, r39
2797	and	r0, r39, r40
2798	andc	r0, r39, r0
2799	stx.q	r17, r63, r7
2800	add.l	r17, r63, r7
2801	addi.l	r17, 8, r17
2802	beq/u	r39, r40, tr1
2803LOCAL(ia_r8_ld):	/* Store r8 and load its address.  */
2804	movi	3 << 8, r39
2805	and	r0, r39, r40
2806	andc	r0, r39, r0
2807	stx.q	r17, r63, r8
2808	add.l	r17, r63, r8
2809	addi.l	r17, 8, r17
2810	beq/u	r39, r40, tr1
	/* End of the chain: r9 is handled without looking at the cookie
	   and the function returns directly.  */
2811LOCAL(ia_r9_ld):	/* Store r9 and load its address.  */
2812	stx.q	r17, r63, r9
2813	add.l	r17, r63, r9
2814	blink	tr0, r63
	/* The ia_rN_push handlers clear a single cookie bit, store rN at
	   r17, advance r17 by 8 and go back to ia_loop.  rN itself is left
	   unchanged.  */
2815LOCAL(ia_r2_push):	/* Push r2 onto the stack.  */
2816	movi	1, r38
2817	shlli	r38, 29, r39	/* r39 = r2's push bit.  */
2818	andc	r0, r39, r0	/* Clear it in the cookie.  */
2819	stx.q	r17, r63, r2
2820	addi.l	r17, 8, r17
2821	blink	tr1, r63	/* Back to ia_loop.  */
2822LOCAL(ia_r3_push):	/* Push r3 onto the stack.  */
2823	movi	1, r38
2824	shlli	r38, 26, r39
2825	andc	r0, r39, r0
2826	stx.q	r17, r63, r3
2827	addi.l	r17, 8, r17
2828	blink	tr1, r63
2829LOCAL(ia_r4_push):	/* Push r4 onto the stack.  */
2830	movi	1, r38
2831	shlli	r38, 23, r39
2832	andc	r0, r39, r0
2833	stx.q	r17, r63, r4
2834	addi.l	r17, 8, r17
2835	blink	tr1, r63
2836LOCAL(ia_r5_push):	/* Push r5 onto the stack.  */
2837	movi	1, r38
2838	shlli	r38, 20, r39
2839	andc	r0, r39, r0
2840	stx.q	r17, r63, r5
2841	addi.l	r17, 8, r17
2842	blink	tr1, r63
2843LOCAL(ia_r6_push):	/* Push r6 onto the stack.  */
2844	movi	1, r38
2845	shlli	r38, 16, r39
2846	andc	r0, r39, r0
2847	stx.q	r17, r63, r6
2848	addi.l	r17, 8, r17
2849	blink	tr1, r63
2850LOCAL(ia_r7_push):	/* Push r7 onto the stack.  */
2851	movi	1 << 12, r39
2852	andc	r0, r39, r0
2853	stx.q	r17, r63, r7
2854	addi.l	r17, 8, r17
2855	blink	tr1, r63
2856LOCAL(ia_r8_push):	/* Push r8 onto the stack.  */
2857	movi	1 << 8, r39
2858	andc	r0, r39, r0
2859	stx.q	r17, r63, r8
2860	addi.l	r17, 8, r17
2861	blink	tr1, r63
	/* Computed jump into the store sequence below.  The three-bit
	   field at bits 3..1 of the cookie is scaled to eight bytes per
	   unit and subtracted from the address of ia_end_of_push_seq, so
	   execution enters the sequence that many instruction pairs
	   before its end and runs through to the return.  */
2862LOCAL(ia_push_seq):	/* Push a sequence of registers onto the stack.  */
2863	andi	r0, 7 << 1, r38		/* r38 = field value * 2.  */
2864	movi	(LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2865	shlli	r38, 2, r39		/* r39 = field value * 8.  */
2866	shori	LOCAL(ia_end_of_push_seq) & 65535, r40
2867	sub.l	r40, r39, r41		/* r41 = entry point into the sequence.  */
2868	ptabs/l	r41, tr2
2869	blink	tr2, r63
	/* NOTE(review): the label name below presumably means `start'
	   (compare ct_start_of_pop_seq); nothing in this function refers
	   to it.  */
2870LOCAL(ia_stack_of_push_seq):	 /* Beginning of push sequence.  */
2871	stx.q	r17, r63, r3
2872	addi.l	r17, 8, r17
2873	stx.q	r17, r63, r4
2874	addi.l	r17, 8, r17
2875	stx.q	r17, r63, r5
2876	addi.l	r17, 8, r17
2877	stx.q	r17, r63, r6
2878	addi.l	r17, 8, r17
2879	stx.q	r17, r63, r7
2880	addi.l	r17, 8, r17
2881	stx.q	r17, r63, r8
2882	addi.l	r17, 8, r17
	/* The last pair of the sequence is the r9 store plus the return,
	   so r17 is not advanced after r9.  */
2883LOCAL(ia_r9_push):	/* Push r9 onto the stack.  */
2884	stx.q	r17, r63, r9
2885LOCAL(ia_return):	/* Return.  */
2886	blink	tr0, r63
2887LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
2888	ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2889#endif /* L_shcompact_incoming_args */
2890#endif
2891#if __SH5__
2892#ifdef L_nested_trampoline
2893#if __SH5__ == 32
2894	.section	.text..SHmedia32,"ax"
2895#else
2896	.text
2897#endif
2898	.align	3 /* It is copied in units of 8 bytes in SHmedia mode.  */
	/* Trampoline body.  It obtains an address within itself, then
	   reads two values at fixed offsets from that address: the first
	   is the branch target, the second is left in r1 for the target.
	   The values are 64 bits wide when __SH5__ == 64 and 32 bits wide
	   otherwise.
	   NOTE(review): presumably these are the nested function's address
	   and its static chain, stored behind a copy of this code by the
	   compiler's trampoline setup -- confirm against the SH backend.
	   Uses: r0, r1, tr0, tr1.  */
2899	.global	GLOBAL(GCC_nested_trampoline)
2900	HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2901GLOBAL(GCC_nested_trampoline):
2902	.mode	SHmedia
2903	ptrel/u	r63, tr0	/* PC-relative target with zero displacement...  */
2904	gettr	tr0, r0		/* ...read back: r0 = base for the loads below.  */
2905#if __SH5__ == 64
2906	ld.q	r0, 24, r1
2907#else
2908	ld.l	r0, 24, r1
2909#endif
2910	ptabs/l	r1, tr1		/* tr1 = first value: where to branch to.  */
2911#if __SH5__ == 64
2912	ld.q	r0, 32, r1
2913#else
2914	ld.l	r0, 28, r1
2915#endif
2916	blink	tr1, r63	/* Branch with the second value in r1.  */
2917
2918	ENDFUNC(GLOBAL(GCC_nested_trampoline))
2919#endif /* L_nested_trampoline */
2920#endif /* __SH5__ */
2921#if __SH5__ == 32
2922#ifdef L_push_pop_shmedia_regs
2923	.section	.text..SHmedia32,"ax"
2924	.mode	SHmedia
2925	.align	2
/* Register save / restore helpers.

   Without __SH4_NOFPU__ the entry points are GCC_push_shmedia_regs and
   GCC_pop_shmedia_regs; with it they are the `_nofpu' variants, which
   consist of the general-register part only.

   Frame built by the push routine, relative to the final r15:
     slots 26..24   tr7, tr6, tr5 (read into r62, r61, r60 first)
     slots 23..8    r59 .. r44
     slots  7..0    r35 .. r28
     slots 40..27   dr62 .. dr36, even registers (FPU variant only)
   Each slot is 8 bytes.  Both routines return to the address in r18.
   The push routine overwrites r60-r62 with the target registers before
   storing them, so the previous contents of r60-r62 are not saved.  */
2926#ifndef __SH4_NOFPU__
2927	.global	GLOBAL(GCC_push_shmedia_regs)
2928	FUNC(GLOBAL(GCC_push_shmedia_regs))
2929GLOBAL(GCC_push_shmedia_regs):
2930	addi.l	r15, -14*8, r15		/* Room for the 14 FP register pairs.  */
2931	fst.d	r15, 13*8, dr62
2932	fst.d	r15, 12*8, dr60
2933	fst.d	r15, 11*8, dr58
2934	fst.d	r15, 10*8, dr56
2935	fst.d	r15,  9*8, dr54
2936	fst.d	r15,  8*8, dr52
2937	fst.d	r15,  7*8, dr50
2938	fst.d	r15,  6*8, dr48
2939	fst.d	r15,  5*8, dr46
2940	fst.d	r15,  4*8, dr44
2941	fst.d	r15,  3*8, dr42
2942	fst.d	r15,  2*8, dr40
2943	fst.d	r15,  1*8, dr38
2944	fst.d	r15,  0*8, dr36
	/* Fall through into the general-register part.  */
2945#else /* __SH4_NOFPU__ */
2946	.global	GLOBAL(GCC_push_shmedia_regs_nofpu)
2947	FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2948GLOBAL(GCC_push_shmedia_regs_nofpu):
2949#endif /* ! __SH4_NOFPU__ */
2950	ptabs/l	r18, tr0		/* tr0 = return address.  */
2951	addi.l	r15, -27*8, r15		/* Room for 27 general-register slots.  */
2952	gettr	tr7, r62		/* Target registers are stored via r60-r62.  */
2953	gettr	tr6, r61
2954	gettr	tr5, r60
2955	st.q	r15, 26*8, r62
2956	st.q	r15, 25*8, r61
2957	st.q	r15, 24*8, r60
2958	st.q	r15, 23*8, r59
2959	st.q	r15, 22*8, r58
2960	st.q	r15, 21*8, r57
2961	st.q	r15, 20*8, r56
2962	st.q	r15, 19*8, r55
2963	st.q	r15, 18*8, r54
2964	st.q	r15, 17*8, r53
2965	st.q	r15, 16*8, r52
2966	st.q	r15, 15*8, r51
2967	st.q	r15, 14*8, r50
2968	st.q	r15, 13*8, r49
2969	st.q	r15, 12*8, r48
2970	st.q	r15, 11*8, r47
2971	st.q	r15, 10*8, r46
2972	st.q	r15,  9*8, r45
2973	st.q	r15,  8*8, r44
2974	st.q	r15,  7*8, r35
2975	st.q	r15,  6*8, r34
2976	st.q	r15,  5*8, r33
2977	st.q	r15,  4*8, r32
2978	st.q	r15,  3*8, r31
2979	st.q	r15,  2*8, r30
2980	st.q	r15,  1*8, r29
2981	st.q	r15,  0*8, r28
2982	blink	tr0, r63
2983#ifndef __SH4_NOFPU__
2984	ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2985#else
2986	ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2987#endif
/* Pop routine: reloads everything stored by the matching push routine
   and releases the frame.  r0 carries the frame size in bytes (41*8
   with the FP save area, 27*8 without) and is added to r15 at the end,
   so r0 is clobbered.  tr5-tr7 are reloaded through r60-r62.  */
2988#ifndef __SH4_NOFPU__
2989	.global	GLOBAL(GCC_pop_shmedia_regs)
2990	FUNC(GLOBAL(GCC_pop_shmedia_regs))
2991GLOBAL(GCC_pop_shmedia_regs):
2992	pt	.L0, tr1
2993	movi	41*8, r0		/* Frame size including the FP save area.  */
2994	fld.d	r15, 40*8, dr62
2995	fld.d	r15, 39*8, dr60
2996	fld.d	r15, 38*8, dr58
2997	fld.d	r15, 37*8, dr56
2998	fld.d	r15, 36*8, dr54
2999	fld.d	r15, 35*8, dr52
3000	fld.d	r15, 34*8, dr50
3001	fld.d	r15, 33*8, dr48
3002	fld.d	r15, 32*8, dr46
3003	fld.d	r15, 31*8, dr44
3004	fld.d	r15, 30*8, dr42
3005	fld.d	r15, 29*8, dr40
3006	fld.d	r15, 28*8, dr38
3007	fld.d	r15, 27*8, dr36
3008	blink	tr1, r63		/* Join the common part at .L0, keeping r0.  */
3009#else /* __SH4_NOFPU__ */
3010	.global	GLOBAL(GCC_pop_shmedia_regs_nofpu)
3011	FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3012GLOBAL(GCC_pop_shmedia_regs_nofpu):
3013#endif /* ! __SH4_NOFPU__	*/
3014	movi	27*8, r0		/* Frame size without an FP save area.  */
3015.L0:
3016	ptabs	r18, tr0		/* tr0 = return address.  */
3017	ld.q	r15, 26*8, r62
3018	ld.q	r15, 25*8, r61
3019	ld.q	r15, 24*8, r60
3020	ptabs	r62, tr7		/* Restore tr7, tr6, tr5.  */
3021	ptabs	r61, tr6
3022	ptabs	r60, tr5
3023	ld.q	r15, 23*8, r59
3024	ld.q	r15, 22*8, r58
3025	ld.q	r15, 21*8, r57
3026	ld.q	r15, 20*8, r56
3027	ld.q	r15, 19*8, r55
3028	ld.q	r15, 18*8, r54
3029	ld.q	r15, 17*8, r53
3030	ld.q	r15, 16*8, r52
3031	ld.q	r15, 15*8, r51
3032	ld.q	r15, 14*8, r50
3033	ld.q	r15, 13*8, r49
3034	ld.q	r15, 12*8, r48
3035	ld.q	r15, 11*8, r47
3036	ld.q	r15, 10*8, r46
3037	ld.q	r15,  9*8, r45
3038	ld.q	r15,  8*8, r44
3039	ld.q	r15,  7*8, r35
3040	ld.q	r15,  6*8, r34
3041	ld.q	r15,  5*8, r33
3042	ld.q	r15,  4*8, r32
3043	ld.q	r15,  3*8, r31
3044	ld.q	r15,  2*8, r30
3045	ld.q	r15,  1*8, r29
3046	ld.q	r15,  0*8, r28
3047	add.l	r15, r0, r15		/* Release the whole frame.  */
3048	blink	tr0, r63
3049
3050#ifndef __SH4_NOFPU__
3051	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3052#else
3053	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3054#endif
3055#endif /* L_push_pop_shmedia_regs */
3056#endif /* __SH5__ == 32 */
3057
3058#ifdef L_div_table
3059#if __SH5__
3060#if defined(__pic__) && defined(__SHMEDIA__)
3061	.global	GLOBAL(sdivsi3)
3062	FUNC(GLOBAL(sdivsi3))
3063#if __SH5__ == 32
3064	.section	.text..SHmedia32,"ax"
3065#else
3066	.text
3067#endif
3068#if 0
3069/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3070   in a text section does not work (at least for shared libraries):
3071   the linker sets the LSB of the address as if this was SHmedia code.  */
3072#define TEXT_DATA_BUG
3073#endif
3074	.align	2
 /* Signed 32-bit division, r0 = r4 / r5, done without a divide
    instruction.  The divisor r5 is normalized and its top bits index
    the division table (a byte factor and a 16-bit constant per entry)
    to form a first reciprocal estimate.  That estimate is refined (see
    the `inverse' comments below), multiplied by the dividend r4, and
    the product is corrected and shifted back by an amount derived from
    the normalization count in r1.
    tr0 first holds the table address and is then reloaded with the
    return address from r18; after that r18 is used as scratch.
    The interleaved indentation appears to mark independent dependency
    chains scheduled together -- do not reorder.  */
3075 // inputs: r4,r5
3076 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3077 // result in r0
3078 .global GLOBAL(sdivsi3)
3079GLOBAL(sdivsi3):
3080#ifdef TEXT_DATA_BUG
3081 ptb datalabel Local_div_table,tr0
3082#else
3083 ptb GLOBAL(div_table_internal),tr0
3084#endif
3085 nsb r5, r1
3086 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
3087 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
3088 /* bubble */
3089 gettr tr0,r20
3090 ldx.ub r20, r21, r19 // u0.8
3091 shari r25, 32, r25   // normalize to s2.30
3092 shlli r21, 1, r21
3093 muls.l r25, r19, r19 // s2.38
3094 ldx.w r20, r21, r21  // s2.14
3095  ptabs r18, tr0
3096 shari r19, 24, r19   // truncate to s2.14
3097 sub r21, r19, r19    // some 11 bit inverse in s1.14
3098 muls.l r19, r19, r21 // u0.28
3099  sub r63, r1, r1
3100  addi r1, 92, r1
3101 muls.l r25, r21, r18 // s2.58
3102 shlli r19, 45, r19   // multiply by two and convert to s2.58
3103  /* bubble */
3104 sub r19, r18, r18
3105 shari r18, 28, r18   // some 22 bit inverse in s1.30
3106 muls.l r18, r25, r0  // s2.60
3107  muls.l r18, r4, r25 // s32.30
3108  /* bubble */
3109 shari r0, 16, r19   // s-16.44
3110 muls.l r19, r18, r19 // s-16.74
3111  shari r25, 63, r0
3112  shari r4, 14, r18   // s19.-14
3113 shari r19, 30, r19   // s-16.44
3114 muls.l r19, r18, r19 // s15.30
3115  xor r21, r0, r21    // You could also use the constant 1 << 27.
3116  add r21, r25, r21
3117 sub r21, r19, r21
3118 shard r21, r1, r21
3119 sub r21, r0, r0
3120 blink tr0, r63
3121	ENDFUNC(GLOBAL(sdivsi3))
3122/* This table has been generated by divtab.c .
3123Defects for bias -330:
3124   Max defect: 6.081536e-07 at -1.000000e+00
3125   Min defect: 2.849516e-08 at 1.030651e+00
3126   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3127   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3128   Defect at 1: 1.238659e-07
3129   Defect at -2: 1.061708e-07 */
3130#else /* ! __pic__ || ! __SHMEDIA__ */
3131	.section	.rodata
3132#endif /* __pic__ */
3133#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3134	.balign 2
3135	.type	Local_div_table,@object
3136	.size	Local_div_table,128
3137/* negative division constants */
3138	.word	-16638
3139	.word	-17135
3140	.word	-17737
3141	.word	-18433
3142	.word	-19103
3143	.word	-19751
3144	.word	-20583
3145	.word	-21383
3146	.word	-22343
3147	.word	-23353
3148	.word	-24407
3149	.word	-25582
3150	.word	-26863
3151	.word	-28382
3152	.word	-29965
3153	.word	-31800
3154/* negative division factors */
3155	.byte	66
3156	.byte	70
3157	.byte	75
3158	.byte	81
3159	.byte	87
3160	.byte	93
3161	.byte	101
3162	.byte	109
3163	.byte	119
3164	.byte	130
3165	.byte	142
3166	.byte	156
3167	.byte	172
3168	.byte	192
3169	.byte	214
3170	.byte	241
3171	.skip 16
3172Local_div_table:
3173	.skip 16
3174/* positive division factors */
3175	.byte	241
3176	.byte	214
3177	.byte	192
3178	.byte	172
3179	.byte	156
3180	.byte	142
3181	.byte	130
3182	.byte	119
3183	.byte	109
3184	.byte	101
3185	.byte	93
3186	.byte	87
3187	.byte	81
3188	.byte	75
3189	.byte	70
3190	.byte	66
3191/* positive division constants */
3192	.word	31801
3193	.word	29966
3194	.word	28383
3195	.word	26864
3196	.word	25583
3197	.word	24408
3198	.word	23354
3199	.word	22344
3200	.word	21384
3201	.word	20584
3202	.word	19752
3203	.word	19104
3204	.word	18434
3205	.word	17738
3206	.word	17136
3207	.word	16639
3208	.section	.rodata
3209#endif /* TEXT_DATA_BUG */
3210	.balign 2
3211	.type	GLOBAL(div_table),@object
3212	.size	GLOBAL(div_table),128
3213/* negative division constants */
3214	.word	-16638
3215	.word	-17135
3216	.word	-17737
3217	.word	-18433
3218	.word	-19103
3219	.word	-19751
3220	.word	-20583
3221	.word	-21383
3222	.word	-22343
3223	.word	-23353
3224	.word	-24407
3225	.word	-25582
3226	.word	-26863
3227	.word	-28382
3228	.word	-29965
3229	.word	-31800
3230/* negative division factors */
3231	.byte	66
3232	.byte	70
3233	.byte	75
3234	.byte	81
3235	.byte	87
3236	.byte	93
3237	.byte	101
3238	.byte	109
3239	.byte	119
3240	.byte	130
3241	.byte	142
3242	.byte	156
3243	.byte	172
3244	.byte	192
3245	.byte	214
3246	.byte	241
3247	.skip 16
3248	.global	GLOBAL(div_table)
3249GLOBAL(div_table):
3250	HIDDEN_ALIAS(div_table_internal,div_table)
3251	.skip 16
3252/* positive division factors */
3253	.byte	241
3254	.byte	214
3255	.byte	192
3256	.byte	172
3257	.byte	156
3258	.byte	142
3259	.byte	130
3260	.byte	119
3261	.byte	109
3262	.byte	101
3263	.byte	93
3264	.byte	87
3265	.byte	81
3266	.byte	75
3267	.byte	70
3268	.byte	66
3269/* positive division constants */
3270	.word	31801
3271	.word	29966
3272	.word	28383
3273	.word	26864
3274	.word	25583
3275	.word	24408
3276	.word	23354
3277	.word	22344
3278	.word	21384
3279	.word	20584
3280	.word	19752
3281	.word	19104
3282	.word	18434
3283	.word	17738
3284	.word	17136
3285	.word	16639
3286
3287#elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3288/* This code uses shld, thus is not suitable for SH1 / SH2.  */
3289
3290/* Signed / unsigned division without use of FPU, optimized for SH4.
3291   Uses a lookup table for divisors in the range -128 .. +128, and
3292   div1 with case distinction for larger divisors in three more ranges.
3293   The code is lumped together with the table to allow the use of mova.  */
/* Byte offsets, within a 32-bit word in memory, of
     L_LSB	the least significant byte,
     L_LSWMSB	the most significant byte of the least significant
		16-bit word,
     L_MSWLSB	the least significant byte of the most significant
		16-bit word.
   The code uses them to drop partial quotient bytes into a word on
   the stack with mov.b.  */
3294#ifdef __LITTLE_ENDIAN__
3295#define L_LSB 0
3296#define L_LSWMSB 1
3297#define L_MSWLSB 2
3298#else
3299#define L_LSB 3
3300#define L_LSWMSB 2
3301#define L_MSWLSB 1
3302#endif
3303
/* udivsi3_i4i: unsigned 32-bit division, r0 = r4 / r5.
   r4 and r5 are saved on the stack and restored before returning; r1
   is used as scratch.  The divisor selects the path:
     divisor <= 128		reciprocal tables (udiv_le128)
     128 < divisor < 0x10000	div1 steps, continuing at udiv_25 inside
				sdivsi3_i4i
     divisor >= 0x10000		udiv_ge64k, or udiv_r8 when the divisor is
				larger than the dividend shifted right by 8
   The labels without the `u' prefix (div_le128, div_ge64k, div_r8,
   div_by_1_neg) are entered from sdivsi3_i4i, which has already pushed
   r4 and r5.
   On SH the instruction after bra / rts / bt/s / bf/s is a delay slot
   and executes before the branch takes effect.  */
3304	.balign 4
3305	.global	GLOBAL(udivsi3_i4i)
3306	FUNC(GLOBAL(udivsi3_i4i))
3307GLOBAL(udivsi3_i4i):
3308	mov.w LOCAL(c128_w), r1
3309	div0u
3310	mov r4,r0
3311	shlr8 r0		/* r0 = dividend >> 8.  */
3312	cmp/hi r1,r5		/* T = divisor > 128 (unsigned).  */
3313	extu.w r5,r1		/* r1 = low 16 bits of the divisor.  */
3314	bf LOCAL(udiv_le128)
3315	cmp/eq r5,r1		/* T = divisor fits in 16 bits.  */
3316	bf LOCAL(udiv_ge64k)
	/* 128 < divisor < 0x10000.  */
3317	shlr r0
3318	mov r5,r1		/* Keep the original divisor for the save below.  */
3319	shll16 r5
3320	mov.l r4,@-r15		/* Save r4.  */
3321	div1 r5,r0
3322	mov.l r1,@-r15		/* Save the original r5.  */
3323	div1 r5,r0
3324	div1 r5,r0
3325	bra LOCAL(udiv_25)
3326	div1 r5,r0
3327
	/* Signed entry for a divisor magnitude <= 128.  */
3328LOCAL(div_le128):
3329	mova LOCAL(div_table_ix),r0
3330	bra LOCAL(div_le128_2)
3331	mov.b @(r0,r5),r1
3332LOCAL(udiv_le128):
3333	mov.l r4,@-r15		/* Save r4.  */
3334	mova LOCAL(div_table_ix),r0
3335	mov.b @(r0,r5),r1	/* r1 = offset into the inverse table.  */
3336	mov.l r5,@-r15		/* Save r5.  */
3337LOCAL(div_le128_2):
3338	mova LOCAL(div_table_inv),r0
3339	mov.l @(r0,r1),r1	/* r1 = normalized inverse of the divisor.  */
3340	mov r5,r0
3341	tst #0xfe,r0		/* T = divisor is 0 or 1.  */
3342	mova LOCAL(div_table_clz),r0
3343	dmulu.l r1,r4
3344	mov.b @(r0,r5),r1	/* r1 = shift count for the final shld.  */
3345	bt/s LOCAL(div_by_1)
3346	mov r4,r0
3347	mov.l @r15+,r5
3348	sts mach,r0		/* High word of inverse * dividend.  */
	/* T is known to be clear here because the bt/s above fell
	   through, so the addc below adds without a carry in.  */
3349	/* clrt */
3350	addc r4,r0
3351	mov.l @r15+,r4
3352	rotcr r0		/* Shift the carry out of addc back in.  */
3353	rts
3354	shld r1,r0
3355
3356LOCAL(div_by_1_neg):
3357	neg r4,r0
	/* Trivial divisor: the quotient is already in r0; just restore
	   r5 and r4.  */
3358LOCAL(div_by_1):
3359	mov.l @r15+,r5
3360	rts
3361	mov.l @r15+,r4
3362
	/* Signed entry for a divisor magnitude >= 0x10000.  T comes from
	   the cmp/hi in the delay slot of the branch that got here.  */
3363LOCAL(div_ge64k):
3364	bt/s LOCAL(div_r8)
3365	div0u
3366	shll8 r5
3367	bra LOCAL(div_ge64k_2)
3368	div1 r5,r0
3369LOCAL(udiv_ge64k):
3370	cmp/hi r0,r5		/* T = divisor > (dividend >> 8).  */
3371	mov r5,r1
3372	bt LOCAL(udiv_r8)
3373	shll8 r5
3374	mov.l r4,@-r15		/* Save r4.  */
3375	div1 r5,r0
3376	mov.l r1,@-r15		/* Save the original r5.  */
3377LOCAL(div_ge64k_2):
3378	div1 r5,r0
3379	mov.l LOCAL(zero_l),r1
3380	.rept 4
3381	div1 r5,r0
3382	.endr
3383	mov.l r1,@-r15		/* Push a zero word that collects quotient bytes.  */
3384	div1 r5,r0
3385	mov.w LOCAL(m256_w),r1
3386	div1 r5,r0
3387	mov.b r0,@(L_LSWMSB,r15)
3388	xor r4,r0
3389	and r1,r0
3390	bra LOCAL(div_ge64k_end)
3391	xor r4,r0
3392
	/* Paths taken when the divisor exceeds the dividend shifted right
	   by 8; going by the label name, the quotient then fits in 8 bits.  */
3393LOCAL(div_r8):
3394	shll16 r4
3395	bra LOCAL(div_r8_2)
3396	shll8 r4
3397LOCAL(udiv_r8):
3398	mov.l r4,@-r15		/* Save r4.  */
3399	shll16 r4
3400	clrt
3401	shll8 r4
3402	mov.l r5,@-r15		/* Save r5.  */
3403LOCAL(div_r8_2):
3404	rotcl r4
3405	mov r0,r1
3406	div1 r5,r1
3407	mov r4,r0
3408	rotcl r0
3409	mov r5,r4		/* Divisor copy for the last div1, after r5 is restored.  */
3410	div1 r5,r1
3411	.rept 5
3412	rotcl r0; div1 r5,r1
3413	.endr
3414	rotcl r0
3415	mov.l @r15+,r5
3416	div1 r4,r1
3417	mov.l @r15+,r4
3418	rts
3419	rotcl r0
3420
3421	ENDFUNC(GLOBAL(udivsi3_i4i))
3422
3423	.global	GLOBAL(sdivsi3_i4i)
3424	FUNC(GLOBAL(sdivsi3_i4i))
3425	/* This is link-compatible with a GLOBAL(sdivsi3) call,
3426	   but we effectively clobber only r1.  */
	/* Signed 32-bit division, r0 = r4 / r5.
	   r4 and r5 are pushed on entry and restored on every exit path.
	   Both operands are replaced by their magnitudes, and the code
	   splits into a `pos_result' half and a `neg_result' half
	   according to whether the operand signs agree; the neg_* paths
	   negate the quotient just before returning.  Within each half
	   the divisor magnitude selects the same ranges as in
	   udivsi3_i4i: <= 128 (tables), < 0x10000, and >= 0x10000.
	   On the div1 paths a zero word is pushed as well; partial
	   quotient bytes are stored into it with mov.b at the L_MSWLSB and
	   L_LSWMSB offsets, and it is popped and or-ed with the last
	   quotient byte at the end.  */
3427GLOBAL(sdivsi3_i4i):
3428	mov.l r4,@-r15		/* Save r4.  */
3429	cmp/pz r5		/* T = divisor >= 0.  */
3430	mov.w LOCAL(c128_w), r1
3431	bt/s LOCAL(pos_divisor)
3432	cmp/pz r4		/* Delay slot: T = dividend >= 0.  */
3433	mov.l r5,@-r15		/* Save r5.  */
3434	neg r5,r5		/* r5 = magnitude of the negative divisor.  */
3435	bt/s LOCAL(neg_result)
3436	cmp/hi r1,r5		/* Delay slot: T = divisor magnitude > 128.  */
3437	neg r4,r4		/* Both operands negative: negate the dividend too.  */
3438LOCAL(pos_result):
3439	extu.w r5,r0
3440	bf LOCAL(div_le128)
3441	cmp/eq r5,r0		/* T = divisor fits in 16 bits.  */
3442	mov r4,r0
3443	shlr8 r0
3444	bf/s LOCAL(div_ge64k)
3445	cmp/hi r0,r5		/* Delay slot: T = divisor > (dividend >> 8).  */
3446	div0u
3447	shll16 r5
3448	div1 r5,r0
3449	div1 r5,r0
3450	div1 r5,r0
	/* udivsi3_i4i joins here for 128 < divisor < 0x10000.  */
3451LOCAL(udiv_25):
3452	mov.l LOCAL(zero_l),r1
3453	div1 r5,r0
3454	div1 r5,r0
3455	mov.l r1,@-r15		/* Push the zero word that collects quotient bytes.  */
3456	.rept 3
3457	div1 r5,r0
3458	.endr
3459	mov.b r0,@(L_MSWLSB,r15)
3460	xtrct r4,r0
3461	swap.w r0,r0
3462	.rept 8
3463	div1 r5,r0
3464	.endr
3465	mov.b r0,@(L_LSWMSB,r15)
3466LOCAL(div_ge64k_end):
3467	.rept 8
3468	div1 r5,r0
3469	.endr
3470	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3471	extu.b r0,r0
3472	mov.l @r15+,r5		/* Restore r5.  */
3473	or r4,r0		/* Combine the collected bytes with the last one.  */
3474	mov.l @r15+,r4		/* Restore r4.  */
3475	rts
3476	rotcl r0
3477
	/* Table path for a negative result.  On entry r0 holds the low 16
	   bits of the divisor magnitude (<= 128 here).  */
3478LOCAL(div_le128_neg):
3479	tst #0xfe,r0		/* T = divisor magnitude is 0 or 1.  */
3480	mova LOCAL(div_table_ix),r0
3481	mov.b @(r0,r5),r1
3482	mova LOCAL(div_table_inv),r0
3483	bt/s LOCAL(div_by_1_neg)
3484	mov.l @(r0,r1),r1
3485	mova LOCAL(div_table_clz),r0
3486	dmulu.l r1,r4
3487	mov.b @(r0,r5),r1
3488	mov.l @r15+,r5
3489	sts mach,r0
	/* T is known to be clear here because the bt/s above fell
	   through, so the addc below adds without a carry in.  */
3490	/* clrt */
3491	addc r4,r0
3492	mov.l @r15+,r4
3493	rotcr r0
3494	shld r1,r0
3495	rts
3496	neg r0,r0		/* Delay slot: negate the quotient.  */
3497
3498LOCAL(pos_divisor):
3499	mov.l r5,@-r15		/* Save r5.  */
3500	bt/s LOCAL(pos_result)
3501	cmp/hi r1,r5		/* Delay slot: T = divisor > 128.  */
3502	neg r4,r4		/* Negative dividend: use its magnitude.  */
3503LOCAL(neg_result):
3504	extu.w r5,r0
3505	bf LOCAL(div_le128_neg)
3506	cmp/eq r5,r0		/* T = divisor fits in 16 bits.  */
3507	mov r4,r0
3508	shlr8 r0
3509	bf/s LOCAL(div_ge64k_neg)
3510	cmp/hi r0,r5		/* Delay slot: T = divisor > (dividend >> 8).  */
3511	div0u
3512	mov.l LOCAL(zero_l),r1
3513	shll16 r5
3514	div1 r5,r0
3515	mov.l r1,@-r15		/* Push the zero word that collects quotient bytes.  */
3516	.rept 7
3517	div1 r5,r0
3518	.endr
3519	mov.b r0,@(L_MSWLSB,r15)
3520	xtrct r4,r0
3521	swap.w r0,r0
3522	.rept 8
3523	div1 r5,r0
3524	.endr
3525	mov.b r0,@(L_LSWMSB,r15)
3526LOCAL(div_ge64k_neg_end):
3527	.rept 8
3528	div1 r5,r0
3529	.endr
3530	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3531	extu.b r0,r1
3532	mov.l @r15+,r5		/* Restore r5.  */
3533	or r4,r1
3534LOCAL(div_r8_neg_end):
3535	mov.l @r15+,r4		/* Restore r4.  */
3536	rotcl r1
3537	rts
3538	neg r1,r0		/* Delay slot: r0 = negated quotient.  */
3539
3540LOCAL(div_ge64k_neg):
3541	bt/s LOCAL(div_r8_neg)
3542	div0u
3543	shll8 r5
3544	mov.l LOCAL(zero_l),r1
3545	.rept 6
3546	div1 r5,r0
3547	.endr
3548	mov.l r1,@-r15		/* Push the zero word that collects quotient bytes.  */
3549	div1 r5,r0
3550	mov.w LOCAL(m256_w),r1
3551	div1 r5,r0
3552	mov.b r0,@(L_LSWMSB,r15)
3553	xor r4,r0
3554	and r1,r0
3555	bra LOCAL(div_ge64k_neg_end)
3556	xor r4,r0
3557
	/* 16-bit constant loaded with mov.w at both function entries and
	   compared against the divisor.  */
3558LOCAL(c128_w):
3559	.word 128
3560
3561LOCAL(div_r8_neg):
3562	clrt
3563	shll16 r4
3564	mov r4,r1
3565	shll8 r1
3566	mov r5,r4		/* Divisor copy for the last div1, after r5 is restored.  */
3567	.rept 7
3568	rotcl r1; div1 r5,r0
3569	.endr
3570	mov.l @r15+,r5		/* Restore r5.  */
3571	rotcl r1
3572	bra LOCAL(div_r8_neg_end)
3573	div1 r4,r0
3574
	/* Mask loaded with mov.w on the div_ge64k paths; presumably it is
	   sign-extended by the load and so equals -256 -- TODO confirm.  */
3575LOCAL(m256_w):
3576	.word 0xff00
3577/* This table has been generated by divtab-sh4.c.  */
3578	.balign 4
3579LOCAL(div_table_clz):
3580	.byte	0
3581	.byte	1
3582	.byte	0
3583	.byte	-1
3584	.byte	-1
3585	.byte	-2
3586	.byte	-2
3587	.byte	-2
3588	.byte	-2
3589	.byte	-3
3590	.byte	-3
3591	.byte	-3
3592	.byte	-3
3593	.byte	-3
3594	.byte	-3
3595	.byte	-3
3596	.byte	-3
3597	.byte	-4
3598	.byte	-4
3599	.byte	-4
3600	.byte	-4
3601	.byte	-4
3602	.byte	-4
3603	.byte	-4
3604	.byte	-4
3605	.byte	-4
3606	.byte	-4
3607	.byte	-4
3608	.byte	-4
3609	.byte	-4
3610	.byte	-4
3611	.byte	-4
3612	.byte	-4
3613	.byte	-5
3614	.byte	-5
3615	.byte	-5
3616	.byte	-5
3617	.byte	-5
3618	.byte	-5
3619	.byte	-5
3620	.byte	-5
3621	.byte	-5
3622	.byte	-5
3623	.byte	-5
3624	.byte	-5
3625	.byte	-5
3626	.byte	-5
3627	.byte	-5
3628	.byte	-5
3629	.byte	-5
3630	.byte	-5
3631	.byte	-5
3632	.byte	-5
3633	.byte	-5
3634	.byte	-5
3635	.byte	-5
3636	.byte	-5
3637	.byte	-5
3638	.byte	-5
3639	.byte	-5
3640	.byte	-5
3641	.byte	-5
3642	.byte	-5
3643	.byte	-5
3644	.byte	-5
3645	.byte	-6
3646	.byte	-6
3647	.byte	-6
3648	.byte	-6
3649	.byte	-6
3650	.byte	-6
3651	.byte	-6
3652	.byte	-6
3653	.byte	-6
3654	.byte	-6
3655	.byte	-6
3656	.byte	-6
3657	.byte	-6
3658	.byte	-6
3659	.byte	-6
3660	.byte	-6
3661	.byte	-6
3662	.byte	-6
3663	.byte	-6
3664	.byte	-6
3665	.byte	-6
3666	.byte	-6
3667	.byte	-6
3668	.byte	-6
3669	.byte	-6
3670	.byte	-6
3671	.byte	-6
3672	.byte	-6
3673	.byte	-6
3674	.byte	-6
3675	.byte	-6
3676	.byte	-6
3677	.byte	-6
3678	.byte	-6
3679	.byte	-6
3680	.byte	-6
3681	.byte	-6
3682	.byte	-6
3683	.byte	-6
3684	.byte	-6
3685	.byte	-6
3686	.byte	-6
3687	.byte	-6
3688	.byte	-6
3689	.byte	-6
3690	.byte	-6
3691	.byte	-6
3692	.byte	-6
3693	.byte	-6
3694	.byte	-6
3695	.byte	-6
3696	.byte	-6
3697	.byte	-6
3698	.byte	-6
3699	.byte	-6
3700	.byte	-6
3701	.byte	-6
3702	.byte	-6
3703	.byte	-6
3704	.byte	-6
3705	.byte	-6
3706	.byte	-6
3707	.byte	-6
3708/* Lookup table translating positive divisor to index into table of
3709   normalized inverse.  N.B. the '0' entry is also the last entry of the
3710 previous table, and causes an unaligned access for division by zero.  */
3711LOCAL(div_table_ix):
3712	.byte	-6
3713	.byte	-128
3714	.byte	-128
3715	.byte	0
3716	.byte	-128
3717	.byte	-64
3718	.byte	0
3719	.byte	64
3720	.byte	-128
3721	.byte	-96
3722	.byte	-64
3723	.byte	-32
3724	.byte	0
3725	.byte	32
3726	.byte	64
3727	.byte	96
3728	.byte	-128
3729	.byte	-112
3730	.byte	-96
3731	.byte	-80
3732	.byte	-64
3733	.byte	-48
3734	.byte	-32
3735	.byte	-16
3736	.byte	0
3737	.byte	16
3738	.byte	32
3739	.byte	48
3740	.byte	64
3741	.byte	80
3742	.byte	96
3743	.byte	112
3744	.byte	-128
3745	.byte	-120
3746	.byte	-112
3747	.byte	-104
3748	.byte	-96
3749	.byte	-88
3750	.byte	-80
3751	.byte	-72
3752	.byte	-64
3753	.byte	-56
3754	.byte	-48
3755	.byte	-40
3756	.byte	-32
3757	.byte	-24
3758	.byte	-16
3759	.byte	-8
3760	.byte	0
3761	.byte	8
3762	.byte	16
3763	.byte	24
3764	.byte	32
3765	.byte	40
3766	.byte	48
3767	.byte	56
3768	.byte	64
3769	.byte	72
3770	.byte	80
3771	.byte	88
3772	.byte	96
3773	.byte	104
3774	.byte	112
3775	.byte	120
3776	.byte	-128
3777	.byte	-124
3778	.byte	-120
3779	.byte	-116
3780	.byte	-112
3781	.byte	-108
3782	.byte	-104
3783	.byte	-100
3784	.byte	-96
3785	.byte	-92
3786	.byte	-88
3787	.byte	-84
3788	.byte	-80
3789	.byte	-76
3790	.byte	-72
3791	.byte	-68
3792	.byte	-64
3793	.byte	-60
3794	.byte	-56
3795	.byte	-52
3796	.byte	-48
3797	.byte	-44
3798	.byte	-40
3799	.byte	-36
3800	.byte	-32
3801	.byte	-28
3802	.byte	-24
3803	.byte	-20
3804	.byte	-16
3805	.byte	-12
3806	.byte	-8
3807	.byte	-4
3808	.byte	0
3809	.byte	4
3810	.byte	8
3811	.byte	12
3812	.byte	16
3813	.byte	20
3814	.byte	24
3815	.byte	28
3816	.byte	32
3817	.byte	36
3818	.byte	40
3819	.byte	44
3820	.byte	48
3821	.byte	52
3822	.byte	56
3823	.byte	60
3824	.byte	64
3825	.byte	68
3826	.byte	72
3827	.byte	76
3828	.byte	80
3829	.byte	84
3830	.byte	88
3831	.byte	92
3832	.byte	96
3833	.byte	100
3834	.byte	104
3835	.byte	108
3836	.byte	112
3837	.byte	116
3838	.byte	120
3839	.byte	124
3840	.byte	-128
3841/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
3842	.balign 4
3843LOCAL(zero_l):
3844	.long	0x0
3845	.long	0xF81F81F9
3846	.long	0xF07C1F08
3847	.long	0xE9131AC0
3848	.long	0xE1E1E1E2
3849	.long	0xDAE6076C
3850	.long	0xD41D41D5
3851	.long	0xCD856891
3852	.long	0xC71C71C8
3853	.long	0xC0E07039
3854	.long	0xBACF914D
3855	.long	0xB4E81B4F
3856	.long	0xAF286BCB
3857	.long	0xA98EF607
3858	.long	0xA41A41A5
3859	.long	0x9EC8E952
3860	.long	0x9999999A
3861	.long	0x948B0FCE
3862	.long	0x8F9C18FA
3863	.long	0x8ACB90F7
3864	.long	0x86186187
3865	.long	0x81818182
3866	.long	0x7D05F418
3867	.long	0x78A4C818
3868	.long	0x745D1746
3869	.long	0x702E05C1
3870	.long	0x6C16C16D
3871	.long	0x68168169
3872	.long	0x642C8591
3873	.long	0x60581606
3874	.long	0x5C9882BA
3875	.long	0x58ED2309
3876LOCAL(div_table_inv):
3877	.long	0x55555556
3878	.long	0x51D07EAF
3879	.long	0x4E5E0A73
3880	.long	0x4AFD6A06
3881	.long	0x47AE147B
3882	.long	0x446F8657
3883	.long	0x41414142
3884	.long	0x3E22CBCF
3885	.long	0x3B13B13C
3886	.long	0x38138139
3887	.long	0x3521CFB3
3888	.long	0x323E34A3
3889	.long	0x2F684BDB
3890	.long	0x2C9FB4D9
3891	.long	0x29E4129F
3892	.long	0x27350B89
3893	.long	0x24924925
3894	.long	0x21FB7813
3895	.long	0x1F7047DD
3896	.long	0x1CF06ADB
3897	.long	0x1A7B9612
3898	.long	0x18118119
3899	.long	0x15B1E5F8
3900	.long	0x135C8114
3901	.long	0x11111112
3902	.long	0xECF56BF
3903	.long	0xC9714FC
3904	.long	0xA6810A7
3905	.long	0x8421085
3906	.long	0x624DD30
3907	.long	0x4104105
3908	.long	0x2040811
3909	/* maximum error: 0.987342 scaled: 0.921875*/
3910
3911	ENDFUNC(GLOBAL(sdivsi3_i4i))
3912#endif /* SH3 / SH4 */
3913
3914#endif /* L_div_table */
3915
3916#ifdef L_udiv_qrnnd_16
3917#if !__SHMEDIA__
3918	HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
3919	/* udiv_qrnnd_16: produce one 16-bit quotient digit and the matching
	   partial remainder, using sixteen div1 steps followed by a
	   multiply-and-correct step.  Built only when L_udiv_qrnnd_16 is
	   defined and the target is not SHmedia.

	   Register roles, as stated by the original author:
	     results:  r0: rn   r1: qn
	     inputs:   r0: n1   r4: n0   r5: d   r6: d1
	     scratch:  r2: __m  (the product read back from MACL)
	   The T bit and MACL are overwritten as well.

	   NOTE(review): rn / qn are presumably "remainder" and "quotient
	   digit", and d1 presumably holds the upper half of d; the exact bit
	   layout the caller must provide is not visible in this routine --
	   confirm against the caller before relying on it.  */
3920	/* n1 < d, but n1 might be larger than d1.  */
3921	.global GLOBAL(udiv_qrnnd_16)
3922	.balign 8
3923GLOBAL(udiv_qrnnd_16):
3924	div0u			/* initialise the flags for an unsigned div1 sequence */
3925	cmp/hi r6,r0		/* T = (r0 > r6), unsigned */
3926	bt .Lots		/* n1 > d1: handled out of line at .Lots */
3927	.rept 16
3928	div1 r6,r0		/* one divide step of r0 by r6; repeated 16 times */
3929	.endr
3930	extu.w r0,r1		/* r1 = low 16 bits of r0, zero-extended (T untouched) */
3931	bt 0f			/* T here is still the result of the last div1 */
3932	add r6,r0		/* T was clear: r0 += r6 (add does not change T) */
39330:	rotcl r1		/* r1 = (r1 << 1) | T, i.e. append the last quotient bit */
3934	mulu.w r1,r5		/* MACL = low16(r1) * low16(r5), unsigned 16x16 */
3935	xtrct r4,r0		/* r0 = (r4 << 16) | (r0 >> 16) */
3936	swap.w r0,r0		/* swap halves; net effect of the two insns:
				   r0 = (old r0 & 0xffff0000) | (r4 & 0xffff) */
3937	sts macl,r2		/* r2 = __m, the product computed above */
3938	cmp/hs r2,r0		/* T = (r0 >= r2), unsigned, taken before the subtract */
3939	sub r2,r0		/* r0 -= r2; sub leaves T alone */
3940	bt 0f			/* no borrow: r0 and r1 are final, go to the return */
3941	addc r5,r0		/* T is 0 on this path, so r0 += r5; T = carry out */
3942	add #-1,r1		/* the quotient digit was too large by at least one */
3943	bt 0f			/* carry out: one correction was enough, return */
39441:	add #-1,r1		/* second correction: decrement the digit again ... */
3945	rts
3946	add r5,r0		/* ... and (in the rts delay slot) add d once more */
3947	.balign 8
3948.Lots:				/* reached when r0 > r6 on entry */
3949	sub r5,r0		/* r0 -= r5 */
3950	swap.w r4,r1		/* r1 = r4 with its 16-bit halves swapped */
3951	xtrct r0,r1		/* r1 = (r0 << 16) | (r1 >> 16)
				      = low16(r0) : low16(r4) */
3952	clrt			/* T = 0 so the addc below adds no carry-in */
3953	mov r1,r0
3954	addc r5,r0		/* r0 += r5; T = carry out */
3955	mov #-1,r1		/* r1 = 0xffffffff.  SL1 below is a macro from
				   lib1funcs.h whose definition is not shown here.
				   NOTE(review): it presumably emits "bf 1b" with
				   "shlr16 r1" as the accompanying slot insn, so
				   that r1 becomes 0xffff either way and control
				   goes to 1b when the addc produced no carry --
				   confirm against lib1funcs.h.  */
3956	SL1(bf, 1b,
3957	shlr16 r1)
39580:	rts			/* shared return for the paths that branch to 0f */
3959	nop
3960	ENDFUNC(GLOBAL(udiv_qrnnd_16))
3961#endif /* !__SHMEDIA__ */
3962#endif /* L_udiv_qrnnd_16 */
3963