xref: /netbsd-src/external/gpl3/gcc.old/dist/libgcc/config/sh/lib1funcs.S (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1/* Copyright (C) 1994-2015 Free Software Foundation, Inc.
2
3This file is free software; you can redistribute it and/or modify it
4under the terms of the GNU General Public License as published by the
5Free Software Foundation; either version 3, or (at your option) any
6later version.
7
8This file is distributed in the hope that it will be useful, but
9WITHOUT ANY WARRANTY; without even the implied warranty of
10MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11General Public License for more details.
12
13Under Section 7 of GPL version 3, you are granted additional
14permissions described in the GCC Runtime Library Exception, version
153.1, as published by the Free Software Foundation.
16
17You should have received a copy of the GNU General Public License and
18a copy of the GCC Runtime Library Exception along with this program;
19see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
20<http://www.gnu.org/licenses/>.  */
21
22
23!! libgcc routines for the Renesas / SuperH SH CPUs.
24!! Contributed by Steve Chamberlain.
25!! sac@cygnus.com
26
27!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
28!! recoded in assembly by Toshiyasu Morita
29!! tm@netcom.com
30
31#if defined(__ELF__) && defined(__linux__)
32.section .note.GNU-stack,"",%progbits
33.previous
34#endif
35
36/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37   ELF local label prefixes by J"orn Rennecke
38   amylaar@cygnus.com  */
39
40#include "lib1funcs.h"
41
42/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
43   so it is more convenient to define NO_FPSCR_VALUES here than to
44   define it on the command line.  */
45#if defined __vxworks && defined __PIC__
46#define NO_FPSCR_VALUES
47#endif
48
49#if ! __SH5__
50#ifdef L_ashiftrt
/* GLOBAL(ashiftrt_r4_N), N = 0..32: arithmetic right shift of r4 by the
   fixed count N.

   Entry:	r4: value to shift
   Exit:	r4: value shifted right arithmetically by N

   There is one entry point per shift count.  The entry points are laid
   out as fall-through chains of single-bit "shar r4" instructions, so
   entering at label N executes exactly the shifts that are still needed
   before the chain reaches a shared tail and returns.  Besides r4, the
   T bit is written by the shar, rotcl and subc instructions used here;
   the N = 0 entry changes nothing.  */
51	.global	GLOBAL(ashiftrt_r4_0)
52	.global	GLOBAL(ashiftrt_r4_1)
53	.global	GLOBAL(ashiftrt_r4_2)
54	.global	GLOBAL(ashiftrt_r4_3)
55	.global	GLOBAL(ashiftrt_r4_4)
56	.global	GLOBAL(ashiftrt_r4_5)
57	.global	GLOBAL(ashiftrt_r4_6)
58	.global	GLOBAL(ashiftrt_r4_7)
59	.global	GLOBAL(ashiftrt_r4_8)
60	.global	GLOBAL(ashiftrt_r4_9)
61	.global	GLOBAL(ashiftrt_r4_10)
62	.global	GLOBAL(ashiftrt_r4_11)
63	.global	GLOBAL(ashiftrt_r4_12)
64	.global	GLOBAL(ashiftrt_r4_13)
65	.global	GLOBAL(ashiftrt_r4_14)
66	.global	GLOBAL(ashiftrt_r4_15)
67	.global	GLOBAL(ashiftrt_r4_16)
68	.global	GLOBAL(ashiftrt_r4_17)
69	.global	GLOBAL(ashiftrt_r4_18)
70	.global	GLOBAL(ashiftrt_r4_19)
71	.global	GLOBAL(ashiftrt_r4_20)
72	.global	GLOBAL(ashiftrt_r4_21)
73	.global	GLOBAL(ashiftrt_r4_22)
74	.global	GLOBAL(ashiftrt_r4_23)
75	.global	GLOBAL(ashiftrt_r4_24)
76	.global	GLOBAL(ashiftrt_r4_25)
77	.global	GLOBAL(ashiftrt_r4_26)
78	.global	GLOBAL(ashiftrt_r4_27)
79	.global	GLOBAL(ashiftrt_r4_28)
80	.global	GLOBAL(ashiftrt_r4_29)
81	.global	GLOBAL(ashiftrt_r4_30)
82	.global	GLOBAL(ashiftrt_r4_31)
83	.global	GLOBAL(ashiftrt_r4_32)
84
85	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
86	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
87	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
88	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
89	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
90	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
91	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
92	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
93	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
94	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
95	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
96	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
97	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
98	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
99	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
100	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
101	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
102	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
103	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
104	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
105	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
106	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
107	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
108	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
109	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
110	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
111	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
112	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
113	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
114	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
115	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
116	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
117	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
118
119	.align	1
/* Counts 31 and 32 share one body: rotcl moves the sign bit of r4 into T,
   and "subc r4,r4" in the rts delay slot then computes 0 - T, leaving 0
   for a non-negative input and -1 for a negative one.  */
120GLOBAL(ashiftrt_r4_32):
121GLOBAL(ashiftrt_r4_31):
122	rotcl	r4
123	rts
124	subc	r4,r4
125
/* Counts 25..30 perform 1..6 single-bit shifts and fall into the
   shift-by-24 tail: a 24-bit logical shift (shlr16 + shlr8) whose result
   is sign-extended from the low byte in the rts delay slot.  */
126GLOBAL(ashiftrt_r4_30):
127	shar	r4
128GLOBAL(ashiftrt_r4_29):
129	shar	r4
130GLOBAL(ashiftrt_r4_28):
131	shar	r4
132GLOBAL(ashiftrt_r4_27):
133	shar	r4
134GLOBAL(ashiftrt_r4_26):
135	shar	r4
136GLOBAL(ashiftrt_r4_25):
137	shar	r4
138GLOBAL(ashiftrt_r4_24):
139	shlr16	r4
140	shlr8	r4
141	rts
142	exts.b	r4,r4
143
/* Counts 17..23 perform 1..7 single-bit shifts and fall into the
   shift-by-16 tail: shlr16 followed by sign extension of the low word.  */
144GLOBAL(ashiftrt_r4_23):
145	shar	r4
146GLOBAL(ashiftrt_r4_22):
147	shar	r4
148GLOBAL(ashiftrt_r4_21):
149	shar	r4
150GLOBAL(ashiftrt_r4_20):
151	shar	r4
152GLOBAL(ashiftrt_r4_19):
153	shar	r4
154GLOBAL(ashiftrt_r4_18):
155	shar	r4
156GLOBAL(ashiftrt_r4_17):
157	shar	r4
158GLOBAL(ashiftrt_r4_16):
159	shlr16	r4
160	rts
161	exts.w	r4,r4
162
/* Counts 1..15 are done purely with single-bit shifts; the last shift of
   the chain sits in the delay slot of the rts at ashiftrt_r4_1.  */
163GLOBAL(ashiftrt_r4_15):
164	shar	r4
165GLOBAL(ashiftrt_r4_14):
166	shar	r4
167GLOBAL(ashiftrt_r4_13):
168	shar	r4
169GLOBAL(ashiftrt_r4_12):
170	shar	r4
171GLOBAL(ashiftrt_r4_11):
172	shar	r4
173GLOBAL(ashiftrt_r4_10):
174	shar	r4
175GLOBAL(ashiftrt_r4_9):
176	shar	r4
177GLOBAL(ashiftrt_r4_8):
178	shar	r4
179GLOBAL(ashiftrt_r4_7):
180	shar	r4
181GLOBAL(ashiftrt_r4_6):
182	shar	r4
183GLOBAL(ashiftrt_r4_5):
184	shar	r4
185GLOBAL(ashiftrt_r4_4):
186	shar	r4
187GLOBAL(ashiftrt_r4_3):
188	shar	r4
189GLOBAL(ashiftrt_r4_2):
190	shar	r4
191GLOBAL(ashiftrt_r4_1):
192	rts
193	shar	r4
194
/* Count 0: return with r4 untouched.  */
195GLOBAL(ashiftrt_r4_0):
196	rts
197	nop
198
199	ENDFUNC(GLOBAL(ashiftrt_r4_0))
200	ENDFUNC(GLOBAL(ashiftrt_r4_1))
201	ENDFUNC(GLOBAL(ashiftrt_r4_2))
202	ENDFUNC(GLOBAL(ashiftrt_r4_3))
203	ENDFUNC(GLOBAL(ashiftrt_r4_4))
204	ENDFUNC(GLOBAL(ashiftrt_r4_5))
205	ENDFUNC(GLOBAL(ashiftrt_r4_6))
206	ENDFUNC(GLOBAL(ashiftrt_r4_7))
207	ENDFUNC(GLOBAL(ashiftrt_r4_8))
208	ENDFUNC(GLOBAL(ashiftrt_r4_9))
209	ENDFUNC(GLOBAL(ashiftrt_r4_10))
210	ENDFUNC(GLOBAL(ashiftrt_r4_11))
211	ENDFUNC(GLOBAL(ashiftrt_r4_12))
212	ENDFUNC(GLOBAL(ashiftrt_r4_13))
213	ENDFUNC(GLOBAL(ashiftrt_r4_14))
214	ENDFUNC(GLOBAL(ashiftrt_r4_15))
215	ENDFUNC(GLOBAL(ashiftrt_r4_16))
216	ENDFUNC(GLOBAL(ashiftrt_r4_17))
217	ENDFUNC(GLOBAL(ashiftrt_r4_18))
218	ENDFUNC(GLOBAL(ashiftrt_r4_19))
219	ENDFUNC(GLOBAL(ashiftrt_r4_20))
220	ENDFUNC(GLOBAL(ashiftrt_r4_21))
221	ENDFUNC(GLOBAL(ashiftrt_r4_22))
222	ENDFUNC(GLOBAL(ashiftrt_r4_23))
223	ENDFUNC(GLOBAL(ashiftrt_r4_24))
224	ENDFUNC(GLOBAL(ashiftrt_r4_25))
225	ENDFUNC(GLOBAL(ashiftrt_r4_26))
226	ENDFUNC(GLOBAL(ashiftrt_r4_27))
227	ENDFUNC(GLOBAL(ashiftrt_r4_28))
228	ENDFUNC(GLOBAL(ashiftrt_r4_29))
229	ENDFUNC(GLOBAL(ashiftrt_r4_30))
230	ENDFUNC(GLOBAL(ashiftrt_r4_31))
231	ENDFUNC(GLOBAL(ashiftrt_r4_32))
232#endif
233
234#ifdef L_ashiftrt_n
235
236!
237! GLOBAL(ashrsi3)
238!
239! Entry:
240!
241! r4: Value to shift
242! r5: Shift count
243!
244! Exit:
245!
246! r0: Result
247!
248! Destroys:
249!
250! T bit, r5
251!
252
/* Implementation: the shift count is reduced to 0..31 and used to index
   a table of byte offsets; control then transfers to the matching entry
   of a fall-through chain of single-bit "shar r0" instructions (with
   shared tails for 16, 24 and 31), in the same way for every count.  */
253	.global	GLOBAL(ashrsi3)
254	HIDDEN_FUNC(GLOBAL(ashrsi3))
255	.align	2
256GLOBAL(ashrsi3):
/* Only the low five bits of the shift count are used.  */
257	mov	#31,r0
258	and	r0,r5
/* r5 = table[count]: distance from the table to the code for this count.
   The value to shift is copied into r0 in the branch delay slot.  */
259	mova	LOCAL(ashrsi3_table),r0
260	mov.b	@(r0,r5),r5
261#ifdef __sh1__
/* SH1 path: form the absolute target (table address + offset) and jmp.  */
262	add	r5,r0
263	jmp	@r0
264#else
/* braf branches relative to the address following its delay slot, which
   is where the table starts, so the table-relative offset is used as is.  */
265	braf	r5
266#endif
267	mov	r4,r0
268
269	.align	2
/* One byte per shift count 0..31.  The entries are read with mov.b
   (sign-extending), so every target must stay within 127 bytes of the
   table start.  */
270LOCAL(ashrsi3_table):
271	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
272	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
273	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
274	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
275	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
276	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
277	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
278	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
279	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
280	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
281	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
282	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
283	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
284	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
285	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
286	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
287	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
288	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
289	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
290	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
291	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
292	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
293	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
294	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
295	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
296	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
297	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
298	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
299	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
300	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
301	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
302	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
303
/* Count 31: rotcl puts the sign bit into T; "subc r0,r0" then yields
   0 - T, i.e. 0 or -1.  */
304LOCAL(ashrsi3_31):
305	rotcl	r0
306	rts
307	subc	r0,r0
308
/* Counts 25..30: 1..6 single-bit shifts, then the shift-by-24 tail
   (24-bit logical shift followed by sign extension of the low byte).  */
309LOCAL(ashrsi3_30):
310	shar	r0
311LOCAL(ashrsi3_29):
312	shar	r0
313LOCAL(ashrsi3_28):
314	shar	r0
315LOCAL(ashrsi3_27):
316	shar	r0
317LOCAL(ashrsi3_26):
318	shar	r0
319LOCAL(ashrsi3_25):
320	shar	r0
321LOCAL(ashrsi3_24):
322	shlr16	r0
323	shlr8	r0
324	rts
325	exts.b	r0,r0
326
/* Counts 17..23: 1..7 single-bit shifts, then the shift-by-16 tail
   (shlr16 followed by sign extension of the low word).  */
327LOCAL(ashrsi3_23):
328	shar	r0
329LOCAL(ashrsi3_22):
330	shar	r0
331LOCAL(ashrsi3_21):
332	shar	r0
333LOCAL(ashrsi3_20):
334	shar	r0
335LOCAL(ashrsi3_19):
336	shar	r0
337LOCAL(ashrsi3_18):
338	shar	r0
339LOCAL(ashrsi3_17):
340	shar	r0
341LOCAL(ashrsi3_16):
342	shlr16	r0
343	rts
344	exts.w	r0,r0
345
/* Counts 1..15: single-bit shifts only; the final one executes in the
   delay slot of the rts at ashrsi3_1.  */
346LOCAL(ashrsi3_15):
347	shar	r0
348LOCAL(ashrsi3_14):
349	shar	r0
350LOCAL(ashrsi3_13):
351	shar	r0
352LOCAL(ashrsi3_12):
353	shar	r0
354LOCAL(ashrsi3_11):
355	shar	r0
356LOCAL(ashrsi3_10):
357	shar	r0
358LOCAL(ashrsi3_9):
359	shar	r0
360LOCAL(ashrsi3_8):
361	shar	r0
362LOCAL(ashrsi3_7):
363	shar	r0
364LOCAL(ashrsi3_6):
365	shar	r0
366LOCAL(ashrsi3_5):
367	shar	r0
368LOCAL(ashrsi3_4):
369	shar	r0
370LOCAL(ashrsi3_3):
371	shar	r0
372LOCAL(ashrsi3_2):
373	shar	r0
374LOCAL(ashrsi3_1):
375	rts
376	shar	r0
377
/* Count 0: r0 already holds the unshifted value.  */
378LOCAL(ashrsi3_0):
379	rts
380	nop
381
382	ENDFUNC(GLOBAL(ashrsi3))
383#endif
384
385#ifdef L_ashiftlt
386
387!
388! GLOBAL(ashlsi3)
389! (For compatibility with older binaries, not used by compiler)
390!
391! Entry:
392!	r4: Value to shift
393!	r5: Shift count
394!
395! Exit:
396!	r0: Result
397!
398! Destroys:
399!	T bit
400!
401!
402! GLOBAL(ashlsi3_r0)
403!
404! Entry:
405!	r4: Value to shift
406!	r0: Shift count
407!
408! Exit:
409!	r0: Result
410!
411! Destroys:
412!	T bit
413
/* Implementation: ashlsi3 only moves the count from r5 to r0 and falls
   into ashlsi3_r0.  The count is masked to 0..31 and multiplied by four
   (shll2) because every slot of the dispatch table below is exactly two
   16-bit instructions.  Each slot either finishes the shift itself or
   does a first partial shift in the delay slot of a branch to a shared
   tail.  */
414	.global	GLOBAL(ashlsi3)
415	.global GLOBAL(ashlsi3_r0)
416	HIDDEN_FUNC(GLOBAL(ashlsi3))
417	HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
418GLOBAL(ashlsi3):
419	mov	r5,r0
420	.align	2
421GLOBAL(ashlsi3_r0):
422
423#ifdef __sh1__
/* SH1 path: the target address is computed explicitly and reached with
   jmp.  mova needs r0, so r4 is saved on the stack and borrowed as a
   temporary for the scaled count, then restored before the jump.  */
424	and	#31,r0
425	shll2	r0
426	mov.l	r4,@-r15
427	mov	r0,r4
428	mova	LOCAL(ashlsi3_table),r0
429	add	r4,r0
430	mov.l	@r15+,r4
431	jmp	@r0
432	mov	r4,r0
433	.align 2
434#else
/* braf branches relative to the address following its delay slot, which
   is the start of the table; the value to shift is copied to r0 in the
   delay slot.  */
435	and	#31,r0
436	shll2	r0
437	braf	r0
438	mov	r4,r0
439#endif
440
/* Dispatch table: slot N (4 bytes each) handles a shift by N.  The slot
   size must not change, and only the last slot (31) may be longer.  */
441LOCAL(ashlsi3_table):
442	rts				// << 0
443	nop
444LOCAL(ashlsi_1):
445	rts				// << 1
446	shll	r0
447LOCAL(ashlsi_2):			// << 2
448	rts
449	shll2	r0
450	bra	LOCAL(ashlsi_1)		// << 3
451	shll2	r0
452	bra	LOCAL(ashlsi_2)		// << 4
453	shll2	r0
454	bra	LOCAL(ashlsi_5)		// << 5
455	shll	r0
456	bra	LOCAL(ashlsi_6)		// << 6
457	shll2	r0
458	bra	LOCAL(ashlsi_7)		// << 7
459	shll	r0
460LOCAL(ashlsi_8):			// << 8
461	rts
462	shll8	r0
463	bra	LOCAL(ashlsi_8)		// << 9
464	shll	r0
465	bra	LOCAL(ashlsi_8)		// << 10
466	shll2	r0
467	bra	LOCAL(ashlsi_11)	// << 11
468	shll	r0
469	bra	LOCAL(ashlsi_12)	// << 12
470	shll2	r0
471	bra	LOCAL(ashlsi_13)	// << 13
472	shll	r0
473	bra	LOCAL(ashlsi_14)	// << 14
474	shll8	r0
475	bra	LOCAL(ashlsi_15)	// << 15
476	shll8	r0
477LOCAL(ashlsi_16):			// << 16
478	rts
479	shll16	r0
480	bra	LOCAL(ashlsi_16)	// << 17
481	shll	r0
482	bra	LOCAL(ashlsi_16)	// << 18
483	shll2	r0
484	bra	LOCAL(ashlsi_19)	// << 19
485	shll	r0
486	bra	LOCAL(ashlsi_20)	// << 20
487	shll2	r0
488	bra	LOCAL(ashlsi_21)	// << 21
489	shll	r0
490	bra	LOCAL(ashlsi_22)	// << 22
491	shll16	r0
492	bra	LOCAL(ashlsi_23)	// << 23
493	shll16	r0
494	bra	LOCAL(ashlsi_16)	// << 24
495	shll8	r0
496	bra	LOCAL(ashlsi_25)	// << 25
497	shll	r0
498	bra	LOCAL(ashlsi_26)	// << 26
499	shll2	r0
500	bra	LOCAL(ashlsi_27)	// << 27
501	shll	r0
502	bra	LOCAL(ashlsi_28)	// << 28
503	shll2	r0
504	bra	LOCAL(ashlsi_29)	// << 29
505	shll16	r0
506	bra	LOCAL(ashlsi_30)	// << 30
507	shll16	r0
/* << 31: keep only bit 0, then rotate it round into bit 31.  */
508	and	#1,r0			// << 31
509	rts
510	rotr	r0
511
/* Shared tails.  Several rts instructions below are immediately followed
   by the label of the next tail: the first instruction of that next tail
   executes in the rts delay slot and supplies the last part of the shift
   (for example 5 = 1 + 2 + the shll2 at ashlsi_13).  The order of these
   tails is therefore significant.  */
512LOCAL(ashlsi_7):
513	shll2	r0
514LOCAL(ashlsi_5):
515LOCAL(ashlsi_6):
516	shll2	r0
517	rts
518LOCAL(ashlsi_13):
519	shll2	r0
520LOCAL(ashlsi_12):
521LOCAL(ashlsi_11):
522	shll8	r0
523	rts
524LOCAL(ashlsi_21):
525	shll2	r0
526LOCAL(ashlsi_20):
527LOCAL(ashlsi_19):
528	shll16	r0
529	rts
530LOCAL(ashlsi_28):
531LOCAL(ashlsi_27):
532	shll2	r0
533LOCAL(ashlsi_26):
534LOCAL(ashlsi_25):
535	shll16	r0
536	rts
537	shll8	r0
538
/* The remaining tails overshoot with a byte/word shift in the dispatch
   slot, back off with a logical right shift, and finish with another
   byte/word shift: 14 = 8 - 2 + 8, 22 = 16 - 2 + 8.  */
539LOCAL(ashlsi_22):
540LOCAL(ashlsi_14):
541	shlr2	r0
542	rts
543	shll8	r0
544
/* 15 = 8 - 1 + 8, 23 = 16 - 1 + 8.  */
545LOCAL(ashlsi_23):
546LOCAL(ashlsi_15):
547	shlr	r0
548	rts
549	shll8	r0
550
/* 29 = 16 - 1 - 2 + 16, 30 = 16 - 2 + 16.  */
551LOCAL(ashlsi_29):
552	shlr	r0
553LOCAL(ashlsi_30):
554	shlr2	r0
555	rts
556	shll16	r0
557
558	ENDFUNC(GLOBAL(ashlsi3))
559	ENDFUNC(GLOBAL(ashlsi3_r0))
560#endif
561
562#ifdef L_lshiftrt
563
564!
565! GLOBAL(lshrsi3)
566! (For compatibility with older binaries, not used by compiler)
567!
568! Entry:
569!	r4: Value to shift
570!	r5: Shift count
571!
572! Exit:
573!	r0: Result
574!
575! Destroys:
576!	T bit
577!
578!
579! GLOBAL(lshrsi3_r0)
580!
581! Entry:
582!	r4: Value to shift
583!	r0: Shift count
584!
585! Exit:
586!	r0: Result
587!
588! Destroys:
589!	T bit
590
/* Implementation: lshrsi3 only moves the count from r5 to r0 and falls
   into lshrsi3_r0.  The count is masked to 0..31 and multiplied by four
   (shll2) because every slot of the dispatch table below is exactly two
   16-bit instructions.  Each slot either finishes the shift itself or
   does a first partial shift in the delay slot of a branch to a shared
   tail.  */
591	.global	GLOBAL(lshrsi3)
592	.global	GLOBAL(lshrsi3_r0)
593	HIDDEN_FUNC(GLOBAL(lshrsi3))
594	HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
595GLOBAL(lshrsi3):
596	mov	r5,r0
597	.align	2
598GLOBAL(lshrsi3_r0):
599
600#ifdef __sh1__
/* SH1 path: the target address is computed explicitly and reached with
   jmp.  mova needs r0, so r4 is saved on the stack and borrowed as a
   temporary for the scaled count, then restored before the jump.  */
601	and	#31,r0
602	shll2	r0
603	mov.l	r4,@-r15
604	mov	r0,r4
605	mova	LOCAL(lshrsi3_table),r0
606	add	r4,r0
607	mov.l	@r15+,r4
608	jmp	@r0
609	mov	r4,r0
610	.align 2
611#else
/* braf branches relative to the address following its delay slot, which
   is the start of the table; the value to shift is copied to r0 in the
   delay slot.  */
612	and	#31,r0
613	shll2	r0
614	braf	r0
615	mov	r4,r0
616#endif
/* Dispatch table: slot N (4 bytes each) handles a shift by N.  The slot
   size must not change, and only the last slot (31) may be longer.  */
617LOCAL(lshrsi3_table):
618	rts				// >> 0
619	nop
620LOCAL(lshrsi_1):			// >> 1
621	rts
622	shlr	r0
623LOCAL(lshrsi_2):			// >> 2
624	rts
625	shlr2	r0
626	bra	LOCAL(lshrsi_1)		// >> 3
627	shlr2	r0
628	bra	LOCAL(lshrsi_2)		// >> 4
629	shlr2	r0
630	bra	LOCAL(lshrsi_5)		// >> 5
631	shlr	r0
632	bra	LOCAL(lshrsi_6)		// >> 6
633	shlr2	r0
634	bra	LOCAL(lshrsi_7)		// >> 7
635	shlr	r0
636LOCAL(lshrsi_8):			// >> 8
637	rts
638	shlr8	r0
639	bra	LOCAL(lshrsi_8)		// >> 9
640	shlr	r0
641	bra	LOCAL(lshrsi_8)		// >> 10
642	shlr2	r0
643	bra	LOCAL(lshrsi_11)	// >> 11
644	shlr	r0
645	bra	LOCAL(lshrsi_12)	// >> 12
646	shlr2	r0
647	bra	LOCAL(lshrsi_13)	// >> 13
648	shlr	r0
649	bra	LOCAL(lshrsi_14)	// >> 14
650	shlr8	r0
651	bra	LOCAL(lshrsi_15)	// >> 15
652	shlr8	r0
653LOCAL(lshrsi_16):			// >> 16
654	rts
655	shlr16	r0
656	bra	LOCAL(lshrsi_16)	// >> 17
657	shlr	r0
658	bra	LOCAL(lshrsi_16)	// >> 18
659	shlr2	r0
660	bra	LOCAL(lshrsi_19)	// >> 19
661	shlr	r0
662	bra	LOCAL(lshrsi_20)	// >> 20
663	shlr2	r0
664	bra	LOCAL(lshrsi_21)	// >> 21
665	shlr	r0
666	bra	LOCAL(lshrsi_22)	// >> 22
667	shlr16	r0
668	bra	LOCAL(lshrsi_23)	// >> 23
669	shlr16	r0
670	bra	LOCAL(lshrsi_16)	// >> 24
671	shlr8	r0
672	bra	LOCAL(lshrsi_25)	// >> 25
673	shlr	r0
674	bra	LOCAL(lshrsi_26)	// >> 26
675	shlr2	r0
676	bra	LOCAL(lshrsi_27)	// >> 27
677	shlr	r0
678	bra	LOCAL(lshrsi_28)	// >> 28
679	shlr2	r0
680	bra	LOCAL(lshrsi_29)	// >> 29
681	shlr16	r0
682	bra	LOCAL(lshrsi_30)	// >> 30
683	shlr16	r0
/* >> 31: shll moves bit 31 into T, movt then sets r0 to T (0 or 1).  */
684	shll	r0			// >> 31
685	rts
686	movt	r0
687
/* Shared tails.  Several rts instructions below are immediately followed
   by the label of the next tail: the first instruction of that next tail
   executes in the rts delay slot and supplies the last part of the shift
   (for example 5 = 1 + 2 + the shlr2 at lshrsi_13).  The order of these
   tails is therefore significant.  */
688LOCAL(lshrsi_7):
689	shlr2	r0
690LOCAL(lshrsi_5):
691LOCAL(lshrsi_6):
692	shlr2	r0
693	rts
694LOCAL(lshrsi_13):
695	shlr2	r0
696LOCAL(lshrsi_12):
697LOCAL(lshrsi_11):
698	shlr8	r0
699	rts
700LOCAL(lshrsi_21):
701	shlr2	r0
702LOCAL(lshrsi_20):
703LOCAL(lshrsi_19):
704	shlr16	r0
705	rts
706LOCAL(lshrsi_28):
707LOCAL(lshrsi_27):
708	shlr2	r0
709LOCAL(lshrsi_26):
710LOCAL(lshrsi_25):
711	shlr16	r0
712	rts
713	shlr8	r0
714
/* The remaining tails overshoot with a byte/word shift in the dispatch
   slot, back off with a left shift, and finish with another byte/word
   shift: 14 = 8 - 2 + 8, 22 = 16 - 2 + 8.  */
715LOCAL(lshrsi_22):
716LOCAL(lshrsi_14):
717	shll2	r0
718	rts
719	shlr8	r0
720
/* 15 = 8 - 1 + 8, 23 = 16 - 1 + 8.  */
721LOCAL(lshrsi_23):
722LOCAL(lshrsi_15):
723	shll	r0
724	rts
725	shlr8	r0
726
/* 29 = 16 - 1 - 2 + 16, 30 = 16 - 2 + 16.  */
727LOCAL(lshrsi_29):
728	shll	r0
729LOCAL(lshrsi_30):
730	shll2	r0
731	rts
732	shlr16	r0
733
734	ENDFUNC(GLOBAL(lshrsi3))
735	ENDFUNC(GLOBAL(lshrsi3_r0))
736#endif
737
738#ifdef L_movmem
/* GLOBAL(movmem) and the GLOBAL(movmemSI<n>) family: longword block copy.

   Registers as used by the code below:
	r4: destination address
	r5: source address
	r6: size argument, used by movmem only; it is multiplied by four on
	    entry (see also the note in front of the movmem label) --
	    presumably a longword-based count, TODO confirm with the caller.
	r0: copy temporary, clobbered.

   movmemSI<n> copies n bytes, as n/4 longwords, from @r5 to @r4 starting
   at the highest offset.  Each entry point falls through into the next
   smaller one until movmemSI4 returns.  r4 and r5 are not modified by the
   movmemSI<n> entry points.

   movmem copies in passes of 64 bytes: three longwords (offsets 60, 56
   and 52) are moved by the loop code itself and the other thirteen by
   entering the movmemSI chain at movmemSI52.  pr is saved on the stack
   because the chain is entered with bsr and left with its rts.  */
739	.text
740	.balign	4
741	.global	GLOBAL(movmem)
742	HIDDEN_FUNC(GLOBAL(movmem))
743	HIDDEN_ALIAS(movstr,movmem)
744	/* This would be a lot simpler if r6 contained the byte count
745	   minus 64, and we wouldn't be called here for a byte count of 64.  */
746GLOBAL(movmem):
747	sts.l	pr,@-r15
748	shll2	r6
/* Enter the chain one instruction past movmemSI52; that skipped load is
   done here in the bsr delay slot.  The rts at the end of the chain
   returns to movmem_loop.  */
749	bsr	GLOBAL(movmemSI52+2)
750	mov.l	@(48,r5),r0
751	.balign	4
752LOCAL(movmem_loop): /* Reached with rts */
753	mov.l	@(60,r5),r0
754	add	#-64,r6
755	mov.l	r0,@(60,r4)
756	tst	r6,r6
757	mov.l	@(56,r5),r0
/* r6 == 0: this was the last pass, finish it in movmem_done.  */
758	bt	LOCAL(movmem_done)
759	mov.l	r0,@(56,r4)
760	cmp/pl	r6
761	mov.l	@(52,r5),r0
762	add	#64,r5
763	mov.l	r0,@(52,r4)
764	add	#64,r4
/* r6 > 0: run the chain again on the advanced pointers; pr still points
   at movmem_loop.  */
765	bt	GLOBAL(movmemSI52)
766! done all the large groups, do the remainder
767! jump to movmem+
/* r6 is negative here, so adding it to the address just past the last
   load of the chain selects an entry further up the chain.  The jmp
   delay slot is the lds.l at movmem_done, which restores pr so that the
   rts of the chain returns to the original caller.  */
768	mova	GLOBAL(movmemSI4)+4,r0
769	add	r6,r0
770	jmp	@r0
771LOCAL(movmem_done): ! share slot insn, works out aligned.
772	lds.l	@r15+,pr
773	mov.l	r0,@(56,r4)
774	mov.l	@(52,r5),r0
775	rts
776	mov.l	r0,@(52,r4)
777	.balign	4
778! ??? We need aliases movstr* for movmem* for the older libraries.  These
779! aliases will be removed at the some point in the future.
780	.global	GLOBAL(movmemSI64)
781	HIDDEN_FUNC(GLOBAL(movmemSI64))
782	HIDDEN_ALIAS(movstrSI64,movmemSI64)
783GLOBAL(movmemSI64):
784	mov.l	@(60,r5),r0
785	mov.l	r0,@(60,r4)
786	.global	GLOBAL(movmemSI60)
787	HIDDEN_FUNC(GLOBAL(movmemSI60))
788	HIDDEN_ALIAS(movstrSI60,movmemSI60)
789GLOBAL(movmemSI60):
790	mov.l	@(56,r5),r0
791	mov.l	r0,@(56,r4)
792	.global	GLOBAL(movmemSI56)
793	HIDDEN_FUNC(GLOBAL(movmemSI56))
794	HIDDEN_ALIAS(movstrSI56,movmemSI56)
795GLOBAL(movmemSI56):
796	mov.l	@(52,r5),r0
797	mov.l	r0,@(52,r4)
798	.global	GLOBAL(movmemSI52)
799	HIDDEN_FUNC(GLOBAL(movmemSI52))
800	HIDDEN_ALIAS(movstrSI52,movmemSI52)
801GLOBAL(movmemSI52):
802	mov.l	@(48,r5),r0
803	mov.l	r0,@(48,r4)
804	.global	GLOBAL(movmemSI48)
805	HIDDEN_FUNC(GLOBAL(movmemSI48))
806	HIDDEN_ALIAS(movstrSI48,movmemSI48)
807GLOBAL(movmemSI48):
808	mov.l	@(44,r5),r0
809	mov.l	r0,@(44,r4)
810	.global	GLOBAL(movmemSI44)
811	HIDDEN_FUNC(GLOBAL(movmemSI44))
812	HIDDEN_ALIAS(movstrSI44,movmemSI44)
813GLOBAL(movmemSI44):
814	mov.l	@(40,r5),r0
815	mov.l	r0,@(40,r4)
816	.global	GLOBAL(movmemSI40)
817	HIDDEN_FUNC(GLOBAL(movmemSI40))
818	HIDDEN_ALIAS(movstrSI40,movmemSI40)
819GLOBAL(movmemSI40):
820	mov.l	@(36,r5),r0
821	mov.l	r0,@(36,r4)
822	.global	GLOBAL(movmemSI36)
823	HIDDEN_FUNC(GLOBAL(movmemSI36))
824	HIDDEN_ALIAS(movstrSI36,movmemSI36)
825GLOBAL(movmemSI36):
826	mov.l	@(32,r5),r0
827	mov.l	r0,@(32,r4)
828	.global	GLOBAL(movmemSI32)
829	HIDDEN_FUNC(GLOBAL(movmemSI32))
830	HIDDEN_ALIAS(movstrSI32,movmemSI32)
831GLOBAL(movmemSI32):
832	mov.l	@(28,r5),r0
833	mov.l	r0,@(28,r4)
834	.global	GLOBAL(movmemSI28)
835	HIDDEN_FUNC(GLOBAL(movmemSI28))
836	HIDDEN_ALIAS(movstrSI28,movmemSI28)
837GLOBAL(movmemSI28):
838	mov.l	@(24,r5),r0
839	mov.l	r0,@(24,r4)
840	.global	GLOBAL(movmemSI24)
841	HIDDEN_FUNC(GLOBAL(movmemSI24))
842	HIDDEN_ALIAS(movstrSI24,movmemSI24)
843GLOBAL(movmemSI24):
844	mov.l	@(20,r5),r0
845	mov.l	r0,@(20,r4)
846	.global	GLOBAL(movmemSI20)
847	HIDDEN_FUNC(GLOBAL(movmemSI20))
848	HIDDEN_ALIAS(movstrSI20,movmemSI20)
849GLOBAL(movmemSI20):
850	mov.l	@(16,r5),r0
851	mov.l	r0,@(16,r4)
852	.global	GLOBAL(movmemSI16)
853	HIDDEN_FUNC(GLOBAL(movmemSI16))
854	HIDDEN_ALIAS(movstrSI16,movmemSI16)
855GLOBAL(movmemSI16):
856	mov.l	@(12,r5),r0
857	mov.l	r0,@(12,r4)
858	.global	GLOBAL(movmemSI12)
859	HIDDEN_FUNC(GLOBAL(movmemSI12))
860	HIDDEN_ALIAS(movstrSI12,movmemSI12)
861GLOBAL(movmemSI12):
862	mov.l	@(8,r5),r0
863	mov.l	r0,@(8,r4)
864	.global	GLOBAL(movmemSI8)
865	HIDDEN_FUNC(GLOBAL(movmemSI8))
866	HIDDEN_ALIAS(movstrSI8,movmemSI8)
867GLOBAL(movmemSI8):
868	mov.l	@(4,r5),r0
869	mov.l	r0,@(4,r4)
870	.global	GLOBAL(movmemSI4)
871	HIDDEN_FUNC(GLOBAL(movmemSI4))
872	HIDDEN_ALIAS(movstrSI4,movmemSI4)
873GLOBAL(movmemSI4):
874	mov.l	@(0,r5),r0
875	rts
876	mov.l	r0,@(0,r4)
877
878	ENDFUNC(GLOBAL(movmemSI64))
879	ENDFUNC(GLOBAL(movmemSI60))
880	ENDFUNC(GLOBAL(movmemSI56))
881	ENDFUNC(GLOBAL(movmemSI52))
882	ENDFUNC(GLOBAL(movmemSI48))
883	ENDFUNC(GLOBAL(movmemSI44))
884	ENDFUNC(GLOBAL(movmemSI40))
885	ENDFUNC(GLOBAL(movmemSI36))
886	ENDFUNC(GLOBAL(movmemSI32))
887	ENDFUNC(GLOBAL(movmemSI28))
888	ENDFUNC(GLOBAL(movmemSI24))
889	ENDFUNC(GLOBAL(movmemSI20))
890	ENDFUNC(GLOBAL(movmemSI16))
891	ENDFUNC(GLOBAL(movmemSI12))
892	ENDFUNC(GLOBAL(movmemSI8))
893	ENDFUNC(GLOBAL(movmemSI4))
894	ENDFUNC(GLOBAL(movmem))
895#endif
896
897#ifdef L_movmem_i4
/* GLOBAL(movmem_i4_even), GLOBAL(movmem_i4_odd), GLOBAL(movmemSI12_i4):
   longword block copies that load several registers before storing.

   Registers as used by the code below:
	r4: destination address (modified by the even/odd entry points)
	r5: source address (post-incremented by the even/odd entry points)
	r6: down-counter for the even/odd entry points; "dt r6" runs once per
	    pair of longwords loaded and the copy ends when it reaches zero.
	    NOTE(review): the exact encoding of the initial r6 value is
	    chosen by the caller -- confirm there.
	r0-r3: copy temporaries, clobbered (r0-r2 only for movmemSI12_i4).
   The T bit is written by dt.  */
898	.text
899	.global	GLOBAL(movmem_i4_even)
900	.global	GLOBAL(movmem_i4_odd)
901	.global	GLOBAL(movmemSI12_i4)
902
903	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
904	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
905	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
906
907	HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
908	HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
909	HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
910
911	.p2align	5
/* Exit taken from the first half of the loop: r4 has not been advanced
   yet, so the pair just loaded into r0/r1 goes to offsets 16 and 20.  */
912L_movmem_2mod4_end:
913	mov.l	r0,@(16,r4)
914	rts
915	mov.l	r1,@(20,r4)
916
917	.p2align	2
918
/* Even entry: preload one pair (r0, r1) and join the second half of the
   loop.  */
919GLOBAL(movmem_i4_even):
920	mov.l	@r5+,r0
921	bra	L_movmem_start_even
922	mov.l	@r5+,r1
923
/* Odd entry: load three longwords first.  r4 is biased by -4 so that the
   stores at offsets 4, 8 and 12 land on the first three destination
   longwords.  */
924GLOBAL(movmem_i4_odd):
925	mov.l	@r5+,r1
926	add	#-4,r4
927	mov.l	@r5+,r2
928	mov.l	@r5+,r3
929	mov.l	r1,@(4,r4)
930	mov.l	r2,@(8,r4)
931
/* Main loop: each pass stores four longwords; loads for the next stores
   are interleaved with the stores of the previous loads.  */
932L_movmem_loop:
933	mov.l	r3,@(12,r4)
934	dt	r6
935	mov.l	@r5+,r0
936	bt/s	L_movmem_2mod4_end
937	mov.l	@r5+,r1
938	add	#16,r4
939L_movmem_start_even:
940	mov.l	@r5+,r2
941	mov.l	@r5+,r3
942	mov.l	r0,@r4
943	dt	r6
944	mov.l	r1,@(4,r4)
945	bf/s	L_movmem_loop
946	mov.l	r2,@(8,r4)
947	rts
948	mov.l	r3,@(12,r4)
949
950	ENDFUNC(GLOBAL(movmem_i4_even))
951	ENDFUNC(GLOBAL(movmem_i4_odd))
952
953	.p2align	4
/* Copy exactly three longwords (12 bytes) from @r5 to @r4; r4 and r5 are
   left unchanged.  */
954GLOBAL(movmemSI12_i4):
955	mov.l	@r5,r0
956	mov.l	@(4,r5),r1
957	mov.l	@(8,r5),r2
958	mov.l	r0,@r4
959	mov.l	r1,@(4,r4)
960	rts
961	mov.l	r2,@(8,r4)
962
963	ENDFUNC(GLOBAL(movmemSI12_i4))
964#endif
965
966#ifdef L_mulsi3
967
968
/* GLOBAL(mulsi3): 32 x 32 -> 32 bit multiply built from 16 x 16
   multiplies (mulu.w).

   Entry:	r4, r5: operands
   Exit:	r0: low 32 bits of r4 * r5
   Clobbers:	r1, r2, r3, macl, T bit  */
969	.global	GLOBAL(mulsi3)
970	HIDDEN_FUNC(GLOBAL(mulsi3))
971
972! r4 =       aabb
973! r5 =       ccdd
974! r0 = aabb*ccdd  via partial products
975!
976! if aa == 0 and cc = 0
977! r0 = bb*dd
978!
979! else
980! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536)
981!
/* Note: the left-hand side "aa" in the formula above denotes the result
   register r0, as the code at hiset shows.  The aa*cc term is omitted
   because it only affects bits above the low 32.  */
982
983GLOBAL(mulsi3):
984	mulu.w  r4,r5		! multiply the lsws  macl=bb*dd
985	mov     r5,r3		! r3 = ccdd
986	swap.w  r4,r2		! r2 = bbaa
987	xtrct   r2,r3		! r3 = aacc
988	tst  	r3,r3		! msws zero ?
989	bf      hiset
990	rts			! yes - then we have the answer
991	sts     macl,r0
992
/* General case: add the two cross products, shifted up by 16 bits, to
   the low product.  mulu.w only reads the low 16 bits of its operands,
   so r2 (bbaa) supplies aa and r3 (aacc) supplies cc.  */
993hiset:	sts	macl,r0		! r0 = bb*dd
994	mulu.w	r2,r5		! brewing macl = aa*dd
995	sts	macl,r1
996	mulu.w	r3,r4		! brewing macl = cc*bb
997	sts	macl,r2
998	add	r1,r2
999	shll16	r2
1000	rts
1001	add	r2,r0
1002
1003	ENDFUNC(GLOBAL(mulsi3))
1004#endif
1005#endif /* ! __SH5__ */
1006
1007/*------------------------------------------------------------------------------
1008  32 bit signed integer division that uses FPU double precision division.  */
1009
1010#ifdef L_sdivsi3_i4
/* GLOBAL(sdivsi3_i4): signed 32-bit division done in the FPU.  Both
   operands are converted to double precision, divided, and the quotient
   is converted back to an integer (ftrc) and left in fpul.  Two variants
   follow, selected by the default FPSCR.PR setting of the target.  */
1011	.title "SH DIVIDE"
1012
1013#if defined (__SH4__) || defined (__SH2A__)
1014/* This variant is used when FPSCR.PR = 1 (double precision) is the default
1015   setting.
1016   Args in r4 and r5, result in fpul, clobber dr0, dr2.  */
1017
1018	.global	GLOBAL(sdivsi3_i4)
1019	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1020GLOBAL(sdivsi3_i4):
1021	lds r4,fpul
1022	float fpul,dr0
1023	lds r5,fpul
1024	float fpul,dr2
1025	fdiv dr2,dr0
1026	rts
1027	ftrc dr0,fpul
1028
1029	ENDFUNC(GLOBAL(sdivsi3_i4))
1030
1031#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1032/* This variant is used when FPSCR.PR = 0 (single precision) is the default
1033   setting.
1034   Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
1035   For this to work, we must temporarily switch the FPU to double precision,
1036   but we better do not touch FPSCR.FR.  See PR 6526.  */
1037
1038#if ! __SH5__ || __SH5__ == 32
1039#if __SH5__
1040	.mode	SHcompact
1041#endif
1042	.global	GLOBAL(sdivsi3_i4)
1043	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1044GLOBAL(sdivsi3_i4):
1045
1046#ifndef __SH4A__
/* r2 keeps the caller's FPSCR so it can be restored after the division;
   r3 is used as scratch for the modified value and is saved/restored on
   the stack, so it is not clobbered.  */
1047	mov.l	r3,@-r15
1048	sts	fpscr,r2
1049	mov	#8,r3
1050	swap.w	r3,r3		// r3 = 1 << 19 (FPSCR.PR bit)
1051	or	r2,r3
1052	lds	r3,fpscr	// Set FPSCR.PR = 1.
1053	lds	r4,fpul
1054	float	fpul,dr0
1055	lds	r5,fpul
1056	float	fpul,dr2
1057	fdiv	dr2,dr0
1058	ftrc	dr0,fpul
1059	lds	r2,fpscr
1060	rts
1061	mov.l	@r15+,r3
1062#else
1063/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.  */
/* The second fpchg, in the rts delay slot, flips the bit back.  */
1064	fpchg
1065	lds	r4,fpul
1066	float	fpul,dr0
1067	lds	r5,fpul
1068	float	fpul,dr2
1069	fdiv	dr2,dr0
1070	ftrc	dr0,fpul
1071	rts
1072	fpchg
1073
1074#endif /* __SH4A__  */
1075
1076	ENDFUNC(GLOBAL(sdivsi3_i4))
1077#endif /* ! __SH5__ || __SH5__ == 32 */
1078#endif /* ! __SH4__ || __SH2A__  */
1079#endif /* L_sdivsi3_i4  */
1080
1081//------------------------------------------------------------------------------
1082#ifdef L_sdivsi3
1083/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1084   sh2e/sh3e code.  */
1085!!
1086!! Steve Chamberlain
1087!! sac@cygnus.com
1088!!
1089!!
1090
1091!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1092
1093	.global	GLOBAL(sdivsi3)
/* SHmedia implementations of the signed 32-bit division r0 = r4 / r5.
   Three variants are present in this conditional; the first two are
   disabled ("#if 0" and "#elif 0"), so only the table-driven variant in
   the final "#else" branch is ever assembled.  */
1094#if __SHMEDIA__
1095#if __SH5__ == 32
1096	.section	.text..SHmedia32,"ax"
1097#else
1098	.text
1099#endif
1100	.align	2
1101#if 0
1102/* The assembly code that follows is a hand-optimized version of the C
1103   code that follows.  Note that the registers that are modified are
1104   exactly those listed as clobbered in the patterns divsi3_i1 and
1105   divsi3_i1_media.
1106
1107int __sdivsi3 (i, j)
1108     int i, j;
1109{
1110  register unsigned long long r18 asm ("r18");
1111  register unsigned long long r19 asm ("r19");
1112  register unsigned long long r0 asm ("r0") = 0;
1113  register unsigned long long r1 asm ("r1") = 1;
1114  register int r2 asm ("r2") = i >> 31;
1115  register int r3 asm ("r3") = j >> 31;
1116
1117  r2 = r2 ? r2 : r1;
1118  r3 = r3 ? r3 : r1;
1119  r18 = i * r2;
1120  r19 = j * r3;
1121  r2 *= r3;
1122
1123  r19 <<= 31;
1124  r1 <<= 31;
1125  do
1126    if (r18 >= r19)
1127      r0 |= r1, r18 -= r19;
1128  while (r19 >>= 1, r1 >>= 1);
1129
1130  return r2 * (int)r0;
1131}
1132*/
/* Disabled variant 1: bit-by-bit restoring division on the absolute
   values, following the C code in the comment above.  */
1133GLOBAL(sdivsi3):
1134	pt/l	LOCAL(sdivsi3_dontadd), tr2
1135	pt/l	LOCAL(sdivsi3_loop), tr1
1136	ptabs/l	r18, tr0
1137	movi	0, r0
1138	movi	1, r1
1139	shari.l	r4, 31, r2
1140	shari.l	r5, 31, r3
1141	cmveq	r2, r1, r2
1142	cmveq	r3, r1, r3
1143	muls.l	r4, r2, r18
1144	muls.l	r5, r3, r19
1145	muls.l	r2, r3, r2
1146	shlli	r19, 31, r19
1147	shlli	r1, 31, r1
1148LOCAL(sdivsi3_loop):
1149	bgtu	r19, r18, tr2
1150	or	r0, r1, r0
1151	sub	r18, r19, r18
1152LOCAL(sdivsi3_dontadd):
1153	shlri	r1, 1, r1
1154	shlri	r19, 1, r19
1155	bnei	r1, 0, tr1
1156	muls.l	r0, r2, r0
1157	add.l	r0, r63, r0
1158	blink	tr0, r63
1159#elif 0 /* ! 0 */
/* Disabled variant 2: computes the quotient from an approximate
   reciprocal of the divisor with multimedia multiply instructions; see
   the inline comments for the individual steps.  */
1160 // inputs: r4,r5
1161 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1162 // result in r0
1163GLOBAL(sdivsi3):
1164 // can create absolute value without extra latency,
1165 // but dependent on proper sign extension of inputs:
1166 // shari.l r5,31,r2
1167 // xor r5,r2,r20
1168 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1169 shari.l r5,31,r2
1170 ori r2,1,r2
1171 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1172 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
1173 shari.l r4,31,r3
1174 nsb r20,r0
1175 shlld r20,r0,r25
1176 shlri r25,48,r25
1177 sub r19,r25,r1
1178 mmulfx.w r1,r1,r2
1179 mshflo.w r1,r63,r1
1180 // If r4 was to be used in-place instead of r21, could use this sequence
1181 // to compute absolute:
1182 // sub r63,r4,r19 // compute absolute value of r4
1183 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1184 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1185 ori r3,1,r3
1186 mmulfx.w r25,r2,r2
1187 sub r19,r0,r0
1188 muls.l r4,r3,r21
1189 msub.w r1,r2,r2
1190 addi r2,-2,r1
1191 mulu.l r21,r1,r19
1192 mmulfx.w r2,r2,r2
1193 shlli r1,15,r1
1194 shlrd r19,r0,r19
1195 mulu.l r19,r20,r3
1196 mmacnfx.wl r25,r2,r1
1197 ptabs r18,tr0
1198 sub r21,r3,r25
1199
1200 mulu.l r25,r1,r2
1201 addi r0,14,r0
1202 xor r4,r5,r18
1203 shlrd r2,r0,r2
1204 mulu.l r2,r20,r3
1205 add r19,r2,r19
1206 shari.l r18,31,r18
1207 sub r25,r3,r25
1208
1209 mulu.l r25,r1,r2
1210 sub r25,r20,r25
1211 add r19,r18,r19
1212 shlrd r2,r0,r2
1213 mulu.l r2,r20,r3
1214 addi r25,1,r25
1215 add r19,r2,r19
1216
1217 cmpgt r25,r3,r25
1218 add.l r19,r25,r0
1219 xor r0,r18,r0
1220 blink tr0,r63
1221#else /* ! 0 && ! 0 */
1222
/* Active variant.  It provides up to three entry labels:
   - sdivsi3 and sdivsi3_1 (non-PIC builds only): load the address of
     div_table_internal into r20 and fall into sdivsi3_2;
   - sdivsi3_2: expects the table address in r20 already.
   The divisor is normalized, a table lookup supplies a first estimate of
   its reciprocal, and the estimate is refined before being multiplied by
   the dividend; the fixed-point formats of the intermediate values are
   given in the inline comments.
   NOTE(review): div_table_internal is defined outside this block; its
   layout is assumed to match the ldx.ub / ldx.w accesses below.  */
1223 // inputs: r4,r5
1224 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1225 // result in r0
1226	HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1227#ifndef __pic__
1228	FUNC(GLOBAL(sdivsi3))
1229GLOBAL(sdivsi3): /* this is the shcompact entry point */
1230 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1231 // with the SHcompact implementation, which clobbers tr1 / tr2.
1232 .global GLOBAL(sdivsi3_1)
1233GLOBAL(sdivsi3_1):
1234 .global GLOBAL(div_table_internal)
1235 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1236 shori GLOBAL(div_table_internal) & 65535, r20
1237#endif
1238 .global GLOBAL(sdivsi3_2)
1239 // div_table in r20
1240 // clobbered: r1,r18,r19,r21,r25,tr0
1241GLOBAL(sdivsi3_2):
1242 nsb r5, r1
1243 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
1244 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
1245 ldx.ub r20, r21, r19 // u0.8
1246 shari r25, 32, r25   // normalize to s2.30
1247 shlli r21, 1, r21
1248 muls.l r25, r19, r19 // s2.38
1249 ldx.w r20, r21, r21  // s2.14
1250  ptabs r18, tr0
1251 shari r19, 24, r19   // truncate to s2.14
1252 sub r21, r19, r19    // some 11 bit inverse in s1.14
1253 muls.l r19, r19, r21 // u0.28
1254  sub r63, r1, r1
1255  addi r1, 92, r1
1256 muls.l r25, r21, r18 // s2.58
1257 shlli r19, 45, r19   // multiply by two and convert to s2.58
1258  /* bubble */
1259 sub r19, r18, r18
1260 shari r18, 28, r18   // some 22 bit inverse in s1.30
1261 muls.l r18, r25, r0  // s2.60
1262  muls.l r18, r4, r25 // s32.30
1263  /* bubble */
1264 shari r0, 16, r19   // s-16.44
1265 muls.l r19, r18, r19 // s-16.74
1266  shari r25, 63, r0
1267  shari r4, 14, r18   // s19.-14
1268 shari r19, 30, r19   // s-16.44
1269 muls.l r19, r18, r19 // s15.30
1270  xor r21, r0, r21    // You could also use the constant 1 << 27.
1271  add r21, r25, r21
1272 sub r21, r19, r21
1273 shard r21, r1, r21
1274 sub r21, r0, r0
1275 blink tr0, r63
1276#ifndef __pic__
1277	ENDFUNC(GLOBAL(sdivsi3))
1278#endif
1279	ENDFUNC(GLOBAL(sdivsi3_2))
1280#endif
1281#elif defined (__SHMEDIA__)
/* This branch is only considered when the enclosing "#if __SHMEDIA__" is
   false, so it has to test whether __SHMEDIA__ is defined at all; a
   second truth test of the same macro could never succeed and would leave
   this implementation unreachable.
   NOTE(review): this relies on SH5 SHcompact builds defining __SHMEDIA__
   as 0 and non-SH5 builds leaving it undefined -- confirm against the
   compiler's predefined macros.  */
1282/* m5compact-nofpu */
1283 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
/* GLOBAL(sdivsi3): signed 32-bit division, r0 = r4 / r5, as a short
   SHmedia shift-and-subtract loop.  The return address is taken from r18
   (ptabs/l r18,tr2).

   r18/r19 first hold the sign masks of dividend and divisor, which are
   used to form the absolute values in r20 and r21; r19 then becomes the
   sign mask of the result.  The loop runs 32 times, counted by the low
   32 bits of r25 going from 0 down to -32.  Each pass shifts r20 left by
   one and, unless r21 >= r20, subtracts r21 = (|divisor| << 32) - 1: this
   removes the divisor from the upper half of r20 and at the same time
   adds one to the lower half, which accumulates the quotient bits.
   Finally the sign is applied with xor / sub.l.  */
1284	.mode	SHmedia
1285	.section	.text..SHmedia32,"ax"
1286	.align	2
1287	FUNC(GLOBAL(sdivsi3))
1288GLOBAL(sdivsi3):
1289	pt/l LOCAL(sdivsi3_dontsub), tr0
1290	pt/l LOCAL(sdivsi3_loop), tr1
1291	ptabs/l r18,tr2
1292	shari.l r4,31,r18
1293	shari.l r5,31,r19
1294	xor r4,r18,r20
1295	xor r5,r19,r21
1296	sub.l r20,r18,r20
1297	sub.l r21,r19,r21
1298	xor r18,r19,r19
1299	shlli r21,32,r25
1300	addi r25,-1,r21
1301	addz.l r20,r63,r20
1302LOCAL(sdivsi3_loop):
1303	shlli r20,1,r20
1304	bgeu/u r21,r20,tr0
1305	sub r20,r21,r20
1306LOCAL(sdivsi3_dontsub):
1307	addi.l r25,-1,r25
1308	bnei r25,-32,tr1
1309	xor r20,r19,r20
1310	sub.l r20,r19,r0
1311	blink tr2,r63
1312	ENDFUNC(GLOBAL(sdivsi3))
1313#else /* ! __SHMEDIA__ */
/* GLOBAL(sdivsi3), SH compact variant: signed 32-bit division using the
   div0s / div1 step-division instructions.

   Entry:	r4: dividend, r5: divisor
   Exit:	r0: quotient; 0 is returned when the divisor is zero
   Working registers: r0 (divisor), r1 (dividend, then quotient), r2
   (constant 0), r3 (sign extension of the dividend), T bit.  */
1314	FUNC(GLOBAL(sdivsi3))
1315GLOBAL(sdivsi3):
1316	mov	r4,r1
1317	mov	r5,r0
1318
/* Division by zero is not trapped; it takes the div0 exit below.  */
1319	tst	r0,r0
1320	bt	div0
/* Prepare the dividend: with r2 = 0, div0s leaves the sign of r1 in T;
   the first subc turns that into r3 = 0 or -1 (upper word of the
   dividend) and the second subtracts T from r1.  The addc after the last
   step adds the final T back in as the matching correction.  */
1321	mov	#0,r2
1322	div0s	r2,r1
1323	subc	r3,r3
1324	subc	r2,r1
1325	div0s	r0,r3
/* 32 division steps, one quotient bit each: rotcl feeds the next
   dividend bit to div1 through T and shifts the previous quotient bit
   into r1.  */
1326	rotcl	r1
1327	div1	r0,r3
1328	rotcl	r1
1329	div1	r0,r3
1330	rotcl	r1
1331	div1	r0,r3
1332	rotcl	r1
1333	div1	r0,r3
1334	rotcl	r1
1335	div1	r0,r3
1336	rotcl	r1
1337	div1	r0,r3
1338	rotcl	r1
1339	div1	r0,r3
1340	rotcl	r1
1341	div1	r0,r3
1342	rotcl	r1
1343	div1	r0,r3
1344	rotcl	r1
1345	div1	r0,r3
1346	rotcl	r1
1347	div1	r0,r3
1348	rotcl	r1
1349	div1	r0,r3
1350	rotcl	r1
1351	div1	r0,r3
1352	rotcl	r1
1353	div1	r0,r3
1354	rotcl	r1
1355	div1	r0,r3
1356	rotcl	r1
1357	div1	r0,r3
1358	rotcl	r1
1359	div1	r0,r3
1360	rotcl	r1
1361	div1	r0,r3
1362	rotcl	r1
1363	div1	r0,r3
1364	rotcl	r1
1365	div1	r0,r3
1366	rotcl	r1
1367	div1	r0,r3
1368	rotcl	r1
1369	div1	r0,r3
1370	rotcl	r1
1371	div1	r0,r3
1372	rotcl	r1
1373	div1	r0,r3
1374	rotcl	r1
1375	div1	r0,r3
1376	rotcl	r1
1377	div1	r0,r3
1378	rotcl	r1
1379	div1	r0,r3
1380	rotcl	r1
1381	div1	r0,r3
1382	rotcl	r1
1383	div1	r0,r3
1384	rotcl	r1
1385	div1	r0,r3
1386	rotcl	r1
1387	div1	r0,r3
1388	rotcl	r1
1389	div1	r0,r3
/* Shift in the last quotient bit, apply the correction, and return the
   quotient in r0.  */
1390	rotcl	r1
1391	addc	r2,r1
1392	rts
1393	mov	r1,r0
1394
1395
/* Divisor was zero: return 0.  */
1396div0:	rts
1397	mov	#0,r0
1398
1399	ENDFUNC(GLOBAL(sdivsi3))
1400#endif /* ! __SHMEDIA__  */
1401#endif /* L_sdivsi3  */
1402
1403/*------------------------------------------------------------------------------
1404  32 bit unsigned integer division that uses FPU double precision division.  */
1405
1406#ifdef L_udivsi3_i4
1407	.title "SH DIVIDE"
1408
1409#if defined (__SH4__) || defined (__SH2A__)
1410/* This variant is used when FPSCR.PR = 1 (double precision) is the default
1411   setting.
1412   Args in r4 and r5, result in fpul,
1413   clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit

   Method: both operands get their sign bit flipped (x ^ 0x80000000), are
   converted as signed integers, and 2^31 (the constant at L1) is added back,
   which yields the unsigned values as doubles.  The double quotient is then
   truncated to an integer in fpul.  A divisor <= 1 skips the FPU and returns
   the dividend unchanged.  */
1414
1415	.global	GLOBAL(udivsi3_i4)
1416	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1417GLOBAL(udivsi3_i4):
1418	mov	#1,r1
1419	cmp/hi	r1,r5			// T = (divisor > 1), unsigned
1420	bf/s	trivial			// divisor is 0 or 1
1421	rotr	r1			// delay slot: r1 = 1 << 31
1422	xor	r1,r4			// bias the dividend
1423	lds	r4,fpul
1424	mova	L1,r0			// r0 = address of the 2^31 constant
1425#ifdef FMOVD_WORKS
1426	fmov.d	@r0+,dr4
1427#else
1428	fmov.s	@r0+,DR40
1429	fmov.s	@r0,DR41
1430#endif
1431	float	fpul,dr0		// dr0 = (double) biased dividend
1432	xor	r1,r5			// bias the divisor
1433	lds	r5,fpul
1434	float	fpul,dr2		// dr2 = (double) biased divisor
1435	fadd	dr4,dr0			// undo the bias: dr0 = unsigned dividend
1436	fadd	dr4,dr2			// dr2 = unsigned divisor
1437	fdiv	dr2,dr0
1438	rts
1439	ftrc	dr0,fpul		// delay slot: truncate the quotient into fpul
1440
1441trivial:
1442	rts
1443	lds	r4,fpul			// delay slot: result = dividend
1444
1445	.align 2
1446#ifdef FMOVD_WORKS
1447	.align 3	// Make the double below 8 byte aligned.
1448#endif
1449L1:
1450	.double 2147483648
1451
1452	ENDFUNC(GLOBAL(udivsi3_i4))
1453
1454#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__)
1455#if ! __SH5__ || __SH5__ == 32
1456!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
!! SHmedia implementation: both operands are zero-extended to 64 bits,
!! converted to double, divided, and the quotient is truncated back to a
!! 64-bit integer in dr32.  The final fmov.s copies fr33 into fr32.
!! NOTE(review): fr32 is presumably the register that SHcompact code sees
!! as fpul; dr32 (fr32/fr33) and tr0 are written as well.
1457	.mode	SHmedia
1458	.global	GLOBAL(udivsi3_i4)
1459	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1460GLOBAL(udivsi3_i4):
1461	addz.l	r4,r63,r20		// r20 = zero-extended dividend
1462	addz.l	r5,r63,r21		// r21 = zero-extended divisor
1463	fmov.qd	r20,dr0
1464	fmov.qd	r21,dr32
1465	ptabs	r18,tr0			// prepare the return branch
1466	float.qd dr0,dr0		// 64-bit integer -> double
1467	float.qd dr32,dr32
1468	fdiv.d	dr0,dr32,dr0		// dr0 = dividend / divisor
1469	ftrc.dq dr0,dr32		// truncate to a 64-bit integer
1470	fmov.s fr33,fr32		// move one 32-bit half of the result into fr32
1471	blink tr0,r63
1472
1473	ENDFUNC(GLOBAL(udivsi3_i4))
1474#endif /* ! __SH5__ || __SH5__ == 32 */
1475
1476#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1477/* This variant is used when FPSCR.PR = 0 (single precision) is the default
1478   setting.
1479   Args in r4 and r5, result in fpul,
1480   clobber r0, r1, r4, r5, dr0, dr2, dr4.
1481   For this to work, we must temporarily switch the FPU to double precision,
1482   but we better do not touch FPSCR.FR.  See PR 6526.

   The arithmetic is the same in both sub-variants below: flip the sign bit
   of each operand, convert as signed, add 2^31 (constant L1) to recover the
   unsigned value as a double, divide, truncate into fpul.  A divisor <= 1
   returns the dividend unchanged without touching the FPU.  The T bit is
   modified by the cmp/hi and rotr at entry.  */
1483
1484	.global	GLOBAL(udivsi3_i4)
1485	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1486GLOBAL(udivsi3_i4):
1487
1488#ifndef __SH4A__
	// Without fpchg: save FPSCR on the stack, OR in the bits from L2,
	// and restore the saved value in the delay slot of the final rts.
1489	mov	#1,r1
1490	cmp/hi	r1,r5			// T = (divisor > 1), unsigned
1491	bf/s	trivial
1492	rotr	r1		// r1 = 1 << 31
1493	sts.l	fpscr,@-r15		// push the caller FPSCR
1494	xor	r1,r4			// bias the dividend
1495	mov.l	@(0,r15),r0		// r0 = saved FPSCR
1496	xor	r1,r5			// bias the divisor
1497	mov.l	L2,r1			// r1 = mode bits to set
1498	lds	r4,fpul
1499	or	r0,r1
1500	mova	L1,r0
1501	lds	r1,fpscr		// switch to double precision
1502#ifdef FMOVD_WORKS
1503	fmov.d	@r0+,dr4
1504#else
1505	fmov.s	@r0+,DR40
1506	fmov.s	@r0,DR41
1507#endif
1508	float	fpul,dr0
1509	lds	r5,fpul
1510	float	fpul,dr2
1511	fadd	dr4,dr0			// undo the bias on the dividend
1512	fadd	dr4,dr2			// undo the bias on the divisor
1513	fdiv	dr2,dr0
1514	ftrc	dr0,fpul		// truncate before FPSCR is restored
1515	rts
1516	lds.l	@r15+,fpscr		// delay slot: pop the caller FPSCR
1517
1518#ifdef FMOVD_WORKS
1519	.align 3	// Make the double below 8 byte aligned.
1520#endif
1521trivial:
1522	rts
1523	lds	r4,fpul			// delay slot: result = dividend
1524
1525	.align 2
1526L2:
1527#ifdef FMOVD_WORKS
1528	.long 0x180000	// FPSCR.PR = 1, FPSCR.SZ = 1
1529#else
1530	.long 0x80000	// FPSCR.PR = 1
1531#endif
1532L1:
1533	.double 2147483648
1534
1535#else
1536/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.
1537   Although on SH4A fmovd usually works, it would require either additional
1538   two fschg instructions or an FPSCR push + pop.  It's not worth the effort
1539   for loading only one double constant.  */
1540	mov	#1,r1
1541	cmp/hi	r1,r5			// T = (divisor > 1), unsigned
1542	bf/s	trivial
1543	rotr	r1		// r1 = 1 << 31
1544	fpchg				// PR: 0 -> 1
1545	mova	L1,r0
1546	xor	r1,r4			// bias the dividend
1547	fmov.s	@r0+,DR40
1548	lds	r4,fpul
1549	fmov.s	@r0,DR41
1550	xor	r1,r5			// bias the divisor
1551	float	fpul,dr0
1552	lds	r5,fpul
1553	float	fpul,dr2
1554	fadd	dr4,dr0
1555	fadd	dr4,dr2
1556	fdiv	dr2,dr0
1557	ftrc	dr0,fpul
1558	rts
1559	fpchg				// delay slot: PR back to 0
1560
1561trivial:
1562	rts
1563	lds	r4,fpul			// delay slot: result = dividend
1564
1565	.align 2
1566L1:
1567	.double 2147483648
1568
1569#endif /* __SH4A__  */
1570
1571
1572	ENDFUNC(GLOBAL(udivsi3_i4))
1573#endif /* ! __SH4__ */
1574#endif /* L_udivsi3_i4  */
1575
1576#ifdef L_udivsi3
1577/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1578   sh2e/sh3e code.  */
1579
1580!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1581	.global	GLOBAL(udivsi3)
1582	HIDDEN_FUNC(GLOBAL(udivsi3))
1583
1584#if __SHMEDIA__
1585#if __SH5__ == 32
1586	.section	.text..SHmedia32,"ax"
1587#else
1588	.text
1589#endif
1590	.align	2
1591#if 0
1592/* The assembly code that follows is a hand-optimized version of the C
1593   code that follows.  Note that the registers that are modified are
1594   exactly those listed as clobbered in the patterns udivsi3_i1 and
1595   udivsi3_i1_media.
1596
1597unsigned
1598__udivsi3 (i, j)
1599    unsigned i, j;
1600{
1601  register unsigned long long r0 asm ("r0") = 0;
1602  register unsigned long long r18 asm ("r18") = 1;
1603  register unsigned long long r4 asm ("r4") = i;
1604  register unsigned long long r19 asm ("r19") = j;
1605
1606  r19 <<= 31;
1607  r18 <<= 31;
1608  do
1609    if (r4 >= r19)
1610      r0 |= r18, r4 -= r19;
1611  while (r19 >>= 1, r18 >>= 1);
1612
1613  return r0;
1614}
1615*/
/* Disabled (#if 0) reference implementation: one quotient bit per
   iteration, mirroring the C loop in the comment above.
   r4 = remaining dividend, r19 = divisor << 31 (shifted right each round),
   r18 = current quotient bit, r0 = quotient.  */
1616GLOBAL(udivsi3):
1617	pt/l	LOCAL(udivsi3_dontadd), tr2
1618	pt/l	LOCAL(udivsi3_loop), tr1
1619	ptabs/l	r18, tr0		// keep the return address; r18 is reused
1620	movi	0, r0
1621	movi	1, r18
1622	addz.l	r5, r63, r19		// zero-extend the divisor
1623	addz.l	r4, r63, r4		// zero-extend the dividend
1624	shlli	r19, 31, r19
1625	shlli	r18, 31, r18
1626LOCAL(udivsi3_loop):
1627	bgtu	r19, r4, tr2		// shifted divisor too large: bit stays 0
1628	or	r0, r18, r0		// set the quotient bit
1629	sub	r4, r19, r4
1630LOCAL(udivsi3_dontadd):
1631	shlri	r18, 1, r18
1632	shlri	r19, 1, r19
1633	bnei	r18, 0, tr1		// loop until the bit mask is shifted out
1634	blink	tr0, r63
1635#else
/* Unsigned 32-bit division for SHmedia without a loop.
   NOTE(review): this reads as a reciprocal-estimate scheme - the divisor
   is normalized (nsb/shlld), an approximate inverse is refined with the
   fixed-point multimedia multiplies, and three multiply/shift/subtract
   rounds accumulate the quotient in r18 - confirm against the SHmedia
   instruction reference before changing any shift count.  */
1636GLOBAL(udivsi3):
1637 // inputs: r4,r5
1638 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1639 // result in r0.
1640 addz.l r5,r63,r22		// r22 = zero-extended divisor
1641 nsb r22,r0
1642 shlld r22,r0,r25		// normalize the divisor
1643 shlri r25,48,r25		// keep its top 16 bits
1644 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1645 sub r20,r25,r21
1646 mmulfx.w r21,r21,r19
1647 mshflo.w r21,r63,r21
1648 ptabs r18,tr0			// return target; r18 is reused below
1649 mmulfx.w r25,r19,r19
1650 sub r20,r0,r0
1651 /* bubble */
1652 msub.w r21,r19,r19
1653 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1654		    before the msub.w, but we need a different value for
1655		    r19 to keep errors under control.  */
1656 mulu.l r4,r21,r18		// first partial quotient (before the shift)
1657 mmulfx.w r19,r19,r19
1658 shlli r21,15,r21
1659 shlrd r18,r0,r18
1660 mulu.l r18,r22,r20
1661 mmacnfx.wl r25,r19,r21
1662 /* bubble */
1663 sub r4,r20,r25		// r25 = dividend - partial quotient * divisor
1664
1665 mulu.l r25,r21,r19
1666 addi r0,14,r0
1667 /* bubble */
1668 shlrd r19,r0,r19
1669 mulu.l r19,r22,r20
1670 add r18,r19,r18		// accumulate the second partial quotient
1671 /* bubble */
1672 sub.l r25,r20,r25
1673
1674 mulu.l r25,r21,r19
1675 addz.l r25,r63,r25
1676 sub r25,r22,r25
1677 shlrd r19,r0,r19
1678 mulu.l r19,r22,r20
1679 addi r25,1,r25
1680 add r18,r19,r18		// accumulate the third partial quotient
1681
1682 cmpgt r25,r20,r25		// r25 = 1 when one more divisor still fits
1683 add.l r18,r25,r0		// final quotient, low 32 bits
1684 blink tr0,r63
1685#endif
1686#elif defined (__SHMEDIA__)
1687/* m5compact-nofpu - more emphasis on code size than on speed, but don't
1688   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1689   So use a short shmedia loop.

   The guard is "defined (__SHMEDIA__)" on purpose: this chain starts with
   "#if __SHMEDIA__", so testing the value again could never be true here
   and this variant would be dead code.  It is meant for builds where
   __SHMEDIA__ is defined but evaluates to 0.

   Unsigned 32-bit division by a 32-step shift-and-subtract loop.
   In:  r4 = dividend, r5 = divisor, r18 = return address.
   Out: r0 = quotient (low 32 bits of r20).  */
1690 // clobbered: r20,r21,r25,tr0,tr1,tr2
1691	.mode	SHmedia
1692	.section	.text..SHmedia32,"ax"
1693	.align	2
1694GLOBAL(udivsi3):
1695 pt/l LOCAL(udivsi3_dontsub), tr0
1696 pt/l LOCAL(udivsi3_loop), tr1
1697 ptabs/l r18,tr2
1698 shlli r5,32,r25		// r25 = divisor << 32; its zero low half doubles as loop counter
1699 addi r25,-1,r21		// r21 = (divisor << 32) - 1
1700 addz.l r4,r63,r20		// r20 = zero-extended dividend
1701LOCAL(udivsi3_loop):
1702 shlli r20,1,r20
1703 bgeu/u r21,r20,tr0		// skip the subtraction while r20 <= r21
1704 sub r20,r21,r20		// subtract the divisor from the upper half; the +1 sets a quotient bit
1705LOCAL(udivsi3_dontsub):
1706 addi.l r25,-1,r25		// 32-bit counter: -1, -2, ... -32
1707 bnei r25,-32,tr1		// 32 iterations in total
1708 add.l r20,r63,r0		// quotient = low 32 bits of r20
1709 blink tr2,r63
1710#else /* ! __SHMEDIA__ */
/* Unsigned 32-bit division for SHcompact built from div1 steps.
   In:  r4 = dividend, r5 = divisor.  Out: r0 = quotient.
   pr is pushed on entry and popped before returning because the helpers
   below are called with bsr.

   Helpers (entered with bsr; the instruction in the bsr delay slot runs
   first):
     div8  - 8 div1 steps (1, then falls into div7).
     div7  - 7 div1 steps (6 inline + 1 in the rts delay slot).
     divx4 - 4 div1 steps, each of the first three followed by rotcl r0.  */
1711LOCAL(div8):
1712 div1 r5,r4
1713LOCAL(div7):
1714 div1 r5,r4; div1 r5,r4; div1 r5,r4
1715 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1716
1717LOCAL(divx4):
1718 div1 r5,r4; rotcl r0
1719 div1 r5,r4; rotcl r0
1720 div1 r5,r4; rotcl r0
1721 rts; div1 r5,r4
1722
1723GLOBAL(udivsi3):
1724 sts.l pr,@-r15
1725 extu.w r5,r0
1726 cmp/eq r5,r0			// T = divisor fits in 16 bits
1727#ifdef __sh1__
1728 bf LOCAL(large_divisor)
1729#else
1730 bf/s LOCAL(large_divisor)
1731#endif
1732 div0u				// delay slot of bf/s when not __sh1__
	// Small divisor: two rounds of 16 div1 steps with the divisor
	// shifted into the upper half of r5.
1733 swap.w r4,r0
1734 shlr16 r4			// r4 = upper 16 bits of the dividend
1735 bsr LOCAL(div8)
1736 shll16 r5			// delay slot: r5 = divisor << 16
1737 bsr LOCAL(div7)
1738 div1 r5,r4			// delay slot: together with div7 makes 8 steps
1739 xtrct r4,r0
1740 xtrct r0,r4
1741 bsr LOCAL(div8)
1742 swap.w r4,r4
1743 bsr LOCAL(div7)
1744 div1 r5,r4
1745 lds.l @r15+,pr
1746 xtrct r4,r0
1747 swap.w r0,r0
1748 rotcl r0			// shift in the last quotient bit
1749 rts
1750 shlr16 r5			// delay slot: undo the shll16 on r5
1751
1752LOCAL(large_divisor):
1753#ifdef __sh1__
1754 div0u
1755#endif
	// Divisor needs more than 16 bits: 4 x divx4 = 16 div1 steps, with
	// the quotient bits rotated into r0.
1756 mov #0,r0
1757 xtrct r4,r0
1758 xtrct r0,r4
1759 bsr LOCAL(divx4)
1760 rotcl r0
1761 bsr LOCAL(divx4)
1762 rotcl r0
1763 bsr LOCAL(divx4)
1764 rotcl r0
1765 bsr LOCAL(divx4)
1766 rotcl r0
1767 lds.l @r15+,pr
1768 rts
1769 rotcl r0			// delay slot: shift in the last quotient bit
1770
1771	ENDFUNC(GLOBAL(udivsi3))
1772#endif /* ! __SHMEDIA__ */
1773#endif /* L_udivsi3 */
1774
1775#ifdef L_udivdi3
1776#if __SHMEDIA__
/* 64-bit unsigned division for SHmedia.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = quotient.
   Registers written: r0, r1, r2, r4-r9, r19-r22, r25, tr0.
   The entry point is also exported under the hidden alias
   udivdi3_internal.

   NOTE(review): the code reads as a reciprocal-based division - the
   divisor is normalized into r6, a reciprocal estimate is built in r1 with
   the fixed-point multimedia instructions, and the quotient is assembled
   from partial quotients.  The instruction order is scheduled by hand;
   confirm against the SHmedia reference before changing any step.  */
1777	.mode	SHmedia
1778	.section	.text..SHmedia32,"ax"
1779	.align	2
1780	.global	GLOBAL(udivdi3)
1781	FUNC(GLOBAL(udivdi3))
1782GLOBAL(udivdi3):
1783	HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1784	shlri r3,1,r4
1785	nsb r4,r22			// r22 = normalization shift for the divisor
1786	shlld r3,r22,r6			// r6 = normalized divisor
1787	shlri r6,49,r5
1788	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1789	sub r21,r5,r1
1790	mmulfx.w r1,r1,r4
1791	mshflo.w r1,r63,r1
1792	sub r63,r22,r20 // r63 == 64 % 64
1793	mmulfx.w r5,r4,r4
1794	pta LOCAL(large_divisor),tr0
1795	addi r20,32,r9			// r9 = 32 - r22
1796	msub.w r1,r4,r1
1797	madd.w r1,r1,r1
1798	mmulfx.w r1,r1,r4
1799	shlri r6,32,r7			// r7 = upper 32 bits of the normalized divisor
1800	bgt/u r9,r63,tr0 // large_divisor
1801	mmulfx.w r5,r4,r4
1802	shlri r2,32+14,r19
1803	addi r22,-31,r0
1804	msub.w r1,r4,r1
1805
1806	mulu.l r1,r7,r4
1807	addi r1,-3,r5
1808	mulu.l r5,r19,r5
1809	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1810	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1811	                 the case may be, %0000000000000000 000.11111111111, still */
1812	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1813	mulu.l r5,r3,r8
1814	mshalds.l r1,r21,r1
1815	shari r4,26,r4
1816	shlld r8,r0,r8
1817	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1818	sub r2,r8,r2
1819	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1820
1821	shlri r2,22,r21
1822	mulu.l r21,r1,r21
1823	shlld r5,r0,r8			// r8 accumulates the quotient
1824	addi r20,30-22,r0
1825	shlrd r21,r0,r21
1826	mulu.l r21,r3,r5
1827	add r8,r21,r8
1828	mcmpgt.l r21,r63,r21 // See Note 1
1829	addi r20,30,r0
1830	mshfhi.l r63,r21,r21
1831	sub r2,r5,r2
1832	andc r2,r21,r2
1833
1834	/* small divisor: need a third divide step */
1835	mulu.l r2,r1,r7
1836	ptabs r18,tr0			// tr0 is reused for the return branch
1837	addi r2,1,r2
1838	shlrd r7,r0,r7
1839	mulu.l r7,r3,r5
1840	add r8,r7,r8
1841	sub r2,r3,r2
1842	cmpgt r2,r5,r5			// r5 = 1 when the quotient needs one more
1843	add r8,r5,r2			// r2 = final quotient
1844	/* could test r3 here to check for divide by zero.  */
1845	blink tr0,r63
1846
1847LOCAL(large_divisor):
1848	mmulfx.w r5,r4,r4
1849	shlrd r2,r9,r25
1850	shlri r25,32,r8
1851	msub.w r1,r4,r1
1852
1853	mulu.l r1,r7,r4
1854	addi r1,-3,r5
1855	mulu.l r5,r8,r5
1856	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1857	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1858	                 the case may be, %0000000000000000 000.11111111111, still */
1859	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1860	shlri r5,14-1,r8
1861	mulu.l r8,r7,r5
1862	mshalds.l r1,r21,r1
1863	shari r4,26,r4
1864	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1865	sub r25,r5,r25
1866	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
1867
1868	shlri r25,22,r21
1869	mulu.l r21,r1,r21
1870	pta LOCAL(no_lo_adj),tr0
1871	addi r22,32,r0
1872	shlri r21,40,r21
1873	mulu.l r21,r7,r5
1874	add r8,r21,r8
1875	shlld r2,r0,r2
1876	sub r25,r5,r25
1877	bgtu/u r7,r25,tr0 // no_lo_adj
1878	addi r8,1,r8
1879	sub r25,r7,r25
1880LOCAL(no_lo_adj):
1881	mextr4 r2,r25,r2
1882
1883	/* large_divisor: only needs a few adjustments.  */
1884	mulu.l r8,r6,r5
1885	ptabs r18,tr0
1886	/* bubble */
1887	cmpgtu r5,r2,r5			// r5 = 1 when the estimate in r8 is one too large
1888	sub r8,r5,r2			// r2 = final quotient
1889	blink tr0,r63
1890	ENDFUNC(GLOBAL(udivdi3))
1891/* Note 1: To shift the result of the second divide stage so that the result
1892   always fits into 32 bits, yet we still reduce the rest sufficiently
1893   would require a lot of instructions to do the shifts just right.  Using
1894   the full 64 bit shift result to multiply with the divisor would require
1895   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1896   Fortunately, if the upper 32 bits of the shift result are nonzero, we
1897   know that the rest after taking this partial result into account will
1898   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1899   upper 32 bits of the partial result are nonzero.  */
1900#endif /* __SHMEDIA__ */
1901#endif /* L_udivdi3 */
1902
1903#ifdef L_divdi3
1904#if __SHMEDIA__
/* 64-bit signed division for SHmedia, implemented on top of
   udivdi3_internal.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = quotient.
   Both operands are replaced by their absolute values.  When their signs
   agree the unsigned routine is entered as a tail call; otherwise it is
   called and its result is negated afterwards.
   Registers written here: r2, r3, r18, r22, r23, tr0, tr1.  */
1905	.mode	SHmedia
1906	.section	.text..SHmedia32,"ax"
1907	.align	2
1908	.global	GLOBAL(divdi3)
1909	FUNC(GLOBAL(divdi3))
1910GLOBAL(divdi3):
1911	pta GLOBAL(udivdi3_internal),tr0
1912	shari r2,63,r22			// r22 = sign mask of the dividend (0 or -1)
1913	shari r3,63,r23			// r23 = sign mask of the divisor
1914	xor r2,r22,r2
1915	xor r3,r23,r3
1916	sub r2,r22,r2			// r2 = |dividend|
1917	sub r3,r23,r3			// r3 = |divisor|
1918	beq/u r22,r23,tr0		// same sign: tail call, r18 still holds our return address
1919	ptabs r18,tr1			// keep our return address in tr1
1920	blink tr0,r18			// call udivdi3_internal, link in r18
1921	sub r63,r2,r2			// signs differ: negate the quotient
1922	blink tr1,r63
1923	ENDFUNC(GLOBAL(divdi3))
1924#endif /* __SHMEDIA__ */
1925#endif /* L_divdi3 */
1926
1927#ifdef L_umoddi3
1928#if __SHMEDIA__
/* 64-bit unsigned remainder for SHmedia.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = remainder.
   Registers written: r0, r1, r2, r4-r9, r19-r22, r25, tr0.
   The entry point is also exported under the hidden alias
   umoddi3_internal.

   NOTE(review): the instruction sequence follows the same
   reciprocal-based scheme as the 64-bit unsigned division routine, but
   keeps the rest instead of the quotient.  The order is scheduled by hand;
   confirm against the SHmedia reference before changing any step.  */
1929	.mode	SHmedia
1930	.section	.text..SHmedia32,"ax"
1931	.align	2
1932	.global	GLOBAL(umoddi3)
1933	FUNC(GLOBAL(umoddi3))
1934GLOBAL(umoddi3):
1935	HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1936	shlri r3,1,r4
1937	nsb r4,r22			// r22 = normalization shift for the divisor
1938	shlld r3,r22,r6			// r6 = normalized divisor
1939	shlri r6,49,r5
1940	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1941	sub r21,r5,r1
1942	mmulfx.w r1,r1,r4
1943	mshflo.w r1,r63,r1
1944	sub r63,r22,r20 // r63 == 64 % 64
1945	mmulfx.w r5,r4,r4
1946	pta LOCAL(large_divisor),tr0
1947	addi r20,32,r9			// r9 = 32 - r22
1948	msub.w r1,r4,r1
1949	madd.w r1,r1,r1
1950	mmulfx.w r1,r1,r4
1951	shlri r6,32,r7			// r7 = upper 32 bits of the normalized divisor
1952	bgt/u r9,r63,tr0 // large_divisor
1953	mmulfx.w r5,r4,r4
1954	shlri r2,32+14,r19
1955	addi r22,-31,r0
1956	msub.w r1,r4,r1
1957
1958	mulu.l r1,r7,r4
1959	addi r1,-3,r5
1960	mulu.l r5,r19,r5
1961	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1962	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1963	                 the case may be, %0000000000000000 000.11111111111, still */
1964	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1965	mulu.l r5,r3,r5
1966	mshalds.l r1,r21,r1
1967	shari r4,26,r4
1968	shlld r5,r0,r5
1969	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1970	sub r2,r5,r2
1971	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1972
1973	shlri r2,22,r21
1974	mulu.l r21,r1,r21
1975	addi r20,30-22,r0
1976	/* bubble */ /* could test r3 here to check for divide by zero.  */
1977	shlrd r21,r0,r21
1978	mulu.l r21,r3,r5
1979	mcmpgt.l r21,r63,r21 // See Note 1
1980	addi r20,30,r0
1981	mshfhi.l r63,r21,r21
1982	sub r2,r5,r2
1983	andc r2,r21,r2
1984
1985	/* small divisor: need a third divide step */
1986	mulu.l r2,r1,r7
1987	ptabs r18,tr0			// tr0 is reused for the return branch
1988	sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1989	shlrd r7,r0,r7
1990	mulu.l r7,r3,r5
1991	/* bubble */
1992	addi r8,1,r7
1993	cmpgt r7,r5,r7
1994	cmvne r7,r8,r2			// pick rest - r3 when one more divisor fits
1995	sub r2,r5,r2			// r2 = final remainder
1996	blink tr0,r63
1997
1998LOCAL(large_divisor):
1999	mmulfx.w r5,r4,r4
2000	shlrd r2,r9,r25
2001	shlri r25,32,r8
2002	msub.w r1,r4,r1
2003
2004	mulu.l r1,r7,r4
2005	addi r1,-3,r5
2006	mulu.l r5,r8,r5
2007	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
2008	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
2009	                 the case may be, %0000000000000000 000.11111111111, still */
2010	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
2011	shlri r5,14-1,r8
2012	mulu.l r8,r7,r5
2013	mshalds.l r1,r21,r1
2014	shari r4,26,r4
2015	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
2016	sub r25,r5,r25
2017	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
2018
2019	shlri r25,22,r21
2020	mulu.l r21,r1,r21
2021	pta LOCAL(no_lo_adj),tr0
2022	addi r22,32,r0
2023	shlri r21,40,r21
2024	mulu.l r21,r7,r5
2025	add r8,r21,r8
2026	shlld r2,r0,r2
2027	sub r25,r5,r25
2028	bgtu/u r7,r25,tr0 // no_lo_adj
2029	addi r8,1,r8
2030	sub r25,r7,r25
2031LOCAL(no_lo_adj):
2032	mextr4 r2,r25,r2
2033
2034	/* large_divisor: only needs a few adjustments.  */
2035	mulu.l r8,r6,r5
2036	ptabs r18,tr0
2037	add r2,r6,r7
2038	cmpgtu r5,r2,r8
2039	cmvne r8,r7,r2			// add the normalized divisor back when the estimate was too large
2040	sub r2,r5,r2
2041	shlrd r2,r22,r2			// undo the normalization shift on the remainder
2042	blink tr0,r63
2043	ENDFUNC(GLOBAL(umoddi3))
2044/* Note 1: To shift the result of the second divide stage so that the result
2045   always fits into 32 bits, yet we still reduce the rest sufficiently
2046   would require a lot of instructions to do the shifts just right.  Using
2047   the full 64 bit shift result to multiply with the divisor would require
2048   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
2049   Fortunately, if the upper 32 bits of the shift result are nonzero, we
2050   know that the rest after taking this partial result into account will
2051   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
2052   upper 32 bits of the partial result are nonzero.  */
2053#endif /* __SHMEDIA__ */
2054#endif /* L_umoddi3 */
2055
2056#ifdef L_moddi3
2057#if __SHMEDIA__
/* 64-bit signed remainder for SHmedia, implemented on top of
   umoddi3_internal.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = remainder, carrying the sign of the dividend.
   Both operands are replaced by their absolute values.  A non-negative
   dividend enters the unsigned routine as a tail call; otherwise the
   routine is called and its result is negated afterwards.
   Registers written here: r2, r3, r18, r22, r23, tr0, tr1.  */
2058	.mode	SHmedia
2059	.section	.text..SHmedia32,"ax"
2060	.align	2
2061	.global	GLOBAL(moddi3)
2062	FUNC(GLOBAL(moddi3))
2063GLOBAL(moddi3):
2064	pta GLOBAL(umoddi3_internal),tr0
2065	shari r2,63,r22			// r22 = sign mask of the dividend (0 or -1)
2066	shari r3,63,r23			// r23 = sign mask of the divisor
2067	xor r2,r22,r2
2068	xor r3,r23,r3
2069	sub r2,r22,r2			// r2 = |dividend|
2070	sub r3,r23,r3			// r3 = |divisor|
2071	beq/u r22,r63,tr0		// dividend >= 0: tail call, only its sign matters
2072	ptabs r18,tr1			// keep our return address in tr1
2073	blink tr0,r18			// call umoddi3_internal, link in r18
2074	sub r63,r2,r2			// negative dividend: negate the remainder
2075	blink tr1,r63
2076	ENDFUNC(GLOBAL(moddi3))
2077#endif /* __SHMEDIA__ */
2078#endif /* L_moddi3 */
2079
2080#ifdef L_set_fpscr
2081#if !defined (__SH2A_NOFPU__)
2082#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
2083#ifdef __SH5__
2084	.mode	SHcompact
2085#endif
/* set_fpscr: load FPSCR from r4 and refresh the two-word fpscr_values
   array with two variants of that value.
   In: r4 = new FPSCR value.
   Registers written: r0, r1, r2, r3 (r12 is saved and restored in the
   PIC path).

   The mode bits are edited on the half-swapped copy in r0, because the
   or/xor immediate forms only reach the low byte.  After swap.w the
   immediate 8 corresponds to 0x80000 and 16 to 0x100000 in FPSCR.
   NOTE(review): these are presumably FPSCR.PR and FPSCR.SZ - confirm
   against the SH-4 manual.

   The first store gets the value with bit 0x80000 set (and 0x100000 set
   only when FMOVD_WORKS); the second store gets the value with both bits
   clear.  Which array slot receives which value depends on
   __SH4__ / __SH2A_DOUBLE__.  */
2086	.global GLOBAL(set_fpscr)
2087	HIDDEN_FUNC(GLOBAL(set_fpscr))
2088GLOBAL(set_fpscr):
2089	lds r4,fpscr
2090#ifdef __PIC__
2091	mov.l	r12,@-r15
2092#ifdef __vxworks
2093	mov.l	LOCAL(set_fpscr_L0_base),r12
2094	mov.l	LOCAL(set_fpscr_L0_index),r0
2095	mov.l	@r12,r12
2096	mov.l	@(r0,r12),r12
2097#else
2098	mova	LOCAL(set_fpscr_L0),r0
2099	mov.l	LOCAL(set_fpscr_L0),r12
2100	add	r0,r12			// r12 = address of the GOT
2101#endif
2102	mov.l	LOCAL(set_fpscr_L1),r0
2103	mov.l	@(r0,r12),r1		// r1 = &fpscr_values, read from its GOT slot
2104	mov.l	@r15+,r12
2105#else
2106	mov.l LOCAL(set_fpscr_L1),r1	// r1 = &fpscr_values
2107#endif
2108	swap.w r4,r0			// bring FPSCR bits 16..31 into the low half
2109	or #24,r0			// set both mode bits (0x80000 | 0x100000)
2110#ifndef FMOVD_WORKS
2111	xor #16,r0			// clear 0x100000 again
2112#endif
2113#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2114	swap.w r0,r3
2115	mov.l r3,@(4,r1)		// fpscr_values[1] = value with 0x80000 set
2116#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2117	swap.w r0,r2
2118	mov.l r2,@r1			// fpscr_values[0] = value with 0x80000 set
2119#endif
2120#ifndef FMOVD_WORKS
2121	xor #8,r0			// clear 0x80000
2122#else
2123	xor #24,r0			// clear 0x80000 and 0x100000
2124#endif
2125#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2126	swap.w r0,r2
2127	rts
2128	mov.l r2,@r1			// delay slot: fpscr_values[0] = value with both bits clear
2129#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2130	swap.w r0,r3
2131	rts
2132	mov.l r3,@(4,r1)		// delay slot: fpscr_values[1] = value with both bits clear
2133#endif
2134	.align 2
2135#ifdef __PIC__
2136#ifdef __vxworks
2137LOCAL(set_fpscr_L0_base):
2138	.long ___GOTT_BASE__
2139LOCAL(set_fpscr_L0_index):
2140	.long ___GOTT_INDEX__
2141#else
2142LOCAL(set_fpscr_L0):
2143	.long _GLOBAL_OFFSET_TABLE_
2144#endif
2145LOCAL(set_fpscr_L1):
2146	.long GLOBAL(fpscr_values@GOT)
2147#else
2148LOCAL(set_fpscr_L1):
2149	.long GLOBAL(fpscr_values)
2150#endif
2151
2152	ENDFUNC(GLOBAL(set_fpscr))
2153#ifndef NO_FPSCR_VALUES
2154#ifdef __ELF__
2155        .comm   GLOBAL(fpscr_values),8,4
2156#else
2157        .comm   GLOBAL(fpscr_values),8
2158#endif /* ELF */
2159#endif /* NO_FPSCR_VALUES */
2160#endif /* SH2E / SH3E / SH4 */
2161#endif /* __SH2A_NOFPU__ */
2162#endif /* L_set_fpscr */
2163#ifdef L_ic_invalidate
2164#if __SH5__ == 32
/* SH5 (32-bit) trampoline support.

   init_trampoline writes a 16-byte block at the address in r0: an
   endian-dependent 64-bit constant at offset 0, r2 at offset 8 and r3 at
   offset 12.  It has no return instruction of its own and falls through
   into ic_invalidate.
   NOTE(review): the constant presumably encodes the trampoline
   instruction words - confirm against the trampoline layout in sh.h/sh.c.

   ic_invalidate writes back the data cache block at r0 and invalidates
   the matching instruction cache block, then returns through r18.
   Registers written: r20, tr0.  */
2165	.mode	SHmedia
2166	.section	.text..SHmedia32,"ax"
2167	.align	2
2168	.global	GLOBAL(init_trampoline)
2169	HIDDEN_FUNC(GLOBAL(init_trampoline))
2170GLOBAL(init_trampoline):
2171	st.l	r0,8,r2			// store r2 at r0 + 8
2172#ifdef __LITTLE_ENDIAN__
2173	movi	9,r20
2174	shori	0x402b,r20
2175	shori	0xd101,r20
2176	shori	0xd002,r20
2177#else
2178	movi	0xffffffffffffd002,r20
2179	shori	0xd101,r20
2180	shori	0x402b,r20
2181	shori	9,r20
2182#endif
2183	st.q	r0,0,r20		// store the 64-bit constant at r0 + 0
2184	st.l	r0,12,r3		// store r3 at r0 + 12
2185	ENDFUNC(GLOBAL(init_trampoline))
	/* Execution continues into ic_invalidate.  */
2186	.global	GLOBAL(ic_invalidate)
2187	HIDDEN_FUNC(GLOBAL(ic_invalidate))
2188GLOBAL(ic_invalidate):
2189	ocbwb	r0,0			// write back the data cache block at r0
2190	synco
2191	icbi	r0, 0			// invalidate the instruction cache block at r0
2192	ptabs	r18, tr0
2193	synci
2194	blink	tr0, r63
2195	ENDFUNC(GLOBAL(ic_invalidate))
2196#elif defined(__SH4A__)
/* SH4A: invalidate the instruction cache block for the address in r4.
   The data cache block is written back first so the instruction fetch
   sees the new contents.  No registers are modified.  */
2197	.global GLOBAL(ic_invalidate)
2198	HIDDEN_FUNC(GLOBAL(ic_invalidate))
2199GLOBAL(ic_invalidate):
2200	ocbwb	@r4			// write back the data cache block
2201	synco
2202	icbi	@r4			// invalidate the instruction cache block
2203	rts
2204	  nop
2205	ENDFUNC(GLOBAL(ic_invalidate))
2206#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2207	/* For system code, we use ic_invalidate_line_i, but user code
2208	   needs a different mechanism.  A kernel call is generally not
2209	   available, and it would also be slow.  Different SH4 variants use
2210	   different sizes and associativities of the Icache.  We use a small
2211	   bit of dispatch code that can be put hidden in every shared object,
2212	   which calls the actual processor-specific invalidation code in a
2213	   separate module.
2214	   Or if you have operating system support, the OS could mmap the
2215	   processor-specific code from a single page, since it is highly
2216	   repetitive.

	   In: r4 = address whose cache line is to be invalidated.
	   The dispatcher loads the address of ic_invalidate_array into r1,
	   writes back the data cache block at r4, and jumps to
	   array + ((r4 - array) & mask), where mask is the word stored at
	   array + 8.  The word at array + 4 is handed over in r0.  r4 is
	   left holding r4 - array.
	   Registers written: r0, r1, r4 (plus r2 in PIC builds).  */
2217	.global GLOBAL(ic_invalidate)
2218	HIDDEN_FUNC(GLOBAL(ic_invalidate))
2219GLOBAL(ic_invalidate):
2220#ifdef __pic__
2221#ifdef __vxworks
2222	mov.l	1f,r1
2223	mov.l	2f,r0
2224	mov.l	@r1,r1
2225	mov.l	0f,r2
2226	mov.l	@(r0,r1),r0		// r0 = address of the GOT
2227#else
2228	mov.l	1f,r1
2229	mova	1f,r0
2230	mov.l	0f,r2
2231	add	r1,r0			// r0 = address of the GOT
2232#endif
2233	mov.l	@(r0,r2),r1		// r1 = &ic_invalidate_array, read from its GOT slot
2234#else
2235	mov.l	0f,r1			// r1 = &ic_invalidate_array
2236#endif
2237	ocbwb	@r4			// write back the data cache block first
2238	mov.l	@(8,r1),r0		// r0 = offset mask stored in the array
2239	sub	r1,r4
2240	and	r4,r0
2241	add	r1,r0			// r0 = array + masked offset
2242	jmp	@r0
2243	mov.l	@(4,r1),r0		// delay slot: r0 = word at array + 4
2244	.align	2
2245#ifndef __pic__
22460:	.long   GLOBAL(ic_invalidate_array)
2247#else /* __pic__ */
2248	.global GLOBAL(ic_invalidate_array)
22490:	.long   GLOBAL(ic_invalidate_array)@GOT
2250#ifdef __vxworks
22511:	.long	___GOTT_BASE__
22522:	.long	___GOTT_INDEX__
2253#else
22541:	.long   _GLOBAL_OFFSET_TABLE_
2255#endif
2256#endif /* __pic__ */
	/* ENDFUNC must sit outside the __pic__ conditional so that non-PIC
	   builds close the function as well, like the other ic_invalidate
	   variants do.  */
2257	ENDFUNC(GLOBAL(ic_invalidate))
2258#endif /* SH4 */
2259#endif /* L_ic_invalidate */
2260
2261#ifdef L_ic_invalidate_array
2262#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
/* SH4A flavour of ic_invalidate_array.  On entry r4 holds an offset
   relative to the address in r1 and r1 holds the base; the two are added
   back together and that cache block is invalidated with icbi.
   NOTE(review): this matches what the SH4 ic_invalidate dispatcher leaves
   in r1/r4 (r4 = address - array, r1 = array) - confirm when changing
   either side.  The trailing .long 0 lands at offset 12 of the array.  */
2263	.global GLOBAL(ic_invalidate_array)
2264	/* This is needed when an SH4 dso with trampolines is used on SH4A.  */
2265	.global GLOBAL(ic_invalidate_array)
2266	FUNC(GLOBAL(ic_invalidate_array))
2267GLOBAL(ic_invalidate_array):
2268	add	r1,r4			// r4 = r1 + r4
2269	synco
2270	icbi	@r4
2271	rts
2272	  nop
2273	.align 2
2274	.long	0
2275	ENDFUNC(GLOBAL(ic_invalidate_array))
2276#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* SH4 flavour of ic_invalidate_array: a 32-byte aligned table made of
   32-byte entries.  Every .rept body below emits exactly 32 bytes
   (instructions are 2 bytes, .long is 4).  WAYS and WAY_SIZE default to
   4 and 0x4000 and may be predefined by the build.
   In the WAYS <= 6 layout each entry carries WAY_SIZE + 8 at offset 4 and
   WAY_SIZE - 32 at offset 8.
   NOTE(review): presumably executing an entry pulls that line into the
   instruction cache and the braf chain then touches the same line offset
   in the following ways, evicting the stale line - confirm against the
   SH4 cache documentation.  */
2277	.global GLOBAL(ic_invalidate_array)
2278	.p2align 5
2279	FUNC(GLOBAL(ic_invalidate_array))
2280/* This must be aligned to the beginning of a cache line.  */
2281GLOBAL(ic_invalidate_array):
2282#ifndef WAYS
2283#define WAYS 4
2284#define WAY_SIZE 0x4000
2285#endif
2286#if WAYS == 1
	/* Entry: rts + nop, then seven words of WAY_SIZE - 32.  */
2287	.rept	WAY_SIZE * WAYS / 32
2288	rts
2289	nop
2290	.rept	7
2291	.long	WAY_SIZE - 32
2292	.endr
2293	.endr
2294#elif WAYS <= 6
	/* Entry: braf/add, two data words, WAYS-2 further braf/nop pairs and
	   7-WAYS rts/nop pairs as filler and exit.  */
2295	.rept	WAY_SIZE * WAYS / 32
2296	braf	r0
2297	add	#-8,r0			// delay slot: adjust the displacement for the next braf
2298	.long	WAY_SIZE + 8
2299	.long	WAY_SIZE - 32
2300	.rept	WAYS-2
2301	braf	r0
2302	nop
2303	.endr
2304	.rept	7 - WAYS
2305	rts
2306	nop
2307	.endr
2308	.endr
2309#else /* WAYS > 6 */
2310	/* This variant needs two different pages for mmap-ing.  */
2311 	.rept	WAYS-1
2312	.rept	WAY_SIZE / 32
2313	braf	r0
2314	nop
2315	.long	WAY_SIZE
2316	.rept 6
2317	.long	WAY_SIZE - 32
2318	.endr
2319	.endr
2320	.endr
	/* Last way: every entry simply returns.  */
2321	.rept	WAY_SIZE / 32
2322	rts
2323	.rept	15
2324	nop
2325	.endr
2326	.endr
2327#endif /* WAYS */
2328	ENDFUNC(GLOBAL(ic_invalidate_array))
2329#endif /* SH4 */
2330#endif /* L_ic_invalidate_array */
2331
2332#if defined (__SH5__) && __SH5__ == 32
2333#ifdef L_shcompact_call_trampoline
2334	.section	.rodata
2335	.align	1
/* Dispatch table for GCC_shcompact_call_trampoline.  Each entry is a
   16-bit offset of a handler relative to ct_main_label.  The trampoline
   loads an entry with ldx.w from (ct_main_table - 31 * 2) indexed by
   2 * nsb(cookie) and branches to it with ptrel, so the first entry
   corresponds to an nsb result of 31.  */
2336LOCAL(ct_main_table):
2337.word	LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2338.word	LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2339.word	LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2340.word	LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2341.word	LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2342.word	LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2343.word	LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2344.word	LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2345.word	LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2346.word	LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2347.word	LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2348.word	LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2349.word	LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2350.word	LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2351.word	LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2352.word	LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2353.word	LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2354.word	LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2355.word	LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2356.word	LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2357.word	LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2358.word	LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2359.word	LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2360.word	LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2361.word	LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2362.word	LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2363.word	LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2364.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2365.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2366.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2367.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2368.word	LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2369.word	LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2370	.mode	SHmedia
2371	.section	.text..SHmedia32, "ax"
2372	.align	2
2373
2374     /* This function loads 64-bit general-purpose registers from the
2375	stack, from a memory address contained in them or from an FP
2376	register, according to a cookie passed in r1.  Its execution
2377	time is linear on the number of registers that actually have
2378	to be copied.  See sh.h for details on the actual bit pattern.
2379
2380	The function to be called is passed in r0.  If a 32-bit return
2381	value is expected, the actual function will be tail-called,
2382	otherwise the return address will be stored in r10 (that the
2383	caller should expect to be clobbered) and the return value
2384	will be expanded into r2/r3 upon return.  */
2385
2386	.global	GLOBAL(GCC_shcompact_call_trampoline)
2387	FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2388GLOBAL(GCC_shcompact_call_trampoline):
2389	ptabs/l	r0, tr0	/* Prepare to call the actual function.  */
2390	movi	((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2391	pt/l	LOCAL(ct_loop), tr1
2392	addz.l	r1, r63, r1
2393	shori	((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2394LOCAL(ct_loop):
2395	nsb	r1, r28
2396	shlli	r28, 1, r29
2397	ldx.w	r0, r29, r30
2398LOCAL(ct_main_label):
2399	ptrel/l	r30, tr2
2400	blink	tr2, r63
2401LOCAL(ct_r2_fp):	/* Copy r2 from an FP register.  */
2402	/* It must be dr0, so just do it.  */
2403	fmov.dq	dr0, r2
2404	movi	7, r30
2405	shlli	r30, 29, r31
2406	andc	r1, r31, r1
2407	blink	tr1, r63
2408LOCAL(ct_r3_fp):	/* Copy r3 from an FP register.  */
2409	/* It is either dr0 or dr2.  */
2410	movi	7, r30
2411	shlri	r1, 26, r32
2412	shlli	r30, 26, r31
2413	andc	r1, r31, r1
2414	fmov.dq	dr0, r3
2415	beqi/l	r32, 4, tr1
2416	fmov.dq	dr2, r3
2417	blink	tr1, r63
2418LOCAL(ct_r4_fp):	/* Copy r4 from an FP register.  */
2419	shlri	r1, 23 - 3, r34
2420	andi	r34, 3 << 3, r33
2421	addi	r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2422LOCAL(ct_r4_fp_base):
2423	ptrel/l	r32, tr2
2424	movi	7, r30
2425	shlli	r30, 23, r31
2426	andc	r1, r31, r1
2427	blink	tr2, r63
2428LOCAL(ct_r4_fp_copy):
2429	fmov.dq	dr0, r4
2430	blink	tr1, r63
2431	fmov.dq	dr2, r4
2432	blink	tr1, r63
2433	fmov.dq	dr4, r4
2434	blink	tr1, r63
2435LOCAL(ct_r5_fp):	/* Copy r5 from an FP register.  */
2436	shlri	r1, 20 - 3, r34
2437	andi	r34, 3 << 3, r33
2438	addi	r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2439LOCAL(ct_r5_fp_base):
2440	ptrel/l	r32, tr2
2441	movi	7, r30
2442	shlli	r30, 20, r31
2443	andc	r1, r31, r1
2444	blink	tr2, r63
2445LOCAL(ct_r5_fp_copy):
2446	fmov.dq	dr0, r5
2447	blink	tr1, r63
2448	fmov.dq	dr2, r5
2449	blink	tr1, r63
2450	fmov.dq	dr4, r5
2451	blink	tr1, r63
2452	fmov.dq	dr6, r5
2453	blink	tr1, r63
2454LOCAL(ct_r6_fph):	/* Copy r6 from a high FP register.  */
2455	/* It must be dr8.  */
2456	fmov.dq	dr8, r6
2457	movi	15, r30
2458	shlli	r30, 16, r31
2459	andc	r1, r31, r1
2460	blink	tr1, r63
2461LOCAL(ct_r6_fpl):	/* Copy r6 from a low FP register.  */
2462	shlri	r1, 16 - 3, r34
2463	andi	r34, 3 << 3, r33
2464	addi	r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2465LOCAL(ct_r6_fp_base):
2466	ptrel/l	r32, tr2
2467	movi	7, r30
2468	shlli	r30, 16, r31
2469	andc	r1, r31, r1
2470	blink	tr2, r63
2471LOCAL(ct_r6_fp_copy):
2472	fmov.dq	dr0, r6
2473	blink	tr1, r63
2474	fmov.dq	dr2, r6
2475	blink	tr1, r63
2476	fmov.dq	dr4, r6
2477	blink	tr1, r63
2478	fmov.dq	dr6, r6
2479	blink	tr1, r63
2480LOCAL(ct_r7_fph):	/* Copy r7 from a high FP register.  */
2481	/* It is either dr8 or dr10.  */
2482	movi	15 << 12, r31
2483	shlri	r1, 12, r32
2484	andc	r1, r31, r1
2485	fmov.dq	dr8, r7
2486	beqi/l	r32, 8, tr1
2487	fmov.dq	dr10, r7
2488	blink	tr1, r63
2489LOCAL(ct_r7_fpl):	/* Copy r7 from a low FP register.  */
2490	shlri	r1, 12 - 3, r34
2491	andi	r34, 3 << 3, r33
2492	addi	r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2493LOCAL(ct_r7_fp_base):
2494	ptrel/l	r32, tr2
2495	movi	7 << 12, r31
2496	andc	r1, r31, r1
2497	blink	tr2, r63
2498LOCAL(ct_r7_fp_copy):
2499	fmov.dq	dr0, r7
2500	blink	tr1, r63
2501	fmov.dq	dr2, r7
2502	blink	tr1, r63
2503	fmov.dq	dr4, r7
2504	blink	tr1, r63
2505	fmov.dq	dr6, r7
2506	blink	tr1, r63
2507LOCAL(ct_r8_fph):	/* Copy r8 from a high FP register.  */
2508	/* It is either dr8 or dr10.  */
2509	movi	15 << 8, r31
2510	andi	r1, 1 << 8, r32
2511	andc	r1, r31, r1
2512	fmov.dq	dr8, r8
2513	beq/l	r32, r63, tr1
2514	fmov.dq	dr10, r8
2515	blink	tr1, r63
2516LOCAL(ct_r8_fpl):	/* Copy r8 from a low FP register.  */
2517	shlri	r1, 8 - 3, r34
2518	andi	r34, 3 << 3, r33
2519	addi	r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2520LOCAL(ct_r8_fp_base):
2521	ptrel/l	r32, tr2
2522	movi	7 << 8, r31
2523	andc	r1, r31, r1
2524	blink	tr2, r63
2525LOCAL(ct_r8_fp_copy):
2526	fmov.dq	dr0, r8
2527	blink	tr1, r63
2528	fmov.dq	dr2, r8
2529	blink	tr1, r63
2530	fmov.dq	dr4, r8
2531	blink	tr1, r63
2532	fmov.dq	dr6, r8
2533	blink	tr1, r63
2534LOCAL(ct_r9_fph):	/* Copy r9 from a high FP register.  */
2535	/* It is either dr8 or dr10.  */
2536	movi	15 << 4, r31
2537	andi	r1, 1 << 4, r32
2538	andc	r1, r31, r1
2539	fmov.dq	dr8, r9
2540	beq/l	r32, r63, tr1
2541	fmov.dq	dr10, r9
2542	blink	tr1, r63
2543LOCAL(ct_r9_fpl):	/* Copy r9 from a low FP register.  */
2544	shlri	r1, 4 - 3, r34
2545	andi	r34, 3 << 3, r33
2546	addi	r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2547LOCAL(ct_r9_fp_base):
2548	ptrel/l	r32, tr2
2549	movi	7 << 4, r31
2550	andc	r1, r31, r1
2551	blink	tr2, r63
2552LOCAL(ct_r9_fp_copy):
2553	fmov.dq	dr0, r9
2554	blink	tr1, r63
2555	fmov.dq	dr2, r9
2556	blink	tr1, r63
2557	fmov.dq	dr4, r9
2558	blink	tr1, r63
2559	fmov.dq	dr6, r9
2560	blink	tr1, r63
2561LOCAL(ct_r2_ld):	/* Copy r2 from a memory address.  */
2562	pt/l	LOCAL(ct_r2_load), tr2
2563	movi	3, r30
2564	shlli	r30, 29, r31
2565	and	r1, r31, r32
2566	andc	r1, r31, r1
2567	beq/l	r31, r32, tr2
2568	addi.l	r2, 8, r3
2569	ldx.q	r2, r63, r2
2570	/* Fall through.  */
2571LOCAL(ct_r3_ld):	/* Copy r3 from a memory address.  */
2572	pt/l	LOCAL(ct_r3_load), tr2
2573	movi	3, r30
2574	shlli	r30, 26, r31
2575	and	r1, r31, r32
2576	andc	r1, r31, r1
2577	beq/l	r31, r32, tr2
2578	addi.l	r3, 8, r4
2579	ldx.q	r3, r63, r3
2580LOCAL(ct_r4_ld):	/* Copy r4 from a memory address.  */
2581	pt/l	LOCAL(ct_r4_load), tr2
2582	movi	3, r30
2583	shlli	r30, 23, r31
2584	and	r1, r31, r32
2585	andc	r1, r31, r1
2586	beq/l	r31, r32, tr2
2587	addi.l	r4, 8, r5
2588	ldx.q	r4, r63, r4
2589LOCAL(ct_r5_ld):	/* Copy r5 from a memory address.  */
2590	pt/l	LOCAL(ct_r5_load), tr2
2591	movi	3, r30
2592	shlli	r30, 20, r31
2593	and	r1, r31, r32
2594	andc	r1, r31, r1
2595	beq/l	r31, r32, tr2
2596	addi.l	r5, 8, r6
2597	ldx.q	r5, r63, r5
2598LOCAL(ct_r6_ld):	/* Copy r6 from a memory address.  */
2599	pt/l	LOCAL(ct_r6_load), tr2
2600	movi	3 << 16, r31
2601	and	r1, r31, r32
2602	andc	r1, r31, r1
2603	beq/l	r31, r32, tr2
2604	addi.l	r6, 8, r7
2605	ldx.q	r6, r63, r6
2606LOCAL(ct_r7_ld):	/* Copy r7 from a memory address.  */
2607	pt/l	LOCAL(ct_r7_load), tr2
2608	movi	3 << 12, r31
2609	and	r1, r31, r32
2610	andc	r1, r31, r1
2611	beq/l	r31, r32, tr2
2612	addi.l	r7, 8, r8
2613	ldx.q	r7, r63, r7
2614LOCAL(ct_r8_ld):	/* Copy r8 from a memory address.  */
2615	pt/l	LOCAL(ct_r8_load), tr2
2616	movi	3 << 8, r31
2617	and	r1, r31, r32
2618	andc	r1, r31, r1
2619	beq/l	r31, r32, tr2
2620	addi.l	r8, 8, r9
2621	ldx.q	r8, r63, r8
2622LOCAL(ct_r9_ld):	/* Copy r9 from a memory address.  */
2623	pt/l	LOCAL(ct_check_tramp), tr2
2624	ldx.q	r9, r63, r9
2625	blink	tr2, r63
2626LOCAL(ct_r2_load):
2627	ldx.q	r2, r63, r2
2628	blink	tr1, r63
2629LOCAL(ct_r3_load):
2630	ldx.q	r3, r63, r3
2631	blink	tr1, r63
2632LOCAL(ct_r4_load):
2633	ldx.q	r4, r63, r4
2634	blink	tr1, r63
2635LOCAL(ct_r5_load):
2636	ldx.q	r5, r63, r5
2637	blink	tr1, r63
2638LOCAL(ct_r6_load):
2639	ldx.q	r6, r63, r6
2640	blink	tr1, r63
2641LOCAL(ct_r7_load):
2642	ldx.q	r7, r63, r7
2643	blink	tr1, r63
2644LOCAL(ct_r8_load):
2645	ldx.q	r8, r63, r8
2646	blink	tr1, r63
2647LOCAL(ct_r2_pop):	/* Pop r2 from the stack.  */
2648	movi	1, r30
2649	ldx.q	r15, r63, r2
2650	shlli	r30, 29, r31
2651	addi.l	r15, 8, r15
2652	andc	r1, r31, r1
2653	blink	tr1, r63
2654LOCAL(ct_r3_pop):	/* Pop r3 from the stack.  */
2655	movi	1, r30
2656	ldx.q	r15, r63, r3
2657	shlli	r30, 26, r31
2658	addi.l	r15, 8, r15
2659	andc	r1, r31, r1
2660	blink	tr1, r63
2661LOCAL(ct_r4_pop):	/* Pop r4 from the stack.  */
2662	movi	1, r30
2663	ldx.q	r15, r63, r4
2664	shlli	r30, 23, r31
2665	addi.l	r15, 8, r15
2666	andc	r1, r31, r1
2667	blink	tr1, r63
2668LOCAL(ct_r5_pop):	/* Pop r5 from the stack.  */
2669	movi	1, r30
2670	ldx.q	r15, r63, r5
2671	shlli	r30, 20, r31
2672	addi.l	r15, 8, r15
2673	andc	r1, r31, r1
2674	blink	tr1, r63
2675LOCAL(ct_r6_pop):	/* Pop r6 from the stack.  */
2676	movi	1, r30
2677	ldx.q	r15, r63, r6
2678	shlli	r30, 16, r31
2679	addi.l	r15, 8, r15
2680	andc	r1, r31, r1
2681	blink	tr1, r63
2682LOCAL(ct_r7_pop):	/* Pop r7 from the stack.  */
2683	ldx.q	r15, r63, r7
2684	movi	1 << 12, r31
2685	addi.l	r15, 8, r15
2686	andc	r1, r31, r1
2687	blink	tr1, r63
2688LOCAL(ct_r8_pop):	/* Pop r8 from the stack.  */
2689	ldx.q	r15, r63, r8
2690	movi	1 << 8, r31
2691	addi.l	r15, 8, r15
2692	andc	r1, r31, r1
2693	blink	tr1, r63
2694LOCAL(ct_pop_seq):	/* Pop a sequence of registers off the stack.  */
2695	andi	r1, 7 << 1, r30
2696	movi	(LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2697	shlli	r30, 2, r31
2698	shori	LOCAL(ct_end_of_pop_seq) & 65535, r32
2699	sub.l	r32, r31, r33
2700	ptabs/l	r33, tr2
2701	blink	tr2, r63
2702LOCAL(ct_start_of_pop_seq):	/* Beginning of pop sequence.  */
2703	ldx.q	r15, r63, r3
2704	addi.l	r15, 8, r15
2705	ldx.q	r15, r63, r4
2706	addi.l	r15, 8, r15
2707	ldx.q	r15, r63, r5
2708	addi.l	r15, 8, r15
2709	ldx.q	r15, r63, r6
2710	addi.l	r15, 8, r15
2711	ldx.q	r15, r63, r7
2712	addi.l	r15, 8, r15
2713	ldx.q	r15, r63, r8
2714	addi.l	r15, 8, r15
2715LOCAL(ct_r9_pop):	/* Pop r9 from the stack.  */
2716	ldx.q	r15, r63, r9
2717	addi.l	r15, 8, r15
2718LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
2719LOCAL(ct_check_tramp):	/* Check whether we need a trampoline.  */
2720	pt/u	LOCAL(ct_ret_wide), tr2
2721	andi	r1, 1, r1
2722	bne/u	r1, r63, tr2
2723LOCAL(ct_call_func):	/* Just branch to the function.  */
2724	blink	tr0, r63
2725LOCAL(ct_ret_wide):	/* Call the function, so that we can unpack its
2726			   64-bit return value.  */
2727	add.l	r18, r63, r10
2728	blink	tr0, r18
2729	ptabs	r10, tr0
2730#if __LITTLE_ENDIAN__
2731	shari	r2, 32, r3
2732	add.l	r2, r63, r2
2733#else
2734	add.l	r2, r63, r3
2735	shari	r2, 32, r2
2736#endif
2737	blink	tr0, r63
2738
2739	ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2740#endif /* L_shcompact_call_trampoline */
2741
2742#ifdef L_shcompact_return_trampoline
2743     /* This function does the converse of the code in `ret_wide'
2744	above.  It is tail-called by SHcompact functions returning
2745	64-bit non-floating-point values, to pack the 32-bit values in
2746	r2 and r3 into r2.

	In:  r2, r3 = the two 32-bit halves of the value; r18 = address
	     to return to.
	Out: r2 = the packed 64-bit value.  On little-endian targets r2
	     supplies bits 0-31 and r3 bits 32-63; on big-endian targets
	     the roles are swapped.
	Clobbers: r3, tr0.  */
2747
2748	.mode	SHmedia
2749	.section	.text..SHmedia32, "ax"
2750	.align	2
2751	.global	GLOBAL(GCC_shcompact_return_trampoline)
2752	HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2753GLOBAL(GCC_shcompact_return_trampoline):
2754	ptabs/l	r18, tr0	/* tr0 = return address taken from r18.  */
2755#if __LITTLE_ENDIAN__
2756	addz.l	r2, r63, r2	/* Zero-extend the low half held in r2.  */
2757	shlli	r3, 32, r3	/* Move the high half into bits 32-63.  */
2758#else
2759	addz.l	r3, r63, r3	/* Zero-extend the low half held in r3.  */
2760	shlli	r2, 32, r2	/* Move the high half into bits 32-63.  */
2761#endif
2762	or	r3, r2, r2	/* Merge both halves into r2.  */
2763	blink	tr0, r63	/* Return without recording a link value.  */
2764
2765	ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2766#endif /* L_shcompact_return_trampoline */
2767
2768#ifdef L_shcompact_incoming_args
2769	.section	.rodata
2770	.align	1
/* Dispatch table for GCC_shcompact_incoming_args.  Each 16-bit entry is
   the offset of a handler from ia_main_label, consumed there by ptrel.
   The dispatcher indexes the table with (nsb (cookie) - 31), so entry k
   is selected when the highest bit still set in the cookie is bit
   31 - k; the last entry is used once the cookie has become zero.  The
   bit numbers in the per-entry comments below follow from that rule.
   An entry of 1 makes ptrel target ia_main_label itself, hence the
   "just loop" remark: an invalid cookie spins forever in the
   dispatcher.  */
2771LOCAL(ia_main_table):
2772.word	1 /* Invalid, just loop */
2773.word	LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)	/* bit 30 */
2774.word	LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)	/* bit 29 */
2775.word	1 /* Invalid, just loop */
2776.word	LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)	/* bit 27 */
2777.word	LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)	/* bit 26 */
2778.word	1 /* Invalid, just loop */
2779.word	LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)	/* bit 24 */
2780.word	LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)	/* bit 23 */
2781.word	1 /* Invalid, just loop */
2782.word	LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)	/* bit 21 */
2783.word	LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)	/* bit 20 */
2784.word	1 /* Invalid, just loop */
2785.word	1 /* Invalid, just loop */
2786.word	LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)	/* bit 17 */
2787.word	LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)	/* bit 16 */
2788.word	1 /* Invalid, just loop */
2789.word	1 /* Invalid, just loop */
2790.word	LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)	/* bit 13 */
2791.word	LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)	/* bit 12 */
2792.word	1 /* Invalid, just loop */
2793.word	1 /* Invalid, just loop */
2794.word	LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)	/* bit 9 */
2795.word	LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)	/* bit 8 */
2796.word	1 /* Invalid, just loop */
2797.word	1 /* Invalid, just loop */
2798.word	LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)	/* bit 5 */
2799.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)	/* bit 4 */
2800.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)	/* bit 3 */
2801.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)	/* bit 2 */
2802.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)	/* bit 1 */
2803.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)	/* bit 0 */
2804.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)	/* cookie == 0 */
2805	.mode	SHmedia
2806	.section	.text..SHmedia32, "ax"
2807	.align	2
2808
2809     /* This function stores 64-bit general-purpose registers back in
2810	the stack, and loads the address in which each register
2811	was stored into itself.  The lower 32 bits of r17 hold the address
2812	to begin storing, and the upper 32 bits of r17 hold the cookie.
2813	Its execution time is linear on the
2814	number of registers that actually have to be copied, and it is
2815	optimized for structures larger than 64 bits, as opposed to
2816	individual `long long' arguments.  See sh.h for details on the
2817	actual bit pattern.

	Register usage inside the routine:
	  r0	  remaining cookie bits; handlers clear the field they
		  have dealt with before branching back to ia_loop.
	  r17	  current store address, advanced by 8 per register.
	  r43	  address of ia_main_table minus 31 * 2, so that the
		  doubled nsb result can be used directly as an offset.
	  r36-r41 scratch.
	  tr0	  return address (from r18), tr1 = ia_loop,
		  tr2 = handler selected for the current cookie bit.  */
2818
2819	.global	GLOBAL(GCC_shcompact_incoming_args)
2820 	FUNC(GLOBAL(GCC_shcompact_incoming_args))
2821GLOBAL(GCC_shcompact_incoming_args):
2822	ptabs/l	r18, tr0	/* Prepare to return.  */
2823	shlri	r17, 32, r0	/* Load the cookie.  */
2824	movi	((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2825	pt/l	LOCAL(ia_loop), tr1
2826	add.l	r17, r63, r17	/* Keep only the (sign-extended) low 32 bits: the store address.  */
2827	shori	((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2828LOCAL(ia_loop):
2829	nsb	r0, r36		/* Locate the highest bit still set in the cookie.  */
2830	shlli	r36, 1, r37	/* Scale to a 16-bit table index.  */
2831	ldx.w	r43, r37, r38	/* r38 = handler offset relative to ia_main_label.  */
2832LOCAL(ia_main_label):
2833	ptrel/l	r38, tr2	/* Table offsets are relative to this instruction.  */
2834	blink	tr2, r63	/* Jump to the selected handler.  */
/* "Store and load address" handlers.  Each one stores register rN at the
   address in r17, replaces rN with that address, advances r17 by 8 and
   clears rN's two-bit field in the cookie (r0).  If both bits of the
   field were set, control returns to ia_loop through tr1; otherwise
   execution falls through into the handler for the next register.  The
   chain ends at ia_r9_ld, which returns to the caller through tr0.  */
2835LOCAL(ia_r2_ld):	/* Store r2 and load its address.  */
2836	movi	3, r38
2837	shlli	r38, 29, r39	/* r39 = mask of r2's two-bit field.  */
2838	and	r0, r39, r40	/* r40 = field as found in the cookie.  */
2839	andc	r0, r39, r0	/* Clear the field in the cookie.  */
2840	stx.q	r17, r63, r2	/* Spill r2 ...  */
2841	add.l	r17, r63, r2	/* ... and make r2 point at the spilled copy.  */
2842	addi.l	r17, 8, r17
2843	beq/u	r39, r40, tr1	/* Both bits were set: back to ia_loop.  */
2844LOCAL(ia_r3_ld):	/* Store r3 and load its address.  */
2845	movi	3, r38
2846	shlli	r38, 26, r39
2847	and	r0, r39, r40
2848	andc	r0, r39, r0
2849	stx.q	r17, r63, r3
2850	add.l	r17, r63, r3
2851	addi.l	r17, 8, r17
2852	beq/u	r39, r40, tr1
2853LOCAL(ia_r4_ld):	/* Store r4 and load its address.  */
2854	movi	3, r38
2855	shlli	r38, 23, r39
2856	and	r0, r39, r40
2857	andc	r0, r39, r0
2858	stx.q	r17, r63, r4
2859	add.l	r17, r63, r4
2860	addi.l	r17, 8, r17
2861	beq/u	r39, r40, tr1
2862LOCAL(ia_r5_ld):	/* Store r5 and load its address.  */
2863	movi	3, r38
2864	shlli	r38, 20, r39
2865	and	r0, r39, r40
2866	andc	r0, r39, r0
2867	stx.q	r17, r63, r5
2868	add.l	r17, r63, r5
2869	addi.l	r17, 8, r17
2870	beq/u	r39, r40, tr1
2871LOCAL(ia_r6_ld):	/* Store r6 and load its address.  */
2872	movi	3, r38
2873	shlli	r38, 16, r39
2874	and	r0, r39, r40
2875	andc	r0, r39, r0
2876	stx.q	r17, r63, r6
2877	add.l	r17, r63, r6
2878	addi.l	r17, 8, r17
2879	beq/u	r39, r40, tr1
2880LOCAL(ia_r7_ld):	/* Store r7 and load its address.  */
2881	movi	3 << 12, r39	/* This mask fits a movi immediate, so no shift is needed.  */
2882	and	r0, r39, r40
2883	andc	r0, r39, r0
2884	stx.q	r17, r63, r7
2885	add.l	r17, r63, r7
2886	addi.l	r17, 8, r17
2887	beq/u	r39, r40, tr1
2888LOCAL(ia_r8_ld):	/* Store r8 and load its address.  */
2889	movi	3 << 8, r39
2890	and	r0, r39, r40
2891	andc	r0, r39, r0
2892	stx.q	r17, r63, r8
2893	add.l	r17, r63, r8
2894	addi.l	r17, 8, r17
2895	beq/u	r39, r40, tr1
2896LOCAL(ia_r9_ld):	/* Store r9 and load its address.  */
2897	stx.q	r17, r63, r9
2898	add.l	r17, r63, r9
2899	blink	tr0, r63	/* Last register of the chain: return to the caller.  */
/* Single-register "push" handlers.  Each one clears the cookie bit that
   selected it, stores rN at the address in r17, advances r17 by 8 and
   returns to ia_loop through tr1.  Unlike the *_ld handlers, rN itself
   is left unchanged.  */
2900LOCAL(ia_r2_push):	/* Push r2 onto the stack.  */
2901	movi	1, r38
2902	shlli	r38, 29, r39	/* r39 = the cookie bit for "push r2".  */
2903	andc	r0, r39, r0	/* Mark it as handled.  */
2904	stx.q	r17, r63, r2
2905	addi.l	r17, 8, r17
2906	blink	tr1, r63	/* Back to ia_loop.  */
2907LOCAL(ia_r3_push):	/* Push r3 onto the stack.  */
2908	movi	1, r38
2909	shlli	r38, 26, r39
2910	andc	r0, r39, r0
2911	stx.q	r17, r63, r3
2912	addi.l	r17, 8, r17
2913	blink	tr1, r63
2914LOCAL(ia_r4_push):	/* Push r4 onto the stack.  */
2915	movi	1, r38
2916	shlli	r38, 23, r39
2917	andc	r0, r39, r0
2918	stx.q	r17, r63, r4
2919	addi.l	r17, 8, r17
2920	blink	tr1, r63
2921LOCAL(ia_r5_push):	/* Push r5 onto the stack.  */
2922	movi	1, r38
2923	shlli	r38, 20, r39
2924	andc	r0, r39, r0
2925	stx.q	r17, r63, r5
2926	addi.l	r17, 8, r17
2927	blink	tr1, r63
2928LOCAL(ia_r6_push):	/* Push r6 onto the stack.  */
2929	movi	1, r38
2930	shlli	r38, 16, r39
2931	andc	r0, r39, r0
2932	stx.q	r17, r63, r6
2933	addi.l	r17, 8, r17
2934	blink	tr1, r63
2935LOCAL(ia_r7_push):	/* Push r7 onto the stack.  */
2936	movi	1 << 12, r39	/* This bit fits a movi immediate, so no shift is needed.  */
2937	andc	r0, r39, r0
2938	stx.q	r17, r63, r7
2939	addi.l	r17, 8, r17
2940	blink	tr1, r63
2941LOCAL(ia_r8_push):	/* Push r8 onto the stack.  */
2942	movi	1 << 8, r39
2943	andc	r0, r39, r0
2944	stx.q	r17, r63, r8
2945	addi.l	r17, 8, r17
2946	blink	tr1, r63
/* Push the last N registers of r3..r9 in one go, N being the value held
   in cookie bits 1-3.  Every step of the sequence below occupies two
   instructions (8 bytes), the final one being "store r9; return", so
   the entry point is ia_end_of_push_seq - 8 * N.  Do not insert or
   remove instructions between ia_stack_of_push_seq and
   ia_end_of_push_seq without keeping that 8-bytes-per-register layout
   intact.  */
2947LOCAL(ia_push_seq):	/* Push a sequence of registers onto the stack.  */
2948	andi	r0, 7 << 1, r38	/* r38 = 2 * N.  */
2949	movi	(LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2950	shlli	r38, 2, r39	/* r39 = 8 * N bytes of code to execute.  */
2951	shori	LOCAL(ia_end_of_push_seq) & 65535, r40
2952	sub.l	r40, r39, r41	/* r41 = address of the first store to run.  */
2953	ptabs/l	r41, tr2
2954	blink	tr2, r63
2955LOCAL(ia_stack_of_push_seq):	 /* Beginning of push sequence.  */
2956	stx.q	r17, r63, r3
2957	addi.l	r17, 8, r17
2958	stx.q	r17, r63, r4
2959	addi.l	r17, 8, r17
2960	stx.q	r17, r63, r5
2961	addi.l	r17, 8, r17
2962	stx.q	r17, r63, r6
2963	addi.l	r17, 8, r17
2964	stx.q	r17, r63, r7
2965	addi.l	r17, 8, r17
2966	stx.q	r17, r63, r8
2967	addi.l	r17, 8, r17
2968LOCAL(ia_r9_push):	/* Push r9 onto the stack.  */
2969	stx.q	r17, r63, r9	/* r17 is not advanced after this final store.  */
2970LOCAL(ia_return):	/* Return.  */
2971	blink	tr0, r63
2972LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
2973	ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2974#endif /* L_shcompact_incoming_args */
2975#endif
2976#if __SH5__
2977#ifdef L_nested_trampoline
/* Template for nested-function trampolines.  The code finds its own
   address, loads a branch target from offset 24 and a second word into
   r1 from the slot that follows it (offset 32 when __SH5__ == 64,
   offset 28 otherwise), then jumps to the target.  Nothing in this file
   fills in those data slots.
   NOTE(review): presumably the template is copied somewhere writable
   and the two slots are set to the nested function's address and its
   static chain value -- confirm against the trampoline setup in sh.c.
   Clobbers: r0, r1, tr0, tr1.  */
2978#if __SH5__ == 32
2979	.section	.text..SHmedia32,"ax"
2980#else
2981	.text
2982#endif
2983	.align	3 /* It is copied in units of 8 bytes in SHmedia mode.  */
2984	.global	GLOBAL(GCC_nested_trampoline)
2985	HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2986GLOBAL(GCC_nested_trampoline):
2987	.mode	SHmedia
2988	ptrel/u	r63, tr0	/* PC-relative target with a zero offset ...  */
2989	gettr	tr0, r0		/* ... gives this code's own address in r0.  */
2990#if __SH5__ == 64
2991	ld.q	r0, 24, r1	/* Branch target: 64-bit slot at offset 24.  */
2992#else
2993	ld.l	r0, 24, r1	/* Branch target: 32-bit slot at offset 24.  */
2994#endif
2995	ptabs/l	r1, tr1
2996#if __SH5__ == 64
2997	ld.q	r0, 32, r1	/* Second slot; its value is handed over in r1.  */
2998#else
2999	ld.l	r0, 28, r1	/* Second slot; its value is handed over in r1.  */
3000#endif
3001	blink	tr1, r63	/* Jump to the target without linking.  */
3002
3003	ENDFUNC(GLOBAL(GCC_nested_trampoline))
3004#endif /* L_nested_trampoline */
3005#endif /* __SH5__ */
3006#if __SH5__ == 32
3007#ifdef L_push_pop_shmedia_regs
/* GCC_push_shmedia_regs (FPU builds) / GCC_push_shmedia_regs_nofpu.
   Saves a fixed register set below the incoming stack pointer:

     - FPU builds only: dr36, dr38, ..., dr62 in a 14-quadword area;
     - then a 27-quadword area holding r28-r35 (slots 0-7),
       r44-r59 (slots 8-23) and tr5-tr7 (slots 24-26).

   r60-r62 are used to carry tr5-tr7 to memory, so their own incoming
   values are overwritten and not saved.  On return r15 has been lowered
   by 27*8 bytes (41*8 in FPU builds).  Returns through r18; tr0 is
   clobbered.  The matching restore is GCC_pop_shmedia_regs[_nofpu]
   below.  */
3008	.section	.text..SHmedia32,"ax"
3009	.mode	SHmedia
3010	.align	2
3011#ifndef __SH4_NOFPU__
3012	.global	GLOBAL(GCC_push_shmedia_regs)
3013	FUNC(GLOBAL(GCC_push_shmedia_regs))
3014GLOBAL(GCC_push_shmedia_regs):
3015	addi.l	r15, -14*8, r15	/* Room for the 14 double-precision registers.  */
3016	fst.d	r15, 13*8, dr62
3017	fst.d	r15, 12*8, dr60
3018	fst.d	r15, 11*8, dr58
3019	fst.d	r15, 10*8, dr56
3020	fst.d	r15,  9*8, dr54
3021	fst.d	r15,  8*8, dr52
3022	fst.d	r15,  7*8, dr50
3023	fst.d	r15,  6*8, dr48
3024	fst.d	r15,  5*8, dr46
3025	fst.d	r15,  4*8, dr44
3026	fst.d	r15,  3*8, dr42
3027	fst.d	r15,  2*8, dr40
3028	fst.d	r15,  1*8, dr38
3029	fst.d	r15,  0*8, dr36
	/* Fall through into the integer / target-register part.  */
3030#else /* __SH4_NOFPU__ */
3031	.global	GLOBAL(GCC_push_shmedia_regs_nofpu)
3032	FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
3033GLOBAL(GCC_push_shmedia_regs_nofpu):
3034#endif /* ! __SH4_NOFPU__ */
3035	ptabs/l	r18, tr0	/* Prepare to return.  */
3036	addi.l	r15, -27*8, r15	/* Room for 24 general + 3 target registers.  */
3037	gettr	tr7, r62	/* Target registers cannot be stored directly ...  */
3038	gettr	tr6, r61
3039	gettr	tr5, r60
3040	st.q	r15, 26*8, r62	/* ... so they are saved via r60-r62.  */
3041	st.q	r15, 25*8, r61
3042	st.q	r15, 24*8, r60
3043	st.q	r15, 23*8, r59
3044	st.q	r15, 22*8, r58
3045	st.q	r15, 21*8, r57
3046	st.q	r15, 20*8, r56
3047	st.q	r15, 19*8, r55
3048	st.q	r15, 18*8, r54
3049	st.q	r15, 17*8, r53
3050	st.q	r15, 16*8, r52
3051	st.q	r15, 15*8, r51
3052	st.q	r15, 14*8, r50
3053	st.q	r15, 13*8, r49
3054	st.q	r15, 12*8, r48
3055	st.q	r15, 11*8, r47
3056	st.q	r15, 10*8, r46
3057	st.q	r15,  9*8, r45
3058	st.q	r15,  8*8, r44
3059	st.q	r15,  7*8, r35
3060	st.q	r15,  6*8, r34
3061	st.q	r15,  5*8, r33
3062	st.q	r15,  4*8, r32
3063	st.q	r15,  3*8, r31
3064	st.q	r15,  2*8, r30
3065	st.q	r15,  1*8, r29
3066	st.q	r15,  0*8, r28
3067	blink	tr0, r63
3068#ifndef __SH4_NOFPU__
3069	ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
3070#else
3071	ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
3072#endif
/* GCC_pop_shmedia_regs (FPU builds) / GCC_pop_shmedia_regs_nofpu.
   Reloads the registers from the frame layout written by
   GCC_push_shmedia_regs[_nofpu] above and releases the frame.

   r0 holds the frame size to release: 41*8 when entered through the
   FPU entry point (which also reloads dr36..dr62 from slots 27-40 and
   then jumps to .L0), 27*8 otherwise.  tr5-tr7 are reloaded through
   r60-r62, which therefore end up holding the target-register values.
   Returns through r18.  Clobbers r0, tr0 and (FPU entry) tr1.  */
3073#ifndef __SH4_NOFPU__
3074	.global	GLOBAL(GCC_pop_shmedia_regs)
3075	FUNC(GLOBAL(GCC_pop_shmedia_regs))
3076GLOBAL(GCC_pop_shmedia_regs):
3077	pt	.L0, tr1	/* Shared integer part, entered past its movi.  */
3078	movi	41*8, r0	/* Frame size including the 14 FP slots.  */
3079	fld.d	r15, 40*8, dr62
3080	fld.d	r15, 39*8, dr60
3081	fld.d	r15, 38*8, dr58
3082	fld.d	r15, 37*8, dr56
3083	fld.d	r15, 36*8, dr54
3084	fld.d	r15, 35*8, dr52
3085	fld.d	r15, 34*8, dr50
3086	fld.d	r15, 33*8, dr48
3087	fld.d	r15, 32*8, dr46
3088	fld.d	r15, 31*8, dr44
3089	fld.d	r15, 30*8, dr42
3090	fld.d	r15, 29*8, dr40
3091	fld.d	r15, 28*8, dr38
3092	fld.d	r15, 27*8, dr36
3093	blink	tr1, r63	/* Skip the "movi 27*8" below; r0 is already set.  */
3094#else /* __SH4_NOFPU__	*/
3095	.global	GLOBAL(GCC_pop_shmedia_regs_nofpu)
3096	FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3097GLOBAL(GCC_pop_shmedia_regs_nofpu):
3098#endif /* ! __SH4_NOFPU__	*/
3099	movi	27*8, r0	/* Frame size without FP slots.  */
3100.L0:
3101	ptabs	r18, tr0	/* Prepare to return.  */
3102	ld.q	r15, 26*8, r62
3103	ld.q	r15, 25*8, r61
3104	ld.q	r15, 24*8, r60
3105	ptabs	r62, tr7	/* Reinstate tr7, tr6 and tr5 from the saved values.  */
3106	ptabs	r61, tr6
3107	ptabs	r60, tr5
3108	ld.q	r15, 23*8, r59
3109	ld.q	r15, 22*8, r58
3110	ld.q	r15, 21*8, r57
3111	ld.q	r15, 20*8, r56
3112	ld.q	r15, 19*8, r55
3113	ld.q	r15, 18*8, r54
3114	ld.q	r15, 17*8, r53
3115	ld.q	r15, 16*8, r52
3116	ld.q	r15, 15*8, r51
3117	ld.q	r15, 14*8, r50
3118	ld.q	r15, 13*8, r49
3119	ld.q	r15, 12*8, r48
3120	ld.q	r15, 11*8, r47
3121	ld.q	r15, 10*8, r46
3122	ld.q	r15,  9*8, r45
3123	ld.q	r15,  8*8, r44
3124	ld.q	r15,  7*8, r35
3125	ld.q	r15,  6*8, r34
3126	ld.q	r15,  5*8, r33
3127	ld.q	r15,  4*8, r32
3128	ld.q	r15,  3*8, r31
3129	ld.q	r15,  2*8, r30
3130	ld.q	r15,  1*8, r29
3131	ld.q	r15,  0*8, r28
3132	add.l	r15, r0, r15	/* Release the whole save area.  */
3133	blink	tr0, r63
3134
3135#ifndef __SH4_NOFPU__
3136	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3137#else
3138	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3139#endif
3140#endif /* L_push_pop_shmedia_regs */
3141#endif /* __SH5__ == 32 */
3142
3143#ifdef L_div_table
3144#if __SH5__
3145#if defined(__pic__) && __SHMEDIA__
/* PIC SHmedia build only: a copy of the signed 32-bit divide is emitted
   next to the division table so that the table can be reached
   PC-relatively (ptb + gettr) instead of through an absolute address.

   Method, as far as the code below shows: the divisor is normalized
   with nsb/shlld, its top bits index the table to fetch an 8-bit factor
   and a 16-bit constant that together give a first approximation of the
   reciprocal ("some 11 bit inverse"), one step of the form
   2*x - d*x*x refines it ("some 22 bit inverse"), and the quotient is
   obtained by multiplying with the dividend, applying a correction
   term and shifting right by an amount derived from the normalization
   count.  No check for a zero divisor is made here.

   Note that .global / FUNC for the symbol are issued before the section
   is selected, and .global is repeated in front of the label.  */
3146	.global	GLOBAL(sdivsi3)
3147	FUNC(GLOBAL(sdivsi3))
3148#if __SH5__ == 32
3149	.section	.text..SHmedia32,"ax"
3150#else
3151	.text
3152#endif
3153#if 0
3154/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3155   in a text section does not work (at least for shared libraries):
3156   the linker sets the LSB of the address as if this was SHmedia code.  */
3157#define TEXT_DATA_BUG
3158#endif
3159	.align	2
3160 // inputs: r4,r5
3161 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3162 // result in r0
3163 .global GLOBAL(sdivsi3)
3164GLOBAL(sdivsi3):
3165#ifdef TEXT_DATA_BUG
3166 ptb datalabel Local_div_table,tr0
3167#else
3168 ptb GLOBAL(div_table_internal),tr0
3169#endif
3170 nsb r5, r1
3171 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
3172 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
3173 /* bubble */
3174 gettr tr0,r20        /* r20 = address of the table's centre label.  */
3175 ldx.ub r20, r21, r19 // u0.8
3176 shari r25, 32, r25   // normalize to s2.30
3177 shlli r21, 1, r21    /* Byte index -> 16-bit word offset.  */
3178 muls.l r25, r19, r19 // s2.38
3179 ldx.w r20, r21, r21  // s2.14
3180  ptabs r18, tr0      /* Capture the return address: r18 is reused as scratch below.  */
3181 shari r19, 24, r19   // truncate to s2.14
3182 sub r21, r19, r19    // some 11 bit inverse in s1.14
3183 muls.l r19, r19, r21 // u0.28
3184  sub r63, r1, r1     /* r1 = -(normalization shift) ...  */
3185  addi r1, 92, r1     /* ... + 92 = final right-shift count.  */
3186 muls.l r25, r21, r18 // s2.58
3187 shlli r19, 45, r19   // multiply by two and convert to s2.58
3188  /* bubble */
3189 sub r19, r18, r18
3190 shari r18, 28, r18   // some 22 bit inverse in s1.30
3191 muls.l r18, r25, r0  // s2.60
3192  muls.l r18, r4, r25 // s32.30
3193  /* bubble */
3194 shari r0, 16, r19   // s-16.44
3195 muls.l r19, r18, r19 // s-16.74
3196  shari r25, 63, r0   /* r0 = 0 or -1 according to the sign of the product.  */
3197  shari r4, 14, r18   // s19.-14
3198 shari r19, 30, r19   // s-16.44
3199 muls.l r19, r18, r19 // s15.30
3200  xor r21, r0, r21    // You could also use the constant 1 << 27.
3201  add r21, r25, r21
3202 sub r21, r19, r21
3203 shard r21, r1, r21
3204 sub r21, r0, r0      /* Subtracting the 0 / -1 sign mask yields the final quotient.  */
3205 blink tr0, r63
3206	ENDFUNC(GLOBAL(sdivsi3))
3207/* This table has been generated by divtab.c .
3208Defects for bias -330:
3209   Max defect: 6.081536e-07 at -1.000000e+00
3210   Min defect: 2.849516e-08 at 1.030651e+00
3211   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3212   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3213   Defect at 1: 1.238659e-07
3214   Defect at -2: 1.061708e-07 */
3215#else /* ! __pic__ || ! __SHMEDIA__ */
3216	.section	.rodata
3217#endif /* __pic__ */
/* Text-section copy of the division table, addressed through the
   non-exported label Local_div_table.  It is only assembled when
   TEXT_DATA_BUG is defined, and the only definition of that macro
   visible in this file sits inside "#if 0", so this copy is normally
   not built.  The contents and layout are the same as those of
   GLOBAL(div_table) below: 64 bytes on either side of the label, with
   the 16 bytes adjoining it on each side left as padding.  The
   directive at the end switches back to .rodata for the exported
   table.  */
3218#if defined(TEXT_DATA_BUG) && defined(__pic__) && __SHMEDIA__
3219	.balign 2
3220	.type	Local_div_table,@object
3221	.size	Local_div_table,128
3222/* negative division constants */
3223	.word	-16638
3224	.word	-17135
3225	.word	-17737
3226	.word	-18433
3227	.word	-19103
3228	.word	-19751
3229	.word	-20583
3230	.word	-21383
3231	.word	-22343
3232	.word	-23353
3233	.word	-24407
3234	.word	-25582
3235	.word	-26863
3236	.word	-28382
3237	.word	-29965
3238	.word	-31800
3239/* negative division factors */
3240	.byte	66
3241	.byte	70
3242	.byte	75
3243	.byte	81
3244	.byte	87
3245	.byte	93
3246	.byte	101
3247	.byte	109
3248	.byte	119
3249	.byte	130
3250	.byte	142
3251	.byte	156
3252	.byte	172
3253	.byte	192
3254	.byte	214
3255	.byte	241
3256	.skip 16
3257Local_div_table:
3258	.skip 16
3259/* positive division factors */
3260	.byte	241
3261	.byte	214
3262	.byte	192
3263	.byte	172
3264	.byte	156
3265	.byte	142
3266	.byte	130
3267	.byte	119
3268	.byte	109
3269	.byte	101
3270	.byte	93
3271	.byte	87
3272	.byte	81
3273	.byte	75
3274	.byte	70
3275	.byte	66
3276/* positive division constants */
3277	.word	31801
3278	.word	29966
3279	.word	28383
3280	.word	26864
3281	.word	25583
3282	.word	24408
3283	.word	23354
3284	.word	22344
3285	.word	21384
3286	.word	20584
3287	.word	19752
3288	.word	19104
3289	.word	18434
3290	.word	17738
3291	.word	17136
3292	.word	16639
3293	.section	.rodata
3294#endif /* TEXT_DATA_BUG */
/* Exported division table.  The symbol GLOBAL(div_table) (hidden alias
   div_table_internal) points at the CENTRE of the 128-byte object so
   that it can be indexed with a signed value i:

     byte offset  -64 .. -33   16-bit constants for negative divisors
     byte offset  -32 .. -17   8-bit factors for negative divisors
     byte offset  -16 .. +15   padding (.skip)
     byte offset  +16 .. +31   8-bit factors for positive divisors
     byte offset  +32 .. +63   16-bit constants for positive divisors

   The SHmedia sdivsi3 above (when it is built) reads the factor with a
   byte load at index i and the constant with a word load at offset
   2 * i, which is why the factor and constant areas mirror each other
   around the label.  */
3295	.balign 2
3296	.type	GLOBAL(div_table),@object
3297	.size	GLOBAL(div_table),128
3298/* negative division constants */
3299	.word	-16638
3300	.word	-17135
3301	.word	-17737
3302	.word	-18433
3303	.word	-19103
3304	.word	-19751
3305	.word	-20583
3306	.word	-21383
3307	.word	-22343
3308	.word	-23353
3309	.word	-24407
3310	.word	-25582
3311	.word	-26863
3312	.word	-28382
3313	.word	-29965
3314	.word	-31800
3315/* negative division factors */
3316	.byte	66
3317	.byte	70
3318	.byte	75
3319	.byte	81
3320	.byte	87
3321	.byte	93
3322	.byte	101
3323	.byte	109
3324	.byte	119
3325	.byte	130
3326	.byte	142
3327	.byte	156
3328	.byte	172
3329	.byte	192
3330	.byte	214
3331	.byte	241
3332	.skip 16
3333	.global	GLOBAL(div_table)
3334GLOBAL(div_table):
3335	HIDDEN_ALIAS(div_table_internal,div_table)
3336	.skip 16
3337/* positive division factors */
3338	.byte	241
3339	.byte	214
3340	.byte	192
3341	.byte	172
3342	.byte	156
3343	.byte	142
3344	.byte	130
3345	.byte	119
3346	.byte	109
3347	.byte	101
3348	.byte	93
3349	.byte	87
3350	.byte	81
3351	.byte	75
3352	.byte	70
3353	.byte	66
3354/* positive division constants */
3355	.word	31801
3356	.word	29966
3357	.word	28383
3358	.word	26864
3359	.word	25583
3360	.word	24408
3361	.word	23354
3362	.word	22344
3363	.word	21384
3364	.word	20584
3365	.word	19752
3366	.word	19104
3367	.word	18434
3368	.word	17738
3369	.word	17136
3370	.word	16639
3371
3372#elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3373/* This code uses shld, thus is not suitable for SH1 / SH2.  */
3374
3375/* Signed / unsigned division without use of FPU, optimized for SH4.
3376   Uses a lookup table for divisors in the range -128 .. +128, and
3377   div1 with case distinction for larger divisors in three more ranges.
3378   The code is lumped together with the table to allow the use of mova.

   udivsi3_i4i: r4 = dividend, r5 = divisor, quotient returned in r0.
   r4 and r5 are pushed on the stack and restored before returning; r1
   is scratch, and the table path uses dmulu.l and reads the result
   from mach.  The paths chosen by the tests below are:

     divisor <= 128		 table of normalized inverses (udiv_le128)
     128 < divisor < 64K	 div1 steps, finished at udiv_25, which
				 lives inside sdivsi3_i4i
     divisor >= 64K		 fewer div1 steps (udiv_ge64k), or only
				 eight of them (udiv_r8) when
				 divisor > (dividend >> 8), i.e. when the
				 quotient is below 256

   Labels without the "u" prefix (div_le128, div_ge64k, div_r8, ...) are
   entry points used by sdivsi3_i4i, which arrives with r4 and r5
   already saved on the stack.  */
3379#ifdef __LITTLE_ENDIAN__
/* Byte offsets, inside a 32-bit word in memory, of bits 0-7 (L_LSB),
   bits 8-15 (L_LSWMSB) and bits 16-23 (L_MSWLSB).  */
3380#define L_LSB 0
3381#define L_LSWMSB 1
3382#define L_MSWLSB 2
3383#else
3384#define L_LSB 3
3385#define L_LSWMSB 2
3386#define L_MSWLSB 1
3387#endif
3388
3389	.balign 4
3390	.global	GLOBAL(udivsi3_i4i)
3391	FUNC(GLOBAL(udivsi3_i4i))
3392GLOBAL(udivsi3_i4i):
3393	mov.w LOCAL(c128_w), r1	/* r1 = 128.  */
3394	div0u
3395	mov r4,r0
3396	shlr8 r0		/* r0 = dividend >> 8.  */
3397	cmp/hi r1,r5		/* T = divisor > 128 (unsigned).  */
3398	extu.w r5,r1		/* r1 = low 16 bits of the divisor.  */
3399	bf LOCAL(udiv_le128)
3400	cmp/eq r5,r1		/* T = divisor fits in 16 bits.  */
3401	bf LOCAL(udiv_ge64k)
3402	shlr r0			/* r0 = dividend >> 9.  */
3403	mov r5,r1		/* Keep the unshifted divisor for the stack save.  */
3404	shll16 r5
3405	mov.l r4,@-r15		/* Save the caller's r4.  */
3406	div1 r5,r0
3407	mov.l r1,@-r15		/* Save the caller's r5.  */
3408	div1 r5,r0
3409	div1 r5,r0
3410	bra LOCAL(udiv_25)	/* Continue in the code shared with sdivsi3_i4i.  */
3411	div1 r5,r0		/* (delay slot) */
3412
3413LOCAL(div_le128):	/* Entry from sdivsi3_i4i; r4/r5 are already on the stack.  */
3414	mova LOCAL(div_table_ix),r0
3415	bra LOCAL(div_le128_2)
3416	mov.b @(r0,r5),r1	/* (delay slot) r1 = div_table_ix[divisor].  */
3417LOCAL(udiv_le128):
3418	mov.l r4,@-r15
3419	mova LOCAL(div_table_ix),r0
3420	mov.b @(r0,r5),r1	/* r1 = div_table_ix[divisor].  */
3421	mov.l r5,@-r15
3422LOCAL(div_le128_2):
3423	mova LOCAL(div_table_inv),r0
3424	mov.l @(r0,r1),r1	/* r1 = inverse selected by that index.  */
3425	mov r5,r0
3426	tst #0xfe,r0		/* T = ((divisor & 0xfe) == 0).  */
3427	mova LOCAL(div_table_clz),r0
3428	dmulu.l r1,r4		/* mach:macl = inverse * dividend.  */
3429	mov.b @(r0,r5),r1	/* r1 = shift count for this divisor.  */
3430	bt/s LOCAL(div_by_1)
3431	mov r4,r0		/* (delay slot) r0 = dividend.  */
3432	mov.l @r15+,r5
3433	sts mach,r0
3434	/* clrt */		/* Not needed: T is 0 here because the bt/s above fell through.  */
3435	addc r4,r0		/* r0 = mach + dividend, carry left in T.  */
3436	mov.l @r15+,r4
3437	rotcr r0		/* Bring the carry back in at the top.  */
3438	rts
3439	shld r1,r0		/* (delay slot) shift by the table's count.  */
3440
3441LOCAL(div_by_1_neg):
3442	neg r4,r0
3443LOCAL(div_by_1):	/* The result is already in r0; just restore r5 and r4.  */
3444	mov.l @r15+,r5
3445	rts
3446	mov.l @r15+,r4
3447
3448LOCAL(div_ge64k):	/* Entry from sdivsi3_i4i; T = divisor > (dividend >> 8).  */
3449	bt/s LOCAL(div_r8)
3450	div0u			/* (delay slot) */
3451	shll8 r5
3452	bra LOCAL(div_ge64k_2)
3453	div1 r5,r0		/* (delay slot) */
3454LOCAL(udiv_ge64k):
3455	cmp/hi r0,r5		/* T = divisor > (dividend >> 8).  */
3456	mov r5,r1
3457	bt LOCAL(udiv_r8)
3458	shll8 r5
3459	mov.l r4,@-r15
3460	div1 r5,r0
3461	mov.l r1,@-r15
3462LOCAL(div_ge64k_2):
3463	div1 r5,r0
3464	mov.l LOCAL(zero_l),r1
3465	.rept 4
3466	div1 r5,r0
3467	.endr
3468	mov.l r1,@-r15		/* Push a scratch word (loaded from zero_l) for quotient bytes.  */
3469	div1 r5,r0
3470	mov.w LOCAL(m256_w),r1	/* r1 = 0xff00, sign-extended by mov.w.  */
3471	div1 r5,r0
3472	mov.b r0,@(L_LSWMSB,r15)	/* Park the low byte of r0 in bits 8-15 of the scratch word.  */
3473	xor r4,r0		/* These three instructions compute  */
3474	and r1,r0		/* r0 = (r0 & r1) | (r4 & ~r1), i.e. they  */
3475	bra LOCAL(div_ge64k_end)	/* bring in the low byte of the dividend.  */
3476	xor r4,r0		/* (delay slot) */
3477
3478LOCAL(div_r8):		/* Entry from sdivsi3_i4i for quotients below 256.  */
3479	shll16 r4
3480	bra LOCAL(div_r8_2)
3481	shll8 r4		/* (delay slot) r4 = dividend << 24.  */
3482LOCAL(udiv_r8):
3483	mov.l r4,@-r15
3484	shll16 r4
3485	clrt			/* The cmp/hi above left T set.  */
3486	shll8 r4		/* r4 = dividend << 24.  */
3487	mov.l r5,@-r15
3488LOCAL(div_r8_2):
	/* From here on r1 holds the running remainder, starting from
	   dividend >> 8.  r0 serves as a shift register: the remaining
	   dividend bits leave through T at the top while the quotient
	   bits produced by div1 enter at the bottom.  r4 is reused to
	   hold the divisor for the last step, so that r5 can be restored
	   one instruction earlier.  */
3489	rotcl r4
3490	mov r0,r1
3491	div1 r5,r1
3492	mov r4,r0
3493	rotcl r0
3494	mov r5,r4
3495	div1 r5,r1
3496	.rept 5
3497	rotcl r0; div1 r5,r1
3498	.endr
3499	rotcl r0
3500	mov.l @r15+,r5
3501	div1 r4,r1
3502	mov.l @r15+,r4
3503	rts
3504	rotcl r0		/* (delay slot) shift in the last quotient bit.  */
3505
3506	ENDFUNC(GLOBAL(udivsi3_i4i))
3507
3508	.global	GLOBAL(sdivsi3_i4i)
3509	FUNC(GLOBAL(sdivsi3_i4i))
3510	/* This is link-compatible with a GLOBAL(sdivsi3) call,
3511	   but we effectively clobber only r1.

	   r4 = dividend, r5 = divisor, quotient returned in r0.  Both
	   inputs are pushed on the stack and restored before returning.
	   The operands are first replaced by their magnitudes; the
	   unsigned division is then carried out either on the
	   "pos_result" paths (shared with udivsi3_i4i above) or on the
	   "neg_result" paths, which negate the quotient at the end.  */
3512GLOBAL(sdivsi3_i4i):
3513	mov.l r4,@-r15		/* Save the caller's r4.  */
3514	cmp/pz r5		/* T = divisor >= 0.  */
3515	mov.w LOCAL(c128_w), r1	/* r1 = 128.  */
3516	bt/s LOCAL(pos_divisor)
3517	cmp/pz r4		/* (delay slot) T = dividend >= 0.  */
3518	mov.l r5,@-r15		/* Save the caller's r5.  */
3519	neg r5,r5		/* r5 = |divisor|.  */
3520	bt/s LOCAL(neg_result)	/* dividend >= 0, divisor < 0.  */
3521	cmp/hi r1,r5		/* (delay slot) T = |divisor| > 128.  */
3522	neg r4,r4		/* Both negative: r4 = |dividend|.  */
3523LOCAL(pos_result):
3524	extu.w r5,r0
3525	bf LOCAL(div_le128)	/* |divisor| <= 128: table path in udivsi3_i4i.  */
3526	cmp/eq r5,r0		/* T = divisor fits in 16 bits.  */
3527	mov r4,r0
3528	shlr8 r0		/* r0 = dividend >> 8.  */
3529	bf/s LOCAL(div_ge64k)
3530	cmp/hi r0,r5		/* (delay slot) T = divisor > (dividend >> 8).  */
3531	div0u
3532	shll16 r5
3533	div1 r5,r0
3534	div1 r5,r0
3535	div1 r5,r0
3536LOCAL(udiv_25):		/* udivsi3_i4i joins here for 128 < divisor < 64K.  */
3537	mov.l LOCAL(zero_l),r1
3538	div1 r5,r0
3539	div1 r5,r0
3540	mov.l r1,@-r15		/* Push a scratch word (loaded from zero_l) for quotient bytes.  */
3541	.rept 3
3542	div1 r5,r0
3543	.endr
3544	mov.b r0,@(L_MSWLSB,r15)	/* Park the low byte of r0 in bits 16-23 of the scratch word.  */
3545	xtrct r4,r0		/* These two instructions compute  */
3546	swap.w r0,r0		/* r0 = (r0 & 0xffff0000) | (r4 & 0xffff).  */
3547	.rept 8
3548	div1 r5,r0
3549	.endr
3550	mov.b r0,@(L_LSWMSB,r15)	/* Park the next byte in bits 8-15 of the scratch word.  */
3551LOCAL(div_ge64k_end):
3552	.rept 8
3553	div1 r5,r0
3554	.endr
3555	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3556	extu.b r0,r0
3557	mov.l @r15+,r5		/* Restore the caller's r5.  */
3558	or r4,r0		/* Merge the parked bytes with the latest one.  */
3559	mov.l @r15+,r4		/* Restore the caller's r4.  */
3560	rts
3561	rotcl r0		/* (delay slot) shift in the last quotient bit.  */
3562
3563LOCAL(div_le128_neg):	/* Table path, negative result; r0 = low 16 bits of the divisor.  */
3564	tst #0xfe,r0		/* T = ((divisor & 0xfe) == 0).  */
3565	mova LOCAL(div_table_ix),r0
3566	mov.b @(r0,r5),r1
3567	mova LOCAL(div_table_inv),r0
3568	bt/s LOCAL(div_by_1_neg)
3569	mov.l @(r0,r1),r1	/* (delay slot) r1 = inverse selected by the index.  */
3570	mova LOCAL(div_table_clz),r0
3571	dmulu.l r1,r4
3572	mov.b @(r0,r5),r1	/* r1 = shift count for this divisor.  */
3573	mov.l @r15+,r5
3574	sts mach,r0
3575	/* clrt */		/* Not needed: T is 0 here because the bt/s above fell through.  */
3576	addc r4,r0
3577	mov.l @r15+,r4
3578	rotcr r0
3579	shld r1,r0
3580	rts
3581	neg r0,r0		/* (delay slot) apply the sign.  */
3582
3583LOCAL(pos_divisor):	/* divisor >= 0; T = dividend >= 0.  */
3584	mov.l r5,@-r15		/* Save the caller's r5.  */
3585	bt/s LOCAL(pos_result)
3586	cmp/hi r1,r5		/* (delay slot) T = divisor > 128.  */
3587	neg r4,r4		/* r4 = |dividend|; the result will be negative.  */
3588LOCAL(neg_result):
3589	extu.w r5,r0
3590	bf LOCAL(div_le128_neg)
3591	cmp/eq r5,r0		/* T = divisor fits in 16 bits.  */
3592	mov r4,r0
3593	shlr8 r0
3594	bf/s LOCAL(div_ge64k_neg)
3595	cmp/hi r0,r5		/* (delay slot) T = divisor > (dividend >> 8).  */
3596	div0u
3597	mov.l LOCAL(zero_l),r1
3598	shll16 r5
3599	div1 r5,r0
3600	mov.l r1,@-r15		/* Push a scratch word (loaded from zero_l) for quotient bytes.  */
3601	.rept 7
3602	div1 r5,r0
3603	.endr
3604	mov.b r0,@(L_MSWLSB,r15)
3605	xtrct r4,r0		/* r0 = (r0 & 0xffff0000) | (r4 & 0xffff), as at udiv_25.  */
3606	swap.w r0,r0
3607	.rept 8
3608	div1 r5,r0
3609	.endr
3610	mov.b r0,@(L_LSWMSB,r15)
3611LOCAL(div_ge64k_neg_end):
3612	.rept 8
3613	div1 r5,r0
3614	.endr
3615	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3616	extu.b r0,r1
3617	mov.l @r15+,r5
3618	or r4,r1		/* The magnitude is assembled in r1 on this path ...  */
3619LOCAL(div_r8_neg_end):
3620	mov.l @r15+,r4
3621	rotcl r1
3622	rts
3623	neg r1,r0		/* (delay slot) ... and negated into r0.  */
3624
3625LOCAL(div_ge64k_neg):	/* T = divisor > (dividend >> 8).  */
3626	bt/s LOCAL(div_r8_neg)
3627	div0u			/* (delay slot) */
3628	shll8 r5
3629	mov.l LOCAL(zero_l),r1
3630	.rept 6
3631	div1 r5,r0
3632	.endr
3633	mov.l r1,@-r15
3634	div1 r5,r0
3635	mov.w LOCAL(m256_w),r1	/* r1 = 0xff00, sign-extended by mov.w.  */
3636	div1 r5,r0
3637	mov.b r0,@(L_LSWMSB,r15)
3638	xor r4,r0		/* r0 = (r0 & r1) | (r4 & ~r1).  */
3639	and r1,r0
3640	bra LOCAL(div_ge64k_neg_end)
3641	xor r4,r0		/* (delay slot) */
3642
/* 16-bit constant 128, loaded with mov.w at the start of both
   udivsi3_i4i and sdivsi3_i4i.
   NOTE(review): presumably placed here, between code paths, to stay
   within reach of both PC-relative loads -- confirm before moving.  */
3643LOCAL(c128_w):
3644	.word 128
3645
3646LOCAL(div_r8_neg):	/* Quotient below 256, negative result.  */
3647	clrt
3648	shll16 r4
3649	mov r4,r1
3650	shll8 r1		/* r1 = dividend << 24, used as the bit shift register.  */
3651	mov r5,r4		/* Divisor copy for the last div1, after r5 is restored.  */
3652	.rept 7
3653	rotcl r1; div1 r5,r0
3654	.endr
3655	mov.l @r15+,r5
3656	rotcl r1
3657	bra LOCAL(div_r8_neg_end)
3658	div1 r4,r0		/* (delay slot) */
3659
/* Mask constant used by the xor / and / xor merge sequences.  */
3660LOCAL(m256_w):
3661	.word 0xff00
3662/* This table has been generated by divtab-sh4.c.

   div_table_clz is indexed by the divisor itself.  The table-driven
   paths of udivsi3_i4i / sdivsi3_i4i load the selected byte with mov.b
   and hand it to shld as the shift count applied after the
   multiplication by the inverse (a negative count shifts right).  Only
   entries 0 .. 127 are laid down here; the first byte of
   div_table_ix, which follows immediately, doubles as entry 128.  */
3663	.balign 4
3664LOCAL(div_table_clz):
3665	.byte	0
3666	.byte	1
3667	.byte	0
3668	.byte	-1
3669	.byte	-1
3670	.byte	-2
3671	.byte	-2
3672	.byte	-2
3673	.byte	-2
3674	.byte	-3
3675	.byte	-3
3676	.byte	-3
3677	.byte	-3
3678	.byte	-3
3679	.byte	-3
3680	.byte	-3
3681	.byte	-3
3682	.byte	-4
3683	.byte	-4
3684	.byte	-4
3685	.byte	-4
3686	.byte	-4
3687	.byte	-4
3688	.byte	-4
3689	.byte	-4
3690	.byte	-4
3691	.byte	-4
3692	.byte	-4
3693	.byte	-4
3694	.byte	-4
3695	.byte	-4
3696	.byte	-4
3697	.byte	-4
3698	.byte	-5
3699	.byte	-5
3700	.byte	-5
3701	.byte	-5
3702	.byte	-5
3703	.byte	-5
3704	.byte	-5
3705	.byte	-5
3706	.byte	-5
3707	.byte	-5
3708	.byte	-5
3709	.byte	-5
3710	.byte	-5
3711	.byte	-5
3712	.byte	-5
3713	.byte	-5
3714	.byte	-5
3715	.byte	-5
3716	.byte	-5
3717	.byte	-5
3718	.byte	-5
3719	.byte	-5
3720	.byte	-5
3721	.byte	-5
3722	.byte	-5
3723	.byte	-5
3724	.byte	-5
3725	.byte	-5
3726	.byte	-5
3727	.byte	-5
3728	.byte	-5
3729	.byte	-5
3730	.byte	-6
3731	.byte	-6
3732	.byte	-6
3733	.byte	-6
3734	.byte	-6
3735	.byte	-6
3736	.byte	-6
3737	.byte	-6
3738	.byte	-6
3739	.byte	-6
3740	.byte	-6
3741	.byte	-6
3742	.byte	-6
3743	.byte	-6
3744	.byte	-6
3745	.byte	-6
3746	.byte	-6
3747	.byte	-6
3748	.byte	-6
3749	.byte	-6
3750	.byte	-6
3751	.byte	-6
3752	.byte	-6
3753	.byte	-6
3754	.byte	-6
3755	.byte	-6
3756	.byte	-6
3757	.byte	-6
3758	.byte	-6
3759	.byte	-6
3760	.byte	-6
3761	.byte	-6
3762	.byte	-6
3763	.byte	-6
3764	.byte	-6
3765	.byte	-6
3766	.byte	-6
3767	.byte	-6
3768	.byte	-6
3769	.byte	-6
3770	.byte	-6
3771	.byte	-6
3772	.byte	-6
3773	.byte	-6
3774	.byte	-6
3775	.byte	-6
3776	.byte	-6
3777	.byte	-6
3778	.byte	-6
3779	.byte	-6
3780	.byte	-6
3781	.byte	-6
3782	.byte	-6
3783	.byte	-6
3784	.byte	-6
3785	.byte	-6
3786	.byte	-6
3787	.byte	-6
3788	.byte	-6
3789	.byte	-6
3790	.byte	-6
3791	.byte	-6
3792	.byte	-6
3793/* Lookup table translating positive divisor to index into table of
3794   normalized inverse.  N.B. the '0' entry is also the last entry of the
3795 previous table, and causes an unaligned access for division by zero.  */
3796LOCAL(div_table_ix):
3797	.byte	-6
3798	.byte	-128
3799	.byte	-128
3800	.byte	0
3801	.byte	-128
3802	.byte	-64
3803	.byte	0
3804	.byte	64
3805	.byte	-128
3806	.byte	-96
3807	.byte	-64
3808	.byte	-32
3809	.byte	0
3810	.byte	32
3811	.byte	64
3812	.byte	96
3813	.byte	-128
3814	.byte	-112
3815	.byte	-96
3816	.byte	-80
3817	.byte	-64
3818	.byte	-48
3819	.byte	-32
3820	.byte	-16
3821	.byte	0
3822	.byte	16
3823	.byte	32
3824	.byte	48
3825	.byte	64
3826	.byte	80
3827	.byte	96
3828	.byte	112
3829	.byte	-128
3830	.byte	-120
3831	.byte	-112
3832	.byte	-104
3833	.byte	-96
3834	.byte	-88
3835	.byte	-80
3836	.byte	-72
3837	.byte	-64
3838	.byte	-56
3839	.byte	-48
3840	.byte	-40
3841	.byte	-32
3842	.byte	-24
3843	.byte	-16
3844	.byte	-8
3845	.byte	0
3846	.byte	8
3847	.byte	16
3848	.byte	24
3849	.byte	32
3850	.byte	40
3851	.byte	48
3852	.byte	56
3853	.byte	64
3854	.byte	72
3855	.byte	80
3856	.byte	88
3857	.byte	96
3858	.byte	104
3859	.byte	112
3860	.byte	120
3861	.byte	-128
3862	.byte	-124
3863	.byte	-120
3864	.byte	-116
3865	.byte	-112
3866	.byte	-108
3867	.byte	-104
3868	.byte	-100
3869	.byte	-96
3870	.byte	-92
3871	.byte	-88
3872	.byte	-84
3873	.byte	-80
3874	.byte	-76
3875	.byte	-72
3876	.byte	-68
3877	.byte	-64
3878	.byte	-60
3879	.byte	-56
3880	.byte	-52
3881	.byte	-48
3882	.byte	-44
3883	.byte	-40
3884	.byte	-36
3885	.byte	-32
3886	.byte	-28
3887	.byte	-24
3888	.byte	-20
3889	.byte	-16
3890	.byte	-12
3891	.byte	-8
3892	.byte	-4
3893	.byte	0
3894	.byte	4
3895	.byte	8
3896	.byte	12
3897	.byte	16
3898	.byte	20
3899	.byte	24
3900	.byte	28
3901	.byte	32
3902	.byte	36
3903	.byte	40
3904	.byte	44
3905	.byte	48
3906	.byte	52
3907	.byte	56
3908	.byte	60
3909	.byte	64
3910	.byte	68
3911	.byte	72
3912	.byte	76
3913	.byte	80
3914	.byte	84
3915	.byte	88
3916	.byte	92
3917	.byte	96
3918	.byte	100
3919	.byte	104
3920	.byte	108
3921	.byte	112
3922	.byte	116
3923	.byte	120
3924	.byte	124
3925	.byte	-128
3926/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
3927	.balign 4
3928LOCAL(zero_l):
3929	.long	0x0
3930	.long	0xF81F81F9
3931	.long	0xF07C1F08
3932	.long	0xE9131AC0
3933	.long	0xE1E1E1E2
3934	.long	0xDAE6076C
3935	.long	0xD41D41D5
3936	.long	0xCD856891
3937	.long	0xC71C71C8
3938	.long	0xC0E07039
3939	.long	0xBACF914D
3940	.long	0xB4E81B4F
3941	.long	0xAF286BCB
3942	.long	0xA98EF607
3943	.long	0xA41A41A5
3944	.long	0x9EC8E952
3945	.long	0x9999999A
3946	.long	0x948B0FCE
3947	.long	0x8F9C18FA
3948	.long	0x8ACB90F7
3949	.long	0x86186187
3950	.long	0x81818182
3951	.long	0x7D05F418
3952	.long	0x78A4C818
3953	.long	0x745D1746
3954	.long	0x702E05C1
3955	.long	0x6C16C16D
3956	.long	0x68168169
3957	.long	0x642C8591
3958	.long	0x60581606
3959	.long	0x5C9882BA
3960	.long	0x58ED2309
3961LOCAL(div_table_inv):
3962	.long	0x55555556
3963	.long	0x51D07EAF
3964	.long	0x4E5E0A73
3965	.long	0x4AFD6A06
3966	.long	0x47AE147B
3967	.long	0x446F8657
3968	.long	0x41414142
3969	.long	0x3E22CBCF
3970	.long	0x3B13B13C
3971	.long	0x38138139
3972	.long	0x3521CFB3
3973	.long	0x323E34A3
3974	.long	0x2F684BDB
3975	.long	0x2C9FB4D9
3976	.long	0x29E4129F
3977	.long	0x27350B89
3978	.long	0x24924925
3979	.long	0x21FB7813
3980	.long	0x1F7047DD
3981	.long	0x1CF06ADB
3982	.long	0x1A7B9612
3983	.long	0x18118119
3984	.long	0x15B1E5F8
3985	.long	0x135C8114
3986	.long	0x11111112
3987	.long	0xECF56BF
3988	.long	0xC9714FC
3989	.long	0xA6810A7
3990	.long	0x8421085
3991	.long	0x624DD30
3992	.long	0x4104105
3993	.long	0x2040811
3994	/* maximum error: 0.987342 scaled: 0.921875*/
3995
3996	ENDFUNC(GLOBAL(sdivsi3_i4i))
3997#endif /* SH3 / SH4 */
3998
3999#endif /* L_div_table */
4000
4001#ifdef L_udiv_qrnnd_16
4002#if !__SHMEDIA__
/* udiv_qrnnd_16: produce one 16-bit quotient digit of an unsigned
   division, together with the matching remainder.

   Register usage, as given by the original annotations below:
     in:      r0 = n1, r4 = n0, r5 = d, r6 = d1
     out:     r0 = rn (remainder), r1 = qn (quotient digit)
     scratch: r2 = __m (product read back from MACL)
   Stated precondition: n1 < d.

   Only the low 16 bits of r4 are ever merged into the result, and only
   the low 16 bits of r5 take part in the multiply; the additions and
   subtractions use all of r5.  The T bit and MACL are modified; r2 is
   modified on the main path.

   NOTE(review): r6 is used directly as the div1 divisor and the
   remainder is taken from the upper half of r0, so r6 presumably holds
   the high half of d positioned in bits 31..16 -- confirm against the
   callers.  */
4003	HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
4004	/* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
4005	/* n1 < d, but n1 might be larger than d1.  */
4006	.global GLOBAL(udiv_qrnnd_16)
4007	.balign 8
4008GLOBAL(udiv_qrnnd_16):
	/* Start an unsigned div1 sequence, then test r0 > r6 (unsigned).
	   If so, take the out-of-line path at .Lots; presumably the 16
	   steps below could not represent the quotient in that case.
	   On the fall-through path T is 0 because the branch was not
	   taken.  */
4009	div0u
4010	cmp/hi r6,r0
4011	bt .Lots
	/* Sixteen single-bit division steps of r0 by r6.  */
4012	.rept 16
4013	div1 r6,r0
4014	.endr
	/* r1 = the bits div1 shifted into the low half of r0.  If the last
	   step left T clear, add r6 back to r0.  The add does not touch T,
	   so rotcl then shifts r1 left by one and inserts T as bit 0.  */
4015	extu.w r0,r1
4016	bt 0f
4017	add r6,r0
40180:	rotcl r1
4019	mulu.w r1,r5		/* MACL = (r1 & 0xffff) * (r5 & 0xffff) */
4020	xtrct r4,r0		/* r0 = (r4 << 16) | (r0 >> 16) */
4021	swap.w r0,r0		/* halves swapped: r4 low half is now r0 low half */
4022	sts macl,r2		/* r2 = __m */
4023	cmp/hs r2,r0		/* T = (r0 >= r2), unsigned */
4024	sub r2,r0		/* r0 -= r2; T is left unchanged */
4025	bt 0f			/* r0 was >= __m: done (0: is the rts at the end) */
	/* r0 was below __m.  T is 0 here, so addc adds exactly r5 and
	   leaves the carry out in T.  The quotient digit is lowered by
	   one; a carry out ends the correction.  */
4026	addc r5,r0
4027	add #-1,r1
4028	bt 0f
	/* Second correction, also the branch target of the .Lots path:
	   lower the digit once more and add r5 again in the delay slot
	   of rts.  */
40291:	add #-1,r1
4030	rts
4031	add r5,r0
4032	.balign 8
	/* Out-of-line path, entered when r0 > r6 on entry.  */
4033.Lots:
4034	sub r5,r0		/* r0 = n1 - d */
4035	swap.w r4,r1
4036	xtrct r0,r1		/* r1 = (r0 << 16) | (r4 & 0xffff) */
4037	clrt
4038	mov r1,r0
4039	addc r5,r0		/* r0 = r1 + r5; T = carry out */
4040	mov #-1,r1
	/* SL1 is a macro defined outside this section (lib1funcs.h is
	   included at the top of the file).  It is given the branch
	   "bf 1b" and the instruction "shlr16 r1", which turns the -1 in
	   r1 into 0xffff.
	   NOTE(review): presumably it emits the branch with shlr16 in its
	   delay slot where the target allows, so shlr16 runs on both
	   outcomes -- confirm in lib1funcs.h.  With that assumption: no
	   carry goes to 1b (digit 0xfffe, r5 added once more), a carry
	   returns with digit 0xffff.  */
4041	SL1(bf, 1b,
4042	shlr16 r1)
40430:	rts
4044	nop
4045	ENDFUNC(GLOBAL(udiv_qrnnd_16))
4046#endif /* !__SHMEDIA__ */
4047#endif /* L_udiv_qrnnd_16 */
4048