xref: /openbsd-src/gnu/gcc/gcc/config/sh/lib1funcs.asm (revision c8ad093cdaf30968adb7190faaa67dcaf4fc45c5)
1/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2   2004, 2005
3   Free Software Foundation, Inc.
4
5This file is free software; you can redistribute it and/or modify it
6under the terms of the GNU General Public License as published by the
7Free Software Foundation; either version 2, or (at your option) any
8later version.
9
10In addition to the permissions in the GNU General Public License, the
11Free Software Foundation gives you unlimited permission to link the
12compiled version of this file into combinations with other programs,
13and to distribute those combinations without any restriction coming
14from the use of this file.  (The General Public License restrictions
15do apply in other respects; for example, they cover modification of
16the file, and distribution when not linked into a combine
17executable.)
18
19This file is distributed in the hope that it will be useful, but
20WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22General Public License for more details.
23
24You should have received a copy of the GNU General Public License
25along with this program; see the file COPYING.  If not, write to
26the Free Software Foundation, 51 Franklin Street, Fifth Floor,
27Boston, MA 02110-1301, USA.  */
28
29!! libgcc routines for the Renesas / SuperH SH CPUs.
30!! Contributed by Steve Chamberlain.
31!! sac@cygnus.com
32
33!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
34!! recoded in assembly by Toshiyasu Morita
35!! tm@netcom.com
36
37/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
38   ELF local label prefixes by J"orn Rennecke
39   amylaar@cygnus.com  */
40
41#include "lib1funcs.h"
42
43#if ! __SH5__
44#ifdef L_ashiftrt
/* ashiftrt_r4_N (N = 0..32): arithmetic right shift of r4 by the constant
   N.  The value is shifted in place and the result is left in r4; no
   other general register is written.  The T bit is changed by the
   shar / rotcl / subc instructions.

   Every entry point is a label inside one of four fall-through chains,
   so each additional "shar r4" executed before the shared tail adds one
   to the shift count.  The instruction that follows each "rts" sits in
   the branch delay slot and executes before control reaches the caller.  */
45	.global	GLOBAL(ashiftrt_r4_0)
46	.global	GLOBAL(ashiftrt_r4_1)
47	.global	GLOBAL(ashiftrt_r4_2)
48	.global	GLOBAL(ashiftrt_r4_3)
49	.global	GLOBAL(ashiftrt_r4_4)
50	.global	GLOBAL(ashiftrt_r4_5)
51	.global	GLOBAL(ashiftrt_r4_6)
52	.global	GLOBAL(ashiftrt_r4_7)
53	.global	GLOBAL(ashiftrt_r4_8)
54	.global	GLOBAL(ashiftrt_r4_9)
55	.global	GLOBAL(ashiftrt_r4_10)
56	.global	GLOBAL(ashiftrt_r4_11)
57	.global	GLOBAL(ashiftrt_r4_12)
58	.global	GLOBAL(ashiftrt_r4_13)
59	.global	GLOBAL(ashiftrt_r4_14)
60	.global	GLOBAL(ashiftrt_r4_15)
61	.global	GLOBAL(ashiftrt_r4_16)
62	.global	GLOBAL(ashiftrt_r4_17)
63	.global	GLOBAL(ashiftrt_r4_18)
64	.global	GLOBAL(ashiftrt_r4_19)
65	.global	GLOBAL(ashiftrt_r4_20)
66	.global	GLOBAL(ashiftrt_r4_21)
67	.global	GLOBAL(ashiftrt_r4_22)
68	.global	GLOBAL(ashiftrt_r4_23)
69	.global	GLOBAL(ashiftrt_r4_24)
70	.global	GLOBAL(ashiftrt_r4_25)
71	.global	GLOBAL(ashiftrt_r4_26)
72	.global	GLOBAL(ashiftrt_r4_27)
73	.global	GLOBAL(ashiftrt_r4_28)
74	.global	GLOBAL(ashiftrt_r4_29)
75	.global	GLOBAL(ashiftrt_r4_30)
76	.global	GLOBAL(ashiftrt_r4_31)
77	.global	GLOBAL(ashiftrt_r4_32)
78
79	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
80	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
81	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
82	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
83	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
84	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
85	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
86	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
87	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
88	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
89	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
90	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
91	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
92	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
93	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
94	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
95	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
96	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
97	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
98	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
99	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
100	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
101	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
102	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
103	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
104	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
105	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
106	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
107	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
108	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
109	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
110	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
111	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
112
113	.align	1
/* N = 31 and N = 32 share one body: rotcl moves the sign bit of r4 into
   T, then subc computes r4 - r4 - T, which is 0 for a non-negative input
   and -1 for a negative one.  */
114GLOBAL(ashiftrt_r4_32):
115GLOBAL(ashiftrt_r4_31):
116	rotcl	r4
117	rts
118	subc	r4,r4
119
/* N = 25..30: (N - 24) single-bit arithmetic shifts, then fall into the
   N = 24 tail.  */
120GLOBAL(ashiftrt_r4_30):
121	shar	r4
122GLOBAL(ashiftrt_r4_29):
123	shar	r4
124GLOBAL(ashiftrt_r4_28):
125	shar	r4
126GLOBAL(ashiftrt_r4_27):
127	shar	r4
128GLOBAL(ashiftrt_r4_26):
129	shar	r4
130GLOBAL(ashiftrt_r4_25):
131	shar	r4
/* N = 24: a logical shift right by 16 + 8 leaves the former top byte in
   bits 7..0; exts.b (delay slot) sign-extends it to 32 bits.  */
132GLOBAL(ashiftrt_r4_24):
133	shlr16	r4
134	shlr8	r4
135	rts
136	exts.b	r4,r4
137
/* N = 17..23: (N - 16) single-bit arithmetic shifts, then fall into the
   N = 16 tail.  */
138GLOBAL(ashiftrt_r4_23):
139	shar	r4
140GLOBAL(ashiftrt_r4_22):
141	shar	r4
142GLOBAL(ashiftrt_r4_21):
143	shar	r4
144GLOBAL(ashiftrt_r4_20):
145	shar	r4
146GLOBAL(ashiftrt_r4_19):
147	shar	r4
148GLOBAL(ashiftrt_r4_18):
149	shar	r4
150GLOBAL(ashiftrt_r4_17):
151	shar	r4
/* N = 16: logical shift right by 16, then exts.w (delay slot)
   sign-extends the remaining 16-bit value.  */
152GLOBAL(ashiftrt_r4_16):
153	shlr16	r4
154	rts
155	exts.w	r4,r4
156
/* N = 1..15: a plain run of single-bit arithmetic shifts; the last one
   is the shar in the delay slot of the rts at ashiftrt_r4_1.  */
157GLOBAL(ashiftrt_r4_15):
158	shar	r4
159GLOBAL(ashiftrt_r4_14):
160	shar	r4
161GLOBAL(ashiftrt_r4_13):
162	shar	r4
163GLOBAL(ashiftrt_r4_12):
164	shar	r4
165GLOBAL(ashiftrt_r4_11):
166	shar	r4
167GLOBAL(ashiftrt_r4_10):
168	shar	r4
169GLOBAL(ashiftrt_r4_9):
170	shar	r4
171GLOBAL(ashiftrt_r4_8):
172	shar	r4
173GLOBAL(ashiftrt_r4_7):
174	shar	r4
175GLOBAL(ashiftrt_r4_6):
176	shar	r4
177GLOBAL(ashiftrt_r4_5):
178	shar	r4
179GLOBAL(ashiftrt_r4_4):
180	shar	r4
181GLOBAL(ashiftrt_r4_3):
182	shar	r4
183GLOBAL(ashiftrt_r4_2):
184	shar	r4
185GLOBAL(ashiftrt_r4_1):
186	rts
187	shar	r4
188
/* N = 0: nothing to do; the nop only fills the delay slot.  */
189GLOBAL(ashiftrt_r4_0):
190	rts
191	nop
192
193	ENDFUNC(GLOBAL(ashiftrt_r4_0))
194	ENDFUNC(GLOBAL(ashiftrt_r4_1))
195	ENDFUNC(GLOBAL(ashiftrt_r4_2))
196	ENDFUNC(GLOBAL(ashiftrt_r4_3))
197	ENDFUNC(GLOBAL(ashiftrt_r4_4))
198	ENDFUNC(GLOBAL(ashiftrt_r4_5))
199	ENDFUNC(GLOBAL(ashiftrt_r4_6))
200	ENDFUNC(GLOBAL(ashiftrt_r4_7))
201	ENDFUNC(GLOBAL(ashiftrt_r4_8))
202	ENDFUNC(GLOBAL(ashiftrt_r4_9))
203	ENDFUNC(GLOBAL(ashiftrt_r4_10))
204	ENDFUNC(GLOBAL(ashiftrt_r4_11))
205	ENDFUNC(GLOBAL(ashiftrt_r4_12))
206	ENDFUNC(GLOBAL(ashiftrt_r4_13))
207	ENDFUNC(GLOBAL(ashiftrt_r4_14))
208	ENDFUNC(GLOBAL(ashiftrt_r4_15))
209	ENDFUNC(GLOBAL(ashiftrt_r4_16))
210	ENDFUNC(GLOBAL(ashiftrt_r4_17))
211	ENDFUNC(GLOBAL(ashiftrt_r4_18))
212	ENDFUNC(GLOBAL(ashiftrt_r4_19))
213	ENDFUNC(GLOBAL(ashiftrt_r4_20))
214	ENDFUNC(GLOBAL(ashiftrt_r4_21))
215	ENDFUNC(GLOBAL(ashiftrt_r4_22))
216	ENDFUNC(GLOBAL(ashiftrt_r4_23))
217	ENDFUNC(GLOBAL(ashiftrt_r4_24))
218	ENDFUNC(GLOBAL(ashiftrt_r4_25))
219	ENDFUNC(GLOBAL(ashiftrt_r4_26))
220	ENDFUNC(GLOBAL(ashiftrt_r4_27))
221	ENDFUNC(GLOBAL(ashiftrt_r4_28))
222	ENDFUNC(GLOBAL(ashiftrt_r4_29))
223	ENDFUNC(GLOBAL(ashiftrt_r4_30))
224	ENDFUNC(GLOBAL(ashiftrt_r4_31))
225	ENDFUNC(GLOBAL(ashiftrt_r4_32))
226#endif
227
228#ifdef L_ashiftrt_n
229
230!
231! GLOBAL(ashrsi3)
232!
233! Entry:
234!
235! r4: Value to shift
236! r5: Shifts
237!
238! Exit:
239!
240! r0: Result
241!
242! Destroys:
243!
244! r5 (replaced by a jump-table offset); T for most shift counts
245!
246
/* Arithmetic right shift by a run-time count.  Only the low five bits of
   r5 are used (count 0..31).  The count indexes a table of signed byte
   offsets, each measured from the start of the table to the code for
   that count.

   On SH1 the offset is added to the table address and reached with jmp.
   On later parts braf is used: it branches relative to the address four
   bytes after the braf itself, which is exactly where the table starts
   (the function is 4-byte aligned and the six 2-byte instructions before
   the table occupy 12 bytes), so the raw table offset can be used as is.
   In both variants "mov r4,r0" sits in the delay slot, so r0 already
   holds the value to shift when the selected code runs.

   The offsets are loaded with mov.b, which sign-extends, so every target
   must stay less than 128 bytes past the table.  */
247	.global	GLOBAL(ashrsi3)
248	HIDDEN_FUNC(GLOBAL(ashrsi3))
249	.align	2
250GLOBAL(ashrsi3):
251	mov	#31,r0
252	and	r0,r5
253	mova	LOCAL(ashrsi3_table),r0
254	mov.b	@(r0,r5),r5
255#ifdef __sh1__
256	add	r5,r0
257	jmp	@r0
258#else
259	braf	r5
260#endif
261	mov	r4,r0
262
263	.align	2
264LOCAL(ashrsi3_table):
265	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
266	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
267	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
268	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
269	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
270	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
271	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
272	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
273	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
274	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
275	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
276	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
277	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
278	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
279	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
280	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
281	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
282	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
283	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
284	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
285	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
286	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
287	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
288	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
289	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
290	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
291	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
292	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
293	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
294	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
295	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
296	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
297
/* Count 31: rotcl moves the sign bit into T; subc r0,r0 then yields 0 or
   -1 (r0 - r0 - T).  */
298LOCAL(ashrsi3_31):
299	rotcl	r0
300	rts
301	subc	r0,r0
302
/* Counts 25..30: (count - 24) single-bit shifts, then the count-24 tail.  */
303LOCAL(ashrsi3_30):
304	shar	r0
305LOCAL(ashrsi3_29):
306	shar	r0
307LOCAL(ashrsi3_28):
308	shar	r0
309LOCAL(ashrsi3_27):
310	shar	r0
311LOCAL(ashrsi3_26):
312	shar	r0
313LOCAL(ashrsi3_25):
314	shar	r0
/* Count 24: logical shift by 16 + 8, then sign-extend the low byte.  */
315LOCAL(ashrsi3_24):
316	shlr16	r0
317	shlr8	r0
318	rts
319	exts.b	r0,r0
320
/* Counts 17..23: (count - 16) single-bit shifts, then the count-16 tail.  */
321LOCAL(ashrsi3_23):
322	shar	r0
323LOCAL(ashrsi3_22):
324	shar	r0
325LOCAL(ashrsi3_21):
326	shar	r0
327LOCAL(ashrsi3_20):
328	shar	r0
329LOCAL(ashrsi3_19):
330	shar	r0
331LOCAL(ashrsi3_18):
332	shar	r0
333LOCAL(ashrsi3_17):
334	shar	r0
/* Count 16: logical shift by 16, then sign-extend the low 16 bits.  */
335LOCAL(ashrsi3_16):
336	shlr16	r0
337	rts
338	exts.w	r0,r0
339
/* Counts 1..15: a run of single-bit shifts ending with the shar in the
   delay slot of the rts at ashrsi3_1.  */
340LOCAL(ashrsi3_15):
341	shar	r0
342LOCAL(ashrsi3_14):
343	shar	r0
344LOCAL(ashrsi3_13):
345	shar	r0
346LOCAL(ashrsi3_12):
347	shar	r0
348LOCAL(ashrsi3_11):
349	shar	r0
350LOCAL(ashrsi3_10):
351	shar	r0
352LOCAL(ashrsi3_9):
353	shar	r0
354LOCAL(ashrsi3_8):
355	shar	r0
356LOCAL(ashrsi3_7):
357	shar	r0
358LOCAL(ashrsi3_6):
359	shar	r0
360LOCAL(ashrsi3_5):
361	shar	r0
362LOCAL(ashrsi3_4):
363	shar	r0
364LOCAL(ashrsi3_3):
365	shar	r0
366LOCAL(ashrsi3_2):
367	shar	r0
368LOCAL(ashrsi3_1):
369	rts
370	shar	r0
371
/* Count 0: r0 already equals r4.  */
372LOCAL(ashrsi3_0):
373	rts
374	nop
375
376	ENDFUNC(GLOBAL(ashrsi3))
377#endif
378
379#ifdef L_ashiftlt
380
381!
382! GLOBAL(ashlsi3)
383!
384! Entry:
385!
386! r4: Value to shift
387! r5: Shifts
388!
389! Exit:
390!
391! r0: Result
392!
393! Destroys:
394!
395! r5 (replaced by a jump-table offset); T for some shift counts
396!
/* Left shift by a run-time count.  Only the low five bits of r5 are used
   (count 0..31).  The count selects a signed byte offset from the table;
   each offset is measured from the start of the table to the code for
   that count.  SH1 adds the offset to the table address and uses jmp;
   later parts use braf, whose base address (four bytes past the braf)
   coincides with the start of the table.  "mov r4,r0" executes in the
   delay slot, so r0 holds the input when the selected code runs.

   The shift bodies are grouped so that each entry first applies zero to
   three shll2 steps and then a shared tail of shll / shll8 / shll16;
   the instruction after each rts runs in its delay slot.  Because the
   table is read with the sign-extending mov.b, all targets must lie
   less than 128 bytes past the table.  */
397	.global	GLOBAL(ashlsi3)
398	HIDDEN_FUNC(GLOBAL(ashlsi3))
399	.align	2
400GLOBAL(ashlsi3):
401	mov	#31,r0
402	and	r0,r5
403	mova	LOCAL(ashlsi3_table),r0
404	mov.b	@(r0,r5),r5
405#ifdef __sh1__
406	add	r5,r0
407	jmp	@r0
408#else
409	braf	r5
410#endif
411	mov	r4,r0
412
413	.align	2
414LOCAL(ashlsi3_table):
415	.byte		LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
416	.byte		LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
417	.byte		LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
418	.byte		LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
419	.byte		LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
420	.byte		LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
421	.byte		LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
422	.byte		LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
423	.byte		LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
424	.byte		LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
425	.byte		LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
426	.byte		LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
427	.byte		LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
428	.byte		LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
429	.byte		LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
430	.byte		LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
431	.byte		LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
432	.byte		LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
433	.byte		LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
434	.byte		LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
435	.byte		LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
436	.byte		LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
437	.byte		LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
438	.byte		LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
439	.byte		LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
440	.byte		LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
441	.byte		LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
442	.byte		LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
443	.byte		LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
444	.byte		LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
445	.byte		LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
446	.byte		LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
447
/* Counts 6, 4, 2: two-bit steps only.  */
448LOCAL(ashlsi3_6):
449	shll2	r0
450LOCAL(ashlsi3_4):
451	shll2	r0
452LOCAL(ashlsi3_2):
453	rts
454	shll2	r0
455
/* Counts 7, 5, 3, 1: two-bit steps plus a final one-bit shift.  */
456LOCAL(ashlsi3_7):
457	shll2	r0
458LOCAL(ashlsi3_5):
459	shll2	r0
460LOCAL(ashlsi3_3):
461	shll2	r0
462LOCAL(ashlsi3_1):
463	rts
464	shll	r0
465
/* Counts 14, 12, 10, 8: two-bit steps plus 8.  */
466LOCAL(ashlsi3_14):
467	shll2	r0
468LOCAL(ashlsi3_12):
469	shll2	r0
470LOCAL(ashlsi3_10):
471	shll2	r0
472LOCAL(ashlsi3_8):
473	rts
474	shll8	r0
475
/* Counts 15, 13, 11, 9: two-bit steps plus 8 plus 1.  */
476LOCAL(ashlsi3_15):
477	shll2	r0
478LOCAL(ashlsi3_13):
479	shll2	r0
480LOCAL(ashlsi3_11):
481	shll2	r0
482LOCAL(ashlsi3_9):
483	shll8	r0
484	rts
485	shll	r0
486
/* Counts 22, 20, 18, 16: two-bit steps plus 16.  */
487LOCAL(ashlsi3_22):
488	shll2	r0
489LOCAL(ashlsi3_20):
490	shll2	r0
491LOCAL(ashlsi3_18):
492	shll2	r0
493LOCAL(ashlsi3_16):
494	rts
495	shll16	r0
496
/* Counts 23, 21, 19, 17: two-bit steps plus 16 plus 1.  */
497LOCAL(ashlsi3_23):
498	shll2	r0
499LOCAL(ashlsi3_21):
500	shll2	r0
501LOCAL(ashlsi3_19):
502	shll2	r0
503LOCAL(ashlsi3_17):
504	shll16	r0
505	rts
506	shll	r0
507
/* Counts 30, 28, 26, 24: two-bit steps plus 16 plus 8.  */
508LOCAL(ashlsi3_30):
509	shll2	r0
510LOCAL(ashlsi3_28):
511	shll2	r0
512LOCAL(ashlsi3_26):
513	shll2	r0
514LOCAL(ashlsi3_24):
515	shll16	r0
516	rts
517	shll8	r0
518
/* Counts 31, 29, 27, 25: two-bit steps plus 16 plus 8 plus 1.  */
519LOCAL(ashlsi3_31):
520	shll2	r0
521LOCAL(ashlsi3_29):
522	shll2	r0
523LOCAL(ashlsi3_27):
524	shll2	r0
525LOCAL(ashlsi3_25):
526	shll16	r0
527	shll8	r0
528	rts
529	shll	r0
530
/* Count 0: r0 already equals r4.  */
531LOCAL(ashlsi3_0):
532	rts
533	nop
534
535	ENDFUNC(GLOBAL(ashlsi3))
536#endif
537
538#ifdef L_lshiftrt
539
540!
541! GLOBAL(lshrsi3)
542!
543! Entry:
544!
545! r4: Value to shift
546! r5: Shifts
547!
548! Exit:
549!
550! r0: Result
551!
552! Destroys:
553!
554! r5 (replaced by a jump-table offset); T for some shift counts
555!
/* Logical (zero-filling) right shift by a run-time count.  Only the low
   five bits of r5 are used (count 0..31).  The count selects a signed
   byte offset from the table; each offset is measured from the start of
   the table to the code for that count.  SH1 adds the offset to the
   table address and uses jmp; later parts use braf, whose base address
   (four bytes past the braf) coincides with the start of the table.
   "mov r4,r0" executes in the delay slot, so r0 holds the input when the
   selected code runs.

   Each entry applies zero to three shlr2 steps followed by a shared tail
   of shlr / shlr8 / shlr16; the instruction after each rts runs in its
   delay slot.  Because the table is read with the sign-extending mov.b,
   all targets must lie less than 128 bytes past the table.  */
556	.global	GLOBAL(lshrsi3)
557	HIDDEN_FUNC(GLOBAL(lshrsi3))
558	.align	2
559GLOBAL(lshrsi3):
560	mov	#31,r0
561	and	r0,r5
562	mova	LOCAL(lshrsi3_table),r0
563	mov.b	@(r0,r5),r5
564#ifdef __sh1__
565	add	r5,r0
566	jmp	@r0
567#else
568	braf	r5
569#endif
570	mov	r4,r0
571
572	.align	2
573LOCAL(lshrsi3_table):
574	.byte		LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
575	.byte		LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
576	.byte		LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
577	.byte		LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
578	.byte		LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
579	.byte		LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
580	.byte		LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
581	.byte		LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
582	.byte		LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
583	.byte		LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
584	.byte		LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
585	.byte		LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
586	.byte		LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
587	.byte		LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
588	.byte		LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
589	.byte		LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
590	.byte		LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
591	.byte		LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
592	.byte		LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
593	.byte		LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
594	.byte		LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
595	.byte		LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
596	.byte		LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
597	.byte		LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
598	.byte		LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
599	.byte		LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
600	.byte		LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
601	.byte		LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
602	.byte		LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
603	.byte		LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
604	.byte		LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
605	.byte		LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
606
/* Counts 6, 4, 2: two-bit steps only.  */
607LOCAL(lshrsi3_6):
608	shlr2	r0
609LOCAL(lshrsi3_4):
610	shlr2	r0
611LOCAL(lshrsi3_2):
612	rts
613	shlr2	r0
614
/* Counts 7, 5, 3, 1: two-bit steps plus a final one-bit shift.  */
615LOCAL(lshrsi3_7):
616	shlr2	r0
617LOCAL(lshrsi3_5):
618	shlr2	r0
619LOCAL(lshrsi3_3):
620	shlr2	r0
621LOCAL(lshrsi3_1):
622	rts
623	shlr	r0
624
/* Counts 14, 12, 10, 8: two-bit steps plus 8.  */
625LOCAL(lshrsi3_14):
626	shlr2	r0
627LOCAL(lshrsi3_12):
628	shlr2	r0
629LOCAL(lshrsi3_10):
630	shlr2	r0
631LOCAL(lshrsi3_8):
632	rts
633	shlr8	r0
634
/* Counts 15, 13, 11, 9: two-bit steps plus 8 plus 1.  */
635LOCAL(lshrsi3_15):
636	shlr2	r0
637LOCAL(lshrsi3_13):
638	shlr2	r0
639LOCAL(lshrsi3_11):
640	shlr2	r0
641LOCAL(lshrsi3_9):
642	shlr8	r0
643	rts
644	shlr	r0
645
/* Counts 22, 20, 18, 16: two-bit steps plus 16.  */
646LOCAL(lshrsi3_22):
647	shlr2	r0
648LOCAL(lshrsi3_20):
649	shlr2	r0
650LOCAL(lshrsi3_18):
651	shlr2	r0
652LOCAL(lshrsi3_16):
653	rts
654	shlr16	r0
655
/* Counts 23, 21, 19, 17: two-bit steps plus 16 plus 1.  */
656LOCAL(lshrsi3_23):
657	shlr2	r0
658LOCAL(lshrsi3_21):
659	shlr2	r0
660LOCAL(lshrsi3_19):
661	shlr2	r0
662LOCAL(lshrsi3_17):
663	shlr16	r0
664	rts
665	shlr	r0
666
/* Counts 30, 28, 26, 24: two-bit steps plus 16 plus 8.  */
667LOCAL(lshrsi3_30):
668	shlr2	r0
669LOCAL(lshrsi3_28):
670	shlr2	r0
671LOCAL(lshrsi3_26):
672	shlr2	r0
673LOCAL(lshrsi3_24):
674	shlr16	r0
675	rts
676	shlr8	r0
677
/* Counts 31, 29, 27, 25: two-bit steps plus 16 plus 8 plus 1.  */
678LOCAL(lshrsi3_31):
679	shlr2	r0
680LOCAL(lshrsi3_29):
681	shlr2	r0
682LOCAL(lshrsi3_27):
683	shlr2	r0
684LOCAL(lshrsi3_25):
685	shlr16	r0
686	shlr8	r0
687	rts
688	shlr	r0
689
/* Count 0: r0 already equals r4.  */
690LOCAL(lshrsi3_0):
691	rts
692	nop
693
694	ENDFUNC(GLOBAL(lshrsi3))
695#endif
696
697#ifdef L_movmem
/* movmem and movmemSI4 .. movmemSI64: word-by-word block copy helpers.
   r4 = destination address, r5 = source address; every word travels
   through r0.

   movmemSIn (n = 4, 8, .. 64) copies the n/4 words at byte offsets
   n-4 down to 0.  Each entry point is one load/store pair (4 bytes of
   code) that falls through into the next smaller entry; movmemSI4 ends
   with rts and performs the last store in the delay slot.

   movmem handles larger blocks by reusing that chain.  r6 is a count
   supplied by the caller; it is multiplied by 4 on entry and then
   stepped by -64 per loop iteration.  NOTE(review): the exact encoding
   of r6 expected on entry is decided by the compiler and is not visible
   in this file -- confirm before changing any of the arithmetic.  */
698	.text
699	.balign	4
700	.global	GLOBAL(movmem)
701	HIDDEN_FUNC(GLOBAL(movmem))
702	HIDDEN_ALIAS(movstr,movmem)
703	/* This would be a lot simpler if r6 contained the byte count
704	   minus 64, and we wouldn't be called here for a byte count of 64.  */
705GLOBAL(movmem):
/* Save the caller's return address: PR is about to be reused so that the
   rts at the end of the movmemSI chain comes back to movmem_loop.  */
706	sts.l	pr,@-r15
707	shll2	r6
/* Enter movmemSI52 just past its first instruction; that skipped load is
   performed in the delay slot below.  bsr leaves PR pointing at the
   instruction after the delay slot, i.e. at movmem_loop.  */
708	bsr	GLOBAL(movmemSI52+2)
709	mov.l	@(48,r5),r0
710	.balign	4
711LOCAL(movmem_loop): /* Reached with rts */
/* Offsets 0..48 of the current 64-byte group have been copied by the
   movmemSI chain; copy offsets 60, 56 and 52 here while updating and
   testing the counter.  */
712	mov.l	@(60,r5),r0
713	add	#-64,r6
714	mov.l	r0,@(60,r4)
715	tst	r6,r6
716	mov.l	@(56,r5),r0
/* Counter reached exactly zero: finish this group at movmem_done.  */
717	bt	LOCAL(movmem_done)
718	mov.l	r0,@(56,r4)
719	cmp/pl	r6
720	mov.l	@(52,r5),r0
721	add	#64,r5
722	mov.l	r0,@(52,r4)
723	add	#64,r4
/* Counter still positive: copy offsets 0..48 of the next group; the rts
   in movmemSI4 returns to movmem_loop because PR is unchanged.  */
724	bt	GLOBAL(movmemSI52)
725! done all the large groups, do the remainder
726! by jumping into the movmemSI4 .. movmemSI64 chain below
/* r6 is negative here.  movmemSI4+4 is the address just past the
   load/rts pair of movmemSI4; every chain entry is 4 bytes long, so
   adding the negative r6 selects the entry that copies the remaining
   words.  */
727	mova	GLOBAL(movmemSI4)+4,r0
728	add	r6,r0
729	jmp	@r0
730LOCAL(movmem_done): ! share slot insn, works out aligned.
/* This lds.l is both the delay slot of the jmp above and the first
   instruction of the movmem_done path: in either case it restores the
   caller's return address before the final rts.  */
731	lds.l	@r15+,pr
732	mov.l	r0,@(56,r4)
733	mov.l	@(52,r5),r0
734	rts
735	mov.l	r0,@(52,r4)
736	.balign	4
737! ??? We need aliases movstr* for movmem* for the older libraries.  These
738! aliases will be removed at some point in the future.
739	.global	GLOBAL(movmemSI64)
740	HIDDEN_FUNC(GLOBAL(movmemSI64))
741	HIDDEN_ALIAS(movstrSI64,movmemSI64)
742GLOBAL(movmemSI64):
743	mov.l	@(60,r5),r0
744	mov.l	r0,@(60,r4)
745	.global	GLOBAL(movmemSI60)
746	HIDDEN_FUNC(GLOBAL(movmemSI60))
747	HIDDEN_ALIAS(movstrSI60,movmemSI60)
748GLOBAL(movmemSI60):
749	mov.l	@(56,r5),r0
750	mov.l	r0,@(56,r4)
751	.global	GLOBAL(movmemSI56)
752	HIDDEN_FUNC(GLOBAL(movmemSI56))
753	HIDDEN_ALIAS(movstrSI56,movmemSI56)
754GLOBAL(movmemSI56):
755	mov.l	@(52,r5),r0
756	mov.l	r0,@(52,r4)
757	.global	GLOBAL(movmemSI52)
758	HIDDEN_FUNC(GLOBAL(movmemSI52))
759	HIDDEN_ALIAS(movstrSI52,movmemSI52)
760GLOBAL(movmemSI52):
761	mov.l	@(48,r5),r0
762	mov.l	r0,@(48,r4)
763	.global	GLOBAL(movmemSI48)
764	HIDDEN_FUNC(GLOBAL(movmemSI48))
765	HIDDEN_ALIAS(movstrSI48,movmemSI48)
766GLOBAL(movmemSI48):
767	mov.l	@(44,r5),r0
768	mov.l	r0,@(44,r4)
769	.global	GLOBAL(movmemSI44)
770	HIDDEN_FUNC(GLOBAL(movmemSI44))
771	HIDDEN_ALIAS(movstrSI44,movmemSI44)
772GLOBAL(movmemSI44):
773	mov.l	@(40,r5),r0
774	mov.l	r0,@(40,r4)
775	.global	GLOBAL(movmemSI40)
776	HIDDEN_FUNC(GLOBAL(movmemSI40))
777	HIDDEN_ALIAS(movstrSI40,movmemSI40)
778GLOBAL(movmemSI40):
779	mov.l	@(36,r5),r0
780	mov.l	r0,@(36,r4)
781	.global	GLOBAL(movmemSI36)
782	HIDDEN_FUNC(GLOBAL(movmemSI36))
783	HIDDEN_ALIAS(movstrSI36,movmemSI36)
784GLOBAL(movmemSI36):
785	mov.l	@(32,r5),r0
786	mov.l	r0,@(32,r4)
787	.global	GLOBAL(movmemSI32)
788	HIDDEN_FUNC(GLOBAL(movmemSI32))
789	HIDDEN_ALIAS(movstrSI32,movmemSI32)
790GLOBAL(movmemSI32):
791	mov.l	@(28,r5),r0
792	mov.l	r0,@(28,r4)
793	.global	GLOBAL(movmemSI28)
794	HIDDEN_FUNC(GLOBAL(movmemSI28))
795	HIDDEN_ALIAS(movstrSI28,movmemSI28)
796GLOBAL(movmemSI28):
797	mov.l	@(24,r5),r0
798	mov.l	r0,@(24,r4)
799	.global	GLOBAL(movmemSI24)
800	HIDDEN_FUNC(GLOBAL(movmemSI24))
801	HIDDEN_ALIAS(movstrSI24,movmemSI24)
802GLOBAL(movmemSI24):
803	mov.l	@(20,r5),r0
804	mov.l	r0,@(20,r4)
805	.global	GLOBAL(movmemSI20)
806	HIDDEN_FUNC(GLOBAL(movmemSI20))
807	HIDDEN_ALIAS(movstrSI20,movmemSI20)
808GLOBAL(movmemSI20):
809	mov.l	@(16,r5),r0
810	mov.l	r0,@(16,r4)
811	.global	GLOBAL(movmemSI16)
812	HIDDEN_FUNC(GLOBAL(movmemSI16))
813	HIDDEN_ALIAS(movstrSI16,movmemSI16)
814GLOBAL(movmemSI16):
815	mov.l	@(12,r5),r0
816	mov.l	r0,@(12,r4)
817	.global	GLOBAL(movmemSI12)
818	HIDDEN_FUNC(GLOBAL(movmemSI12))
819	HIDDEN_ALIAS(movstrSI12,movmemSI12)
820GLOBAL(movmemSI12):
821	mov.l	@(8,r5),r0
822	mov.l	r0,@(8,r4)
823	.global	GLOBAL(movmemSI8)
824	HIDDEN_FUNC(GLOBAL(movmemSI8))
825	HIDDEN_ALIAS(movstrSI8,movmemSI8)
826GLOBAL(movmemSI8):
827	mov.l	@(4,r5),r0
828	mov.l	r0,@(4,r4)
829	.global	GLOBAL(movmemSI4)
830	HIDDEN_FUNC(GLOBAL(movmemSI4))
831	HIDDEN_ALIAS(movstrSI4,movmemSI4)
832GLOBAL(movmemSI4):
833	mov.l	@(0,r5),r0
834	rts
835	mov.l	r0,@(0,r4)
836
837	ENDFUNC(GLOBAL(movmemSI64))
838	ENDFUNC(GLOBAL(movmemSI60))
839	ENDFUNC(GLOBAL(movmemSI56))
840	ENDFUNC(GLOBAL(movmemSI52))
841	ENDFUNC(GLOBAL(movmemSI48))
842	ENDFUNC(GLOBAL(movmemSI44))
843	ENDFUNC(GLOBAL(movmemSI40))
844	ENDFUNC(GLOBAL(movmemSI36))
845	ENDFUNC(GLOBAL(movmemSI32))
846	ENDFUNC(GLOBAL(movmemSI28))
847	ENDFUNC(GLOBAL(movmemSI24))
848	ENDFUNC(GLOBAL(movmemSI20))
849	ENDFUNC(GLOBAL(movmemSI16))
850	ENDFUNC(GLOBAL(movmemSI12))
851	ENDFUNC(GLOBAL(movmemSI8))
852	ENDFUNC(GLOBAL(movmemSI4))
853	ENDFUNC(GLOBAL(movmem))
854#endif
855
856#ifdef L_movmem_i4
/* movmem_i4_even, movmem_i4_odd, movmemSI12_i4: word copy helpers.
   r4 = destination, r5 = source (read with post-increment in the two
   loop variants), r0-r3 = data in flight.

   movmem_i4_even and movmem_i4_odd share one software-pipelined loop
   that moves four words per round trip and decrements r6 with dt at two
   points, leaving at whichever point r6 reaches zero.  NOTE(review):
   the value the caller must place in r6 is not visible in this file;
   judging from the two exits it counts two-word steps -- confirm against
   the compiler before relying on it.

   movmemSI12_i4 copies exactly three words and does not use r6.  */
857	.text
858	.global	GLOBAL(movmem_i4_even)
859	.global	GLOBAL(movmem_i4_odd)
860	.global	GLOBAL(movmemSI12_i4)
861
862	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
863	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
864	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
865
866	HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
867	HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
868	HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
869
870	.p2align	5
/* Exit taken from the first dt in the loop: r0 and r1 hold the last two
   words and r4 has not yet been advanced, hence offsets 16 and 20.  */
871L_movmem_2mod4_end:
872	mov.l	r0,@(16,r4)
873	rts
874	mov.l	r1,@(20,r4)
875
876	.p2align	2
877
/* Even entry: preload two words into r0/r1 (the second in the delay
   slot) and join the loop at L_movmem_start_even.  */
878GLOBAL(movmem_i4_even):
879	mov.l	@r5+,r0
880	bra	L_movmem_start_even
881	mov.l	@r5+,r1
882
/* Odd entry: load three words, bias r4 by -4 so that the loop offsets
   line up, store the first two and enter the loop with the third still
   pending in r3.  */
883GLOBAL(movmem_i4_odd):
884	mov.l	@r5+,r1
885	add	#-4,r4
886	mov.l	@r5+,r2
887	mov.l	@r5+,r3
888	mov.l	r1,@(4,r4)
889	mov.l	r2,@(8,r4)
890
891L_movmem_loop:
892	mov.l	r3,@(12,r4)
893	dt	r6
894	mov.l	@r5+,r0
/* bt/s is a delayed branch: the load into r1 below executes whether or
   not the branch is taken.  */
895	bt/s	L_movmem_2mod4_end
896	mov.l	@r5+,r1
897	add	#16,r4
898L_movmem_start_even:
899	mov.l	@r5+,r2
900	mov.l	@r5+,r3
901	mov.l	r0,@r4
902	dt	r6
903	mov.l	r1,@(4,r4)
/* bf/s is a delayed branch: the store of r2 below executes on both
   paths; on fall-through the final store of r3 rides in the rts delay
   slot.  */
904	bf/s	L_movmem_loop
905	mov.l	r2,@(8,r4)
906	rts
907	mov.l	r3,@(12,r4)
908
909	ENDFUNC(GLOBAL(movmem_i4_even))
910	ENDFUNC(GLOBAL(movmem_i4_odd))
911
912	.p2align	4
/* Copy three words (12 bytes) from r5 to r4 through r0-r2; the last
   store executes in the rts delay slot.  r4 and r5 are not modified.  */
913GLOBAL(movmemSI12_i4):
914	mov.l	@r5,r0
915	mov.l	@(4,r5),r1
916	mov.l	@(8,r5),r2
917	mov.l	r0,@r4
918	mov.l	r1,@(4,r4)
919	rts
920	mov.l	r2,@(8,r4)
921
922	ENDFUNC(GLOBAL(movmemSI12_i4))
923#endif
924
925#ifdef L_mulsi3
926
927
/* mulsi3: 32 x 32 -> 32 bit multiply built from the 16 x 16 -> 32 bit
   mulu.w instruction.
   In:   r4, r5 = operands.
   Out:  r0 = low 32 bits of r4 * r5.
   Scratch: r2, r3, MACL and the T bit; r1 as well when either operand
   has a non-zero upper half.
   The branch target uses LOCAL() like the other internal labels in this
   file, so it does not show up as an ordinary symbol in the object.  */
928	.global	GLOBAL(mulsi3)
929	HIDDEN_FUNC(GLOBAL(mulsi3))
930
931! r4 =       aabb
932! r5 =       ccdd
933! r0 = aabb*ccdd  via partial products
934!
935! if aa == 0 and cc == 0
936! r0 = bb*dd
937!
938! else
939! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
940!
941
942GLOBAL(mulsi3):
943	mulu.w  r4,r5		! multiply the lsws  macl=bb*dd
944	mov     r5,r3		! r3 = ccdd
945	swap.w  r4,r2		! r2 = bbaa
946	xtrct   r2,r3		! r3 = aacc
947	tst  	r3,r3		! msws zero ?
948	bf      LOCAL(mulsi3_hiset)
949	rts			! yes - then we have the answer
950	sts     macl,r0
951
952LOCAL(mulsi3_hiset):	sts	macl,r0		! r0 = bb*dd
953	mulu.w	r2,r5		! brewing macl = aa*dd
954	sts	macl,r1
955	mulu.w	r3,r4		! brewing macl = cc*bb
956	sts	macl,r2
/* Both cross products carry a weight of 65536: add them, shift the sum
   into the upper half and add it to bb*dd (delay slot of rts).  */
957	add	r1,r2
958	shll16	r2
959	rts
960	add	r2,r0
961
962	ENDFUNC(GLOBAL(mulsi3))
963#endif
964#endif /* ! __SH5__ */
965#ifdef L_sdivsi3_i4
966	.title "SH DIVIDE"
967!! 4 byte integer Divide code for the Renesas SH
968#ifdef __SH4__
969!! args in r4 and r5, result in fpul, clobber dr0, dr2
970
/* Signed 32-bit division done in the FPU: both operands are converted to
   double (via FPUL), divided, and the quotient is truncated back to an
   integer, which is left in FPUL (ftrc runs in the rts delay slot).
   NOTE(review): this variant does not touch FPSCR, so it presumably
   relies on the caller already running in double-precision mode --
   confirm against the ABI for this configuration.  */
971	.global	GLOBAL(sdivsi3_i4)
972	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
973GLOBAL(sdivsi3_i4):
974	lds r4,fpul
975	float fpul,dr0
976	lds r5,fpul
977	float fpul,dr2
978	fdiv dr2,dr0
979	rts
980	ftrc dr0,fpul
981
982	ENDFUNC(GLOBAL(sdivsi3_i4))
983#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
984!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
985
986#if ! __SH5__ || __SH5__ == 32
987#if __SH5__
988	.mode	SHcompact
989#endif
/* Same algorithm as the plain SH4 variant, but the current FPSCR is
   saved on the stack and replaced for the duration of the computation.
   mov #8 followed by swap.w builds the constant 0x00080000 in r2.
   NOTE(review): that should be the FPSCR PR (double precision) bit with
   everything else cleared -- confirm against the SH-4 manual.  The
   caller's FPSCR is restored in the rts delay slot.  */
990	.global	GLOBAL(sdivsi3_i4)
991	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
992GLOBAL(sdivsi3_i4):
993	sts.l fpscr,@-r15
994	mov #8,r2
995	swap.w r2,r2
996	lds r2,fpscr
997	lds r4,fpul
998	float fpul,dr0
999	lds r5,fpul
1000	float fpul,dr2
1001	fdiv dr2,dr0
1002	ftrc dr0,fpul
1003	rts
1004	lds.l @r15+,fpscr
1005
1006	ENDFUNC(GLOBAL(sdivsi3_i4))
1007#endif /* ! __SH5__ || __SH5__ == 32 */
1008#endif /* ! __SH4__ */
1009#endif
1010
1011#ifdef L_sdivsi3
1012/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1013   sh2e/sh3e code.  */
1014#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__OpenBSD__)
1015!!
1016!! Steve Chamberlain
1017!! sac@cygnus.com
1018!!
1019!!
1020
1021!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1022
1023	.global	GLOBAL(sdivsi3)
1024#if __SHMEDIA__
1025#if __SH5__ == 32
1026	.section	.text..SHmedia32,"ax"
1027#else
1028	.text
1029#endif
1030	.align	2
1031#if 0
1032/* The assembly code that follows is a hand-optimized version of the C
1033   code that follows.  Note that the registers that are modified are
1034   exactly those listed as clobbered in the patterns divsi3_i1 and
1035   divsi3_i1_media.
1036
1037int __sdivsi3 (i, j)
1038     int i, j;
1039{
1040  register unsigned long long r18 asm ("r18");
1041  register unsigned long long r19 asm ("r19");
1042  register unsigned long long r0 asm ("r0") = 0;
1043  register unsigned long long r1 asm ("r1") = 1;
1044  register int r2 asm ("r2") = i >> 31;
1045  register int r3 asm ("r3") = j >> 31;
1046
1047  r2 = r2 ? r2 : r1;
1048  r3 = r3 ? r3 : r1;
1049  r18 = i * r2;
1050  r19 = j * r3;
1051  r2 *= r3;
1052
1053  r19 <<= 31;
1054  r1 <<= 31;
1055  do
1056    if (r18 >= r19)
1057      r0 |= r1, r18 -= r19;
1058  while (r19 >>= 1, r1 >>= 1);
1059
1060  return r2 * (int)r0;
1061}
1062*/
/* Disabled (#if 0) SHmedia implementation, kept for reference.  It
   follows the C code in the comment that precedes it: r18 and r19 hold
   the absolute values of dividend and divisor (the divisor pre-shifted
   left by 31), r1 is a single-bit mask that starts at 1 << 31, r0
   collects the quotient bits and r2 is the sign (+1 or -1) applied at
   the end.  tr0 holds the return address taken from r18.  */
1063GLOBAL(sdivsi3):
1064	pt/l	LOCAL(sdivsi3_dontadd), tr2
1065	pt/l	LOCAL(sdivsi3_loop), tr1
1066	ptabs/l	r18, tr0
1067	movi	0, r0
1068	movi	1, r1
1069	shari.l	r4, 31, r2
1070	shari.l	r5, 31, r3
1071	cmveq	r2, r1, r2
1072	cmveq	r3, r1, r3
1073	muls.l	r4, r2, r18
1074	muls.l	r5, r3, r19
1075	muls.l	r2, r3, r2
1076	shlli	r19, 31, r19
1077	shlli	r1, 31, r1
/* One quotient bit per iteration: when the shifted divisor is not larger
   than the running remainder, set the current bit and subtract.  */
1078LOCAL(sdivsi3_loop):
1079	bgtu	r19, r18, tr2
1080	or	r0, r1, r0
1081	sub	r18, r19, r18
1082LOCAL(sdivsi3_dontadd):
1083	shlri	r1, 1, r1
1084	shlri	r19, 1, r19
1085	bnei	r1, 0, tr1
/* Apply the sign, then add.l with r63 to leave a 32-bit result in r0.  */
1086	muls.l	r0, r2, r0
1087	add.l	r0, r63, r0
1088	blink	tr0, r63
1089#elif 0 /* ! 0 */
/* Disabled (#elif 0) SHmedia implementation, kept for reference.  It
   works on the absolute values of both operands (r20 = |r5|, r21 = |r4|),
   derives a reciprocal estimate of the divisor with the fixed-point
   multimedia instructions, and then refines quotient and remainder in
   the three multiply/subtract groups separated by blank lines below.
   r18 ends up as the sign mask of the quotient (from r4 xor r5).
   NOTE(review): the numeric details of the reciprocal estimate are not
   spelled out in this file; treat the sequence as opaque unless it is
   checked against the SHmedia instruction descriptions.  */
1090 // inputs: r4,r5
1091 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1092 // result in r0
1093GLOBAL(sdivsi3):
1094 // can create absolute value without extra latency,
1095 // but dependent on proper sign extension of inputs:
1096 // shari.l r5,31,r2
1097 // xor r5,r2,r20
1098 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1099 shari.l r5,31,r2
1100 ori r2,1,r2
1101 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1102 movi 0xffffffffffffbb0c,r19 // shift count equiv. 76
1103 shari.l r4,31,r3
1104 nsb r20,r0
1105 shlld r20,r0,r25
1106 shlri r25,48,r25
1107 sub r19,r25,r1
1108 mmulfx.w r1,r1,r2
1109 mshflo.w r1,r63,r1
1110 // If r4 was to be used in-place instead of r21, could use this sequence
1111 // to compute absolute:
1112 // sub r63,r4,r19 // compute absolute value of r4
1113 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1114 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1115 ori r3,1,r3
1116 mmulfx.w r25,r2,r2
1117 sub r19,r0,r0
1118 muls.l r4,r3,r21
1119 msub.w r1,r2,r2
1120 addi r2,-2,r1
1121 mulu.l r21,r1,r19
1122 mmulfx.w r2,r2,r2
1123 shlli r1,15,r1
1124 shlrd r19,r0,r19
1125 mulu.l r19,r20,r3
1126 mmacnfx.wl r25,r2,r1
1127 ptabs r18,tr0
1128 sub r21,r3,r25
1129
1130 mulu.l r25,r1,r2
1131 addi r0,14,r0
1132 xor r4,r5,r18
1133 shlrd r2,r0,r2
1134 mulu.l r2,r20,r3
1135 add r19,r2,r19
1136 shari.l r18,31,r18
1137 sub r25,r3,r25
1138
1139 mulu.l r25,r1,r2
1140 sub r25,r20,r25
1141 add r19,r18,r19
1142 shlrd r2,r0,r2
1143 mulu.l r2,r20,r3
1144 addi r25,1,r25
1145 add r19,r2,r19
1146
1147 cmpgt r25,r3,r25
1148 add.l r19,r25,r0
1149 xor r0,r18,r0
1150 blink tr0,r63
1151#else /* ! 0 && ! 0 */
1152
/* Active SHmedia implementation of signed 32-bit division.
   Entry points:
     sdivsi3 / sdivsi3_1  (non-PIC builds only): load the address of
                          div_table_internal into r20, 16 bits at a time
                          with movi + shori, then fall into sdivsi3_2.
     sdivsi3_2            expects the table address already in r20.
   In: r4 = dividend, r5 = divisor, r18 = return address.  Out: r0.
   The divisor is normalized, a first reciprocal estimate is looked up
   in the table and then refined in two steps (the "11 bit" and "22 bit"
   inverses named in the line comments), and the result is multiplied
   with the dividend and corrected.  The sN.M / uN.M tags in the line
   comments give the fixed-point format of each intermediate value.
   Lines indented by one extra space are independent work interleaved
   with the main dependency chain.  */
1153 // inputs: r4,r5
1154 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1155 // result in r0
1156	HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1157#ifndef __pic__
1158	FUNC(GLOBAL(sdivsi3))
1159GLOBAL(sdivsi3): /* this is the shcompact entry point */
1160 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1161 // with the SHcompact implementation, which clobbers tr1 / tr2.
1162 .global GLOBAL(sdivsi3_1)
1163GLOBAL(sdivsi3_1):
1164 .global GLOBAL(div_table_internal)
1165 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1166 shori GLOBAL(div_table_internal) & 65535, r20
1167#endif
1168 .global GLOBAL(sdivsi3_2)
1169 // div_table in r20
1170 // clobbered: r1,r18,r19,r21,r25,tr0
1171GLOBAL(sdivsi3_2):
1172 nsb r5, r1
1173 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
1174 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
1175 ldx.ub r20, r21, r19 // u0.8
1176 shari r25, 32, r25   // normalize to s2.30
1177 shlli r21, 1, r21
1178 muls.l r25, r19, r19 // s2.38
1179 ldx.w r20, r21, r21  // s2.14
1180  ptabs r18, tr0
1181 shari r19, 24, r19   // truncate to s2.14
1182 sub r21, r19, r19    // some 11 bit inverse in s1.14
1183 muls.l r19, r19, r21 // u0.28
1184  sub r63, r1, r1
1185  addi r1, 92, r1
1186 muls.l r25, r21, r18 // s2.58
1187 shlli r19, 45, r19   // multiply by two and convert to s2.58
1188  /* bubble */
1189 sub r19, r18, r18
1190 shari r18, 28, r18   // some 22 bit inverse in s1.30
1191 muls.l r18, r25, r0  // s2.60
1192  muls.l r18, r4, r25 // s32.30
1193  /* bubble */
1194 shari r0, 16, r19   // s-16.44
1195 muls.l r19, r18, r19 // s-16.74
1196  shari r25, 63, r0
1197  shari r4, 14, r18   // s19.-14
1198 shari r19, 30, r19   // s-16.44
1199 muls.l r19, r18, r19 // s15.30
1200  xor r21, r0, r21    // You could also use the constant 1 << 27.
1201  add r21, r25, r21
1202 sub r21, r19, r21
1203 shard r21, r1, r21
1204 sub r21, r0, r0
1205 blink tr0, r63
1206#ifndef __pic__
1207	ENDFUNC(GLOBAL(sdivsi3))
1208#endif
1209	ENDFUNC(GLOBAL(sdivsi3_2))
1210#endif
1211#elif defined __SHMEDIA__
/* Signed 32-bit division by shift-and-subtract, written in SHmedia code
   for the configuration tagged below.
   In: r4 = dividend, r5 = divisor, r18 = return address.  Out: r0.

   r18 and r19 first receive the sign masks (0 or -1) of the operands;
   xor followed by sub with the mask gives the absolute values in r20
   and r21, and r19 then becomes the sign mask of the quotient.

   r25 = |divisor| << 32 and r21 = r25 - 1.  Subtracting r21 from r20
   therefore removes |divisor| from the upper half of r20 (the running
   remainder) and adds 1 to the lower half (the quotient bit that was
   just shifted in as 0).  r25 also serves as the loop counter: addi.l
   steps its 32-bit value from 0 down to -32, giving 32 iterations.  */
1212/* m5compact-nofpu */
1213 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1214	.mode	SHmedia
1215	.section	.text..SHmedia32,"ax"
1216	.align	2
1217	FUNC(GLOBAL(sdivsi3))
1218GLOBAL(sdivsi3):
1219	pt/l LOCAL(sdivsi3_dontsub), tr0
1220	pt/l LOCAL(sdivsi3_loop), tr1
1221	ptabs/l r18,tr2
1222	shari.l r4,31,r18
1223	shari.l r5,31,r19
1224	xor r4,r18,r20
1225	xor r5,r19,r21
1226	sub.l r20,r18,r20
1227	sub.l r21,r19,r21
1228	xor r18,r19,r19
1229	shlli r21,32,r25
1230	addi r25,-1,r21
1231	addz.l r20,r63,r20
1232LOCAL(sdivsi3_loop):
1233	shlli r20,1,r20
1234	bgeu/u r21,r20,tr0
1235	sub r20,r21,r20
1236LOCAL(sdivsi3_dontsub):
1237	addi.l r25,-1,r25
1238	bnei r25,-32,tr1
/* Give the quotient its sign: xor with the mask, then subtract the mask;
   sub.l keeps the 32-bit result.  */
1239	xor r20,r19,r20
1240	sub.l r20,r19,r0
1241	blink tr2,r63
1242	ENDFUNC(GLOBAL(sdivsi3))
1243#else /* ! __SHMEDIA__ */
/* sdivsi3 for SH1..SH4 without FPU help: signed 32-bit division using
   the hardware divide-step instructions.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient; 0 is returned when the divisor is 0.
   Scratch: r1, r2, r3 and the T bit.

   r3:r1 is set up as the sign-extended 64-bit dividend (with the
   one-less adjustment for negative values), div0s initializes the
   divider state, and each of the 32 rotcl/div1 pairs produces one
   quotient bit.  The last rotcl shifts in the final bit and addc with
   r2 = 0 adds the T bit as the closing correction.

   The divide-by-zero exit uses LOCAL() like the other internal labels
   in this file, so it does not show up as an ordinary symbol in the
   object.  */
1244	FUNC(GLOBAL(sdivsi3))
1245GLOBAL(sdivsi3):
1246	mov	r4,r1
1247	mov	r5,r0
1248
1249	tst	r0,r0
1250	bt	LOCAL(sdivsi3_div0)
1251	mov	#0,r2
/* With r2 = 0 this div0s puts the sign of the dividend into T; the first
   subc turns it into 0 / -1 in r3 (the upper dividend word), the second
   subtracts it from r1.  */
1252	div0s	r2,r1
1253	subc	r3,r3
1254	subc	r2,r1
1255	div0s	r0,r3
1256	rotcl	r1
1257	div1	r0,r3
1258	rotcl	r1
1259	div1	r0,r3
1260	rotcl	r1
1261	div1	r0,r3
1262	rotcl	r1
1263	div1	r0,r3
1264	rotcl	r1
1265	div1	r0,r3
1266	rotcl	r1
1267	div1	r0,r3
1268	rotcl	r1
1269	div1	r0,r3
1270	rotcl	r1
1271	div1	r0,r3
1272	rotcl	r1
1273	div1	r0,r3
1274	rotcl	r1
1275	div1	r0,r3
1276	rotcl	r1
1277	div1	r0,r3
1278	rotcl	r1
1279	div1	r0,r3
1280	rotcl	r1
1281	div1	r0,r3
1282	rotcl	r1
1283	div1	r0,r3
1284	rotcl	r1
1285	div1	r0,r3
1286	rotcl	r1
1287	div1	r0,r3
1288	rotcl	r1
1289	div1	r0,r3
1290	rotcl	r1
1291	div1	r0,r3
1292	rotcl	r1
1293	div1	r0,r3
1294	rotcl	r1
1295	div1	r0,r3
1296	rotcl	r1
1297	div1	r0,r3
1298	rotcl	r1
1299	div1	r0,r3
1300	rotcl	r1
1301	div1	r0,r3
1302	rotcl	r1
1303	div1	r0,r3
1304	rotcl	r1
1305	div1	r0,r3
1306	rotcl	r1
1307	div1	r0,r3
1308	rotcl	r1
1309	div1	r0,r3
1310	rotcl	r1
1311	div1	r0,r3
1312	rotcl	r1
1313	div1	r0,r3
1314	rotcl	r1
1315	div1	r0,r3
1316	rotcl	r1
1317	div1	r0,r3
1318	rotcl	r1
1319	div1	r0,r3
1320	rotcl	r1
1321	addc	r2,r1
1322	rts
1323	mov	r1,r0
1324
1325
/* Divisor was zero: return 0 (loaded in the delay slot).  */
1326LOCAL(sdivsi3_div0):	rts
1327	mov	#0,r0
1328
1329	ENDFUNC(GLOBAL(sdivsi3))
1330#endif /* ! __SHMEDIA__ */
1331#endif /* ! __SH4__ */
1332#endif
1333#ifdef L_udivsi3_i4
1334
1335	.title "SH DIVIDE"
1336!! 4 byte integer Divide code for the Renesas SH
1337#ifdef __SH4__
1338!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1339!! and t bit
1340
/* __udivsi3_i4 for SH4: unsigned 32-bit division done in double precision.
   In:  r4 = dividend, r5 = divisor.
   Out: fpul = quotient.
   A divisor of 0 or 1 takes the "trivial" path, which returns the dividend
   unchanged.  Otherwise each operand has bit 31 flipped, is converted with
   the signed int-to-double conversion, and has the constant at L1 (2^31)
   added back, which gives its unsigned value as a double.  */
1341	.global	GLOBAL(udivsi3_i4)
1342	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1343GLOBAL(udivsi3_i4):
1344	mov #1,r1
1345	cmp/hi r1,r5	/* T = (divisor > 1), unsigned */
1346	bf trivial
1347	rotr r1	/* r1 = 0x80000000 */
1348	xor r1,r4	/* flip bit 31 of the dividend */
1349	lds r4,fpul
1350	mova L1,r0
1351#ifdef FMOVD_WORKS
1352	fmov.d @r0+,dr4
1353#else
1354	fmov.s @r0+,DR40
1355	fmov.s @r0,DR41
1356#endif
1357	float fpul,dr0
1358	xor r1,r5	/* flip bit 31 of the divisor */
1359	lds r5,fpul
1360	float fpul,dr2
1361	fadd dr4,dr0	/* undo the bias: dr0 = dividend as a double */
1362	fadd dr4,dr2	/* dr2 = divisor as a double */
1363	fdiv dr2,dr0
1364	rts
1365	ftrc dr0,fpul	/* rts delay slot: truncate the quotient into fpul */
1366
1367trivial:
1368	rts
1369	lds r4,fpul	/* rts delay slot: result = dividend */
1370
1371	.align 2
1372#ifdef FMOVD_WORKS
1373	.align 3	! make double below 8 byte aligned.
1374#endif
1375L1:
1376	.double 2147483648
1377
1378	ENDFUNC(GLOBAL(udivsi3_i4))
1379#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1380#if ! __SH5__ || __SH5__ == 32
1381!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
/* __udivsi3_i4 for SH5 with FPU (SHmedia).
   In:  r4 = dividend, r5 = divisor; both are zero-extended to 64 bits.
   The operands are moved to FP registers, converted to double, divided,
   and the quotient is truncated to a 64-bit integer in dr32; fr33 is then
   copied to fr32.  Returns through tr0, loaded from r18.
   NOTE(review): the comment preceding this block says the result is in
   fpul; presumably fr32 is what the caller reads as fpul - confirm.  */
1382	.mode	SHmedia
1383	.global	GLOBAL(udivsi3_i4)
1384	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1385GLOBAL(udivsi3_i4):
1386	addz.l	r4,r63,r20
1387	addz.l	r5,r63,r21
1388	fmov.qd	r20,dr0
1389	fmov.qd	r21,dr32
1390	ptabs	r18,tr0
1391	float.qd dr0,dr0
1392	float.qd dr32,dr32
1393	fdiv.d	dr0,dr32,dr0
1394	ftrc.dq dr0,dr32
1395	fmov.s fr33,fr32
1396	blink tr0,r63
1397
1398	ENDFUNC(GLOBAL(udivsi3_i4))
1399#endif /* ! __SH5__ || __SH5__ == 32 */
1400#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1401!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1402
/* __udivsi3_i4 for SH4 single-precision default modes.
   In:  r4 = dividend, r5 = divisor.
   Out: fpul = quotient.
   Same bias-and-convert scheme as the plain SH4 version, but fpscr is
   saved on the stack, replaced by the word stored at L1 (0x80000, or
   0x180000 when FMOVD_WORKS is defined) for the computation, and restored
   in the rts delay slot.  A divisor of 0 or 1 takes the "trivial" path
   before fpscr is touched and returns the dividend unchanged.  */
1403	.global	GLOBAL(udivsi3_i4)
1404	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1405GLOBAL(udivsi3_i4):
1406	mov #1,r1
1407	cmp/hi r1,r5	/* T = (divisor > 1), unsigned */
1408	bf trivial
1409	sts.l fpscr,@-r15	/* save the caller fpscr */
1410	mova L1,r0
1411	lds.l @r0+,fpscr	/* load the fpscr word at L1; r0 now points at the double */
1412	rotr r1	/* r1 = 0x80000000 */
1413	xor r1,r4
1414	lds r4,fpul
1415#ifdef FMOVD_WORKS
1416	fmov.d @r0+,dr4
1417#else
1418	fmov.s @r0+,DR40
1419	fmov.s @r0,DR41
1420#endif
1421	float fpul,dr0
1422	xor r1,r5
1423	lds r5,fpul
1424	float fpul,dr2
1425	fadd dr4,dr0
1426	fadd dr4,dr2
1427	fdiv dr2,dr0
1428	ftrc dr0,fpul
1429	rts
1430	lds.l @r15+,fpscr	/* rts delay slot: restore the caller fpscr */
1431
	/* With FMOVD_WORKS, "trivial" is 8-byte aligned; its two instructions
	   plus the 4-byte word at L1 put the double on an 8-byte boundary.  */
1432#ifdef FMOVD_WORKS
1433	.align 3	! make double below 8 byte aligned.
1434#endif
1435trivial:
1436	rts
1437	lds r4,fpul
1438
1439	.align 2
1440L1:
1441#ifndef FMOVD_WORKS
1442	.long 0x80000
1443#else
1444	.long 0x180000
1445#endif
1446	.double 2147483648
1447
1448	ENDFUNC(GLOBAL(udivsi3_i4))
1449#endif /* ! __SH4__ */
1450#endif
1451
1452#ifdef L_udivsi3
1453/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1454   sh2e/sh3e code.  */
1455#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__OpenBSD__)
1456
1457!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1458	.global	GLOBAL(udivsi3)
1459	HIDDEN_FUNC(GLOBAL(udivsi3))
1460
1461#if __SHMEDIA__
1462#if __SH5__ == 32
1463	.section	.text..SHmedia32,"ax"
1464#else
1465	.text
1466#endif
1467	.align	2
1468#if 0
1469/* The assembly code that follows is a hand-optimized version of the C
1470   code that follows.  Note that the registers that are modified are
1471   exactly those listed as clobbered in the patterns udivsi3_i1 and
1472   udivsi3_i1_media.
1473
1474unsigned
1475__udivsi3 (i, j)
1476    unsigned i, j;
1477{
1478  register unsigned long long r0 asm ("r0") = 0;
1479  register unsigned long long r18 asm ("r18") = 1;
1480  register unsigned long long r4 asm ("r4") = i;
1481  register unsigned long long r19 asm ("r19") = j;
1482
1483  r19 <<= 31;
1484  r18 <<= 31;
1485  do
1486    if (r4 >= r19)
1487      r0 |= r18, r4 -= r19;
1488  while (r19 >>= 1, r18 >>= 1);
1489
1490  return r0;
1491}
1492*/
/* Shift-and-subtract __udivsi3 for SHmedia.  This variant sits in the
   "#if 0" arm of the conditional, so it is not assembled; it mirrors the
   C code in the comment above it.
   In:  r4 = dividend, r5 = divisor.  Out: r0 = quotient.
   r18 = current quotient bit, r19 = divisor shifted to line up with it.  */
1493GLOBAL(udivsi3):
1494	pt/l	LOCAL(udivsi3_dontadd), tr2
1495	pt/l	LOCAL(udivsi3_loop), tr1
1496	ptabs/l	r18, tr0	/* return address, taken before r18 is reused */
1497	movi	0, r0
1498	movi	1, r18
1499	addz.l	r5, r63, r19
1500	addz.l	r4, r63, r4
1501	shlli	r19, 31, r19
1502	shlli	r18, 31, r18
1503LOCAL(udivsi3_loop):
1504	bgtu	r19, r4, tr2	/* shifted divisor too large: leave this bit clear */
1505	or	r0, r18, r0
1506	sub	r4, r19, r4
1507LOCAL(udivsi3_dontadd):
1508	shlri	r18, 1, r18
1509	shlri	r19, 1, r19
1510	bnei	r18, 0, tr1	/* loop until the bit mask has shifted out */
1511	blink	tr0, r63
1512#else
/* __udivsi3 for SHmedia, multiply-based.
   r22 holds the zero-extended divisor throughout.  r18 accumulates the
   quotient and r25 the remaining dividend over three estimate-and-subtract
   rounds; a final compare adds 0 or 1 to the quotient.
   NOTE(review): r21 appears to hold a fixed-point reciprocal estimate of
   the normalized divisor - confirm against the SH5 multimedia
   instruction definitions before relying on this description.  */
1513GLOBAL(udivsi3):
1514 // inputs: r4,r5
1515 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1516 // result in r0.
1517 addz.l r5,r63,r22
1518 nsb r22,r0
1519 shlld r22,r0,r25
1520 shlri r25,48,r25
1521 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1522 sub r20,r25,r21
1523 mmulfx.w r21,r21,r19
1524 mshflo.w r21,r63,r21
1525 ptabs r18,tr0
1526 mmulfx.w r25,r19,r19
1527 sub r20,r0,r0
1528 /* bubble */
1529 msub.w r21,r19,r19
1530 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1531		    before the msub.w, but we need a different value for
1532		    r19 to keep errors under control.  */
 /* Round 1: r18 = first quotient estimate, r25 = r4 - r18 * divisor.  */
1533 mulu.l r4,r21,r18
1534 mmulfx.w r19,r19,r19
1535 shlli r21,15,r21
1536 shlrd r18,r0,r18
1537 mulu.l r18,r22,r20
1538 mmacnfx.wl r25,r19,r21
1539 /* bubble */
1540 sub r4,r20,r25
1541
 /* Round 2: estimate from the remainder, add to r18, reduce r25.  */
1542 mulu.l r25,r21,r19
1543 addi r0,14,r0
1544 /* bubble */
1545 shlrd r19,r0,r19
1546 mulu.l r19,r22,r20
1547 add r18,r19,r18
1548 /* bubble */
1549 sub.l r25,r20,r25
1550
 /* Round 3, then the final 0/1 adjustment via cmpgt.  */
1551 mulu.l r25,r21,r19
1552 addz.l r25,r63,r25
1553 sub r25,r22,r25
1554 shlrd r19,r0,r19
1555 mulu.l r19,r22,r20
1556 addi r25,1,r25
1557 add r18,r19,r18
1558
1559 cmpgt r25,r20,r25
1560 add.l r18,r25,r0
1561 blink tr0,r63
1562#endif
1563#elif defined (__SHMEDIA__)
1564/* m5compact-nofpu - more emphasis on code size than on speed, but don't
1565   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1566   So use a short shmedia loop.  */
1567 // clobbered: r20,r21,r25,tr0,tr1,tr2
1568	.mode	SHmedia
1569	.section	.text..SHmedia32,"ax"
1570	.align	2
/* __udivsi3, SHmedia shift-and-subtract variant (m5compact-nofpu).
   In:  r4 = dividend, r5 = divisor.  Out: r0 = quotient.
   r21 = (divisor << 32) - 1, so each subtraction also adds 1 to the low
   word of r20, where the quotient bits collect.  The loop runs 32 times
   (r25 steps through -1 ... -32).  Returns through tr2, loaded from r18.  */
1571GLOBAL(udivsi3):
1572 pt/l LOCAL(udivsi3_dontsub), tr0
1573 pt/l LOCAL(udivsi3_loop), tr1
1574 ptabs/l r18,tr2
1575 shlli r5,32,r25
1576 addi r25,-1,r21
1577 addz.l r4,r63,r20
1578LOCAL(udivsi3_loop):
1579 shlli r20,1,r20
1580 bgeu/u r21,r20,tr0	/* skip the subtraction while r21 >= r20 */
1581 sub r20,r21,r20
1582LOCAL(udivsi3_dontsub):
1583 addi.l r25,-1,r25
1584 bnei r25,-32,tr1
1585 add.l r20,r63,r0	/* low 32 bits of r20 are the quotient */
1586 blink tr2,r63
1587#else /* ! defined (__SHMEDIA__) */
/* __udivsi3 for SH1..SH4 (SHcompact), with its local helper sequences.
   In:  r4 = dividend, r5 = divisor.  Out: r0 = quotient.
   pr is saved on the stack because the helpers are reached with bsr.
   A divisor that fits in 16 bits (r5 == extu.w r5) is handled with the
   div8/div7 helpers; any other divisor goes to large_divisor, which uses
   divx4.  */

/* div8 runs 8 div1 steps, div7 runs 7; the last one sits in the rts
   delay slot.  */
1588LOCAL(div8):
1589 div1 r5,r4
1590LOCAL(div7):
1591 div1 r5,r4; div1 r5,r4; div1 r5,r4
1592 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1593
/* divx4 runs 4 div1 steps (the last in the rts delay slot) with a
   rotcl r0 after each of the first three.  */
1594LOCAL(divx4):
1595 div1 r5,r4; rotcl r0
1596 div1 r5,r4; rotcl r0
1597 div1 r5,r4; rotcl r0
1598 rts; div1 r5,r4
1599
1600GLOBAL(udivsi3):
1601 sts.l pr,@-r15
1602 extu.w r5,r0
1603 cmp/eq r5,r0	/* T = divisor fits in 16 bits */
 /* Only the bf/s form has a delay slot: there div0u runs on both paths.
    With the sh1 bf form, div0u runs only on fall-through and is repeated
    at large_divisor.  */
1604#ifdef __sh1__
1605 bf LOCAL(large_divisor)
1606#else
1607 bf/s LOCAL(large_divisor)
1608#endif
1609 div0u
1610 swap.w r4,r0
1611 shlr16 r4
1612 bsr LOCAL(div8)
1613 shll16 r5	/* bsr delay slot: move the divisor to the upper half */
1614 bsr LOCAL(div7)
1615 div1 r5,r4	/* bsr delay slot: together with div7 this makes 8 steps */
1616 xtrct r4,r0
1617 xtrct r0,r4
1618 bsr LOCAL(div8)
1619 swap.w r4,r4
1620 bsr LOCAL(div7)
1621 div1 r5,r4
1622 lds.l @r15+,pr
1623 xtrct r4,r0
1624 swap.w r0,r0
1625 rotcl r0
1626 rts
1627 shlr16 r5	/* rts delay slot: undo the earlier shll16 r5 */
1628
1629LOCAL(large_divisor):
1630#ifdef __sh1__
1631 div0u
1632#endif
1633 mov #0,r0
1634 xtrct r4,r0
1635 xtrct r0,r4
 /* Four calls to divx4, each with a rotcl r0 in the bsr delay slot.  */
1636 bsr LOCAL(divx4)
1637 rotcl r0
1638 bsr LOCAL(divx4)
1639 rotcl r0
1640 bsr LOCAL(divx4)
1641 rotcl r0
1642 bsr LOCAL(divx4)
1643 rotcl r0
1644 lds.l @r15+,pr
1645 rts
1646 rotcl r0	/* rts delay slot: shift in the final bit */
1647
1648	ENDFUNC(GLOBAL(udivsi3))
1649#endif /* ! __SHMEDIA__ */
1650#endif /* __SH4__ */
1651#endif /* L_udivsi3 */
1652
1653#ifdef L_udivdi3
1654#ifdef __SHMEDIA__
1655	.mode	SHmedia
1656	.section	.text..SHmedia32,"ax"
1657	.align	2
/* __udivdi3 (SHmedia): unsigned 64-bit division.
   In:  r2 = dividend, r3 = divisor.
   Out: r2 = quotient.
   Writes r0, r1, r2, r4-r9, r19-r22, r25 and tr0; returns through tr0,
   loaded from r18.  The hidden alias udivdi3_internal is the entry point
   that __divdi3 branches to.
   The code normalizes the divisor (r22 = shift count from nsb), builds a
   reciprocal estimate in r1 (see the inline comments), and then takes
   either the small-divisor path (three divide steps) or, when
   r9 = 32 - r22 is positive, the large_divisor path.  */
1658	.global	GLOBAL(udivdi3)
1659	FUNC(GLOBAL(udivdi3))
1660GLOBAL(udivdi3):
1661	HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1662	shlri r3,1,r4
1663	nsb r4,r22
1664	shlld r3,r22,r6
1665	shlri r6,49,r5
1666	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1667	sub r21,r5,r1
1668	mmulfx.w r1,r1,r4
1669	mshflo.w r1,r63,r1
1670	sub r63,r22,r20 // r63 == 64 % 64
1671	mmulfx.w r5,r4,r4
1672	pta LOCAL(large_divisor),tr0
1673	addi r20,32,r9
1674	msub.w r1,r4,r1
1675	madd.w r1,r1,r1
1676	mmulfx.w r1,r1,r4
1677	shlri r6,32,r7
1678	bgt/u r9,r63,tr0 // large_divisor
1679	mmulfx.w r5,r4,r4
1680	shlri r2,32+14,r19
1681	addi r22,-31,r0
1682	msub.w r1,r4,r1
1683
1684	mulu.l r1,r7,r4
1685	addi r1,-3,r5
1686	mulu.l r5,r19,r5
1687	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1688	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1689	                 the case may be, %0000000000000000 000.11111111111, still */
1690	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1691	mulu.l r5,r3,r8
1692	mshalds.l r1,r21,r1
1693	shari r4,26,r4
1694	shlld r8,r0,r8
1695	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1696	sub r2,r8,r2
1697	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1698
1699	shlri r2,22,r21
1700	mulu.l r21,r1,r21
1701	shlld r5,r0,r8
1702	addi r20,30-22,r0
1703	shlrd r21,r0,r21
1704	mulu.l r21,r3,r5
1705	add r8,r21,r8
1706	mcmpgt.l r21,r63,r21 // See Note 1
1707	addi r20,30,r0
1708	mshfhi.l r63,r21,r21
1709	sub r2,r5,r2
1710	andc r2,r21,r2
1711
1712	/* small divisor: need a third divide step */
1713	mulu.l r2,r1,r7
1714	ptabs r18,tr0
1715	addi r2,1,r2
1716	shlrd r7,r0,r7
1717	mulu.l r7,r3,r5
1718	add r8,r7,r8
1719	sub r2,r3,r2
1720	cmpgt r2,r5,r5	/* r5 = final 0/1 quotient adjustment */
1721	add r8,r5,r2	/* quotient: accumulated partial results plus adjustment */
1722	/* could test r3 here to check for divide by zero.  */
1723	blink tr0,r63
1724
1725LOCAL(large_divisor):
1726	mmulfx.w r5,r4,r4
1727	shlrd r2,r9,r25
1728	shlri r25,32,r8
1729	msub.w r1,r4,r1
1730
1731	mulu.l r1,r7,r4
1732	addi r1,-3,r5
1733	mulu.l r5,r8,r5
1734	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1735	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1736	                 the case may be, %0000000000000000 000.11111111111, still */
1737	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1738	shlri r5,14-1,r8
1739	mulu.l r8,r7,r5
1740	mshalds.l r1,r21,r1
1741	shari r4,26,r4
1742	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1743	sub r25,r5,r25
1744	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
1745
1746	shlri r25,22,r21
1747	mulu.l r21,r1,r21
1748	pta LOCAL(no_lo_adj),tr0
1749	addi r22,32,r0
1750	shlri r21,40,r21
1751	mulu.l r21,r7,r5
1752	add r8,r21,r8
1753	shlld r2,r0,r2
1754	sub r25,r5,r25
1755	bgtu/u r7,r25,tr0 // no_lo_adj
1756	addi r8,1,r8
1757	sub r25,r7,r25
1758LOCAL(no_lo_adj):
1759	mextr4 r2,r25,r2
1760
1761	/* large_divisor: only needs a few adjustments.  */
1762	mulu.l r8,r6,r5
1763	ptabs r18,tr0
1764	/* bubble */
1765	cmpgtu r5,r2,r5	/* r5 = 1 if the estimate in r8 is one too large */
1766	sub r8,r5,r2	/* quotient */
1767	blink tr0,r63
1768	ENDFUNC(GLOBAL(udivdi3))
1769/* Note 1: To shift the result of the second divide stage so that the result
1770   always fits into 32 bits, yet we still reduce the rest sufficiently
1771   would require a lot of instructions to do the shifts just right.  Using
1772   the full 64 bit shift result to multiply with the divisor would require
1773   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1774   Fortunately, if the upper 32 bits of the shift result are nonzero, we
1775   know that the rest after taking this partial result into account will
1776   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1777   upper 32 bits of the partial result are nonzero.  */
1778#endif /* __SHMEDIA__ */
1779#endif /* L_udivdi3 */
1780
1781#ifdef L_divdi3
1782#ifdef __SHMEDIA__
1783	.mode	SHmedia
1784	.section	.text..SHmedia32,"ax"
1785	.align	2
/* __divdi3 (SHmedia): signed 64-bit division built on udivdi3_internal.
   In:  r2 = dividend, r3 = divisor.  Out: r2 = quotient.
   Both operands are replaced by their absolute values (xor and subtract
   with the sign masks r22 / r23).  If the two signs are equal, control
   branches straight to udivdi3_internal, which returns to the original
   caller through r18.  Otherwise udivdi3_internal is called with r18 as
   the link register, its result is negated, and the return goes through
   tr1; this relies on the callee leaving tr1 untouched.  */
1786	.global	GLOBAL(divdi3)
1787	FUNC(GLOBAL(divdi3))
1788GLOBAL(divdi3):
1789	pta GLOBAL(udivdi3_internal),tr0
1790	shari r2,63,r22	/* r22 = 0 or -1 from the dividend sign */
1791	shari r3,63,r23	/* r23 = 0 or -1 from the divisor sign */
1792	xor r2,r22,r2
1793	xor r3,r23,r3
1794	sub r2,r22,r2	/* r2 = |dividend| */
1795	sub r3,r23,r3	/* r3 = |divisor| */
1796	beq/u r22,r23,tr0	/* same sign: no negation needed, tail-branch */
1797	ptabs r18,tr1	/* keep the real return address in tr1 */
1798	blink tr0,r18	/* call udivdi3_internal */
1799	sub r63,r2,r2	/* negate the quotient */
1800	blink tr1,r63
1801	ENDFUNC(GLOBAL(divdi3))
1802#endif /* __SHMEDIA__ */
1803#endif /* L_divdi3 */
1804
1805#ifdef L_umoddi3
1806#ifdef __SHMEDIA__
1807	.mode	SHmedia
1808	.section	.text..SHmedia32,"ax"
1809	.align	2
/* __umoddi3 (SHmedia): unsigned 64-bit remainder.
   In:  r2 = dividend, r3 = divisor.
   Out: r2 = remainder.
   Writes r0, r1, r2, r4-r9, r19-r22, r25 and tr0; returns through tr0,
   loaded from r18.  The hidden alias umoddi3_internal is the entry point
   that __moddi3 branches to.
   The instruction sequence follows __udivdi3 step for step, but keeps the
   running remainder instead of accumulating the quotient.  In the
   large_divisor path the remainder is shifted right by r22 before the
   return (presumably undoing the divisor normalization - confirm).  */
1810	.global	GLOBAL(umoddi3)
1811	FUNC(GLOBAL(umoddi3))
1812GLOBAL(umoddi3):
1813	HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1814	shlri r3,1,r4
1815	nsb r4,r22
1816	shlld r3,r22,r6
1817	shlri r6,49,r5
1818	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1819	sub r21,r5,r1
1820	mmulfx.w r1,r1,r4
1821	mshflo.w r1,r63,r1
1822	sub r63,r22,r20 // r63 == 64 % 64
1823	mmulfx.w r5,r4,r4
1824	pta LOCAL(large_divisor),tr0
1825	addi r20,32,r9
1826	msub.w r1,r4,r1
1827	madd.w r1,r1,r1
1828	mmulfx.w r1,r1,r4
1829	shlri r6,32,r7
1830	bgt/u r9,r63,tr0 // large_divisor
1831	mmulfx.w r5,r4,r4
1832	shlri r2,32+14,r19
1833	addi r22,-31,r0
1834	msub.w r1,r4,r1
1835
1836	mulu.l r1,r7,r4
1837	addi r1,-3,r5
1838	mulu.l r5,r19,r5
1839	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1840	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1841	                 the case may be, %0000000000000000 000.11111111111, still */
1842	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1843	mulu.l r5,r3,r5
1844	mshalds.l r1,r21,r1
1845	shari r4,26,r4
1846	shlld r5,r0,r5
1847	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1848	sub r2,r5,r2
1849	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1850
1851	shlri r2,22,r21
1852	mulu.l r21,r1,r21
1853	addi r20,30-22,r0
1854	/* bubble */ /* could test r3 here to check for divide by zero.  */
1855	shlrd r21,r0,r21
1856	mulu.l r21,r3,r5
1857	mcmpgt.l r21,r63,r21 // See Note 1
1858	addi r20,30,r0
1859	mshfhi.l r63,r21,r21
1860	sub r2,r5,r2
1861	andc r2,r21,r2
1862
1863	/* small divisor: need a third divide step */
1864	mulu.l r2,r1,r7
1865	ptabs r18,tr0
1866	sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1867	shlrd r7,r0,r7
1868	mulu.l r7,r3,r5
1869	/* bubble */
1870	addi r8,1,r7
1871	cmpgt r7,r5,r7
1872	cmvne r7,r8,r2	/* if r7 != 0, continue with rest - r3 instead of rest */
1873	sub r2,r5,r2
1874	blink tr0,r63
1875
1876LOCAL(large_divisor):
1877	mmulfx.w r5,r4,r4
1878	shlrd r2,r9,r25
1879	shlri r25,32,r8
1880	msub.w r1,r4,r1
1881
1882	mulu.l r1,r7,r4
1883	addi r1,-3,r5
1884	mulu.l r5,r8,r5
1885	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1886	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1887	                 the case may be, %0000000000000000 000.11111111111, still */
1888	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1889	shlri r5,14-1,r8
1890	mulu.l r8,r7,r5
1891	mshalds.l r1,r21,r1
1892	shari r4,26,r4
1893	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1894	sub r25,r5,r25
1895	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
1896
1897	shlri r25,22,r21
1898	mulu.l r21,r1,r21
1899	pta LOCAL(no_lo_adj),tr0
1900	addi r22,32,r0
1901	shlri r21,40,r21
1902	mulu.l r21,r7,r5
1903	add r8,r21,r8
1904	shlld r2,r0,r2
1905	sub r25,r5,r25
1906	bgtu/u r7,r25,tr0 // no_lo_adj
1907	addi r8,1,r8
1908	sub r25,r7,r25
1909LOCAL(no_lo_adj):
1910	mextr4 r2,r25,r2
1911
1912	/* large_divisor: only needs a few adjustments.  */
1913	mulu.l r8,r6,r5
1914	ptabs r18,tr0
1915	add r2,r6,r7
1916	cmpgtu r5,r2,r8
1917	cmvne r8,r7,r2	/* if r5 > r2, continue with r2 + r6 instead of r2 */
1918	sub r2,r5,r2
1919	shlrd r2,r22,r2
1920	blink tr0,r63
1921	ENDFUNC(GLOBAL(umoddi3))
1922/* Note 1: To shift the result of the second divide stage so that the result
1923   always fits into 32 bits, yet we still reduce the rest sufficiently
1924   would require a lot of instructions to do the shifts just right.  Using
1925   the full 64 bit shift result to multiply with the divisor would require
1926   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1927   Fortunately, if the upper 32 bits of the shift result are nonzero, we
1928   know that the rest after taking this partial result into account will
1929   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1930   upper 32 bits of the partial result are nonzero.  */
1931#endif /* __SHMEDIA__ */
1932#endif /* L_umoddi3 */
1933
1934#ifdef L_moddi3
1935#ifdef __SHMEDIA__
1936	.mode	SHmedia
1937	.section	.text..SHmedia32,"ax"
1938	.align	2
/* __moddi3 (SHmedia): signed 64-bit remainder built on umoddi3_internal.
   In:  r2 = dividend, r3 = divisor.  Out: r2 = remainder.
   Both operands are replaced by their absolute values.  Only the dividend
   sign decides whether the result is negated: when the dividend was
   non-negative (r22 == 0), control branches straight to umoddi3_internal,
   which returns to the original caller through r18.  Otherwise
   umoddi3_internal is called with r18 as the link register, its result is
   negated, and the return goes through tr1; this relies on the callee
   leaving tr1 untouched.  */
1939	.global	GLOBAL(moddi3)
1940	FUNC(GLOBAL(moddi3))
1941GLOBAL(moddi3):
1942	pta GLOBAL(umoddi3_internal),tr0
1943	shari r2,63,r22	/* r22 = 0 or -1 from the dividend sign */
1944	shari r3,63,r23	/* r23 = 0 or -1 from the divisor sign */
1945	xor r2,r22,r2
1946	xor r3,r23,r3
1947	sub r2,r22,r2	/* r2 = |dividend| */
1948	sub r3,r23,r3	/* r3 = |divisor| */
1949	beq/u r22,r63,tr0	/* dividend was non-negative: tail-branch */
1950	ptabs r18,tr1	/* keep the real return address in tr1 */
1951	blink tr0,r18	/* call umoddi3_internal */
1952	sub r63,r2,r2	/* negate the remainder */
1953	blink tr1,r63
1954	ENDFUNC(GLOBAL(moddi3))
1955#endif /* __SHMEDIA__ */
1956#endif /* L_moddi3 */
1957
1958#ifdef L_set_fpscr
1959#if !defined (__SH2A_NOFPU__)
1960#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1961#ifdef __SH5__
1962	.mode	SHcompact
1963#endif
/* __set_fpscr: load r4 into FPSCR and refresh the two-word array
   fpscr_values with two variants of that value.
   In:  r4 = new FPSCR value.
   Writes r0, r1, r2, r3; in the PIC case r12 is used as well but is saved
   and restored on the stack.
   The two stored words differ only in bits 19 and 20 of the value:
     - one has bit 19 set, and bit 20 set only when FMOVD_WORKS is defined;
     - the other has both bits clear.
   On __SH4__ / __SH2A_DOUBLE__ the first variant goes to offset 4 and the
   second to offset 0; on the other targets the offsets are swapped.
   NOTE(review): bits 19 and 20 are presumably the FPSCR PR and SZ mode
   bits - confirm against the hardware manual.  */
1964	.global GLOBAL(set_fpscr)
1965	HIDDEN_FUNC(GLOBAL(set_fpscr))
1966GLOBAL(set_fpscr):
1967	lds r4,fpscr
	/* r1 = address of fpscr_values (through the GOT when PIC).  */
1968#ifdef __PIC__
1969	mov.l	r12,@-r15
1970	mova	LOCAL(set_fpscr_L0),r0
1971	mov.l	LOCAL(set_fpscr_L0),r12
1972	add	r0,r12
1973	mov.l	LOCAL(set_fpscr_L1),r0
1974	mov.l	@(r0,r12),r1
1975	mov.l	@r15+,r12
1976#else
1977	mov.l LOCAL(set_fpscr_L1),r1
1978#endif
	/* Swap the halves so that bits 19 and 20 land in bits 3 and 4 of r0,
	   where the 8-bit immediate forms of or / xor can reach them.  */
1979	swap.w r4,r0
1980	or #24,r0
1981#ifndef FMOVD_WORKS
1982	xor #16,r0
1983#endif
1984#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1985	swap.w r0,r3
1986	mov.l r3,@(4,r1)
1987#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1988	swap.w r0,r2
1989	mov.l r2,@r1
1990#endif
	/* Clear whichever of the two bits is still set.  */
1991#ifndef FMOVD_WORKS
1992	xor #8,r0
1993#else
1994	xor #24,r0
1995#endif
1996#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1997	swap.w r0,r2
1998	rts
1999	mov.l r2,@r1
2000#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2001	swap.w r0,r3
2002	rts
2003	mov.l r3,@(4,r1)
2004#endif
2005	.align 2
2006#ifdef __PIC__
2007LOCAL(set_fpscr_L0):
2008	.long _GLOBAL_OFFSET_TABLE_
2009LOCAL(set_fpscr_L1):
2010	.long GLOBAL(fpscr_values@GOT)
2011#else
2012LOCAL(set_fpscr_L1):
2013	.long GLOBAL(fpscr_values)
2014#endif
2015
2016	ENDFUNC(GLOBAL(set_fpscr))
	/* Storage for the two words written above: 8 bytes, declared as a
	   common symbol unless NO_FPSCR_VALUES is defined.  */
2017#ifndef NO_FPSCR_VALUES
2018#ifdef __ELF__
2019        .comm   GLOBAL(fpscr_values),8,4
2020#else
2021        .comm   GLOBAL(fpscr_values),8
2022#endif /* ELF */
2023#endif /* NO_FPSCR_VALUES */
2024#endif /* SH2E / SH3E / SH4 */
2025#endif /* __SH2A_NOFPU__ */
2026#endif /* L_set_fpscr */
2027#ifdef L_ic_invalidate
2028#if __SH5__ == 32
2029	.mode	SHmedia
2030	.section	.text..SHmedia32,"ax"
2031	.align	2
/* __init_trampoline (SHmedia): write 16 bytes at the address in r0.
   In:  r0 = destination address, r2 and r3 = the two 32-bit words stored
        at offsets 8 and 12.
   A fixed 64-bit pattern, built in r20 in an endian-dependent order, is
   stored at offset 0 (presumably the trampoline instruction words -
   confirm).  There is no return instruction: execution falls through
   into __ic_invalidate below, which returns through r18.  */
2032	.global	GLOBAL(init_trampoline)
2033	HIDDEN_FUNC(GLOBAL(init_trampoline))
2034GLOBAL(init_trampoline):
2035	st.l	r0,8,r2
2036#ifdef __LITTLE_ENDIAN__
2037	movi	9,r20
2038	shori	0x402b,r20
2039	shori	0xd101,r20
2040	shori	0xd002,r20
2041#else
2042	movi	0xffffffffffffd002,r20
2043	shori	0xd101,r20
2044	shori	0x402b,r20
2045	shori	9,r20
2046#endif
2047	st.q	r0,0,r20
2048	st.l	r0,12,r3
2049	ENDFUNC(GLOBAL(init_trampoline))
/* __ic_invalidate (SHmedia).
   In:  r0 = address.
   Issues ocbwb and icbi on that address, with synco between them and
   synci after, then returns through tr0 (loaded from r18).  */
2050	.global	GLOBAL(ic_invalidate)
2051	HIDDEN_FUNC(GLOBAL(ic_invalidate))
2052GLOBAL(ic_invalidate):
2053	ocbwb	r0,0
2054	synco
2055	icbi	r0, 0
2056	ptabs	r18, tr0
2057	synci
2058	blink	tr0, r63
2059	ENDFUNC(GLOBAL(ic_invalidate))
2060#elif defined(__SH4A__)
/* __ic_invalidate for SH4A.
   In:  r4 = address.
   Issues ocbwb on the address, then synco, and issues icbi on the same
   address in the rts delay slot.  */
2061	.global GLOBAL(ic_invalidate)
2062	HIDDEN_FUNC(GLOBAL(ic_invalidate))
2063GLOBAL(ic_invalidate):
2064	ocbwb	@r4
2065	synco
2066	rts
2067	icbi	@r4	/* rts delay slot */
2068	ENDFUNC(GLOBAL(ic_invalidate))
2069#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* __ic_invalidate for SH4: dispatch stub into __ic_invalidate_array.
   In:  r4 = address.
   r1 is loaded with the address of ic_invalidate_array (through the GOT
   when __pic__ is defined).  After ocbwb on the address, the stub jumps to
   r1 + ((r4 - r1) & mask), where mask is the word at offset 8 of the
   array; the word at offset 4 of the array is loaded into r0 in the jmp
   delay slot.  Note that r4 holds r4 - r1 when the array code is entered.
   ENDFUNC is placed after the literal-pool conditional so that it is
   emitted for both the PIC and the non-PIC build.  */
2070	/* For system code, we use ic_invalidate_line_i, but user code
2071	   needs a different mechanism.  A kernel call is generally not
2072	   available, and it would also be slow.  Different SH4 variants use
2073	   different sizes and associativities of the Icache.  We use a small
2074	   bit of dispatch code that can be put hidden in every shared object,
2075	   which calls the actual processor-specific invalidation code in a
2076	   separate module.
2077	   Or if you have operating system support, the OS could mmap the
2078	   processor-specific code from a single page, since it is highly
2079	   repetitive.  */
2080	.global GLOBAL(ic_invalidate)
2081	HIDDEN_FUNC(GLOBAL(ic_invalidate))
2082GLOBAL(ic_invalidate):
2083	mov.l	0f,r1
2084#ifdef __pic__
2085	mova	0f,r0
2086	mov.l	1f,r2
2087	add	r1,r0	/* r0 = address of the GOT */
2088	mov.l	@(r0,r2),r1	/* r1 = GOT entry for ic_invalidate_array */
2089#endif
2090	ocbwb	@r4
2091	mov.l	@(8,r1),r0	/* r0 = mask word from the array */
2092	sub	r1,r4
2093	and	r4,r0
2094	add	r1,r0
2095	jmp	@r0
2096	mov.l	@(4,r1),r0	/* jmp delay slot */
2097#ifndef __pic__
20980:	.long   GLOBAL(ic_invalidate_array)
2099#else /* __pic__ */
2100	.global GLOBAL(ic_invalidate_array)
2101	/* ??? Why won't the assembler allow to add these two constants?  */
21020:	.long   _GLOBAL_OFFSET_TABLE_
21031:	.long   GLOBAL(ic_invalidate_array)@GOT
2104#endif /* __pic__ */
2105	ENDFUNC(GLOBAL(ic_invalidate))
2106#endif /* SH4 */
2107#endif /* L_ic_invalidate */
2108
2109#ifdef L_ic_invalidate_array
2110#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
/* __ic_invalidate_array for SH4A (or SH4 code built with __FORCE_SH4A__).
   The SH4 __ic_invalidate stub enters here with r1 = address of this
   label and r4 = address - r1; the add below rebuilds the address.  The
   four instructions occupy 8 bytes, so the .long 0 sits at offset 8,
   where that stub reads its mask word; a zero mask makes the stub jump to
   the label itself.  icbi is issued in the rts delay slot.  */
2111	.global GLOBAL(ic_invalidate_array)
2112	/* This is needed when an SH4 dso with trampolines is used on SH4A.  */
2113	.global GLOBAL(ic_invalidate_array)
2114	FUNC(GLOBAL(ic_invalidate_array))
2115GLOBAL(ic_invalidate_array):
2116	add	r1,r4
2117	synco
2118	rts
2119	icbi	@r4
2120	.long	0
2121	ENDFUNC(GLOBAL(ic_invalidate_array))
2122#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* __ic_invalidate_array for SH4: a table of 32-byte entries, WAY_SIZE * WAYS
   bytes in total, that the __ic_invalidate stub jumps into.
   WAYS defaults to 4 and WAY_SIZE to 0x4000 unless WAYS is already
   defined.  Every entry variant below assembles to exactly 32 bytes, and
   .p2align 5 puts the label on a 32-byte boundary.  In the first entry
   the words at offsets 4 and 8 are the ones the stub loads into r0.  */
2123	.global GLOBAL(ic_invalidate_array)
2124	.p2align 5
2125	FUNC(GLOBAL(ic_invalidate_array))
2126/* This must be aligned to the beginning of a cache line.  */
2127GLOBAL(ic_invalidate_array):
2128#ifndef WAYS
2129#define WAYS 4
2130#define WAY_SIZE 0x4000
2131#endif
2132#if WAYS == 1
	/* Entry: rts + nop (4 bytes) followed by 7 words (28 bytes).  */
2133	.rept	WAY_SIZE * WAYS / 32
2134	rts
2135	nop
2136	.rept	7
2137	.long	WAY_SIZE - 32
2138	.endr
2139	.endr
2140#elif WAYS <= 6
	/* Entry: braf + add (4 bytes), two words (8 bytes), then WAYS-2
	   braf/nop pairs and 7-WAYS rts/nop pairs (20 bytes together).  */
2141	.rept	WAY_SIZE * WAYS / 32
2142	braf	r0
2143	add	#-8,r0
2144	.long	WAY_SIZE + 8
2145	.long	WAY_SIZE - 32
2146	.rept	WAYS-2
2147	braf	r0
2148	nop
2149	.endr
2150	.rept	7 - WAYS
2151	rts
2152	nop
2153	.endr
2154	.endr
2155#else /* WAYS > 6 */
2156	/* This variant needs two different pages for mmap-ing.  */
	/* WAYS-1 ways of braf entries (4 bytes of code plus 7 words each),
	   then one way of entries made of rts followed by 15 nops.  */
2157 	.rept	WAYS-1
2158	.rept	WAY_SIZE / 32
2159	braf	r0
2160	nop
2161	.long	WAY_SIZE
2162	.rept 6
2163	.long	WAY_SIZE - 32
2164	.endr
2165	.endr
2166	.endr
2167	.rept	WAY_SIZE / 32
2168	rts
2169	.rept	15
2170	nop
2171	.endr
2172	.endr
2173#endif /* WAYS */
2174	ENDFUNC(GLOBAL(ic_invalidate_array))
2175#endif /* SH4 */
2176#endif /* L_ic_invalidate_array */
2177
2178#if defined (__SH5__) && __SH5__ == 32
2179#ifdef L_shcompact_call_trampoline
2180	.section	.rodata
2181	.align	1
/* Dispatch table for GCC_shcompact_call_trampoline: 33 16-bit entries,
   each the offset of a handler from ct_main_label.  The trampoline loads
   an entry with ldx.w from (ct_main_table - 31 * 2) + 2 * nsb(r1), where
   r1 is the zero-extended cookie, and branches to ct_main_label plus that
   offset with ptrel/blink.  */
2182LOCAL(ct_main_table):
2183.word	LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2184.word	LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2185.word	LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2186.word	LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2187.word	LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2188.word	LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2189.word	LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2190.word	LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2191.word	LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2192.word	LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2193.word	LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2194.word	LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2195.word	LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2196.word	LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2197.word	LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2198.word	LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2199.word	LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2200.word	LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2201.word	LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2202.word	LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2203.word	LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2204.word	LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2205.word	LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2206.word	LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2207.word	LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2208.word	LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2209.word	LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2210.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2211.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2212.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2213.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2214.word	LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2215.word	LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2216	.mode	SHmedia
2217	.section	.text..SHmedia32, "ax"
2218	.align	2
2219
2220     /* This function loads 64-bit general-purpose registers from the
2221	stack, from a memory address contained in them or from an FP
2222	register, according to a cookie passed in r1.  Its execution
2223	time is linear on the number of registers that actually have
2224	to be copied.  See sh.h for details on the actual bit pattern.
2225
2226	The function to be called is passed in r0.  If a 32-bit return
2227	value is expected, the actual function will be tail-called,
2228	otherwise the return address will be stored in r10 (that the
2229	caller should expect to be clobbered) and the return value
2230	will be expanded into r2/r3 upon return.  */
2231
2232	.global	GLOBAL(GCC_shcompact_call_trampoline)
2233	FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2234GLOBAL(GCC_shcompact_call_trampoline):
2235	ptabs/l	r0, tr0	/* Prepare to call the actual function.  */
2236	movi	((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2237	pt/l	LOCAL(ct_loop), tr1
2238	addz.l	r1, r63, r1
2239	shori	((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2240LOCAL(ct_loop):
2241	nsb	r1, r28
2242	shlli	r28, 1, r29
2243	ldx.w	r0, r29, r30
2244LOCAL(ct_main_label):
2245	ptrel/l	r30, tr2
2246	blink	tr2, r63
2247LOCAL(ct_r2_fp):	/* Copy r2 from an FP register.  */
2248	/* It must be dr0, so just do it.  */
2249	fmov.dq	dr0, r2
2250	movi	7, r30
2251	shlli	r30, 29, r31
2252	andc	r1, r31, r1
2253	blink	tr1, r63
2254LOCAL(ct_r3_fp):	/* Copy r3 from an FP register.  */
2255	/* It is either dr0 or dr2.  */
2256	movi	7, r30
2257	shlri	r1, 26, r32
2258	shlli	r30, 26, r31
2259	andc	r1, r31, r1
2260	fmov.dq	dr0, r3
2261	beqi/l	r32, 4, tr1
2262	fmov.dq	dr2, r3
2263	blink	tr1, r63
2264LOCAL(ct_r4_fp):	/* Copy r4 from an FP register.  */
2265	shlri	r1, 23 - 3, r34
2266	andi	r34, 3 << 3, r33
2267	addi	r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2268LOCAL(ct_r4_fp_base):
2269	ptrel/l	r32, tr2
2270	movi	7, r30
2271	shlli	r30, 23, r31
2272	andc	r1, r31, r1
2273	blink	tr2, r63
2274LOCAL(ct_r4_fp_copy):
2275	fmov.dq	dr0, r4
2276	blink	tr1, r63
2277	fmov.dq	dr2, r4
2278	blink	tr1, r63
2279	fmov.dq	dr4, r4
2280	blink	tr1, r63
2281LOCAL(ct_r5_fp):	/* Copy r5 from an FP register.  */
2282	shlri	r1, 20 - 3, r34
2283	andi	r34, 3 << 3, r33
2284	addi	r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2285LOCAL(ct_r5_fp_base):
2286	ptrel/l	r32, tr2
2287	movi	7, r30
2288	shlli	r30, 20, r31
2289	andc	r1, r31, r1
2290	blink	tr2, r63
2291LOCAL(ct_r5_fp_copy):
2292	fmov.dq	dr0, r5
2293	blink	tr1, r63
2294	fmov.dq	dr2, r5
2295	blink	tr1, r63
2296	fmov.dq	dr4, r5
2297	blink	tr1, r63
2298	fmov.dq	dr6, r5
2299	blink	tr1, r63
2300LOCAL(ct_r6_fph):	/* Copy r6 from a high FP register.  */
2301	/* It must be dr8.  */
2302	fmov.dq	dr8, r6
2303	movi	15, r30
2304	shlli	r30, 16, r31
2305	andc	r1, r31, r1
2306	blink	tr1, r63
2307LOCAL(ct_r6_fpl):	/* Copy r6 from a low FP register.  */
2308	shlri	r1, 16 - 3, r34
2309	andi	r34, 3 << 3, r33
2310	addi	r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2311LOCAL(ct_r6_fp_base):
2312	ptrel/l	r32, tr2
2313	movi	7, r30
2314	shlli	r30, 16, r31
2315	andc	r1, r31, r1
2316	blink	tr2, r63
2317LOCAL(ct_r6_fp_copy):
2318	fmov.dq	dr0, r6
2319	blink	tr1, r63
2320	fmov.dq	dr2, r6
2321	blink	tr1, r63
2322	fmov.dq	dr4, r6
2323	blink	tr1, r63
2324	fmov.dq	dr6, r6
2325	blink	tr1, r63
2326LOCAL(ct_r7_fph):	/* Copy r7 from a high FP register.  */
2327	/* It is either dr8 or dr10.  */
2328	movi	15 << 12, r31
2329	shlri	r1, 12, r32
2330	andc	r1, r31, r1
2331	fmov.dq	dr8, r7
2332	beqi/l	r32, 8, tr1
2333	fmov.dq	dr10, r7
2334	blink	tr1, r63
2335LOCAL(ct_r7_fpl):	/* Copy r7 from a low FP register.  */
2336	shlri	r1, 12 - 3, r34
2337	andi	r34, 3 << 3, r33
2338	addi	r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2339LOCAL(ct_r7_fp_base):
2340	ptrel/l	r32, tr2
2341	movi	7 << 12, r31
2342	andc	r1, r31, r1
2343	blink	tr2, r63
2344LOCAL(ct_r7_fp_copy):
2345	fmov.dq	dr0, r7
2346	blink	tr1, r63
2347	fmov.dq	dr2, r7
2348	blink	tr1, r63
2349	fmov.dq	dr4, r7
2350	blink	tr1, r63
2351	fmov.dq	dr6, r7
2352	blink	tr1, r63
2353LOCAL(ct_r8_fph):	/* Copy r8 from a high FP register.  */
2354	/* It is either dr8 or dr10.  */
2355	movi	15 << 8, r31
2356	andi	r1, 1 << 8, r32
2357	andc	r1, r31, r1
2358	fmov.dq	dr8, r8
2359	beq/l	r32, r63, tr1
2360	fmov.dq	dr10, r8
2361	blink	tr1, r63
2362LOCAL(ct_r8_fpl):	/* Copy r8 from a low FP register.  */
2363	shlri	r1, 8 - 3, r34
2364	andi	r34, 3 << 3, r33
2365	addi	r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2366LOCAL(ct_r8_fp_base):
2367	ptrel/l	r32, tr2
2368	movi	7 << 8, r31
2369	andc	r1, r31, r1
2370	blink	tr2, r63
2371LOCAL(ct_r8_fp_copy):
2372	fmov.dq	dr0, r8
2373	blink	tr1, r63
2374	fmov.dq	dr2, r8
2375	blink	tr1, r63
2376	fmov.dq	dr4, r8
2377	blink	tr1, r63
2378	fmov.dq	dr6, r8
2379	blink	tr1, r63
2380LOCAL(ct_r9_fph):	/* Copy r9 from a high FP register.  */
2381	/* It is either dr8 or dr10.  */
2382	movi	15 << 4, r31
2383	andi	r1, 1 << 4, r32
2384	andc	r1, r31, r1
2385	fmov.dq	dr8, r9
2386	beq/l	r32, r63, tr1
2387	fmov.dq	dr10, r9
2388	blink	tr1, r63
2389LOCAL(ct_r9_fpl):	/* Copy r9 from a low FP register.  */
2390	shlri	r1, 4 - 3, r34
2391	andi	r34, 3 << 3, r33
2392	addi	r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2393LOCAL(ct_r9_fp_base):
2394	ptrel/l	r32, tr2
2395	movi	7 << 4, r31
2396	andc	r1, r31, r1
2397	blink	tr2, r63
2398LOCAL(ct_r9_fp_copy):
2399	fmov.dq	dr0, r9
2400	blink	tr1, r63
2401	fmov.dq	dr2, r9
2402	blink	tr1, r63
2403	fmov.dq	dr4, r9
2404	blink	tr1, r63
2405	fmov.dq	dr6, r9
2406	blink	tr1, r63
2407LOCAL(ct_r2_ld):	/* Copy r2 from a memory address.  */
2408	pt/l	LOCAL(ct_r2_load), tr2
2409	movi	3, r30
2410	shlli	r30, 29, r31
2411	and	r1, r31, r32
2412	andc	r1, r31, r1
2413	beq/l	r31, r32, tr2
2414	addi.l	r2, 8, r3
2415	ldx.q	r2, r63, r2
2416	/* Fall through.  */
2417LOCAL(ct_r3_ld):	/* Copy r3 from a memory address.  */
2418	pt/l	LOCAL(ct_r3_load), tr2
2419	movi	3, r30
2420	shlli	r30, 26, r31
2421	and	r1, r31, r32
2422	andc	r1, r31, r1
2423	beq/l	r31, r32, tr2
2424	addi.l	r3, 8, r4
2425	ldx.q	r3, r63, r3
2426LOCAL(ct_r4_ld):	/* Copy r4 from a memory address.  */
2427	pt/l	LOCAL(ct_r4_load), tr2
2428	movi	3, r30
2429	shlli	r30, 23, r31
2430	and	r1, r31, r32
2431	andc	r1, r31, r1
2432	beq/l	r31, r32, tr2
2433	addi.l	r4, 8, r5
2434	ldx.q	r4, r63, r4
2435LOCAL(ct_r5_ld):	/* Copy r5 from a memory address.  */
2436	pt/l	LOCAL(ct_r5_load), tr2
2437	movi	3, r30
2438	shlli	r30, 20, r31
2439	and	r1, r31, r32
2440	andc	r1, r31, r1
2441	beq/l	r31, r32, tr2
2442	addi.l	r5, 8, r6
2443	ldx.q	r5, r63, r5
2444LOCAL(ct_r6_ld):	/* Copy r6 from a memory address.  */
2445	pt/l	LOCAL(ct_r6_load), tr2
2446	movi	3 << 16, r31
2447	and	r1, r31, r32
2448	andc	r1, r31, r1
2449	beq/l	r31, r32, tr2
2450	addi.l	r6, 8, r7
2451	ldx.q	r6, r63, r6
2452LOCAL(ct_r7_ld):	/* Copy r7 from a memory address.  */
2453	pt/l	LOCAL(ct_r7_load), tr2
2454	movi	3 << 12, r31
2455	and	r1, r31, r32
2456	andc	r1, r31, r1
2457	beq/l	r31, r32, tr2
2458	addi.l	r7, 8, r8
2459	ldx.q	r7, r63, r7
2460LOCAL(ct_r8_ld):	/* Copy r8 from a memory address.  */
2461	pt/l	LOCAL(ct_r8_load), tr2
2462	movi	3 << 8, r31
2463	and	r1, r31, r32
2464	andc	r1, r31, r1
2465	beq/l	r31, r32, tr2
2466	addi.l	r8, 8, r9
2467	ldx.q	r8, r63, r8
2468LOCAL(ct_r9_ld):	/* Copy r9 from a memory address.  */
2469	pt/l	LOCAL(ct_check_tramp), tr2
2470	ldx.q	r9, r63, r9
2471	blink	tr2, r63
2472LOCAL(ct_r2_load):
2473	ldx.q	r2, r63, r2
2474	blink	tr1, r63
2475LOCAL(ct_r3_load):
2476	ldx.q	r3, r63, r3
2477	blink	tr1, r63
2478LOCAL(ct_r4_load):
2479	ldx.q	r4, r63, r4
2480	blink	tr1, r63
2481LOCAL(ct_r5_load):
2482	ldx.q	r5, r63, r5
2483	blink	tr1, r63
2484LOCAL(ct_r6_load):
2485	ldx.q	r6, r63, r6
2486	blink	tr1, r63
2487LOCAL(ct_r7_load):
2488	ldx.q	r7, r63, r7
2489	blink	tr1, r63
2490LOCAL(ct_r8_load):
2491	ldx.q	r8, r63, r8
2492	blink	tr1, r63
2493LOCAL(ct_r2_pop):	/* Pop r2 from the stack.  */
2494	movi	1, r30
2495	ldx.q	r15, r63, r2
2496	shlli	r30, 29, r31
2497	addi.l	r15, 8, r15
2498	andc	r1, r31, r1
2499	blink	tr1, r63
2500LOCAL(ct_r3_pop):	/* Pop r3 from the stack.  */
2501	movi	1, r30
2502	ldx.q	r15, r63, r3
2503	shlli	r30, 26, r31
2504	addi.l	r15, 8, r15
2505	andc	r1, r31, r1
2506	blink	tr1, r63
2507LOCAL(ct_r4_pop):	/* Pop r4 from the stack.  */
2508	movi	1, r30
2509	ldx.q	r15, r63, r4
2510	shlli	r30, 23, r31
2511	addi.l	r15, 8, r15
2512	andc	r1, r31, r1
2513	blink	tr1, r63
2514LOCAL(ct_r5_pop):	/* Pop r5 from the stack.  */
2515	movi	1, r30
2516	ldx.q	r15, r63, r5
2517	shlli	r30, 20, r31
2518	addi.l	r15, 8, r15
2519	andc	r1, r31, r1
2520	blink	tr1, r63
2521LOCAL(ct_r6_pop):	/* Pop r6 from the stack.  */
2522	movi	1, r30
2523	ldx.q	r15, r63, r6
2524	shlli	r30, 16, r31
2525	addi.l	r15, 8, r15
2526	andc	r1, r31, r1
2527	blink	tr1, r63
2528LOCAL(ct_r7_pop):	/* Pop r7 from the stack.  */
2529	ldx.q	r15, r63, r7
2530	movi	1 << 12, r31
2531	addi.l	r15, 8, r15
2532	andc	r1, r31, r1
2533	blink	tr1, r63
2534LOCAL(ct_r8_pop):	/* Pop r8 from the stack.  */
2535	ldx.q	r15, r63, r8
2536	movi	1 << 8, r31
2537	addi.l	r15, 8, r15
2538	andc	r1, r31, r1
2539	blink	tr1, r63
2540LOCAL(ct_pop_seq):	/* Pop a sequence of registers off the stack.  */
2541	andi	r1, 7 << 1, r30
2542	movi	(LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2543	shlli	r30, 2, r31
2544	shori	LOCAL(ct_end_of_pop_seq) & 65535, r32
2545	sub.l	r32, r31, r33
2546	ptabs/l	r33, tr2
2547	blink	tr2, r63
2548LOCAL(ct_start_of_pop_seq):	/* Beginning of pop sequence.  */
2549	ldx.q	r15, r63, r3
2550	addi.l	r15, 8, r15
2551	ldx.q	r15, r63, r4
2552	addi.l	r15, 8, r15
2553	ldx.q	r15, r63, r5
2554	addi.l	r15, 8, r15
2555	ldx.q	r15, r63, r6
2556	addi.l	r15, 8, r15
2557	ldx.q	r15, r63, r7
2558	addi.l	r15, 8, r15
2559	ldx.q	r15, r63, r8
2560	addi.l	r15, 8, r15
2561LOCAL(ct_r9_pop):	/* Pop r9 from the stack.  */
2562	ldx.q	r15, r63, r9
2563	addi.l	r15, 8, r15
2564LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
2565LOCAL(ct_check_tramp):	/* Check whether we need a trampoline.  */
2566	pt/u	LOCAL(ct_ret_wide), tr2
2567	andi	r1, 1, r1
2568	bne/u	r1, r63, tr2
2569LOCAL(ct_call_func):	/* Just branch to the function.  */
2570	blink	tr0, r63
2571LOCAL(ct_ret_wide):	/* Call the function, so that we can unpack its
2572			   64-bit return value.  */
2573	add.l	r18, r63, r10
2574	blink	tr0, r18
2575	ptabs	r10, tr0
2576#if __LITTLE_ENDIAN__
2577	shari	r2, 32, r3
2578	add.l	r2, r63, r2
2579#else
2580	add.l	r2, r63, r3
2581	shari	r2, 32, r2
2582#endif
2583	blink	tr0, r63
2584
2585	ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2586#endif /* L_shcompact_call_trampoline */
2587
2588#ifdef L_shcompact_return_trampoline
2589     /* This function does the converse of the code in `ret_wide'
2590	above.  It is tail-called by SHcompact functions returning
2591	64-bit non-floating-point values, to pack the 32-bit values in
2592	r2 and r3 into r2.  */
     /* In:  r2, r3 = the two 32-bit halves; r18 = return address.
	Out: r2 = the packed 64-bit value.
	Which register supplies the upper half depends on
	__LITTLE_ENDIAN__: r3 when it is set, r2 otherwise.
	Modifies r3 and tr0 in addition to r2.  */
2593
2594	.mode	SHmedia
2595	.section	.text..SHmedia32, "ax"
2596	.align	2
2597	.global	GLOBAL(GCC_shcompact_return_trampoline)
2598	HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2599GLOBAL(GCC_shcompact_return_trampoline):
2600	ptabs/l	r18, tr0	/* tr0 = return address taken from r18.  */
2601#if __LITTLE_ENDIAN__
2602	addz.l	r2, r63, r2	/* Low half: r2 + 0, zero-extended from 32 bits.  */
2603	shlli	r3, 32, r3	/* High half: r3 moved up by 32 bits.  */
2604#else
2605	addz.l	r3, r63, r3	/* Low half: r3 + 0, zero-extended from 32 bits.  */
2606	shlli	r2, 32, r2	/* High half: r2 moved up by 32 bits.  */
2607#endif
2608	or	r3, r2, r2	/* r2 = high half | low half.  */
2609	blink	tr0, r63	/* Return to the address loaded from r18.  */
2610
2611	ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2612#endif /* L_shcompact_return_trampoline */
2613
2614#ifdef L_shcompact_incoming_args
2615	.section	.rodata
2616	.align	1
/* Dispatch table for GCC_shcompact_incoming_args.
   Each 16-bit entry is the offset of a handler relative to
   ia_main_label; the function loads an entry with ldx.w and branches
   with ptrel.  The function biases the table base by -31 entries and
   indexes it with twice the `nsb' count of the cookie, so the order of
   the entries below is significant: the first entry is the one
   selected by the smallest index the function can produce (31), and
   every following entry belongs to the next larger count.
   NOTE(review): presumably a count of 31 means cookie bit 31 is the
   highest bit set and the last entry is reached for a zero cookie --
   confirm against the definition of nsb.
   Entries holding the constant 1 are not valid cookie encodings; as
   the existing comments say, taking one just loops.  */
2617LOCAL(ia_main_table):
2618.word	1 /* Invalid, just loop */
2619.word	LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2620.word	LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2621.word	1 /* Invalid, just loop */
2622.word	LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2623.word	LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2624.word	1 /* Invalid, just loop */
2625.word	LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2626.word	LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2627.word	1 /* Invalid, just loop */
2628.word	LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2629.word	LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2630.word	1 /* Invalid, just loop */
2631.word	1 /* Invalid, just loop */
2632.word	LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2633.word	LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2634.word	1 /* Invalid, just loop */
2635.word	1 /* Invalid, just loop */
2636.word	LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2637.word	LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2638.word	1 /* Invalid, just loop */
2639.word	1 /* Invalid, just loop */
2640.word	LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2641.word	LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2642.word	1 /* Invalid, just loop */
2643.word	1 /* Invalid, just loop */
2644.word	LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2645.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2646.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2647.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2648.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2649.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2650.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2651	.mode	SHmedia
2652	.section	.text..SHmedia32, "ax"
2653	.align	2
2654
2655     /* This function stores 64-bit general-purpose registers back in
2656	the stack, and loads the address in which each register
2657	was stored into itself.  The lower 32 bits of r17 hold the address
2658	to begin storing, and the upper 32 bits of r17 hold the cookie.
2659	Its execution time is linear on the
2660	number of registers that actually have to be copied, and it is
2661	optimized for structures larger than 64 bits, as opposed to
2662	individual `long long' arguments.  See sh.h for details on the
2663	actual bit pattern.  */
     /* Register usage, as visible in the code below:
	  r0       remaining cookie bits; every handler that returns to
		   ia_loop first clears its own field
	  r17      store pointer, advanced by 8 after each register stored
	  r18      return address (copied to tr0 on entry)
	  r43      address of ia_main_table biased by -31 entries
	  r36-r41  scratch
	  tr0      return target
	  tr1      ia_loop, i.e. dispatch on the next cookie field
	  tr2      scratch branch target
	Three kinds of handler exist per register rN:
	  ia_rN_ld    store rN, then replace rN by the address it was
		      stored at; falls through to the next register
		      unless both bits of its 2-bit field were set
	  ia_rN_push  store rN only, then dispatch again
	  ia_push_seq store a run of registers ending with r9 and return.  */
2664
2665	.global	GLOBAL(GCC_shcompact_incoming_args)
2666 	FUNC(GLOBAL(GCC_shcompact_incoming_args))
2667GLOBAL(GCC_shcompact_incoming_args):
2668	ptabs/l	r18, tr0	/* Prepare to return.  */
2669	shlri	r17, 32, r0	/* Load the cookie.  */
2670	movi	((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2671	pt/l	LOCAL(ia_loop), tr1
2672	add.l	r17, r63, r17	/* Keep only the 32-bit store address in r17.  */
2673	shori	((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2674LOCAL(ia_loop):
2675	nsb	r0, r36		/* Locate the most significant cookie field still set.  */
2676	shlli	r36, 1, r37	/* Scale to a byte offset: table entries are 2 bytes.  */
2677	ldx.w	r43, r37, r38	/* Handler offset relative to ia_main_label.  */
2678LOCAL(ia_main_label):
2679	ptrel/l	r38, tr2
2680	blink	tr2, r63
2681LOCAL(ia_r2_ld):	/* Store r2 and load its address.  */
2682	movi	3, r38
2683	shlli	r38, 29, r39	/* r39 = 3 << 29, the 2-bit cookie field for r2.  */
2684	and	r0, r39, r40	/* r40 = current value of that field.  */
2685	andc	r0, r39, r0	/* Clear the field in the cookie.  */
2686	stx.q	r17, r63, r2
2687	add.l	r17, r63, r2	/* r2 = address its old value was stored at.  */
2688	addi.l	r17, 8, r17
2689	beq/u	r39, r40, tr1	/* Both bits set: dispatch again; else fall through.  */
2690LOCAL(ia_r3_ld):	/* Store r3 and load its address.  */
2691	movi	3, r38
2692	shlli	r38, 26, r39	/* Field mask 3 << 26; otherwise as ia_r2_ld.  */
2693	and	r0, r39, r40
2694	andc	r0, r39, r0
2695	stx.q	r17, r63, r3
2696	add.l	r17, r63, r3
2697	addi.l	r17, 8, r17
2698	beq/u	r39, r40, tr1
2699LOCAL(ia_r4_ld):	/* Store r4 and load its address.  */
2700	movi	3, r38
2701	shlli	r38, 23, r39	/* Field mask 3 << 23.  */
2702	and	r0, r39, r40
2703	andc	r0, r39, r0
2704	stx.q	r17, r63, r4
2705	add.l	r17, r63, r4
2706	addi.l	r17, 8, r17
2707	beq/u	r39, r40, tr1
2708LOCAL(ia_r5_ld):	/* Store r5 and load its address.  */
2709	movi	3, r38
2710	shlli	r38, 20, r39	/* Field mask 3 << 20.  */
2711	and	r0, r39, r40
2712	andc	r0, r39, r0
2713	stx.q	r17, r63, r5
2714	add.l	r17, r63, r5
2715	addi.l	r17, 8, r17
2716	beq/u	r39, r40, tr1
2717LOCAL(ia_r6_ld):	/* Store r6 and load its address.  */
2718	movi	3, r38
2719	shlli	r38, 16, r39	/* Field mask 3 << 16.  */
2720	and	r0, r39, r40
2721	andc	r0, r39, r0
2722	stx.q	r17, r63, r6
2723	add.l	r17, r63, r6
2724	addi.l	r17, 8, r17
2725	beq/u	r39, r40, tr1
2726LOCAL(ia_r7_ld):	/* Store r7 and load its address.  */
2727	movi	3 << 12, r39	/* Field mask built with a single movi here.  */
2728	and	r0, r39, r40
2729	andc	r0, r39, r0
2730	stx.q	r17, r63, r7
2731	add.l	r17, r63, r7
2732	addi.l	r17, 8, r17
2733	beq/u	r39, r40, tr1
2734LOCAL(ia_r8_ld):	/* Store r8 and load its address.  */
2735	movi	3 << 8, r39
2736	and	r0, r39, r40
2737	andc	r0, r39, r0
2738	stx.q	r17, r63, r8
2739	add.l	r17, r63, r8
2740	addi.l	r17, 8, r17
2741	beq/u	r39, r40, tr1
2742LOCAL(ia_r9_ld):	/* Store r9 and load its address.  */
2743	stx.q	r17, r63, r9
2744	add.l	r17, r63, r9
2745	blink	tr0, r63	/* r9 is the last register handled: return.  */
2746LOCAL(ia_r2_push):	/* Push r2 onto the stack.  */
2747	movi	1, r38
2748	shlli	r38, 29, r39	/* r39 = 1 << 29, the cookie bit for this push.  */
2749	andc	r0, r39, r0	/* Clear it so ia_loop moves on to the next field.  */
2750	stx.q	r17, r63, r2
2751	addi.l	r17, 8, r17
2752	blink	tr1, r63	/* Back to ia_loop.  */
2753LOCAL(ia_r3_push):	/* Push r3 onto the stack.  */
2754	movi	1, r38
2755	shlli	r38, 26, r39
2756	andc	r0, r39, r0
2757	stx.q	r17, r63, r3
2758	addi.l	r17, 8, r17
2759	blink	tr1, r63
2760LOCAL(ia_r4_push):	/* Push r4 onto the stack.  */
2761	movi	1, r38
2762	shlli	r38, 23, r39
2763	andc	r0, r39, r0
2764	stx.q	r17, r63, r4
2765	addi.l	r17, 8, r17
2766	blink	tr1, r63
2767LOCAL(ia_r5_push):	/* Push r5 onto the stack.  */
2768	movi	1, r38
2769	shlli	r38, 20, r39
2770	andc	r0, r39, r0
2771	stx.q	r17, r63, r5
2772	addi.l	r17, 8, r17
2773	blink	tr1, r63
2774LOCAL(ia_r6_push):	/* Push r6 onto the stack.  */
2775	movi	1, r38
2776	shlli	r38, 16, r39
2777	andc	r0, r39, r0
2778	stx.q	r17, r63, r6
2779	addi.l	r17, 8, r17
2780	blink	tr1, r63
2781LOCAL(ia_r7_push):	/* Push r7 onto the stack.  */
2782	movi	1 << 12, r39
2783	andc	r0, r39, r0
2784	stx.q	r17, r63, r7
2785	addi.l	r17, 8, r17
2786	blink	tr1, r63
2787LOCAL(ia_r8_push):	/* Push r8 onto the stack.  */
2788	movi	1 << 8, r39
2789	andc	r0, r39, r0
2790	stx.q	r17, r63, r8
2791	addi.l	r17, 8, r17
2792	blink	tr1, r63
2793LOCAL(ia_push_seq):	/* Push a sequence of registers onto the stack.  */
	/* Computed entry into the straight-line sequence below:
	   (cookie & (7 << 1)) << 2 is subtracted from the address of
	   ia_end_of_push_seq, i.e. 8 bytes per unit of the 3-bit count
	   held in cookie bits 1..3.  Every step of the sequence is one
	   stx.q/addi.l pair, and the final stx.q of r9 plus the return
	   form the last pair.  */
2794	andi	r0, 7 << 1, r38
2795	movi	(LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2796	shlli	r38, 2, r39
2797	shori	LOCAL(ia_end_of_push_seq) & 65535, r40
2798	sub.l	r40, r39, r41
2799	ptabs/l	r41, tr2
2800	blink	tr2, r63
2801LOCAL(ia_start_of_push_seq):	 /* Beginning of push sequence.  */
2802	stx.q	r17, r63, r3
2803	addi.l	r17, 8, r17
2804	stx.q	r17, r63, r4
2805	addi.l	r17, 8, r17
2806	stx.q	r17, r63, r5
2807	addi.l	r17, 8, r17
2808	stx.q	r17, r63, r6
2809	addi.l	r17, 8, r17
2810	stx.q	r17, r63, r7
2811	addi.l	r17, 8, r17
2812	stx.q	r17, r63, r8
2813	addi.l	r17, 8, r17
2814LOCAL(ia_r9_push):	/* Push r9 onto the stack.  */
2815	stx.q	r17, r63, r9
2816LOCAL(ia_return):	/* Return.  */
2817	blink	tr0, r63
2818LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
2819	ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2820#endif /* L_shcompact_incoming_args */
2821#endif
2822#if __SH5__
2823#ifdef L_nested_trampoline
/* Code template for nested-function trampolines.
   The code finds its own address and then reads two values stored at
   fixed offsets from it: the first is used as the branch target, the
   second is left in r1 for the callee.  The six instructions below
   occupy 24 bytes (SHmedia instructions are 4 bytes each), so the
   first value sits directly after the code.
   NOTE(review): presumably the two values are the nested function's
   address and its static chain, filled in by whoever copies this
   template -- confirm against the trampoline setup code in sh.c/sh.h.
   Modifies r0, r1, tr0 and tr1.  */
2824#if __SH5__ == 32
2825	.section	.text..SHmedia32,"ax"
2826#else
2827	.text
2828#endif
2829	.align	3 /* It is copied in units of 8 bytes in SHmedia mode.  */
2830	.global	GLOBAL(GCC_nested_trampoline)
2831	HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2832GLOBAL(GCC_nested_trampoline):
2833	.mode	SHmedia
2834	ptrel/u	r63, tr0	/* PC-relative target with a zero offset...  */
2835	gettr	tr0, r0		/* ...read back so r0 holds this code's own address.  */
2836#if __SH5__ == 64
2837	ld.q	r0, 24, r1	/* 64-bit pointers: first value at offset 24.  */
2838#else
2839	ld.l	r0, 24, r1	/* 32-bit pointers: first value at offset 24.  */
2840#endif
2841	ptabs/l	r1, tr1		/* The first value is the branch target.  */
2842#if __SH5__ == 64
2843	ld.q	r0, 32, r1	/* Second value, 8 bytes after the first.  */
2844#else
2845	ld.l	r0, 28, r1	/* Second value, 4 bytes after the first.  */
2846#endif
2847	blink	tr1, r63	/* Jump (no link) with the second value in r1.  */
2848
2849	ENDFUNC(GLOBAL(GCC_nested_trampoline))
2850#endif /* L_nested_trampoline */
2851#endif /* __SH5__ */
2852#if __SH5__ == 32
2853#ifdef L_push_pop_shmedia_regs
2854	.section	.text..SHmedia32,"ax"
2855	.mode	SHmedia
2856	.align	2
2857#ifndef __SH4_NOFPU__
/* GCC_push_shmedia_regs (FPU builds) / GCC_push_shmedia_regs_nofpu:
   save a fixed set of registers below the stack pointer r15 and
   return through r18.
   The FPU entry point first stores dr36..dr62 (14 doubles) and then
   runs straight into the common part, which stores r28..r35,
   r44..r59 and the target registers tr5..tr7 (27 quadwords).  r15 is
   left pointing at the lowest slot, so the frame is 41*8 bytes with
   the FP saves and 27*8 bytes without.
   r60..r62 are overwritten with tr5..tr7 before anything is stored,
   so their incoming values are not preserved; tr0 is used for the
   return.  */
2858	.global	GLOBAL(GCC_push_shmedia_regs)
2859	FUNC(GLOBAL(GCC_push_shmedia_regs))
2860GLOBAL(GCC_push_shmedia_regs):
2861	addi.l	r15, -14*8, r15	/* Room for the 14 FP doubles.  */
2862	fst.d	r15, 13*8, dr62
2863	fst.d	r15, 12*8, dr60
2864	fst.d	r15, 11*8, dr58
2865	fst.d	r15, 10*8, dr56
2866	fst.d	r15,  9*8, dr54
2867	fst.d	r15,  8*8, dr52
2868	fst.d	r15,  7*8, dr50
2869	fst.d	r15,  6*8, dr48
2870	fst.d	r15,  5*8, dr46
2871	fst.d	r15,  4*8, dr44
2872	fst.d	r15,  3*8, dr42
2873	fst.d	r15,  2*8, dr40
2874	fst.d	r15,  1*8, dr38
2875	fst.d	r15,  0*8, dr36
2876#else /* ! __SH4_NOFPU__ */
2877	.global	GLOBAL(GCC_push_shmedia_regs_nofpu)
2878	FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2879GLOBAL(GCC_push_shmedia_regs_nofpu):
2880#endif /* ! __SH4_NOFPU__ */
	/* Common part: general-purpose and target registers.  */
2881	ptabs/l	r18, tr0	/* tr0 = return address.  */
2882	addi.l	r15, -27*8, r15	/* Room for the 27 quadwords stored below.  */
2883	gettr	tr7, r62	/* Target registers are stored via r62..r60.  */
2884	gettr	tr6, r61
2885	gettr	tr5, r60
2886	st.q	r15, 26*8, r62	/* Slot 26: tr7.  */
2887	st.q	r15, 25*8, r61	/* Slot 25: tr6.  */
2888	st.q	r15, 24*8, r60	/* Slot 24: tr5.  */
2889	st.q	r15, 23*8, r59
2890	st.q	r15, 22*8, r58
2891	st.q	r15, 21*8, r57
2892	st.q	r15, 20*8, r56
2893	st.q	r15, 19*8, r55
2894	st.q	r15, 18*8, r54
2895	st.q	r15, 17*8, r53
2896	st.q	r15, 16*8, r52
2897	st.q	r15, 15*8, r51
2898	st.q	r15, 14*8, r50
2899	st.q	r15, 13*8, r49
2900	st.q	r15, 12*8, r48
2901	st.q	r15, 11*8, r47
2902	st.q	r15, 10*8, r46
2903	st.q	r15,  9*8, r45
2904	st.q	r15,  8*8, r44
2905	st.q	r15,  7*8, r35	/* The saved set skips r36..r43.  */
2906	st.q	r15,  6*8, r34
2907	st.q	r15,  5*8, r33
2908	st.q	r15,  4*8, r32
2909	st.q	r15,  3*8, r31
2910	st.q	r15,  2*8, r30
2911	st.q	r15,  1*8, r29
2912	st.q	r15,  0*8, r28
2913	blink	tr0, r63
2914#ifndef __SH4_NOFPU__
2915	ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2916#else
2917	ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2918#endif
2919#ifndef __SH4_NOFPU__
/* GCC_pop_shmedia_regs (FPU builds) / GCC_pop_shmedia_regs_nofpu:
   reload the registers from the frame layout that
   GCC_push_shmedia_regs writes, release the frame and return through
   r18.
   r0 carries the frame size to release: 41*8 when the FP doubles are
   part of the frame, 27*8 otherwise.  The FPU entry point reloads
   dr36..dr62 from slots 27..40 and then branches to the common part
   at .L0.
   Modifies r0 and tr0 (and tr1 on the FPU path); r60..r62 receive the
   saved tr5..tr7 values as a side effect of restoring them.  */
2920	.global	GLOBAL(GCC_pop_shmedia_regs)
2921	FUNC(GLOBAL(GCC_pop_shmedia_regs))
2922GLOBAL(GCC_pop_shmedia_regs):
2923	pt	.L0, tr1	/* tr1 = common restore code.  */
2924	movi	41*8, r0	/* Frame size including the 14 FP doubles.  */
2925	fld.d	r15, 40*8, dr62
2926	fld.d	r15, 39*8, dr60
2927	fld.d	r15, 38*8, dr58
2928	fld.d	r15, 37*8, dr56
2929	fld.d	r15, 36*8, dr54
2930	fld.d	r15, 35*8, dr52
2931	fld.d	r15, 34*8, dr50
2932	fld.d	r15, 33*8, dr48
2933	fld.d	r15, 32*8, dr46
2934	fld.d	r15, 31*8, dr44
2935	fld.d	r15, 30*8, dr42
2936	fld.d	r15, 29*8, dr40
2937	fld.d	r15, 28*8, dr38
2938	fld.d	r15, 27*8, dr36
2939	blink	tr1, r63	/* Skip the movi below and continue at .L0.  */
2940#else /* ! __SH4_NOFPU__	*/
2941	.global	GLOBAL(GCC_pop_shmedia_regs_nofpu)
2942	FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2943GLOBAL(GCC_pop_shmedia_regs_nofpu):
2944#endif /* ! __SH4_NOFPU__	*/
	/* First instruction of the no-FPU entry point.  In the FPU
	   build it is never reached, because the blink above jumps
	   directly to .L0.  */
2945	movi	27*8, r0
2946.L0:
2947	ptabs	r18, tr0	/* tr0 = return address.  */
2948	ld.q	r15, 26*8, r62
2949	ld.q	r15, 25*8, r61
2950	ld.q	r15, 24*8, r60
2951	ptabs	r62, tr7	/* Slots 26..24 hold tr7..tr5.  */
2952	ptabs	r61, tr6
2953	ptabs	r60, tr5
2954	ld.q	r15, 23*8, r59
2955	ld.q	r15, 22*8, r58
2956	ld.q	r15, 21*8, r57
2957	ld.q	r15, 20*8, r56
2958	ld.q	r15, 19*8, r55
2959	ld.q	r15, 18*8, r54
2960	ld.q	r15, 17*8, r53
2961	ld.q	r15, 16*8, r52
2962	ld.q	r15, 15*8, r51
2963	ld.q	r15, 14*8, r50
2964	ld.q	r15, 13*8, r49
2965	ld.q	r15, 12*8, r48
2966	ld.q	r15, 11*8, r47
2967	ld.q	r15, 10*8, r46
2968	ld.q	r15,  9*8, r45
2969	ld.q	r15,  8*8, r44
2970	ld.q	r15,  7*8, r35
2971	ld.q	r15,  6*8, r34
2972	ld.q	r15,  5*8, r33
2973	ld.q	r15,  4*8, r32
2974	ld.q	r15,  3*8, r31
2975	ld.q	r15,  2*8, r30
2976	ld.q	r15,  1*8, r29
2977	ld.q	r15,  0*8, r28
2978	add.l	r15, r0, r15	/* Release the frame (size chosen at entry).  */
2979	blink	tr0, r63
2980
2981#ifndef __SH4_NOFPU__
2982	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
2983#else
2984	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2985#endif
2986#endif /* L_push_pop_shmedia_regs */
2987#endif /* __SH5__ == 32 */
2988
2989#if __SH5__
2990#ifdef L_div_table
2991#if defined(__pic__) && defined(__SHMEDIA__)
/* PIC SHmedia build: a copy of sdivsi3 is emitted next to the table so
   that it can reach the table through the hidden alias
   div_table_internal (or Local_div_table when TEXT_DATA_BUG is
   defined).  No .rodata switch is made in this branch, so the table
   data that follows in this file is assembled into the same text
   section as the code.  */
2992	.global	GLOBAL(sdivsi3)
2993	FUNC(GLOBAL(sdivsi3))
2994#if __SH5__ == 32
2995	.section	.text..SHmedia32,"ax"
2996#else
2997	.text
2998#endif
2999#if 0
3000/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3001   in a text section does not work (at least for shared libraries):
3002   the linker sets the LSB of the address as if this was SHmedia code.  */
3003#define TEXT_DATA_BUG
3004#endif
3005	.align	2
 // Signed 32-bit division r4 / r5 by multiplying with a reciprocal of
 // the divisor.  The reciprocal starts from a table lookup and is
 // refined twice (the "11 bit" and "22 bit" inverses noted below).
 // The sNN.MM / uNN.MM notes give the fixed-point format of each result
 // (integer bits . fraction bits).
 // Lines indented by two spaces are independent work interleaved with
 // the main dependency chain, presumably to fill multiplier latency.
 // r18 holds the return address on entry; it is moved to tr0 early so
 // that r18 can be reused as scratch.
 // No check for a zero divisor is made here.
3006 // inputs: r4,r5
3007 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3008 // result in r0
3009 .global GLOBAL(sdivsi3)
3010GLOBAL(sdivsi3):
3011#ifdef TEXT_DATA_BUG
3012 ptb datalabel Local_div_table,tr0
3013#else
3014 ptb GLOBAL(div_table_internal),tr0
3015#endif
3016 nsb r5, r1 // r1 = normalization shift count for the divisor
3017 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
3018 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
3019 /* bubble */
3020 gettr tr0,r20 // r20 = address of the table's centre label
3021 ldx.ub r20, r21, r19 // u0.8
3022 shari r25, 32, r25   // normalize to s2.30
3023 shlli r21, 1, r21 // same index, scaled for the 16-bit constants
3024 muls.l r25, r19, r19 // s2.38
3025 ldx.w r20, r21, r21  // s2.14
3026  ptabs r18, tr0 // tr0 = return address; r18 is free from here on
3027 shari r19, 24, r19   // truncate to s2.14
3028 sub r21, r19, r19    // some 11 bit inverse in s1.14
3029 muls.l r19, r19, r21 // u0.28
3030  sub r63, r1, r1
3031  addi r1, 92, r1 // r1 = 92 - shift count: final right shift amount
3032 muls.l r25, r21, r18 // s2.58
3033 shlli r19, 45, r19   // multiply by two and convert to s2.58
3034  /* bubble */
3035 sub r19, r18, r18
3036 shari r18, 28, r18   // some 22 bit inverse in s1.30
3037 muls.l r18, r25, r0  // s2.60
3038  muls.l r18, r4, r25 // s32.30
3039  /* bubble */
3040 shari r0, 16, r19   // s-16.44
3041 muls.l r19, r18, r19 // s-16.74
3042  shari r25, 63, r0 // r0 = 0 or -1: sign of the preliminary product
3043  shari r4, 14, r18   // s19.-14
3044 shari r19, 30, r19   // s-16.44
3045 muls.l r19, r18, r19 // s15.30
3046  xor r21, r0, r21    // You could also use the constant 1 << 27.
3047  add r21, r25, r21
3048 sub r21, r19, r21
3049 shard r21, r1, r21 // drop the fraction bits and undo the normalization
3050 sub r21, r0, r0 // result = r21 + 1 when that sign was negative
3051 blink tr0, r63
3052	ENDFUNC(GLOBAL(sdivsi3))
3053/* This table has been generated by divtab.c .
3054Defects for bias -330:
3055   Max defect: 6.081536e-07 at -1.000000e+00
3056   Min defect: 2.849516e-08 at 1.030651e+00
3057   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3058   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3059   Defect at 1: 1.238659e-07
3060   Defect at -2: 1.061708e-07 */
3061#else /* ! __pic__ || ! __SHMEDIA__ */
3062	.section	.rodata
3063#endif /* __pic__ && __SHMEDIA__ */
3064#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
/* Private copy of the division table for the TEXT_DATA_BUG workaround;
   it holds the same values as GLOBAL(div_table).
   The label sits in the middle of the 128 bytes and sdivsi3 indexes it
   with a signed value, so entries for negative divisors lie before the
   label and entries for positive divisors after it.  Byte offsets
   relative to Local_div_table:
     -64 .. -33   16-bit constants, negative divisors
     -32 .. -17   8-bit factors, negative divisors
     -16 ..  15   unused padding (index values that cannot occur)
      16 ..  31   8-bit factors, positive divisors
      32 ..  63   16-bit constants, positive divisors
   A factor is read at byte offset i and the matching constant at
   byte offset 2*i, for the same index i.
   The trailing .section directive switches to .rodata for the data
   that follows this block.  */
3065	.balign 2
3066	.type	Local_div_table,@object
3067	.size	Local_div_table,128
3068/* negative division constants */
3069	.word	-16638
3070	.word	-17135
3071	.word	-17737
3072	.word	-18433
3073	.word	-19103
3074	.word	-19751
3075	.word	-20583
3076	.word	-21383
3077	.word	-22343
3078	.word	-23353
3079	.word	-24407
3080	.word	-25582
3081	.word	-26863
3082	.word	-28382
3083	.word	-29965
3084	.word	-31800
3085/* negative division factors */
3086	.byte	66
3087	.byte	70
3088	.byte	75
3089	.byte	81
3090	.byte	87
3091	.byte	93
3092	.byte	101
3093	.byte	109
3094	.byte	119
3095	.byte	130
3096	.byte	142
3097	.byte	156
3098	.byte	172
3099	.byte	192
3100	.byte	214
3101	.byte	241
3102	.skip 16
3103Local_div_table:
3104	.skip 16
3105/* positive division factors */
3106	.byte	241
3107	.byte	214
3108	.byte	192
3109	.byte	172
3110	.byte	156
3111	.byte	142
3112	.byte	130
3113	.byte	119
3114	.byte	109
3115	.byte	101
3116	.byte	93
3117	.byte	87
3118	.byte	81
3119	.byte	75
3120	.byte	70
3121	.byte	66
3122/* positive division constants */
3123	.word	31801
3124	.word	29966
3125	.word	28383
3126	.word	26864
3127	.word	25583
3128	.word	24408
3129	.word	23354
3130	.word	22344
3131	.word	21384
3132	.word	20584
3133	.word	19752
3134	.word	19104
3135	.word	18434
3136	.word	17738
3137	.word	17136
3138	.word	16639
3139	.section	.rodata
3140#endif /* TEXT_DATA_BUG */
3141	.balign 2
/* Exported reciprocal table used by the SH5 division code, with the
   hidden alias div_table_internal for references from inside libgcc.
   The label sits in the middle of the 128 bytes: the table is indexed
   with a signed value (sdivsi3 in this file does so), so entries for
   negative divisors lie before the label and entries for positive
   divisors after it.  Byte offsets relative to GLOBAL(div_table):
     -64 .. -33   16-bit constants, negative divisors
     -32 .. -17   8-bit factors, negative divisors
     -16 ..  15   unused padding (index values that cannot occur)
      16 ..  31   8-bit factors, positive divisors
      32 ..  63   16-bit constants, positive divisors
   A factor is read at byte offset i and the matching constant at
   byte offset 2*i, for the same index i.
   The values were generated by divtab.c; keep one value per line so
   that they can be compared against the generator's output.  */
3142	.type	GLOBAL(div_table),@object
3143	.size	GLOBAL(div_table),128
3144/* negative division constants */
3145	.word	-16638
3146	.word	-17135
3147	.word	-17737
3148	.word	-18433
3149	.word	-19103
3150	.word	-19751
3151	.word	-20583
3152	.word	-21383
3153	.word	-22343
3154	.word	-23353
3155	.word	-24407
3156	.word	-25582
3157	.word	-26863
3158	.word	-28382
3159	.word	-29965
3160	.word	-31800
3161/* negative division factors */
3162	.byte	66
3163	.byte	70
3164	.byte	75
3165	.byte	81
3166	.byte	87
3167	.byte	93
3168	.byte	101
3169	.byte	109
3170	.byte	119
3171	.byte	130
3172	.byte	142
3173	.byte	156
3174	.byte	172
3175	.byte	192
3176	.byte	214
3177	.byte	241
3178	.skip 16
3179	.global	GLOBAL(div_table)
3180GLOBAL(div_table):
3181	HIDDEN_ALIAS(div_table_internal,div_table)
3182	.skip 16
3183/* positive division factors */
3184	.byte	241
3185	.byte	214
3186	.byte	192
3187	.byte	172
3188	.byte	156
3189	.byte	142
3190	.byte	130
3191	.byte	119
3192	.byte	109
3193	.byte	101
3194	.byte	93
3195	.byte	87
3196	.byte	81
3197	.byte	75
3198	.byte	70
3199	.byte	66
3200/* positive division constants */
3201	.word	31801
3202	.word	29966
3203	.word	28383
3204	.word	26864
3205	.word	25583
3206	.word	24408
3207	.word	23354
3208	.word	22344
3209	.word	21384
3210	.word	20584
3211	.word	19752
3212	.word	19104
3213	.word	18434
3214	.word	17738
3215	.word	17136
3216	.word	16639
3217#endif /* L_div_table */
3218#endif /* __SH5__ */
3219
3220#ifdef L_udiv_qrnnd_16
3221#if !__SHMEDIA__
3222	HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
	/* One 16-bit quotient step of a multi-word unsigned division
	   (non-SHmedia code only).  The register comments below
	   read: outputs r0 = remainder (rn), r1 = quotient digit (qn);
	   inputs r0 = n1, r4 = n0, r5 = d, r6 = d1; r2 = scratch
	   product (__m).
	   NOTE(review): d1 is presumably the upper half of d
	   positioned for the div1 steps, and only the low 16 bits of
	   r4 enter the result -- confirm against the callers.
	   The quotient estimate obtained from dividing by d1 alone is
	   corrected downwards by at most two, adding d back to the
	   remainder each time.  */
3223	/* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3224	/* n1 < d, but n1 might be larger than d1.  */
3225	.global GLOBAL(udiv_qrnnd_16)
3226	.balign 8
3227GLOBAL(udiv_qrnnd_16):
3228	div0u			/* Set up the divide flags for unsigned div1 steps.  */
3229	cmp/hi r6,r0		/* T = (n1 > d1), unsigned.  */
3230	bt .Lots		/* If so, take the special case at .Lots.  */
3231	.rept 16
3232	div1 r6,r0		/* 16 single-bit division steps of r0 by d1.  */
3233	.endr
3234	extu.w r0,r1		/* r1 = low 16 bits of r0 (quotient bits so far).  */
3235	bt 0f
3236	add r6,r0		/* Last step left T clear: add d1 back to r0.  */
32370:	rotcl r1		/* Shift the final T bit into the quotient.  */
3238	mulu.w r1,r5		/* macl = low16(quotient) * low16(d).  */
3239	xtrct r4,r0
3240	swap.w r0,r0		/* r0 = upper half of r0 : low 16 bits of n0.  */
3241	sts macl,r2
3242	cmp/hs r2,r0		/* T = (r0 >= product), unsigned.  */
3243	sub r2,r0
3244	bt 0f			/* No borrow: quotient digit is correct, return.  */
3245	addc r5,r0		/* First correction: r0 += d, T = carry out.  */
3246	add #-1,r1
3247	bt 0f			/* Carry: remainder is valid again, return.  */
32481:	add #-1,r1		/* Second correction of the quotient digit...  */
3249	rts
3250	add r5,r0		/* ...with r0 += d in the rts delay slot.  */
3251	.balign 8
3252.Lots:
	/* n1 > d1: the div1 sequence is not used here.  Start from
	   the all-ones 16-bit quotient digit and correct it.  */
3253	sub r5,r0
3254	swap.w r4,r1
3255	xtrct r0,r1		/* r1 = low 16 bits of r0 : low 16 bits of n0.  */
3256	clrt
3257	mov r1,r0
3258	addc r5,r0		/* r0 += d, T = carry out.  */
3259	mov #-1,r1
	/* NOTE(review): SL1 is a macro from lib1funcs.h; presumably it
	   emits "bf 1b" with "shlr16 r1" (making r1 = 0xffff) executed on
	   both paths -- confirm against its definition.  */
3260	SL1(bf, 1b,
3261	shlr16 r1)
32620:	rts
3263	nop
3264	ENDFUNC(GLOBAL(udiv_qrnnd_16))
3265#endif /* !__SHMEDIA__ */
3266#endif /* L_udiv_qrnnd_16 */
3267