xref: /openbsd-src/gnu/usr.bin/gcc/gcc/config/sh/lib1funcs.asm (revision 7ddfa9898c87e5b44c09446cb1ec3eb99f144f86)
1/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
2   Free Software Foundation, Inc.
3
4This file is free software; you can redistribute it and/or modify it
5under the terms of the GNU General Public License as published by the
6Free Software Foundation; either version 2, or (at your option) any
7later version.
8
9In addition to the permissions in the GNU General Public License, the
10Free Software Foundation gives you unlimited permission to link the
11compiled version of this file into combinations with other programs,
12and to distribute those combinations without any restriction coming
13from the use of this file.  (The General Public License restrictions
14do apply in other respects; for example, they cover modification of
15the file, and distribution when not linked into a combined
16executable.)
17
18This file is distributed in the hope that it will be useful, but
19WITHOUT ANY WARRANTY; without even the implied warranty of
20MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21General Public License for more details.
22
23You should have received a copy of the GNU General Public License
24along with this program; see the file COPYING.  If not, write to
25the Free Software Foundation, 59 Temple Place - Suite 330,
26Boston, MA 02111-1307, USA.  */
27
28!! libgcc routines for the Hitachi / SuperH SH CPUs.
29!! Contributed by Steve Chamberlain.
30!! sac@cygnus.com
31
32!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
33!! recoded in assembly by Toshiyasu Morita
34!! tm@netcom.com
35
36/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37   ELF local label prefixes by Jörn Rennecke
38   amylaar@cygnus.com  */
39
40#ifdef __ELF__
41#define LOCAL(X) .L_##X
42#define	FUNC(X,Y) .type X,Y; .hidden X
43#define	ENDFUNC(X) .size X,.-X
44#else
45#define LOCAL(X) L_##X
46#define	FUNC(X,Y)
47#define	ENDFUNC(X)
48#endif
49
50#define	CONCAT(A,B)	A##B
51#define	GLOBAL0(U,X)	CONCAT(U,__##X)
52#define	GLOBAL(X)	GLOBAL0(__USER_LABEL_PREFIX__,X)
53
54#if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
55#define FMOVD_WORKS
56#endif
57
58#if ! __SH5__
59#ifdef L_ashiftrt
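!! GLOBAL(ashiftrt_r4_N): arithmetic right shift of r4 by the constant N,
!! with the result returned in r4.  Only r4 and the T bit are modified.
!! The entry points for 16, 24 and 31/32 use word/byte extraction and
!! sign replication instead of long chains of shar instructions.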
60	.global	GLOBAL(ashiftrt_r4_0)
61	.global	GLOBAL(ashiftrt_r4_1)
62	.global	GLOBAL(ashiftrt_r4_2)
63	.global	GLOBAL(ashiftrt_r4_3)
64	.global	GLOBAL(ashiftrt_r4_4)
65	.global	GLOBAL(ashiftrt_r4_5)
66	.global	GLOBAL(ashiftrt_r4_6)
67	.global	GLOBAL(ashiftrt_r4_7)
68	.global	GLOBAL(ashiftrt_r4_8)
69	.global	GLOBAL(ashiftrt_r4_9)
70	.global	GLOBAL(ashiftrt_r4_10)
71	.global	GLOBAL(ashiftrt_r4_11)
72	.global	GLOBAL(ashiftrt_r4_12)
73	.global	GLOBAL(ashiftrt_r4_13)
74	.global	GLOBAL(ashiftrt_r4_14)
75	.global	GLOBAL(ashiftrt_r4_15)
76	.global	GLOBAL(ashiftrt_r4_16)
77	.global	GLOBAL(ashiftrt_r4_17)
78	.global	GLOBAL(ashiftrt_r4_18)
79	.global	GLOBAL(ashiftrt_r4_19)
80	.global	GLOBAL(ashiftrt_r4_20)
81	.global	GLOBAL(ashiftrt_r4_21)
82	.global	GLOBAL(ashiftrt_r4_22)
83	.global	GLOBAL(ashiftrt_r4_23)
84	.global	GLOBAL(ashiftrt_r4_24)
85	.global	GLOBAL(ashiftrt_r4_25)
86	.global	GLOBAL(ashiftrt_r4_26)
87	.global	GLOBAL(ashiftrt_r4_27)
88	.global	GLOBAL(ashiftrt_r4_28)
89	.global	GLOBAL(ashiftrt_r4_29)
90	.global	GLOBAL(ashiftrt_r4_30)
91	.global	GLOBAL(ashiftrt_r4_31)
92	.global	GLOBAL(ashiftrt_r4_32)
93
94	FUNC(GLOBAL(ashiftrt_r4_0),function)
95	FUNC(GLOBAL(ashiftrt_r4_1),function)
96	FUNC(GLOBAL(ashiftrt_r4_2),function)
97	FUNC(GLOBAL(ashiftrt_r4_3),function)
98	FUNC(GLOBAL(ashiftrt_r4_4),function)
99	FUNC(GLOBAL(ashiftrt_r4_5),function)
100	FUNC(GLOBAL(ashiftrt_r4_6),function)
101	FUNC(GLOBAL(ashiftrt_r4_7),function)
102	FUNC(GLOBAL(ashiftrt_r4_8),function)
103	FUNC(GLOBAL(ashiftrt_r4_9),function)
104	FUNC(GLOBAL(ashiftrt_r4_10),function)
105	FUNC(GLOBAL(ashiftrt_r4_11),function)
106	FUNC(GLOBAL(ashiftrt_r4_12),function)
107	FUNC(GLOBAL(ashiftrt_r4_13),function)
108	FUNC(GLOBAL(ashiftrt_r4_14),function)
109	FUNC(GLOBAL(ashiftrt_r4_15),function)
110	FUNC(GLOBAL(ashiftrt_r4_16),function)
111	FUNC(GLOBAL(ashiftrt_r4_17),function)
112	FUNC(GLOBAL(ashiftrt_r4_18),function)
113	FUNC(GLOBAL(ashiftrt_r4_19),function)
114	FUNC(GLOBAL(ashiftrt_r4_20),function)
115	FUNC(GLOBAL(ashiftrt_r4_21),function)
116	FUNC(GLOBAL(ashiftrt_r4_22),function)
117	FUNC(GLOBAL(ashiftrt_r4_23),function)
118	FUNC(GLOBAL(ashiftrt_r4_24),function)
119	FUNC(GLOBAL(ashiftrt_r4_25),function)
120	FUNC(GLOBAL(ashiftrt_r4_26),function)
121	FUNC(GLOBAL(ashiftrt_r4_27),function)
122	FUNC(GLOBAL(ashiftrt_r4_28),function)
123	FUNC(GLOBAL(ashiftrt_r4_29),function)
124	FUNC(GLOBAL(ashiftrt_r4_30),function)
125	FUNC(GLOBAL(ashiftrt_r4_31),function)
126	FUNC(GLOBAL(ashiftrt_r4_32),function)
127
128	.align	1
129GLOBAL(ashiftrt_r4_32):
130GLOBAL(ashiftrt_r4_31):
131	rotcl	r4
132	rts
133	subc	r4,r4
134
135GLOBAL(ashiftrt_r4_30):
136	shar	r4
137GLOBAL(ashiftrt_r4_29):
138	shar	r4
139GLOBAL(ashiftrt_r4_28):
140	shar	r4
141GLOBAL(ashiftrt_r4_27):
142	shar	r4
143GLOBAL(ashiftrt_r4_26):
144	shar	r4
145GLOBAL(ashiftrt_r4_25):
146	shar	r4
147GLOBAL(ashiftrt_r4_24):
148	shlr16	r4
149	shlr8	r4
150	rts
151	exts.b	r4,r4
152
153GLOBAL(ashiftrt_r4_23):
154	shar	r4
155GLOBAL(ashiftrt_r4_22):
156	shar	r4
157GLOBAL(ashiftrt_r4_21):
158	shar	r4
159GLOBAL(ashiftrt_r4_20):
160	shar	r4
161GLOBAL(ashiftrt_r4_19):
162	shar	r4
163GLOBAL(ashiftrt_r4_18):
164	shar	r4
165GLOBAL(ashiftrt_r4_17):
166	shar	r4
167GLOBAL(ashiftrt_r4_16):
168	shlr16	r4
169	rts
170	exts.w	r4,r4
171
172GLOBAL(ashiftrt_r4_15):
173	shar	r4
174GLOBAL(ashiftrt_r4_14):
175	shar	r4
176GLOBAL(ashiftrt_r4_13):
177	shar	r4
178GLOBAL(ashiftrt_r4_12):
179	shar	r4
180GLOBAL(ashiftrt_r4_11):
181	shar	r4
182GLOBAL(ashiftrt_r4_10):
183	shar	r4
184GLOBAL(ashiftrt_r4_9):
185	shar	r4
186GLOBAL(ashiftrt_r4_8):
187	shar	r4
188GLOBAL(ashiftrt_r4_7):
189	shar	r4
190GLOBAL(ashiftrt_r4_6):
191	shar	r4
192GLOBAL(ashiftrt_r4_5):
193	shar	r4
194GLOBAL(ashiftrt_r4_4):
195	shar	r4
196GLOBAL(ashiftrt_r4_3):
197	shar	r4
198GLOBAL(ashiftrt_r4_2):
199	shar	r4
200GLOBAL(ashiftrt_r4_1):
201	rts
202	shar	r4
203
204GLOBAL(ashiftrt_r4_0):
205	rts
206	nop
207#endif
208
209#ifdef L_ashiftrt_n
210
211!
212! GLOBAL(ashrsi3)
213!
214! Entry:
215!
216! r4: Value to shift
217! r5: Shifts
218!
219! Exit:
220!
221! r0: Result
222!
223! Destroys:
224!
225! (none)
226!
227
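/* A minimal C-level sketch (not part of the original source) of the
   contract implemented below, assuming the usual arithmetic behaviour of
   `>>' on negative int values.  GLOBAL(ashlsi3) and GLOBAL(lshrsi3) below
   follow the same pattern for `<<' and unsigned `>>': mask the count to
   5 bits, then dispatch through a byte table of offsets into fully
   unrolled shift sequences.

int __ashrsi3 (int value, int count)
{
  return value >> (count & 31);
}
*/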
228	.global	GLOBAL(ashrsi3)
229	FUNC(GLOBAL(ashrsi3),function)
230	.align	2
231GLOBAL(ashrsi3):
232	mov	#31,r0
233	and	r0,r5
234	mova	LOCAL(ashrsi3_table),r0
235	mov.b	@(r0,r5),r5
236#ifdef __sh1__
237	add	r5,r0
238	jmp	@r0
239#else
240	braf	r5
241#endif
242	mov	r4,r0
243	ENDFUNC(GLOBAL(ashrsi3))
244
245	.align	2
246LOCAL(ashrsi3_table):
247	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
248	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
249	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
250	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
251	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
252	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
253	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
254	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
255	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
256	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
257	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
258	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
259	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
260	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
261	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
262	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
263	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
264	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
265	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
266	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
267	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
268	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
269	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
270	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
271	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
272	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
273	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
274	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
275	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
276	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
277	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
278	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
279
280LOCAL(ashrsi3_31):
281	rotcl	r0
282	rts
283	subc	r0,r0
284
285LOCAL(ashrsi3_30):
286	shar	r0
287LOCAL(ashrsi3_29):
288	shar	r0
289LOCAL(ashrsi3_28):
290	shar	r0
291LOCAL(ashrsi3_27):
292	shar	r0
293LOCAL(ashrsi3_26):
294	shar	r0
295LOCAL(ashrsi3_25):
296	shar	r0
297LOCAL(ashrsi3_24):
298	shlr16	r0
299	shlr8	r0
300	rts
301	exts.b	r0,r0
302
303LOCAL(ashrsi3_23):
304	shar	r0
305LOCAL(ashrsi3_22):
306	shar	r0
307LOCAL(ashrsi3_21):
308	shar	r0
309LOCAL(ashrsi3_20):
310	shar	r0
311LOCAL(ashrsi3_19):
312	shar	r0
313LOCAL(ashrsi3_18):
314	shar	r0
315LOCAL(ashrsi3_17):
316	shar	r0
317LOCAL(ashrsi3_16):
318	shlr16	r0
319	rts
320	exts.w	r0,r0
321
322LOCAL(ashrsi3_15):
323	shar	r0
324LOCAL(ashrsi3_14):
325	shar	r0
326LOCAL(ashrsi3_13):
327	shar	r0
328LOCAL(ashrsi3_12):
329	shar	r0
330LOCAL(ashrsi3_11):
331	shar	r0
332LOCAL(ashrsi3_10):
333	shar	r0
334LOCAL(ashrsi3_9):
335	shar	r0
336LOCAL(ashrsi3_8):
337	shar	r0
338LOCAL(ashrsi3_7):
339	shar	r0
340LOCAL(ashrsi3_6):
341	shar	r0
342LOCAL(ashrsi3_5):
343	shar	r0
344LOCAL(ashrsi3_4):
345	shar	r0
346LOCAL(ashrsi3_3):
347	shar	r0
348LOCAL(ashrsi3_2):
349	shar	r0
350LOCAL(ashrsi3_1):
351	rts
352	shar	r0
353
354LOCAL(ashrsi3_0):
355	rts
356	nop
357
358#endif
359
360#ifdef L_ashiftlt
361
362!
363! GLOBAL(ashlsi3)
364!
365! Entry:
366!
367! r4: Value to shift
368! r5: Shifts
369!
370! Exit:
371!
372! r0: Result
373!
374! Destroys:
375!
376! (none)
377!
378	.global	GLOBAL(ashlsi3)
379	FUNC(GLOBAL(ashlsi3),function)
380	.align	2
381GLOBAL(ashlsi3):
382	mov	#31,r0
383	and	r0,r5
384	mova	LOCAL(ashlsi3_table),r0
385	mov.b	@(r0,r5),r5
386#ifdef __sh1__
387	add	r5,r0
388	jmp	@r0
389#else
390	braf	r5
391#endif
392	mov	r4,r0
393	ENDFUNC(GLOBAL(ashlsi3))
394
395	.align	2
396LOCAL(ashlsi3_table):
397	.byte		LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
398	.byte		LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
399	.byte		LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
400	.byte		LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
401	.byte		LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
402	.byte		LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
403	.byte		LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
404	.byte		LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
405	.byte		LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
406	.byte		LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
407	.byte		LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
408	.byte		LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
409	.byte		LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
410	.byte		LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
411	.byte		LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
412	.byte		LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
413	.byte		LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
414	.byte		LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
415	.byte		LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
416	.byte		LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
417	.byte		LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
418	.byte		LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
419	.byte		LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
420	.byte		LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
421	.byte		LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
422	.byte		LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
423	.byte		LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
424	.byte		LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
425	.byte		LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
426	.byte		LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
427	.byte		LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
428	.byte		LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
429
430LOCAL(ashlsi3_6):
431	shll2	r0
432LOCAL(ashlsi3_4):
433	shll2	r0
434LOCAL(ashlsi3_2):
435	rts
436	shll2	r0
437
438LOCAL(ashlsi3_7):
439	shll2	r0
440LOCAL(ashlsi3_5):
441	shll2	r0
442LOCAL(ashlsi3_3):
443	shll2	r0
444LOCAL(ashlsi3_1):
445	rts
446	shll	r0
447
448LOCAL(ashlsi3_14):
449	shll2	r0
450LOCAL(ashlsi3_12):
451	shll2	r0
452LOCAL(ashlsi3_10):
453	shll2	r0
454LOCAL(ashlsi3_8):
455	rts
456	shll8	r0
457
458LOCAL(ashlsi3_15):
459	shll2	r0
460LOCAL(ashlsi3_13):
461	shll2	r0
462LOCAL(ashlsi3_11):
463	shll2	r0
464LOCAL(ashlsi3_9):
465	shll8	r0
466	rts
467	shll	r0
468
469LOCAL(ashlsi3_22):
470	shll2	r0
471LOCAL(ashlsi3_20):
472	shll2	r0
473LOCAL(ashlsi3_18):
474	shll2	r0
475LOCAL(ashlsi3_16):
476	rts
477	shll16	r0
478
479LOCAL(ashlsi3_23):
480	shll2	r0
481LOCAL(ashlsi3_21):
482	shll2	r0
483LOCAL(ashlsi3_19):
484	shll2	r0
485LOCAL(ashlsi3_17):
486	shll16	r0
487	rts
488	shll	r0
489
490LOCAL(ashlsi3_30):
491	shll2	r0
492LOCAL(ashlsi3_28):
493	shll2	r0
494LOCAL(ashlsi3_26):
495	shll2	r0
496LOCAL(ashlsi3_24):
497	shll16	r0
498	rts
499	shll8	r0
500
501LOCAL(ashlsi3_31):
502	shll2	r0
503LOCAL(ashlsi3_29):
504	shll2	r0
505LOCAL(ashlsi3_27):
506	shll2	r0
507LOCAL(ashlsi3_25):
508	shll16	r0
509	shll8	r0
510	rts
511	shll	r0
512
513LOCAL(ashlsi3_0):
514	rts
515	nop
516
517#endif
518
519#ifdef L_lshiftrt
520
521!
522! GLOBAL(lshrsi3)
523!
524! Entry:
525!
526! r4: Value to shift
527! r5: Shifts
528!
529! Exit:
530!
531! r0: Result
532!
533! Destroys:
534!
535! (none)
536!
537	.global	GLOBAL(lshrsi3)
538	FUNC(GLOBAL(lshrsi3),function)
539	.align	2
540GLOBAL(lshrsi3):
541	mov	#31,r0
542	and	r0,r5
543	mova	LOCAL(lshrsi3_table),r0
544	mov.b	@(r0,r5),r5
545#ifdef __sh1__
546	add	r5,r0
547	jmp	@r0
548#else
549	braf	r5
550#endif
551	mov	r4,r0
552	ENDFUNC(GLOBAL(lshrsi3))
553
554	.align	2
555LOCAL(lshrsi3_table):
556	.byte		LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
557	.byte		LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
558	.byte		LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
559	.byte		LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
560	.byte		LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
561	.byte		LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
562	.byte		LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
563	.byte		LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
564	.byte		LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
565	.byte		LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
566	.byte		LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
567	.byte		LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
568	.byte		LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
569	.byte		LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
570	.byte		LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
571	.byte		LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
572	.byte		LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
573	.byte		LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
574	.byte		LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
575	.byte		LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
576	.byte		LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
577	.byte		LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
578	.byte		LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
579	.byte		LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
580	.byte		LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
581	.byte		LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
582	.byte		LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
583	.byte		LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
584	.byte		LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
585	.byte		LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
586	.byte		LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
587	.byte		LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
588
589LOCAL(lshrsi3_6):
590	shlr2	r0
591LOCAL(lshrsi3_4):
592	shlr2	r0
593LOCAL(lshrsi3_2):
594	rts
595	shlr2	r0
596
597LOCAL(lshrsi3_7):
598	shlr2	r0
599LOCAL(lshrsi3_5):
600	shlr2	r0
601LOCAL(lshrsi3_3):
602	shlr2	r0
603LOCAL(lshrsi3_1):
604	rts
605	shlr	r0
606
607LOCAL(lshrsi3_14):
608	shlr2	r0
609LOCAL(lshrsi3_12):
610	shlr2	r0
611LOCAL(lshrsi3_10):
612	shlr2	r0
613LOCAL(lshrsi3_8):
614	rts
615	shlr8	r0
616
617LOCAL(lshrsi3_15):
618	shlr2	r0
619LOCAL(lshrsi3_13):
620	shlr2	r0
621LOCAL(lshrsi3_11):
622	shlr2	r0
623LOCAL(lshrsi3_9):
624	shlr8	r0
625	rts
626	shlr	r0
627
628LOCAL(lshrsi3_22):
629	shlr2	r0
630LOCAL(lshrsi3_20):
631	shlr2	r0
632LOCAL(lshrsi3_18):
633	shlr2	r0
634LOCAL(lshrsi3_16):
635	rts
636	shlr16	r0
637
638LOCAL(lshrsi3_23):
639	shlr2	r0
640LOCAL(lshrsi3_21):
641	shlr2	r0
642LOCAL(lshrsi3_19):
643	shlr2	r0
644LOCAL(lshrsi3_17):
645	shlr16	r0
646	rts
647	shlr	r0
648
649LOCAL(lshrsi3_30):
650	shlr2	r0
651LOCAL(lshrsi3_28):
652	shlr2	r0
653LOCAL(lshrsi3_26):
654	shlr2	r0
655LOCAL(lshrsi3_24):
656	shlr16	r0
657	rts
658	shlr8	r0
659
660LOCAL(lshrsi3_31):
661	shlr2	r0
662LOCAL(lshrsi3_29):
663	shlr2	r0
664LOCAL(lshrsi3_27):
665	shlr2	r0
666LOCAL(lshrsi3_25):
667	shlr16	r0
668	shlr8	r0
669	rts
670	shlr	r0
671
672LOCAL(lshrsi3_0):
673	rts
674	nop
675
676#endif
677
678#ifdef L_movstr
679	.text
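/* Block-move helpers used by the SH backend's block-move expansion.
   r4 = destination, r5 = source (both assumed longword-aligned); r0 is
   clobbered.  GLOBAL(movstrSI<n>) copies exactly n bytes by falling
   through the chain of entry points below, highest offset first;
   GLOBAL(movstr) copies 64 bytes per pass, subtracting 16 (longwords)
   from r6 each time, and finally jumps into the chain to handle the
   remainder.  As a rough C sketch (not part of the original source),
   one entry such as GLOBAL(movstrSI12) behaves like:

void __movstrSI12 (long *dst, const long *src)
{
  dst[2] = src[2];
  dst[1] = src[1];
  dst[0] = src[0];
}
*/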
680! Done with all of the 64-byte groups; now handle the remainder
681
682! by jumping into the GLOBAL(movstrSI0) chain at the right offset.
683done:
684	add	#64,r5
685	mova	GLOBAL(movstrSI0),r0
686	shll2	r6
687	add	r6,r0
688	jmp	@r0
689	add	#64,r4
690	.align	4
691	.global	GLOBAL(movstrSI64)
692	FUNC(GLOBAL(movstrSI64),function)
693GLOBAL(movstrSI64):
694	mov.l	@(60,r5),r0
695	mov.l	r0,@(60,r4)
696	.global	GLOBAL(movstrSI60)
697	FUNC(GLOBAL(movstrSI60),function)
698GLOBAL(movstrSI60):
699	mov.l	@(56,r5),r0
700	mov.l	r0,@(56,r4)
701	.global	GLOBAL(movstrSI56)
702	FUNC(GLOBAL(movstrSI56),function)
703GLOBAL(movstrSI56):
704	mov.l	@(52,r5),r0
705	mov.l	r0,@(52,r4)
706	.global	GLOBAL(movstrSI52)
707	FUNC(GLOBAL(movstrSI52),function)
708GLOBAL(movstrSI52):
709	mov.l	@(48,r5),r0
710	mov.l	r0,@(48,r4)
711	.global	GLOBAL(movstrSI48)
712	FUNC(GLOBAL(movstrSI48),function)
713GLOBAL(movstrSI48):
714	mov.l	@(44,r5),r0
715	mov.l	r0,@(44,r4)
716	.global	GLOBAL(movstrSI44)
717	FUNC(GLOBAL(movstrSI44),function)
718GLOBAL(movstrSI44):
719	mov.l	@(40,r5),r0
720	mov.l	r0,@(40,r4)
721	.global	GLOBAL(movstrSI40)
722	FUNC(GLOBAL(movstrSI40),function)
723GLOBAL(movstrSI40):
724	mov.l	@(36,r5),r0
725	mov.l	r0,@(36,r4)
726	.global	GLOBAL(movstrSI36)
727	FUNC(GLOBAL(movstrSI36),function)
728GLOBAL(movstrSI36):
729	mov.l	@(32,r5),r0
730	mov.l	r0,@(32,r4)
731	.global	GLOBAL(movstrSI32)
732	FUNC(GLOBAL(movstrSI32),function)
733GLOBAL(movstrSI32):
734	mov.l	@(28,r5),r0
735	mov.l	r0,@(28,r4)
736	.global	GLOBAL(movstrSI28)
737	FUNC(GLOBAL(movstrSI28),function)
738GLOBAL(movstrSI28):
739	mov.l	@(24,r5),r0
740	mov.l	r0,@(24,r4)
741	.global	GLOBAL(movstrSI24)
742	FUNC(GLOBAL(movstrSI24),function)
743GLOBAL(movstrSI24):
744	mov.l	@(20,r5),r0
745	mov.l	r0,@(20,r4)
746	.global	GLOBAL(movstrSI20)
747	FUNC(GLOBAL(movstrSI20),function)
748GLOBAL(movstrSI20):
749	mov.l	@(16,r5),r0
750	mov.l	r0,@(16,r4)
751	.global	GLOBAL(movstrSI16)
752	FUNC(GLOBAL(movstrSI16),function)
753GLOBAL(movstrSI16):
754	mov.l	@(12,r5),r0
755	mov.l	r0,@(12,r4)
756	.global	GLOBAL(movstrSI12)
757	FUNC(GLOBAL(movstrSI12),function)
758GLOBAL(movstrSI12):
759	mov.l	@(8,r5),r0
760	mov.l	r0,@(8,r4)
761	.global	GLOBAL(movstrSI8)
762	FUNC(GLOBAL(movstrSI8),function)
763GLOBAL(movstrSI8):
764	mov.l	@(4,r5),r0
765	mov.l	r0,@(4,r4)
766	.global	GLOBAL(movstrSI4)
767	FUNC(GLOBAL(movstrSI4),function)
768GLOBAL(movstrSI4):
769	mov.l	@(0,r5),r0
770	mov.l	r0,@(0,r4)
771GLOBAL(movstrSI0):
772	FUNC(GLOBAL(movstrSI0),function)
773	rts
774	nop
775	ENDFUNC(GLOBAL(movstrSI64))
776	ENDFUNC(GLOBAL(movstrSI60))
777	ENDFUNC(GLOBAL(movstrSI56))
778	ENDFUNC(GLOBAL(movstrSI52))
779	ENDFUNC(GLOBAL(movstrSI48))
780	ENDFUNC(GLOBAL(movstrSI44))
781	ENDFUNC(GLOBAL(movstrSI40))
782	ENDFUNC(GLOBAL(movstrSI36))
783	ENDFUNC(GLOBAL(movstrSI32))
784	ENDFUNC(GLOBAL(movstrSI28))
785	ENDFUNC(GLOBAL(movstrSI24))
786	ENDFUNC(GLOBAL(movstrSI20))
787	ENDFUNC(GLOBAL(movstrSI16))
788	ENDFUNC(GLOBAL(movstrSI12))
789	ENDFUNC(GLOBAL(movstrSI8))
790	ENDFUNC(GLOBAL(movstrSI4))
791	ENDFUNC(GLOBAL(movstrSI0))
792
793	.align	4
794
795	.global	GLOBAL(movstr)
796	FUNC(GLOBAL(movstr),function)
797GLOBAL(movstr):
798	mov.l	@(60,r5),r0
799	mov.l	r0,@(60,r4)
800
801	mov.l	@(56,r5),r0
802	mov.l	r0,@(56,r4)
803
804	mov.l	@(52,r5),r0
805	mov.l	r0,@(52,r4)
806
807	mov.l	@(48,r5),r0
808	mov.l	r0,@(48,r4)
809
810	mov.l	@(44,r5),r0
811	mov.l	r0,@(44,r4)
812
813	mov.l	@(40,r5),r0
814	mov.l	r0,@(40,r4)
815
816	mov.l	@(36,r5),r0
817	mov.l	r0,@(36,r4)
818
819	mov.l	@(32,r5),r0
820	mov.l	r0,@(32,r4)
821
822	mov.l	@(28,r5),r0
823	mov.l	r0,@(28,r4)
824
825	mov.l	@(24,r5),r0
826	mov.l	r0,@(24,r4)
827
828	mov.l	@(20,r5),r0
829	mov.l	r0,@(20,r4)
830
831	mov.l	@(16,r5),r0
832	mov.l	r0,@(16,r4)
833
834	mov.l	@(12,r5),r0
835	mov.l	r0,@(12,r4)
836
837	mov.l	@(8,r5),r0
838	mov.l	r0,@(8,r4)
839
840	mov.l	@(4,r5),r0
841	mov.l	r0,@(4,r4)
842
843	mov.l	@(0,r5),r0
844	mov.l	r0,@(0,r4)
845
846	add	#-16,r6
847	cmp/pl	r6
848	bf	done
849
850	add	#64,r5
851	bra	GLOBAL(movstr)
852	add	#64,r4
853	ENDFUNC(GLOBAL(movstr))
854#endif
855
856#ifdef L_movstr_i4
857	.text
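/* SH4-tuned block-move helpers.  r4 = destination, r5 = source, r6 is a
   loop counter decremented with dt; the main loop moves 16 bytes per
   iteration, and the _even/_odd entry points differ only in how they
   prime the first few longwords before falling into that shared loop.
   GLOBAL(movstrSI12_i4) copies exactly 12 bytes.  */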
858	.global	GLOBAL(movstr_i4_even)
859	.global	GLOBAL(movstr_i4_odd)
860	.global	GLOBAL(movstrSI12_i4)
861	FUNC(GLOBAL(movstr_i4_even),function)
862	FUNC(GLOBAL(movstr_i4_odd),function)
863	FUNC(GLOBAL(movstrSI12_i4),function)
864
865	.p2align	5
866L_movstr_2mod4_end:
867	mov.l	r0,@(16,r4)
868	rts
869	mov.l	r1,@(20,r4)
870
871	.p2align	2
872
873GLOBAL(movstr_i4_odd):
874	mov.l	@r5+,r1
875	add	#-4,r4
876	mov.l	@r5+,r2
877	mov.l	@r5+,r3
878	mov.l	r1,@(4,r4)
879	mov.l	r2,@(8,r4)
880
881L_movstr_loop:
882	mov.l	r3,@(12,r4)
883	dt	r6
884	mov.l	@r5+,r0
885	bt/s	L_movstr_2mod4_end
886	mov.l	@r5+,r1
887	add	#16,r4
888L_movstr_start_even:
889	mov.l	@r5+,r2
890	mov.l	@r5+,r3
891	mov.l	r0,@r4
892	dt	r6
893	mov.l	r1,@(4,r4)
894	bf/s	L_movstr_loop
895	mov.l	r2,@(8,r4)
896	rts
897	mov.l	r3,@(12,r4)
898	ENDFUNC(GLOBAL(movstr_i4_odd))
899
900GLOBAL(movstr_i4_even):
901	mov.l	@r5+,r0
902	bra	L_movstr_start_even
903	mov.l	@r5+,r1
904
905	.p2align	4
906GLOBAL(movstrSI12_i4):
907	mov.l	@r5,r0
908	mov.l	@(4,r5),r1
909	mov.l	@(8,r5),r2
910	mov.l	r0,@r4
911	mov.l	r1,@(4,r4)
912	rts
913	mov.l	r2,@(8,r4)
914	ENDFUNC(GLOBAL(movstr_i4_even))
915	ENDFUNC(GLOBAL(movstrSI12_i4))
916#endif
917
918#ifdef L_mulsi3
919
920
921	.global	GLOBAL(mulsi3)
922	FUNC(GLOBAL(mulsi3),function)
923
924! r4 =       aabb
925! r5 =       ccdd
926! r0 = aabb*ccdd  via partial products
927!
928! if aa == 0 and cc == 0
929! r0 = bb*dd
930!
931! else
932! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
933!
934
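/* The same partial-product computation written out in C (a sketch, not
   part of the original source, assuming 32-bit unsigned arithmetic);
   aa/bb and cc/dd are the high and low 16-bit halves of the operands:

unsigned __mulsi3 (unsigned a, unsigned b)
{
  unsigned aa = a >> 16, bb = a & 0xffff;
  unsigned cc = b >> 16, dd = b & 0xffff;
  unsigned r0 = bb * dd;
  if (aa | cc)
    r0 += (aa * dd + cc * bb) << 16;
  return r0;
}
*/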
935GLOBAL(mulsi3):
936	mulu.w  r4,r5		! multiply the lsws  macl=bb*dd
937	mov     r5,r3		! r3 = ccdd
938	swap.w  r4,r2		! r2 = bbaa
939	xtrct   r2,r3		! r3 = aacc
940	tst  	r3,r3		! msws zero ?
941	bf      hiset
942	rts			! yes - then we have the answer
943	sts     macl,r0
944
945hiset:	sts	macl,r0		! r0 = bb*dd
946	mulu.w	r2,r5		! brewing macl = aa*dd
947	sts	macl,r1
948	mulu.w	r3,r4		! brewing macl = cc*bb
949	sts	macl,r2
950	add	r1,r2
951	shll16	r2
952	rts
953	add	r2,r0
954
955
956#endif
957#endif /* ! __SH5__ */
958#ifdef L_sdivsi3_i4
959	.title "SH DIVIDE"
960!! 4 byte integer Divide code for the Hitachi SH
961#ifdef __SH4__
962!! args in r4 and r5, result in fpul, clobber dr0, dr2
963
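/* GLOBAL(sdivsi3_i4) performs the signed 32-bit division in double
   precision and truncates the quotient towards zero; roughly, as a C
   sketch (not part of the original source):

int __sdivsi3_i4 (int i, int j)
{
  return (int) ((double) i / (double) j);
}
*/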
964	.global	GLOBAL(sdivsi3_i4)
965	FUNC(GLOBAL(sdivsi3_i4),function)
966GLOBAL(sdivsi3_i4):
967	lds r4,fpul
968	float fpul,dr0
969	lds r5,fpul
970	float fpul,dr2
971	fdiv dr2,dr0
972	rts
973	ftrc dr0,fpul
974	ENDFUNC(GLOBAL(sdivsi3_i4))
975
976#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
977!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
978
979#if ! __SH5__ || __SH5__ == 32
980#if __SH5__
981	.mode	SHcompact
982#endif
983	.global	GLOBAL(sdivsi3_i4)
984	FUNC(GLOBAL(sdivsi3_i4),function)
985GLOBAL(sdivsi3_i4):
986	sts.l fpscr,@-r15
987	mov #8,r2
988	swap.w r2,r2
989	lds r2,fpscr
990	lds r4,fpul
991	float fpul,dr0
992	lds r5,fpul
993	float fpul,dr2
994	fdiv dr2,dr0
995	ftrc dr0,fpul
996	rts
997	lds.l @r15+,fpscr
998
999#endif /* ! __SH5__ || __SH5__ == 32 */
1000#endif /* ! __SH4__ */
1001#endif
1002
1003#ifdef L_sdivsi3
1004/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1005   sh3e code.  */
1006#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__OpenBSD__)
1007!!
1008!! Steve Chamberlain
1009!! sac@cygnus.com
1010!!
1011!!
1012
1013!! args in r4 and r5, result in r0 clobber r1,r2,r3
1014
1015	.global	GLOBAL(sdivsi3)
1016	FUNC(GLOBAL(sdivsi3), function)
1017#if __SHMEDIA__
1018#if __SH5__ == 32
1019	.section	.text..SHmedia32,"ax"
1020#else
1021	.text
1022#endif
1023	.align	2
1024#if 0
1025/* The assembly code that follows is a hand-optimized version of the C
1026   code that follows.  Note that the registers that are modified are
1027   exactly those listed as clobbered in the patterns divsi3_i1 and
1028   divsi3_i1_media.
1029
1030int __sdivsi3 (i, j)
1031     int i, j;
1032{
1033  register unsigned long long r18 asm ("r18");
1034  register unsigned long long r19 asm ("r19");
1035  register unsigned long long r0 asm ("r0") = 0;
1036  register unsigned long long r1 asm ("r1") = 1;
1037  register int r2 asm ("r2") = i >> 31;
1038  register int r3 asm ("r3") = j >> 31;
1039
1040  r2 = r2 ? r2 : r1;
1041  r3 = r3 ? r3 : r1;
1042  r18 = i * r2;
1043  r19 = j * r3;
1044  r2 *= r3;
1045
1046  r19 <<= 31;
1047  r1 <<= 31;
1048  do
1049    if (r18 >= r19)
1050      r0 |= r1, r18 -= r19;
1051  while (r19 >>= 1, r1 >>= 1);
1052
1053  return r2 * (int)r0;
1054}
1055*/
1056GLOBAL(sdivsi3):
1057	pt/l	LOCAL(sdivsi3_dontadd), tr2
1058	pt/l	LOCAL(sdivsi3_loop), tr1
1059	ptabs/l	r18, tr0
1060	movi	0, r0
1061	movi	1, r1
1062	shari.l	r4, 31, r2
1063	shari.l	r5, 31, r3
1064	cmveq	r2, r1, r2
1065	cmveq	r3, r1, r3
1066	muls.l	r4, r2, r18
1067	muls.l	r5, r3, r19
1068	muls.l	r2, r3, r2
1069	shlli	r19, 31, r19
1070	shlli	r1, 31, r1
1071LOCAL(sdivsi3_loop):
1072	bgtu	r19, r18, tr2
1073	or	r0, r1, r0
1074	sub	r18, r19, r18
1075LOCAL(sdivsi3_dontadd):
1076	shlri	r1, 1, r1
1077	shlri	r19, 1, r19
1078	bnei	r1, 0, tr1
1079	muls.l	r0, r2, r0
1080	add.l	r0, r63, r0
1081	blink	tr0, r63
1082#else /* ! 0 */
1083 // inputs: r4,r5
1084 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1085 // result in r0
1086GLOBAL(sdivsi3):
1087 // can create absolute value without extra latency,
1088 // but dependent on proper sign extension of inputs:
1089 // shari.l r5,31,r2
1090 // xor r5,r2,r20
1091 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1092 shari.l r5,31,r2
1093 ori r2,1,r2
1094 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1095 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1096 shari.l r4,31,r3
1097 nsb r20,r0
1098 shlld r20,r0,r25
1099 shlri r25,48,r25
1100 sub r19,r25,r1
1101 mmulfx.w r1,r1,r2
1102 mshflo.w r1,r63,r1
1103 // If r4 was to be used in-place instead of r21, could use this sequence
1104 // to compute absolute:
1105 // sub r63,r4,r19 // compute absolute value of r4
1106 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1107 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1108 ori r3,1,r3
1109 mmulfx.w r25,r2,r2
1110 sub r19,r0,r0
1111 muls.l r4,r3,r21
1112 msub.w r1,r2,r2
1113 addi r2,-2,r1
1114 mulu.l r21,r1,r19
1115 mmulfx.w r2,r2,r2
1116 shlli r1,15,r1
1117 shlrd r19,r0,r19
1118 mulu.l r19,r20,r3
1119 mmacnfx.wl r25,r2,r1
1120 ptabs r18,tr0
1121 sub r21,r3,r25
1122
1123 mulu.l r25,r1,r2
1124 addi r0,14,r0
1125 xor r4,r5,r18
1126 shlrd r2,r0,r2
1127 mulu.l r2,r20,r3
1128 add r19,r2,r19
1129 shari.l r18,31,r18
1130 sub r25,r3,r25
1131
1132 mulu.l r25,r1,r2
1133 sub r25,r20,r25
1134 add r19,r18,r19
1135 shlrd r2,r0,r2
1136 mulu.l r2,r20,r3
1137 addi r25,1,r25
1138 add r19,r2,r19
1139
1140 cmpgt r25,r3,r25
1141 add.l r19,r25,r0
1142 xor r0,r18,r0
1143 blink tr0,r63
1144#endif
1145#elif defined __SHMEDIA__
1146/* m5compact-nofpu */
1147 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1148	.mode	SHmedia
1149	.section	.text..SHmedia32,"ax"
1150	.align	2
1151GLOBAL(sdivsi3):
1152	pt/l LOCAL(sdivsi3_dontsub), tr0
1153	pt/l LOCAL(sdivsi3_loop), tr1
1154	ptabs/l r18,tr2
1155	shari.l r4,31,r18
1156	shari.l r5,31,r19
1157	xor r4,r18,r20
1158	xor r5,r19,r21
1159	sub.l r20,r18,r20
1160	sub.l r21,r19,r21
1161	xor r18,r19,r19
1162	shlli r21,32,r25
1163	addi r25,-1,r21
1164	addz.l r20,r63,r20
1165LOCAL(sdivsi3_loop):
1166	shlli r20,1,r20
1167	bgeu/u r21,r20,tr0
1168	sub r20,r21,r20
1169LOCAL(sdivsi3_dontsub):
1170	addi.l r25,-1,r25
1171	bnei r25,-32,tr1
1172	xor r20,r19,r20
1173	sub.l r20,r19,r0
1174	blink tr2,r63
1175#else /* ! __SHMEDIA__ */
1176GLOBAL(sdivsi3):
1177	mov	r4,r1
1178	mov	r5,r0
1179
1180	tst	r0,r0
1181	bt	div0
1182	mov	#0,r2
1183	div0s	r2,r1
1184	subc	r3,r3
1185	subc	r2,r1
1186	div0s	r0,r3
1187	rotcl	r1
1188	div1	r0,r3
1189	rotcl	r1
1190	div1	r0,r3
1191	rotcl	r1
1192	div1	r0,r3
1193	rotcl	r1
1194	div1	r0,r3
1195	rotcl	r1
1196	div1	r0,r3
1197	rotcl	r1
1198	div1	r0,r3
1199	rotcl	r1
1200	div1	r0,r3
1201	rotcl	r1
1202	div1	r0,r3
1203	rotcl	r1
1204	div1	r0,r3
1205	rotcl	r1
1206	div1	r0,r3
1207	rotcl	r1
1208	div1	r0,r3
1209	rotcl	r1
1210	div1	r0,r3
1211	rotcl	r1
1212	div1	r0,r3
1213	rotcl	r1
1214	div1	r0,r3
1215	rotcl	r1
1216	div1	r0,r3
1217	rotcl	r1
1218	div1	r0,r3
1219	rotcl	r1
1220	div1	r0,r3
1221	rotcl	r1
1222	div1	r0,r3
1223	rotcl	r1
1224	div1	r0,r3
1225	rotcl	r1
1226	div1	r0,r3
1227	rotcl	r1
1228	div1	r0,r3
1229	rotcl	r1
1230	div1	r0,r3
1231	rotcl	r1
1232	div1	r0,r3
1233	rotcl	r1
1234	div1	r0,r3
1235	rotcl	r1
1236	div1	r0,r3
1237	rotcl	r1
1238	div1	r0,r3
1239	rotcl	r1
1240	div1	r0,r3
1241	rotcl	r1
1242	div1	r0,r3
1243	rotcl	r1
1244	div1	r0,r3
1245	rotcl	r1
1246	div1	r0,r3
1247	rotcl	r1
1248	div1	r0,r3
1249	rotcl	r1
1250	div1	r0,r3
1251	rotcl	r1
1252	addc	r2,r1
1253	rts
1254	mov	r1,r0
1255
1256
1257div0:	rts
1258	mov	#0,r0
1259	ENDFUNC(GLOBAL(sdivsi3))
1260
1261#endif /* ! __SHMEDIA__ */
1262#endif /* ! __SH4__ */
1263#endif
1264#ifdef L_udivsi3_i4
1265
1266	.title "SH DIVIDE"
1267!! 4 byte integer Divide code for the Hitachi SH
1268#ifdef __SH4__
1269!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1270
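/* The unsigned operands are converted exactly to double by flipping the
   sign bit (so the value seen by the FPU is u - 2^31) and then adding
   2147483648.0 back in.  A C-level sketch of the idea (not part of the
   original source, assuming IEEE double and two's-complement int
   conversion):

unsigned __udivsi3_i4 (unsigned i, unsigned j)
{
  if (j <= 1)
    return i;
  return (unsigned) (((double) (int) (i ^ 0x80000000) + 2147483648.0)
		     / ((double) (int) (j ^ 0x80000000) + 2147483648.0));
}
*/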
1271	.global	GLOBAL(udivsi3_i4)
1272	FUNC(GLOBAL(udivsi3_i4),function)
1273GLOBAL(udivsi3_i4):
1274	mov #1,r1
1275	cmp/hi r1,r5
1276	bf trivial
1277	rotr r1
1278	xor r1,r4
1279	lds r4,fpul
1280	mova L1,r0
1281#ifdef FMOVD_WORKS
1282	fmov.d @r0+,dr4
1283#else
1284#ifdef __LITTLE_ENDIAN__
1285	fmov.s @r0+,fr5
1286	fmov.s @r0,fr4
1287#else
1288	fmov.s @r0+,fr4
1289	fmov.s @r0,fr5
1290#endif
1291#endif
1292	float fpul,dr0
1293	xor r1,r5
1294	lds r5,fpul
1295	float fpul,dr2
1296	fadd dr4,dr0
1297	fadd dr4,dr2
1298	fdiv dr2,dr0
1299	rts
1300	ftrc dr0,fpul
1301
1302trivial:
1303	rts
1304	lds r4,fpul
1305
1306	.align 2
1307#ifdef FMOVD_WORKS
1308	.align 3	! make double below 8 byte aligned.
1309#endif
1310L1:
1311	.double 2147483648
1312
1313#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1314#if ! __SH5__ || __SH5__ == 32
1315!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1316	.mode	SHmedia
1317	.global	GLOBAL(udivsi3_i4)
1318	FUNC(GLOBAL(udivsi3_i4),function)
1319GLOBAL(udivsi3_i4):
1320	addz.l	r4,r63,r20
1321	addz.l	r5,r63,r21
1322	fmov.qd	r20,dr0
1323	fmov.qd	r21,dr32
1324	ptabs	r18,tr0
1325	float.qd dr0,dr0
1326	float.qd dr32,dr32
1327	fdiv.d	dr0,dr32,dr0
1328	ftrc.dq dr0,dr32
1329	fmov.s fr33,fr32
1330	blink tr0,r63
1331#endif /* ! __SH5__ || __SH5__ == 32 */
1332#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1333!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1334
1335	.global	GLOBAL(udivsi3_i4)
1336	FUNC(GLOBAL(udivsi3_i4),function)
1337GLOBAL(udivsi3_i4):
1338	mov #1,r1
1339	cmp/hi r1,r5
1340	bf trivial
1341	sts.l fpscr,@-r15
1342	mova L1,r0
1343	lds.l @r0+,fpscr
1344	rotr r1
1345	xor r1,r4
1346	lds r4,fpul
1347#ifdef FMOVD_WORKS
1348	fmov.d @r0+,dr4
1349#else
1350#ifdef __LITTLE_ENDIAN__
1351	fmov.s @r0+,fr5
1352	fmov.s @r0,fr4
1353#else
1354	fmov.s @r0+,fr4
1355	fmov.s @r0,fr5
1356#endif
1357#endif
1358	float fpul,dr0
1359	xor r1,r5
1360	lds r5,fpul
1361	float fpul,dr2
1362	fadd dr4,dr0
1363	fadd dr4,dr2
1364	fdiv dr2,dr0
1365	ftrc dr0,fpul
1366	rts
1367	lds.l @r15+,fpscr
1368	ENDFUNC(GLOBAL(udivsi3_i4))
1369
1370#ifdef FMOVD_WORKS
1371	.align 3	! make double below 8 byte aligned.
1372#endif
1373trivial:
1374	rts
1375	lds r4,fpul
1376
1377	.align 2
1378L1:
1379#ifndef FMOVD_WORKS
1380	.long 0x80000
1381#else
1382	.long 0x180000
1383#endif
1384	.double 2147483648
1385
1386#endif /* ! __SH4__ */
1387#endif
1388
1389#ifdef L_udivsi3
1390/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1391   sh3e code.  */
1392#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__OpenBSD__)
1393
1394!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1395	.global	GLOBAL(udivsi3)
1396	FUNC(GLOBAL(udivsi3),function)
1397
1398#if __SHMEDIA__
1399#if __SH5__ == 32
1400	.section	.text..SHmedia32,"ax"
1401#else
1402	.text
1403#endif
1404	.align	2
1405#if 0
1406/* The assembly code that follows is a hand-optimized version of the C
1407   code that follows.  Note that the registers that are modified are
1408   exactly those listed as clobbered in the patterns udivsi3_i1 and
1409   udivsi3_i1_media.
1410
1411unsigned
1412__udivsi3 (i, j)
1413    unsigned i, j;
1414{
1415  register unsigned long long r0 asm ("r0") = 0;
1416  register unsigned long long r18 asm ("r18") = 1;
1417  register unsigned long long r4 asm ("r4") = i;
1418  register unsigned long long r19 asm ("r19") = j;
1419
1420  r19 <<= 31;
1421  r18 <<= 31;
1422  do
1423    if (r4 >= r19)
1424      r0 |= r18, r4 -= r19;
1425  while (r19 >>= 1, r18 >>= 1);
1426
1427  return r0;
1428}
1429*/
1430GLOBAL(udivsi3):
1431	pt/l	LOCAL(udivsi3_dontadd), tr2
1432	pt/l	LOCAL(udivsi3_loop), tr1
1433	ptabs/l	r18, tr0
1434	movi	0, r0
1435	movi	1, r18
1436	addz.l	r5, r63, r19
1437	addz.l	r4, r63, r4
1438	shlli	r19, 31, r19
1439	shlli	r18, 31, r18
1440LOCAL(udivsi3_loop):
1441	bgtu	r19, r4, tr2
1442	or	r0, r18, r0
1443	sub	r4, r19, r4
1444LOCAL(udivsi3_dontadd):
1445	shlri	r18, 1, r18
1446	shlri	r19, 1, r19
1447	bnei	r18, 0, tr1
1448	blink	tr0, r63
1449#else
1450GLOBAL(udivsi3):
1451 // inputs: r4,r5
1452 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1453 // result in r0.
1454 addz.l r5,r63,r22
1455 nsb r22,r0
1456 shlld r22,r0,r25
1457 shlri r25,48,r25
1458 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1459 sub r20,r25,r21
1460 mmulfx.w r21,r21,r19
1461 mshflo.w r21,r63,r21
1462 ptabs r18,tr0
1463 mmulfx.w r25,r19,r19
1464 sub r20,r0,r0
1465 /* bubble */
1466 msub.w r21,r19,r19
1467 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1468		    before the msub.w, but we need a different value for
1469		    r19 to keep errors under control.  */
1470 mulu.l r4,r21,r18
1471 mmulfx.w r19,r19,r19
1472 shlli r21,15,r21
1473 shlrd r18,r0,r18
1474 mulu.l r18,r22,r20
1475 mmacnfx.wl r25,r19,r21
1476 /* bubble */
1477 sub r4,r20,r25
1478
1479 mulu.l r25,r21,r19
1480 addi r0,14,r0
1481 /* bubble */
1482 shlrd r19,r0,r19
1483 mulu.l r19,r22,r20
1484 add r18,r19,r18
1485 /* bubble */
1486 sub.l r25,r20,r25
1487
1488 mulu.l r25,r21,r19
1489 addz.l r25,r63,r25
1490 sub r25,r22,r25
1491 shlrd r19,r0,r19
1492 mulu.l r19,r22,r20
1493 addi r25,1,r25
1494 add r18,r19,r18
1495
1496 cmpgt r25,r20,r25
1497 add.l r18,r25,r0
1498 blink tr0,r63
1499#endif
1500#elif defined (__SHMEDIA__)
1501/* m5compact-nofpu - more emphasis on code size than on speed, but don't
1502   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1503   So use a short shmedia loop.  */
1504 // clobbered: r20,r21,r25,tr0,tr1,tr2
1505	.mode	SHmedia
1506	.section	.text..SHmedia32,"ax"
1507	.align	2
1508GLOBAL(udivsi3):
1509 pt/l LOCAL(udivsi3_dontsub), tr0
1510 pt/l LOCAL(udivsi3_loop), tr1
1511 ptabs/l r18,tr2
1512 shlli r5,32,r25
1513 addi r25,-1,r21
1514 addz.l r4,r63,r20
1515LOCAL(udivsi3_loop):
1516 shlli r20,1,r20
1517 bgeu/u r21,r20,tr0
1518 sub r20,r21,r20
1519LOCAL(udivsi3_dontsub):
1520 addi.l r25,-1,r25
1521 bnei r25,-32,tr1
1522 add.l r20,r63,r0
1523 blink tr2,r63
1524#else /* ! defined (__SHMEDIA__) */
1525LOCAL(div8):
1526 div1 r5,r4
1527LOCAL(div7):
1528 div1 r5,r4; div1 r5,r4; div1 r5,r4
1529 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1530
1531LOCAL(divx4):
1532 div1 r5,r4; rotcl r0
1533 div1 r5,r4; rotcl r0
1534 div1 r5,r4; rotcl r0
1535 rts; div1 r5,r4
1536
1537GLOBAL(udivsi3):
1538 sts.l pr,@-r15
1539 extu.w r5,r0
1540 cmp/eq r5,r0
1541#ifdef __sh1__
1542 bf LOCAL(large_divisor)
1543#else
1544 bf/s LOCAL(large_divisor)
1545#endif
1546 div0u
1547 swap.w r4,r0
1548 shlr16 r4
1549 bsr LOCAL(div8)
1550 shll16 r5
1551 bsr LOCAL(div7)
1552 div1 r5,r4
1553 xtrct r4,r0
1554 xtrct r0,r4
1555 bsr LOCAL(div8)
1556 swap.w r4,r4
1557 bsr LOCAL(div7)
1558 div1 r5,r4
1559 lds.l @r15+,pr
1560 xtrct r4,r0
1561 swap.w r0,r0
1562 rotcl r0
1563 rts
1564 shlr16 r5
1565 ENDFUNC(GLOBAL(udivsi3))
1566
1567LOCAL(large_divisor):
1568#ifdef __sh1__
1569 div0u
1570#endif
1571 mov #0,r0
1572 xtrct r4,r0
1573 xtrct r0,r4
1574 bsr LOCAL(divx4)
1575 rotcl r0
1576 bsr LOCAL(divx4)
1577 rotcl r0
1578 bsr LOCAL(divx4)
1579 rotcl r0
1580 bsr LOCAL(divx4)
1581 rotcl r0
1582 lds.l @r15+,pr
1583 rts
1584 rotcl r0
1585
1586#endif /* ! __SHMEDIA__ */
1587#endif /* __SH4__ */
1588#endif /* L_udivsi3 */
1589
1590#ifdef L_udivdi3
1591#ifdef __SHMEDIA__
1592	.mode	SHmedia
1593	.section	.text..SHmedia32,"ax"
1594	.align	2
1595	.global	GLOBAL(udivdi3)
1596	FUNC(GLOBAL(udivdi3),function)
1597GLOBAL(udivdi3):
1598	shlri r3,1,r4
1599	nsb r4,r22
1600	shlld r3,r22,r6
1601	shlri r6,49,r5
1602	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1603	sub r21,r5,r1
1604	mmulfx.w r1,r1,r4
1605	mshflo.w r1,r63,r1
1606	sub r63,r22,r20 // r63 == 64 % 64
1607	mmulfx.w r5,r4,r4
1608	pta LOCAL(large_divisor),tr0
1609	addi r20,32,r9
1610	msub.w r1,r4,r1
1611	madd.w r1,r1,r1
1612	mmulfx.w r1,r1,r4
1613	shlri r6,32,r7
1614	bgt/u r9,r63,tr0 // large_divisor
1615	mmulfx.w r5,r4,r4
1616	shlri r2,32+14,r19
1617	addi r22,-31,r0
1618	msub.w r1,r4,r1
1619
1620	mulu.l r1,r7,r4
1621	addi r1,-3,r5
1622	mulu.l r5,r19,r5
1623	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1624	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1625	                 the case may be, %0000000000000000 000.11111111111, still */
1626	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1627	mulu.l r5,r3,r8
1628	mshalds.l r1,r21,r1
1629	shari r4,26,r4
1630	shlld r8,r0,r8
1631	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1632	sub r2,r8,r2
1633	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1634
1635	shlri r2,22,r21
1636	mulu.l r21,r1,r21
1637	shlld r5,r0,r8
1638	addi r20,30-22,r0
1639	shlrd r21,r0,r21
1640	mulu.l r21,r3,r5
1641	add r8,r21,r8
1642	mcmpgt.l r21,r63,r21 // See Note 1
1643	addi r20,30,r0
1644	mshfhi.l r63,r21,r21
1645	sub r2,r5,r2
1646	andc r2,r21,r2
1647
1648	/* small divisor: need a third divide step */
1649	mulu.l r2,r1,r7
1650	ptabs r18,tr0
1651	addi r2,1,r2
1652	shlrd r7,r0,r7
1653	mulu.l r7,r3,r5
1654	add r8,r7,r8
1655	sub r2,r3,r2
1656	cmpgt r2,r5,r5
1657	add r8,r5,r2
1658	/* could test r3 here to check for divide by zero.  */
1659	blink tr0,r63
1660
1661LOCAL(large_divisor):
1662	mmulfx.w r5,r4,r4
1663	shlrd r2,r9,r25
1664	shlri r25,32,r8
1665	msub.w r1,r4,r1
1666
1667	mulu.l r1,r7,r4
1668	addi r1,-3,r5
1669	mulu.l r5,r8,r5
1670	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1671	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1672	                 the case may be, %0000000000000000 000.11111111111, still */
1673	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1674	shlri r5,14-1,r8
1675	mulu.l r8,r7,r5
1676	mshalds.l r1,r21,r1
1677	shari r4,26,r4
1678	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1679	sub r25,r5,r25
1680	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
1681
1682	shlri r25,22,r21
1683	mulu.l r21,r1,r21
1684	pta LOCAL(no_lo_adj),tr0
1685	addi r22,32,r0
1686	shlri r21,40,r21
1687	mulu.l r21,r7,r5
1688	add r8,r21,r8
1689	shlld r2,r0,r2
1690	sub r25,r5,r25
1691	bgtu/u r7,r25,tr0 // no_lo_adj
1692	addi r8,1,r8
1693	sub r25,r7,r25
1694LOCAL(no_lo_adj):
1695	mextr4 r2,r25,r2
1696
1697	/* large_divisor: only needs a few adjustments.  */
1698	mulu.l r8,r6,r5
1699	ptabs r18,tr0
1700	/* bubble */
1701	cmpgtu r5,r2,r5
1702	sub r8,r5,r2
1703	blink tr0,r63
1704/* Note 1: To shift the result of the second divide stage so that the result
1705   always fits into 32 bits, yet we still reduce the rest sufficiently
1706   would require a lot of instructions to do the shifts just right.  Using
1707   the full 64 bit shift result to multiply with the divisor would require
1708   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1709   Fortunately, if the upper 32 bits of the shift result are nonzero, we
1710   know that the rest after taking this partial result into account will
1711   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1712   upper 32 bits of the partial result are nonzero.  */
1713#endif /* __SHMEDIA__ */
1714#endif /* L_udivdi3 */
1715
1716#ifdef L_divdi3
1717#ifdef __SHMEDIA__
1718	.mode	SHmedia
1719	.section	.text..SHmedia32,"ax"
1720	.align	2
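/* GLOBAL(divdi3) is a sign-handling wrapper around GLOBAL(udivdi3): both
   operands are replaced by their absolute values and the quotient is
   negated afterwards if the operand signs differed.  Roughly, as a C
   sketch (not part of the original source):

extern unsigned long long __udivdi3 (unsigned long long, unsigned long long);

long long __divdi3 (long long i, long long j)
{
  int negate = (i < 0) != (j < 0);
  unsigned long long q = __udivdi3 (i < 0 ? -i : i, j < 0 ? -j : j);
  return negate ? - (long long) q : (long long) q;
}
*/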
1721	.global	GLOBAL(divdi3)
1722	FUNC(GLOBAL(divdi3),function)
1723GLOBAL(divdi3):
1724	pta GLOBAL(udivdi3),tr0
1725	shari r2,63,r22
1726	shari r3,63,r23
1727	xor r2,r22,r2
1728	xor r3,r23,r3
1729	sub r2,r22,r2
1730	sub r3,r23,r3
1731	beq/u r22,r23,tr0
1732	ptabs r18,tr1
1733	blink tr0,r18
1734	sub r63,r2,r2
1735	blink tr1,r63
1736#endif /* __SHMEDIA__ */
1737#endif /* L_divdi3 */
1738
1739#ifdef L_umoddi3
1740#ifdef __SHMEDIA__
1741	.mode	SHmedia
1742	.section	.text..SHmedia32,"ax"
1743	.align	2
1744	.global	GLOBAL(umoddi3)
1745	FUNC(GLOBAL(umoddi3),function)
1746GLOBAL(umoddi3):
1747	shlri r3,1,r4
1748	nsb r4,r22
1749	shlld r3,r22,r6
1750	shlri r6,49,r5
1751	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1752	sub r21,r5,r1
1753	mmulfx.w r1,r1,r4
1754	mshflo.w r1,r63,r1
1755	sub r63,r22,r20 // r63 == 64 % 64
1756	mmulfx.w r5,r4,r4
1757	pta LOCAL(large_divisor),tr0
1758	addi r20,32,r9
1759	msub.w r1,r4,r1
1760	madd.w r1,r1,r1
1761	mmulfx.w r1,r1,r4
1762	shlri r6,32,r7
1763	bgt/u r9,r63,tr0 // large_divisor
1764	mmulfx.w r5,r4,r4
1765	shlri r2,32+14,r19
1766	addi r22,-31,r0
1767	msub.w r1,r4,r1
1768
1769	mulu.l r1,r7,r4
1770	addi r1,-3,r5
1771	mulu.l r5,r19,r5
1772	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1773	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1774	                 the case may be, %0000000000000000 000.11111111111, still */
1775	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1776	mulu.l r5,r3,r5
1777	mshalds.l r1,r21,r1
1778	shari r4,26,r4
1779	shlld r5,r0,r5
1780	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1781	sub r2,r5,r2
1782	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1783
1784	shlri r2,22,r21
1785	mulu.l r21,r1,r21
1786	addi r20,30-22,r0
1787	/* bubble */ /* could test r3 here to check for divide by zero.  */
1788	shlrd r21,r0,r21
1789	mulu.l r21,r3,r5
1790	mcmpgt.l r21,r63,r21 // See Note 1
1791	addi r20,30,r0
1792	mshfhi.l r63,r21,r21
1793	sub r2,r5,r2
1794	andc r2,r21,r2
1795
1796	/* small divisor: need a third divide step */
1797	mulu.l r2,r1,r7
1798	ptabs r18,tr0
1799	sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1800	shlrd r7,r0,r7
1801	mulu.l r7,r3,r5
1802	/* bubble */
1803	addi r8,1,r7
1804	cmpgt r7,r5,r7
1805	cmvne r7,r8,r2
1806	sub r2,r5,r2
1807	blink tr0,r63
1808
1809LOCAL(large_divisor):
1810	mmulfx.w r5,r4,r4
1811	shlrd r2,r9,r25
1812	shlri r25,32,r8
1813	msub.w r1,r4,r1
1814
1815	mulu.l r1,r7,r4
1816	addi r1,-3,r5
1817	mulu.l r5,r8,r5
1818	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1819	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1820	                 the case may be, %0000000000000000 000.11111111111, still */
1821	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1822	shlri r5,14-1,r8
1823	mulu.l r8,r7,r5
1824	mshalds.l r1,r21,r1
1825	shari r4,26,r4
1826	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1827	sub r25,r5,r25
1828	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
1829
1830	shlri r25,22,r21
1831	mulu.l r21,r1,r21
1832	pta LOCAL(no_lo_adj),tr0
1833	addi r22,32,r0
1834	shlri r21,40,r21
1835	mulu.l r21,r7,r5
1836	add r8,r21,r8
1837	shlld r2,r0,r2
1838	sub r25,r5,r25
1839	bgtu/u r7,r25,tr0 // no_lo_adj
1840	addi r8,1,r8
1841	sub r25,r7,r25
1842LOCAL(no_lo_adj):
1843	mextr4 r2,r25,r2
1844
1845	/* large_divisor: only needs a few adjustments.  */
1846	mulu.l r8,r6,r5
1847	ptabs r18,tr0
1848	add r2,r6,r7
1849	cmpgtu r5,r2,r8
1850	cmvne r8,r7,r2
1851	sub r2,r5,r2
1852	shlrd r2,r22,r2
1853	blink tr0,r63
1854/* Note 1: To shift the result of the second divide stage so that the result
1855   always fits into 32 bits, yet we still reduce the rest sufficiently
1856   would require a lot of instructions to do the shifts just right.  Using
1857   the full 64 bit shift result to multiply with the divisor would require
1858   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1859   Fortunately, if the upper 32 bits of the shift result are nonzero, we
1860   know that the rest after taking this partial result into account will
1861   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1862   upper 32 bits of the partial result are nonzero.  */
1863#endif /* __SHMEDIA__ */
1864#endif /* L_umoddi3 */
1865
1866#ifdef L_moddi3
1867#ifdef __SHMEDIA__
1868	.mode	SHmedia
1869	.section	.text..SHmedia32,"ax"
1870	.align	2
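/* GLOBAL(moddi3) works like GLOBAL(divdi3) above, but wraps
   GLOBAL(umoddi3) and negates the remainder only when the dividend was
   negative, since the sign of the result follows the dividend.  */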
1871	.global	GLOBAL(moddi3)
1872	FUNC(GLOBAL(moddi3),function)
1873GLOBAL(moddi3):
1874	pta GLOBAL(umoddi3),tr0
1875	shari r2,63,r22
1876	shari r3,63,r23
1877	xor r2,r22,r2
1878	xor r3,r23,r3
1879	sub r2,r22,r2
1880	sub r3,r23,r3
1881	beq/u r22,r63,tr0
1882	ptabs r18,tr1
1883	blink tr0,r18
1884	sub r63,r2,r2
1885	blink tr1,r63
1886#endif /* __SHMEDIA__ */
1887#endif /* L_moddi3 */
1888
1889#ifdef L_set_fpscr
1890#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1891#ifdef __SH5__
1892	.mode	SHcompact
1893#endif
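/* GLOBAL(set_fpscr) loads the new FPSCR value from r4 and stores the
   single- and double-precision variants of that value into the
   two-element GLOBAL(fpscr_values) array defined below, so that later
   compiler-generated precision-mode switches can simply reload the
   appropriate cached word.  */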
1894	.global GLOBAL(set_fpscr)
1895	FUNC(GLOBAL(set_fpscr),function)
1896GLOBAL(set_fpscr):
1897	lds r4,fpscr
1898#ifdef __PIC__
1899	mov.l	r12,@-r15
1900	mova	LOCAL(set_fpscr_L0),r0
1901	mov.l	LOCAL(set_fpscr_L0),r12
1902	add	r0,r12
1903	mov.l	LOCAL(set_fpscr_L1),r0
1904	mov.l	@(r0,r12),r1
1905	mov.l	@r15+,r12
1906#else
1907	mov.l LOCAL(set_fpscr_L1),r1
1908#endif
1909	swap.w r4,r0
1910	or #24,r0
1911#ifndef FMOVD_WORKS
1912	xor #16,r0
1913#endif
1914#if defined(__SH4__)
1915	swap.w r0,r3
1916	mov.l r3,@(4,r1)
1917#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1918	swap.w r0,r2
1919	mov.l r2,@r1
1920#endif
1921#ifndef FMOVD_WORKS
1922	xor #8,r0
1923#else
1924	xor #24,r0
1925#endif
1926#if defined(__SH4__)
1927	swap.w r0,r2
1928	rts
1929	mov.l r2,@r1
1930#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1931	swap.w r0,r3
1932	rts
1933	mov.l r3,@(4,r1)
1934#endif
1935	.align 2
1936#ifdef __PIC__
1937LOCAL(set_fpscr_L0):
1938	.long _GLOBAL_OFFSET_TABLE_
1939LOCAL(set_fpscr_L1):
1940	.long GLOBAL(fpscr_values@GOT)
1941#else
1942LOCAL(set_fpscr_L1):
1943	.long GLOBAL(fpscr_values)
1944#endif
1945
1946#ifdef __ELF__
1947        .comm   GLOBAL(fpscr_values),8,4
1948#else
1949        .comm   GLOBAL(fpscr_values),8
1950#endif /* ELF */
1951#endif /* SH3E / SH4 */
1952#endif /* L_set_fpscr */
1953#ifdef L_ic_invalidate
1954#if __SH5__ == 32
1955	.mode	SHmedia
1956	.section	.text..SHmedia32,"ax"
1957	.align	2
1958	.global	GLOBAL(init_trampoline)
1959	FUNC(GLOBAL(init_trampoline),function)
1960GLOBAL(init_trampoline):
1961	st.l	r0,8,r2
1962#ifdef __LITTLE_ENDIAN__
1963	movi	9,r20
1964	shori	0x402b,r20
1965	shori	0xd101,r20
1966	shori	0xd002,r20
1967#else
1968	movi	0xffffffffffffd002,r20
1969	shori	0xd101,r20
1970	shori	0x402b,r20
1971	shori	9,r20
1972#endif
1973	st.q	r0,0,r20
1974	st.l	r0,12,r3
1975	.global	GLOBAL(ic_invalidate)
1976	FUNC(GLOBAL(ic_invalidate),function)
1977GLOBAL(ic_invalidate):
1978	ocbwb	r0,0
1979	synco
1980	icbi	r0, 0
1981	ptabs	r18, tr0
1982	synci
1983	blink	tr0, r63
1984#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
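/* GLOBAL(ic_invalidate): invalidate the instruction cache line holding
   the address in r4.  The corresponding operand cache line is written
   back with ocbwb; then, since this code assumes a direct-mapped
   instruction cache of 256 lines of 32 bytes (see the .rept below),
   executing code whose address has the same index bits as r4 (the
   rts/nop array below) displaces any stale instruction cache line for
   that address.  */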
1985	.global GLOBAL(ic_invalidate)
1986	FUNC(GLOBAL(ic_invalidate),function)
1987GLOBAL(ic_invalidate):
1988	ocbwb	@r4
1989	mova	0f,r0
1990	mov.w	1f,r1
1991/* Compute how many cache lines 0f is away from r4.  */
1992	sub	r0,r4
1993	and	r1,r4
1994/* Prepare to branch to 0f plus the cache-line offset.  */
1995	add	# 0f - 1f,r4
1996	braf	r4
1997	nop
19981:
1999	.short	0x1fe0
2000	.p2align 5
2001/* This must be aligned to the beginning of a cache line.  */
20020:
2003	.rept	256 /* There are 256 cache lines of 32 bytes.  */
2004	rts
2005	.rept	15
2006	nop
2007	.endr
2008	.endr
2009#endif /* SH4 */
2010#endif /* L_ic_invalidate */
2011
2012#if defined (__SH5__) && __SH5__ == 32
2013#ifdef L_shcompact_call_trampoline
2014	.section	.rodata
2015	.align	1
2016LOCAL(ct_main_table):
2017.word	LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2018.word	LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2019.word	LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2020.word	LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2021.word	LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2022.word	LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2023.word	LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2024.word	LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2025.word	LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2026.word	LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2027.word	LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2028.word	LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2029.word	LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2030.word	LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2031.word	LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2032.word	LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2033.word	LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2034.word	LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2035.word	LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2036.word	LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2037.word	LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2038.word	LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2039.word	LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2040.word	LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2041.word	LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2042.word	LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2043.word	LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2044.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2045.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2046.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2047.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2048.word	LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2049.word	LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2050	.mode	SHmedia
2051	.section	.text..SHmedia32, "ax"
2052	.align	2
2053
2054     /* This function loads 64-bit general-purpose registers from the
2055	stack, from a memory address contained in them or from an FP
2056	register, according to a cookie passed in r1.  Its execution
2057	time is linear on the number of registers that actually have
2058	to be copied.  See sh.h for details on the actual bit pattern.
2059
2060	The function to be called is passed in r0.  If a 32-bit return
2061	value is expected, the actual function will be tail-called,
2062	otherwise the return address will be stored in r10 (which the
2063	caller should expect to be clobbered) and the return value
2064	will be expanded into r2/r3 upon return.  */
2065
2066	.global	GLOBAL(GCC_shcompact_call_trampoline)
2067	FUNC(GLOBAL(GCC_shcompact_call_trampoline),function)
2068GLOBAL(GCC_shcompact_call_trampoline):
2069	ptabs/l	r0, tr0	/* Prepare to call the actual function.  */
2070	movi	((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2071	pt/l	LOCAL(ct_loop), tr1
2072	addz.l	r1, r63, r1
2073	shori	((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2074LOCAL(ct_loop):
2075	nsb	r1, r28
2076	shlli	r28, 1, r29
2077	ldx.w	r0, r29, r30
2078LOCAL(ct_main_label):
2079	ptrel/l	r30, tr2
2080	blink	tr2, r63
2081LOCAL(ct_r2_fp):	/* Copy r2 from an FP register.  */
2082	/* It must be dr0, so just do it.  */
2083	fmov.dq	dr0, r2
2084	movi	7, r30
2085	shlli	r30, 29, r31
2086	andc	r1, r31, r1
2087	blink	tr1, r63
2088LOCAL(ct_r3_fp):	/* Copy r3 from an FP register.  */
2089	/* It is either dr0 or dr2.  */
2090	movi	7, r30
2091	shlri	r1, 26, r32
2092	shlli	r30, 26, r31
2093	andc	r1, r31, r1
2094	fmov.dq	dr0, r3
2095	beqi/l	r32, 4, tr1
2096	fmov.dq	dr2, r3
2097	blink	tr1, r63
2098LOCAL(ct_r4_fp):	/* Copy r4 from an FP register.  */
2099	shlri	r1, 23 - 3, r34
2100	andi	r34, 3 << 3, r33
2101	addi	r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2102LOCAL(ct_r4_fp_base):
2103	ptrel/l	r32, tr2
2104	movi	7, r30
2105	shlli	r30, 23, r31
2106	andc	r1, r31, r1
2107	blink	tr2, r63
2108LOCAL(ct_r4_fp_copy):
2109	fmov.dq	dr0, r4
2110	blink	tr1, r63
2111	fmov.dq	dr2, r4
2112	blink	tr1, r63
2113	fmov.dq	dr4, r4
2114	blink	tr1, r63
2115LOCAL(ct_r5_fp):	/* Copy r5 from an FP register.  */
2116	shlri	r1, 20 - 3, r34
2117	andi	r34, 3 << 3, r33
2118	addi	r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2119LOCAL(ct_r5_fp_base):
2120	ptrel/l	r32, tr2
2121	movi	7, r30
2122	shlli	r30, 20, r31
2123	andc	r1, r31, r1
2124	blink	tr2, r63
2125LOCAL(ct_r5_fp_copy):
2126	fmov.dq	dr0, r5
2127	blink	tr1, r63
2128	fmov.dq	dr2, r5
2129	blink	tr1, r63
2130	fmov.dq	dr4, r5
2131	blink	tr1, r63
2132	fmov.dq	dr6, r5
2133	blink	tr1, r63
2134LOCAL(ct_r6_fph):	/* Copy r6 from a high FP register.  */
2135	/* It must be dr8.  */
2136	fmov.dq	dr8, r6
2137	movi	15, r30
2138	shlli	r30, 16, r31
2139	andc	r1, r31, r1
2140	blink	tr1, r63
2141LOCAL(ct_r6_fpl):	/* Copy r6 from a low FP register.  */
2142	shlri	r1, 16 - 3, r34
2143	andi	r34, 3 << 3, r33
2144	addi	r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2145LOCAL(ct_r6_fp_base):
2146	ptrel/l	r32, tr2
2147	movi	7, r30
2148	shlli	r30, 16, r31
2149	andc	r1, r31, r1
2150	blink	tr2, r63
2151LOCAL(ct_r6_fp_copy):
2152	fmov.dq	dr0, r6
2153	blink	tr1, r63
2154	fmov.dq	dr2, r6
2155	blink	tr1, r63
2156	fmov.dq	dr4, r6
2157	blink	tr1, r63
2158	fmov.dq	dr6, r6
2159	blink	tr1, r63
2160LOCAL(ct_r7_fph):	/* Copy r7 from a high FP register.  */
2161	/* It is either dr8 or dr10.  */
2162	movi	15 << 12, r31
2163	shlri	r1, 12, r32
2164	andc	r1, r31, r1
2165	fmov.dq	dr8, r7
2166	beqi/l	r32, 8, tr1
2167	fmov.dq	dr10, r7
2168	blink	tr1, r63
2169LOCAL(ct_r7_fpl):	/* Copy r7 from a low FP register.  */
2170	shlri	r1, 12 - 3, r34
2171	andi	r34, 3 << 3, r33
2172	addi	r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2173LOCAL(ct_r7_fp_base):
2174	ptrel/l	r32, tr2
2175	movi	7 << 12, r31
2176	andc	r1, r31, r1
2177	blink	tr2, r63
2178LOCAL(ct_r7_fp_copy):
2179	fmov.dq	dr0, r7
2180	blink	tr1, r63
2181	fmov.dq	dr2, r7
2182	blink	tr1, r63
2183	fmov.dq	dr4, r7
2184	blink	tr1, r63
2185	fmov.dq	dr6, r7
2186	blink	tr1, r63
2187LOCAL(ct_r8_fph):	/* Copy r8 from a high FP register.  */
2188	/* It is either dr8 or dr10.  */
2189	movi	15 << 8, r31
2190	andi	r1, 1 << 8, r32
2191	andc	r1, r31, r1
2192	fmov.dq	dr8, r8
2193	beq/l	r32, r63, tr1
2194	fmov.dq	dr10, r8
2195	blink	tr1, r63
2196LOCAL(ct_r8_fpl):	/* Copy r8 from a low FP register.  */
2197	shlri	r1, 8 - 3, r34
2198	andi	r34, 3 << 3, r33
2199	addi	r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2200LOCAL(ct_r8_fp_base):
2201	ptrel/l	r32, tr2
2202	movi	7 << 8, r31
2203	andc	r1, r31, r1
2204	blink	tr2, r63
2205LOCAL(ct_r8_fp_copy):
2206	fmov.dq	dr0, r8
2207	blink	tr1, r63
2208	fmov.dq	dr2, r8
2209	blink	tr1, r63
2210	fmov.dq	dr4, r8
2211	blink	tr1, r63
2212	fmov.dq	dr6, r8
2213	blink	tr1, r63
2214LOCAL(ct_r9_fph):	/* Copy r9 from a high FP register.  */
2215	/* It is either dr8 or dr10.  */
2216	movi	15 << 4, r31
2217	andi	r1, 1 << 4, r32
2218	andc	r1, r31, r1
2219	fmov.dq	dr8, r9
2220	beq/l	r32, r63, tr1
2221	fmov.dq	dr10, r9
2222	blink	tr1, r63
2223LOCAL(ct_r9_fpl):	/* Copy r9 from a low FP register.  */
2224	shlri	r1, 4 - 3, r34
2225	andi	r34, 3 << 3, r33
2226	addi	r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2227LOCAL(ct_r9_fp_base):
2228	ptrel/l	r32, tr2
2229	movi	7 << 4, r31
2230	andc	r1, r31, r1
2231	blink	tr2, r63
2232LOCAL(ct_r9_fp_copy):
2233	fmov.dq	dr0, r9
2234	blink	tr1, r63
2235	fmov.dq	dr2, r9
2236	blink	tr1, r63
2237	fmov.dq	dr4, r9
2238	blink	tr1, r63
2239	fmov.dq	dr6, r9
2240	blink	tr1, r63
2241LOCAL(ct_r2_ld):	/* Copy r2 from a memory address.  */
2242	pt/l	LOCAL(ct_r2_load), tr2
2243	movi	3, r30
2244	shlli	r30, 29, r31
2245	and	r1, r31, r32
2246	andc	r1, r31, r1
2247	beq/l	r31, r32, tr2
2248	addi.l	r2, 8, r3
2249	ldx.q	r2, r63, r2
2250	/* Fall through.  */
2251LOCAL(ct_r3_ld):	/* Copy r3 from a memory address.  */
2252	pt/l	LOCAL(ct_r3_load), tr2
2253	movi	3, r30
2254	shlli	r30, 26, r31
2255	and	r1, r31, r32
2256	andc	r1, r31, r1
2257	beq/l	r31, r32, tr2
2258	addi.l	r3, 8, r4
2259	ldx.q	r3, r63, r3
2260LOCAL(ct_r4_ld):	/* Copy r4 from a memory address.  */
2261	pt/l	LOCAL(ct_r4_load), tr2
2262	movi	3, r30
2263	shlli	r30, 23, r31
2264	and	r1, r31, r32
2265	andc	r1, r31, r1
2266	beq/l	r31, r32, tr2
2267	addi.l	r4, 8, r5
2268	ldx.q	r4, r63, r4
2269LOCAL(ct_r5_ld):	/* Copy r5 from a memory address.  */
2270	pt/l	LOCAL(ct_r5_load), tr2
2271	movi	3, r30
2272	shlli	r30, 20, r31
2273	and	r1, r31, r32
2274	andc	r1, r31, r1
2275	beq/l	r31, r32, tr2
2276	addi.l	r5, 8, r6
2277	ldx.q	r5, r63, r5
2278LOCAL(ct_r6_ld):	/* Copy r6 from a memory address.  */
2279	pt/l	LOCAL(ct_r6_load), tr2
2280	movi	3 << 16, r31
2281	and	r1, r31, r32
2282	andc	r1, r31, r1
2283	beq/l	r31, r32, tr2
2284	addi.l	r6, 8, r7
2285	ldx.q	r6, r63, r6
2286LOCAL(ct_r7_ld):	/* Copy r7 from a memory address.  */
2287	pt/l	LOCAL(ct_r7_load), tr2
2288	movi	3 << 12, r31
2289	and	r1, r31, r32
2290	andc	r1, r31, r1
2291	beq/l	r31, r32, tr2
2292	addi.l	r7, 8, r8
2293	ldx.q	r7, r63, r7
2294LOCAL(ct_r8_ld):	/* Copy r8 from a memory address.  */
2295	pt/l	LOCAL(ct_r8_load), tr2
2296	movi	3 << 8, r31
2297	and	r1, r31, r32
2298	andc	r1, r31, r1
2299	beq/l	r31, r32, tr2
2300	addi.l	r8, 8, r9
2301	ldx.q	r8, r63, r8
2302LOCAL(ct_r9_ld):	/* Copy r9 from a memory address.  */
2303	pt/l	LOCAL(ct_check_tramp), tr2
2304	ldx.q	r9, r63, r9
2305	blink	tr2, r63
2306LOCAL(ct_r2_load):
2307	ldx.q	r2, r63, r2
2308	blink	tr1, r63
2309LOCAL(ct_r3_load):
2310	ldx.q	r3, r63, r3
2311	blink	tr1, r63
2312LOCAL(ct_r4_load):
2313	ldx.q	r4, r63, r4
2314	blink	tr1, r63
2315LOCAL(ct_r5_load):
2316	ldx.q	r5, r63, r5
2317	blink	tr1, r63
2318LOCAL(ct_r6_load):
2319	ldx.q	r6, r63, r6
2320	blink	tr1, r63
2321LOCAL(ct_r7_load):
2322	ldx.q	r7, r63, r7
2323	blink	tr1, r63
2324LOCAL(ct_r8_load):
2325	ldx.q	r8, r63, r8
2326	blink	tr1, r63
2327LOCAL(ct_r2_pop):	/* Pop r2 from the stack.  */
2328	movi	1, r30
2329	ldx.q	r15, r63, r2
2330	shlli	r30, 29, r31
2331	addi.l	r15, 8, r15
2332	andc	r1, r31, r1
2333	blink	tr1, r63
2334LOCAL(ct_r3_pop):	/* Pop r3 from the stack.  */
2335	movi	1, r30
2336	ldx.q	r15, r63, r3
2337	shlli	r30, 26, r31
2338	addi.l	r15, 8, r15
2339	andc	r1, r31, r1
2340	blink	tr1, r63
2341LOCAL(ct_r4_pop):	/* Pop r4 from the stack.  */
2342	movi	1, r30
2343	ldx.q	r15, r63, r4
2344	shlli	r30, 23, r31
2345	addi.l	r15, 8, r15
2346	andc	r1, r31, r1
2347	blink	tr1, r63
2348LOCAL(ct_r5_pop):	/* Pop r5 from the stack.  */
2349	movi	1, r30
2350	ldx.q	r15, r63, r5
2351	shlli	r30, 20, r31
2352	addi.l	r15, 8, r15
2353	andc	r1, r31, r1
2354	blink	tr1, r63
2355LOCAL(ct_r6_pop):	/* Pop r6 from the stack.  */
2356	movi	1, r30
2357	ldx.q	r15, r63, r6
2358	shlli	r30, 16, r31
2359	addi.l	r15, 8, r15
2360	andc	r1, r31, r1
2361	blink	tr1, r63
2362LOCAL(ct_r7_pop):	/* Pop r7 from the stack.  */
2363	ldx.q	r15, r63, r7
2364	movi	1 << 12, r31
2365	addi.l	r15, 8, r15
2366	andc	r1, r31, r1
2367	blink	tr1, r63
2368LOCAL(ct_r8_pop):	/* Pop r8 from the stack.  */
2369	ldx.q	r15, r63, r8
2370	movi	1 << 8, r31
2371	addi.l	r15, 8, r15
2372	andc	r1, r31, r1
2373	blink	tr1, r63
2374LOCAL(ct_pop_seq):	/* Pop a sequence of registers off the stack.  */
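	/* Each pop below occupies 8 bytes of code (ldx.q + addi.l), so
	   branching to ct_end_of_pop_seq minus 8 times the count taken
	   from bits 1-3 of the cookie executes exactly the last `count'
	   pops, ending with r9, and then falls into ct_check_tramp.  */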
2375	andi	r1, 7 << 1, r30
2376	movi	(LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2377	shlli	r30, 2, r31
2378	shori	LOCAL(ct_end_of_pop_seq) & 65535, r32
2379	sub.l	r32, r31, r33
2380	ptabs/l	r33, tr2
2381	blink	tr2, r63
2382LOCAL(ct_start_of_pop_seq):	/* Beginning of pop sequence.  */
2383	ldx.q	r15, r63, r3
2384	addi.l	r15, 8, r15
2385	ldx.q	r15, r63, r4
2386	addi.l	r15, 8, r15
2387	ldx.q	r15, r63, r5
2388	addi.l	r15, 8, r15
2389	ldx.q	r15, r63, r6
2390	addi.l	r15, 8, r15
2391	ldx.q	r15, r63, r7
2392	addi.l	r15, 8, r15
2393	ldx.q	r15, r63, r8
2394	addi.l	r15, 8, r15
2395LOCAL(ct_r9_pop):	/* Pop r9 from the stack.  */
2396	ldx.q	r15, r63, r9
2397	addi.l	r15, 8, r15
2398LOCAL(ct_end_of_pop_seq): /* Label used to compute the first pop instruction.  */
2399LOCAL(ct_check_tramp):	/* Check whether we need a trampoline.  */
2400	pt/u	LOCAL(ct_ret_wide), tr2
2401	andi	r1, 1, r1
2402	bne/u	r1, r63, tr2
2403LOCAL(ct_call_func):	/* Just branch to the function.  */
2404	blink	tr0, r63
2405LOCAL(ct_ret_wide):	/* Call the function, so that we can unpack its
2406			   64-bit return value.  */
2407	add.l	r18, r63, r10
2408	blink	tr0, r18
2409	ptabs	r10, tr0
2410#if __LITTLE_ENDIAN__
2411	shari	r2, 32, r3
2412	add.l	r2, r63, r2
2413#else
2414	add.l	r2, r63, r3
2415	shari	r2, 32, r2
2416#endif
2417	blink	tr0, r63
2418#endif /* L_shcompact_call_trampoline */
2419
2420#ifdef L_shcompact_return_trampoline
2421     /* This function does the converse of the code in `ret_wide'
2422	above.  It is tail-called by SHcompact functions returning
2423	64-bit non-floating-point values, to pack the 32-bit values in
2424	r2 and r3 into r2.  */
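     /* Illustrative only (not from the original sources): in C, the
	packing performed below amounts to something like the following
	sketch, where the helper name `pack_di' is made up for this
	example and 32-bit `unsigned int' halves are assumed:

	   unsigned long long
	   pack_di (unsigned int lo, unsigned int hi)
	   {
	     return (unsigned long long) lo
		    | ((unsigned long long) hi << 32);
	   }

	Under __LITTLE_ENDIAN__ the low half arrives in r2 and the high
	half in r3; on big-endian targets the roles are swapped, which is
	what the #if below selects.  */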
2425
2426	.mode	SHmedia
2427	.section	.text..SHmedia32, "ax"
2428	.align	2
2429	.global	GLOBAL(GCC_shcompact_return_trampoline)
2430	FUNC(GLOBAL(GCC_shcompact_return_trampoline),function)
2431GLOBAL(GCC_shcompact_return_trampoline):
2432	ptabs/l	r18, tr0
2433#if __LITTLE_ENDIAN__
2434	addz.l	r2, r63, r2
2435	shlli	r3, 32, r3
2436#else
2437	addz.l	r3, r63, r3
2438	shlli	r2, 32, r2
2439#endif
2440	or	r3, r2, r2
2441	blink	tr0, r63
2442#endif /* L_shcompact_return_trampoline */
2443
2444#ifdef L_shcompact_incoming_args
2445	.section	.rodata
2446	.align	1
2447LOCAL(ia_main_table):
2448.word	1 /* Invalid, just loop */
2449.word	LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2450.word	LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2451.word	1 /* Invalid, just loop */
2452.word	LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2453.word	LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2454.word	1 /* Invalid, just loop */
2455.word	LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2456.word	LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2457.word	1 /* Invalid, just loop */
2458.word	LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2459.word	LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2460.word	1 /* Invalid, just loop */
2461.word	1 /* Invalid, just loop */
2462.word	LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2463.word	LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2464.word	1 /* Invalid, just loop */
2465.word	1 /* Invalid, just loop */
2466.word	LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2467.word	LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2468.word	1 /* Invalid, just loop */
2469.word	1 /* Invalid, just loop */
2470.word	LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2471.word	LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2472.word	1 /* Invalid, just loop */
2473.word	1 /* Invalid, just loop */
2474.word	LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2475.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2476.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2477.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2478.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2479.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2480.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2481	.mode	SHmedia
2482	.section	.text..SHmedia32, "ax"
2483	.align	2
2484
2485     /* This function stores the 64-bit general-purpose registers back
2486	on the stack, and loads into each register the address at which
2487	that register was stored.  The lower 32 bits of r17 hold the address
2488	to begin storing, and the upper 32 bits of r17 hold the cookie.
2489	Its execution time is linear in the
2490	number of registers that actually have to be copied, and it is
2491	optimized for structures larger than 64 bits, as opposed to
2492	individual `long long' arguments.  See sh.h for details on the
2493	actual bit pattern.  */
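     /* Illustrative only (not from the original sources): for each
	register the cookie selects for this treatment, the effect
	roughly matches this C sketch, where `store_and_take_address'
	is a made-up name, `slot' stands for the store address held in
	r17, and the caller advances `slot' by 8 bytes per register:

	   #include <stdint.h>

	   void
	   store_and_take_address (uint64_t *slot, uint64_t *reg)
	   {
	     *slot = *reg;
	     *reg = (uint64_t) (uintptr_t) slot;
	   }
	*/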
2494
2495	.global	GLOBAL(GCC_shcompact_incoming_args)
2496	FUNC(GLOBAL(GCC_shcompact_incoming_args),function)
2497GLOBAL(GCC_shcompact_incoming_args):
2498	ptabs/l	r18, tr0	/* Prepare to return.  */
2499	shlri	r17, 32, r0	/* Load the cookie.  */
2500	movi	((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2501	pt/l	LOCAL(ia_loop), tr1
2502	add.l	r17, r63, r17
2503	shori	((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2504LOCAL(ia_loop):
2505	nsb	r0, r36
2506	shlli	r36, 1, r37
2507	ldx.w	r43, r37, r38
2508LOCAL(ia_main_label):
2509	ptrel/l	r38, tr2
2510	blink	tr2, r63
2511LOCAL(ia_r2_ld):	/* Store r2 and load its address.  */
2512	movi	3, r38
2513	shlli	r38, 29, r39
2514	and	r0, r39, r40
2515	andc	r0, r39, r0
2516	stx.q	r17, r63, r2
2517	add.l	r17, r63, r2
2518	addi.l	r17, 8, r17
2519	beq/u	r39, r40, tr1
2520LOCAL(ia_r3_ld):	/* Store r3 and load its address.  */
2521	movi	3, r38
2522	shlli	r38, 26, r39
2523	and	r0, r39, r40
2524	andc	r0, r39, r0
2525	stx.q	r17, r63, r3
2526	add.l	r17, r63, r3
2527	addi.l	r17, 8, r17
2528	beq/u	r39, r40, tr1
2529LOCAL(ia_r4_ld):	/* Store r4 and load its address.  */
2530	movi	3, r38
2531	shlli	r38, 23, r39
2532	and	r0, r39, r40
2533	andc	r0, r39, r0
2534	stx.q	r17, r63, r4
2535	add.l	r17, r63, r4
2536	addi.l	r17, 8, r17
2537	beq/u	r39, r40, tr1
2538LOCAL(ia_r5_ld):	/* Store r5 and load its address.  */
2539	movi	3, r38
2540	shlli	r38, 20, r39
2541	and	r0, r39, r40
2542	andc	r0, r39, r0
2543	stx.q	r17, r63, r5
2544	add.l	r17, r63, r5
2545	addi.l	r17, 8, r17
2546	beq/u	r39, r40, tr1
2547LOCAL(ia_r6_ld):	/* Store r6 and load its address.  */
2548	movi	3, r38
2549	shlli	r38, 16, r39
2550	and	r0, r39, r40
2551	andc	r0, r39, r0
2552	stx.q	r17, r63, r6
2553	add.l	r17, r63, r6
2554	addi.l	r17, 8, r17
2555	beq/u	r39, r40, tr1
2556LOCAL(ia_r7_ld):	/* Store r7 and load its address.  */
2557	movi	3 << 12, r39
2558	and	r0, r39, r40
2559	andc	r0, r39, r0
2560	stx.q	r17, r63, r7
2561	add.l	r17, r63, r7
2562	addi.l	r17, 8, r17
2563	beq/u	r39, r40, tr1
2564LOCAL(ia_r8_ld):	/* Store r8 and load its address.  */
2565	movi	3 << 8, r39
2566	and	r0, r39, r40
2567	andc	r0, r39, r0
2568	stx.q	r17, r63, r8
2569	add.l	r17, r63, r8
2570	addi.l	r17, 8, r17
2571	beq/u	r39, r40, tr1
2572LOCAL(ia_r9_ld):	/* Store r9 and load its address.  */
2573	stx.q	r17, r63, r9
2574	add.l	r17, r63, r9
2575	blink	tr0, r63
2576LOCAL(ia_r2_push):	/* Push r2 onto the stack.  */
2577	movi	1, r38
2578	shlli	r38, 29, r39
2579	andc	r0, r39, r0
2580	stx.q	r17, r63, r2
2581	addi.l	r17, 8, r17
2582	blink	tr1, r63
2583LOCAL(ia_r3_push):	/* Push r3 onto the stack.  */
2584	movi	1, r38
2585	shlli	r38, 26, r39
2586	andc	r0, r39, r0
2587	stx.q	r17, r63, r3
2588	addi.l	r17, 8, r17
2589	blink	tr1, r63
2590LOCAL(ia_r4_push):	/* Push r4 onto the stack.  */
2591	movi	1, r38
2592	shlli	r38, 23, r39
2593	andc	r0, r39, r0
2594	stx.q	r17, r63, r4
2595	addi.l	r17, 8, r17
2596	blink	tr1, r63
2597LOCAL(ia_r5_push):	/* Push r5 onto the stack.  */
2598	movi	1, r38
2599	shlli	r38, 20, r39
2600	andc	r0, r39, r0
2601	stx.q	r17, r63, r5
2602	addi.l	r17, 8, r17
2603	blink	tr1, r63
2604LOCAL(ia_r6_push):	/* Push r6 onto the stack.  */
2605	movi	1, r38
2606	shlli	r38, 16, r39
2607	andc	r0, r39, r0
2608	stx.q	r17, r63, r6
2609	addi.l	r17, 8, r17
2610	blink	tr1, r63
2611LOCAL(ia_r7_push):	/* Push r7 onto the stack.  */
2612	movi	1 << 12, r39
2613	andc	r0, r39, r0
2614	stx.q	r17, r63, r7
2615	addi.l	r17, 8, r17
2616	blink	tr1, r63
2617LOCAL(ia_r8_push):	/* Push r8 onto the stack.  */
2618	movi	1 << 8, r39
2619	andc	r0, r39, r0
2620	stx.q	r17, r63, r8
2621	addi.l	r17, 8, r17
2622	blink	tr1, r63
2623LOCAL(ia_push_seq):	/* Push a sequence of registers onto the stack.  */
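	/* Same computed-branch trick as ct_pop_seq above: every register
	   handled below costs 8 bytes of code, so branching to
	   ia_end_of_push_seq minus 8 times the count taken from bits 1-3
	   of the cookie stores exactly the last `count' registers
	   (ending with r9) and returns.  */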
2624	andi	r0, 7 << 1, r38
2625	movi	(LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2626	shlli	r38, 2, r39
2627	shori	LOCAL(ia_end_of_push_seq) & 65535, r40
2628	sub.l	r40, r39, r41
2629	ptabs/l	r41, tr2
2630	blink	tr2, r63
2631LOCAL(ia_start_of_push_seq):	/* Beginning of push sequence.  */
2632	stx.q	r17, r63, r3
2633	addi.l	r17, 8, r17
2634	stx.q	r17, r63, r4
2635	addi.l	r17, 8, r17
2636	stx.q	r17, r63, r5
2637	addi.l	r17, 8, r17
2638	stx.q	r17, r63, r6
2639	addi.l	r17, 8, r17
2640	stx.q	r17, r63, r7
2641	addi.l	r17, 8, r17
2642	stx.q	r17, r63, r8
2643	addi.l	r17, 8, r17
2644LOCAL(ia_r9_push):	/* Push r9 onto the stack.  */
2645	stx.q	r17, r63, r9
2646LOCAL(ia_return):	/* Return.  */
2647	blink	tr0, r63
2648LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
2649#endif /* L_shcompact_incoming_args */
2650#endif
2651#if __SH5__
2652#ifdef L_nested_trampoline
2653#if __SH5__ == 32
2654	.section	.text..SHmedia32,"ax"
2655#else
2656	.text
2657#endif
2658	.align	3 /* It is copied in units of 8 bytes in SHmedia mode.  */
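     /* Trampoline template for nested functions: ptrel/gettr put a
	PC-relative base address in r0, the word at offset 24 from that
	base becomes the branch target in tr1, the word at offset 32
	(64-bit ABI) or 28 (32-bit ABI) -- presumably the static chain --
	is left in r1, and control then transfers via tr1.  */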
2659	.global	GLOBAL(GCC_nested_trampoline)
2660GLOBAL(GCC_nested_trampoline):
2661	.mode	SHmedia
2662	ptrel/u	r63, tr0
2663	gettr	tr0, r0
2664#if __SH5__ == 64
2665	ld.q	r0, 24, r1
2666#else
2667	ld.l	r0, 24, r1
2668#endif
2669	ptabs/l	r1, tr1
2670#if __SH5__ == 64
2671	ld.q	r0, 32, r1
2672#else
2673	ld.l	r0, 28, r1
2674#endif
2675	blink	tr1, r63
2676#endif /* L_nested_trampoline */
2677#endif /* __SH5__ */
2678#if __SH5__ == 32
2679#ifdef L_push_pop_shmedia_regs
2680	.section	.text..SHmedia32,"ax"
2681	.mode	SHmedia
2682	.align	2
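     /* Save the SHmedia register state on the stack: dr36..dr62 when the
	FPU is available (GCC_push_shmedia_regs), then tr5..tr7 (spilled
	via r60..r62) together with r28..r35 and r44..r59
	(GCC_push_shmedia_regs_nofpu).  The GCC_pop_shmedia_regs* entry
	points below undo this and pop the frame.  */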
2683#ifndef __SH4_NOFPU__
2684	.global	GLOBAL(GCC_push_shmedia_regs)
2685GLOBAL(GCC_push_shmedia_regs):
2686	addi.l	r15, -14*8, r15
2687	fst.d	r15, 13*8, dr62
2688	fst.d	r15, 12*8, dr60
2689	fst.d	r15, 11*8, dr58
2690	fst.d	r15, 10*8, dr56
2691	fst.d	r15,  9*8, dr54
2692	fst.d	r15,  8*8, dr52
2693	fst.d	r15,  7*8, dr50
2694	fst.d	r15,  6*8, dr48
2695	fst.d	r15,  5*8, dr46
2696	fst.d	r15,  4*8, dr44
2697	fst.d	r15,  3*8, dr42
2698	fst.d	r15,  2*8, dr40
2699	fst.d	r15,  1*8, dr38
2700	fst.d	r15,  0*8, dr36
2701#endif
2702	.global	GLOBAL(GCC_push_shmedia_regs_nofpu)
2703GLOBAL(GCC_push_shmedia_regs_nofpu):
2704	ptabs/l	r18, tr0
2705	addi.l	r15, -27*8, r15
2706	gettr	tr7, r62
2707	gettr	tr6, r61
2708	gettr	tr5, r60
2709	st.q	r15, 26*8, r62
2710	st.q	r15, 25*8, r61
2711	st.q	r15, 24*8, r60
2712	st.q	r15, 23*8, r59
2713	st.q	r15, 22*8, r58
2714	st.q	r15, 21*8, r57
2715	st.q	r15, 20*8, r56
2716	st.q	r15, 19*8, r55
2717	st.q	r15, 18*8, r54
2718	st.q	r15, 17*8, r53
2719	st.q	r15, 16*8, r52
2720	st.q	r15, 15*8, r51
2721	st.q	r15, 14*8, r50
2722	st.q	r15, 13*8, r49
2723	st.q	r15, 12*8, r48
2724	st.q	r15, 11*8, r47
2725	st.q	r15, 10*8, r46
2726	st.q	r15,  9*8, r45
2727	st.q	r15,  8*8, r44
2728	st.q	r15,  7*8, r35
2729	st.q	r15,  6*8, r34
2730	st.q	r15,  5*8, r33
2731	st.q	r15,  4*8, r32
2732	st.q	r15,  3*8, r31
2733	st.q	r15,  2*8, r30
2734	st.q	r15,  1*8, r29
2735	st.q	r15,  0*8, r28
2736	blink	tr0, r63
2737
2738#ifndef __SH4_NOFPU__
2739	.global	GLOBAL(GCC_pop_shmedia_regs)
2740GLOBAL(GCC_pop_shmedia_regs):
2741	pt	.L0, tr1
2742	movi	41*8, r0
2743	fld.d	r15, 40*8, dr62
2744	fld.d	r15, 39*8, dr60
2745	fld.d	r15, 38*8, dr58
2746	fld.d	r15, 37*8, dr56
2747	fld.d	r15, 36*8, dr54
2748	fld.d	r15, 35*8, dr52
2749	fld.d	r15, 34*8, dr50
2750	fld.d	r15, 33*8, dr48
2751	fld.d	r15, 32*8, dr46
2752	fld.d	r15, 31*8, dr44
2753	fld.d	r15, 30*8, dr42
2754	fld.d	r15, 29*8, dr40
2755	fld.d	r15, 28*8, dr38
2756	fld.d	r15, 27*8, dr36
2757	blink	tr1, r63
2758#endif
2759	.global	GLOBAL(GCC_pop_shmedia_regs_nofpu)
2760GLOBAL(GCC_pop_shmedia_regs_nofpu):
2761	movi	27*8, r0
2762.L0:
2763	ptabs	r18, tr0
2764	ld.q	r15, 26*8, r62
2765	ld.q	r15, 25*8, r61
2766	ld.q	r15, 24*8, r60
2767	ptabs	r62, tr7
2768	ptabs	r61, tr6
2769	ptabs	r60, tr5
2770	ld.q	r15, 23*8, r59
2771	ld.q	r15, 22*8, r58
2772	ld.q	r15, 21*8, r57
2773	ld.q	r15, 20*8, r56
2774	ld.q	r15, 19*8, r55
2775	ld.q	r15, 18*8, r54
2776	ld.q	r15, 17*8, r53
2777	ld.q	r15, 16*8, r52
2778	ld.q	r15, 15*8, r51
2779	ld.q	r15, 14*8, r50
2780	ld.q	r15, 13*8, r49
2781	ld.q	r15, 12*8, r48
2782	ld.q	r15, 11*8, r47
2783	ld.q	r15, 10*8, r46
2784	ld.q	r15,  9*8, r45
2785	ld.q	r15,  8*8, r44
2786	ld.q	r15,  7*8, r35
2787	ld.q	r15,  6*8, r34
2788	ld.q	r15,  5*8, r33
2789	ld.q	r15,  4*8, r32
2790	ld.q	r15,  3*8, r31
2791	ld.q	r15,  2*8, r30
2792	ld.q	r15,  1*8, r29
2793	ld.q	r15,  0*8, r28
2794	add.l	r15, r0, r15
2795	blink	tr0, r63
2796#endif /* __SH5__ == 32 */
2797#endif /* L_push_pop_shmedia_regs */
2798