xref: /netbsd-src/external/gpl3/gcc/dist/libgcc/config/arc/ieee-754/arc600-dsp/muldf3.S (revision f0fde9902fd4d72ded2807793acc7bfaa1ebf243)
1/* Copyright (C) 2008-2020 Free Software Foundation, Inc.
2   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3		on behalf of Synopsys Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24<http://www.gnu.org/licenses/>.  */
25
26#include "../arc-ieee-754.h"
27
28#if 0 /* DEBUG */
/* Self-checking wrapper; the enclosing conditional compiles it out.
   It takes over the __muldf3 symbol, calls __muldf3_c and then
   __muldf3_asm with the same r0-r3, and branches to abort unless both
   calls leave identical values in r0 and r1.  __muldf3_c is not defined
   in this file (presumably a C reference implementation - confirm).  */
29	.global __muldf3
30	.balign 4
31__muldf3:
/* Save the return address and the incoming arguments; the last push sits
   in the delay slot of the call.  Stack layout assumed by the loads and
   stores below: [sp,0]=r1 [sp,4]=r0 [sp,8]=r3 [sp,12]=r2 [sp,16]=blink.  */
32	push_s blink
33	push_s r2
34	push_s r3
35	push_s r0
36	bl.d __muldf3_c
37	push_s r1
/* Reload the original r2/r3 and park the first result in their slots.  */
38	ld_s r2,[sp,12]
39	ld_s r3,[sp,8]
40	st_s r0,[sp,12]
41	st_s r1,[sp,8]
/* Restore the original r0/r1 (second pop in the delay slot) and call the
   assembler implementation.  */
42	pop_s r1
43	bl.d __muldf3_asm
44	pop_s r0
/* Pop the parked first result: r3 receives its r1, r2 receives its r0.  */
45	pop_s r3
46	pop_s r2
47	pop_s blink
/* Return if r0 and r1 both match the first result, otherwise abort.  */
48	cmp r0,r2
49	cmp.eq r1,r3
50	jeq_s [blink]
51	b abort
/* From here on the real implementation is assembled as __muldf3_asm.  */
52#define __muldf3 __muldf3_asm
53#endif /* DEBUG */
54
55__muldf3_support: /* This label makes debugger output saner.  */
56	.balign 4
57	FUNC(__muldf3)
/* .Ldenorm_2: normalise a denormal second operand whose set fraction
   bits all lie in DBL1L, then fall through into __muldf3 to redo the
   multiply.  Reached from .Ldenorm_dbl1 when DBL1H is zero once its sign
   bit has been cleared (the sign was folded into DBL0H there).
   Afterwards the leading fraction bit sits at bit 20 of DBL1H and the
   shift count has been subtracted from the exponent field of DBL0H.  */
58.Ldenorm_2:
/* The delay slot computes the norm of DBL1L and sets the flags from it.  */
59	breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
60	norm.f r12,DBL1L
/* r12 = left shift that moves the top set bit of DBL1L to bit 20 of the
   high word: 21 when bit 31 is set, otherwise the norm result plus 22
   (assumes norm yields the count of redundant sign bits - confirm).  */
61	mov.mi r12,21
62	add.pl r12,r12,22
/* r11 = negated count, used as the complementary right shift / rotate
   amount (relies on shift counts being taken modulo 32 - confirm).  */
63	neg r11,r12
/* r12 = shift count moved into exponent-field position (bit 20 up).  */
64	asl_s r12,r12,20
/* DBL1H = bits of DBL1L that cross into the high word; Z set if the
   plain right shift leaves nothing.  */
65	lsr.f DBL1H,DBL1L,r11
/* DBL1L rotated left by the shift count.  */
66	ror DBL1L,DBL1L,r11
/* Charge the shift to the exponent of the other operand.  */
67	sub_s DBL0H,DBL0H,r12
/* Z still from the lsr.f (assumes ror and sub_s leave the flags alone):
   the rotated word itself becomes the new high word.  */
68	mov.eq DBL1H,DBL1L
/* Remove from the rotated low word the bits that now live in DBL1H.  */
69	sub_l DBL1L,DBL1L,DBL1H
70	/* Fall through.  */
71	.global __muldf3
72	.balign 4
/* __muldf3: IEEE double precision multiply.
   In:  first operand in DBL0H:DBL0L, second in DBL1H:DBL1L (the register
        mapping comes from arc-ieee-754.h, which is not visible here).
   Out: product in DBL0H:DBL0L, rounded to nearest / even.
   Uses r4-r12 and the multiply accumulator (acc2); no stack.
   Inputs with a zero exponent field branch to .Ldenorm_dbl0 or
   .Ldenorm_dbl1, an all-ones exponent field branches to .Linf_nan, and a
   result exponent outside the normal range branches to .Linf_denorm.  */
73__muldf3:
/* Start the fraction multiply with low word x low word.  The exact
   semantics of mululw / machulw / maclw and of the accumulator are not
   spelled out in this file; the comment further down gives the final
   layout of the product as r7:acc2:r4:r8.  */
74	mululw 0,DBL0L,DBL1L
75	machulw r4,DBL0L,DBL1L
/* r9 = 0x7ff00000, the exponent mask stored at .L7ff00000.  The word
   offset 0x67 is hard-coded, so it has to be recomputed by hand if the
   encoded size of anything between here and the constant changes.  */
76	ld.as r9,[pcl,0x67] ; ((.L7ff00000-.+2)/4)]
/* r6 = high fraction bits of operand 0 with the implicit one at bit 20.  */
77	bmsk r6,DBL0H,19
78	bset r6,r6,20
/* r8 = least significant word of the product.  */
79	mov r8,acc2
/* Presumably reloads the accumulator with the carried-over word r4 so
   that the following partial products land one word higher - confirm.  */
80	mululw 0,r4,1
/* r11 / r12 = exponent fields of operand 0 / operand 1; zero means a zero
   or denormal input.  The second and sits in the delay slot of the first
   test, so r12 is valid on the .Ldenorm_dbl0 path as well.  */
81	and r11,DBL0H,r9
82	breq.d r11,0,.Ldenorm_dbl0
83	and r12,DBL1H,r9
/* Cross product: operand 0 high fraction x operand 1 low word.  The
   maclw is in the delay slot and so also runs on the denormal path,
   which either returns or restarts the multiply from __muldf3.  */
84	breq.d r12,0,.Ldenorm_dbl1
85	maclw 0,r6,DBL1L
86	machulw 0,r6,DBL1L
/* Exponent field all ones: infinity or NaN.  The two delay slots build
   r10 = high fraction bits of operand 1 with the implicit one.  */
87	breq.d r11,r9,.Linf_nan
88	bmsk r10,DBL1H,19
89	breq.d r12,r9,.Linf_nan
90	bset r10,r10,20
/* Other cross product: operand 1 high fraction x operand 0 low word.  */
91	maclw 0,r10,DBL0L
92	machulw r5,r10,DBL0L
93	add_s r12,r12,r11 ; add exponents
94	mov r4,acc2
95	mululw 0,r5,1
/* High fraction x high fraction.  */
96	maclw 0,r6,r10
97	machulw r7,r6,r10 ; fraction product in r7:acc2:r4:r8
/* Flags from this tst are consumed by the bset.ne two lines down.  */
98	tst r8,r8
99	bclr r8,r9,30 ; 0x3ff00000
100	bset.ne r4,r4,0 ; put least significant word into sticky bit
101	bclr r6,r9,20 ; 0x7fe00000
/* r10 = r7 >> 9; Z is set when that is zero, i.e. r7 is below 0x200.  */
102	lsr.f r10,r7,9
/* In that case r8 = r9 - r8, turning 0x3ff00000 into 0x40000000.  */
103	rsub.eq r8,r8,r9 ; 0x40000000
104	sub r12,r12,r8 ; subtract bias + implicit 1
/* Unsigned range check: an exponent outside [0, 0x7fe00000) is handled
   by .Linf_denorm.  Delay slot: r10 = 12 - r10, the left shift count for
   the fraction (runs on both paths).  */
105	brhs.d r12,r6,.Linf_denorm
106	rsub r10,r10,12
/* .Lshift_frac: shift the fraction r7:acc2:r4 left by r10 bits, round to
   nearest / even, add the exponent r12 and apply the sign.  Also entered
   from .Linf_denorm and .Lshift32_frac with r12 = 0 and a smaller shift
   count.  */
107.Lshift_frac:
/* r8 = negated count for the complementary right shifts (relies on shift
   counts being taken modulo 32 - confirm).  */
108	neg r8,r10
/* r6 = bits of r4 that fall below the result; DBL0L = bits of r4 that
   stay in the low result word.  */
109	asl r6,r4,r10
110	lsr DBL0L,r4,r8
/* C = top discarded bit (round bit), Z = nothing set below it.  */
111	add.f 0,r6,r6
/* Exact tie only: Z = low result bit is already even ...  */
112	btst.eq DBL0L,0
/* ... in which case comparing r4 with itself clears C, so the adc below
   adds nothing.  */
113	cmp.eq r4,r4 ; round to nearest / round to even
114	asl r4,acc2,r10
115	lsr r5,acc2,r8
/* Low word = kept r4 bits + shifted acc2 + rounding carry.  */
116	adc.f DBL0L,DBL0L,r4
/* N = sign of the product.  The adc below still needs the carry from the
   adc.f above, so this relies on xor.f not changing C - confirm.  */
117	xor.f 0,DBL0H,DBL1H
118	asl r7,r7,r10
119	add_s r12,r12,r5
/* High word = exponent + fraction bits + carry out of the low word.  */
120	adc DBL0H,r12,r7
/* Return; the delay slot sets the sign bit for a negative product.  */
121	j_s.d [blink]
122	bset.mi DBL0H,DBL0H,31
123
124/* N.B. This is optimized for ARC700.
125  ARC600 has very different scheduling / instruction selection criteria.  */
126
127/* If one number is denormal, subtract some from the exponent of the other
128   one (if the other exponent is too small, return 0), and normalize the
129   denormal.  Then re-run the computation.  */
/* .Lret0_2: return a zero whose sign is bit 31 of DBL0H.  Reached from
   .Ldenorm_2 when the second operand turns out to be zero; by then
   .Ldenorm_dbl1 has already folded the sign of that operand into DBL0H.  */
130.Lret0_2:
/* Shift right then left by 31 to keep only the sign bit.  */
131	lsr_s DBL0H,DBL0H,31
132	asl_s DBL0H,DBL0H,31
/* The low word is cleared in the delay slot of the return.  */
133	j_s.d [blink]
134	mov_s DBL0L,0
135	.balign 4
/* .Ldenorm_dbl0: operand 0 has a zero exponent field.  Swap the two
   operands through r12 so that this operand becomes operand 1, reload
   r11 with the exponent field of the new operand 0, and fall through.  */
136.Ldenorm_dbl0:
137	mov_s r12,DBL0L
138	mov_s DBL0L,DBL1L
139	mov_s DBL1L,r12
140	mov_s r12,DBL0H
141	mov_s DBL0H,DBL1H
142	mov_s DBL1H,r12
143	and r11,DBL0H,r9
/* .Ldenorm_dbl1: operand 1 has a zero exponent field.
   On entry r11 = exponent field of operand 0 and r9 = 0x7ff00000.
   Normalises operand 1 so that its leading fraction bit is at bit 20 of
   DBL1H, lowers the exponent of operand 0 by the shift count, and
   restarts the multiply at __muldf3.  */
144.Ldenorm_dbl1:
/* Operand 0 is infinity or NaN.  */
145	brhs r11,r9,.Linf_nan
/* Exponent field of operand 0 at or below 0x3ca00000 (this includes the
   case where it is zero too): return a signed zero.  */
146	brhs 0x3ca00001,r11,.Lret0
/* Subtract DBL1H, clear its sign bit, add it back: the net effect is
   that the sign bit of operand 1 is subtracted from DBL0H, which flips
   bit 31 of DBL0H when operand 1 is negative.  Z from the bmsk.f is set
   when no fraction bits are left in DBL1H.  */
147	sub_s DBL0H,DBL0H,DBL1H
148	bmsk.f DBL1H,DBL1H,30
149	add_s DBL0H,DBL0H,DBL1H
/* All fraction bits are in the low word: let .Ldenorm_2 handle it
   (assumes add_s leaves the flags from bmsk.f alone).  The norm in the
   delay slot is only needed on the fall-through path.  */
150	beq.d .Ldenorm_2
151	norm r12,DBL1H
/* r12 = left shift that brings the top set bit of DBL1H to bit 20
   (assumes norm yields the count of redundant sign bits - confirm).  */
152	sub_s r12,r12,10
/* r5 = shift count in exponent-field position.  */
153	asl r5,r12,20
154	asl_s DBL1H,DBL1H,r12
/* Charge the shift to the exponent of operand 0.  */
155	sub DBL0H,DBL0H,r5
/* r6 = bits of DBL1L that move up into the high word (right shift by
   the negated count, relying on counts being taken modulo 32).  */
156	neg r5,r12
157	lsr r6,DBL1L,r5
158	asl_s DBL1L,DBL1L,r12
/* Restart the multiply; the delay slot merges the moved bits into DBL1H.  */
159	b.d __muldf3
160	add_s DBL1H,DBL1H,r6
161
/* .Lret0: return a zero whose sign is the exclusive or of the sign bits
   of DBL0H and DBL1H.  After the first xor DBL0H holds the xor of both
   high words; DBL1H then becomes that value with bit 31 cleared, so the
   second xor cancels everything except bit 31.  */
162.Lret0:	xor_s DBL0H,DBL0H,DBL1H
163	bclr DBL1H,DBL0H,31
164	xor_s DBL0H,DBL0H,DBL1H
/* The low word is cleared in the delay slot of the return.  */
165	j_s.d [blink]
166	mov_s DBL0L,0
167
168	.balign 4
/* .Linf_nan: at least one operand has an all-ones exponent field.
   Returns NaN (high word all ones) when either operand is zero,
   otherwise the larger of the two high words without sign, with the sign
   of the product applied and a zero low word.
   NOTE(review): a NaN whose only set fraction bits are in its low word
   has the same high word as infinity, so the max below cannot tell them
   apart; with a non-zero other operand the result then looks like
   infinity instead of NaN - confirm whether that is intended.  */
169.Linf_nan:
/* r12 = high word of operand 1 without its sign.  */
170	bclr r12,DBL1H,31
/* Bit 31 of DBL1H = sign of the product.  */
171	xor_s DBL1H,DBL1H,DBL0H
/* DBL0H = high word of operand 0 without its sign.  */
172	bclr_s DBL0H,DBL0H,31
173	max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
/* Z = operand 0 is zero.  */
174	or.f 0,DBL0H,DBL0L
175	mov_s DBL0L,0
/* Only if operand 0 is non-zero: Z = operand 1 is zero.  */
176	or.ne.f DBL1L,DBL1L,r12
/* DBL0L is zero here, so this sets DBL0H to all ones.  */
177	not_s DBL0H,DBL0L ; inf * 0 -> NaN
/* Neither operand is zero: use the max computed above instead.  */
178	mov.ne DBL0H,r8
/* Apply the product sign in the delay slot of the return.  */
179	tst_s DBL1H,DBL1H
180	j_s.d [blink]
181	bset.mi DBL0H,DBL0H,31
182
183/* We have checked for infinity / NaN input before, and transformed
184   denormalized inputs into normalized inputs.  Thus, the worst case
185   exponent overflows are:
186       1 +     1 - 0x400 == 0xc02 : maximum underflow
187   0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
188   N.B. 0x7fe and 0x7ff are also values for overflow.
189
190   If (r12 <= -54), we have an underflow to zero.  */
191	.balign 4
/* .Linf_denorm: the exponent in r12 failed the range check in __muldf3.
   On entry r12 = exponent in field position (bit 20 up), r10 = left
   shift count for the fraction, r9 = 0x7ff00000, fraction in r7:acc2:r4.
   Overflow goes to .Linf; otherwise the result is denormal or zero and
   the fraction shift is reduced by the negative exponent.  */
192.Linf_denorm:
/* A top nibble below 0xc is classed as overflow (see the worst case
   values in the comment block above).  Delay slot: r6 = exponent as a
   signed integer.  */
193	lsr r6,r12,28
194	brlo.d r6,0xc,.Linf
195	asr r6,r12,20
/* r10 = shift count + exponent.  */
196	add.f r10,r10,r6
/* Still a left shift: use the normal path.  r12 is cleared in the delay
   slot so the exponent field of the result is zero; this also holds for
   the paths below.  */
197	brgt.d r10,0,.Lshift_frac
198	mov_s r12,0
/* Count exactly zero (Z from the add.f, assuming brgt.d and mov_s leave
   the flags alone): round without shifting.  The delay slot raises the
   count by 32 for the fall-through into .Lshift32_frac.  */
199	beq.d .Lround_frac
200	add r10,r10,32
/* .Lshift32_frac: the count was negative.  Move the fraction down by one
   whole word: r4 receives acc2, the accumulator presumably receives r7
   through the mululw by 1 (confirm), and r7 is cleared.  A non-zero
   discarded r4 is kept as a sticky bit in bit 1 of the new r4.  */
201.Lshift32_frac:
202	tst r4,r4
203	mov r4,acc2
204	bset.ne r4,r4,1
205	mululw 0,r7,1
/* Count now at least 1: finish on the normal path.  */
206	brge.d r10,1,.Lshift_frac
207	mov r7,0
/* Count exactly zero: round without shifting.  The delay slot raises the
   count by another 32 for a possible further pass.  */
208	breq.d r10,0,.Lround_frac
209	add r10,r10,32
/* Go round again while the raised count is above 21; otherwise the
   result underflows to a signed zero.  */
210	brgt r10,21,.Lshift32_frac
211	b_s .Lret0
212
/* .Lround_frac: the fraction needs no further bit shift.  The result is
   r7:acc2 and r4 holds the discarded bits (round bit in bit 31, sticky
   bits below it).  Round to nearest, ties to even, using the same flag
   sequence as .Lshift_frac.
   Both adc instructions used to be conditional on Z, which is clear
   whenever more than half an ulp is discarded, so that case was never
   rounded up, and an exact tie was rounded up only when the low bit was
   already even.  Now the carry is cleared for a tie with an even low bit
   and added unconditionally otherwise.
   This sequence is meant to encode to the same number of bytes as the
   one it replaces, so the hard-coded pcl offset of the ld.as in
   __muldf3 stays valid - verify with the assembler.
   NOTE(review): no sign bit is applied on this exit, unlike the
   .Lshift_frac exit - confirm whether negative products can reach here.  */
213.Lround_frac:
/* C = round bit, Z = nothing set below it.  */
214	add.f 0,r4,r4
/* Exact tie only: Z = low result bit is already even ...  */
215	btst.eq acc2,0
/* ... in which case comparing r4 with itself clears C.  */
216	cmp.eq r4,r4 ; round to nearest / round to even
/* Low word = acc2 + rounding carry.  */
217	adc.f DBL0L,acc2,0
/* Return; the delay slot forms high word = r7 + carry from the low word.  */
218	j_s.d [blink]
219	adc DBL0H,r7,0
221
/* .Linf: exponent overflow.  Return infinity (high word taken from r9,
   which holds 0x7ff00000, low word zero) with the sign of the product.  */
221.Linf:	mov_s DBL0L,0
/* N = exclusive or of the operand signs; consumed by the bset.mi in the
   delay slot (assumes mov_s and j_s.d leave the flags alone).  */
222	xor.f DBL1H,DBL1H,DBL0H
223	mov_s DBL0H,r9
224	j_s.d [blink]
225	bset.mi DBL0H,DBL0H,31
226	ENDFUNC(__muldf3)
228
228	.balign 4
/* Exponent field mask for the high word of a double.  The comment on the
   pcl-relative ld.as at the top of __muldf3 names this label as the
   intended target of its hard-coded offset.  */
229.L7ff00000:
230	.long 0x7ff00000
232