xref: /netbsd-src/sys/arch/m68k/fpsp/bindec.sa (revision ae9172d6cd9432a6a1a56760d86b32c57a66c39c)
1*	$NetBSD: bindec.sa,v 1.3 1994/10/26 07:48:51 cgd Exp $
2
3*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4*	M68000 Hi-Performance Microprocessor Division
5*	M68040 Software Package
6*
7*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
8*	All rights reserved.
9*
10*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11*	To the maximum extent permitted by applicable law,
12*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14*	PARTICULAR PURPOSE and any warranty against infringement with
15*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16*	and any accompanying written materials.
17*
18*	To the maximum extent permitted by applicable law,
19*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
24*	and support of the SOFTWARE.
25*
26*	You are hereby granted a copyright license to use, modify, and
27*	distribute the SOFTWARE so long as this entire notice is retained
28*	without alteration in any modified and/or redistributed versions,
29*	and that such modified versions are clearly identified as such.
30*	No licenses are granted by implication, estoppel or otherwise
31*	under any patents or trademarks of Motorola, Inc.
32
33*
34*	bindec.sa 3.4 1/3/91
35*
36*	bindec
37*
38*	Description:
39*		Converts an input in extended precision format
40*		to bcd format.
41*
42*	Input:
43*		a0 points to the input extended precision value
44*		value in memory; d0 contains the k-factor sign-extended
45*		to 32-bits.  The input may be either normalized,
46*		unnormalized, or denormalized.
47*
48*	Output:	result in the FP_SCR1 space on the stack.
49*
50*	Saves and Modifies: D2-D7,A2,FP2
51*
52*	Algorithm:
53*
54*	A1.	Set RM and size ext;  Set SIGMA = sign of input.
55*		The k-factor is saved for use in d7. Clear the
56*		BINDEC_FLG for separating normalized/denormalized
57*		input.  If input is unnormalized or denormalized,
58*		normalize it.
59*
60*	A2.	Set X = abs(input).
61*
62*	A3.	Compute ILOG.
63*		ILOG is the log base 10 of the input value.  It is
64*		approximated by adding e + 0.f when the original
65*		value is viewed as 2^^e * 1.f in extended precision.
66*		This value is stored in d6.
67*
68*	A4.	Clr INEX bit.
69*		The operation in A3 above may have set INEX2.
70*
71*	A5.	Set ICTR = 0;
72*		ICTR is a flag used in A13.  It must be set before the
73*		loop entry A6.
74*
75*	A6.	Calculate LEN.
76*		LEN is the number of digits to be displayed.  The
77*		k-factor can dictate either the total number of digits,
78*		if it is a positive number, or the number of digits
79*		after the decimal point which are to be included as
80*		significant.  See the 68882 manual for examples.
81*		If LEN is computed to be greater than 17, set OPERR in
82*		USER_FPSR.  LEN is stored in d4.
83*
84*	A7.	Calculate SCALE.
85*		SCALE is equal to 10^ISCALE, where ISCALE is the number
86*		of decimal places needed to insure LEN integer digits
87*		in the output before conversion to bcd. LAMBDA is the
88*		sign of ISCALE, used in A9. Fp1 contains
89*		10^^(abs(ISCALE)) using a rounding mode which is a
90*		function of the original rounding mode and the signs
91*		of ISCALE and X.  A table is given in the code.
92*
93*	A8.	Clr INEX; Force RZ.
94*		The operation in A3 above may have set INEX2.
95*		RZ mode is forced for the scaling operation to insure
96*		only one rounding error.  The grs bits are collected in
97*		the INEX flag for use in A10.
98*
99*	A9.	Scale X -> Y.
100*		The mantissa is scaled to the desired number of
101*		significant digits.  The excess digits are collected
102*		in INEX2.
103*
104*	A10.	Or in INEX.
105*		If INEX is set, round error occured.  This is
106*		compensated for by 'or-ing' in the INEX2 flag to
107*		the lsb of Y.
108*
109*	A11.	Restore original FPCR; set size ext.
110*		Perform FINT operation in the user's rounding mode.
111*		Keep the size to extended.
112*
113*	A12.	Calculate YINT = FINT(Y) according to user's rounding
114*		mode.  The FPSP routine sintd0 is used.  The output
115*		is in fp0.
116*
117*	A13.	Check for LEN digits.
118*		If the int operation results in more than LEN digits,
119*		or less than LEN -1 digits, adjust ILOG and repeat from
120*		A6.  This test occurs only on the first pass.  If the
121*		result is exactly 10^LEN, decrement ILOG and divide
122*		the mantissa by 10.
123*
124*	A14.	Convert the mantissa to bcd.
125*		The binstr routine is used to convert the LEN digit
126*		mantissa to bcd in memory.  The input to binstr is
127*		to be a fraction; i.e. (mantissa)/10^LEN and adjusted
128*		such that the decimal point is to the left of bit 63.
129*		The bcd digits are stored in the correct position in
130*		the final string area in memory.
131*
132*	A15.	Convert the exponent to bcd.
133*		As in A14 above, the exp is converted to bcd and the
134*		digits are stored in the final string.
135*		Test the length of the final exponent string.  If the
136*		length is 4, set operr.
137*
138*	A16.	Write sign bits to final string.
139*
140*	Implementation Notes:
141*
142*	The registers are used as follows:
143*
144*		d0: scratch; LEN input to binstr
145*		d1: scratch
146*		d2: upper 32-bits of mantissa for binstr
147*		d3: scratch;lower 32-bits of mantissa for binstr
148*		d4: LEN
149*      		d5: LAMBDA/ICTR
150*		d6: ILOG
151*		d7: k-factor
152*		a0: ptr for original operand/final result
153*		a1: scratch pointer
154*		a2: pointer to FP_X; abs(original value) in ext
155*		fp0: scratch
156*		fp1: scratch
157*		fp2: scratch
158*		F_SCR1:
159*		F_SCR2:
160*		L_SCR1:
161*		L_SCR2:
162*
163
164BINDEC    IDNT    2,1 Motorola 040 Floating Point Software Package
165
166	include	fpsp.h
167
168	section	8
169
170* Constants in extended precision
171LOG2 	dc.l	$3FFD0000,$9A209A84,$FBCFF798,$00000000
172LOG2UP1	dc.l	$3FFD0000,$9A209A84,$FBCFF799,$00000000
173
174* Constants in single precision
175FONE 	dc.l	$3F800000,$00000000,$00000000,$00000000
176FTWO	dc.l	$40000000,$00000000,$00000000,$00000000
177FTEN 	dc.l	$41200000,$00000000,$00000000,$00000000
178F4933	dc.l	$459A2800,$00000000,$00000000,$00000000
179
180RBDTBL 	dc.b	0,0,0,0
181	dc.b	3,3,2,2
182	dc.b	3,2,2,3
183	dc.b	2,3,3,2
184
185	xref	binstr
186	xref	sintdo
187	xref	ptenrn,ptenrm,ptenrp
188
189	xdef	bindec
190	xdef	sc_mul
191bindec:
192	movem.l	d2-d7/a2,-(a7)
193	fmovem.x fp0-fp2,-(a7)
194
195* A1. Set RM and size ext. Set SIGMA = sign input;
196*     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
197*     separating  normalized/denormalized input.  If the input
198*     is a denormalized number, set the BINDEC_FLG memory word
199*     to signal denorm.  If the input is unnormalized, normalize
200*     the input and test for denormalized result.
201*
202	fmove.l	#rm_mode,FPCR	;set RM and ext
203	move.l	(a0),L_SCR2(a6)	;save exponent for sign check
204	move.l	d0,d7		;move k-factor to d7
205	clr.b	BINDEC_FLG(a6)	;clr norm/denorm flag
206	move.w	STAG(a6),d0	;get stag
207	andi.w	#$e000,d0	;isolate stag bits
208	beq	A2_str		;if zero, input is norm
209*
210* Normalize the denorm
211*
212un_de_norm:
213	move.w	(a0),d0
214	andi.w	#$7fff,d0	;strip sign of normalized exp
215	move.l	4(a0),d1
216	move.l	8(a0),d2
217norm_loop:
218	sub.w	#1,d0
219	add.l	d2,d2
220	addx.l	d1,d1
221	tst.l	d1
222	bge.b	norm_loop
223*
224* Test if the normalized input is denormalized
225*
226	tst.w	d0
227	bgt.b	pos_exp		;if greater than zero, it is a norm
228	st	BINDEC_FLG(a6)	;set flag for denorm
229pos_exp:
230	andi.w	#$7fff,d0	;strip sign of normalized exp
231	move.w	d0,(a0)
232	move.l	d1,4(a0)
233	move.l	d2,8(a0)
234
235* A2. Set X = abs(input).
236*
237A2_str:
238	move.l	(a0),FP_SCR2(a6) ; move input to work space
239	move.l	4(a0),FP_SCR2+4(a6) ; move input to work space
240	move.l	8(a0),FP_SCR2+8(a6) ; move input to work space
241	andi.l	#$7fffffff,FP_SCR2(a6) ;create abs(X)
242
243* A3. Compute ILOG.
244*     ILOG is the log base 10 of the input value.  It is approx-
245*     imated by adding e + 0.f when the original value is viewed
246*     as 2^^e * 1.f in extended precision.  This value is stored
247*     in d6.
248*
249* Register usage:
250*	Input/Output
251*	d0: k-factor/exponent
252*	d2: x/x
253*	d3: x/x
254*	d4: x/x
255*	d5: x/x
256*	d6: x/ILOG
257*	d7: k-factor/Unchanged
258*	a0: ptr for original operand/final result
259*	a1: x/x
260*	a2: x/x
261*	fp0: x/float(ILOG)
262*	fp1: x/x
263*	fp2: x/x
264*	F_SCR1:x/x
265*	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
266*	L_SCR1:x/x
267*	L_SCR2:first word of X packed/Unchanged
268
269	tst.b	BINDEC_FLG(a6)	;check for denorm
270	beq.b	A3_cont		;if clr, continue with norm
271	move.l	#-4933,d6	;force ILOG = -4933
272	bra.b	A4_str
273A3_cont:
274	move.w	FP_SCR2(a6),d0	;move exp to d0
275	move.w	#$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
276	fmove.x	FP_SCR2(a6),fp0	;now fp0 has 1.f
277	sub.w	#$3fff,d0	;strip off bias
278	fadd.w	d0,fp0		;add in exp
279	fsub.s	FONE,fp0	;subtract off 1.0
280	fbge.w	pos_res		;if pos, branch
281	fmul.x	LOG2UP1,fp0	;if neg, mul by LOG2UP1
282	fmove.l	fp0,d6		;put ILOG in d6 as a lword
283	bra.b	A4_str		;go move out ILOG
284pos_res:
285	fmul.x	LOG2,fp0	;if pos, mul by LOG2
286	fmove.l	fp0,d6		;put ILOG in d6 as a lword
287
288
289* A4. Clr INEX bit.
290*     The operation in A3 above may have set INEX2.
291
292A4_str:
293	fmove.l	#0,FPSR		;zero all of fpsr - nothing needed
294
295
296* A5. Set ICTR = 0;
297*     ICTR is a flag used in A13.  It must be set before the
298*     loop entry A6. The lower word of d5 is used for ICTR.
299
300	clr.w	d5		;clear ICTR
301
302
303* A6. Calculate LEN.
304*     LEN is the number of digits to be displayed.  The k-factor
305*     can dictate either the total number of digits, if it is
306*     a positive number, or the number of digits after the
307*     original decimal point which are to be included as
308*     significant.  See the 68882 manual for examples.
309*     If LEN is computed to be greater than 17, set OPERR in
310*     USER_FPSR.  LEN is stored in d4.
311*
312* Register usage:
313*	Input/Output
314*	d0: exponent/Unchanged
315*	d2: x/x/scratch
316*	d3: x/x
317*	d4: exc picture/LEN
318*	d5: ICTR/Unchanged
319*	d6: ILOG/Unchanged
320*	d7: k-factor/Unchanged
321*	a0: ptr for original operand/final result
322*	a1: x/x
323*	a2: x/x
324*	fp0: float(ILOG)/Unchanged
325*	fp1: x/x
326*	fp2: x/x
327*	F_SCR1:x/x
328*	F_SCR2:Abs(X) with $3fff exponent/Unchanged
329*	L_SCR1:x/x
330*	L_SCR2:first word of X packed/Unchanged
331
332A6_str:
333	tst.l	d7		;branch on sign of k
334	ble.b	k_neg		;if k <= 0, LEN = ILOG + 1 - k
335	move.l	d7,d4		;if k > 0, LEN = k
336	bra.b	len_ck		;skip to LEN check
337k_neg:
338	move.l	d6,d4		;first load ILOG to d4
339	sub.l	d7,d4		;subtract off k
340	addq.l	#1,d4		;add in the 1
341len_ck:
342	tst.l	d4		;LEN check: branch on sign of LEN
343	ble.b	LEN_ng		;if neg, set LEN = 1
344	cmp.l	#17,d4		;test if LEN > 17
345	ble.b	A7_str		;if not, forget it
346	move.l	#17,d4		;set max LEN = 17
347	tst.l	d7		;if negative, never set OPERR
348	ble.b	A7_str		;if positive, continue
349	or.l	#opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
350	bra.b	A7_str		;finished here
351LEN_ng:
352	moveq.l	#1,d4		;min LEN is 1
353
354
355* A7. Calculate SCALE.
356*     SCALE is equal to 10^ISCALE, where ISCALE is the number
357*     of decimal places needed to insure LEN integer digits
358*     in the output before conversion to bcd. LAMBDA is the sign
359*     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
360*     the rounding mode as given in the following table (see
361*     Coonen, p. 7.23 as ref.; however, the SCALE variable is
362*     of opposite sign in bindec.sa from Coonen).
363*
364*	Initial					USE
365*	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
366*	----------------------------------------------
367*	 RN	00	   0	   0		00/0	RN
368*	 RN	00	   0	   1		00/0	RN
369*	 RN	00	   1	   0		00/0	RN
370*	 RN	00	   1	   1		00/0	RN
371*	 RZ	01	   0	   0		11/3	RP
372*	 RZ	01	   0	   1		11/3	RP
373*	 RZ	01	   1	   0		10/2	RM
374*	 RZ	01	   1	   1		10/2	RM
375*	 RM	10	   0	   0		11/3	RP
376*	 RM	10	   0	   1		10/2	RM
377*	 RM	10	   1	   0		10/2	RM
378*	 RM	10	   1	   1		11/3	RP
379*	 RP	11	   0	   0		10/2	RM
380*	 RP	11	   0	   1		11/3	RP
381*	 RP	11	   1	   0		11/3	RP
382*	 RP	11	   1	   1		10/2	RM
383*
384* Register usage:
385*	Input/Output
386*	d0: exponent/scratch - final is 0
387*	d2: x/0 or 24 for A9
388*	d3: x/scratch - offset ptr into PTENRM array
389*	d4: LEN/Unchanged
390*	d5: 0/ICTR:LAMBDA
391*	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
392*	d7: k-factor/Unchanged
393*	a0: ptr for original operand/final result
394*	a1: x/ptr to PTENRM array
395*	a2: x/x
396*	fp0: float(ILOG)/Unchanged
397*	fp1: x/10^ISCALE
398*	fp2: x/x
399*	F_SCR1:x/x
400*	F_SCR2:Abs(X) with $3fff exponent/Unchanged
401*	L_SCR1:x/x
402*	L_SCR2:first word of X packed/Unchanged
403
404A7_str:
405	tst.l	d7		;test sign of k
406	bgt.b	k_pos		;if pos and > 0, skip this
407	cmp.l	d6,d7		;test k - ILOG
408	blt.b	k_pos		;if ILOG >= k, skip this
409	move.l	d7,d6		;if ((k<0) & (ILOG < k)) ILOG = k
410k_pos:
411	move.l	d6,d0		;calc ILOG + 1 - LEN in d0
412	addq.l	#1,d0		;add the 1
413	sub.l	d4,d0		;sub off LEN
414	swap	d5		;use upper word of d5 for LAMBDA
415	clr.w	d5		;set it zero initially
416	clr.w	d2		;set up d2 for very small case
417	tst.l	d0		;test sign of ISCALE
418	bge.b	iscale		;if pos, skip next inst
419	addq.w	#1,d5		;if neg, set LAMBDA true
420	cmp.l	#$ffffecd4,d0	;test iscale <= -4908
421	bgt.b	no_inf		;if false, skip rest
422	addi.l	#24,d0		;add in 24 to iscale
423	move.l	#24,d2		;put 24 in d2 for A9
424no_inf:
425	neg.l	d0		;and take abs of ISCALE
426iscale:
427	fmove.s	FONE,fp1	;init fp1 to 1
428	bfextu	USER_FPCR(a6){26:2},d1 ;get initial rmode bits
429	add.w	d1,d1		;put them in bits 2:1
430	add.w	d5,d1		;add in LAMBDA
431	add.w	d1,d1		;put them in bits 3:1
432	tst.l	L_SCR2(a6)	;test sign of original x
433	bge.b	x_pos		;if pos, don't set bit 0
434	addq.l	#1,d1		;if neg, set bit 0
435x_pos:
436	lea.l	RBDTBL,a2	;load rbdtbl base
437	move.b	(a2,d1),d3	;load d3 with new rmode
438	lsl.l	#4,d3		;put bits in proper position
439	fmove.l	d3,fpcr		;load bits into fpu
440	lsr.l	#4,d3		;put bits in proper position
441	tst.b	d3		;decode new rmode for pten table
442	bne.b	not_rn		;if zero, it is RN
443	lea.l	PTENRN,a1	;load a1 with RN table base
444	bra.b	rmode		;exit decode
445not_rn:
446	lsr.b	#1,d3		;get lsb in carry
447	bcc.b	not_rp		;if carry clear, it is RM
448	lea.l	PTENRP,a1	;load a1 with RP table base
449	bra.b	rmode		;exit decode
450not_rp:
451	lea.l	PTENRM,a1	;load a1 with RM table base
452rmode:
453	clr.l	d3		;clr table index
454e_loop:
455	lsr.l	#1,d0		;shift next bit into carry
456	bcc.b	e_next		;if zero, skip the mul
457	fmul.x	(a1,d3),fp1	;mul by 10**(d3_bit_no)
458e_next:
459	add.l	#12,d3		;inc d3 to next pwrten table entry
460	tst.l	d0		;test if ISCALE is zero
461	bne.b	e_loop		;if not, loop
462
463
464* A8. Clr INEX; Force RZ.
465*     The operation in A3 above may have set INEX2.
466*     RZ mode is forced for the scaling operation to insure
467*     only one rounding error.  The grs bits are collected in
468*     the INEX flag for use in A10.
469*
470* Register usage:
471*	Input/Output
472
473	fmove.l	#0,FPSR		;clr INEX
474	fmove.l	#rz_mode,FPCR	;set RZ rounding mode
475
476
477* A9. Scale X -> Y.
478*     The mantissa is scaled to the desired number of significant
479*     digits.  The excess digits are collected in INEX2. If mul,
480*     Check d2 for excess 10 exponential value.  If not zero,
481*     the iscale value would have caused the pwrten calculation
482*     to overflow.  Only a negative iscale can cause this, so
483*     multiply by 10^(d2), which is now only allowed to be 24,
484*     with a multiply by 10^8 and 10^16, which is exact since
485*     10^24 is exact.  If the input was denormalized, we must
486*     create a busy stack frame with the mul command and the
487*     two operands, and allow the fpu to complete the multiply.
488*
489* Register usage:
490*	Input/Output
491*	d0: FPCR with RZ mode/Unchanged
492*	d2: 0 or 24/unchanged
493*	d3: x/x
494*	d4: LEN/Unchanged
495*	d5: ICTR:LAMBDA
496*	d6: ILOG/Unchanged
497*	d7: k-factor/Unchanged
498*	a0: ptr for original operand/final result
499*	a1: ptr to PTENRM array/Unchanged
500*	a2: x/x
501*	fp0: float(ILOG)/X adjusted for SCALE (Y)
502*	fp1: 10^ISCALE/Unchanged
503*	fp2: x/x
504*	F_SCR1:x/x
505*	F_SCR2:Abs(X) with $3fff exponent/Unchanged
506*	L_SCR1:x/x
507*	L_SCR2:first word of X packed/Unchanged
508
509A9_str:
510	fmove.x	(a0),fp0	;load X from memory
511	fabs.x	fp0		;use abs(X)
512	tst.w	d5		;LAMBDA is in lower word of d5
513	bne.b	sc_mul		;if neg (LAMBDA = 1), scale by mul
514	fdiv.x	fp1,fp0		;calculate X / SCALE -> Y to fp0
515	bra.b	A10_st		;branch to A10
516
517sc_mul:
518	tst.b	BINDEC_FLG(a6)	;check for denorm
519	beq.b	A9_norm		;if norm, continue with mul
520	fmovem.x fp1,-(a7)	;load ETEMP with 10^ISCALE
521	move.l	8(a0),-(a7)	;load FPTEMP with input arg
522	move.l	4(a0),-(a7)
523	move.l	(a0),-(a7)
524	move.l	#18,d3		;load count for busy stack
525A9_loop:
526	clr.l	-(a7)		;clear lword on stack
527	dbf.w	d3,A9_loop
528	move.b	VER_TMP(a6),(a7) ;write current version number
529	move.b	#BUSY_SIZE-4,1(a7) ;write current busy size
530	move.b	#$10,$44(a7)	;set fcefpte[15] bit
531	move.w	#$0023,$40(a7)	;load cmdreg1b with mul command
532	move.b	#$fe,$8(a7)	;load all 1s to cu savepc
533	frestore (a7)+		;restore frame to fpu for completion
534	fmul.x	36(a1),fp0	;multiply fp0 by 10^8
535	fmul.x	48(a1),fp0	;multiply fp0 by 10^16
536	bra.b	A10_st
537A9_norm:
538	tst.w	d2		;test for small exp case
539	beq.b	A9_con		;if zero, continue as normal
540	fmul.x	36(a1),fp0	;multiply fp0 by 10^8
541	fmul.x	48(a1),fp0	;multiply fp0 by 10^16
542A9_con:
543	fmul.x	fp1,fp0		;calculate X * SCALE -> Y to fp0
544
545
546* A10. Or in INEX.
547*      If INEX is set, round error occured.  This is compensated
548*      for by 'or-ing' in the INEX2 flag to the lsb of Y.
549*
550* Register usage:
551*	Input/Output
552*	d0: FPCR with RZ mode/FPSR with INEX2 isolated
553*	d2: x/x
554*	d3: x/x
555*	d4: LEN/Unchanged
556*	d5: ICTR:LAMBDA
557*	d6: ILOG/Unchanged
558*	d7: k-factor/Unchanged
559*	a0: ptr for original operand/final result
560*	a1: ptr to PTENxx array/Unchanged
561*	a2: x/ptr to FP_SCR2(a6)
562*	fp0: Y/Y with lsb adjusted
563*	fp1: 10^ISCALE/Unchanged
564*	fp2: x/x
565
566A10_st:
567	fmove.l	FPSR,d0		;get FPSR
568	fmove.x	fp0,FP_SCR2(a6)	;move Y to memory
569	lea.l	FP_SCR2(a6),a2	;load a2 with ptr to FP_SCR2
570	btst.l	#9,d0		;check if INEX2 set
571	beq.b	A11_st		;if clear, skip rest
572	ori.l	#1,8(a2)	;or in 1 to lsb of mantissa
573	fmove.x	FP_SCR2(a6),fp0	;write adjusted Y back to fpu
574
575
576* A11. Restore original FPCR; set size ext.
577*      Perform FINT operation in the user's rounding mode.  Keep
578*      the size to extended.  The sintdo entry point in the sint
579*      routine expects the FPCR value to be in USER_FPCR for
580*      mode and precision.  The original FPCR is saved in L_SCR1.
581
582A11_st:
583	move.l	USER_FPCR(a6),L_SCR1(a6) ;save it for later
584	andi.l	#$00000030,USER_FPCR(a6) ;set size to ext,
585*					;block exceptions
586
587
588* A12. Calculate YINT = FINT(Y) according to user's rounding mode.
589*      The FPSP routine sintd0 is used.  The output is in fp0.
590*
591* Register usage:
592*	Input/Output
593*	d0: FPSR with AINEX cleared/FPCR with size set to ext
594*	d2: x/x/scratch
595*	d3: x/x
596*	d4: LEN/Unchanged
597*	d5: ICTR:LAMBDA/Unchanged
598*	d6: ILOG/Unchanged
599*	d7: k-factor/Unchanged
600*	a0: ptr for original operand/src ptr for sintdo
601*	a1: ptr to PTENxx array/Unchanged
602*	a2: ptr to FP_SCR2(a6)/Unchanged
603*	a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
604*	fp0: Y/YINT
605*	fp1: 10^ISCALE/Unchanged
606*	fp2: x/x
607*	F_SCR1:x/x
608*	F_SCR2:Y adjusted for inex/Y with original exponent
609*	L_SCR1:x/original USER_FPCR
610*	L_SCR2:first word of X packed/Unchanged
611
612A12_st:
613	movem.l	d0-d1/a0-a1,-(a7)	;save regs used by sintd0
614	move.l	L_SCR1(a6),-(a7)
615	move.l	L_SCR2(a6),-(a7)
616	lea.l	FP_SCR2(a6),a0		;a0 is ptr to F_SCR2(a6)
617	fmove.x	fp0,(a0)		;move Y to memory at FP_SCR2(a6)
618	tst.l	L_SCR2(a6)		;test sign of original operand
619	bge.b	do_fint			;if pos, use Y
620	or.l	#$80000000,(a0)		;if neg, use -Y
621do_fint:
622	move.l	USER_FPSR(a6),-(a7)
623	bsr	sintdo			;sint routine returns int in fp0
624	move.b	(a7),USER_FPSR(a6)
625	add.l	#4,a7
626	move.l	(a7)+,L_SCR2(a6)
627	move.l	(a7)+,L_SCR1(a6)
628	movem.l	(a7)+,d0-d1/a0-a1	;restore regs used by sint
629	move.l	L_SCR2(a6),FP_SCR2(a6)	;restore original exponent
630	move.l	L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR
631
632
633* A13. Check for LEN digits.
634*      If the int operation results in more than LEN digits,
635*      or less than LEN -1 digits, adjust ILOG and repeat from
636*      A6.  This test occurs only on the first pass.  If the
637*      result is exactly 10^LEN, decrement ILOG and divide
638*      the mantissa by 10.  The calculation of 10^LEN cannot
639*      be inexact, since all powers of ten upto 10^27 are exact
640*      in extended precision, so the use of a previous power-of-ten
641*      table will introduce no error.
642*
643*
644* Register usage:
645*	Input/Output
646*	d0: FPCR with size set to ext/scratch final = 0
647*	d2: x/x
648*	d3: x/scratch final = x
649*	d4: LEN/LEN adjusted
650*	d5: ICTR:LAMBDA/LAMBDA:ICTR
651*	d6: ILOG/ILOG adjusted
652*	d7: k-factor/Unchanged
653*	a0: pointer into memory for packed bcd string formation
654*	a1: ptr to PTENxx array/Unchanged
655*	a2: ptr to FP_SCR2(a6)/Unchanged
656*	fp0: int portion of Y/abs(YINT) adjusted
657*	fp1: 10^ISCALE/Unchanged
658*	fp2: x/10^LEN
659*	F_SCR1:x/x
660*	F_SCR2:Y with original exponent/Unchanged
661*	L_SCR1:original USER_FPCR/Unchanged
662*	L_SCR2:first word of X packed/Unchanged
663
664A13_st:
665	swap	d5		;put ICTR in lower word of d5
666	tst.w	d5		;check if ICTR = 0
667	bne	not_zr		;if non-zero, go to second test
668*
669* Compute 10^(LEN-1)
670*
671	fmove.s	FONE,fp2	;init fp2 to 1.0
672	move.l	d4,d0		;put LEN in d0
673	subq.l	#1,d0		;d0 = LEN -1
674	clr.l	d3		;clr table index
675l_loop:
676	lsr.l	#1,d0		;shift next bit into carry
677	bcc.b	l_next		;if zero, skip the mul
678	fmul.x	(a1,d3),fp2	;mul by 10**(d3_bit_no)
679l_next:
680	add.l	#12,d3		;inc d3 to next pwrten table entry
681	tst.l	d0		;test if LEN is zero
682	bne.b	l_loop		;if not, loop
683*
684* 10^LEN-1 is computed for this test and A14.  If the input was
685* denormalized, check only the case in which YINT > 10^LEN.
686*
687	tst.b	BINDEC_FLG(a6)	;check if input was norm
688	beq.b	A13_con		;if norm, continue with checking
689	fabs.x	fp0		;take abs of YINT
690	bra	test_2
691*
692* Compare abs(YINT) to 10^(LEN-1) and 10^LEN
693*
694A13_con:
695	fabs.x	fp0		;take abs of YINT
696	fcmp.x	fp2,fp0		;compare abs(YINT) with 10^(LEN-1)
697	fbge.w	test_2		;if greater, do next test
698	subq.l	#1,d6		;subtract 1 from ILOG
699	move.w	#1,d5		;set ICTR
700	fmove.l	#rm_mode,FPCR	;set rmode to RM
701	fmul.s	FTEN,fp2	;compute 10^LEN
702	bra.w	A6_str		;return to A6 and recompute YINT
703test_2:
704	fmul.s	FTEN,fp2	;compute 10^LEN
705	fcmp.x	fp2,fp0		;compare abs(YINT) with 10^LEN
706	fblt.w	A14_st		;if less, all is ok, go to A14
707	fbgt.w	fix_ex		;if greater, fix and redo
708	fdiv.s	FTEN,fp0	;if equal, divide by 10
709	addq.l	#1,d6		; and inc ILOG
710	bra.b	A14_st		; and continue elsewhere
711fix_ex:
712	addq.l	#1,d6		;increment ILOG by 1
713	move.w	#1,d5		;set ICTR
714	fmove.l	#rm_mode,FPCR	;set rmode to RM
715	bra.w	A6_str		;return to A6 and recompute YINT
716*
717* Since ICTR <> 0, we have already been through one adjustment,
718* and shouldn't have another; this is to check if abs(YINT) = 10^LEN
719* 10^LEN is again computed using whatever table is in a1 since the
720* value calculated cannot be inexact.
721*
722not_zr:
723	fmove.s	FONE,fp2	;init fp2 to 1.0
724	move.l	d4,d0		;put LEN in d0
725	clr.l	d3		;clr table index
726z_loop:
727	lsr.l	#1,d0		;shift next bit into carry
728	bcc.b	z_next		;if zero, skip the mul
729	fmul.x	(a1,d3),fp2	;mul by 10**(d3_bit_no)
730z_next:
731	add.l	#12,d3		;inc d3 to next pwrten table entry
732	tst.l	d0		;test if LEN is zero
733	bne.b	z_loop		;if not, loop
734	fabs.x	fp0		;get abs(YINT)
735	fcmp.x	fp2,fp0		;check if abs(YINT) = 10^LEN
736	fbne.w	A14_st		;if not, skip this
737	fdiv.s	FTEN,fp0	;divide abs(YINT) by 10
738	addq.l	#1,d6		;and inc ILOG by 1
739	addq.l	#1,d4		; and inc LEN
740	fmul.s	FTEN,fp2	; if LEN++, the get 10^^LEN
741
742
743* A14. Convert the mantissa to bcd.
744*      The binstr routine is used to convert the LEN digit
745*      mantissa to bcd in memory.  The input to binstr is
746*      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
747*      such that the decimal point is to the left of bit 63.
748*      The bcd digits are stored in the correct position in
749*      the final string area in memory.
750*
751*
752* Register usage:
753*	Input/Output
754*	d0: x/LEN call to binstr - final is 0
755*	d1: x/0
756*	d2: x/ms 32-bits of mant of abs(YINT)
757*	d3: x/ls 32-bits of mant of abs(YINT)
758*	d4: LEN/Unchanged
759*	d5: ICTR:LAMBDA/LAMBDA:ICTR
760*	d6: ILOG
761*	d7: k-factor/Unchanged
762*	a0: pointer into memory for packed bcd string formation
763*	    /ptr to first mantissa byte in result string
764*	a1: ptr to PTENxx array/Unchanged
765*	a2: ptr to FP_SCR2(a6)/Unchanged
766*	fp0: int portion of Y/abs(YINT) adjusted
767*	fp1: 10^ISCALE/Unchanged
768*	fp2: 10^LEN/Unchanged
769*	F_SCR1:x/Work area for final result
770*	F_SCR2:Y with original exponent/Unchanged
771*	L_SCR1:original USER_FPCR/Unchanged
772*	L_SCR2:first word of X packed/Unchanged
773
774A14_st:
775	fmove.l	#rz_mode,FPCR	;force rz for conversion
776	fdiv.x	fp2,fp0		;divide abs(YINT) by 10^LEN
777	lea.l	FP_SCR1(a6),a0
778	fmove.x	fp0,(a0)	;move abs(YINT)/10^LEN to memory
779	move.l	4(a0),d2	;move 2nd word of FP_RES to d2
780	move.l	8(a0),d3	;move 3rd word of FP_RES to d3
781	clr.l	4(a0)		;zero word 2 of FP_RES
782	clr.l	8(a0)		;zero word 3 of FP_RES
783	move.l	(a0),d0		;move exponent to d0
784	swap	d0		;put exponent in lower word
785	beq.b	no_sft		;if zero, don't shift
786	subi.l	#$3ffd,d0	;sub bias less 2 to make fract
787	tst.l	d0		;check if > 1
788	bgt.b	no_sft		;if so, don't shift
789	neg.l	d0		;make exp positive
790m_loop:
791	lsr.l	#1,d2		;shift d2:d3 right, add 0s
792	roxr.l	#1,d3		;the number of places
793	dbf.w	d0,m_loop	;given in d0
794no_sft:
795	tst.l	d2		;check for mantissa of zero
796	bne.b	no_zr		;if not, go on
797	tst.l	d3		;continue zero check
798	beq.b	zer_m		;if zero, go directly to binstr
799no_zr:
800	clr.l	d1		;put zero in d1 for addx
801	addi.l	#$00000080,d3	;inc at bit 7
802	addx.l	d1,d2		;continue inc
803	andi.l	#$ffffff80,d3	;strip off lsb not used by 882
804zer_m:
805	move.l	d4,d0		;put LEN in d0 for binstr call
806	addq.l	#3,a0		;a0 points to M16 byte in result
807	bsr	binstr		;call binstr to convert mant
808
809
810* A15. Convert the exponent to bcd.
811*      As in A14 above, the exp is converted to bcd and the
812*      digits are stored in the final string.
813*
814*      Digits are stored in L_SCR1(a6) on return from BINDEC as:
815*
816*  	 32               16 15                0
817*	-----------------------------------------
818*  	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
819*	-----------------------------------------
820*
821* And are moved into their proper places in FP_SCR1.  If digit e4
822* is non-zero, OPERR is signaled.  In all cases, all 4 digits are
823* written as specified in the 881/882 manual for packed decimal.
824*
825* Register usage:
826*	Input/Output
827*	d0: x/LEN call to binstr - final is 0
828*	d1: x/scratch (0);shift count for final exponent packing
829*	d2: x/ms 32-bits of exp fraction/scratch
830*	d3: x/ls 32-bits of exp fraction
831*	d4: LEN/Unchanged
832*	d5: ICTR:LAMBDA/LAMBDA:ICTR
833*	d6: ILOG
834*	d7: k-factor/Unchanged
835*	a0: ptr to result string/ptr to L_SCR1(a6)
836*	a1: ptr to PTENxx array/Unchanged
837*	a2: ptr to FP_SCR2(a6)/Unchanged
838*	fp0: abs(YINT) adjusted/float(ILOG)
839*	fp1: 10^ISCALE/Unchanged
840*	fp2: 10^LEN/Unchanged
841*	F_SCR1:Work area for final result/BCD result
842*	F_SCR2:Y with original exponent/ILOG/10^4
843*	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
844*	L_SCR2:first word of X packed/Unchanged
845
846A15_st:
847	tst.b	BINDEC_FLG(a6)	;check for denorm
848	beq.b	not_denorm
849	ftst.x	fp0		;test for zero
850	fbeq.w	den_zero	;if zero, use k-factor or 4933
851	fmove.l	d6,fp0		;float ILOG
852	fabs.x	fp0		;get abs of ILOG
853	bra.b	convrt
854den_zero:
855	tst.l	d7		;check sign of the k-factor
856	blt.b	use_ilog	;if negative, use ILOG
857	fmove.s	F4933,fp0	;force exponent to 4933
858	bra.b	convrt		;do it
859use_ilog:
860	fmove.l	d6,fp0		;float ILOG
861	fabs.x	fp0		;get abs of ILOG
862	bra.b	convrt
863not_denorm:
864	ftst.x	fp0		;test for zero
865	fbne.w	not_zero	;if zero, force exponent
866	fmove.s	FONE,fp0	;force exponent to 1
867	bra.b	convrt		;do it
868not_zero:
869	fmove.l	d6,fp0		;float ILOG
870	fabs.x	fp0		;get abs of ILOG
871convrt:
872	fdiv.x	24(a1),fp0	;compute ILOG/10^4
873	fmove.x	fp0,FP_SCR2(a6)	;store fp0 in memory
874	move.l	4(a2),d2	;move word 2 to d2
875	move.l	8(a2),d3	;move word 3 to d3
876	move.w	(a2),d0		;move exp to d0
877	beq.b	x_loop_fin	;if zero, skip the shift
878	subi.w	#$3ffd,d0	;subtract off bias
879	neg.w	d0		;make exp positive
880x_loop:
881	lsr.l	#1,d2		;shift d2:d3 right
882	roxr.l	#1,d3		;the number of places
883	dbf.w	d0,x_loop	;given in d0
884x_loop_fin:
885	clr.l	d1		;put zero in d1 for addx
886	addi.l	#$00000080,d3	;inc at bit 6
887	addx.l	d1,d2		;continue inc
888	andi.l	#$ffffff80,d3	;strip off lsb not used by 882
889	move.l	#4,d0		;put 4 in d0 for binstr call
890	lea.l	L_SCR1(a6),a0	;a0 is ptr to L_SCR1 for exp digits
891	bsr	binstr		;call binstr to convert exp
892	move.l	L_SCR1(a6),d0	;load L_SCR1 lword to d0
893	move.l	#12,d1		;use d1 for shift count
894	lsr.l	d1,d0		;shift d0 right by 12
895	bfins	d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
896	lsr.l	d1,d0		;shift d0 right by 12
897	bfins	d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1
898	tst.b	d0		;check if e4 is zero
899	beq.b	A16_st		;if zero, skip rest
900	or.l	#opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
901
902
903* A16. Write sign bits to final string.
904*	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
905*
906* Register usage:
907*	Input/Output
908*	d0: x/scratch - final is x
909*	d2: x/x
910*	d3: x/x
911*	d4: LEN/Unchanged
912*	d5: ICTR:LAMBDA/LAMBDA:ICTR
913*	d6: ILOG/ILOG adjusted
914*	d7: k-factor/Unchanged
915*	a0: ptr to L_SCR1(a6)/Unchanged
916*	a1: ptr to PTENxx array/Unchanged
917*	a2: ptr to FP_SCR2(a6)/Unchanged
918*	fp0: float(ILOG)/Unchanged
919*	fp1: 10^ISCALE/Unchanged
920*	fp2: 10^LEN/Unchanged
921*	F_SCR1:BCD result with correct signs
922*	F_SCR2:ILOG/10^4
923*	L_SCR1:Exponent digits on return from binstr
924*	L_SCR2:first word of X packed/Unchanged
925
926A16_st:
927	clr.l	d0		;clr d0 for collection of signs
928	andi.b	#$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1
929	tst.l	L_SCR2(a6)	;check sign of original mantissa
930	bge.b	mant_p		;if pos, don't set SM
931	moveq.l	#2,d0		;move 2 in to d0 for SM
932mant_p:
933	tst.l	d6		;check sign of ILOG
934	bge.b	wr_sgn		;if pos, don't set SE
935	addq.l	#1,d0		;set bit 0 in d0 for SE
936wr_sgn:
937	bfins	d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1
938
939* Clean up and restore all registers used.
940
941	fmove.l	#0,FPSR		;clear possible inex2/ainex bits
942	fmovem.x (a7)+,fp0-fp2
943	movem.l	(a7)+,d2-d7/a2
944	rts
945
946	end
947