/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#if !defined(lint)
	.ident	"%Z%%M%	%I%	%E% SMI"

	.file	"muldiv.s"
#endif

#if defined(__i386) && !defined(__amd64)

/*
 * Helper routines for 32-bit compilers to perform 64-bit math.
 * These are used by both the Sun and GCC compilers.
 */
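
/*
 * Illustrative mapping (a sketch; every routine named here is defined
 * below): for int64_t a, b in C source, a 32-bit compiler emits calls
 * such as
 *
 *	a * b	->	__mul64(a, b)			(Sun compiler)
 *	a / b	->	__div64(a, b) / __divdi3(a, b)	(Sun / GCC)
 *	a % b	->	__rem64(a, b) / __moddi3(a, b)	(Sun / GCC)
 */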

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>


#if defined(__lint)
#include <sys/types.h>

/* ARGSUSED */
int64_t
__mul64(int64_t a, int64_t b)
{
	return (0);
}

#else   /* __lint */

/
/   function __mul64(A,B:Longint):Longint;
/	{Overflow is not checked}
/
/ We essentially do the multiply by longhand, using base 2**32 digits.
/               a       b	parameter A
/	     x 	c       d	parameter B
/		---------
/               ad      bd
/       ac	bc
/       -----------------
/       ac	ad+bc	bd
/
/       We can ignore ac and the top 32 bits of ad+bc: if either is
/       nonzero, overflow happened.
/
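/ A C rendering of the algorithm above (an illustrative sketch, not
/ compiler output; LO, HI and HILO are as defined later in this file):
/
/ int64_t
/ __mul64(int64_t A, int64_t B)
/ {
/ 	uint32_t	a = HI(A), b = LO(A);
/ 	uint32_t	c = HI(B), d = LO(B);
/ 	uint64_t	bd = (uint64_t)b * d;
/
/ 	/* low digit is LO(bd); middle digit is LO(ad + bc) plus carry */
/ 	return (HILO(a * d + b * c + HI(bd), LO(bd)));
/ }
/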
	ENTRY(__mul64)
	push	%ebp
	mov    	%esp,%ebp
	pushl	%esi
	mov	12(%ebp),%eax	/ A.hi (a)
	mull	16(%ebp)	/ Multiply A.hi by B.lo (produces ad)
	xchg	%ecx,%eax	/ ecx = bottom half of ad.
	movl    8(%ebp),%eax	/ A.lo (b)
	movl	%eax,%esi	/ Save A.lo for later
	mull	16(%ebp)	/ Multiply A.lo by B.lo (edx:eax = bd)
	addl	%edx,%ecx	/ ecx = LO(ad) + HI(bd)
	xchg	%eax,%esi       / esi = LO(bd), eax = A.lo (b)
	mull	20(%ebp)	/ Multiply A.lo by B.hi (producing bc)
	addl	%ecx,%eax	/ Produce middle digit: LO(ad+bc) + HI(bd)
	movl	%esi,%edx	/ edx = LO(bd)
	xchg	%eax,%edx	/ Result: high word in edx, low word in eax
	popl	%esi
	movl	%ebp,%esp
	popl	%ebp
	ret     $16		/ Callee pops the two 64-bit arguments
	SET_SIZE(__mul64)

#endif	/* __lint */

/*
 * C support for 64-bit modulo and division.
 * Hand-customized compiler output - see comments for details.
 */
#if defined(__lint)

/* ARGSUSED */
uint64_t
__udiv64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__urem64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__div64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__rem64(int64_t a, int64_t b)
{ return (0); }

#else	/* __lint */

/ /*
/  * Unsigned division with remainder.
/  * Divide two uint64_ts, and calculate remainder.
/  */
/ uint64_t
/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
/ {
/ 	/* simple cases: y is a single uint32_t */
/ 	if (HI(y) == 0) {
/ 		uint32_t	div_hi, div_rem;
/ 		uint32_t 	q0, q1;
/
/ 		/* calculate q1 */
/ 		if (HI(x) < LO(y)) {
/ 			/* result is a single uint32_t, use one division */
/ 			q1 = 0;
/ 			div_hi = HI(x);
/ 		} else {
/ 			/* result is a double uint32_t, use two divisions */
/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/ 		}
/
/ 		/* calculate q0 and remainder */
/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/ 		/* return remainder */
/ 		*pmod = div_rem;
/
/ 		/* return result */
/ 		return (HILO(q1, q0));
/
/ 	} else if (HI(x) < HI(y)) {
/ 		/* HI(x) < HI(y) => x < y => result is 0 */
/
/ 		/* return remainder */
/ 		*pmod = x;
/
/ 		/* return result */
/ 		return (0);
/
/ 	} else {
/ 		/*
/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/ 		 * result
/ 		 */
/ 		uint32_t		y0, y1;
/ 		uint32_t		x1, x0;
/ 		uint32_t		q0;
/ 		uint32_t		normshift;
/
/ 		/* normalize by shifting x and y so MSB(y) == 1 */
/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/ 		normshift = 31 - normshift;
/
/ 		if (normshift == 0) {
/ 			/* no shifting needed, and x < 2*y so q <= 1 */
/ 			y1 = HI(y);
/ 			y0 = LO(y);
/ 			x1 = HI(x);
/ 			x0 = LO(x);
/
/ 			/* if x >= y then q = 1 (note x1 >= y1) */
/ 			if (x1 > y1 || x0 >= y0) {
/ 				q0 = 1;
/ 				/* subtract y from x to get remainder */
/ 				A_SUB2(y0, y1, x0, x1);
/ 			} else {
/ 				q0 = 0;
/ 			}
/
/ 			/* return remainder */
/ 			*pmod = HILO(x1, x0);
/
/ 			/* return result */
/ 			return (q0);
/
/ 		} else {
/ 			/*
/ 			 * the last case: result is one uint32_t, but we need to
/ 			 * normalize
/ 			 */
/ 			uint64_t	dt;
/ 			uint32_t		t0, t1, x2;
/
/ 			/* normalize y */
/ 			dt = (y << normshift);
/ 			y1 = HI(dt);
/ 			y0 = LO(dt);
/
/ 			/* normalize x (we need 3 uint32_ts!!!) */
/ 			x2 = (HI(x) >> (32 - normshift));
/ 			dt = (x << normshift);
/ 			x1 = HI(dt);
/ 			x0 = LO(dt);
/
/ 			/* estimate q0, and reduce x to a two uint32_t value */
/ 			A_DIV32(x1, x2, y1, q0, x1);
/
/ 			/* adjust q0 down if too high */
/ 			/*
/ 			 * because of the limited range of x2 we can only be
/ 			 * one off
/ 			 */
/ 			A_MUL32(y0, q0, t0, t1);
/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/ 				q0--;
/ 				A_SUB2(y0, y1, t0, t1);
/ 			}
/ 			/* return remainder */
/ 			/* subtract product from x to get remainder */
/ 			A_SUB2(t0, t1, x0, x1);
/ 			*pmod = (HILO(x1, x0) >> normshift);
/
/ 			/* return result */
/ 			return (q0);
/ 		}
/ 	}
/ }
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi	/ y,
	testl	%edi, %edi	/ tmp63
	movl	%eax, 40(%esp)	/ x, x
	movl	%edx, 44(%esp)	/ x, x
	movl	%edi, %esi	/, tmp62
	movl	%edi, %ecx	/ tmp62, tmp63
	jne	.LL2
	movl	%edx, %eax	/, tmp68
	cmpl	64(%esp), %eax	/ y, tmp68
	jae	.LL21
.LL4:
	movl	72(%esp), %ebp	/ pmod,
	xorl	%esi, %esi	/ <result>
	movl	40(%esp), %eax	/ x, q0
	movl	%ecx, %edi	/ <result>, <result>
	divl	64(%esp)	/ y
	movl	%edx, (%ebp)	/ div_rem,
	xorl	%edx, %edx	/ q0
	addl	%eax, %esi	/ q0, <result>
	movl	$0, 4(%ebp)
	adcl	%edx, %edi	/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL2:
	movl	44(%esp), %eax	/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax	/ tmp62, tmp5
	movl	%eax, 32(%esp)	/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	movl	72(%esp), %esi	/ pmod,
	movl	40(%esp), %ebp	/ x,
	movl	44(%esp), %ecx	/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi	/ <result>
	xorl	%esi, %esi	/ <result>
.LL22:
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL21:
	movl	%edi, %edx	/ tmp63, div_hi
	divl	64(%esp)	/ y
	movl	%eax, %ecx	/, q1
	jmp	.LL4
	.align	16
.LL6:
	movl	$31, %edi	/, tmp87
	bsrl	%esi,%edx	/ tmp62, normshift
	subl	%edx, %edi	/ normshift, tmp87
	movl	%edi, 28(%esp)	/ tmp87,
	jne	.LL8
	movl	32(%esp), %edx	/, x1
	cmpl	%ecx, %edx	/ y1, x1
	movl	64(%esp), %edi	/ y, y0
	movl	40(%esp), %esi	/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp	/ q0
	cmpl	%edi, %esi	/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp	/, q0
	subl	%edi,%esi	/ y0, x0
	sbbl	%ecx,%edx	/ tmp63, x1
.LL11:
	movl	%edx, %ecx	/ x1, x1
	xorl	%edx, %edx	/ x1
	xorl	%edi, %edi	/ x0
	addl	%esi, %edx	/ x0, x1
	adcl	%edi, %ecx	/ x0, x1
	movl	72(%esp), %esi	/ pmod,
	movl	%edx, (%esi)	/ x1,
	movl	%ecx, 4(%esi)	/ x1,
	xorl	%edi, %edi	/ <result>
	movl	%ebp, %esi	/ q0, <result>
	jmp	.LL22
	.align	16
.LL8:
	movb	28(%esp), %cl
	movl	64(%esp), %esi	/ y, dt
	movl	68(%esp), %edi	/ y, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	jne	.LL23
.LL17:
	movl	$32, %ecx	/, tmp102
	subl	28(%esp), %ecx	/, tmp102
	movl	%esi, %ebp	/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi	/ tmp102,
	movl	%edi, 24(%esp)	/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)	/, x2
	movl	44(%esp), %edi	/ x, dt
	movl	40(%esp), %esi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL18:
	movl	%edi, %ecx	/ dt,
	movl	%edi, %eax	/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx	/ x2,
	divl	24(%esp)
	movl	%edx, %ecx	/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax	/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx	/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	movl	%ecx, %edi	/ x1,
	subl	%eax,%esi	/ t0, x0
	sbbl	%edx,%edi	/ t1,
	movl	%edi, %eax	/, x1
	movl	%eax, %edx	/ x1, x1
	xorl	%eax, %eax	/ x1
	xorl	%ebp, %ebp	/ x0
	addl	%esi, %eax	/ x0, x1
	adcl	%ebp, %edx	/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax	/, x1, x1
	shrl	%cl, %edx	/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax	/ x1, x1
	xorl	%edx, %edx	/ x1
.LL16:
	movl	72(%esp), %ecx	/ pmod,
	movl	20(%esp), %esi	/, <result>
	xorl	%edi, %edi	/ <result>
	movl	%eax, (%ecx)	/ x1,
	movl	%edx, 4(%ecx)	/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax	/ x0, t0
	jbe	.LL15
.LL14:
	decl	20(%esp)
	subl	%ebp,%eax	/ y0, t0
	sbbl	24(%esp),%edx	/, t1
	jmp	.LL15
.LL23:
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)

/*
 * Unsigned division without remainder.
 */
/ uint64_t
/ UDiv(uint64_t x, uint64_t y)
/ {
/ 	if (HI(y) == 0) {
/ 		/* simple cases: y is a single uint32_t */
/ 		uint32_t	div_hi, div_rem;
/ 		uint32_t	q0, q1;
/
/ 		/* calculate q1 */
/ 		if (HI(x) < LO(y)) {
/ 			/* result is a single uint32_t, use one division */
/ 			q1 = 0;
/ 			div_hi = HI(x);
/ 		} else {
/ 			/* result is a double uint32_t, use two divisions */
/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/ 		}
/
/ 		/* calculate q0 and remainder */
/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/ 		/* return result */
/ 		return (HILO(q1, q0));
/
/ 	} else if (HI(x) < HI(y)) {
/ 		/* HI(x) < HI(y) => x < y => result is 0 */
/
/ 		/* return result */
/ 		return (0);
/
/ 	} else {
/ 		/*
/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/ 		 * result
/ 		 */
/ 		uint32_t		y0, y1;
/ 		uint32_t		x1, x0;
/ 		uint32_t		q0;
/ 		unsigned		normshift;
/
/ 		/* normalize by shifting x and y so MSB(y) == 1 */
/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/ 		normshift = 31 - normshift;
/
/ 		if (normshift == 0) {
/ 			/* no shifting needed, and x < 2*y so q <= 1 */
/ 			y1 = HI(y);
/ 			y0 = LO(y);
/ 			x1 = HI(x);
/ 			x0 = LO(x);
/
/ 			/* if x >= y then q = 1 (note x1 >= y1) */
/ 			if (x1 > y1 || x0 >= y0) {
/ 				q0 = 1;
/ 				/* subtract y from x to get remainder */
/ 				/* A_SUB2(y0, y1, x0, x1); */
/ 			} else {
/ 				q0 = 0;
/ 			}
/
/ 			/* return result */
/ 			return (q0);
/
/ 		} else {
/ 			/*
/ 			 * the last case: result is one uint32_t, but we need to
/ 			 * normalize
/ 			 */
/ 			uint64_t	dt;
/ 			uint32_t		t0, t1, x2;
/
/ 			/* normalize y */
/ 			dt = (y << normshift);
/ 			y1 = HI(dt);
/ 			y0 = LO(dt);
/
/ 			/* normalize x (we need 3 uint32_ts!!!) */
/ 			x2 = (HI(x) >> (32 - normshift));
/ 			dt = (x << normshift);
/ 			x1 = HI(dt);
/ 			x0 = LO(dt);
/
/ 			/* estimate q0, and reduce x to a two uint32_t value */
/ 			A_DIV32(x1, x2, y1, q0, x1);
/
/ 			/* adjust q0 down if too high */
/ 			/*
/ 			 * because of the limited range of x2 we can only be
/ 			 * one off
/ 			 */
/ 			A_MUL32(y0, q0, t0, t1);
/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/ 				q0--;
/ 			}
/ 			/* return result */
/ 			return (q0);
/ 		}
/ 	}
/ }
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)	/ x, x
	movl	60(%esp), %edx	/ y,
	testl	%edx, %edx	/ tmp62
	movl	%eax, 32(%esp)	/ x, x
	movl	%edx, %ecx	/ tmp61, tmp62
	movl	%edx, %eax	/, tmp61
	jne	.LL26
	movl	36(%esp), %esi	/ x,
	cmpl	56(%esp), %esi	/ y, tmp67
	movl	%esi, %eax	/, tmp67
	movl	%esi, %edx	/ tmp67, div_hi
	jb	.LL28
	movl	%ecx, %edx	/ tmp62, div_hi
	divl	56(%esp)	/ y
	movl	%eax, %ecx	/, q1
.LL28:
	xorl	%esi, %esi	/ <result>
	movl	%ecx, %edi	/ <result>, <result>
	movl	32(%esp), %eax	/ x, q0
	xorl	%ecx, %ecx	/ q0
	divl	56(%esp)	/ y
	addl	%eax, %esi	/ q0, <result>
	adcl	%ecx, %edi	/ q0, <result>
.LL25:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL26:
	movl	36(%esp), %esi	/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)	/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi	/ <result>
	xorl	%edi, %edi	/ <result>
	cmpl	%eax, 24(%esp)	/ tmp61,
	jb	.LL25
	bsrl	%eax,%ebp	/ tmp61, normshift
	movl	$31, %eax	/, tmp85
	subl	%ebp, %eax	/ normshift, normshift
	jne	.LL32
	movl	24(%esp), %eax	/, x1
	cmpl	%ecx, %eax	/ tmp62, x1
	movl	56(%esp), %esi	/ y, y0
	movl	32(%esp), %edx	/ x, x0
	ja	.LL34
	xorl	%eax, %eax	/ q0
	cmpl	%esi, %edx	/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax	/, q0
.LL35:
	movl	%eax, %esi	/ q0, <result>
	xorl	%edi, %edi	/ <result>
.LL45:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL32:
	movb	%al, %cl
	movl	56(%esp), %esi	/ y,
	movl	60(%esp), %edi	/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	movl	$32, %ecx	/, tmp96
	subl	%eax, %ecx	/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)	/, dt
	movl	24(%esp), %ebp	/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp	/ tmp96, x2
	movl	%esi, 16(%esp)	/, dt
	movb	%al, %cl
	movl	32(%esp), %esi	/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax	/ tmp1,
	movl	%ebp, %edx	/ x2,
	divl	8(%esp)
	movl	%edx, %ebp	/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx	/, q0
	movl	16(%esp), %eax	/ dt,
	mull	%ecx	/ q0
	cmpl	%ebp, %edx	/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi	/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi	/ q0, <result>
.LL46:
	xorl	%edi, %edi	/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax	/ x0, t0
	jbe	.LL39
.LL38:
	decl	%ecx		/ q0
	movl	%ecx, %esi	/ q0, <result>
	jmp	.LL46
.LL43:
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)

/*
 * __udiv64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
 */
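/ Equivalent C (an illustrative sketch; the argument pop on return has
/ no C equivalent):
/
/ uint64_t
/ __udiv64(uint64_t x, uint64_t y)
/ {
/ 	return (UDiv(x, y));
/ }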
	ENTRY(__udiv64)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret     $16
	SET_SIZE(__udiv64)

/*
 * __urem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.  __urem64 pops the arguments on return.
 */
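/ Equivalent C (an illustrative sketch; the argument pop on return has
/ no C equivalent):
/
/ uint64_t
/ __urem64(uint64_t x, uint64_t y)
/ {
/ 	uint64_t	rem;
/
/ 	(void) UDivRem(x, y, &rem);
/ 	return (rem);
/ }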
	ENTRY(__urem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__urem64)

/*
 * __div64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.  __div64 pops the arguments on return.
 */
/ int64_t
/ __div64(int64_t x, int64_t y)
/ {
/ 	int		negative;
/ 	uint64_t	xt, yt, r;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 		negative = 1;
/ 	} else {
/ 		xt = x;
/ 		negative = 0;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 		negative ^= 1;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	r = UDiv(xt, yt);
/ 	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__div64)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL84
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL85
.LL82:
	pushl	%edx		/ yt
	pushl	%eax		/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL83
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL83:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16
	.align	16
.LL84:
	negl	%eax		/ x
	adcl	$0, %edx	/, x
	negl	%edx		/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL82
	.align	16
.LL85:
	negl	%eax		/ yt
	adcl	$0, %edx	/, yt
	negl	%edx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL82
	SET_SIZE(__div64)

/*
 * __rem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
 */
/ int64_t
/ __rem64(int64_t x, int64_t y)
/ {
/ 	uint64_t	xt, yt, rem;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 	} else {
/ 		xt = x;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	(void) UDivRem(xt, yt, &rem);
/ 	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__rem64)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL92
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL93
.LL90:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax		/ tmp66
	pushl	%edi		/ yt
	pushl	%esi		/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL94
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret	$16
	.align	16
.LL92:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL90
	.align	16
.LL93:
	negl	%esi		/ yt
	adcl	$0, %edi	/, yt
	negl	%edi		/ yt
	jmp	.LL90
	.align	16
.LL94:
	negl	%eax		/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ rem
	popl	%edi
	ret	$16
	SET_SIZE(__rem64)

#endif	/* __lint */

#if defined(__lint)

/*
 * C support for 64-bit modulo and division.
 * GNU routines callable from C (though generated by the compiler).
 * Hand-customized compiler output - see comments for details.
 */
/*ARGSUSED*/
unsigned long long
__udivdi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
unsigned long long
__umoddi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
long long
__divdi3(long long a, long long b)
{ return (0); }

/*ARGSUSED*/
long long
__moddi3(long long a, long long b)
{ return (0); }

/* ARGSUSED */
int64_t
__divrem64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__udivrem64(uint64_t a, uint64_t b)
{ return (0); }

#else	/* __lint */


/*
 * int32_t/int64_t division/manipulation
 *
 * Hand-customized compiler output: the non-GCC entry points depart from
 * the SYS V ABI by requiring their arguments to be popped, and in the
 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
 * compiler-generated use of %edx:%eax for the first argument of
 * internal entry points.
 *
 * Inlines for speed:
 * - counting the number of leading zeros in a word
 * - multiplying two 32-bit numbers giving a 64-bit result
 * - dividing a 64-bit number by a 32-bit number, giving both quotient
 *	and remainder
 * - subtracting two 64-bit results
 */
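
/*
 * An illustrative call sequence for the [u]divrem64 convention above
 * (a sketch with symbolic operands, not compiler output):
 *
 *	pushl	y_hi		/ push 64-bit divisor, high word first
 *	pushl	y_lo
 *	pushl	x_hi		/ then the 64-bit dividend
 *	pushl	x_lo
 *	call	__udivrem64	/ callee pops all four words on return
 *				/ quotient in %edx:%eax, remainder in
 *				/ %ecx:%esi
 */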
/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
/
/ /* give index of highest bit */
/ #define	HIBIT(a, r) \
/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
/
/ /* multiply two uint32_ts resulting in a uint64_t */
/ #define	A_MUL32(a, b, lo, hi) \
/     asm("mull %2" \
/ 	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
/
/ /* divide a uint64_t by a uint32_t */
/ #define	A_DIV32(lo, hi, b, q, r) \
/     asm("divl %2" \
/ 	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
/ 	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
/
/ /* subtract two uint64_ts (with borrow) */
/ #define	A_SUB2(bl, bh, al, ah) \
/     asm("subl %4,%0\n\tsbbl %5,%1" \
/ 	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
/ 	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
/ 	"g"((uint32_t)(bh)))

/*
 * __udivdi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
	ENTRY(__udivdi3)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret
	SET_SIZE(__udivdi3)

/*
 * __umoddi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
	ENTRY(__umoddi3)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret
	SET_SIZE(__umoddi3)

/*
 * __divdi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
/ int64_t
/ __divdi3(int64_t x, int64_t y)
/ {
/ 	int		negative;
/ 	uint64_t	xt, yt, r;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 		negative = 1;
/ 	} else {
/ 		xt = x;
/ 		negative = 0;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 		negative ^= 1;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	r = UDiv(xt, yt);
/ 	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__divdi3)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL55
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL56
.LL53:
	pushl	%edx		/ yt
	pushl	%eax		/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL54
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL54:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL55:
	negl	%eax		/ x
	adcl	$0, %edx	/, x
	negl	%edx		/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL53
	.align	16
.LL56:
	negl	%eax		/ yt
	adcl	$0, %edx	/, yt
	negl	%edx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL53
	SET_SIZE(__divdi3)

/*
 * __moddi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
/ int64_t
/ __moddi3(int64_t x, int64_t y)
/ {
/ 	uint64_t	xt, yt, rem;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 	} else {
/ 		xt = x;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	(void) UDivRem(xt, yt, &rem);
/ 	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__moddi3)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL63
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL64
.LL61:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax		/ tmp66
	pushl	%edi		/ yt
	pushl	%esi		/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL65
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret
	.align	16
.LL63:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL61
	.align	16
.LL64:
	negl	%esi		/ yt
	adcl	$0, %edi	/, yt
	negl	%edi		/ yt
	jmp	.LL61
	.align	16
.LL65:
	negl	%eax		/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ rem
	popl	%edi
	ret
	SET_SIZE(__moddi3)

/*
 * __udivrem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
 * pops the arguments on return.
 */
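/ Conceptually (an illustrative sketch; C cannot express the second
/ register-pair return, so the remainder is shown via UDivRem's
/ out-parameter):
/
/ uint64_t
/ __udivrem64(uint64_t x, uint64_t y)
/ {
/ 	uint64_t	rem;
/ 	uint64_t	q = UDivRem(x, y, &rem);
/
/ 	/* q returns in %edx:%eax, rem in %ecx:%esi */
/ 	return (q);
/ }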
	ENTRY(__udivrem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp64
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	16(%esp), %ecx	/ rem, tmp63
	movl	12(%esp), %esi	/ rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__udivrem64)

/*
 * Signed division with remainder.
 */
/ int64_t
/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
/ {
/ 	int		negative;
/ 	uint64_t	xt, yt, r, rem;
/
/ 	if (x < 0) {
/ 		xt = -(uint64_t) x;
/ 		negative = 1;
/ 	} else {
/ 		xt = x;
/ 		negative = 0;
/ 	}
/ 	if (y < 0) {
/ 		yt = -(uint64_t) y;
/ 		negative ^= 1;
/ 	} else {
/ 		yt = y;
/ 	}
/ 	r = UDivRem(xt, yt, &rem);
/ 	*pmod = (x < 0 ? (int64_t) - rem : rem);
/ 	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx	/ x
	movl	%edx, %edi	/ x, x
	js	.LL73
	movl	44(%esp), %esi	/ y,
	xorl	%ebp, %ebp	/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/ x, xt
	movl	%eax, 8(%esp)	/ x, xt
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	js	.LL74
.LL70:
	leal	16(%esp), %eax	/, tmp70
	pushl	%eax		/ tmp70
	pushl	%ecx		/ yt
	pushl	%edx		/ yt
	movl	20(%esp), %eax	/ xt, xt
	movl	24(%esp), %edx	/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)	/, r
	movl	%eax, 12(%esp)	/, r
	addl	$12, %esp
	testl	%edi, %edi	/ x
	movl	16(%esp), %edx	/ rem, rem
	movl	20(%esp), %ecx	/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi	/ pmod, pmod
	testl	%ebp, %ebp	/ negative
	movl	%edx, (%edi)	/ rem,* pmod
	movl	%ecx, 4(%edi)	/ rem,
	movl	(%esp), %eax	/ r, r
	movl	4(%esp), %edx	/ r, r
	je	.LL72
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL73:
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi	/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/, xt
	movl	%eax, 8(%esp)	/, xt
	movl	$1, %ebp	/, negative
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	jns	.LL70
	.align	16
.LL74:
	negl	%edx		/ yt
	adcl	$0, %ecx	/, yt
	negl	%ecx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL70
	.align	16
.LL75:
	negl	%edx		/ rem
	adcl	$0, %ecx	/, rem
	negl	%ecx		/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)

/*
 * __divrem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
 * pops the arguments on return.
 */
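/ Conceptually (an illustrative sketch, mirroring __udivrem64 above):
/
/ int64_t
/ __divrem64(int64_t x, int64_t y)
/ {
/ 	int64_t	rem;
/ 	int64_t	q = SDivRem(x, y, &rem);
/
/ 	/* q returns in %edx:%eax, rem in %ecx:%esi */
/ 	return (q);
/ }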
	ENTRY(__divrem64)
	subl	$20, %esp
	movl	%esp, %ecx	/, tmp64
	movl	24(%esp), %eax	/ x, x
	movl	28(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	40(%esp)	/ y
	pushl	40(%esp)
	call	SDivRem
	movl	16(%esp), %ecx
	movl	12(%esp),%esi	/ rem
	addl	$32, %esp
	ret	$16
	SET_SIZE(__divrem64)


#endif /* __lint */

#endif /* defined(__i386) && !defined(__amd64) */