/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#if !defined(lint)
	.ident	"%Z%%M%	%I%	%E% SMI"

	.file	"muldiv.s"
#endif

#if defined(__i386) && !defined(__amd64)

/*
 * Helper routines for 32-bit compilers to perform 64-bit math.
 * These are used both by the Sun and GCC compilers.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>


#if defined(__lint)
#include <sys/types.h>

/* ARGSUSED */
int64_t
__mul64(int64_t a, int64_t b)
{
	return (0);
}

#else	/* __lint */

/
/ function __mul64(A,B:Longint):Longint;
/	{Overflow is not checked}
/
/ We essentially do multiply by longhand, using base 2**32 digits.
/		a	b	parameter A
/	x	c	d	parameter B
/		---------
/		ad	bd
/	ac	bc
/	-----------------
/	ac	ad+bc	bd
/
/ We can ignore ac and top 32 bits of ad+bc: if <> 0, overflow happened.
/
	ENTRY(__mul64)
	push	%ebp
	mov	%esp,%ebp
	pushl	%esi
	mov	12(%ebp),%eax	/ A.hi (a)
	mull	16(%ebp)	/ Multiply A.hi by B.lo (produces ad)
	xchg	%ecx,%eax	/ ecx = bottom half of ad.
	movl	8(%ebp),%eax	/ A.Lo (b)
	movl	%eax,%esi	/ Save A.lo for later
	mull	16(%ebp)	/ Multiply A.Lo by B.LO (dx:ax = bd.)
	addl	%edx,%ecx	/ cx is ad
	xchg	%eax,%esi	/ esi is bd, eax = A.lo (d)
	mull	20(%ebp)	/ Multiply A.lo * B.hi (producing bc)
	addl	%ecx,%eax	/ Produce ad+bc
	movl	%esi,%edx
	xchg	%eax,%edx
	popl	%esi
	movl	%ebp,%esp
	popl	%ebp
	ret	$16
	SET_SIZE(__mul64)

#endif	/* __lint */

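/*
 * For reference, a C sketch of the longhand multiply above (not part of
 * the original source; mul64_sketch is a hypothetical name, and HI/LO
 * split a uint64_t into base-2**32 digits as in the diagram; they appear
 * as macros further below):
 *
 *	uint64_t
 *	mul64_sketch(uint64_t A, uint64_t B)
 *	{
 *		uint32_t a = HI(A), b = LO(A);
 *		uint32_t c = HI(B), d = LO(B);
 *
 *		return ((uint64_t)b * d +
 *		    (((uint64_t)a * d + (uint64_t)b * c) << 32));
 *	}
 *
 * ac and the top 32 bits of ad+bc fall outside the low 64 bits of the
 * product, so they are dropped; a nonzero value there means overflow.
 */
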
/*
 * C support for 64-bit modulo and division.
 * Hand-customized compiler output - see comments for details.
 */
#if defined(__lint)

/* ARGSUSED */
uint64_t
__udiv64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__urem64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__div64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__rem64(int64_t a, int64_t b)
{ return (0); }

#else	/* __lint */

/ /*
/  * Unsigned division with remainder.
/  * Divide two uint64_ts, and calculate remainder.
/  */
/ uint64_t
/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
/ {
/	/* simple cases: y is a single uint32_t */
/	if (HI(y) == 0) {
/		uint32_t	div_hi, div_rem;
/		uint32_t	q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return remainder */
/		*pmod = div_rem;
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return remainder */
/		*pmod = x;
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t	y0, y1;
/		uint32_t	x1, x0;
/		uint32_t	q0;
/		uint32_t	normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				A_SUB2(y0, y1, x0, x1);
/			} else {
/				q0 = 0;
/			}
/
/			/* return remainder */
/			*pmod = HILO(x1, x0);
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need to
/			 * normalize
/			 */
/			uint64_t	dt;
/			uint32_t	t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/				A_SUB2(y0, y1, t0, t1);
/			}
/			/* return remainder */
/			/* subtract product from x to get remainder */
/			A_SUB2(t0, t1, x0, x1);
/			*pmod = (HILO(x1, x0) >> normshift);
/
/			/* return result */
/			return (q0);
/		}
/	}
/ }
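/
/ A worked example of the simple (HI(y) == 0) path above, added for
/ illustration (not part of the original source): dividing
/ x = 0x0000000500000000 by y = 3.  HI(x) = 5 >= LO(y) = 3, so two
/ divisions are used:
/
/	A_DIV32(5, 0, 3, q1, div_hi)	=> q1 = 1, div_hi = 2
/	A_DIV32(0, 2, 3, q0, div_rem)	=> 0x200000000 / 3:
/					   q0 = 0xaaaaaaaa, div_rem = 2
/
/ giving HILO(1, 0xaaaaaaaa) = 0x1aaaaaaaa, remainder 2, which matches
/ 21474836480 = 7158278826 * 3 + 2.
/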
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi	/ y,
	testl	%edi, %edi	/ tmp63
	movl	%eax, 40(%esp)	/ x, x
	movl	%edx, 44(%esp)	/ x, x
	movl	%edi, %esi	/, tmp62
	movl	%edi, %ecx	/ tmp62, tmp63
	jne	.LL2
	movl	%edx, %eax	/, tmp68
	cmpl	64(%esp), %eax	/ y, tmp68
	jae	.LL21
.LL4:
	movl	72(%esp), %ebp	/ pmod,
	xorl	%esi, %esi	/ <result>
	movl	40(%esp), %eax	/ x, q0
	movl	%ecx, %edi	/ <result>, <result>
	divl	64(%esp)	/ y
	movl	%edx, (%ebp)	/ div_rem,
	xorl	%edx, %edx	/ q0
	addl	%eax, %esi	/ q0, <result>
	movl	$0, 4(%ebp)
	adcl	%edx, %edi	/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL2:
	movl	44(%esp), %eax	/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax	/ tmp62, tmp5
	movl	%eax, 32(%esp)	/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	movl	72(%esp), %esi	/ pmod,
	movl	40(%esp), %ebp	/ x,
	movl	44(%esp), %ecx	/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi	/ <result>
	xorl	%esi, %esi	/ <result>
.LL22:
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL21:
	movl	%edi, %edx	/ tmp63, div_hi
	divl	64(%esp)	/ y
	movl	%eax, %ecx	/, q1
	jmp	.LL4
	.align	16
.LL6:
	movl	$31, %edi	/, tmp87
	bsrl	%esi,%edx	/ tmp62, normshift
	subl	%edx, %edi	/ normshift, tmp87
	movl	%edi, 28(%esp)	/ tmp87,
	jne	.LL8
	movl	32(%esp), %edx	/, x1
	cmpl	%ecx, %edx	/ y1, x1
	movl	64(%esp), %edi	/ y, y0
	movl	40(%esp), %esi	/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp	/ q0
	cmpl	%edi, %esi	/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp	/, q0
	subl	%edi,%esi	/ y0, x0
	sbbl	%ecx,%edx	/ tmp63, x1
.LL11:
	movl	%edx, %ecx	/ x1, x1
	xorl	%edx, %edx	/ x1
	xorl	%edi, %edi	/ x0
	addl	%esi, %edx	/ x0, x1
	adcl	%edi, %ecx	/ x0, x1
	movl	72(%esp), %esi	/ pmod,
	movl	%edx, (%esi)	/ x1,
	movl	%ecx, 4(%esi)	/ x1,
	xorl	%edi, %edi	/ <result>
	movl	%ebp, %esi	/ q0, <result>
	jmp	.LL22
	.align	16
.LL8:
	movb	28(%esp), %cl
	movl	64(%esp), %esi	/ y, dt
	movl	68(%esp), %edi	/ y, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	jne	.LL23
.LL17:
	movl	$32, %ecx	/, tmp102
	subl	28(%esp), %ecx	/, tmp102
	movl	%esi, %ebp	/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi	/ tmp102,
	movl	%edi, 24(%esp)	/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)	/, x2
	movl	44(%esp), %edi	/ x, dt
	movl	40(%esp), %esi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL18:
	movl	%edi, %ecx	/ dt,
	movl	%edi, %eax	/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx	/ x2,
	divl	24(%esp)
	movl	%edx, %ecx	/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax	/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx	/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	movl	%ecx, %edi	/ x1,
	subl	%eax,%esi	/ t0, x0
	sbbl	%edx,%edi	/ t1,
	movl	%edi, %eax	/, x1
	movl	%eax, %edx	/ x1, x1
	xorl	%eax, %eax	/ x1
	xorl	%ebp, %ebp	/ x0
	addl	%esi, %eax	/ x0, x1
	adcl	%ebp, %edx	/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax	/, x1, x1
	shrl	%cl, %edx	/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax	/ x1, x1
	xorl	%edx, %edx	/ x1
.LL16:
	movl	72(%esp), %ecx	/ pmod,
	movl	20(%esp), %esi	/, <result>
	xorl	%edi, %edi	/ <result>
	movl	%eax, (%ecx)	/ x1,
	movl	%edx, 4(%ecx)	/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax	/ x0, t0
	jbe	.LL15
.LL14:
	decl	20(%esp)
	subl	%ebp,%eax	/ y0, t0
	sbbl	24(%esp),%edx	/, t1
	jmp	.LL15
.LL23:
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)

/*
 * Unsigned division without remainder.
 */
/ uint64_t
/ UDiv(uint64_t x, uint64_t y)
/ {
/	if (HI(y) == 0) {
/		/* simple cases: y is a single uint32_t */
/		uint32_t	div_hi, div_rem;
/		uint32_t	q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t	y0, y1;
/		uint32_t	x1, x0;
/		uint32_t	q0;
/		unsigned	normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				/* A_SUB2(y0, y1, x0, x1); */
/			} else {
/				q0 = 0;
/			}
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need to
/			 * normalize
/			 */
/			uint64_t	dt;
/			uint32_t	t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/			}
/			/* return result */
/			return (q0);
/		}
/	}
/ }
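/
/ An illustrative check of the normshift == 0 case above (not part of the
/ original source): with x = 0xc000000000000000 and y = 0x8000000000000000,
/ MSB(y) is already set, so normshift == 0; then x1 = 0xc0000000 >
/ y1 = 0x80000000 gives q0 = 1, and indeed x / y == 1.
/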
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)	/ x, x
	movl	60(%esp), %edx	/ y,
	testl	%edx, %edx	/ tmp62
	movl	%eax, 32(%esp)	/ x, x
	movl	%edx, %ecx	/ tmp61, tmp62
	movl	%edx, %eax	/, tmp61
	jne	.LL26
	movl	36(%esp), %esi	/ x,
	cmpl	56(%esp), %esi	/ y, tmp67
	movl	%esi, %eax	/, tmp67
	movl	%esi, %edx	/ tmp67, div_hi
	jb	.LL28
	movl	%ecx, %edx	/ tmp62, div_hi
	divl	56(%esp)	/ y
	movl	%eax, %ecx	/, q1
.LL28:
	xorl	%esi, %esi	/ <result>
	movl	%ecx, %edi	/ <result>, <result>
	movl	32(%esp), %eax	/ x, q0
	xorl	%ecx, %ecx	/ q0
	divl	56(%esp)	/ y
	addl	%eax, %esi	/ q0, <result>
	adcl	%ecx, %edi	/ q0, <result>
.LL25:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL26:
	movl	36(%esp), %esi	/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)	/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi	/ <result>
	xorl	%edi, %edi	/ <result>
	cmpl	%eax, 24(%esp)	/ tmp61,
	jb	.LL25
	bsrl	%eax,%ebp	/ tmp61, normshift
	movl	$31, %eax	/, tmp85
	subl	%ebp, %eax	/ normshift, normshift
	jne	.LL32
	movl	24(%esp), %eax	/, x1
	cmpl	%ecx, %eax	/ tmp62, x1
	movl	56(%esp), %esi	/ y, y0
	movl	32(%esp), %edx	/ x, x0
	ja	.LL34
	xorl	%eax, %eax	/ q0
	cmpl	%esi, %edx	/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax	/, q0
.LL35:
	movl	%eax, %esi	/ q0, <result>
	xorl	%edi, %edi	/ <result>
.LL45:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL32:
	movb	%al, %cl
	movl	56(%esp), %esi	/ y,
	movl	60(%esp), %edi	/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	movl	$32, %ecx	/, tmp96
	subl	%eax, %ecx	/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)	/, dt
	movl	24(%esp), %ebp	/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp	/ tmp96, x2
	movl	%esi, 16(%esp)	/, dt
	movb	%al, %cl
	movl	32(%esp), %esi	/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax	/ tmp1,
	movl	%ebp, %edx	/ x2,
	divl	8(%esp)
	movl	%edx, %ebp	/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx	/, q0
	movl	16(%esp), %eax	/ dt,
	mull	%ecx		/ q0
	cmpl	%ebp, %edx	/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi	/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi	/ q0, <result>
.LL46:
	xorl	%edi, %edi	/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax	/ x0, t0
	jbe	.LL39
.LL38:
	decl	%ecx		/ q0
	movl	%ecx, %esi	/ q0, <result>
	jmp	.LL46
.LL43:
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)

/*
 * __udiv64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
 */
	ENTRY(__udiv64)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret	$16
	SET_SIZE(__udiv64)

/*
 * __urem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.  __urem64 pops the arguments on return.
 */
	ENTRY(__urem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__urem64)

/*
 * __div64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.  __div64 pops the arguments on return.
 */
/ int64_t
/ __div64(int64_t x, int64_t y)
/ {
/	int		negative;
/	uint64_t	xt, yt, r;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDiv(xt, yt);
/	return (negative ? (int64_t) - r : r);
/ }
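/
/ For example (illustrative, not part of the original source):
/ __div64(-7, 2) computes xt = 7, negative = 1, r = UDiv(7, 2) = 3, and
/ returns -3, i.e. C's truncating division; __div64(7, -2) likewise
/ returns -3, since exactly one operand is negative.
/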
	ENTRY(__div64)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL84
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL85
.LL82:
	pushl	%edx		/ yt
	pushl	%eax		/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL83
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL83:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16
	.align	16
.LL84:
	negl	%eax		/ x
	adcl	$0, %edx	/, x
	negl	%edx		/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL82
	.align	16
.LL85:
	negl	%eax		/ yt
	adcl	$0, %edx	/, yt
	negl	%edx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL82
	SET_SIZE(__div64)

/*
 * __rem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
 */
/ int64_t
/ __rem64(int64_t x, int64_t y)
/ {
/	uint64_t	xt, yt, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/	} else {
/		xt = x;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/	} else {
/		yt = y;
/	}
/	(void) UDivRem(xt, yt, &rem);
/	return (x < 0 ? (int64_t) - rem : rem);
/ }
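/
/ For example (illustrative, not part of the original source):
/ __rem64(-7, 2) returns -1 and __rem64(7, -2) returns 1; the remainder
/ takes the sign of the dividend x, so that q * y + rem == x holds with
/ the truncating quotient computed by __div64.
/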
	ENTRY(__rem64)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL92
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL93
.LL90:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax		/ tmp66
	pushl	%edi		/ yt
	pushl	%esi		/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL94
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret	$16
	.align	16
.LL92:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL90
	.align	16
.LL93:
	negl	%esi		/ yt
	adcl	$0, %edi	/, yt
	negl	%edi		/ yt
	jmp	.LL90
	.align	16
.LL94:
	negl	%eax		/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ rem
	popl	%edi
	ret	$16
	SET_SIZE(__rem64)

#endif	/* __lint */

#if defined(__lint)

/*
 * C support for 64-bit modulo and division.
 * GNU routines callable from C (though generated by the compiler).
 * Hand-customized compiler output - see comments for details.
 */
/*ARGSUSED*/
unsigned long long
__udivdi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
unsigned long long
__umoddi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
long long
__divdi3(long long a, long long b)
{ return (0); }

/*ARGSUSED*/
long long
__moddi3(long long a, long long b)
{ return (0); }

/* ARGSUSED */
int64_t
__divrem64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__udivrem64(uint64_t a, uint64_t b)
{ return (0); }

#else	/* __lint */

/*
 * int32_t/int64_t division/manipulation
 *
 * Hand-customized compiler output: the non-GCC entry points depart from
 * the SYS V ABI by requiring their arguments to be popped, and in the
 * [u]divrem64 cases returning the remainder in %ecx:%esi.  Note the
 * compiler-generated use of %edx:%eax for the first argument of
 * internal entry points.
 *
 * Inlines for speed:
 * - counting the number of leading zeros in a word
 * - multiplying two 32-bit numbers giving a 64-bit result
 * - dividing a 64-bit number by a 32-bit number, giving both quotient
 *   and remainder
 * - subtracting two 64-bit results
 */
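/ A sketch of the non-GCC calling convention described above, added for
/ illustration (x_hi, x_lo, y_hi and y_lo are hypothetical operands); a
/ caller of __udivrem64 would look like:
/
/	pushl	y_hi		/ arguments pushed right to left,
/	pushl	y_lo		/ high word before low word
/	pushl	x_hi
/	pushl	x_lo
/	call	__udivrem64	/ callee pops all 16 bytes of arguments;
/				/ quotient in %edx:%eax,
/				/ remainder in %ecx:%esi
/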
/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
/
/ /* give index of highest bit */
/ #define	HIBIT(a, r) \
/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
/
/ /* multiply two uint32_ts resulting in a uint64_t */
/ #define	A_MUL32(a, b, lo, hi) \
/     asm("mull %2" \
/	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
/
/ /* divide a uint64_t by a uint32_t */
/ #define	A_DIV32(lo, hi, b, q, r) \
/     asm("divl %2" \
/	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
/	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
/
/ /* subtract two uint64_ts (with borrow) */
/ #define	A_SUB2(bl, bh, al, ah) \
/     asm("subl %4,%0\n\tsbbl %5,%1" \
/	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
/	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
/	"g"((uint32_t)(bh)))

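/ A 32-bit compiler lowers C-level 64-bit division to calls to the GNU
/ routines below; e.g. (illustrative only, udiv and smod are hypothetical
/ names):
/
/	unsigned long long
/	udiv(unsigned long long a, unsigned long long b)
/	{
/		return (a / b);		/* emitted as a call to __udivdi3 */
/	}
/
/	long long
/	smod(long long a, long long b)
/	{
/		return (a % b);		/* emitted as a call to __moddi3 */
/	}
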
/*
 * __udivdi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
	ENTRY(__udivdi3)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret
	SET_SIZE(__udivdi3)

/*
 * __umoddi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
	ENTRY(__umoddi3)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret
	SET_SIZE(__umoddi3)

/*
 * __divdi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
/ int64_t
/ __divdi3(int64_t x, int64_t y)
/ {
/	int		negative;
/	uint64_t	xt, yt, r;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDiv(xt, yt);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__divdi3)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL55
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL56
.LL53:
	pushl	%edx		/ yt
	pushl	%eax		/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL54
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL54:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL55:
	negl	%eax		/ x
	adcl	$0, %edx	/, x
	negl	%edx		/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL53
	.align	16
.LL56:
	negl	%eax		/ yt
	adcl	$0, %edx	/, yt
	negl	%edx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL53
	SET_SIZE(__divdi3)

/*
 * __moddi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
/ int64_t
/ __moddi3(int64_t x, int64_t y)
/ {
/	uint64_t	xt, yt, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/	} else {
/		xt = x;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/	} else {
/		yt = y;
/	}
/	(void) UDivRem(xt, yt, &rem);
/	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__moddi3)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL63
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL64
.LL61:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax		/ tmp66
	pushl	%edi		/ yt
	pushl	%esi		/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL65
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret
	.align	16
.LL63:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL61
	.align	16
.LL64:
	negl	%esi		/ yt
	adcl	$0, %edi	/, yt
	negl	%edi		/ yt
	jmp	.LL61
	.align	16
.LL65:
	negl	%eax		/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ rem
	popl	%edi
	ret
	SET_SIZE(__moddi3)

/*
 * __udivrem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
 * pops the arguments on return.
 */
	ENTRY(__udivrem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp64
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	16(%esp), %ecx	/ rem, tmp63
	movl	12(%esp), %esi	/ rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__udivrem64)

/*
 * Signed division with remainder.
 */
/ int64_t
/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
/ {
/	int		negative;
/	uint64_t	xt, yt, r, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDivRem(xt, yt, &rem);
/	*pmod = (x < 0 ? (int64_t) - rem : rem);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx	/ x
	movl	%edx, %edi	/ x, x
	js	.LL73
	movl	44(%esp), %esi	/ y,
	xorl	%ebp, %ebp	/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/ x, xt
	movl	%eax, 8(%esp)	/ x, xt
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	js	.LL74
.LL70:
	leal	16(%esp), %eax	/, tmp70
	pushl	%eax		/ tmp70
	pushl	%ecx		/ yt
	pushl	%edx		/ yt
	movl	20(%esp), %eax	/ xt, xt
	movl	24(%esp), %edx	/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)	/, r
	movl	%eax, 12(%esp)	/, r
	addl	$12, %esp
	testl	%edi, %edi	/ x
	movl	16(%esp), %edx	/ rem, rem
	movl	20(%esp), %ecx	/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi	/ pmod, pmod
	testl	%ebp, %ebp	/ negative
	movl	%edx, (%edi)	/ rem,* pmod
	movl	%ecx, 4(%edi)	/ rem,
	movl	(%esp), %eax	/ r, r
	movl	4(%esp), %edx	/ r, r
	je	.LL72
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL73:
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi	/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/, xt
	movl	%eax, 8(%esp)	/, xt
	movl	$1, %ebp	/, negative
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	jns	.LL70
	.align	16
.LL74:
	negl	%edx		/ yt
	adcl	$0, %ecx	/, yt
	negl	%ecx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL70
	.align	16
.LL75:
	negl	%edx		/ rem
	adcl	$0, %ecx	/, rem
	negl	%ecx		/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)

/*
 * __divrem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
 * pops the arguments on return.
 */
	ENTRY(__divrem64)
	subl	$20, %esp
	movl	%esp, %ecx	/, tmp64
	movl	24(%esp), %eax	/ x, x
	movl	28(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	40(%esp)	/ y
	pushl	40(%esp)
	call	SDivRem
	movl	16(%esp), %ecx
	movl	12(%esp), %esi	/ rem
	addl	$32, %esp
	ret	$16
	SET_SIZE(__divrem64)


#endif	/* __lint */

#endif	/* defined(__i386) && !defined(__amd64) */