#define	BDNZ	BC	16,0,

/*
 * 64/64 division adapted from powerpc compiler writer's handbook
 *
 * (R3:R4) = (R3:R4) / (R5:R6)   (64b) = (64b / 64b)
 *    quo       dvd       dvs
 *
 * Remainder is left in R7:R8
 *
 * Code comment notation:
 *    msw = most-significant (high-order) word, i.e. bits 0..31
 *    lsw = least-significant (low-order) word, i.e. bits 32..63
 *    LZ = Leading Zeroes
 *    SD = Significant Digits
 *
 * R3:R4 = dvd (input dividend); quo (output quotient)
 * R5:R6 = dvs (input divisor)
 *
 * R7:R8 = tmp; rem (output remainder)
 */

TEXT	_divu64(SB), $0
	MOVW	a+0(FP), R3
	MOVW	a+4(FP), R4
	MOVW	b+8(FP), R5
	MOVW	b+12(FP), R6

	/* count the number of leading 0s in the dividend */
	CMP	R3, $0		/* dvd.msw == 0? */
	CNTLZW	R3, R11		/* R11 = dvd.msw.LZ */
	CNTLZW	R4, R9		/* R9 = dvd.lsw.LZ */
	BNE	lab1		/* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
	ADD	$32, R9, R11	/* dvd.LZ = dvd.lsw.LZ + 32 */

lab1:
	/* count the number of leading 0s in the divisor */
	CMP	R5, $0		/* dvs.msw == 0? */
	CNTLZW	R5, R9		/* R9 = dvs.msw.LZ */
	CNTLZW	R6, R10		/* R10 = dvs.lsw.LZ */
	BNE	lab2		/* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
	ADD	$32, R10, R9	/* dvs.LZ = dvs.lsw.LZ + 32 */

lab2:
	/* determine shift amounts to minimize the number of iterations */
	CMP	R11, R9		/* compare dvd.LZ to dvs.LZ */
	SUBC	R11, $64, R10	/* R10 = dvd.SD */
	BGT	lab9		/* if(dvs > dvd) quotient = 0 */
	ADD	$1, R9		/* ++dvs.LZ (or --dvs.SD) */
	SUBC	R9, $64, R9	/* R9 = dvs.SD */
	ADD	R9, R11		/* (dvd.LZ + dvs.SD) = left shift of dvd for */
				/* initial dvd */
	SUB	R9, R10, R9	/* (dvd.SD - dvs.SD) = right shift of dvd for */
				/* initial tmp */
	MOVW	R9, CTR		/* number of iterations = dvd.SD - dvs.SD */

	/* R7:R8 = R3:R4 >> R9 */
	CMP	R9, $32
	ADD	$-32, R9, R7
	BLT	lab3		/* if(R9 < 32) jump to lab3 */
	SRW	R7, R3, R8	/* tmp.lsw = dvd.msw >> (R9 - 32) */
	MOVW	$0, R7		/* tmp.msw = 0 */
	BR	lab4
lab3:
	SRW	R9, R4, R8	/* R8 = dvd.lsw >> R9 */
	SUBC	R9, $32, R7
	SLW	R7, R3, R7	/* R7 = dvd.msw << 32 - R9 */
	OR	R7, R8		/* tmp.lsw = R8 | R7 */
	SRW	R9, R3, R7	/* tmp.msw = dvd.msw >> R9 */

lab4:
	/* R3:R4 = R3:R4 << R11 */
	CMP	R11, $32
	ADDC	$-32, R11, R9
	BLT	lab5		/* (R11 < 32)? */
	SLW	R9, R4, R3	/* dvd.msw = dvd.lsw << R9 */
	MOVW	$0, R4		/* dvd.lsw = 0 */
	BR	lab6

lab5:
	SLW	R11, R3		/* R3 = dvd.msw << R11 */
	SUBC	R11, $32, R9
	SRW	R9, R4, R9	/* R9 = dvd.lsw >> 32 - R11 */
	OR	R9, R3		/* dvd.msw = R3 | R9 */
	SLW	R11, R4		/* dvd.lsw = dvd.lsw << R11 */

lab6:
	/* restoring division shift and subtract loop */
	MOVW	$-1, R10
	ADDC	$0, R7		/* clear carry bit before loop starts */
lab7:
	/* tmp:dvd is considered one large register */
	/* each portion is shifted left 1 bit by adding it to itself */
	/* adde sums the carry from the previous and creates a new carry */
	ADDE	R4, R4		/* shift dvd.lsw left 1 bit */
	ADDE	R3, R3		/* shift dvd.msw to left 1 bit */
	ADDE	R8, R8		/* shift tmp.lsw to left 1 bit */
	ADDE	R7, R7		/* shift tmp.msw to left 1 bit */
	SUBC	R6, R8, R11	/* tmp.lsw - dvs.lsw */
	SUBECC	R5, R7, R9	/* tmp.msw - dvs.msw */
	BLT	lab8		/* if(result < 0) clear carry bit */
	MOVW	R11, R8		/* move lsw */
	MOVW	R9, R7		/* move msw */
	ADDC	$1, R10, R11	/* set carry bit */
lab8:
	BDNZ	lab7

	ADDE	R4, R4		/* quo.lsw (lsb = CA) */
	ADDE	R3, R3		/* quo.msw (lsb from lsw) */

lab10:
	/* store quotient (R3:R4) through qp and remainder (R7:R8) through rp, */
	/* skipping either store when the corresponding pointer is nil */
	MOVW	qp+16(FP), R9
	MOVW	rp+20(FP), R10
	CMP	R9, $0
	BEQ	lab11
	MOVW	R3, 0(R9)
	MOVW	R4, 4(R9)
lab11:
	CMP	R10, $0
	BEQ	lab12
	MOVW	R7, 0(R10)
	MOVW	R8, 4(R10)
lab12:
	RETURN

lab9:
	/* Quotient is 0 (dvs > dvd) */
	MOVW	R4, R8		/* rmd.lsw = dvd.lsw */
	MOVW	R3, R7		/* rmd.msw = dvd.msw */
	MOVW	$0, R4		/* dvd.lsw = 0 */
	MOVW	$0, R3		/* dvd.msw = 0 */
	BR	lab10