/*
 * BDNZ target: decrement CTR and branch to target while CTR != 0.
 * The macro deliberately ends in a comma; the branch target written
 * after BDNZ becomes the third operand of BC.
 */
#define	BDNZ	BC	16,0,

/*
 * 64/64 unsigned division adapted from powerpc compiler writer's handbook
 *
 *	(R3:R4) = (R3:R4) / (R5:R6)		(64b) = (64b / 64b)
 *	  quo       dvd       dvs
 *
 * Remainder is left in R7:R8
 *
 * Code comment notation:
 *	msw = most-significant (high-order) word, i.e. bits 0..31
 *	lsw = least-significant (low-order) word, i.e. bits 32..63
 *	LZ = Leading Zeroes
 *	SD = Significant Digits
 *
 *	R3:R4 = dvd (input dividend); quo (output quotient)
 *	R5:R6 = dvs (input divisor)
 *
 *	R7:R8 = tmp; rem (output remainder)
 *
 * Stack arguments, as read by this routine:
 *	a+0(FP), a+4(FP)	dividend msw, lsw
 *	b+8(FP), b+12(FP)	divisor msw, lsw
 *	qp+16(FP)		if non-zero, quotient is stored at 0(qp) (msw), 4(qp) (lsw)
 *	rp+20(FP)		if non-zero, remainder is stored at 0(rp) (msw), 4(rp) (lsw)
 *
 * Scratch registers used: R9, R10, R11 and CTR.
 *
 * NOTE(review): there is no check for dvs == 0 anywhere in this routine;
 * presumably callers guard against a zero divisor -- confirm.
 */

TEXT	_divu64(SB), $0
	MOVW	a+0(FP), R3
	MOVW	a+4(FP), R4
	MOVW	b+8(FP), R5
	MOVW	b+12(FP), R6

	/* count the number of leading 0s in the dividend */
	CMP	R3, $0		/* dvd.msw == 0? */
	CNTLZW	R3, R11		/* R11 = dvd.msw.LZ */
	CNTLZW	R4, R9		/* R9 = dvd.lsw.LZ */
	BNE	lab1		/* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
	ADD	$32, R9, R11	/* else dvd.LZ = dvd.lsw.LZ + 32 */

lab1:
	/* count the number of leading 0s in the divisor */
	CMP	R5, $0		/* dvs.msw == 0? */
	CNTLZW	R5, R9		/* R9 = dvs.msw.LZ */
	CNTLZW	R6, R10		/* R10 = dvs.lsw.LZ */
	BNE	lab2		/* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
	ADD	$32, R10, R9	/* else dvs.LZ = dvs.lsw.LZ + 32 */

lab2:
	/* determine shift amounts to minimize the number of iterations */
	CMP	R11, R9		/* compare dvd.LZ to dvs.LZ */
	SUBC	R11, $64, R10	/* R10 = 64 - dvd.LZ = dvd.SD */
	BGT	lab9		/* if(dvd.LZ > dvs.LZ) then dvs > dvd: quotient = 0 */
	ADD	$1, R9		/* ++dvs.LZ (or --dvs.SD) */
	SUBC	R9, $64, R9	/* R9 = 64 - (dvs.LZ + 1) = dvs.SD - 1 */
	ADD	R9, R11		/* (dvd.LZ + dvs.SD - 1) = left shift of dvd for */
				/* initial dvd */
	SUB	R9, R10, R9	/* (dvd.SD - (dvs.SD - 1)) = right shift of dvd for */
				/* initial tmp */
	MOVW	R9, CTR		/* number of iterations = that same right-shift count */

	/* R7:R8 = R3:R4 >> R9 */
	CMP	R9, $32
	ADD	$-32, R9, R7	/* R7 = R9 - 32, used only when R9 >= 32 */
	BLT	lab3		/* if(R9 < 32) jump to lab3 */
	SRW	R7, R3, R8	/* tmp.lsw = dvd.msw >> (R9 - 32) */
	MOVW	$0, R7		/* tmp.msw = 0 */
	BR	lab4
lab3:
	SRW	R9, R4, R8	/* R8 = dvd.lsw >> R9 */
	SUBC	R9, $32, R7	/* R7 = 32 - R9 */
	SLW	R7, R3, R7	/* R7 = dvd.msw << (32 - R9) */
	OR	R7, R8		/* tmp.lsw = R8 | R7 */
	SRW	R9, R3, R7	/* tmp.msw = dvd.msw >> R9 */

lab4:
	/* R3:R4 = R3:R4 << R11 */
	CMP	R11,$32
	ADDC	$-32, R11, R9	/* R9 = R11 - 32, used only when R11 >= 32 */
	BLT	lab5		/* if(R11 < 32) jump to lab5 */
	SLW	R9, R4, R3	/* dvd.msw = dvd.lsw << (R11 - 32) */
	MOVW	$0, R4		/* dvd.lsw = 0 */
	BR	lab6

lab5:
	SLW	R11, R3		/* R3 = dvd.msw << R11 */
	SUBC	R11, $32, R9	/* R9 = 32 - R11 */
	SRW	R9, R4, R9	/* R9 = dvd.lsw >> (32 - R11) */
	OR	R9, R3		/* dvd.msw = R3 | R9 */
	SLW	R11, R4		/* dvd.lsw = dvd.lsw << R11 */

lab6:
	/* restoring division shift and subtract loop */
	MOVW	$-1, R10	/* R10 = -1, so that R10 + 1 below produces a carry */
	ADDC	$0, R7		/* clear carry bit before loop starts */
lab7:
	/* tmp:dvd is considered one large register */
	/* each portion is shifted left 1 bit by adding it to itself */
	/* adde sums the carry from the previous and creates a new carry; */
	/* the carry entering dvd.lsw is the quotient bit of the previous pass */
	ADDE	R4,R4		/* shift dvd.lsw left 1 bit */
	ADDE	R3,R3		/* shift dvd.msw left 1 bit */
	ADDE	R8,R8		/* shift tmp.lsw left 1 bit */
	ADDE	R7,R7		/* shift tmp.msw left 1 bit */
	SUBC	R6, R8, R11	/* R11 = tmp.lsw - dvs.lsw */
	SUBECC	R5, R7, R9	/* R9 = tmp.msw - dvs.msw - borrow; sets condition codes */
	BLT	lab8		/* if(result < 0) keep tmp; carry stays clear (quotient bit 0) */
	MOVW	R11, R8		/* tmp.lsw = difference lsw */
	MOVW	R9, R7		/* tmp.msw = difference msw */
	ADDC	$1, R10, R11	/* -1 + 1: set carry bit (quotient bit 1); R11 is scratch */
lab8:
	BDNZ	lab7

	/* shift in the final quotient bit */
	ADDE	R4,R4		/* quo.lsw (lsb = CA) */
	ADDE	R3,R3		/* quo.msw (lsb from lsw) */

lab10:
	/* store quotient and remainder through qp and rp when they are non-zero */
	MOVW	qp+16(FP), R9
	MOVW	rp+20(FP), R10
	CMP	R9, $0
	BEQ	lab11		/* qp == 0: caller does not want the quotient */
	MOVW	R3, 0(R9)	/* qp: quo.msw */
	MOVW	R4, 4(R9)	/* qp: quo.lsw */
lab11:
	CMP	R10, $0
	BEQ	lab12		/* rp == 0: caller does not want the remainder */
	MOVW	R7, 0(R10)	/* rp: rem.msw */
	MOVW	R8, 4(R10)	/* rp: rem.lsw */
lab12:
	RETURN

lab9:
	/* Quotient is 0 (dvs > dvd); the remainder is the dividend itself */
	MOVW	R4, R8		/* rem.lsw = dvd.lsw */
	MOVW	R3, R7		/* rem.msw = dvd.msw */
	MOVW	$0, R4		/* quo.lsw = 0 */
	MOVW	$0, R3		/* quo.msw = 0 */
	BR	lab10