/*
 * _mulv: multiply two 64-bit values, keeping the low 64 bits of the product.
 *
 * Operands are read as 32-bit words from the argument frame:
 *	u1+4(FP)	high word of u1
 *	u1+8(FP)	low word of u1
 *	u2+12(FP)	high word of u2
 *	u2+16(FP)	low word of u2
 * The result is written through R7:
 *	0(R7)		high word
 *	4(R7)		low word
 * R7 is only read here; it is assumed to hold the address of the
 * caller's result slot on entry (TODO: confirm against the callers).
 *
 * Method:
 *	lo:hi  = u1.lo * u2.lo		unsigned 32x32 => 64, built from MULSCC steps
 *	hi    += u1.lo * u2.hi		32-bit MUL, skipped when u2.hi == 0
 *	hi    += u1.hi * u2.lo		32-bit MUL, skipped when u1.hi == 0
 *
 * The MULSCC sequences follow the shape of the software multiply routine
 * in the SPARC V8 architecture manual: the multiplier sits in Y, the
 * multiplicand in R8, and R9 accumulates the partial product.  The steps
 * depend on each other through R9, Y and the condition codes, so their
 * order must not be changed.
 *
 * Registers written directly by this code: R8, R9, R11, R12, R13, R16, Y
 * and the integer condition codes.
 */
TEXT	_mulv(SB), $0
	MOVW	u1+8(FP), R8		/* R8 = u1.lo */
	MOVW	u2+16(FP), R13		/* R13 = u2.lo */

	MOVW	R13, R16		/* save low parts for later: R16 = u2.lo */
	MOVW	R8, R12			/* R12 = u1.lo */

	/*
	 * unsigned 32x32 => 64 multiply
	 *
	 * Arrange the operands so that R13 <= R8 under a signed compare;
	 * R13 becomes the multiplier (loaded into Y), R8 the multiplicand.
	 */
	CMP	R13, R8
	BLE	mul1			/* already ordered */
	MOVW	R12, R13		/* swap: R13 = u1.lo */
	MOVW	R16, R8			/*       R8  = u2.lo */
mul1:
	MOVW	R13, Y
	ANDNCC	$0xFFF, R13, R0		/* any multiplier bits above the low 12? */
	BE	mul_shortway		/* none: 12 steps are enough */
	ANDCC	R0, R0, R9		/* zero partial product and clear N and V cond's */

	/* long multiply: one step per multiplier bit */
	MULSCC	R8, R9, R9		/* 0 */
	MULSCC	R8, R9, R9		/* 1 */
	MULSCC	R8, R9, R9		/* 2 */
	MULSCC	R8, R9, R9		/* 3 */
	MULSCC	R8, R9, R9		/* 4 */
	MULSCC	R8, R9, R9		/* 5 */
	MULSCC	R8, R9, R9		/* 6 */
	MULSCC	R8, R9, R9		/* 7 */
	MULSCC	R8, R9, R9		/* 8 */
	MULSCC	R8, R9, R9		/* 9 */
	MULSCC	R8, R9, R9		/* 10 */
	MULSCC	R8, R9, R9		/* 11 */
	MULSCC	R8, R9, R9		/* 12 */
	MULSCC	R8, R9, R9		/* 13 */
	MULSCC	R8, R9, R9		/* 14 */
	MULSCC	R8, R9, R9		/* 15 */
	MULSCC	R8, R9, R9		/* 16 */
	MULSCC	R8, R9, R9		/* 17 */
	MULSCC	R8, R9, R9		/* 18 */
	MULSCC	R8, R9, R9		/* 19 */
	MULSCC	R8, R9, R9		/* 20 */
	MULSCC	R8, R9, R9		/* 21 */
	MULSCC	R8, R9, R9		/* 22 */
	MULSCC	R8, R9, R9		/* 23 */
	MULSCC	R8, R9, R9		/* 24 */
	MULSCC	R8, R9, R9		/* 25 */
	MULSCC	R8, R9, R9		/* 26 */
	MULSCC	R8, R9, R9		/* 27 */
	MULSCC	R8, R9, R9		/* 28 */
	MULSCC	R8, R9, R9		/* 29 */
	MULSCC	R8, R9, R9		/* 30 */
	MULSCC	R8, R9, R9		/* 31 */
	MULSCC	R0, R9, R9		/* 32; shift only; r9 is high part */

	/*
	 * need to correct top word if top bit set:
	 * when the multiplicand R8 has its top bit set, add the
	 * multiplier R13 into the high word
	 */
	CMP	R8, R0
	BGE	mul_tstlow
	ADD	R13, R9			/* adjust the high parts */

mul_tstlow:
	MOVW	Y, R13			/* get low part */
	BA	mul_done

	/*
	 * Short multiply: the multiplier fits in 12 bits.
	 * Because R13 <= R8 (signed) and R13 is in 0..0xFFF here, R8 is
	 * non-negative on this path, so no top-bit correction is applied.
	 */
mul_shortway:
	ANDCC	R0, R0, R9		/* zero partial product and clear N and V cond's */
	MULSCC	R8, R9, R9		/* 0 */
	MULSCC	R8, R9, R9		/* 1 */
	MULSCC	R8, R9, R9		/* 2 */
	MULSCC	R8, R9, R9		/* 3 */
	MULSCC	R8, R9, R9		/* 4 */
	MULSCC	R8, R9, R9		/* 5 */
	MULSCC	R8, R9, R9		/* 6 */
	MULSCC	R8, R9, R9		/* 7 */
	MULSCC	R8, R9, R9		/* 8 */
	MULSCC	R8, R9, R9		/* 9 */
	MULSCC	R8, R9, R9		/* 10 */
	MULSCC	R8, R9, R9		/* 11 */
	MULSCC	R0, R9, R9		/* 12; shift only; r9 is high part */

	/*
	 * After 12 steps the product straddles R9 and Y; reassemble it:
	 *	low word  = (R9 << 12) | (Y >> 20)
	 *	high word = R9 >> 20 (arithmetic)
	 */
	MOVW	Y, R8			/* make low part of partial low part & high part */
	SLL	$12, R9, R13
	SRL	$20, R8
	OR	R8, R13

	SRA	$20, R9			/* high part */

mul_done:

	/*
	 * mul by high halves if needed
	 *
	 * R13 = low word of the result (final), R9 = high word so far.
	 * R12 and R16 still hold the original u1.lo and u2.lo.
	 */
	MOVW	R13, 4(R7)		/* store low word of the result */
	MOVW	u2+12(FP), R11		/* R11 = u2.hi */
	CMP	R11, R0
	BE	nomul1			/* u2.hi == 0: cross term is zero */
	MUL	R11, R12		/* R12 = u1.lo * u2.hi */
	ADD	R12, R9

nomul1:
	MOVW	u1+4(FP), R11		/* R11 = u1.hi */
	CMP	R11, R0
	BE	nomul2			/* u1.hi == 0: cross term is zero */
	MUL	R11, R16		/* R16 = u2.lo * u1.hi */
	ADD	R16, R9

nomul2:

	MOVW	R9, 0(R7)		/* store high word of the result */
	RETURN