; mc88100 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.

; Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of either:
;
;   * the GNU Lesser General Public License as published by the Free
;     Software Foundation; either version 3 of the License, or (at your
;     option) any later version.
;
; or
;
;   * the GNU General Public License as published by the Free Software
;     Foundation; either version 2 of the License, or (at your option) any
;     later version.
;
; or both in parallel, as here.
;
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
; for more details.
;
; You should have received copies of the GNU General Public License and the
; GNU Lesser General Public License along with the GNU MP Library.  If not,
; see https://www.gnu.org/licenses/.


; INPUT PARAMETERS
; res_ptr       r2
; s1_ptr        r3
; s2_ptr        r4
; size          r5

; This code has been optimized to run one instruction per clock, avoiding
; load stalls and writeback contention.  As a result, the instruction
; order is not always natural.

; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
; but on the 88110, it seems to run much slower, 6.6 clocks/limb.

; ___gmpn_sub_n
;
; Subtracts the limb vector at s2_ptr from the limb vector at s1_ptr, limb
; by limb with borrow propagation, and stores the difference at res_ptr.
; Limbs are 4 bytes wide (all load/store offsets below step by 4).
;
; In:   r2 = res_ptr, r3 = s1_ptr, r4 = s2_ptr, r5 = size (must be > 0)
; Out:  r2 = inverted carry bit after the most significant limb
;            (addu.ci followed by xor ...,1 at the end of the routine)
; Uses: r5-r10, r12 as scratch; returns through r1.
;
; Register roles inside the routine:
;   r6,r7   (s1 limb, s2 limb) pair consumed by the even-numbered steps
;   r8,r9   (s1 limb, s2 limb) pair consumed by the odd-numbered steps
;   r10     loop counter, starts as size / 8
;   r12     computed address at which the unrolled loop is entered
;
; Structure: the loop body is unrolled 8 times.  Every limb step between
; Lbase and the closing bcnd.n is exactly 4 instructions, so step k starts
; at Lbase + 16*k.  The prologue computes k = (-size) mod 8 and jumps to
; that address, so the first pass handles size mod 8 limbs and every later
; pass handles 8.  The pointers are moved back by 4*k bytes beforehand so
; that the fixed offsets in step k address the first limb.
;
; NOTE(review): because of the computed entry, inserting, removing or
; reordering instructions between Lbase and the closing bcnd.n breaks the
; 16-bytes-per-step assumption.
;
; NOTE(review): size == 0 is not handled.  It takes the Lzero path, where
; r10 is decremented from 0 and the ne0 loop test then keeps looping.

        text
        align   16
        global  ___gmpn_sub_n
___gmpn_sub_n:
        ld      r6,r3,0                 ; read first limb from s1_ptr
        extu    r10,r5,3                ; r10 = size / 8 (loop count)
        ld      r7,r4,0                 ; read first limb from s2_ptr

        subu    r5,r0,r5                ; r5 = -size
        mak     r5,r5,3<4>              ; r5 = ((-size) mod 8) * 16 = byte offset of entry step from Lbase
        bcnd.n  eq0,r5,Lzero            ; size is a multiple of 8: enter at the top via Lzero
         subu.co r0,r0,r0               ; (delay slot) initialize carry

        or      r12,r0,lo16(Lbase)      ; r12 = address of Lbase, low half ...
        or.u    r12,r12,hi16(Lbase)     ; ... and high half
        addu    r12,r12,r5              ; r12 is address for entering in loop

        extu    r5,r5,2                 ; divide by 4: r5 = 4 * (number of skipped steps), in bytes
        subu    r2,r2,r5                ; adjust res_ptr
        subu    r3,r3,r5                ; adjust s1_ptr
        subu    r4,r4,r5                ; adjust s2_ptr

        or      r8,r6,r0                ; copy first s1 limb to r8 in case entry is at an odd step

        jmp.n   r12                     ; enter the unrolled loop at the computed step
         or     r9,r7,r0                ; (delay slot) copy first s2 limb to r9 likewise

Loop:   addu    r3,r3,32                ; advance s1_ptr by 8 limbs
        st      r8,r2,28                ; store last limb of the previous pass
        addu    r4,r4,32                ; advance s2_ptr by 8 limbs
        ld      r6,r3,0                 ; load first s1 limb of this pass
        addu    r2,r2,32                ; advance res_ptr by 8 limbs
        ld      r7,r4,0                 ; load first s2 limb of this pass
Lzero:  subu    r10,r10,1               ; subtract 0 + 8r limbs (adj loop cnt)
Lbase:  ld      r8,r3,4                 ; step 0: preload next pair while subtracting the current one
        subu.cio r6,r6,r7
        ld      r9,r4,4
        st      r6,r2,0
        ld      r6,r3,8                 ; subtract 7 + 8r limbs (entry when size mod 8 == 7)
        subu.cio r8,r8,r9
        ld      r7,r4,8
        st      r8,r2,4
        ld      r8,r3,12                ; subtract 6 + 8r limbs (entry when size mod 8 == 6)
        subu.cio r6,r6,r7
        ld      r9,r4,12
        st      r6,r2,8
        ld      r6,r3,16                ; subtract 5 + 8r limbs (entry when size mod 8 == 5)
        subu.cio r8,r8,r9
        ld      r7,r4,16
        st      r8,r2,12
        ld      r8,r3,20                ; subtract 4 + 8r limbs (entry when size mod 8 == 4)
        subu.cio r6,r6,r7
        ld      r9,r4,20
        st      r6,r2,16
        ld      r6,r3,24                ; subtract 3 + 8r limbs (entry when size mod 8 == 3)
        subu.cio r8,r8,r9
        ld      r7,r4,24
        st      r8,r2,20
        ld      r8,r3,28                ; subtract 2 + 8r limbs (entry when size mod 8 == 2)
        subu.cio r6,r6,r7
        ld      r9,r4,28
        st      r6,r2,24
        bcnd.n  ne0,r10,Loop            ; subtract 1 + 8r limbs (entry when size mod 8 == 1)
         subu.cio r8,r8,r9              ; (delay slot) last subtraction of the pass; stored at Loop or below

        st      r8,r2,28                ; store most significant limb

        addu.ci r2,r0,r0                ; return carry-out from most sign. limb (r2 = 0 + 0 + carry)
        jmp.n   r1                      ; return to caller
         xor    r2,r2,1                 ; (delay slot) invert the carry bit to form the return value