dnl S/390-64 mpn_add_n and mpn_sub_n.

dnl Copyright 2011 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.

dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.

dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.

dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C            cycles/limb
C z900           5.5
C z990           3
C z9             ?
C z10            ?
C z196           ?

C TODO
C  * Optimise for small n
C  * Use r0 and save/restore one less register
C  * Using logops_n's v1 inner loop operand order makes the loop about 20%
C    faster, at the expense of highly alignment-dependent performance.
C INPUT PARAMETERS
C   rp  (%r2)  result limbs are stored through this pointer
C   up  (%r3)  first source operand, loaded into registers
C   vp  (%r4)  second source operand, used as the memory operand of ADSB/ADSBC
C   n   (%r5)  limb count, one limb being 8 bytes
C The carry (add) or borrow (sub) out of the top limb is returned in %r2
C as 0 or 1.
C NOTE(review): n = 0 would dispatch to L(b0) with a pass count of zero, so
C this code appears to rely on n >= 1 -- confirm against the callers.
define(`rp', `%r2')
define(`up', `%r3')
define(`vp', `%r4')
define(`n', `%r5')

C Operation selection, keyed on OPERATION_add_n / OPERATION_sub_n.
C   ADSB     memory form without carry-in, used for the lowest limb only
C   ADSBC    memory form consuming and producing the carry or borrow
C   ADSBCR   register form with carry or borrow (not used in this section)
C   RETVAL   turns the final condition code into 0 or 1 in %r2
C              add: clear %r2 with lghi, which leaves the condition code
C                   alone, then add %r2 to itself with carry
C              sub: %r2 - %r2 - borrow gives 0 or -1, which is then negated
C   func     name of the entry point emitted below
C   func_nc  name for a carry-in variant (no such entry point is emitted
C            in this section)
ifdef(`OPERATION_add_n', `
  define(ADSB, alg)
  define(ADSBCR, alcgr)
  define(ADSBC, alcg)
  define(RETVAL,`dnl
	lghi	%r2, 0
	alcgr	%r2, %r2')
  define(func, mpn_add_n)
  define(func_nc, mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
  define(ADSB, slg)
  define(ADSBCR, slbgr)
  define(ADSBC, slbg)
  define(RETVAL,`dnl
	slbgr	%r2, %r2
	lcgr	%r2, %r2')
  define(func, mpn_sub_n)
  define(func_nc, mpn_sub_nc)')

MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)

ASM_START()
PROLOGUE(func)
C Save %r6-%r8 at offset 48 from %r15; they are reloaded from the same place
C before returning.  (Presumably the register save area provided by the
C caller -- confirm against the ABI.)
	stmg	%r6, %r8, 48(%r15)

C Split the work into one leading block of 1 to 4 limbs followed by full
C 4-limb loop passes.
C   %r1 = (n + 3) / 4   total number of passes, leading block included
C   %r7 = (n + 3) mod 4 selects the leading block:
C           0 -> L(b1)  one limb      n = 1 (mod 4)
C           1 -> L(b2)  two limbs     n = 2 (mod 4)
C           2 -> L(b3)  three limbs   n = 3 (mod 4)
C           3 -> L(b0)  four limbs    n = 0 (mod 4)
	aghi	n, 3
	lghi	%r7, 3
	srlg	%r1, n, 2
	ngr	%r7, n			C (n + 3) mod 4, sets the condition code
	je	L(b1)			C residue 0
	cghi	%r7, 2
	jl	L(b2)			C residue 1
	jne	L(b0)			C residue 3; residue 2 falls through

C Leading block of three limbs.  The lmg overwrites n (%r5) and the residue
C in %r7; both are dead by now since the pass count lives in %r1.
L(b3):	lmg	%r5, %r7, 0(up)
	la	up, 24(up)
	ADSB	%r5, 0(vp)		C lowest limb, no carry-in
	ADSBC	%r6, 8(vp)
	ADSBC	%r7, 16(vp)
	la	vp, 24(vp)
	stmg	%r5, %r7, 0(rp)
	la	rp, 24(rp)
	brctg	%r1, L(top)		C more passes left -> main loop
	j	L(end)

C Leading block of four limbs.  The load and the first two operations
C duplicate what the head of the main loop does.  This is no mistake: the
C lowest limb must use ADSB because there is no carry-in yet, and joining the
C loop at L(m0) is needed to make the main loop run fast for n = 0 (mod 4).
L(b0):	lmg	%r5, %r8, 0(up)
	la	up, 32(up)
	ADSB	%r5, 0(vp)
	ADSBC	%r6, 8(vp)
	j	L(m0)

C Leading block of one limb.
L(b1):	lg	%r5, 0(up)
	la	up, 8(up)
	ADSB	%r5, 0(vp)
	la	vp, 8(vp)
	stg	%r5, 0(rp)
	la	rp, 8(rp)
	brctg	%r1, L(top)
	j	L(end)

C Leading block of two limbs.
L(b2):	lmg	%r5, %r6, 0(up)
	la	up, 16(up)
	ADSB	%r5, 0(vp)
	ADSBC	%r6, 8(vp)
	la	vp, 16(vp)
	stmg	%r5, %r6, 0(rp)
	la	rp, 16(rp)
	brctg	%r1, L(top)
	j	L(end)

C Main loop, four limbs per pass.  The carry or borrow travels between limbs
C and between passes in the condition code, so only instructions that leave
C the condition code unchanged (lmg, stmg, la, brctg) sit between the ADSBC
C operations.
L(top):	lmg	%r5, %r8, 0(up)
	la	up, 32(up)
	ADSBC	%r5, 0(vp)
	ADSBC	%r6, 8(vp)
L(m0):	ADSBC	%r7, 16(vp)		C entry point from L(b0)
	ADSBC	%r8, 24(vp)
	la	vp, 32(vp)
	stmg	%r5, %r8, 0(rp)
	la	rp, 32(rp)
	brctg	%r1, L(top)		C decrement pass count, loop while nonzero

C Convert the final carry or borrow to 0 or 1 in %r2, reload %r6-%r8 from
C where the prologue saved them and branch to the address in %r14.
L(end):	RETVAL
	lmg	%r6, %r8, 48(%r15)
	br	%r14
EPILOGUE()