1dnl PowerPC 750 mpn_rshift -- mpn right shift. 2 3dnl Copyright 2002, 2003 Free Software Foundation, Inc. 4 5dnl This file is part of the GNU MP Library. 6dnl 7dnl The GNU MP Library is free software; you can redistribute it and/or modify 8dnl it under the terms of either: 9dnl 10dnl * the GNU Lesser General Public License as published by the Free 11dnl Software Foundation; either version 3 of the License, or (at your 12dnl option) any later version. 13dnl 14dnl or 15dnl 16dnl * the GNU General Public License as published by the Free Software 17dnl Foundation; either version 2 of the License, or (at your option) any 18dnl later version. 19dnl 20dnl or both in parallel, as here. 21dnl 22dnl The GNU MP Library is distributed in the hope that it will be useful, but 23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25dnl for more details. 26dnl 27dnl You should have received copies of the GNU General Public License and the 28dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29dnl see https://www.gnu.org/licenses/. 30 31include(`../config.m4') 32 33 34C cycles/limb 35C 750: 3.0 36C 7400: 3.0 37 38 39C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, 40C unsigned shift); 41C 42C This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but 43C smaller and saving about 30 or so cycles of overhead. 
ASM_START()
PROLOGUE(mpn_rshift)

	C Shift the size-limb operand at src right by shift bits, store the
	C size result limbs at dst, and return the bits shifted out of src[0],
	C left-justified in the return limb, i.e. src[0] << (32-shift).
	C
	C Each result limb below the top one is
	C	dst[i] = (src[i] >> shift) | (src[i+1] << (32-shift))
	C and the top limb is dst[size-1] = src[size-1] >> shift, zero filled.
	C Limbs are loaded and stored from low to high addresses.
	C
	C src[0] is loaded before size is tested, so size must be at least 1.
	C NOTE(review): shift is presumably restricted to 1..31 as for the
	C other mpn shift routines -- confirm against the mpn_rshift docs.
	C
	C The loop handles two limbs per pass, with r9 and r10 alternating as
	C the most recently loaded limb so that no register move is needed.
	C CTR holds the number of limbs not yet loaded, and each bdz/bdnz
	C accounts for the load just before it.  The half of the loop in which
	C CTR reaches zero decides the tail: L(odd) when the last limb is in
	C r10, L(two) when it is in r9.

	C r3	dst
	C r4	src
	C r5	size
	C r6	shift

	mtctr	r5		C size, which frees r5 for use as the dst pointer
	lwz	r8, 0(r4)	C src[0]

	subfic	r7, r6, 32	C 32-shift
	addi	r5, r3, -4	C dst-4, so that stwu at offset 4 pre-increments

	slw	r3, r8, r7	C return value, overwrites the dst argument
	bdz	L(one)		C size == 1

	lwzu	r9, 4(r4)	C src[1]
	srw	r8, r8, r6	C src[0] >> shift
	bdz	L(two)		C size == 2


L(top):
	C r3	return value
	C r4	src, incrementing, points at the limb held in r9
	C r5	dst, incrementing, points one limb below the next store
	C r6	shift
	C r7	32-shift
	C r8	src[i-1] >> shift
	C r9	src[i]
	C r10	free, receives the next limb
	C r11	scratch

	lwzu	r10, 4(r4)	C src[i+1]
	slw	r11, r9, r7	C low bits of src[i], moved to the top

	or	r8, r8, r11	C completed dst[i-1]
	stwu	r8, 4(r5)

	srw	r8, r9, r6	C src[i] >> shift
	bdz	L(odd)		C r10 holds the last limb

	C r8	src[i-1] >> shift
	C r9	free, receives the next limb
	C r10	src[i]

	lwzu	r9, 4(r4)	C src[i+1]
	slw	r11, r10, r7	C low bits of src[i], moved to the top

	or	r8, r8, r11	C completed dst[i-1]
	stwu	r8, 4(r5)

	srw	r8, r10, r6	C src[i] >> shift
	bdnz	L(top)		C fall through when r9 holds the last limb


L(two):
	C Tail for the last limb in r9, reached from the loop fall-through
	C or directly when size == 2.
	C
	C r3	return value
	C r4
	C r5	&dst[size-3], so 4(r5) is dst[size-2] and 8(r5) is dst[size-1]
	C r6	shift
	C r7	32-shift
	C r8	src[size-2] >> shift
	C r9	src[size-1]
	C r10

	slw	r11, r9, r7
	srw	r12, r9, r6		C src[size-1] >> shift

	or	r8, r8, r11
	stw	r12, 8(r5)		C dst[size-1]

	stw	r8, 4(r5)		C dst[size-2]
	blr


L(odd):
	C Tail for the last limb in r10, otherwise identical to L(two).
	C
	C r3	return value
	C r4
	C r5	&dst[size-3], so 4(r5) is dst[size-2] and 8(r5) is dst[size-1]
	C r6	shift
	C r7	32-shift
	C r8	src[size-2] >> shift
	C r9
	C r10	src[size-1]

	slw	r11, r10, r7
	srw	r12, r10, r6		C src[size-1] >> shift

	or	r8, r8, r11
	stw	r12, 8(r5)		C dst[size-1]

	stw	r8, 4(r5)		C dst[size-2]
	blr


L(one):
	C Single limb operand, the only store is the zero-filled top limb.
	C
	C r3	return value
	C r4
	C r5	dst-4
	C r6	shift
	C r7	32-shift, not needed here
	C r8	src[0]

	srw	r8, r8, r6

	stw	r8, 4(r5)		C dst[0]
	blr

EPILOGUE(mpn_rshift)