1dnl PowerPC 750 mpn_lshift -- mpn left shift. 2 3dnl Copyright 2002, 2003 Free Software Foundation, Inc. 4 5dnl This file is part of the GNU MP Library. 6dnl 7dnl The GNU MP Library is free software; you can redistribute it and/or modify 8dnl it under the terms of either: 9dnl 10dnl * the GNU Lesser General Public License as published by the Free 11dnl Software Foundation; either version 3 of the License, or (at your 12dnl option) any later version. 13dnl 14dnl or 15dnl 16dnl * the GNU General Public License as published by the Free Software 17dnl Foundation; either version 2 of the License, or (at your option) any 18dnl later version. 19dnl 20dnl or both in parallel, as here. 21dnl 22dnl The GNU MP Library is distributed in the hope that it will be useful, but 23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25dnl for more details. 26dnl 27dnl You should have received copies of the GNU General Public License and the 28dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29dnl see https://www.gnu.org/licenses/. 30 31include(`../config.m4') 32 33 34C cycles/limb 35C 750: 3.0 36C 7400: 3.0 37 38 39C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, 40C unsigned shift); 41C 42C This code is the same per-limb speed as mpn/powerpc32/lshift.asm, but 43C smaller and saving about 30 or so cycles of overhead. 

C mpn_lshift: shift the size-limb operand at src left by shift bits, store the
C size result limbs at dst, and return (in r3) the bits shifted out of the most
C significant limb, i.e. src[size-1] >> (32-shift).
C
C Limbs are processed from the most significant (src[size-1]) down to the
C least significant (src[0]); both pointers are walked downward with the
C update forms lwzu/stwu.
C
C CTR is loaded with size and decremented by each bdz/bdnz, so it serves both
C as the loop counter and as the size==1 / size==2 dispatch.
C
C NOTE(review): src[size-1] is loaded before any test of size, so size >= 1 is
C assumed -- confirm against the mpn_lshift contract.

ASM_START()
PROLOGUE(mpn_lshift)

	C r3	dst
	C r4	src
	C r5	size
	C r6	shift

	mtctr	r5		C size
	slwi	r5, r5, 2	C 4*size

	subfic	r7, r6, 32	C 32-shift
	add	r4, r4, r5	C &src[size]

	add	r5, r3, r5	C &dst[size]
	lwz	r8, -4(r4)	C src[size-1]
	bdz	L(one)		C CTR hits zero here when size == 1

	lwzu	r9, -8(r4)	C src[size-2], r4 now &src[size-2]

	srw	r3, r8, r7	C return value
	slw	r8, r8, r6	C src[size-1] << shift
	bdz	L(two)		C CTR hits zero here when size == 2


	C Main loop, unrolled twice.  Each half loads the next lower source limb,
	C combines the pending high part in r8 with the bits shifted down from
	C the current limb, stores one result limb, and leaves the current limb
	C shifted left in r8 for the next store.  The two halves alternate r9
	C and r10 as the current-limb register.

L(top):
	C r3	return value
	C r4	src, decrementing
	C r5	dst, decrementing
	C r6	lshift
	C r7	32-shift
	C r8	src[i+1] << shift
	C r9	src[i]
	C r10

	lwzu	r10, -4(r4)	C next lower limb, src[i-1]
	srw	r11, r9, r7	C src[i] >> (32-shift)

	or	r8, r8, r11	C (src[i+1] << shift) | (src[i] >> (32-shift))
	stwu	r8, -4(r5)	C dst[i+1]

	slw	r8, r9, r6	C src[i] << shift
	bdz	L(odd)		C last limb already loaded, into r10

	C r8	src[i+1] << shift
	C r9
	C r10	src[i]

	lwzu	r9, -4(r4)	C next lower limb, src[i-1]
	srw	r11, r10, r7	C src[i] >> (32-shift)

	or	r8, r8, r11	C (src[i+1] << shift) | (src[i] >> (32-shift))
	stwu	r8, -4(r5)	C dst[i+1]

	slw	r8, r10, r6	C src[i] << shift
	bdnz	L(top)


	C Final two limbs, lowest source limb in r9.  Reached directly when
	C size == 2, or by falling out of the second half of the loop.

L(two):
	C r3	return value
	C r4
	C r5	&dst[2]
	C r6	shift
	C r7	32-shift
	C r8	src[1] << shift
	C r9	src[0]
	C r10

	srw	r11, r9, r7	C src[0] >> (32-shift)
	slw	r12, r9, r6	C src[0] << shift

	or	r8, r8, r11	C (src[1] << shift) | (src[0] >> (32-shift))
	stw	r12, -8(r5)	C dst[0]

	stw	r8, -4(r5)	C dst[1]
	blr


	C Final two limbs, lowest source limb in r10.  Reached from the first
	C half of the loop; otherwise identical to the L(two) tail.

L(odd):
	C r3	return value
	C r4
	C r5	&dst[2]
	C r6	shift
	C r7	32-shift
	C r8	src[1] << shift
	C r9
	C r10	src[0]

	srw	r11, r10, r7	C src[0] >> (32-shift)
	slw	r12, r10, r6	C src[0] << shift

	or	r8, r8, r11	C (src[1] << shift) | (src[0] >> (32-shift))
	stw	r12, -8(r5)	C dst[0]

	stw	r8, -4(r5)	C dst[1]
	blr


	C size == 1: the single limb supplies both the return value and dst[0].

L(one):
	C r5	&dst[1]
	C r6	shift
	C r7	32-shift
	C r8	src[0]

	srw	r3, r8, r7	C return value, src[0] >> (32-shift)
	slw	r8, r8, r6	C src[0] << shift

	stw	r8, -4(r5)	C dst[0]
	blr

EPILOGUE(mpn_lshift)