dnl  PowerPC-64 mpn_sqr_diagonal.

dnl  Copyright 2001-2003, 2005, 2006, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                  cycles/limb
C POWER3/PPC630          18
C POWER4/PPC970          ?
C POWER5                 7.25
C POWER6                 9.5

C mpn_sqr_diagonal
C
C For each of the n doublewords read from up, form the full 128-bit square
C with mulld (low half) and mulhdu (high half), and store the low half
C followed by the high half at rp.  rp therefore receives 2*n doublewords.
C
C The main loop handles four source limbs per iteration.  ctr is loaded with
C (n + 3) >> 2, and n & 3 selects where the first iteration is entered:
C   n & 3 = 0  ->  L(b00), a full iteration starting at L(top)
C   n & 3 = 3  ->  L(b11), joins the store sequence at L(11)
C   n & 3 = 2  ->  L(b10), joins the store sequence at L(10)
C   n & 3 = 1  ->  L(b01), joins the store sequence at L(01)
C Each partial entry biases rp downwards so that the fixed store offsets used
C inside the loop land on the start of the destination.
C
C Registers written: r0, r5-r12, cr0, cr6, ctr, plus rp and up, which are
C advanced past the data processed.  No stack is used.
C
C NOTE(review): with n = 0 the code takes the L(b00) path with ctr = 0, and
C bdnz would then wrap the counter.  Presumably callers guarantee n >= 1;
C confirm against the mpn interface documentation.

C INPUT PARAMETERS
define(`rp', r3)
define(`up', r4)
define(`n',  r5)

ASM_START()
PROLOGUE(mpn_sqr_diagonal)
ifdef(`HAVE_ABI_mode32',
`	rldicl	n, n, 0, 32')		C zero extend n

	rldicl.	r0, n, 0,62		C r0 = n & 3, set cr0
	addi	n, n, 3			C compute count...
	cmpdi	cr6, r0, 2		C cr6 = n & 3 compared with 2
	srdi	n, n, 2			C ...for ctr
	mtctr	n			C copy count into ctr
	beq	cr0, L(b00)		C n & 3 = 0
	blt	cr6, L(b01)		C n & 3 = 1, since 0 was handled above
	beq	cr6, L(b10)		C n & 3 = 2
					C otherwise n & 3 = 3, fall through

C Entry for n & 3 = 3: square three limbs, then join the loop at L(11).
L(b11):	ld	r0, 0(up)
	ld	r10, 8(up)
	ld	r12, 16(up)
	addi	rp, rp, -16		C so that 16(rp) at L(11) is the original rp
	mulld	r7, r0, r0
	mulhdu	r8, r0, r0
	mulld	r9, r10, r10
	mulhdu	r10, r10, r10		C high half overwrites its own source limb
	mulld	r11, r12, r12
	mulhdu	r12, r12, r12		C high half overwrites its own source limb
	addi	up, up, 24		C three limbs consumed
	b	L(11)

C Entry for n & 3 = 1: square one limb, then join the loop at L(01).
	ALIGN(16)
L(b01):	ld	r0, 0(up)
	addi	rp, rp, -48		C so that 48(rp) at L(01) is the original rp
	addi	up, up, 8		C one limb consumed
	mulld	r11, r0, r0
	mulhdu	r12, r0, r0
	b	L(01)

C Entry for n & 3 = 2: square two limbs, then join the loop at L(10).
	ALIGN(16)
L(b10):	ld	r0, 0(up)
	ld	r12, 8(up)
	addi	rp, rp, -32		C so that 32(rp) at L(10) is the original rp
	addi	up, up, 16		C two limbs consumed
	mulld	r9, r0, r0
	mulhdu	r10, r0, r0
	mulld	r11, r12, r12
	mulhdu	r12, r12, r12		C high half overwrites its own source limb
	b	L(10)

C Main loop: four limbs in, eight doublewords out per iteration.  The
C product pairs are r5:r6, r7:r8, r9:r10 and r11:r12, low half first.
	ALIGN(32)
L(b00):
L(top):	ld	r0, 0(up)
	ld	r8, 8(up)
	ld	r10, 16(up)
	ld	r12, 24(up)
	mulld	r5, r0, r0		C r5 is n, free for reuse since the count is in ctr
	mulhdu	r6, r0, r0
	mulld	r7, r8, r8
	mulhdu	r8, r8, r8		C high half overwrites its own source limb
	mulld	r9, r10, r10
	mulhdu	r10, r10, r10
	mulld	r11, r12, r12
	mulhdu	r12, r12, r12
	addi	up, up, 32		C four limbs consumed
	std	r5, 0(rp)
	std	r6, 8(rp)
L(11):	std	r7, 16(rp)		C entry point after L(b11)
	std	r8, 24(rp)
L(10):	std	r9, 32(rp)		C entry point after L(b10)
	std	r10, 40(rp)
L(01):	std	r11, 48(rp)		C entry point after L(b01)
	std	r12, 56(rp)
	addi	rp, rp, 64		C advance past one full iteration of output
	bdnz	L(top)			C decrement ctr, loop while it is nonzero

	blr
EPILOGUE()