dnl  Intel Atom mpn_bdiv_dbm1.

dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
dnl
dnl  Copyright 2011 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                            cycles/limb
C P5                             -
C P6 model 0-8,10-12             -
C P6 model 9  (Banias)           9.75
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)        8.25
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom                     8
C AMD K6                         -
C AMD K7                         -
C AMD K8
C AMD K10

C TODO: This code was optimised for atom-32; consider moving it back to the
C       atom dir (atom currently grabs this code), and write a 4-way version
C       (7 c/l).
C -----------------------------------------------------------------------------
C mpn_bdiv_dbm1c (dst, src, size, mul, carry)
C
C For each 32-bit limb src[i], i = 0 .. size-1, in increasing order:
C
C	p      = src[i] * mul		(64-bit product, from pmuludq)
C	cy    -= low 32 bits of p	(this sets the borrow)
C	dst[i] = cy
C	cy    -= high 32 bits of p + borrow
C
C cy starts out as the carry argument and its final value is returned in
C %eax.  size must be at least 1: src[0] is read and one limb is written
C before and regardless of any test on size.
C
C The five arguments are read through the PARAM_* frame definitions below.
C %esi and %edi are saved on entry and restored before returning; %ecx,
C %edx, the flags, %mm0, %mm1 and %mm7 are overwritten.
C -----------------------------------------------------------------------------

dnl  Stack arguments.  The number given to defframe is presumably the byte
dnl  offset of the argument from the stack pointer at entry -- confirm against
dnl  the defframe definition, which is not part of this file.
defframe(PARAM_CARRY,20)
defframe(PARAM_MUL,  16)
defframe(PARAM_SIZE, 12)
defframe(PARAM_SRC,  8)
defframe(PARAM_DST,  4)

dnl  re-use parameter space: the mul and size slots hold the saved %edi and
dnl  %esi.  Each slot is only written after its argument has been read.
define(SAVE_RP,`PARAM_MUL')
define(SAVE_UP,`PARAM_SIZE')

define(`rp',  `%edi')	C destination pointer
define(`up',  `%esi')	C source pointer
define(`n',   `%ecx')	C size, then the count of two-limb rounds left
define(`reg', `%edx')	C scratch: one 32-bit half of a product
define(`cy',  `%eax')	C contains the return value

ASM_START()
	TEXT
	ALIGN(16)
deflit(`FRAME',0)

PROLOGUE(mpn_bdiv_dbm1c)
	mov	PARAM_SIZE, n		C size
	mov	up, SAVE_UP		C save %esi, the size slot is free now
	mov	PARAM_SRC, up
	movd	PARAM_MUL, %mm7		C mm7 = multiplier for every pmuludq
	mov	rp, SAVE_RP		C save %edi, the mul slot is free now
	mov	PARAM_DST, rp

	movd	(up), %mm0
	pmuludq	%mm7, %mm0		C mm0 = src[0] * mul, 64 bits
	shr	n			C n = size/2, CF = size odd, ZF = (n == 0)
	mov	PARAM_CARRY, cy		C mov leaves the flags from shr intact
	jz	L(eq1)			C one limb only, and mm0 is ready for it

	movd	4(up), %mm1		C src[1], movd does not touch the flags
	jc	L(odd)			C odd size: enter the loop in the middle

C Even size: handle the low half of the src[0] product here, so that the
C loop plus the tail see an odd number of limbs.  On leaving this block mm0
C holds src[1] * mul, up points at src[1], reg holds the high half of the
C first product and CF holds the pending borrow, which is the state that
C L(top) and L(end) expect.
	lea	4(up), up
	pmuludq	%mm7, %mm1
	movd	%mm0, reg		C low half of the product
	psrlq	$32, %mm0
	sub	reg, cy
	movd	%mm0, reg		C high half, subtracted later with borrow
	movq	%mm1, %mm0
	dec	n			C dec sets ZF but preserves CF
	mov	cy, (rp)
	lea	4(rp), rp
	jz	L(end)			C size was 2: only the tail is left

C Main loop, two limbs per round.  At L(top), mm0 is the product for the
C limb at (up), reg is the high half of the previous product and CF is the
C borrow from the previous low-half subtraction.  Nothing placed between a
C sub and the sbb that follows it changes CF (lea, mov, dec and the MMX
C instructions all leave it alone), so the instruction order must be kept.
C	ALIGN(16)
L(top):	movd	4(up), %mm1
	sbb	reg, cy			C finish previous limb: high half + borrow
L(odd):	movd	%mm0, reg
	psrlq	$32, %mm0
	pmuludq	%mm7, %mm1		C product for the second limb of the pair
	sub	reg, cy
	lea	8(up), up
	movd	%mm0, reg
	movd	(up), %mm0		C limb for the next round or for the tail
	mov	cy, (rp)
	sbb	reg, cy
	movd	%mm1, reg
	psrlq	$32, %mm1
	sub	reg, cy
	movd	%mm1, reg
	pmuludq	%mm7, %mm0
	dec	n
	mov	cy, 4(rp)
	lea	8(rp), rp
	jnz	L(top)

L(end):	sbb	reg, cy			C high half of the last product + borrow

C Tail: mm0 holds the product for the final limb.
L(eq1):	movd	%mm0, reg
	psrlq	$32, %mm0
	mov	SAVE_UP, up		C restore %esi
	sub	reg, cy
	movd	%mm0, reg
	emms				C done with the MMX registers
	mov	cy, (rp)
	sbb	reg, cy			C final cy is the return value

	mov	SAVE_RP, rp		C restore %edi
	ret
EPILOGUE()
ASM_END()