xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/atom/rshift.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
1dnl  Intel Atom mpn_rshift -- mpn right shift.
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  Converted from AMD64 by Marco Bodrato.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of either:
11dnl
12dnl    * the GNU Lesser General Public License as published by the Free
13dnl      Software Foundation; either version 3 of the License, or (at your
14dnl      option) any later version.
15dnl
16dnl  or
17dnl
18dnl    * the GNU General Public License as published by the Free Software
19dnl      Foundation; either version 2 of the License, or (at your option) any
20dnl      later version.
21dnl
22dnl  or both in parallel, as here.
23dnl
24dnl  The GNU MP Library is distributed in the hope that it will be useful, but
25dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27dnl  for more details.
28dnl
29dnl  You should have received copies of the GNU General Public License and the
30dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
31dnl  see https://www.gnu.org/licenses/.
32
33include(`../config.m4')
34
35C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
36C			unsigned cnt);
37
38C				cycles/limb
39C P5
40C P6 model 0-8,10-12
41C P6 model 9  (Banias)
42C P6 model 13 (Dothan)
43C P4 model 0  (Willamette)
44C P4 model 1  (?)
45C P4 model 2  (Northwood)
46C P4 model 3  (Prescott)
47C P4 model 4  (Nocona)
48C Intel Atom			 5
49C AMD K6
50C AMD K7
51C AMD K8
52C AMD K10
53
C Incoming stack arguments of mpn_rshift, one 4-byte slot each.
C NOTE(review): defframe is defined outside this file; presumably it names the
C slot at the given byte offset from the stack pointer at entry and corrects
C for the current FRAME value -- confirm in the shared x86 m4 definitions.
54defframe(PARAM_CNT, 16)
55defframe(PARAM_SIZE,12)
56defframe(PARAM_SRC,  8)
57defframe(PARAM_DST,  4)
58
59dnl  re-use parameter space
C Each alias below is only written after the prologue has loaded the
C parameter that lives in the same slot into a register:
C   SAVE_UP   holds the saved %esi   (slot of cnt)
C   VAR_COUNT holds the loop counter (slot of size)
C   SAVE_EBX  holds the saved %ebx   (slot of src)
C   SAVE_EBP  holds the saved %ebp   (slot of dst)
60define(SAVE_UP,`PARAM_CNT')
61define(VAR_COUNT,`PARAM_SIZE')
62define(SAVE_EBX,`PARAM_SRC')
63define(SAVE_EBP,`PARAM_DST')
64
C Symbolic register names: rp = destination pointer, up = source pointer,
C cnt = shift count (must be %ecx because the shifts below take it in %cl).
65define(`rp',  `%edi')
66define(`up',  `%esi')
67define(`cnt',  `%ecx')
68
69ASM_START()
70	TEXT
71	ALIGN(8)
72deflit(`FRAME',0)
C mpn_rshift -- shift the size-limb number at src right by cnt bits and
C store the size result limbs at dst.  Limbs are 32 bits, least significant
C limb first.
C
C Returns in %eax the value src[0] shifted left by the negated count, which
C the CPU masks to 5 bits, i.e. the bits shifted out at the low end.
C NOTE(review): nothing here checks the arguments; the code assumes
C size >= 1 and 1 <= cnt <= 31 -- confirm against the callers.
C
C %esi, %edi, %ebx and %ebp are saved and restored (the latter only on the
C paths that modify it); %ecx and %edx are clobbered.
C
C The main loop produces two result limbs per pass.  An odd size peels off
C the lowest limb first and joins the loop at its midpoint.  The count
C register is toggled between cnt and -cnt with neg, so that shr by %cl
C gives the low part and shl by %cl gives the high part of a result limb.
73PROLOGUE(mpn_rshift)
74	mov	PARAM_CNT, cnt		C cnt = shift count
75	mov	PARAM_SIZE, %edx	C edx = size in limbs
76	mov	up, SAVE_UP		C save %esi in the slot cnt was read from
77	mov	PARAM_SRC, up
78	push	rp			FRAME_pushl()	C save %edi on the stack
79	mov	PARAM_DST, rp
80	mov	%ebx, SAVE_EBX		C save %ebx in the slot src was read from
81
82	shr	%edx			C edx = size/2, carry = low bit of size
83	mov	(up), %eax		C eax = src[0]; mov leaves the carry intact
84	mov	%edx, VAR_COUNT		C loop counter = size/2
85	jnc	L(evn)			C even size
86
C Odd size: handle src[0] on its own.
87	mov	%eax, %ebx
88	shr	%cl, %ebx		C ebx = src[0] >> cnt
89	neg	cnt			C cl = -cnt
90	shl	%cl, %eax		C eax = return value
91	test	%edx, %edx
92	jnz	L(gt1)			C size > 1
93	mov	%ebx, (rp)		C size == 1: dst[0] = src[0] >> cnt
94	jmp	L(quit)			C %ebp was never touched on this path
95
C Odd size > 1.  NOTE(review): the push below has no FRAME_pushl(), unlike
C the push before the loop top -- presumably intentional so that the frame
C offset is still one push when the even-size entry is assembled.  Both
C paths reach the loop midpoint with two words pushed.
96L(gt1):	mov	%ebp, SAVE_EBP		C save %ebp in the slot dst was read from
97	push	%eax			C stash the return value
98	mov	4(up), %eax
99	mov	%eax, %ebp		C ebp = src[1], unshifted
100	shl	%cl, %eax		C eax = high part of dst[0]
101	jmp	L(lo1)			C join the loop at its midpoint, cl = -cnt
102
C Even size: prime the pipeline with src[0] and src[1].
103L(evn):	mov	%ebp, SAVE_EBP		C save %ebp in the slot dst was read from
104	neg	cnt			C cl = -cnt
105	mov	%eax, %ebp		C ebp = src[0], unshifted
106	mov	4(up), %edx
107	shl	%cl, %eax		C eax = return value
108	mov	%edx, %ebx		C ebx = src[1], unshifted
109	shl	%cl, %edx		C edx = high part of dst[0]
110	neg	cnt			C cl = cnt again
111	decl	VAR_COUNT		C zero flag set when size == 2
112	lea	-4(rp), rp		C bias rp by one limb; lea keeps the flags
113	lea	4(up), up
114	jz	L(end)			C tests the flags of the decl above
115	push	%eax			FRAME_pushl()	C stash the return value
116
C At the loop top: cl = cnt, %ebp and %ebx hold two consecutive unshifted
C source limbs, %edx holds the %ebx limb shifted left by -cnt.
C At the midpoint: cl = -cnt, %ebx holds a source limb already shifted
C right by cnt, %eax holds the next limb shifted left, %ebp its unshifted copy.
117	ALIGN(8)
118L(top):	shr	%cl, %ebp		C low part of the result limb
119	or	%ebp, %edx		C edx = complete result limb
120	shr	%cl, %ebx		C low part of the following result limb
121	neg	cnt			C cl = -cnt
122	mov	4(up), %eax
123	mov	%eax, %ebp		C keep an unshifted copy for the next pass
124	mov	%edx, 4(rp)		C store the result limb
125	shl	%cl, %eax		C high part of the following result limb
126	lea	8(rp), rp
127L(lo1):	mov	8(up), %edx
128	or	%ebx, %eax		C eax = complete result limb
129	mov	%edx, %ebx		C keep an unshifted copy
130	shl	%cl, %edx		C high part for the next result limb
131	lea	8(up), up
132	neg	cnt			C cl = cnt
133	mov	%eax, (rp)		C store the result limb
134	decl	VAR_COUNT
135	jg	L(top)			C repeat while the counter is above zero
136
137	pop	%eax			FRAME_popl()	C recover the return value
C Wind down: store the two highest result limbs.  Reached with cl = cnt
C and the return value in %eax on both paths.
138L(end):
139	shr	%cl, %ebp
140	shr	%cl, %ebx		C top source limb >> cnt, zero filled
141	or	%ebp, %edx
142	mov	SAVE_EBP, %ebp		C restore %ebp
143	mov	%edx, 4(rp)		C second highest result limb
144	mov	%ebx, 8(rp)		C highest result limb
145
C Common exit: restore the remaining saved registers.
146L(quit):
147	mov	SAVE_UP, up		C restore %esi
148	mov	SAVE_EBX, %ebx		C restore %ebx
149	pop	rp			FRAME_popl()	C restore %edi
150	ret				C return value is in %eax
151EPILOGUE()
152ASM_END()
153