xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/atom/rshift.asm (revision 413d532bcc3f62d122e56d92e13ac64825a40baf)
1dnl  Intel Atom mpn_rshift -- mpn right shift.
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  Converted from AMD64 by Marco Bodrato.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or
10dnl  modify it under the terms of the GNU Lesser General Public License as
11dnl  published by the Free Software Foundation; either version 3 of the
12dnl  License, or (at your option) any later version.
13dnl
14dnl  The GNU MP Library is distributed in the hope that it will be useful,
15dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
16dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17dnl  Lesser General Public License for more details.
18dnl
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
25C			unsigned cnt);
26
27C				cycles/limb
28C P5
29C P6 model 0-8,10-12
30C P6 model 9  (Banias)
31C P6 model 13 (Dothan)
32C P4 model 0  (Willamette)
33C P4 model 1  (?)
34C P4 model 2  (Northwood)
35C P4 model 3  (Prescott)
36C P4 model 4  (Nocona)
37C Intel Atom			 5
38C AMD K6
39C AMD K7
40C AMD K8
41C AMD K10
42
C Names for the four stack arguments of mpn_rshift (dst, src, size, cnt).
C defframe is defined outside this file.  NOTE(review): presumably it yields
C an %esp-relative operand whose offset is adjusted by the current FRAME
C value -- confirm against the shared x86 m4 definitions.
43defframe(PARAM_CNT, 16)
44defframe(PARAM_SIZE,12)
45defframe(PARAM_SRC,  8)
46defframe(PARAM_DST,  4)
47
48dnl  re-use parameter space
C The function body reads each argument slot into a register before writing
C to it, so the slots double as spill space: SAVE_UP, SAVE_EBX and SAVE_EBP
C hold the caller values of %esi, %ebx and %ebp, and VAR_COUNT holds the
C loop counter.
49define(SAVE_UP,`PARAM_CNT')
50define(VAR_COUNT,`PARAM_SIZE')
51define(SAVE_EBX,`PARAM_SRC')
52define(SAVE_EBP,`PARAM_DST')
53
C Register aliases used by the function body: rp is loaded from PARAM_DST,
C up from PARAM_SRC and cnt from PARAM_CNT.  cnt is %ecx because the
C variable shifts take their count from %cl.
54define(`rp',  `%edi')
55define(`up',  `%esi')
56define(`cnt',  `%ecx')
57
C mpn_rshift (dst, src, size, cnt)
C
C Shifts the size-limb operand at src right by cnt bits and stores the
C result at dst.  Limbs are 32 bits wide (all loads and stores step by 4
C bytes).  The value left in %eax at return is the low source limb shifted
C left by the negated count, i.e. the bits shifted out at the bottom,
C placed at the top of the limb.
C
C Assumptions (NOTE(review): confirm against the GMP documentation):
C   size >= 1     (up) is read unconditionally
C   1 <= cnt <= 31  a zero count would make the negated count zero as well
C
C Register use inside the function:
C   rp (%edi)   destination pointer
C   up (%esi)   source pointer
C   cnt (%ecx)  shift count; negated back and forth so that %cl holds
C               either the right-shift or the left-shift amount.  32-bit
C               shifts by %cl use only the low five bits of the count, so
C               after neg the effective left-shift amount is 32-cnt.
C   %eax, %edx  limbs being shifted left (high part of a result limb)
C   %ebx, %ebp  limbs being shifted right (low part of a result limb)
58ASM_START()
59	TEXT
60	ALIGN(8)
61deflit(`FRAME',0)
62PROLOGUE(mpn_rshift)
C Load the arguments.  %esi and %ebx are spilled into argument slots that
C have already been read; %edi is saved with a push instead.
63	mov	PARAM_CNT, cnt
64	mov	PARAM_SIZE, %edx
65	mov	up, SAVE_UP
66	mov	PARAM_SRC, up
67	push	rp			FRAME_pushl()
68	mov	PARAM_DST, rp
69	mov	%ebx, SAVE_EBX
70
C %edx = size/2 becomes the loop counter.  The carry out of the shift is the
C low bit of size and selects the odd or the even entry path; the two mov
C instructions in between do not modify the flags.
71	shr	%edx
72	mov	(up), %eax
73	mov	%edx, VAR_COUNT
74	jnc	L(evn)
75
C Odd size.  %ebx = limb 0 shifted right (low part of result limb 0),
C %eax = limb 0 shifted left (the return value).  cnt stays negated here.
76	mov	%eax, %ebx
77	shr	%cl, %ebx
78	neg	cnt
79	shl	%cl, %eax
80	test	%edx, %edx
81	jnz	L(gt1)
C size == 1: the single result limb is complete.  %ebp has not been touched
C on this path, so L(quit) does not need to restore it.
82	mov	%ebx, (rp)
83	jmp	L(quit)
84
C Odd size >= 3.  The return value is parked on the stack because %eax is
C reused as a work register; it is popped again after the loop.  This push
C has no FRAME_pushl().  NOTE(review): presumably deliberate, since control
C jumps to L(lo1), where the FRAME accounting already includes the push
C placed before L(top), while the text that follows here (L(evn)) is
C reached with only one push on the stack -- confirm.
85L(gt1):	mov	%ebp, SAVE_EBP
86	push	%eax
87	mov	4(up), %eax
88	mov	%eax, %ebp
89	shl	%cl, %eax
90	jmp	L(lo1)
91
C Even size.  %eax = limb 0 shifted left (the return value), %edx = limb 1
C shifted left (high part of result limb 0), %ebp and %ebx = unshifted
C copies of limbs 0 and 1 for the right shifts done later.  cnt is negated
C for the left shifts and then negated back.
92L(evn):	mov	%ebp, SAVE_EBP
93	neg	cnt
94	mov	%eax, %ebp
95	mov	4(up), %edx
96	shl	%cl, %eax
97	mov	%edx, %ebx
98	shl	%cl, %edx
99	neg	cnt
C The pointer adjustments use lea, which leaves the flags alone, so the zero
C flag set by decl is still valid at the jz.  With the counter at zero
C (size == 2) the loop is skipped and the return value simply stays in %eax.
100	decl	VAR_COUNT
101	lea	-4(rp), rp
102	lea	4(up), up
103	jz	L(end)
C Park the return value on the stack for the duration of the loop.
104	push	%eax			FRAME_pushl()
105
C Main loop: two result limbs are stored per pass (4(rp) in the first half,
C (rp) in the second half after rp has been advanced by 8).  Each result
C limb is the OR of one limb shifted right and the next limb shifted left.
C cnt is negated twice per pass so that %cl alternates between the two
C shift amounts.
106	ALIGN(8)
107L(top):	shr	%cl, %ebp
108	or	%ebp, %edx
109	shr	%cl, %ebx
110	neg	cnt
111	mov	4(up), %eax
112	mov	%eax, %ebp
113	mov	%edx, 4(rp)
114	shl	%cl, %eax
115	lea	8(rp), rp
C Entry point for odd sizes: %ebx already holds limb 0 shifted right and
C %eax holds limb 1 shifted left.
116L(lo1):	mov	8(up), %edx
117	or	%ebx, %eax
118	mov	%edx, %ebx
119	shl	%cl, %edx
120	lea	8(up), up
121	neg	cnt
122	mov	%eax, (rp)
C Loop while the counter is still positive after the decrement.
123	decl	VAR_COUNT
124	jg	L(top)
125
C Recover the return value parked before the loop.
126	pop	%eax			FRAME_popl()
C Wind-down: combine and store the last two result limbs.  The top limb gets
C no contribution from a higher limb, so it is just %ebx shifted right.
C %ebp is reloaded from its save slot once its last use is done.
127L(end):
128	shr	%cl, %ebp
129	shr	%cl, %ebx
130	or	%ebp, %edx
131	mov	SAVE_EBP, %ebp
132	mov	%edx, 4(rp)
133	mov	%ebx, 8(rp)
134
C Common exit: restore %esi, %ebx and %edi and return with the result in
C %eax.
135L(quit):
136	mov	SAVE_UP, up
137	mov	SAVE_EBX, %ebx
138	pop	rp			FRAME_popl()
139	ret
140EPILOGUE()
141ASM_END()
142