xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/atom/lshift.asm (revision 75f6d617e282811cb173c2ccfbf5df0dd71f7045)
1dnl  Intel Atom mpn_lshift -- mpn left shift.
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or
10dnl  modify it under the terms of the GNU Lesser General Public License as
11dnl  published by the Free Software Foundation; either version 3 of the
12dnl  License, or (at your option) any later version.
13dnl
14dnl  The GNU MP Library is distributed in the hope that it will be useful,
15dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
16dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17dnl  Lesser General Public License for more details.
18dnl
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
25C			unsigned cnt);
26
27C				  cycles/limb
28C				cnt!=1	cnt==1
29C P5
30C P6 model 0-8,10-12
31C P6 model 9  (Banias)
32C P6 model 13 (Dothan)
33C P4 model 0  (Willamette)
34C P4 model 1  (?)
35C P4 model 2  (Northwood)
36C P4 model 3  (Prescott)
37C P4 model 4  (Nocona)
38C Intel Atom			 5	 2.5
39C AMD K6
40C AMD K7
41C AMD K8
42C AMD K10
43
C Stack-frame slots of the four arguments.  The numbers ascend in the order
C of the prototype parameters: dst, src, size, cnt.
C NOTE(review): defframe is defined outside this file; presumably the number
C is the byte offset from the entry stack pointer, adjusted by FRAME - confirm.
44defframe(PARAM_CNT, 16)
45defframe(PARAM_SIZE,12)
46defframe(PARAM_SRC,  8)
47defframe(PARAM_DST,  4)
48
49dnl  re-use parameter space
C Each save slot below is an alias of an argument slot.  The function body
C loads every argument into a register before it stores to the matching
C alias, so no argument is lost:
C   SAVE_UP   (cnt slot)   holds the incoming value of the up register
C   VAR_COUNT (size slot)  holds the loop counter of the general path
C   SAVE_EBX  (src slot)   holds the incoming ebx
C   SAVE_EBP  (dst slot)   holds the incoming ebp
50define(SAVE_UP,`PARAM_CNT')
51define(VAR_COUNT,`PARAM_SIZE')
52define(SAVE_EBX,`PARAM_SRC')
53define(SAVE_EBP,`PARAM_DST')
54
C Symbolic register names used by the function body.
55define(`rp',  `%edi')
56define(`up',  `%esi')
57define(`cnt',  `%ecx')
58
59ASM_START()
60	TEXT
61	ALIGN(8)
62deflit(`FRAME',0)
C mpn_lshift -- shift the size-limb operand at src left by cnt bits and store
C the size result limbs at dst.  Limbs are 32 bits wide here: all pointer
C arithmetic is scaled by 4 and only 32-bit registers are used.
C
C Registers:  rp = destination pointer, up = source pointer, cl = shift
C count, edx = size on entry.  The value left in eax at each ret is the top
C source limb shifted right with the negated count, i.e. the bits pushed out
C of the top limb (on the shift-by-1 path: the final carry).
C esi, edi, ebx and ebp are saved before they are overwritten and restored
C on every exit path that changed them.
C NOTE(review): the code appears to assume size >= 1 and 1 <= cnt <= 31;
C confirm against the documented mpn_lshift contract.
63PROLOGUE(mpn_lshift)
64	mov	PARAM_CNT, cnt
65	mov	PARAM_SIZE, %edx
C The cnt argument has been read, so its slot can now hold the incoming up.
66	mov	up, SAVE_UP
67	mov	PARAM_SRC, up
68	push	rp			FRAME_pushl()
69	mov	PARAM_DST, rp
70
71C We can use faster code for shift-by-1 under certain conditions.
C L(special) walks from the least significant limb upwards, so it is taken
C only when dst is at or below src, or at or above the end of src.
72	cmp	$1,cnt
73	jne	L(normal)
74	cmpl	rp, up
75	jnc	L(special)		C jump if s_ptr >= res_ptr
76	leal	(up,%edx,4),%eax
77	cmpl	%eax,rp
78	jnc	L(special)		C jump if res_ptr >= s_ptr + size
79
C General path: limbs are processed from the most significant end downwards.
80L(normal):
81	lea	-4(up,%edx,4), up	C up = address of the top source limb
82	mov	%ebx, SAVE_EBX
83	lea	-4(rp,%edx,4), rp	C rp = address of the top result limb
84
85	shr	%edx			C edx = size / 2, CF = low bit of size
86	mov	(up), %eax		C eax = top source limb
87	mov	%edx, VAR_COUNT
88	jnc	L(evn)			C CF still from shr (mov keeps flags): size even
89
C size is odd.
90	mov	%eax, %ebx
91	shl	%cl, %ebx		C ebx = top limb << cnt
C cl = -cnt.  32-bit shifts use only the low 5 bits of cl, so the right
C shifts below shift by 32 - cnt.
92	neg	cnt
93	shr	%cl, %eax		C eax = bits shifted out of the top limb
94	test	%edx, %edx
95	jnz	L(gt1)
96	mov	%ebx, (rp)		C size == 1: the only result limb
97	jmp	L(quit)			C ebp was never touched on this path
98
99L(gt1):	mov	%ebp, SAVE_EBP
C Keep the return value on the stack while eax is used as scratch.
C NOTE(review): this push has no FRAME_pushl(), unlike the push before
C L(top).  Presumably deliberate: this path jumps to L(lo1), which follows
C that FRAME_pushl() in the text, and an extra one here would also shift the
C frame offsets used in L(evn) - confirm against the FRAME macro definitions.
100	push	%eax
101	mov	-4(up), %eax
102	mov	%eax, %ebp
103	shr	%cl, %eax
104	jmp	L(lo1)
105
C size is even.
106L(evn):	mov	%ebp, SAVE_EBP
107	neg	cnt			C cl = -cnt for the right shifts
108	mov	%eax, %ebp		C ebp = top source limb
109	mov	-4(up), %edx		C edx = next lower source limb
110	shr	%cl, %eax		C eax = bits shifted out of the top limb
111	mov	%edx, %ebx
112	shr	%cl, %edx
113	neg	cnt			C cl = cnt again
114	decl	VAR_COUNT
115	lea	4(rp), rp
116	lea	-4(up), up
117	jz	L(end)			C ZF still from decl (lea keeps flags): size == 2
C Keep the return value on the stack while eax is used as scratch.
118	push	%eax			FRAME_pushl()
119
C Main loop, two limbs per iteration.  On entry to L(top): ebp and ebx hold
C two consecutive unshifted source limbs (ebp the more significant one), edx
C holds the ebx limb shifted right with cl = -cnt, and cl = cnt.  The two
C neg instructions per iteration flip cl between cnt and -cnt.
120	ALIGN(8)
121L(top):	shl	%cl, %ebp
122	or	%ebp, %edx		C edx = completed result limb
123	shl	%cl, %ebx
124	neg	cnt
125	mov	-4(up), %eax
126	mov	%eax, %ebp
127	mov	%edx, -4(rp)
128	shr	%cl, %eax
129	lea	-8(rp), rp
130L(lo1):	mov	-8(up), %edx
131	or	%ebx, %eax		C eax = completed result limb
132	mov	%edx, %ebx
133	shr	%cl, %edx
134	lea	-8(up), up
135	neg	cnt
136	mov	%eax, (rp)
137	decl	VAR_COUNT
138	jg	L(top)
139
C Recover the return value saved before the loop.
140	pop	%eax			FRAME_popl()
C Wind down: combine and store the two least significant result limbs.
141L(end):
142	shl	%cl, %ebp
143	shl	%cl, %ebx
144	or	%ebp, %edx
145	mov	SAVE_EBP, %ebp		C restore the incoming ebp
146	mov	%edx, -4(rp)
147	mov	%ebx, -8(rp)		C least significant result limb
148
C Common exit of the general path.
149L(quit):
150	mov	SAVE_UP, up
151	mov	SAVE_EBX, %ebx
152	pop	rp			FRAME_popl()
153	ret
154
C Shift-by-1 path.  Each limb is doubled with add/adc so that the carry flag
C passes the top bit of one limb into the next, working from the least
C significant limb upwards, four limbs per L(sloop) iteration.  The lea, mov
C and dec instructions between the adc instructions leave CF unchanged.
C ebx and ebp are not used on this path, so only up and rp are restored.
155L(special):
156deflit(`FRAME',4)
157	lea	3(%edx), %eax		C size + 3
158	dec	%edx			C size - 1
159	mov	(up), %ecx		C ecx = least significant source limb
160	shr	$2, %eax		C (size + 3) / 4
161	and	$3, %edx		C (size - 1) % 4; and also clears CF
162	jz	L(goloop)		C jmp if  size == 1 (mod 4)
163	shr	%edx
164	jnc	L(odd)			C jump if  size == 3 (mod 4)
165
C size == 0 or 2 (mod 4): handle one limb here to start the carry chain.
166	add	%ecx, %ecx
167	lea	4(up), up
168	mov	%ecx, (rp)
169	mov	(up), %ecx
170	lea	4(rp), rp
171
172	dec	%edx
173	jnz	L(goloop)		C jump if  size == 2 (mod 4)
C Bias both pointers by -8 so that the second half of the unrolled loop body,
C which uses offsets 8 and 12, handles the next two limbs.
174L(odd):	lea	-8(up), up
175	lea	-8(rp), rp
176	jmp	L(sentry)		C reached if size == 0 or 3 (mod 4)
177
178L(sloop):
179	adc	%ecx, %ecx
180	mov	4(up), %edx
181	mov	%ecx, (rp)
182	adc	%edx, %edx
183	mov	8(up), %ecx
184	mov	%edx, 4(rp)
185L(sentry):
186	adc	%ecx, %ecx
187	mov	12(up), %edx
188	mov	%ecx, 8(rp)
189	adc	%edx, %edx
190	lea	16(up), up
191	mov	%edx, 12(rp)
192	lea	16(rp), rp
193	mov	(up), %ecx		C preload the first limb of the next group
194L(goloop):
195	decl	%eax			C eax counts groups; dec leaves CF for the next adc
196	jnz	L(sloop)
197
198L(squit):
199	adc	%ecx, %ecx		C last (most significant) limb
200	mov	%ecx, (rp)
201	adc	%eax, %eax		C eax is 0 here, so eax = final carry
202
203	mov	SAVE_UP, up
204	pop	rp			FRAME_popl()
205	ret
206EPILOGUE()
207ASM_END()
208