xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/lshift.asm (revision f0fde9902fd4d72ded2807793acc7bfaa1ebf243)
1dnl  AMD64 mpn_lshift -- mpn left shift.
2
3dnl  Copyright 2003, 2005, 2007, 2009, 2011, 2012, 2018 Free Software
4dnl  Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of either:
10dnl
11dnl    * the GNU Lesser General Public License as published by the Free
12dnl      Software Foundation; either version 3 of the License, or (at your
13dnl      option) any later version.
14dnl
15dnl  or
16dnl
17dnl    * the GNU General Public License as published by the Free Software
18dnl      Foundation; either version 2 of the License, or (at your option) any
19dnl      later version.
20dnl
21dnl  or both in parallel, as here.
22dnl
23dnl  The GNU MP Library is distributed in the hope that it will be useful, but
24dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26dnl  for more details.
27dnl
28dnl  You should have received copies of the GNU General Public License and the
29dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
30dnl  see https://www.gnu.org/licenses/.
31
32include(`../config.m4')
33
34
35C	     cycles/limb   cycles/limb cnt=1
36C AMD K8,K9	 2.375		 1.375
37C AMD K10	 2.375		 1.375
38C Intel P4	 8		10.5
39C Intel core2	 2.11		 4.28
40C Intel corei	 ?		 ?
41C Intel atom	 5.75		 3.5
42C VIA nano	 3.5		 2.25
43
44
45C INPUT PARAMETERS
C Symbolic m4 names for the registers that hold the four arguments.
C In the code that follows, rp is used only as a store base, up only as a
C load base, and n as the limb index (always scaled by 8 in addressing).
C The cnt name is not referenced by the visible code, which spells the shift
C count register as %rcx (via R32 and R8) directly.
46define(`rp',	`%rdi')
47define(`up',	`%rsi')
48define(`n',	`%rdx')
49define(`cnt',	`%rcx')
50
C NOTE(review): ABI_SUPPORT, ASM_START, TEXT and ALIGN are macros defined
C outside this file (presumably reached through the config.m4 include).  The
C descriptions below are inferred from their names -- confirm at the
C definitions.
C Presumably declares the ABIs this file may be built for.
51ABI_SUPPORT(DOS64)
52ABI_SUPPORT(STD64)
53
C Presumably opens the assembly output, selects the code section and requests
C 32-byte alignment ahead of the function entry that follows.
54ASM_START()
55	TEXT
56	ALIGN(32)
C mpn_lshift -- shift an n-limb operand left by a bit count, 64-bit limbs.
C
C Register roles as used below:
C   rp (%rdi)  base of the destination limbs (stores only)
C   up (%rsi)  base of the source limbs (loads only)
C   n  (%rdx)  limb count, decremented as limbs are completed
C   %rcx       shift count, only %cl feeds the shift instructions
C   %rax       return value, up[n-1] >> (64-cnt), that is the bits shifted
C              out of the most significant limb, never written again after
C              the entry sequence
C   %r8-%r11   scratch
C
C Every result limb is formed as
C   rp[i] = (up[i] << cnt) | (up[i-1] >> (64-cnt))	for i > 0
C   rp[0] =  up[0] << cnt
C and limbs are stored from the highest index down to index 0.
C
C %cl is flipped between cnt (lsh count) and -cnt (rsh count) using neg.  x86
C masks the count of a 64-bit shift to its low 6 bits, so -cnt acts as
C 64-cnt.
C
C Flow: the entry code dispatches on (n+1) mod 4 and peels 0, 1, 2 or 3 top
C limbs so that n is 3 mod 4 on reaching L(rlx).  L(top) then completes four
C limbs per iteration, L(end) completes rp[2] and rp[1], and L(ast) stores
C rp[0].  n = 1 goes straight to L(ast), n = 2 peels one limb and then goes to
C L(ast).
C
C NOTE(review): the code looks like it assumes n >= 1 and 1 <= cnt <= 63
C (n = 0 would load from up[-1], cnt = 0 would make the rsh count 0 as well).
C Confirm against the documented mpn_lshift contract.
57PROLOGUE(mpn_lshift)
C NOTE(review): FUNC_ENTRY and FUNC_EXIT are macros defined outside this file,
C presumably adapting the 4 incoming arguments for the DOS64 ABI -- confirm.
58	FUNC_ENTRY(4)
59	neg	R32(%rcx)		C put rsh count in cl
60	mov	-8(up,n,8), %rax
61	shr	R8(%rcx), %rax		C function return value
62
C Dispatch on (n+1) mod 4.  Only the low 32 bits of n+1 are formed, which is
C sufficient for taking the value mod 4.
63	neg	R32(%rcx)		C put lsh count in cl
64	lea	1(n), R32(%r8)
65	and	$3, R32(%r8)
66	je	L(rlx)			C jump for n = 3, 7, 11, ...
67
68	dec	R32(%r8)
69	jne	L(1)
70C	n = 4, 8, 12, ...
C Peel one limb: complete rp[n-1], leaving n = 3 mod 4.  cl still holds the
C rsh count at the jump, L(rll) flips it back.
71	mov	-8(up,n,8), %r10
72	shl	R8(%rcx), %r10
73	neg	R32(%rcx)		C put rsh count in cl
74	mov	-16(up,n,8), %r8
75	shr	R8(%rcx), %r8
76	or	%r8, %r10
77	mov	%r10, -8(rp,n,8)
78	dec	n
79	jmp	L(rll)
80
81L(1):	dec	R32(%r8)
82	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
83C	n = 2, 6, 10, 14, ...
C Peel one limb: complete rp[n-1], leaving n = 1 mod 4, then fall into L(1x)
C with the lsh count restored in cl.
84	mov	-8(up,n,8), %r10
85	shl	R8(%rcx), %r10
86	neg	R32(%rcx)		C put rsh count in cl
87	mov	-16(up,n,8), %r8
88	shr	R8(%rcx), %r8
89	or	%r8, %r10
90	mov	%r10, -8(rp,n,8)
91	dec	n
92	neg	R32(%rcx)		C put lsh count in cl
C Here n = 1 mod 4 and cl holds the lsh count.  With a single limb left only
C rp[0] remains, otherwise peel two limbs (rp[n-1] and rp[n-2]) to reach
C n = 3 mod 4.
93L(1x):
94	cmp	$1, n
95	je	L(ast)
96	mov	-8(up,n,8), %r10
97	shl	R8(%rcx), %r10
98	mov	-16(up,n,8), %r11
99	shl	R8(%rcx), %r11
100	neg	R32(%rcx)		C put rsh count in cl
101	mov	-16(up,n,8), %r8
102	mov	-24(up,n,8), %r9
103	shr	R8(%rcx), %r8
104	or	%r8, %r10
105	shr	R8(%rcx), %r9
106	or	%r9, %r11
107	mov	%r10, -8(rp,n,8)
108	mov	%r11, -16(rp,n,8)
109	sub	$2, n
110
C L(rll) is entered with the rsh count in cl, L(rlx) with the lsh count.
C From here n = 3 mod 4.  Start the two topmost remaining limbs:
C r10 = up[n-1] << cnt, r11 = up[n-2] << cnt.
111L(rll):	neg	R32(%rcx)		C put lsh count in cl
112L(rlx):	mov	-8(up,n,8), %r10
113	shl	R8(%rcx), %r10
114	mov	-16(up,n,8), %r11
115	shl	R8(%rcx), %r11
116
C The subtraction borrows exactly when n was 3, in which case only the tail
C at L(end) is left to do.
117	sub	$4, n			C				      4
118	jb	L(end)			C				      2
119	ALIGN(16)
C Loop invariant on entry (n already reduced by 4, cl = lsh count):
C   r10 = up[n+3] << cnt and r11 = up[n+2] << cnt are pending.
C Each pass stores rp[n+3], rp[n+2], rp[n+1], rp[n] and leaves the lsh halves
C of the next two limbs, up[n-1] and up[n-2], in r10 and r11.
120L(top):
121	C finish stuff from lsh block
122	neg	R32(%rcx)		C put rsh count in cl
123	mov	16(up,n,8), %r8
124	mov	8(up,n,8), %r9
125	shr	R8(%rcx), %r8
126	or	%r8, %r10
127	shr	R8(%rcx), %r9
128	or	%r9, %r11
129	mov	%r10, 24(rp,n,8)
130	mov	%r11, 16(rp,n,8)
131	C start two new rsh
132	mov	0(up,n,8), %r8
133	mov	-8(up,n,8), %r9
134	shr	R8(%rcx), %r8
135	shr	R8(%rcx), %r9
136
137	C finish stuff from rsh block
138	neg	R32(%rcx)		C put lsh count in cl
139	mov	8(up,n,8), %r10
140	mov	0(up,n,8), %r11
141	shl	R8(%rcx), %r10
142	or	%r10, %r8
143	shl	R8(%rcx), %r11
144	or	%r11, %r9
145	mov	%r8, 8(rp,n,8)
146	mov	%r9, 0(rp,n,8)
147	C start two new lsh
148	mov	-8(up,n,8), %r10
149	mov	-16(up,n,8), %r11
150	shl	R8(%rcx), %r10
151	shl	R8(%rcx), %r11
152
153	sub	$4, n
154	jae	L(top)			C				      2
C Tail: three limbs remain.  r10 = up[2] << cnt and r11 = up[1] << cnt are
C pending, merge in the rsh halves of up[1] and up[0] and store rp[2], rp[1].
155L(end):
156	neg	R32(%rcx)		C put rsh count in cl
157	mov	8(up), %r8
158	shr	R8(%rcx), %r8
159	or	%r8, %r10
160	mov	(up), %r9
161	shr	R8(%rcx), %r9
162	or	%r9, %r11
163	mov	%r10, 16(rp)
164	mov	%r11, 8(rp)
165
166	neg	R32(%rcx)		C put lsh count in cl
C Least significant limb: nothing is shifted in from below, so rp[0] is just
C up[0] << cnt.  cl holds the lsh count on every path that reaches here.
167L(ast):	mov	(up), %r10
168	shl	R8(%rcx), %r10
169	mov	%r10, (rp)
170	FUNC_EXIT()
171	ret
172EPILOGUE()
173