xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/lshiftc.asm (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.

dnl  Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.75
C AMD K10	 2.75
C Intel P4	 ?
C Intel core2	 ?
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 3.75


C INPUT PARAMETERS
C Symbolic names for the four argument registers.  %rdi, %rsi, %rdx and %rcx
C are the first four integer argument registers of the System V AMD64 ABI.
define(`rp',	`%rdi')		C destination limb pointer (stores go here)
define(`up',	`%rsi')		C source limb pointer (loads come from here)
define(`n',	`%rdx')		C number of limbs
define(`cnt',	`%rcx')		C shift count; the code uses it through cl

C NOTE(review): ABI_SUPPORT, ASM_START, TEXT and ALIGN are macros supplied by
C config.m4, which is not visible from this file.  Presumably ABI_SUPPORT
C lists the calling conventions this file may be built for -- confirm there.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
C mpn_lshiftc: left-shift the n-limb number at up by cnt bits and store the
C one's complement of the result at rp.  The bits shifted out of the top limb
C are returned in %rax, not complemented.
C
C Writing rsh for 64-cnt, the stores performed below are
C	rp[i] = ~((up[i] << cnt) | (up[i-1] >> rsh))	for i = n-1 ... 1
C	rp[0] = ~(up[0] << cnt)
C and the return value is up[n-1] >> rsh.
C
C Register roles (rp, up, n and cnt are the names set up with define above):
C	rp		destination limb pointer
C	up		source limb pointer
C	n		limb count, decreased as limbs are completed
C	%rcx (cnt)	shift count; neg flips cl between cnt (lsh) and -cnt (rsh)
C	%rax		return value
C	%r8 - %r11	scratch limbs
C
C Preconditions implied by the code:
C	n >= 1		up[n-1] is loaded unconditionally at entry
C	1 <= cnt <= 63	rsh is formed as -cnt and a 64-bit shift uses its count
C			mod 64, which equals 64-cnt only when cnt is non-zero
C
C Structure: limbs are produced from most to least significant.  Up to three
C top limbs are peeled off so that n == 3 (mod 4) at L(rlx); L(top) then
C emits four limbs per iteration, L(end) emits rp[2] and rp[1], and L(ast)
C emits rp[0].
C
C NOTE(review): FUNC_ENTRY/FUNC_EXIT, R32/R8 and L() come from config.m4,
C which is not visible here.  Presumably FUNC_ENTRY(4)/FUNC_EXIT() adapt the
C DOS64 calling convention and R32/R8 name the 32-bit/8-bit parts of a
C register -- confirm against the macro definitions.
PROLOGUE(mpn_lshiftc)
	FUNC_ENTRY(4)
	neg	R32(%rcx)		C put rsh count in cl
	mov	-8(up,n,8), %rax	C rax = up[n-1], the most significant limb
	shr	R8(%rcx), %rax		C function return value

	neg	R32(%rcx)		C put lsh count in cl
	lea	1(n), R32(%r8)		C r8 = n+1 (flags untouched)
	and	$3, R32(%r8)		C (n+1) mod 4 selects the peel path
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Peel one limb, rp[n-1], leaving n == 3 (mod 4).
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	jmp	L(rll)			C L(rll) restores the lsh count first

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Peel one limb, rp[n-1], leaving n == 1 (mod 4) for L(1x).
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	neg	R32(%rcx)		C put lsh count in cl
L(1x):
C	Here n == 1 (mod 4) and cl holds the lsh count.
	cmp	$1, n
	je	L(ast)			C only rp[0] is left to produce
C	Peel two limbs, rp[n-1] and rp[n-2], leaving n == 3 (mod 4).
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	mov	-24(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, -8(rp,n,8)
	mov	%r11, -16(rp,n,8)
	sub	$2, n

L(rll):	neg	R32(%rcx)		C put lsh count in cl
C	Preload the high halves of the next two result limbs:
C	r10 = up[n-1] << cnt, r11 = up[n-2] << cnt.
L(rlx):	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11

	sub	$4, n			C				      4
	jb	L(end)			C				      2
	ALIGN(16)
L(top):
	C finish stuff from lsh block
	neg	R32(%rcx)		C put rsh count in cl
	mov	16(up,n,8), %r8
	mov	8(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 24(rp,n,8)
	mov	%r11, 16(rp,n,8)
	C start two new rsh
	mov	0(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shr	R8(%rcx), %r8
	shr	R8(%rcx), %r9

	C finish stuff from rsh block
	neg	R32(%rcx)		C put lsh count in cl
	mov	8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shl	R8(%rcx), %r10
	or	%r10, %r8
	shl	R8(%rcx), %r11
	or	%r11, %r9
	not	%r8
	not	%r9
	mov	%r8, 8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new lsh
	mov	-8(up,n,8), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r10
	shl	R8(%rcx), %r11

	sub	$4, n
	jae	L(top)			C				      2
L(end):
C	n has gone below zero, so the last three limbs use fixed offsets.
C	r10 and r11 hold up[2] << cnt and up[1] << cnt.
	neg	R32(%rcx)		C put rsh count in cl
	mov	8(up), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 16(rp)
	mov	%r11, 8(rp)

	neg	R32(%rcx)		C put lsh count in cl
C	Lowest limb: nothing is shifted in from below, so rp[0] = ~(up[0] << cnt).
L(ast):	mov	(up), %r10
	shl	R8(%rcx), %r10
	not	%r10
	mov	%r10, (rp)
	FUNC_EXIT()
	ret
EPILOGUE()
183