xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/aorrlshC_n.asm (revision d11b170b9000ada93db553723522a63d5deac310)
dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]

dnl  Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


C	     cycles/limb
C AMD K8,K9	 2
C AMD K10	 2
C Intel P4	 ?
C Intel core2	 3
C Intel NHM	 2.75
C Intel SBR	 2.55
C Intel atom	 ?
C VIA nano	 ?

32C INPUT PARAMETERS
33define(`rp',	`%rdi')
34define(`up',	`%rsi')
35define(`vp',	`%rdx')
36define(`n',	`%rcx')
37
38define(M, eval(m4_lshift(1,LSH)))
39
40ABI_SUPPORT(DOS64)
41ABI_SUPPORT(STD64)
43ASM_START()
44	TEXT
45	ALIGN(16)
46PROLOGUE(func)
47	FUNC_ENTRY(4)
48	push	%r12
49	push	%r13
50	push	%r14
51	push	%r15
52
53	mov	(vp), %r8
54	lea	(,%r8,M), %r12
55	shr	$RSH, %r8
56
57	mov	R32(n), R32(%rax)
58	lea	(rp,n,8), rp
59	lea	(up,n,8), up
60	lea	(vp,n,8), vp
61	neg	n
62	and	$3, R8(%rax)
63	je	L(b00)
64	cmp	$2, R8(%rax)
65	jc	L(b01)
66	je	L(b10)
67
68L(b11):	mov	8(vp,n,8), %r10
69	lea	(%r8,%r10,M), %r14
70	shr	$RSH, %r10
71	mov	16(vp,n,8), %r11
72	lea	(%r10,%r11,M), %r15
73	shr	$RSH, %r11
74	ADDSUB	(up,n,8), %r12
75	ADCSBB	8(up,n,8), %r14
76	ADCSBB	16(up,n,8), %r15
77	sbb	R32(%rax), R32(%rax)		  C save carry for next
78	mov	%r12, (rp,n,8)
79	mov	%r14, 8(rp,n,8)
80	mov	%r15, 16(rp,n,8)
81	add	$3, n
82	js	L(top)
83	jmp	L(end)
84
85L(b01):	mov	%r8, %r11
86	ADDSUB	(up,n,8), %r12
87	sbb	R32(%rax), R32(%rax)		  C save carry for next
88	mov	%r12, (rp,n,8)
89	add	$1, n
90	js	L(top)
91	jmp	L(end)
92
93L(b10):	mov	8(vp,n,8), %r11
94	lea	(%r8,%r11,M), %r15
95	shr	$RSH, %r11
96	ADDSUB	(up,n,8), %r12
97	ADCSBB	8(up,n,8), %r15
98	sbb	R32(%rax), R32(%rax)		  C save carry for next
99	mov	%r12, (rp,n,8)
100	mov	%r15, 8(rp,n,8)
101	add	$2, n
102	js	L(top)
103	jmp	L(end)
104
105L(b00):	mov	8(vp,n,8), %r9
106	mov	16(vp,n,8), %r10
107	jmp	L(e00)
108
109	ALIGN(16)
110L(top):	mov	16(vp,n,8), %r10
111	mov	(vp,n,8), %r8
112	mov	8(vp,n,8), %r9
113	lea	(%r11,%r8,M), %r12
114	shr	$RSH, %r8
115L(e00):	lea	(%r8,%r9,M), %r13
116	shr	$RSH, %r9
117	mov	24(vp,n,8), %r11
118	lea	(%r9,%r10,M), %r14
119	shr	$RSH, %r10
120	lea	(%r10,%r11,M), %r15
121	shr	$RSH, %r11
122	add	R32(%rax), R32(%rax)		  C restore carry
123	ADCSBB	(up,n,8), %r12
124	ADCSBB	8(up,n,8), %r13
125	ADCSBB	16(up,n,8), %r14
126	ADCSBB	24(up,n,8), %r15
127	mov	%r12, (rp,n,8)
128	mov	%r13, 8(rp,n,8)
129	mov	%r14, 16(rp,n,8)
130	sbb	R32(%rax), R32(%rax)		  C save carry for next
131	mov	%r15, 24(rp,n,8)
132	add	$4, n
133	js	L(top)
134L(end):
135
136ifelse(ADDSUB,add,`
137	sub	R32(%r11), R32(%rax)
138	neg	R32(%rax)
139',`
140	add	R32(%r11), R32(%rax)
141	movslq	R32(%rax), %rax
142')
143	pop	%r15
144	pop	%r14
145	pop	%r13
146	pop	%r12
147	FUNC_EXIT()
148	ret
149EPILOGUE()
150