xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/core2/lshift.asm (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1dnl  x86-64 mpn_lshift optimised for Conroe/Penryn and Nehalem.
2
3dnl  Copyright 2007, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C	     cycles/limb
34C AMD K8,K9
35C AMD K10
36C AMD bd1
37C AMD bd2
38C AMD bd3
39C AMD bd4
40C AMD zen
41C AMD bobcat
42C AMD jaguar
43C Intel P4
44C Intel core2	 1.32
45C Intel NHM	 1.30	(drops to 2.5 for n > 256)
46C Intel SBR
47C Intel IBR
48C Intel HWL
49C Intel BWL
50C Intel SKL
51C Intel atom
52C Intel SLM
53C VIA nano
54
55C INPUT PARAMETERS
C Symbolic names for the four argument registers used by the routine below.
56define(`rp',	`%rdi')	C base of the area the result limbs are stored to
57define(`up',	`%rsi')	C base of the area the source limbs are loaded from
58define(`n',	`%rdx')	C limb count; later reused as the loop pass counter
59define(`cnt',	`%rcx')	C shift count; always passed through R8() to the shifts
60
C NOTE(review): ABI_SUPPORT, ASM_START, TEXT and ALIGN are macros defined
C outside this file.  Presumably they declare the supported calling
C conventions, open the code section and align the entry point that follows
C to 16 bytes -- confirm against their definitions.
61ABI_SUPPORT(DOS64)
62ABI_SUPPORT(STD64)
63
64ASM_START()
65	TEXT
66	ALIGN(16)
C mpn_lshift: shift the n-limb operand at up left by cnt bits and store the
C n-limb result at rp.  Limbs are 64-bit words (all addressing is scaled by 8).
C
C Notation used below: u[i] is source limb i and r[i] is result limb i, with
C index 0 at the lowest address.
C
C Return value: rax is cleared on entry and then receives, through a single
C shld, the bits shifted out of the most significant source limb u[n-1].
C
C Limbs are processed from the highest address downwards.  The code first
C dispatches on n mod 4 to one of four set-up blocks.  Each block biases up
C and rp, preloads the top source limbs and enters the 4-way unrolled loop at
C the matching label.  After the dispatch n is shifted right by 2 and serves
C as the count of loop passes.
C
C Inside the loop r11, r8, r9, r10 form a rotating window of consecutive
C source limbs; each shld merges a limb with the top bits of the limb below.
C
C NOTE(review): n = 0 is not handled; it would take the L(b00) path, load
C from below up and run the loop with a wrapped counter.  Presumably callers
C guarantee n >= 1 -- confirm.
C NOTE(review): FUNC_ENTRY, FUNC_EXIT, R8 and R32 are macros defined outside
C this file.  R8 and R32 presumably name the 8-bit and 32-bit sub-registers,
C and FUNC_ENTRY/FUNC_EXIT presumably adapt the DOS64 calling convention to
C the register assignment above -- confirm.
67PROLOGUE(mpn_lshift)
68	FUNC_ENTRY(4)
69
70	xor	R32(%rax), R32(%rax)	C clear rax so it ends up holding only the shifted-out bits
71
72	test	$1, R8(n)	C dispatch on the two low bits of n
73	jnz	L(bx1)
74L(bx0):	test	$2, R8(n)
75	jnz	L(b10)
76
C n mod 4 = 0
77L(b00):	lea	-8(up,n,8), up	C up -> u[n-1]
78	lea	16(rp,n,8), rp	C bias rp so that -24(rp) at L(00) is r[n-1]
79	mov	(up), %r10	C r10 = u[n-1]
80	mov	-8(up), %r11	C r11 = u[n-2]
81	shld	R8(cnt), %r10, %rax	C rax = bits shifted out of u[n-1]
82	mov	-16(up), %r8	C r8 = u[n-3]
83	shr	$2, n	C n = number of loop passes
84	jmp	L(00)
85
86L(bx1):	test	$2, R8(n)
87	jnz	L(b11)
88
C n mod 4 = 1
89L(b01):	lea	-16(up,n,8), up	C up -> u[n-2]
90	lea	8(rp,n,8), rp	C bias rp so that -16(rp) is r[n-1]
91	mov	8(up), %r9	C r9 = u[n-1]
92	shld	R8(cnt), %r9, %rax	C rax = bits shifted out of u[n-1]
93	shr	$2, n	C n = number of loop passes
94	jz	L(1)	C n was 1: only the lowest limb remains
95	mov	(up), %r10	C r10 = u[n-2]
96	mov	-8(up), %r11	C r11 = u[n-3]
97	jmp	L(01)
98
C n mod 4 = 2
99L(b10):	lea	-24(up,n,8), up	C up -> u[n-3]
100	lea	(rp,n,8), rp	C bias rp so that -8(rp) is r[n-1]
101	mov	16(up), %r8	C r8 = u[n-1]
102	mov	8(up), %r9	C r9 = u[n-2]
103	shld	R8(cnt), %r8, %rax	C rax = bits shifted out of u[n-1]
104	shr	$2, n	C n = number of loop passes
105	jz	L(2)	C n was 2: only the two lowest limbs remain
106	mov	(up), %r10	C r10 = u[n-3]
107	jmp	L(10)
108
C n mod 4 = 3
109	ALIGN(16)
110L(b11):	lea	-32(up,n,8), up	C up = address of u[n-4]; for n = 3 offset 0 is never loaded
111	lea	-8(rp,n,8), rp	C rp -> r[n-1]
112	mov	24(up), %r11	C r11 = u[n-1]
113	mov	16(up), %r8	C r8 = u[n-2]
114	mov	8(up), %r9	C r9 = u[n-3]
115	shld	R8(cnt), %r11, %rax	C rax = bits shifted out of u[n-1]
116	shr	$2, n	C n = number of loop passes
117	jz	L(end)	C n was 3: go straight to the wind-down
C otherwise execution falls through into L(top)
118
C Main loop: each pass loads four source limbs and stores four result limbs,
C stepping both pointers down by 32 bytes.
119	ALIGN(16)
120L(top):	shld	R8(cnt), %r8, %r11	C r11 = r11 << cnt, filled from the top bits of r8
121	mov	(up), %r10	C fetch the next lower source limb
122	mov	%r11, (rp)	C store finished result limb
123L(10):	shld	R8(cnt), %r9, %r8	C r8 = r8 << cnt, filled from the top bits of r9
124	mov	-8(up), %r11	C fetch the next lower source limb
125	mov	%r8, -8(rp)	C store finished result limb
126L(01):	shld	R8(cnt), %r10, %r9	C r9 = r9 << cnt, filled from the top bits of r10
127	mov	-16(up), %r8	C fetch the next lower source limb
128	mov	%r9, -16(rp)	C store finished result limb
129L(00):	shld	R8(cnt), %r11, %r10	C r10 = r10 << cnt, filled from the top bits of r11
130	mov	-24(up), %r9	C fetch the next lower source limb
131	add	$-32, up	C step the source pointer down four limbs
132	mov	%r10, -24(rp)	C store finished result limb, still using the old rp
133	add	$-32, rp	C step the destination pointer down four limbs
134	dec	n
135	jnz	L(top)
136
C Wind-down: three source limbs remain in r11, r8, r9, most significant first.
C L(2) and L(1) are also entered directly when n is 2 or 1.
137L(end):	shld	R8(cnt), %r8, %r11
138	mov	%r11, (rp)
139L(2):	shld	R8(cnt), %r9, %r8
140	mov	%r8, -8(rp)
141L(1):	shl	R8(cnt), %r9	C lowest limb: nothing below it, so zeros are shifted in
142	mov	%r9, -16(rp)	C store r[0]
143	FUNC_EXIT()
144	ret	C rax still holds the shifted-out bits
145EPILOGUE()
146