xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/sec_tabselect.asm (revision 3f351f34c6d827cf017cdcff3543f6ec0c88b420)
1dnl  PowerPC-64 mpn_sec_tabselect.
2
3dnl  Contributed to the GNU project by Torbjörn Granlund.
4
5dnl  Copyright 2011-2013 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of either:
11dnl
12dnl    * the GNU Lesser General Public License as published by the Free
13dnl      Software Foundation; either version 3 of the License, or (at your
14dnl      option) any later version.
15dnl
16dnl  or
17dnl
18dnl    * the GNU General Public License as published by the Free Software
19dnl      Foundation; either version 2 of the License, or (at your option) any
20dnl      later version.
21dnl
22dnl  or both in parallel, as here.
23dnl
24dnl  The GNU MP Library is distributed in the hope that it will be useful, but
25dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27dnl  for more details.
28dnl
29dnl  You should have received copies of the GNU General Public License and the
30dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
31dnl  see https://www.gnu.org/licenses/.
32
33include(`../config.m4')
34
35C                   cycles/limb
36C POWER3/PPC630		 1.75
37C POWER4/PPC970		 2.0
38C POWER5		 ?
39C POWER6		 5.0
40C POWER7		 1.75
41
C Register names.  rp, tp, n, nents and which are read as inputs by the
C routine below; i, j, stride and mask are temporaries it computes itself.
42define(`rp',     `r3')	C store pointer for the selected limbs, advanced as they are written
43define(`tp',     `r4')	C load pointer into the table
44define(`n',      `r5')	C limbs per table entry, 8 bytes each
45define(`nents',  `r6')	C entry count, copied into CTR for each pass
46define(`which',  `r7')	C zero-based index of the entry to keep
47
48define(`i',      `r8')	C countdown copy of which, reaches 0 at the wanted entry
49define(`j',      `r9')	C counter for the 4-limb outer loop
50define(`stride', `r12')	C n * 8, byte distance from one entry to the next
51define(`mask',   `r11')	C all ones for the wanted entry, zero for every other
52
53
C mpn_sec_tabselect -- copy one entry of a table to rp while reading every
C entry, so that no branch and no load address depends on which.
C
C Inputs, using the register names defined earlier in this file:
C   rp     where the n selected limbs are stored
C   tp     table base; entry k starts at tp + k*n*8
C   n      limbs per entry, 8 bytes each
C   nents  number of entries; all of them are loaded on every pass
C   which  zero-based index of the entry whose limbs end up at rp
C
C Method: the limbs of an entry are handled in groups of 4, then 2, then 1.
C For each group the code walks all nents entries, ANDs the loaded limbs
C with a mask that is all ones only at entry number which, and ORs the
C result into accumulators that start at zero.  The accumulators are then
C stored to rp.
C
C Registers: r27-r31 are saved at negative offsets from r1 and reloaded
C before blr; r1 itself is never changed.  r0, r3, r4 and r8-r12 are
C overwritten, as are CTR, cr0 and the carry bit.
C
C NOTE(review): each entry loop is bottom-tested with bdnz, so nents = 0
C would wrap CTR instead of skipping the loop -- presumably callers always
C pass nents >= 1; confirm.
54ASM_START()
55PROLOGUE(mpn_sec_tabselect)
56	addic.	j, n, -4		C j = n - 4; cr0 tells whether a full 4-limb group exists
57	std	r31, -8(r1)		C save r27-r31 below r1 without moving r1
58	std	r30, -16(r1)
59	std	r29, -24(r1)
60	std	r28, -32(r1)
61	std	r27, -40(r1)
62	sldi	stride, n, 3		C stride = n * 8 bytes
63
64	blt	cr0, L(outer_end)	C n < 4: skip the 4-limb loop, only tails remain
C Outer loop: one pass per group of 4 limbs.
65L(outer_top):
66	mtctr	nents			C CTR = number of entries to scan in this pass
67	mr	r10, tp			C remember where this group starts in entry 0
68	li	r28, 0			C r28-r31 accumulate the four selected limbs
69	li	r29, 0
70	li	r30, 0
71	li	r31, 0
72	addic.	j, j, -4		C cr0 set here is tested by bge after the inner loop;
C					  nothing in between uses a record form
73	mr	i, which		C restart the countdown for this pass
74
75	ALIGN(16)
76L(top):	addic	i, i, -1		C set carry iff i != 0
77	subfe	mask, mask, mask	C mask = carry - 1: all ones iff i was 0, else zero
78	ld	r0, 0(tp)		C limbs 0 and 1 of this group from the current entry
79	ld	r27, 8(tp)
80	and	r0, r0, mask		C zero the limbs unless this is the wanted entry
81	and	r27, r27, mask
82	or	r28, r28, r0		C merge into the accumulators
83	or	r29, r29, r27
84	ld	r0, 16(tp)		C limbs 2 and 3, same treatment
85	ld	r27, 24(tp)
86	and	r0, r0, mask
87	and	r27, r27, mask
88	or	r30, r30, r0
89	or	r31, r31, r27
90	add	tp, tp, stride		C same group, next entry
91	bdnz	L(top)			C loop until CTR reaches zero
92
93	std	r28, 0(rp)		C write the four selected limbs
94	std	r29, 8(rp)
95	std	r30, 16(rp)
96	std	r31, 24(rp)
97	addi	tp, r10, 32		C back to entry 0, advanced to the next group
98	addi	rp, rp, 32
99	bge	cr0, L(outer_top)	C j still >= 0 after its decrement: another full group
100L(outer_end):
101
C Tail of two limbs, taken when bit 1 of n is set.
102	rldicl.	r0, n, 63, 63		C r0 = bit 1 of n
103	beq	cr0, L(b0x)
104L(b1x):	mtctr	nents
105	mr	r10, tp			C remember the start of the pair in entry 0
106	li	r28, 0			C r28 and r29 accumulate the two selected limbs
107	li	r29, 0
108	mr	i, which
109	ALIGN(16)
110L(tp2):	addic	i, i, -1		C set carry iff i != 0
111	subfe	mask, mask, mask	C mask = carry - 1, as in the 4-limb loop
112	ld	r0, 0(tp)
113	ld	r27, 8(tp)
114	and	r0, r0, mask
115	and	r27, r27, mask
116	or	r28, r28, r0
117	or	r29, r29, r27
118	add	tp, tp, stride		C next entry
119	bdnz	L(tp2)
120	std	r28, 0(rp)
121	std	r29, 8(rp)
122	addi	tp, r10, 16		C back to entry 0, past the pair just handled
123	addi	rp, rp, 16
124
C Tail of one limb, taken when bit 0 of n is set.
125L(b0x):	rldicl.	r0, n, 0, 63		C r0 = bit 0 of n
126	beq	cr0, L(b00)
127L(b01):	mtctr	nents
128	mr	r10, tp			C r10 is written but not read again on this path
129	li	r28, 0			C r28 accumulates the single selected limb
130	mr	i, which
131	ALIGN(16)
132L(tp1):	addic	i, i, -1		C set carry iff i != 0
133	subfe	mask, mask, mask	C mask = carry - 1, as in the 4-limb loop
134	ld	r0, 0(tp)
135	and	r0, r0, mask
136	or	r28, r28, r0
137	add	tp, tp, stride		C next entry
138	bdnz	L(tp1)
139	std	r28, 0(rp)
140
141L(b00):	ld	r31, -8(r1)		C reload the registers saved at entry
142	ld	r30, -16(r1)
143	ld	r29, -24(r1)
144	ld	r28, -32(r1)
145	ld	r27, -40(r1)
146	blr
147EPILOGUE()
148