xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/sparc32/v9/sub_n.asm (revision 3f351f34c6d827cf017cdcff3543f6ec0c88b420)
1dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
2dnl  store difference in a third limb vector.
3
4dnl  Copyright 2001 Free Software Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of either:
10dnl
11dnl    * the GNU Lesser General Public License as published by the Free
12dnl      Software Foundation; either version 3 of the License, or (at your
13dnl      option) any later version.
14dnl
15dnl  or
16dnl
17dnl    * the GNU General Public License as published by the Free Software
18dnl      Foundation; either version 2 of the License, or (at your option) any
19dnl      later version.
20dnl
21dnl  or both in parallel, as here.
22dnl
23dnl  The GNU MP Library is distributed in the hope that it will be useful, but
24dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26dnl  for more details.
27dnl
28dnl  You should have received copies of the GNU General Public License and the
29dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
30dnl  see https://www.gnu.org/licenses/.
31
32
33include(`../config.m4')
34
35C INPUT PARAMETERS
C rp:  destination pointer; the difference limbs are stored through it (stw).
36define(rp,%o0)
C s1p: pointer to the limbs that are subtracted from (first operand of sub).
37define(s1p,%o1)
C s2p: pointer to the limbs that are subtracted (second operand of sub).
38define(s2p,%o2)
C n:   number of limbs in each vector; the file header requires n > 0.
39define(n,%o3)
C cy:  not an input -- a scratch register that carries the borrow from one
C      limb to the next inside the routine.
40define(cy,%g1)
41
42C This code uses 64-bit operations on `o' and `g' registers.  It doesn't
43C require that `o' registers' upper 32 bits are preserved by the operating
44C system, but if they are not, they must be zeroed.  That is indeed what
45C happens at least on Solaris 2.5 and 2.6.
46
47C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
48C about 10 cycles/limb from the Ecache.
49
50ASM_START()
C mpn_sub_n: subtract the n-limb vector at s2p from the n-limb vector at s1p,
C store the n-limb difference at rp, and return the borrow out (0 or 1) in %o0.
C
C Limbs are 32 bits wide (lduw/stw at 4-byte strides).  lduw zero-extends each
C limb into a 64-bit register, so after the 64-bit "sub" of the two limbs and
C of the incoming borrow, the result is negative exactly when the 32-bit
C subtraction borrows.  Bit 63 of the result (srlx by 63) is therefore the
C borrow, and stw stores the low 32 bits as the difference limb.
C
C Register use:
C   %o4,%o5  one pending limb pair (from s1p and s2p respectively)
C   %g2,%g3  the other pending limb pair
C   %g4      64-bit difference of the limb currently being processed
C   cy       borrow carried into the next limb
C   %f2      destination of the fitod instructions in the loop
C
C The routine is a leaf: it returns with retl and does not reference the stack.
51PROLOGUE(mpn_sub_n)
C Preload limb 0 of both operands.
52	lduw	[s1p+0],%o4
53	lduw	[s2p+0],%o5
C Bias n by -2: a negative result means n == 1, zero means n == 2; otherwise
C n now counts the limbs that have not yet been loaded into registers.
54	addcc	n,-2,n
55	bl,pn	%icc,L(end1)
C Delay slot: preload limb 1 of s1p.
C NOTE(review): the branch above is not annulled, so this load also executes
C when n == 1 and reads one limb past the end of s1p; the loaded value is not
C used on that path.  Confirm that callers tolerate this over-read.
56	lduw	[s1p+4],%g2
57	lduw	[s2p+4],%g3
58	be,pn	%icc,L(end2)
C Delay slot: there is no borrow into limb 0.
59	mov	0,cy
60
61	.align	16
C Main loop, two limbs per iteration.  At the top of the loop %o4/%o5 hold the
C limb pair whose difference goes to rp+0, %g2/%g3 hold the pair for rp+4, cy
C holds the incoming borrow, and n > 0 limbs remain to be loaded.
C The "C ---" lines split the body into groups of up to four instructions
C (presumably one issue group each -- TODO confirm).  The fitod instructions
C look like filler for those groups: %f2 is never read in this routine.
62L(loop):
C First limb of the iteration.
63	sub	%o4,%o5,%g4
64	add	rp,8,rp
C Refill %o4/%o5 with the limb pair two positions ahead.
65	lduw	[s1p+8],%o4
66	fitod	%f0,%f2
67C ---
C Apply the incoming borrow.
68	sub	%g4,cy,%g4
C Account for the limb pair being loaded into %o4/%o5; sets icc for the
C branch at the end of the next group.
69	addcc	n,-1,n
70	lduw	[s2p+8],%o5
71	fitod	%f0,%f2
72C ---
C New borrow = sign bit of the 64-bit difference.
73	srlx	%g4,63,cy
74	add	s2p,8,s2p
C rp was already advanced by 8, so rp-8 is the slot of the limb just computed.
75	stw	%g4,[rp-8]
C If the pair just loaded was the last one, finish in exito.  The delay slot
C below is the same instruction as the first one at exito, hence the +4 that
C skips it.
76	be,pn	%icc,L(exito)+4
77C ---
C Second limb of the iteration (also the delay slot of the branch above).
78	sub	%g2,%g3,%g4
79	addcc	n,-1,n
C Refill %g2/%g3 with the limb pair two positions ahead.
80	lduw	[s1p+12],%g2
81	fitod	%f0,%f2
82C ---
83	sub	%g4,cy,%g4
84	add	s1p,8,s1p
C s2p was advanced by 8 earlier in this iteration, so +4 here addresses the
C same limb index as the s1p+12 load above.
85	lduw	[s2p+4],%g3
86	fitod	%f0,%f2
87C ---
88	srlx	%g4,63,cy
89	bne,pt	%icc,L(loop)
C Delay slot: store the second limb; executed whether or not the loop repeats.
90	stw	%g4,[rp-4]
91C ---
C Fall-through exit (n reached zero after the second load of an iteration).
C Two limbs are still pending: %o4/%o5 for rp+0, then %g2/%g3 for rp+4.
92L(exite):
93	sub	%o4,%o5,%g4
94	sub	%g4,cy,%g4
95	srlx	%g4,63,cy
96	stw	%g4,[rp+0]
97	sub	%g2,%g3,%g4
98	sub	%g4,cy,%g4
99	stw	%g4,[rp+4]
100	retl
C Delay slot: return the final borrow in %o0 (this overwrites rp, which is no
C longer needed).
101	srlx	%g4,63,%o0
102
C Exit taken from the middle of the loop (n reached zero after the first load
C of an iteration).  Two limbs are still pending: %g2/%g3 for rp-4, then
C %o4/%o5 for rp+0.  The loop branches to this label +4 because the first
C instruction here has already run in the delay slot of that branch.
103L(exito):
104	sub	%g2,%g3,%g4
105	sub	%g4,cy,%g4
106	srlx	%g4,63,cy
107	stw	%g4,[rp-4]
108	sub	%o4,%o5,%g4
109	sub	%g4,cy,%g4
110	stw	%g4,[rp+0]
111	retl
C Delay slot: return the final borrow in %o0.
112	srlx	%g4,63,%o0
113
C n == 1: a single limb, no incoming borrow.
114L(end1):
115	sub	%o4,%o5,%g4
116	stw	%g4,[rp+0]
117	retl
C Delay slot: return the borrow in %o0.
118	srlx	%g4,63,%o0
119
C n == 2: both limb pairs were loaded by the entry code; limb 0 has no
C incoming borrow, and its borrow out feeds limb 1.
120L(end2):
121	sub	%o4,%o5,%g4
122	srlx	%g4,63,cy
123	stw	%g4,[rp+0]
124	sub	%g2,%g3,%g4
125	sub	%g4,cy,%g4
126	stw	%g4,[rp+4]
127	retl
C Delay slot: return the final borrow in %o0.
128	srlx	%g4,63,%o0
129EPILOGUE(mpn_sub_n)
130