xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/ia64/copyi.asm (revision 4d5abbe83f525258eb479e5fca29f25cb943f379)
1dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.
2
3dnl  Contributed to the GNU project by Torbjorn Granlund.
4
5dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of the GNU Lesser General Public License as published
11dnl  by the Free Software Foundation; either version 3 of the License, or (at
12dnl  your option) any later version.
13
14dnl  The GNU MP Library is distributed in the hope that it will be useful, but
15dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17dnl  License for more details.
18
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C         cycles/limb
25C Itanium:    1
26C Itanium 2:  0.5
27
28C INPUT PARAMETERS
29C rp = r32
30C sp = r33
31C n = r34
32
33ASM_START()
C mpn_copyi: copy the n limbs at sp to rp, walking both vectors from low to
C high addresses.  Register arguments are rp = r32, sp = r33, n = r34.
C
C The main loop moves four limbs per pass through two pointer pairs that each
C step by 16 bytes (r33 -> r32 and r21 -> r20), so each pair handles every
C other limb.  r16-r19 hold limbs that have been loaded but not yet stored.
C n mod 4 selects one of four setup paths; each preloads those registers and
C then joins the loop at the matching .Li* entry, or, when n <= 3, jumps
C directly to the matching .Ls* tail store.
34PROLOGUE(mpn_copyi)
C NOTE(review): .save names r2 as the home of ar.lc, but the mov.i that makes
C that copy sits below .body - confirm the unwind annotation is as intended.
35	.prologue
36	.save ar.lc, r2
37	.body
38ifdef(`HAVE_ABI_32',
39`	addp4		r32 = 0, r32		C form a 64-bit address from 32-bit rp
40	addp4		r33 = 0, r33		C form a 64-bit address from 32-bit sp
41	sxt4		r34 = r34		C sign-extend the 32-bit count n
42	;;
43')
44{.mmi
45	nop		0
46	nop		0
47	mov.i		r2 = ar.lc		C keep caller ar.lc in r2; restored at .Ls00
48}
49{.mmi
50	and		r14 = 3, r34		C r14 = n mod 4, selects the setup path
51	cmp.ge		p14, p15 = 3, r34	C p14 = (n <= 3): no loop pass needed
52	add		r34 = -4, r34		C r34 = n - 4, shifted below into the loop count
53	;;
54}
55{.mmi
56	cmp.eq		p8, p0 = 1, r14		C p8:  n mod 4 == 1
57	cmp.eq		p10, p0 = 2, r14	C p10: n mod 4 == 2
58	cmp.eq		p12, p0 = 3, r14	C p12: n mod 4 == 3
59}
60{.bbb
61  (p8)	br.dptk		.Lb01
62  (p10)	br.dptk		.Lb10
63  (p12)	br.dptk		.Lb11
64}	C no branch taken: n mod 4 == 0, fall into .Lb00
65
66.Lb00:	C  n = 0, 4, 8, 12, ...
67  (p14)	br.dptk		.Ls00		C short case on this path: nothing to store
68	;;
69	add		r21 = 8, r33		C r21 -> sp[1]
70	ld8		r16 = [r33], 16		C r16 = sp[0]
71	shr		r15 = r34, 2		C r15 = (n - 4) >> 2, the loop count
72	;;
73	ld8		r17 = [r21], 16		C r17 = sp[1]
74	mov.i		ar.lc = r15
75	ld8		r18 = [r33], 16		C r18 = sp[2]
76	add		r20 = 8, r32		C r20 -> rp[1]
77	;;
78	ld8		r19 = [r21], 16		C r19 = sp[3]
79	br.cloop.dptk	.Loop			C ar.lc != 0: enter the loop at its top
80	;;
81	br.sptk		.Lend			C ar.lc == 0: just store the four held limbs
82	;;
83
84.Lb01:	C  n = 1, 5, 9, 13, ...
85	add		r21 = 0, r33		C r21 -> sp[0]
86	add		r20 = 0, r32		C r20 -> rp[0]
87	add		r33 = 8, r33		C r33 -> sp[1]
88	add		r32 = 8, r32		C r32 -> rp[1]
89	;;
90	ld8		r19 = [r21], 16		C r19 = sp[0]
91	shr		r15 = r34, 2		C r15 = (n - 4) >> 2, the loop count
92  (p14)	br.dptk		.Ls01		C short case: store r19 and return
93	;;
94	ld8		r16 = [r33], 16		C r16 = sp[1]
95	mov.i		ar.lc = r15
96	;;
97	ld8		r17 = [r21], 16		C r17 = sp[2]
98	ld8		r18 = [r33], 16		C r18 = sp[3]
99	br.sptk		.Li01			C join the loop at its fourth bundle
100	;;
101
102.Lb10:	C  n = 2, 6, 10, 14, ...
103	add		r21 = 8, r33		C r21 -> sp[1]
104	add		r20 = 8, r32		C r20 -> rp[1]
105	ld8		r18 = [r33], 16		C r18 = sp[0]
106	shr		r15 = r34, 2		C r15 = (n - 4) >> 2, the loop count
107	;;
108	ld8		r19 = [r21], 16		C r19 = sp[1]
109	mov.i		ar.lc = r15		C written before the short-case test; .Ls00 restores ar.lc
110  (p14)	br.dptk		.Ls10		C short case: store r18, r19 and return
111	;;
112	ld8		r16 = [r33], 16		C r16 = sp[2]
113	ld8		r17 = [r21], 16		C r17 = sp[3]
114	br.sptk		.Li10			C join the loop at its third bundle
115	;;
116
117.Lb11:	C  n = 3, 7, 11, 15, ...
118	add		r21 = 0, r33		C r21 -> sp[0]
119	add		r20 = 0, r32		C r20 -> rp[0]
120	add		r33 = 8, r33		C r33 -> sp[1]
121	add		r32 = 8, r32		C r32 -> rp[1]
122	;;
123	ld8		r17 = [r21], 16		C r17 = sp[0]
124	shr		r15 = r34, 2		C r15 = (n - 4) >> 2, the loop count
125	;;
126	ld8		r18 = [r33], 16		C r18 = sp[1]
127	mov.i		ar.lc = r15		C written before the short-case test; .Ls00 restores ar.lc
128	ld8		r19 = [r21], 16		C r19 = sp[2]
129  (p14)	br.dptk		.Ls11		C short case: store r17, r18, r19 and return
130	;;
131	ld8		r16 = [r33], 16		C r16 = sp[3]
132	br.sptk		.Li11			C join the loop at its second bundle
133	;;
134
C Main loop.  Each bundle stores one held limb and refills the same register
C with the limb four positions further on.  The .Li* labels are the entry
C points used by the setup paths above.
135	ALIGN(32)
136.Loop:
137.Li00:
138{.mmb
139	st8		[r32] = r16, 16
140	ld8		r16 = [r33], 16
141	nop.b		0
142}
143.Li11:
144{.mmb
145	st8		[r20] = r17, 16
146	ld8		r17 = [r21], 16
147	nop.b		0
148	;;
149}
150.Li10:
151{.mmb
152	st8		[r32] = r18, 16
153	ld8		r18 = [r33], 16
154	nop.b		0
155}
156.Li01:
157{.mmb
158	st8		[r20] = r19, 16
159	ld8		r19 = [r21], 16
160	br.cloop.dptk	.Loop			C repeat while ar.lc != 0
161	;;
162}
C Tail: store the limbs still held in r16-r19.  Entering at a later label
C skips the stores for registers that path never loaded.
163.Lend:	st8		[r32] = r16, 16		C four limbs left
164.Ls11:	st8		[r20] = r17, 16		C three limbs left
165	;;
166.Ls10:	st8		[r32] = r18, 16		C two limbs left
167.Ls01:	st8		[r20] = r19, 16		C one limb left
168.Ls00:	mov.i		ar.lc = r2		C restore caller ar.lc
169	br.ret.sptk.many b0
170EPILOGUE()
171ASM_END()
172