xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/mod_34lsub1.asm (revision 8585484ef87f5a04d32332313cdb799625f4faf8)
1dnl  PowerPC-64 mpn_mod_34lsub1 -- modulo 2^48-1.
2
3dnl  Copyright 2005 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C                   cycles/limb
23C POWER3/PPC630          1.33
24C POWER4/PPC970          1.5
25C POWER5                 1.32
26C POWER6                 2.35
27C POWER7                 1
28
29C INPUT PARAMETERS
C   up (r3)  address of the source limbs; the code reads them with 8-byte
C            ld instructions at increasing offsets and treats the limb at
C            offset 0 as the one of lowest weight.
C   n  (r4)  number of limbs at up.  It is compared against 3 and 1 with
C            signed compares; n = 0 reads no memory and returns 0.
30define(`up',`r3')
31define(`n',`r4')
32
33ASM_START()
34PROLOGUE(mpn_mod_34lsub1)
C mpn_mod_34lsub1 (up, n)
C
C Return in r3 a value congruent, modulo 2^48-1, to the n-limb number at up
C (64-bit limbs, lowest weight first).  The result is a plain sum of partial
C terms; no final reduction below 2^48-1 is performed here.
C
C Method: limbs are summed three at a time into the 192-bit accumulator
C r10:r9:r8 with the carry chained through, and every carry out of r10 is
C counted in r11.  Since 2^192 = (2^48)^4 is congruent to 1 modulo 2^48-1,
C each new group of three limbs and each counted carry has weight 1 again.
C At the end the accumulator is cut at 48-bit boundaries and the pieces are
C added together.
C
C Register use:
C   r8, r9, r10  accumulator limbs (weights 2^0, 2^64, 2^128)
C   r11          count of carries out of r10 (weight 2^192, i.e. 1)
C   r5, r6, r7   limbs most recently loaded, not yet added
C   r0           n/3 (kept until after the loop), later a temporary
C   ctr          number of three-limb groups still to process
C   cr6          results of the compares of n against 3 and 1
C Only r0, r3-r11, ctr, cr6 and the carry bit are written; no stack is used.
35	li	r8, 0
36	li	r9, 0
37	li	r10, 0
38	li	r11, 0
39
40	cmpdi	cr6, n, 3
41	blt	cr6, L(lt3)		C fewer than 3 limbs: tail code only
42
C Build the constant (2^65+1)/3 in r0 without a memory load, then take the
C high half of constant*n and shift right once more, giving floor(n/3).
43	li	r0, -0x5556		C 0xFFFFFFFFFFFFAAAA
44	rldimi	r0, r0, 16, 32		C 0xFFFFFFFFAAAAAAAA
45	rldimi	r0, r0, 32, 63		C 0xAAAAAAAAAAAAAAAB
46	mulhdu	r0, r0, n
47	srdi	r0, r0, 1		C r0 = [n / 3]
48	mtctr	r0			C ctr = number of 3-limb groups (>= 1 here)
49
C Preload the first group.  If it is the only one, skip the loop.
50	ld	r5, 0(up)
51	ld	r6, 8(up)
52	ld	r7, 16(up)
53	addi	up, up, 24
54	bdz	L(end)
55
C Main loop, unrolled twice and software pipelined: each half adds the group
C loaded by the previous half while loading the next group into r5-r7.
C up is advanced by 48 once per pass, in the middle of the first half, so
C the second half addresses its limbs with negative offsets.
C NOTE(review): the nop instructions are presumably there for instruction
C scheduling or dispatch grouping -- confirm before removing them.
56	ALIGN(16)
57L(top):	addc	r8, r8, r5
58	nop
59	ld	r5, 0(up)
60	adde	r9, r9, r6
61	ld	r6, 8(up)
62	adde	r10, r10, r7
63	ld	r7, 16(up)
64	addi	up, up, 48
65	addze	r11, r11		C count carry out of r10
66	bdz	L(endx)			C exit with up 24 bytes too far
67	addc	r8, r8, r5
68	nop
69	ld	r5, -24(up)
70	adde	r9, r9, r6
71	ld	r6, -16(up)
72	adde	r10, r10, r7
73	ld	r7, -8(up)
74	addze	r11, r11		C count carry out of r10
75	bdnz	L(top)
76
C Exit after the second half: up is already correct, so add 24 here to
C cancel the shared subtraction at L(endx).
77	addi	up, up, 24
78L(endx):
C Exit after the first half arrives here directly: only 24 of the 48 bytes
C added to up have been loaded since, so step back by 24.
79	addi	up, up, -24
80
C Add the last loaded group, which no loop half has accumulated yet.
81L(end):	addc	r8, r8, r5
82	adde	r9, r9, r6
83	adde	r10, r10, r7
84	addze	r11, r11
85
86	sldi	r5, r0, 1
87	add	r5, r5, r0		C r5 = n / 3 * 3
88	sub	n, n, r5		C n = n mod 3
C Tail: 0, 1 or 2 limbs remain at up.  cr6 lt means none, eq means one,
C gt means two.  The intervening li and ld do not modify the carry bit or
C cr6, so the carry from addc reaches adde and the beq still tests n == 1.
89L(lt3):	cmpdi	cr6, n, 1
90	blt	cr6, L(2)
91
92	ld	r5, 0(up)
93	addc	r8, r8, r5
94	li	r6, 0			C second limb is 0 when only one remains
95	beq	cr6, L(1)
96
97	ld	r6, 8(up)
98L(1):	adde	r9, r9, r6
99	addze	r10, r10
100	addze	r11, r11
101
C Fold r10:r9:r8 into 48-bit pieces.  Modulo 2^48-1 the weights are
C r8 -> 2^0, r9 -> 2^64 = 2^16, r10 -> 2^128 = 2^32, so each register is
C split at the bit where its weight reaches 2^48; the upper part wraps
C around to weight 1 and the lower part is shifted to its weight.
102L(2):	rldicl	r0, r8, 0, 16		C r0 = r8 mod 2^48
103	srdi	r3, r8, 48		C r3 = r8 div 2^48
104	rldic	r4, r9, 16, 16		C r4 = (r9 mod 2^32) << 16
105	srdi	r5, r9, 32		C r5 = r9 div 2^32
106	rldic	r6, r10, 32, 16		C r6 = (r10 mod 2^16) << 32
107	srdi	r7, r10, 16		C r7 = r10 div 2^16
108
C Sum the six pieces and the carry count as a small tree of adds.
109	add	r0, r0, r3
110	add	r4, r4, r5
111	add	r6, r6, r7
112
113	add	r0, r0, r4
114	add	r6, r6, r11		C r11 has weight 2^192, i.e. 1
115
116	add	r3, r0, r6		C return value
117	blr
118EPILOGUE()
119
120C |__r10__|__r9___|__r8___|
121C |-----|-----|-----|-----|
122