xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc32/README (revision 230b95665bbd3a9d1a53658a36b1053f8382a519)
1Copyright 2002, 2005 Free Software Foundation, Inc.
2
3This file is part of the GNU MP Library.
4
5The GNU MP Library is free software; you can redistribute it and/or modify
6it under the terms of the GNU Lesser General Public License as published by
7the Free Software Foundation; either version 3 of the License, or (at your
8option) any later version.
9
10The GNU MP Library is distributed in the hope that it will be useful, but
11WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
13License for more details.
14
15You should have received a copy of the GNU Lesser General Public License
16along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
17
18
19
20
21
22                    POWERPC 32-BIT MPN SUBROUTINES
23
24
25This directory contains mpn functions for various 32-bit PowerPC chips.
26
27
28CODE ORGANIZATION
29
30	directory	  used for
31	================================================
32	powerpc32         generic, 604, 604e, 744x, 745x
33	powerpc32/750     740, 750, 7400, 7410
34
35
36The top-level powerpc32 directory is currently mostly aimed at 604/604e but
37should be reasonable on all powerpcs.
38
39
40
41STATUS
42
43The code is quite well optimized for the 604e; other chips have had less
44attention.
45
46Altivec SIMD available in 74xx might hold some promise, but unfortunately
47GMP only guarantees 32-bit data alignment, so there's lots of fiddling
48around with partial operations at the start and end of limb vectors.  A
49128-bit limb would be a novel idea, but is unlikely to be practical, since
50it would have to work with ordinary +, -, * etc in the C code.
51
52Also, Altivec isn't very well suited for the GMP multiplication needs.
53Using floating-point based multiplication has much better performance
54potential for all current powerpcs, both the ones with slow integer multiply
55units (603, 740, 750, 7400, 7410) and those with fast (604, 604e, 744x,
56745x).  This is because all powerpcs do some level of pipelining in the FPU:
57
58603 and 750 can sustain one fmadd every 2nd cycle.
59604 and 604e can sustain one fmadd per cycle.
607400 and 7410 can sustain 3 fmadd in 4 cycles.
61744x and 745x can sustain 4 fmadd in 5 cycles.
62
63
64
65REGISTER NAMES
66
67The normal powerpc convention is to give registers as plain numbers, like
68"mtctr 6", but on Apple MacOS X (powerpc*-*-rhapsody* and
69powerpc*-*-darwin*) the assembler demands an "r" like "mtctr r6".  Note,
70however, that when register 0 in an instruction means a literal zero the "r"
71is omitted, for instance "lwzx r6,0,r7".
72
73The GMP code uses the "r" forms, powerpc-defs.m4 transforms them to plain
74numbers according to what GMP_ASM_POWERPC_R_REGISTERS finds is needed.
75(Note that this style isn't fully general, as the identifier r4 and the
76register r4 will not be distinguishable on some systems.  However, this is
77not a problem for the limited GMP assembly usage.)
78
79
80
81GLOBAL REFERENCES
82
83Linux non-PIC
84	lis	9, __gmp_binvert_limb_table@ha
85	rlwinm	11, 5, 31, 25, 31
86	la	9, __gmp_binvert_limb_table@l(9)
87	lbzx	11, 9, 11
88
89Linux PIC (FIXME)
90.LCL0:
91	.long .LCTOC1-.LCF0
92	bcl	20, 31, .LCF0
93.LCF0:
94	mflr	30
95	lwz	7, .LCL0-.LCF0(30)
96	add	30, 7, 30
97	lwz	11, .LC0-.LCTOC1(30)
98	rlwinm	3, 5, 31, 25, 31
99	lbzx	7, 11, 3
100
101AIX (always PIC)
102LC..0:
103	.tc __gmp_binvert_limb_table[TC],__gmp_binvert_limb_table[RW]
104	lwz	9, LC..0(2)
105	rlwinm	0, 5, 31, 25, 31
106	lbzx	0, 9, 0
107
108Darwin (non-PIC)
109	lis	r2, ha16(___gmp_binvert_limb_table)
110	rlwinm	r9, r5, 31, 25, 31
111	la	r2, lo16(___gmp_binvert_limb_table)(r2)
112	lbzx	r0, r2, r9
113Darwin (PIC)
114	mflr	r0
115	bcl	20, 31, L0001$pb
116L0001$pb:
117	mflr	r7
118	mtlr	r0
119	addis	r2, r7, ha16(L___gmp_binvert_limb_table$non_lazy_ptr-L0001$pb)
120	rlwinm	r9, r5, 31, 25, 31
121	lwz	r2, lo16(L___gmp_binvert_limb_table$non_lazy_ptr-L0001$pb)(r2)
122	lbzx	r0, r2, r9
123------
124	.non_lazy_symbol_pointer
125L___gmp_binvert_limb_table$non_lazy_ptr:
126	.indirect_symbol ___gmp_binvert_limb_table
127	.long	0
128	.subsections_via_symbols
129
130
131For GNU/Linux and Darwin, we might want to duplicate __gmp_binvert_limb_table
132into the text section in this file.  We should thus be able to reach it like
133this:
134
135	bl	L0
136L0:	mflr	r2
137	rlwinm	r9, r5, 31, 25, 31
138	addi	r9, r9, lo16(local_binvert_table-L0)
139	lbzx	r0, r2, r9
140
141
142
143REFERENCES
144
145PowerPC Microprocessor Family: The Programming Environments for 32-bit
146Microprocessors, IBM document G522-0290-01, 2000.
147
148PowerPC 604e RISC Microprocessor User's Manual with Supplement for PowerPC
149604 Microprocessor, IBM document G552-0330-00, Freescale document
150MPC604EUM/AD, 3/1998.
151
152MPC7410/MPC7400 RISC Microprocessor User's Manual, Freescale document
153MPC7400UM/D, rev 1, 11/2002.
154
155MPC7450 RISC Microprocessor Family Reference Manual, Freescale document
156MPC7450UM, rev 5, 1/2005.
157
158The above are available online from
159
160	http://www.ibm.com/chips/techlib/techlib.nsf/productfamilies/PowerPC
161	http://www.freescale.com/PowerPC
162
163
164
165----------------
166Local variables:
167mode: text
168fill-column: 76
169End:
170