xref: /netbsd-src/external/lgpl3/mpfr/dist/src/invsqrt_limb.h (revision 73d56d5b0be8704e4f0a7e8221a2c7309572c9a1)
1 /* __gmpfr_invsqrt_limb_approx -- reciprocal approximate square root of a limb
2 
3 Copyright 2017-2020 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 /* for now, we only provide __gmpfr_invert_limb for 64-bit limb */
27 #if GMP_NUMB_BITS == 64
28 
29 /* For 257 <= d10 <= 1024, T[d10-257] = floor(sqrt(2^30/d10)).
30    Sage code:
31    T = [floor(sqrt(2^30/d10)) for d10 in [257..1024]] */
32 static const mp_limb_t T[768] =
33   {       2044, 2040, 2036, 2032, 2028, 2024, 2020, 2016, 2012, 2009, 2005,
34     2001, 1997, 1994, 1990, 1986, 1983, 1979, 1975, 1972, 1968, 1965, 1961,
35     1958, 1954, 1951, 1947, 1944, 1941, 1937, 1934, 1930, 1927, 1924, 1920,
36     1917, 1914, 1911, 1907, 1904, 1901, 1898, 1895, 1891, 1888, 1885, 1882,
37     1879, 1876, 1873, 1870, 1867, 1864, 1861, 1858, 1855, 1852, 1849, 1846,
38     1843, 1840, 1837, 1834, 1831, 1828, 1826, 1823, 1820, 1817, 1814, 1812,
39     1809, 1806, 1803, 1801, 1798, 1795, 1792, 1790, 1787, 1784, 1782, 1779,
40     1777, 1774, 1771, 1769, 1766, 1764, 1761, 1759, 1756, 1754, 1751, 1749,
41     1746, 1744, 1741, 1739, 1736, 1734, 1731, 1729, 1727, 1724, 1722, 1719,
42     1717, 1715, 1712, 1710, 1708, 1705, 1703, 1701, 1698, 1696, 1694, 1692,
43     1689, 1687, 1685, 1683, 1680, 1678, 1676, 1674, 1672, 1670, 1667, 1665,
44     1663, 1661, 1659, 1657, 1655, 1652, 1650, 1648, 1646, 1644, 1642, 1640,
45     1638, 1636, 1634, 1632, 1630, 1628, 1626, 1624, 1622, 1620, 1618, 1616,
46     1614, 1612, 1610, 1608, 1606, 1604, 1602, 1600, 1598, 1597, 1595, 1593,
47     1591, 1589, 1587, 1585, 1583, 1582, 1580, 1578, 1576, 1574, 1572, 1571,
48     1569, 1567, 1565, 1563, 1562, 1560, 1558, 1556, 1555, 1553, 1551, 1549,
49     1548, 1546, 1544, 1542, 1541, 1539, 1537, 1536, 1534, 1532, 1531, 1529,
50     1527, 1526, 1524, 1522, 1521, 1519, 1517, 1516, 1514, 1513, 1511, 1509,
51     1508, 1506, 1505, 1503, 1501, 1500, 1498, 1497, 1495, 1494, 1492, 1490,
52     1489, 1487, 1486, 1484, 1483, 1481, 1480, 1478, 1477, 1475, 1474, 1472,
53     1471, 1469, 1468, 1466, 1465, 1463, 1462, 1461, 1459, 1458, 1456, 1455,
54     1453, 1452, 1450, 1449, 1448, 1446, 1445, 1443, 1442, 1441, 1439, 1438,
55     1436, 1435, 1434, 1432, 1431, 1430, 1428, 1427, 1426, 1424, 1423, 1422,
56     1420, 1419, 1418, 1416, 1415, 1414, 1412, 1411, 1410, 1408, 1407, 1406,
57     1404, 1403, 1402, 1401, 1399, 1398, 1397, 1395, 1394, 1393, 1392, 1390,
58     1389, 1388, 1387, 1385, 1384, 1383, 1382, 1381, 1379, 1378, 1377, 1376,
59     1374, 1373, 1372, 1371, 1370, 1368, 1367, 1366, 1365, 1364, 1362, 1361,
60     1360, 1359, 1358, 1357, 1355, 1354, 1353, 1352, 1351, 1350, 1349, 1347,
61     1346, 1345, 1344, 1343, 1342, 1341, 1339, 1338, 1337, 1336, 1335, 1334,
62     1333, 1332, 1331, 1330, 1328, 1327, 1326, 1325, 1324, 1323, 1322, 1321,
63     1320, 1319, 1318, 1317, 1315, 1314, 1313, 1312, 1311, 1310, 1309, 1308,
64     1307, 1306, 1305, 1304, 1303, 1302, 1301, 1300, 1299, 1298, 1297, 1296,
65     1295, 1294, 1293, 1292, 1291, 1290, 1289, 1288, 1287, 1286, 1285, 1284,
66     1283, 1282, 1281, 1280, 1279, 1278, 1277, 1276, 1275, 1274, 1273, 1272,
67     1271, 1270, 1269, 1268, 1267, 1266, 1265, 1264, 1264, 1263, 1262, 1261,
68     1260, 1259, 1258, 1257, 1256, 1255, 1254, 1253, 1252, 1252, 1251, 1250,
69     1249, 1248, 1247, 1246, 1245, 1244, 1243, 1242, 1242, 1241, 1240, 1239,
70     1238, 1237, 1236, 1235, 1234, 1234, 1233, 1232, 1231, 1230, 1229, 1228,
71     1228, 1227, 1226, 1225, 1224, 1223, 1222, 1222, 1221, 1220, 1219, 1218,
72     1217, 1216, 1216, 1215, 1214, 1213, 1212, 1211, 1211, 1210, 1209, 1208,
73     1207, 1207, 1206, 1205, 1204, 1203, 1202, 1202, 1201, 1200, 1199, 1198,
74     1198, 1197, 1196, 1195, 1194, 1194, 1193, 1192, 1191, 1190, 1190, 1189,
75     1188, 1187, 1187, 1186, 1185, 1184, 1183, 1183, 1182, 1181, 1180, 1180,
76     1179, 1178, 1177, 1177, 1176, 1175, 1174, 1174, 1173, 1172, 1171, 1171,
77     1170, 1169, 1168, 1168, 1167, 1166, 1165, 1165, 1164, 1163, 1162, 1162,
78     1161, 1160, 1159, 1159, 1158, 1157, 1157, 1156, 1155, 1154, 1154, 1153,
79     1152, 1152, 1151, 1150, 1149, 1149, 1148, 1147, 1147, 1146, 1145, 1145,
80     1144, 1143, 1142, 1142, 1141, 1140, 1140, 1139, 1138, 1138, 1137, 1136,
81     1136, 1135, 1134, 1133, 1133, 1132, 1131, 1131, 1130, 1129, 1129, 1128,
82     1127, 1127, 1126, 1125, 1125, 1124, 1123, 1123, 1122, 1121, 1121, 1120,
83     1119, 1119, 1118, 1118, 1117, 1116, 1116, 1115, 1114, 1114, 1113, 1112,
84     1112, 1111, 1110, 1110, 1109, 1109, 1108, 1107, 1107, 1106, 1105, 1105,
85     1104, 1103, 1103, 1102, 1102, 1101, 1100, 1100, 1099, 1099, 1098, 1097,
86     1097, 1096, 1095, 1095, 1094, 1094, 1093, 1092, 1092, 1091, 1091, 1090,
87     1089, 1089, 1088, 1088, 1087, 1086, 1086, 1085, 1085, 1084, 1083, 1083,
88     1082, 1082, 1081, 1080, 1080, 1079, 1079, 1078, 1077, 1077, 1076, 1076,
89     1075, 1075, 1074, 1073, 1073, 1072, 1072, 1071, 1071, 1070, 1069, 1069,
90     1068, 1068, 1067, 1067, 1066, 1065, 1065, 1064, 1064, 1063, 1063, 1062,
91     1062, 1061, 1060, 1060, 1059, 1059, 1058, 1058, 1057, 1057, 1056, 1055,
92     1055, 1054, 1054, 1053, 1053, 1052, 1052, 1051, 1051, 1050, 1049, 1049,
93     1048, 1048, 1047, 1047, 1046, 1046, 1045, 1045, 1044, 1044, 1043, 1043,
94     1042, 1041, 1041, 1040, 1040, 1039, 1039, 1038, 1038, 1037, 1037, 1036,
95     1036, 1035, 1035, 1034, 1034, 1033, 1033, 1032, 1032, 1031, 1031, 1030,
96     1030, 1029, 1029, 1028, 1028, 1027, 1027, 1026, 1026, 1025, 1025, 1024,
97     1024 };
98 
99 /* table of v0^3 */
100 static const mp_limb_t T3[768] =
101   {             8539701184, 8489664000, 8439822656, 8390176768, 8340725952,
102     8291469824, 8242408000, 8193540096, 8144865728, 8108486729, 8060150125,
103     8012006001, 7964053973, 7928215784, 7880599000, 7833173256, 7797729087,
104     7750636739, 7703734375, 7668682048, 7622111232, 7587307125, 7541066681,
105     7506509912, 7460598664, 7426288351, 7380705123, 7346640384, 7312680621,
106     7267563953, 7233848504, 7189057000, 7155584983, 7122217024, 7077888000,
107     7044762213, 7011739944, 6978821031, 6935089643, 6902411264, 6869835701,
108     6837362792, 6804992375, 6761990971, 6729859072, 6697829125, 6665900968,
109     6634074439, 6602349376, 6570725617, 6539203000, 6507781363, 6476460544,
110     6445240381, 6414120712, 6383101375, 6352182208, 6321363049, 6290643736,
111     6260024107, 6229504000, 6199083253, 6168761704, 6138539191, 6108415552,
112     6088387976, 6058428767, 6028568000, 5998805513, 5969141144, 5949419328,
113     5919918129, 5890514616, 5861208627, 5841725401, 5812581592, 5783534875,
114     5754585088, 5735339000, 5706550403, 5677858304, 5658783768, 5630252139,
115     5611284433, 5582912824, 5554637011, 5535839609, 5507723096, 5489031744,
116     5461074081, 5442488479, 5414689216, 5396209064, 5368567751, 5350192749,
117     5322708936, 5304438784, 5277112021, 5258946419, 5231776256, 5213714904,
118     5186700891, 5168743489, 5150827583, 5124031424, 5106219048, 5079577959,
119     5061868813, 5044200875, 5017776128, 5000211000, 4982686912, 4956477625,
120     4939055927, 4921675101, 4895680392, 4878401536, 4861163384, 4843965888,
121     4818245769, 4801149703, 4784094125, 4767078987, 4741632000, 4724717752,
122     4707843776, 4691010024, 4674216448, 4657463000, 4632407963, 4615754625,
123     4599141247, 4582567781, 4566034179, 4549540393, 4533086375, 4508479808,
124     4492125000, 4475809792, 4459534136, 4443297984, 4427101288, 4410944000,
125     4394826072, 4378747456, 4362708104, 4346707968, 4330747000, 4314825152,
126     4298942376, 4283098624, 4267293848, 4251528000, 4235801032, 4220112896,
127     4204463544, 4188852928, 4173281000, 4157747712, 4142253016, 4126796864,
128     4111379208, 4096000000, 4080659192, 4073003173, 4057719875, 4042474857,
129     4027268071, 4012099469, 3996969003, 3981876625, 3966822287, 3959309368,
130     3944312000, 3929352552, 3914430976, 3899547224, 3884701248, 3877292411,
131     3862503009, 3847751263, 3833037125, 3818360547, 3811036328, 3796416000,
132     3781833112, 3767287616, 3760028875, 3745539377, 3731087151, 3716672149,
133     3709478592, 3695119336, 3680797184, 3666512088, 3659383421, 3645153819,
134     3630961153, 3623878656, 3609741304, 3595640768, 3588604291, 3574558889,
135     3560550183, 3553559576, 3539605824, 3525688648, 3518743761, 3504881359,
136     3491055413, 3484156096, 3470384744, 3463512697, 3449795831, 3436115229,
137     3429288512, 3415662216, 3408862625, 3395290527, 3381754501, 3375000000,
138     3361517992, 3354790473, 3341362375, 3334661784, 3321287488, 3307949000,
139     3301293169, 3288008303, 3281379256, 3268147904, 3261545587, 3248367641,
140     3241792000, 3228667352, 3222118333, 3209046875, 3202524424, 3189506048,
141     3183010111, 3170044709, 3163575232, 3150662696, 3144219625, 3131359847,
142     3124943128, 3118535181, 3105745579, 3099363912, 3086626816, 3080271375,
143     3067586677, 3061257408, 3048625000, 3042321849, 3036027392, 3023464536,
144     3017196125, 3004685307, 2998442888, 2992209121, 2979767519, 2973559672,
145     2961169856, 2954987875, 2948814504, 2936493568, 2930345991, 2924207000,
146     2911954752, 2905841483, 2899736776, 2887553024, 2881473967, 2875403448,
147     2863288000, 2857243059, 2851206632, 2839159296, 2833148375, 2827145944,
148     2815166528, 2809189531, 2803221000, 2791309312, 2785366143, 2779431416,
149     2767587264, 2761677827, 2755776808, 2749884201, 2738124199, 2732256792,
150     2726397773, 2714704875, 2708870984, 2703045457, 2697228288, 2685619000,
151     2679826869, 2674043072, 2668267603, 2656741625, 2650991104, 2645248887,
152     2639514968, 2633789341, 2622362939, 2616662152, 2610969633, 2605285376,
153     2593941624, 2588282117, 2582630848, 2576987811, 2571353000, 2560108032,
154     2554497863, 2548895896, 2543302125, 2537716544, 2526569928, 2521008881,
155     2515456000, 2509911279, 2504374712, 2498846293, 2487813875, 2482309864,
156     2476813977, 2471326208, 2465846551, 2460375000, 2454911549, 2444008923,
157     2438569736, 2433138625, 2427715584, 2422300607, 2416893688, 2411494821,
158     2400721219, 2395346472, 2389979753, 2384621056, 2379270375, 2373927704,
159     2368593037, 2363266368, 2357947691, 2352637000, 2342039552, 2336752783,
160     2331473976, 2326203125, 2320940224, 2315685267, 2310438248, 2305199161,
161     2299968000, 2294744759, 2289529432, 2284322013, 2273930875, 2268747144,
162     2263571297, 2258403328, 2253243231, 2248091000, 2242946629, 2237810112,
163     2232681443, 2227560616, 2222447625, 2217342464, 2212245127, 2207155608,
164     2202073901, 2197000000, 2191933899, 2186875592, 2181825073, 2176782336,
165     2171747375, 2166720184, 2161700757, 2156689088, 2151685171, 2146689000,
166     2141700569, 2136719872, 2131746903, 2126781656, 2121824125, 2116874304,
167     2111932187, 2106997768, 2102071041, 2097152000, 2092240639, 2087336952,
168     2082440933, 2077552576, 2072671875, 2067798824, 2062933417, 2058075648,
169     2053225511, 2048383000, 2043548109, 2038720832, 2033901163, 2029089096,
170     2024284625, 2019487744, 2019487744, 2014698447, 2009916728, 2005142581,
171     2000376000, 1995616979, 1990865512, 1986121593, 1981385216, 1976656375,
172     1971935064, 1967221277, 1962515008, 1962515008, 1957816251, 1953125000,
173     1948441249, 1943764992, 1939096223, 1934434936, 1929781125, 1925134784,
174     1920495907, 1915864488, 1915864488, 1911240521, 1906624000, 1902014919,
175     1897413272, 1892819053, 1888232256, 1883652875, 1879080904, 1879080904,
176     1874516337, 1869959168, 1865409391, 1860867000, 1856331989, 1851804352,
177     1851804352, 1847284083, 1842771176, 1838265625, 1833767424, 1829276567,
178     1824793048, 1824793048, 1820316861, 1815848000, 1811386459, 1806932232,
179     1802485313, 1798045696, 1798045696, 1793613375, 1789188344, 1784770597,
180     1780360128, 1775956931, 1775956931, 1771561000, 1767172329, 1762790912,
181     1758416743, 1758416743, 1754049816, 1749690125, 1745337664, 1740992427,
182     1736654408, 1736654408, 1732323601, 1728000000, 1723683599, 1719374392,
183     1719374392, 1715072373, 1710777536, 1706489875, 1702209384, 1702209384,
184     1697936057, 1693669888, 1689410871, 1685159000, 1685159000, 1680914269,
185     1676676672, 1672446203, 1672446203, 1668222856, 1664006625, 1659797504,
186     1655595487, 1655595487, 1651400568, 1647212741, 1643032000, 1643032000,
187     1638858339, 1634691752, 1630532233, 1630532233, 1626379776, 1622234375,
188     1618096024, 1618096024, 1613964717, 1609840448, 1605723211, 1605723211,
189     1601613000, 1597509809, 1593413632, 1593413632, 1589324463, 1585242296,
190     1581167125, 1581167125, 1577098944, 1573037747, 1568983528, 1568983528,
191     1564936281, 1560896000, 1556862679, 1556862679, 1552836312, 1548816893,
192     1548816893, 1544804416, 1540798875, 1536800264, 1536800264, 1532808577,
193     1528823808, 1528823808, 1524845951, 1520875000, 1516910949, 1516910949,
194     1512953792, 1509003523, 1509003523, 1505060136, 1501123625, 1501123625,
195     1497193984, 1493271207, 1489355288, 1489355288, 1485446221, 1481544000,
196     1481544000, 1477648619, 1473760072, 1473760072, 1469878353, 1466003456,
197     1466003456, 1462135375, 1458274104, 1454419637, 1454419637, 1450571968,
198     1446731091, 1446731091, 1442897000, 1439069689, 1439069689, 1435249152,
199     1431435383, 1431435383, 1427628376, 1423828125, 1423828125, 1420034624,
200     1416247867, 1416247867, 1412467848, 1408694561, 1408694561, 1404928000,
201     1401168159, 1401168159, 1397415032, 1397415032, 1393668613, 1389928896,
202     1389928896, 1386195875, 1382469544, 1382469544, 1378749897, 1375036928,
203     1375036928, 1371330631, 1367631000, 1367631000, 1363938029, 1363938029,
204     1360251712, 1356572043, 1356572043, 1352899016, 1349232625, 1349232625,
205     1345572864, 1341919727, 1341919727, 1338273208, 1338273208, 1334633301,
206     1331000000, 1331000000, 1327373299, 1327373299, 1323753192, 1320139673,
207     1320139673, 1316532736, 1312932375, 1312932375, 1309338584, 1309338584,
208     1305751357, 1302170688, 1302170688, 1298596571, 1298596571, 1295029000,
209     1291467969, 1291467969, 1287913472, 1287913472, 1284365503, 1280824056,
210     1280824056, 1277289125, 1277289125, 1273760704, 1270238787, 1270238787,
211     1266723368, 1266723368, 1263214441, 1259712000, 1259712000, 1256216039,
212     1256216039, 1252726552, 1249243533, 1249243533, 1245766976, 1245766976,
213     1242296875, 1242296875, 1238833224, 1235376017, 1235376017, 1231925248,
214     1231925248, 1228480911, 1228480911, 1225043000, 1221611509, 1221611509,
215     1218186432, 1218186432, 1214767763, 1214767763, 1211355496, 1207949625,
216     1207949625, 1204550144, 1204550144, 1201157047, 1201157047, 1197770328,
217     1197770328, 1194389981, 1191016000, 1191016000, 1187648379, 1187648379,
218     1184287112, 1184287112, 1180932193, 1180932193, 1177583616, 1174241375,
219     1174241375, 1170905464, 1170905464, 1167575877, 1167575877, 1164252608,
220     1164252608, 1160935651, 1160935651, 1157625000, 1154320649, 1154320649,
221     1151022592, 1151022592, 1147730823, 1147730823, 1144445336, 1144445336,
222     1141166125, 1141166125, 1137893184, 1137893184, 1134626507, 1134626507,
223     1131366088, 1128111921, 1128111921, 1124864000, 1124864000, 1121622319,
224     1121622319, 1118386872, 1118386872, 1115157653, 1115157653, 1111934656,
225     1111934656, 1108717875, 1108717875, 1105507304, 1105507304, 1102302937,
226     1102302937, 1099104768, 1099104768, 1095912791, 1095912791, 1092727000,
227     1092727000, 1089547389, 1089547389, 1086373952, 1086373952, 1083206683,
228     1083206683, 1080045576, 1080045576, 1076890625, 1076890625, 1073741824,
229     1073741824 };
230 
231 /* umul_hi(h, x, y) puts in h the high part of x*y */
232 #ifdef HAVE_MULX_U64
233 #include <immintrin.h>
234 #define umul_hi(h, x, y) _mulx_u64 (x, y, (unsigned long long *) &(h))
235 #else
236 #define umul_hi(h, x, y)                        \
237   do {                                          \
238     mp_limb_t _l;                               \
239     umul_ppmm (h, _l, x, y);                    \
240     (void) _l;  /* unused variable */           \
241   } while (0)
242 #endif
243 
244 /* given 2^62 <= d < 2^64, put in r an approximation of
245    s = floor(2^96/sqrt(d)) - 2^64, with r <= s <= r + 15 */
246 #define __gmpfr_invsqrt_limb_approx(r, d)                               \
247   do {                                                                  \
248     mp_limb_t _d, _i, _v0, _e0, _d37, _v1, _e1, _h, _v2, _e2;           \
249     _d = (d);                                                           \
250     _i = (_d >> 54) - 256;                                              \
251     /* i = d10 - 256 */                                                 \
252     _v0 = T[_i];                                                        \
253     _d37 = 1 + (_d >> 27);                                              \
254     _e0 = T3[_i] * _d37;                                                \
255     /* the value (_v0 << 57) - _e0 is less than 2^61 */                 \
256     _v1 = (_v0 << 11) + (((_v0 << 57) - _e0) >> 47);                    \
257     _e1 = - _v1 * _v1 * _d37;                                           \
258     umul_hi (_h, _v1, _e1);                                             \
259     /* _h = floor(e_1*v_1/2^64) */                                      \
260     _v2 = (_v1 << 10) + (_h >> 6);                                      \
261     umul_hi (_h, _v2 * _v2, _d);                                        \
262     /* in _h + 2, one +1 accounts for the lower neglected part of */    \
263     /* v2^2*d. the other +1 is to compute ceil((h+1)/2) */              \
264     _e2 = (MPFR_LIMB_ONE << 61) - ((_h + 2) >> 1);                      \
265     _h = _v2 * _e2;                                                     \
266     (r) = (_v2 << 33) + (_h >> 29);                                     \
267   } while (0)
268 
269 /* given 2^62 <= d < 2^64, return a 32-bit approximation r of
270    sqrt(2^126/d) */
271 #define __gmpfr_invsqrt_halflimb_approx(r, d)                           \
272   do {                                                                  \
273     mp_limb_t _d, _i, _v0, _e0, _d37, _v1, _e1, _h;                     \
274     _d = (d);                                                           \
275     _i = (_d >> 54) - 256;                                              \
276     /* i = d10 - 256 */                                                 \
277     _v0 = T[_i];                                                        \
278     _d37 = 1 + (_d >> 27);                                              \
279     _e0 = T3[_i] * _d37;                                                \
280     /* the value (_v0 << 57) - _e0 is less than 2^61 */                 \
281     _v1 = (_v0 << 11) + (((_v0 << 57) - _e0) >> 47);                    \
282     _e1 = - _v1 * _v1 * _d37;                                           \
283     umul_hi (_h, _v1, _e1);                                             \
284     /* _h = floor(e_1*v_1/2^64) */                                      \
285     (r) = (_v1 << 10) + (_h >> 6);                                      \
286   } while (0)
287 
288 /* given 2^62 <= n < 2^64, put in s an approximation of sqrt(2^64*n),
289    with: s <= floor(sqrt(2^64*n)) <= s + 7 */
290 #define __gmpfr_sqrt_limb_approx(s, n)                                  \
291   do {                                                                  \
292     mp_limb_t _n, _x, _y, _z, _t;                                       \
293     _n = (n);                                                           \
294     __gmpfr_invsqrt_halflimb_approx (_x, _n);                           \
295     MPFR_ASSERTD(_x < MPFR_LIMB_ONE << 32);                             \
296     /* x has 32 bits, and is near (by below) sqrt(2^126/n) */           \
297     _y = (_x * (_n >> 31)) >> 32;                                       \
298     MPFR_ASSERTD(_y < MPFR_LIMB_ONE << 32);                             \
299     /* y is near (by below) sqrt(n) */                                  \
300     _z = _n - _y * _y;                                                  \
301     /* reduce _z so that _z <= 2*_y */                                  \
302     /* the maximal value of _z is 2*(2^32-1) */                         \
303     while (_z > 2 * ((MPFR_LIMB_ONE << 32) - 1))                        \
304       {                                                                 \
305         _z -= (_y + _y) + 1;                                            \
306         _y ++;                                                          \
307       }                                                                 \
308     /* now z <= 2*(2^32-1): one reduction is enough */                  \
309     if (_z > _y + _y)                                                   \
310       {                                                                 \
311         _z -= (_y + _y) + 1;                                            \
312         _y ++;                                                          \
313       }                                                                 \
314     /* _x * _z should be < 2^64 */                                      \
315     _t = (_x * _z) >> 32;                                               \
316     (s) = (_y << 32) + _t;                                              \
317   } while (0)
318 
319 /* given 2^62 <= u < 2^64, put in s the value floor(sqrt(2^64*u)),
320    and in rh in rl the remainder: 2^64*u - s^2 = 2^64*rh + rl, with
321    2^64*rh + rl <= 2*s, and in invs the approximation of 2^96/sqrt(u) */
322 #define __gmpfr_sqrt_limb(s, rh, rl, invs, u)                   \
323   do {                                                          \
324     mp_limb_t _u, _invs, _r, _h, _l;                            \
325     _u = (u);                                                   \
326     __gmpfr_invsqrt_limb_approx (_invs, _u);                    \
327     umul_hi (_h, _invs, _u);                                    \
328     _r = _h + _u;                                               \
329     /* make sure _r has its most significant bit set */         \
330     if (MPFR_UNLIKELY(_r < MPFR_LIMB_HIGHBIT))                  \
331       _r = MPFR_LIMB_HIGHBIT;                                   \
332     /* we know _r <= sqrt(2^64*u) <= _r + 16 */                 \
333     umul_ppmm (_h, _l, _r, _r);                                 \
334     sub_ddmmss (_h, _l, _u, 0, _h, _l);                         \
335     /* now h:l <= 30*_r */                                      \
336     MPFR_ASSERTD(_h < 30);                                      \
337     if (_h >= 16)                                               \
338       { /* subtract 16r+64 to h:l, add 8 to _r */               \
339         sub_ddmmss (_h, _l, _h, _l, _r >> 60, _r << 4);         \
340         sub_ddmmss (_h, _l, _h, _l, 0, 64);                     \
341         _r += 8;                                                \
342       }                                                         \
343     if (_h >= 8)                                                \
344       { /* subtract 8r+16 to h:l, add 4 to _r */                \
345         sub_ddmmss (_h, _l, _h, _l, _r >> 61, _r << 3);         \
346         sub_ddmmss (_h, _l, _h, _l, 0, 16);                     \
347         _r += 4;                                                \
348       }                                                         \
349     if (_h >= 4)                                                \
350       { /* subtract 4r+4 to h:l, add 2 to _r */                 \
351         sub_ddmmss (_h, _l, _h, _l, _r >> 62, _r << 2);         \
352         sub_ddmmss (_h, _l, _h, _l, 0, 4);                      \
353         _r += 2;                                                \
354       }                                                         \
355     while (_h > 1 || ((_h == 1) && (_l > 2 * _r)))              \
356       { /* subtract 2r+1 to h:l, add 1 to _r */                 \
357         sub_ddmmss (_h, _l, _h, _l, _r >> 63, (_r << 1) + 1);   \
358         _r ++;                                                  \
359       }                                                         \
360     (s) = _r;                                                   \
361     (rh) = _h;                                                   \
362     (rl) = _l;                                                   \
363     (invs) = _invs;                                             \
364   } while (0)
365 
366 #endif /* GMP_NUMB_BITS == 64 */
367