1 /* __gmpfr_invsqrt_limb_approx -- reciprocal approximate square root of a limb 2 3 Copyright 2017-2023 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramba projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* for now, we only provide __gmpfr_invert_limb for 64-bit limb */ 27 #if GMP_NUMB_BITS == 64 28 29 /* For 257 <= d10 <= 1024, T[d10-257] = floor(sqrt(2^30/d10)). 30 Sage code: 31 T = [floor(sqrt(2^30/d10)) for d10 in [257..1024]] */ 32 static const unsigned short T[768] = 33 { 2044, 2040, 2036, 2032, 2028, 2024, 2020, 2016, 2012, 2009, 2005, 34 2001, 1997, 1994, 1990, 1986, 1983, 1979, 1975, 1972, 1968, 1965, 1961, 35 1958, 1954, 1951, 1947, 1944, 1941, 1937, 1934, 1930, 1927, 1924, 1920, 36 1917, 1914, 1911, 1907, 1904, 1901, 1898, 1895, 1891, 1888, 1885, 1882, 37 1879, 1876, 1873, 1870, 1867, 1864, 1861, 1858, 1855, 1852, 1849, 1846, 38 1843, 1840, 1837, 1834, 1831, 1828, 1826, 1823, 1820, 1817, 1814, 1812, 39 1809, 1806, 1803, 1801, 1798, 1795, 1792, 1790, 1787, 1784, 1782, 1779, 40 1777, 1774, 1771, 1769, 1766, 1764, 1761, 1759, 1756, 1754, 1751, 1749, 41 1746, 1744, 1741, 1739, 1736, 1734, 1731, 1729, 1727, 1724, 1722, 1719, 42 1717, 1715, 1712, 1710, 1708, 1705, 1703, 1701, 1698, 1696, 1694, 1692, 43 1689, 1687, 1685, 1683, 1680, 1678, 1676, 1674, 1672, 1670, 1667, 1665, 44 1663, 1661, 1659, 1657, 1655, 1652, 1650, 1648, 1646, 1644, 1642, 1640, 45 1638, 1636, 1634, 1632, 1630, 1628, 1626, 1624, 1622, 1620, 1618, 1616, 46 1614, 1612, 1610, 1608, 1606, 1604, 1602, 1600, 1598, 1597, 1595, 1593, 47 1591, 1589, 1587, 1585, 1583, 1582, 1580, 1578, 1576, 1574, 1572, 1571, 48 1569, 1567, 1565, 1563, 1562, 1560, 1558, 1556, 1555, 1553, 1551, 1549, 49 1548, 1546, 1544, 1542, 1541, 1539, 1537, 1536, 1534, 1532, 1531, 1529, 50 1527, 1526, 1524, 1522, 1521, 1519, 1517, 1516, 1514, 1513, 1511, 1509, 51 1508, 1506, 1505, 1503, 1501, 1500, 1498, 1497, 1495, 1494, 1492, 1490, 52 1489, 1487, 1486, 1484, 1483, 1481, 1480, 1478, 1477, 1475, 1474, 1472, 53 1471, 1469, 1468, 1466, 1465, 1463, 1462, 1461, 1459, 1458, 1456, 1455, 54 1453, 1452, 1450, 1449, 1448, 1446, 1445, 1443, 1442, 1441, 1439, 1438, 55 1436, 1435, 1434, 1432, 1431, 1430, 1428, 1427, 1426, 1424, 1423, 1422, 56 1420, 1419, 1418, 1416, 1415, 1414, 1412, 1411, 1410, 1408, 1407, 1406, 57 1404, 1403, 1402, 1401, 1399, 1398, 1397, 1395, 1394, 1393, 1392, 1390, 58 1389, 1388, 1387, 1385, 1384, 1383, 1382, 1381, 1379, 1378, 1377, 1376, 59 1374, 1373, 1372, 1371, 1370, 1368, 1367, 1366, 1365, 1364, 1362, 1361, 60 1360, 1359, 1358, 1357, 1355, 1354, 1353, 1352, 1351, 1350, 1349, 1347, 61 1346, 1345, 1344, 1343, 1342, 1341, 1339, 1338, 1337, 1336, 1335, 1334, 62 1333, 1332, 1331, 1330, 1328, 1327, 1326, 1325, 1324, 1323, 1322, 1321, 63 1320, 1319, 1318, 1317, 1315, 1314, 1313, 1312, 1311, 1310, 1309, 1308, 64 1307, 1306, 1305, 1304, 1303, 1302, 1301, 1300, 1299, 1298, 1297, 1296, 65 1295, 1294, 1293, 1292, 1291, 1290, 1289, 1288, 1287, 1286, 1285, 1284, 66 1283, 1282, 1281, 1280, 1279, 1278, 1277, 1276, 1275, 1274, 1273, 1272, 67 1271, 1270, 1269, 1268, 1267, 1266, 1265, 1264, 1264, 1263, 1262, 1261, 68 1260, 1259, 1258, 1257, 1256, 1255, 1254, 1253, 1252, 1252, 1251, 1250, 69 1249, 1248, 1247, 1246, 1245, 1244, 1243, 1242, 1242, 1241, 1240, 1239, 70 1238, 1237, 1236, 1235, 1234, 1234, 1233, 1232, 1231, 1230, 1229, 1228, 71 1228, 1227, 1226, 1225, 1224, 1223, 1222, 1222, 1221, 1220, 1219, 1218, 72 1217, 1216, 1216, 1215, 1214, 1213, 1212, 1211, 1211, 1210, 1209, 1208, 73 1207, 1207, 1206, 1205, 1204, 1203, 1202, 1202, 1201, 1200, 1199, 1198, 74 1198, 1197, 1196, 1195, 1194, 1194, 1193, 1192, 1191, 1190, 1190, 1189, 75 1188, 1187, 1187, 1186, 1185, 1184, 1183, 1183, 1182, 1181, 1180, 1180, 76 1179, 1178, 1177, 1177, 1176, 1175, 1174, 1174, 1173, 1172, 1171, 1171, 77 1170, 1169, 1168, 1168, 1167, 1166, 1165, 1165, 1164, 1163, 1162, 1162, 78 1161, 1160, 1159, 1159, 1158, 1157, 1157, 1156, 1155, 1154, 1154, 1153, 79 1152, 1152, 1151, 1150, 1149, 1149, 1148, 1147, 1147, 1146, 1145, 1145, 80 1144, 1143, 1142, 1142, 1141, 1140, 1140, 1139, 1138, 1138, 1137, 1136, 81 1136, 1135, 1134, 1133, 1133, 1132, 1131, 1131, 1130, 1129, 1129, 1128, 82 1127, 1127, 1126, 1125, 1125, 1124, 1123, 1123, 1122, 1121, 1121, 1120, 83 1119, 1119, 1118, 1118, 1117, 1116, 1116, 1115, 1114, 1114, 1113, 1112, 84 1112, 1111, 1110, 1110, 1109, 1109, 1108, 1107, 1107, 1106, 1105, 1105, 85 1104, 1103, 1103, 1102, 1102, 1101, 1100, 1100, 1099, 1099, 1098, 1097, 86 1097, 1096, 1095, 1095, 1094, 1094, 1093, 1092, 1092, 1091, 1091, 1090, 87 1089, 1089, 1088, 1088, 1087, 1086, 1086, 1085, 1085, 1084, 1083, 1083, 88 1082, 1082, 1081, 1080, 1080, 1079, 1079, 1078, 1077, 1077, 1076, 1076, 89 1075, 1075, 1074, 1073, 1073, 1072, 1072, 1071, 1071, 1070, 1069, 1069, 90 1068, 1068, 1067, 1067, 1066, 1065, 1065, 1064, 1064, 1063, 1063, 1062, 91 1062, 1061, 1060, 1060, 1059, 1059, 1058, 1058, 1057, 1057, 1056, 1055, 92 1055, 1054, 1054, 1053, 1053, 1052, 1052, 1051, 1051, 1050, 1049, 1049, 93 1048, 1048, 1047, 1047, 1046, 1046, 1045, 1045, 1044, 1044, 1043, 1043, 94 1042, 1041, 1041, 1040, 1040, 1039, 1039, 1038, 1038, 1037, 1037, 1036, 95 1036, 1035, 1035, 1034, 1034, 1033, 1033, 1032, 1032, 1031, 1031, 1030, 96 1030, 1029, 1029, 1028, 1028, 1027, 1027, 1026, 1026, 1025, 1025, 1024, 97 1024 }; 98 99 /* table of v0^3 */ 100 static const mp_limb_t T3[768] = 101 { 8539701184, 8489664000, 8439822656, 8390176768, 8340725952, 102 8291469824, 8242408000, 8193540096, 8144865728, 8108486729, 8060150125, 103 8012006001, 7964053973, 7928215784, 7880599000, 7833173256, 7797729087, 104 7750636739, 7703734375, 7668682048, 7622111232, 7587307125, 7541066681, 105 7506509912, 7460598664, 7426288351, 7380705123, 7346640384, 7312680621, 106 7267563953, 7233848504, 7189057000, 7155584983, 7122217024, 7077888000, 107 7044762213, 7011739944, 6978821031, 6935089643, 6902411264, 6869835701, 108 6837362792, 6804992375, 6761990971, 6729859072, 6697829125, 6665900968, 109 6634074439, 6602349376, 6570725617, 6539203000, 6507781363, 6476460544, 110 6445240381, 6414120712, 6383101375, 6352182208, 6321363049, 6290643736, 111 6260024107, 6229504000, 6199083253, 6168761704, 6138539191, 6108415552, 112 6088387976, 6058428767, 6028568000, 5998805513, 5969141144, 5949419328, 113 5919918129, 5890514616, 5861208627, 5841725401, 5812581592, 5783534875, 114 5754585088, 5735339000, 5706550403, 5677858304, 5658783768, 5630252139, 115 5611284433, 5582912824, 5554637011, 5535839609, 5507723096, 5489031744, 116 5461074081, 5442488479, 5414689216, 5396209064, 5368567751, 5350192749, 117 5322708936, 5304438784, 5277112021, 5258946419, 5231776256, 5213714904, 118 5186700891, 5168743489, 5150827583, 5124031424, 5106219048, 5079577959, 119 5061868813, 5044200875, 5017776128, 5000211000, 4982686912, 4956477625, 120 4939055927, 4921675101, 4895680392, 4878401536, 4861163384, 4843965888, 121 4818245769, 4801149703, 4784094125, 4767078987, 4741632000, 4724717752, 122 4707843776, 4691010024, 4674216448, 4657463000, 4632407963, 4615754625, 123 4599141247, 4582567781, 4566034179, 4549540393, 4533086375, 4508479808, 124 4492125000, 4475809792, 4459534136, 4443297984, 4427101288, 4410944000, 125 4394826072, 4378747456, 4362708104, 4346707968, 4330747000, 4314825152, 126 4298942376, 4283098624, 4267293848, 4251528000, 4235801032, 4220112896, 127 4204463544, 4188852928, 4173281000, 4157747712, 4142253016, 4126796864, 128 4111379208, 4096000000, 4080659192, 4073003173, 4057719875, 4042474857, 129 4027268071, 4012099469, 3996969003, 3981876625, 3966822287, 3959309368, 130 3944312000, 3929352552, 3914430976, 3899547224, 3884701248, 3877292411, 131 3862503009, 3847751263, 3833037125, 3818360547, 3811036328, 3796416000, 132 3781833112, 3767287616, 3760028875, 3745539377, 3731087151, 3716672149, 133 3709478592, 3695119336, 3680797184, 3666512088, 3659383421, 3645153819, 134 3630961153, 3623878656, 3609741304, 3595640768, 3588604291, 3574558889, 135 3560550183, 3553559576, 3539605824, 3525688648, 3518743761, 3504881359, 136 3491055413, 3484156096, 3470384744, 3463512697, 3449795831, 3436115229, 137 3429288512, 3415662216, 3408862625, 3395290527, 3381754501, 3375000000, 138 3361517992, 3354790473, 3341362375, 3334661784, 3321287488, 3307949000, 139 3301293169, 3288008303, 3281379256, 3268147904, 3261545587, 3248367641, 140 3241792000, 3228667352, 3222118333, 3209046875, 3202524424, 3189506048, 141 3183010111, 3170044709, 3163575232, 3150662696, 3144219625, 3131359847, 142 3124943128, 3118535181, 3105745579, 3099363912, 3086626816, 3080271375, 143 3067586677, 3061257408, 3048625000, 3042321849, 3036027392, 3023464536, 144 3017196125, 3004685307, 2998442888, 2992209121, 2979767519, 2973559672, 145 2961169856, 2954987875, 2948814504, 2936493568, 2930345991, 2924207000, 146 2911954752, 2905841483, 2899736776, 2887553024, 2881473967, 2875403448, 147 2863288000, 2857243059, 2851206632, 2839159296, 2833148375, 2827145944, 148 2815166528, 2809189531, 2803221000, 2791309312, 2785366143, 2779431416, 149 2767587264, 2761677827, 2755776808, 2749884201, 2738124199, 2732256792, 150 2726397773, 2714704875, 2708870984, 2703045457, 2697228288, 2685619000, 151 2679826869, 2674043072, 2668267603, 2656741625, 2650991104, 2645248887, 152 2639514968, 2633789341, 2622362939, 2616662152, 2610969633, 2605285376, 153 2593941624, 2588282117, 2582630848, 2576987811, 2571353000, 2560108032, 154 2554497863, 2548895896, 2543302125, 2537716544, 2526569928, 2521008881, 155 2515456000, 2509911279, 2504374712, 2498846293, 2487813875, 2482309864, 156 2476813977, 2471326208, 2465846551, 2460375000, 2454911549, 2444008923, 157 2438569736, 2433138625, 2427715584, 2422300607, 2416893688, 2411494821, 158 2400721219, 2395346472, 2389979753, 2384621056, 2379270375, 2373927704, 159 2368593037, 2363266368, 2357947691, 2352637000, 2342039552, 2336752783, 160 2331473976, 2326203125, 2320940224, 2315685267, 2310438248, 2305199161, 161 2299968000, 2294744759, 2289529432, 2284322013, 2273930875, 2268747144, 162 2263571297, 2258403328, 2253243231, 2248091000, 2242946629, 2237810112, 163 2232681443, 2227560616, 2222447625, 2217342464, 2212245127, 2207155608, 164 2202073901, 2197000000, 2191933899, 2186875592, 2181825073, 2176782336, 165 2171747375, 2166720184, 2161700757, 2156689088, 2151685171, 2146689000, 166 2141700569, 2136719872, 2131746903, 2126781656, 2121824125, 2116874304, 167 2111932187, 2106997768, 2102071041, 2097152000, 2092240639, 2087336952, 168 2082440933, 2077552576, 2072671875, 2067798824, 2062933417, 2058075648, 169 2053225511, 2048383000, 2043548109, 2038720832, 2033901163, 2029089096, 170 2024284625, 2019487744, 2019487744, 2014698447, 2009916728, 2005142581, 171 2000376000, 1995616979, 1990865512, 1986121593, 1981385216, 1976656375, 172 1971935064, 1967221277, 1962515008, 1962515008, 1957816251, 1953125000, 173 1948441249, 1943764992, 1939096223, 1934434936, 1929781125, 1925134784, 174 1920495907, 1915864488, 1915864488, 1911240521, 1906624000, 1902014919, 175 1897413272, 1892819053, 1888232256, 1883652875, 1879080904, 1879080904, 176 1874516337, 1869959168, 1865409391, 1860867000, 1856331989, 1851804352, 177 1851804352, 1847284083, 1842771176, 1838265625, 1833767424, 1829276567, 178 1824793048, 1824793048, 1820316861, 1815848000, 1811386459, 1806932232, 179 1802485313, 1798045696, 1798045696, 1793613375, 1789188344, 1784770597, 180 1780360128, 1775956931, 1775956931, 1771561000, 1767172329, 1762790912, 181 1758416743, 1758416743, 1754049816, 1749690125, 1745337664, 1740992427, 182 1736654408, 1736654408, 1732323601, 1728000000, 1723683599, 1719374392, 183 1719374392, 1715072373, 1710777536, 1706489875, 1702209384, 1702209384, 184 1697936057, 1693669888, 1689410871, 1685159000, 1685159000, 1680914269, 185 1676676672, 1672446203, 1672446203, 1668222856, 1664006625, 1659797504, 186 1655595487, 1655595487, 1651400568, 1647212741, 1643032000, 1643032000, 187 1638858339, 1634691752, 1630532233, 1630532233, 1626379776, 1622234375, 188 1618096024, 1618096024, 1613964717, 1609840448, 1605723211, 1605723211, 189 1601613000, 1597509809, 1593413632, 1593413632, 1589324463, 1585242296, 190 1581167125, 1581167125, 1577098944, 1573037747, 1568983528, 1568983528, 191 1564936281, 1560896000, 1556862679, 1556862679, 1552836312, 1548816893, 192 1548816893, 1544804416, 1540798875, 1536800264, 1536800264, 1532808577, 193 1528823808, 1528823808, 1524845951, 1520875000, 1516910949, 1516910949, 194 1512953792, 1509003523, 1509003523, 1505060136, 1501123625, 1501123625, 195 1497193984, 1493271207, 1489355288, 1489355288, 1485446221, 1481544000, 196 1481544000, 1477648619, 1473760072, 1473760072, 1469878353, 1466003456, 197 1466003456, 1462135375, 1458274104, 1454419637, 1454419637, 1450571968, 198 1446731091, 1446731091, 1442897000, 1439069689, 1439069689, 1435249152, 199 1431435383, 1431435383, 1427628376, 1423828125, 1423828125, 1420034624, 200 1416247867, 1416247867, 1412467848, 1408694561, 1408694561, 1404928000, 201 1401168159, 1401168159, 1397415032, 1397415032, 1393668613, 1389928896, 202 1389928896, 1386195875, 1382469544, 1382469544, 1378749897, 1375036928, 203 1375036928, 1371330631, 1367631000, 1367631000, 1363938029, 1363938029, 204 1360251712, 1356572043, 1356572043, 1352899016, 1349232625, 1349232625, 205 1345572864, 1341919727, 1341919727, 1338273208, 1338273208, 1334633301, 206 1331000000, 1331000000, 1327373299, 1327373299, 1323753192, 1320139673, 207 1320139673, 1316532736, 1312932375, 1312932375, 1309338584, 1309338584, 208 1305751357, 1302170688, 1302170688, 1298596571, 1298596571, 1295029000, 209 1291467969, 1291467969, 1287913472, 1287913472, 1284365503, 1280824056, 210 1280824056, 1277289125, 1277289125, 1273760704, 1270238787, 1270238787, 211 1266723368, 1266723368, 1263214441, 1259712000, 1259712000, 1256216039, 212 1256216039, 1252726552, 1249243533, 1249243533, 1245766976, 1245766976, 213 1242296875, 1242296875, 1238833224, 1235376017, 1235376017, 1231925248, 214 1231925248, 1228480911, 1228480911, 1225043000, 1221611509, 1221611509, 215 1218186432, 1218186432, 1214767763, 1214767763, 1211355496, 1207949625, 216 1207949625, 1204550144, 1204550144, 1201157047, 1201157047, 1197770328, 217 1197770328, 1194389981, 1191016000, 1191016000, 1187648379, 1187648379, 218 1184287112, 1184287112, 1180932193, 1180932193, 1177583616, 1174241375, 219 1174241375, 1170905464, 1170905464, 1167575877, 1167575877, 1164252608, 220 1164252608, 1160935651, 1160935651, 1157625000, 1154320649, 1154320649, 221 1151022592, 1151022592, 1147730823, 1147730823, 1144445336, 1144445336, 222 1141166125, 1141166125, 1137893184, 1137893184, 1134626507, 1134626507, 223 1131366088, 1128111921, 1128111921, 1124864000, 1124864000, 1121622319, 224 1121622319, 1118386872, 1118386872, 1115157653, 1115157653, 1111934656, 225 1111934656, 1108717875, 1108717875, 1105507304, 1105507304, 1102302937, 226 1102302937, 1099104768, 1099104768, 1095912791, 1095912791, 1092727000, 227 1092727000, 1089547389, 1089547389, 1086373952, 1086373952, 1083206683, 228 1083206683, 1080045576, 1080045576, 1076890625, 1076890625, 1073741824, 229 1073741824 }; 230 231 /* given 2^62 <= d < 2^64, put in r an approximation of 232 s = floor(2^96/sqrt(d)) - 2^64, with r <= s <= r + 15 */ 233 #define __gmpfr_invsqrt_limb_approx(r, d) \ 234 do { \ 235 mp_limb_t _d, _i, _v0, _e0, _d37, _v1, _e1, _h, _v2, _e2; \ 236 _d = (d); \ 237 _i = (_d >> 54) - 256; \ 238 /* i = d10 - 256 */ \ 239 _v0 = T[_i]; \ 240 _d37 = 1 + (_d >> 27); \ 241 _e0 = T3[_i] * _d37; \ 242 /* the value (_v0 << 57) - _e0 is less than 2^61 */ \ 243 _v1 = (_v0 << 11) + (((_v0 << 57) - _e0) >> 47); \ 244 _e1 = - _v1 * _v1 * _d37; \ 245 umul_hi (_h, _v1, _e1); \ 246 /* _h = floor(e_1*v_1/2^64) */ \ 247 _v2 = (_v1 << 10) + (_h >> 6); \ 248 umul_hi (_h, _v2 * _v2, _d); \ 249 /* in _h + 2, one +1 accounts for the lower neglected part of */ \ 250 /* v2^2*d. the other +1 is to compute ceil((h+1)/2) */ \ 251 _e2 = (MPFR_LIMB_ONE << 61) - ((_h + 2) >> 1); \ 252 _h = _v2 * _e2; \ 253 (r) = (_v2 << 33) + (_h >> 29); \ 254 } while (0) 255 256 /* given 2^62 <= d < 2^64, return a 32-bit approximation r of 257 sqrt(2^126/d) */ 258 #define __gmpfr_invsqrt_halflimb_approx(r, d) \ 259 do { \ 260 mp_limb_t _d, _i, _v0, _e0, _d37, _v1, _e1, _h; \ 261 _d = (d); \ 262 _i = (_d >> 54) - 256; \ 263 /* i = d10 - 256 */ \ 264 _v0 = T[_i]; \ 265 _d37 = 1 + (_d >> 27); \ 266 _e0 = T3[_i] * _d37; \ 267 /* the value (_v0 << 57) - _e0 is less than 2^61 */ \ 268 _v1 = (_v0 << 11) + (((_v0 << 57) - _e0) >> 47); \ 269 _e1 = - _v1 * _v1 * _d37; \ 270 umul_hi (_h, _v1, _e1); \ 271 /* _h = floor(e_1*v_1/2^64) */ \ 272 (r) = (_v1 << 10) + (_h >> 6); \ 273 } while (0) 274 275 /* given 2^62 <= n < 2^64, put in s an approximation of sqrt(2^64*n), 276 with: s <= floor(sqrt(2^64*n)) <= s + 7 */ 277 #define __gmpfr_sqrt_limb_approx(s, n) \ 278 do { \ 279 mp_limb_t _n, _x, _y, _z, _t; \ 280 _n = (n); \ 281 __gmpfr_invsqrt_halflimb_approx (_x, _n); \ 282 MPFR_ASSERTD(_x < MPFR_LIMB_ONE << 32); \ 283 /* x has 32 bits, and is near (by below) sqrt(2^126/n) */ \ 284 _y = (_x * (_n >> 31)) >> 32; \ 285 MPFR_ASSERTD(_y < MPFR_LIMB_ONE << 32); \ 286 /* y is near (by below) sqrt(n) */ \ 287 _z = _n - _y * _y; \ 288 /* reduce _z so that _z <= 2*_y */ \ 289 /* the maximal value of _z is 2*(2^32-1) */ \ 290 while (_z > 2 * ((MPFR_LIMB_ONE << 32) - 1)) \ 291 { \ 292 _z -= (_y + _y) + 1; \ 293 _y ++; \ 294 } \ 295 /* now z <= 2*(2^32-1): one reduction is enough */ \ 296 if (_z > _y + _y) \ 297 { \ 298 _z -= (_y + _y) + 1; \ 299 _y ++; \ 300 } \ 301 /* _x * _z should be < 2^64 */ \ 302 _t = (_x * _z) >> 32; \ 303 (s) = (_y << 32) + _t; \ 304 } while (0) 305 306 /* given 2^62 <= u < 2^64, put in s the value floor(sqrt(2^64*u)), 307 and in rh in rl the remainder: 2^64*u - s^2 = 2^64*rh + rl, with 308 2^64*rh + rl <= 2*s, and in invs the approximation of 2^96/sqrt(u) */ 309 #define __gmpfr_sqrt_limb(s, rh, rl, invs, u) \ 310 do { \ 311 mp_limb_t _u, _invs, _r, _h, _l; \ 312 _u = (u); \ 313 __gmpfr_invsqrt_limb_approx (_invs, _u); \ 314 umul_hi (_h, _invs, _u); \ 315 _r = _h + _u; \ 316 /* make sure _r has its most significant bit set */ \ 317 if (MPFR_UNLIKELY(_r < MPFR_LIMB_HIGHBIT)) \ 318 _r = MPFR_LIMB_HIGHBIT; \ 319 /* we know _r <= sqrt(2^64*u) <= _r + 16 */ \ 320 umul_ppmm (_h, _l, _r, _r); \ 321 sub_ddmmss (_h, _l, _u, 0, _h, _l); \ 322 /* now h:l <= 30*_r */ \ 323 MPFR_ASSERTD(_h < 30); \ 324 if (_h >= 16) \ 325 { /* subtract 16r+64 to h:l, add 8 to _r */ \ 326 sub_ddmmss (_h, _l, _h, _l, _r >> 60, _r << 4); \ 327 sub_ddmmss (_h, _l, _h, _l, 0, 64); \ 328 _r += 8; \ 329 } \ 330 if (_h >= 8) \ 331 { /* subtract 8r+16 to h:l, add 4 to _r */ \ 332 sub_ddmmss (_h, _l, _h, _l, _r >> 61, _r << 3); \ 333 sub_ddmmss (_h, _l, _h, _l, 0, 16); \ 334 _r += 4; \ 335 } \ 336 if (_h >= 4) \ 337 { /* subtract 4r+4 to h:l, add 2 to _r */ \ 338 sub_ddmmss (_h, _l, _h, _l, _r >> 62, _r << 2); \ 339 sub_ddmmss (_h, _l, _h, _l, 0, 4); \ 340 _r += 2; \ 341 } \ 342 while (_h > 1 || ((_h == 1) && (_l > 2 * _r))) \ 343 { /* subtract 2r+1 to h:l, add 1 to _r */ \ 344 sub_ddmmss (_h, _l, _h, _l, _r >> 63, (_r << 1) + 1); \ 345 _r ++; \ 346 } \ 347 (s) = _r; \ 348 (rh) = _h; \ 349 (rl) = _l; \ 350 (invs) = _invs; \ 351 } while (0) 352 353 #endif /* GMP_NUMB_BITS == 64 */ 354