1 /* __gmpfr_invsqrt_limb_approx -- reciprocal approximate square root of a limb 2 3 Copyright 2017-2020 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramba projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* for now, we only provide __gmpfr_invert_limb for 64-bit limb */ 27 #if GMP_NUMB_BITS == 64 28 29 /* For 257 <= d10 <= 1024, T[d10-257] = floor(sqrt(2^30/d10)). 30 Sage code: 31 T = [floor(sqrt(2^30/d10)) for d10 in [257..1024]] */ 32 static const mp_limb_t T[768] = 33 { 2044, 2040, 2036, 2032, 2028, 2024, 2020, 2016, 2012, 2009, 2005, 34 2001, 1997, 1994, 1990, 1986, 1983, 1979, 1975, 1972, 1968, 1965, 1961, 35 1958, 1954, 1951, 1947, 1944, 1941, 1937, 1934, 1930, 1927, 1924, 1920, 36 1917, 1914, 1911, 1907, 1904, 1901, 1898, 1895, 1891, 1888, 1885, 1882, 37 1879, 1876, 1873, 1870, 1867, 1864, 1861, 1858, 1855, 1852, 1849, 1846, 38 1843, 1840, 1837, 1834, 1831, 1828, 1826, 1823, 1820, 1817, 1814, 1812, 39 1809, 1806, 1803, 1801, 1798, 1795, 1792, 1790, 1787, 1784, 1782, 1779, 40 1777, 1774, 1771, 1769, 1766, 1764, 1761, 1759, 1756, 1754, 1751, 1749, 41 1746, 1744, 1741, 1739, 1736, 1734, 1731, 1729, 1727, 1724, 1722, 1719, 42 1717, 1715, 1712, 1710, 1708, 1705, 1703, 1701, 1698, 1696, 1694, 1692, 43 1689, 1687, 1685, 1683, 1680, 1678, 1676, 1674, 1672, 1670, 1667, 1665, 44 1663, 1661, 1659, 1657, 1655, 1652, 1650, 1648, 1646, 1644, 1642, 1640, 45 1638, 1636, 1634, 1632, 1630, 1628, 1626, 1624, 1622, 1620, 1618, 1616, 46 1614, 1612, 1610, 1608, 1606, 1604, 1602, 1600, 1598, 1597, 1595, 1593, 47 1591, 1589, 1587, 1585, 1583, 1582, 1580, 1578, 1576, 1574, 1572, 1571, 48 1569, 1567, 1565, 1563, 1562, 1560, 1558, 1556, 1555, 1553, 1551, 1549, 49 1548, 1546, 1544, 1542, 1541, 1539, 1537, 1536, 1534, 1532, 1531, 1529, 50 1527, 1526, 1524, 1522, 1521, 1519, 1517, 1516, 1514, 1513, 1511, 1509, 51 1508, 1506, 1505, 1503, 1501, 1500, 1498, 1497, 1495, 1494, 1492, 1490, 52 1489, 1487, 1486, 1484, 1483, 1481, 1480, 1478, 1477, 1475, 1474, 1472, 53 1471, 1469, 1468, 1466, 1465, 1463, 1462, 1461, 1459, 1458, 1456, 1455, 54 1453, 1452, 1450, 1449, 1448, 1446, 1445, 1443, 1442, 1441, 1439, 1438, 55 1436, 1435, 1434, 1432, 1431, 1430, 1428, 1427, 1426, 1424, 1423, 1422, 56 1420, 1419, 1418, 1416, 1415, 1414, 1412, 1411, 1410, 1408, 1407, 1406, 57 1404, 1403, 1402, 1401, 1399, 1398, 1397, 1395, 1394, 1393, 1392, 1390, 58 1389, 1388, 1387, 1385, 1384, 1383, 1382, 1381, 1379, 1378, 1377, 1376, 59 1374, 1373, 1372, 1371, 1370, 1368, 1367, 1366, 1365, 1364, 1362, 1361, 60 1360, 1359, 1358, 1357, 1355, 1354, 1353, 1352, 1351, 1350, 1349, 1347, 61 1346, 1345, 1344, 1343, 1342, 1341, 1339, 1338, 1337, 1336, 1335, 1334, 62 1333, 1332, 1331, 1330, 1328, 1327, 1326, 1325, 1324, 1323, 1322, 1321, 63 1320, 1319, 1318, 1317, 1315, 1314, 1313, 1312, 1311, 1310, 1309, 1308, 64 1307, 1306, 1305, 1304, 1303, 1302, 1301, 1300, 1299, 1298, 1297, 1296, 65 1295, 1294, 1293, 1292, 1291, 1290, 1289, 1288, 1287, 1286, 1285, 1284, 66 1283, 1282, 1281, 1280, 1279, 1278, 1277, 1276, 1275, 1274, 1273, 1272, 67 1271, 1270, 1269, 1268, 1267, 1266, 1265, 1264, 1264, 1263, 1262, 1261, 68 1260, 1259, 1258, 1257, 1256, 1255, 1254, 1253, 1252, 1252, 1251, 1250, 69 1249, 1248, 1247, 1246, 1245, 1244, 1243, 1242, 1242, 1241, 1240, 1239, 70 1238, 1237, 1236, 1235, 1234, 1234, 1233, 1232, 1231, 1230, 1229, 1228, 71 1228, 1227, 1226, 1225, 1224, 1223, 1222, 1222, 1221, 1220, 1219, 1218, 72 1217, 1216, 1216, 1215, 1214, 1213, 1212, 1211, 1211, 1210, 1209, 1208, 73 1207, 1207, 1206, 1205, 1204, 1203, 1202, 1202, 1201, 1200, 1199, 1198, 74 1198, 1197, 1196, 1195, 1194, 1194, 1193, 1192, 1191, 1190, 1190, 1189, 75 1188, 1187, 1187, 1186, 1185, 1184, 1183, 1183, 1182, 1181, 1180, 1180, 76 1179, 1178, 1177, 1177, 1176, 1175, 1174, 1174, 1173, 1172, 1171, 1171, 77 1170, 1169, 1168, 1168, 1167, 1166, 1165, 1165, 1164, 1163, 1162, 1162, 78 1161, 1160, 1159, 1159, 1158, 1157, 1157, 1156, 1155, 1154, 1154, 1153, 79 1152, 1152, 1151, 1150, 1149, 1149, 1148, 1147, 1147, 1146, 1145, 1145, 80 1144, 1143, 1142, 1142, 1141, 1140, 1140, 1139, 1138, 1138, 1137, 1136, 81 1136, 1135, 1134, 1133, 1133, 1132, 1131, 1131, 1130, 1129, 1129, 1128, 82 1127, 1127, 1126, 1125, 1125, 1124, 1123, 1123, 1122, 1121, 1121, 1120, 83 1119, 1119, 1118, 1118, 1117, 1116, 1116, 1115, 1114, 1114, 1113, 1112, 84 1112, 1111, 1110, 1110, 1109, 1109, 1108, 1107, 1107, 1106, 1105, 1105, 85 1104, 1103, 1103, 1102, 1102, 1101, 1100, 1100, 1099, 1099, 1098, 1097, 86 1097, 1096, 1095, 1095, 1094, 1094, 1093, 1092, 1092, 1091, 1091, 1090, 87 1089, 1089, 1088, 1088, 1087, 1086, 1086, 1085, 1085, 1084, 1083, 1083, 88 1082, 1082, 1081, 1080, 1080, 1079, 1079, 1078, 1077, 1077, 1076, 1076, 89 1075, 1075, 1074, 1073, 1073, 1072, 1072, 1071, 1071, 1070, 1069, 1069, 90 1068, 1068, 1067, 1067, 1066, 1065, 1065, 1064, 1064, 1063, 1063, 1062, 91 1062, 1061, 1060, 1060, 1059, 1059, 1058, 1058, 1057, 1057, 1056, 1055, 92 1055, 1054, 1054, 1053, 1053, 1052, 1052, 1051, 1051, 1050, 1049, 1049, 93 1048, 1048, 1047, 1047, 1046, 1046, 1045, 1045, 1044, 1044, 1043, 1043, 94 1042, 1041, 1041, 1040, 1040, 1039, 1039, 1038, 1038, 1037, 1037, 1036, 95 1036, 1035, 1035, 1034, 1034, 1033, 1033, 1032, 1032, 1031, 1031, 1030, 96 1030, 1029, 1029, 1028, 1028, 1027, 1027, 1026, 1026, 1025, 1025, 1024, 97 1024 }; 98 99 /* table of v0^3 */ 100 static const mp_limb_t T3[768] = 101 { 8539701184, 8489664000, 8439822656, 8390176768, 8340725952, 102 8291469824, 8242408000, 8193540096, 8144865728, 8108486729, 8060150125, 103 8012006001, 7964053973, 7928215784, 7880599000, 7833173256, 7797729087, 104 7750636739, 7703734375, 7668682048, 7622111232, 7587307125, 7541066681, 105 7506509912, 7460598664, 7426288351, 7380705123, 7346640384, 7312680621, 106 7267563953, 7233848504, 7189057000, 7155584983, 7122217024, 7077888000, 107 7044762213, 7011739944, 6978821031, 6935089643, 6902411264, 6869835701, 108 6837362792, 6804992375, 6761990971, 6729859072, 6697829125, 6665900968, 109 6634074439, 6602349376, 6570725617, 6539203000, 6507781363, 6476460544, 110 6445240381, 6414120712, 6383101375, 6352182208, 6321363049, 6290643736, 111 6260024107, 6229504000, 6199083253, 6168761704, 6138539191, 6108415552, 112 6088387976, 6058428767, 6028568000, 5998805513, 5969141144, 5949419328, 113 5919918129, 5890514616, 5861208627, 5841725401, 5812581592, 5783534875, 114 5754585088, 5735339000, 5706550403, 5677858304, 5658783768, 5630252139, 115 5611284433, 5582912824, 5554637011, 5535839609, 5507723096, 5489031744, 116 5461074081, 5442488479, 5414689216, 5396209064, 5368567751, 5350192749, 117 5322708936, 5304438784, 5277112021, 5258946419, 5231776256, 5213714904, 118 5186700891, 5168743489, 5150827583, 5124031424, 5106219048, 5079577959, 119 5061868813, 5044200875, 5017776128, 5000211000, 4982686912, 4956477625, 120 4939055927, 4921675101, 4895680392, 4878401536, 4861163384, 4843965888, 121 4818245769, 4801149703, 4784094125, 4767078987, 4741632000, 4724717752, 122 4707843776, 4691010024, 4674216448, 4657463000, 4632407963, 4615754625, 123 4599141247, 4582567781, 4566034179, 4549540393, 4533086375, 4508479808, 124 4492125000, 4475809792, 4459534136, 4443297984, 4427101288, 4410944000, 125 4394826072, 4378747456, 4362708104, 4346707968, 4330747000, 4314825152, 126 4298942376, 4283098624, 4267293848, 4251528000, 4235801032, 4220112896, 127 4204463544, 4188852928, 4173281000, 4157747712, 4142253016, 4126796864, 128 4111379208, 4096000000, 4080659192, 4073003173, 4057719875, 4042474857, 129 4027268071, 4012099469, 3996969003, 3981876625, 3966822287, 3959309368, 130 3944312000, 3929352552, 3914430976, 3899547224, 3884701248, 3877292411, 131 3862503009, 3847751263, 3833037125, 3818360547, 3811036328, 3796416000, 132 3781833112, 3767287616, 3760028875, 3745539377, 3731087151, 3716672149, 133 3709478592, 3695119336, 3680797184, 3666512088, 3659383421, 3645153819, 134 3630961153, 3623878656, 3609741304, 3595640768, 3588604291, 3574558889, 135 3560550183, 3553559576, 3539605824, 3525688648, 3518743761, 3504881359, 136 3491055413, 3484156096, 3470384744, 3463512697, 3449795831, 3436115229, 137 3429288512, 3415662216, 3408862625, 3395290527, 3381754501, 3375000000, 138 3361517992, 3354790473, 3341362375, 3334661784, 3321287488, 3307949000, 139 3301293169, 3288008303, 3281379256, 3268147904, 3261545587, 3248367641, 140 3241792000, 3228667352, 3222118333, 3209046875, 3202524424, 3189506048, 141 3183010111, 3170044709, 3163575232, 3150662696, 3144219625, 3131359847, 142 3124943128, 3118535181, 3105745579, 3099363912, 3086626816, 3080271375, 143 3067586677, 3061257408, 3048625000, 3042321849, 3036027392, 3023464536, 144 3017196125, 3004685307, 2998442888, 2992209121, 2979767519, 2973559672, 145 2961169856, 2954987875, 2948814504, 2936493568, 2930345991, 2924207000, 146 2911954752, 2905841483, 2899736776, 2887553024, 2881473967, 2875403448, 147 2863288000, 2857243059, 2851206632, 2839159296, 2833148375, 2827145944, 148 2815166528, 2809189531, 2803221000, 2791309312, 2785366143, 2779431416, 149 2767587264, 2761677827, 2755776808, 2749884201, 2738124199, 2732256792, 150 2726397773, 2714704875, 2708870984, 2703045457, 2697228288, 2685619000, 151 2679826869, 2674043072, 2668267603, 2656741625, 2650991104, 2645248887, 152 2639514968, 2633789341, 2622362939, 2616662152, 2610969633, 2605285376, 153 2593941624, 2588282117, 2582630848, 2576987811, 2571353000, 2560108032, 154 2554497863, 2548895896, 2543302125, 2537716544, 2526569928, 2521008881, 155 2515456000, 2509911279, 2504374712, 2498846293, 2487813875, 2482309864, 156 2476813977, 2471326208, 2465846551, 2460375000, 2454911549, 2444008923, 157 2438569736, 2433138625, 2427715584, 2422300607, 2416893688, 2411494821, 158 2400721219, 2395346472, 2389979753, 2384621056, 2379270375, 2373927704, 159 2368593037, 2363266368, 2357947691, 2352637000, 2342039552, 2336752783, 160 2331473976, 2326203125, 2320940224, 2315685267, 2310438248, 2305199161, 161 2299968000, 2294744759, 2289529432, 2284322013, 2273930875, 2268747144, 162 2263571297, 2258403328, 2253243231, 2248091000, 2242946629, 2237810112, 163 2232681443, 2227560616, 2222447625, 2217342464, 2212245127, 2207155608, 164 2202073901, 2197000000, 2191933899, 2186875592, 2181825073, 2176782336, 165 2171747375, 2166720184, 2161700757, 2156689088, 2151685171, 2146689000, 166 2141700569, 2136719872, 2131746903, 2126781656, 2121824125, 2116874304, 167 2111932187, 2106997768, 2102071041, 2097152000, 2092240639, 2087336952, 168 2082440933, 2077552576, 2072671875, 2067798824, 2062933417, 2058075648, 169 2053225511, 2048383000, 2043548109, 2038720832, 2033901163, 2029089096, 170 2024284625, 2019487744, 2019487744, 2014698447, 2009916728, 2005142581, 171 2000376000, 1995616979, 1990865512, 1986121593, 1981385216, 1976656375, 172 1971935064, 1967221277, 1962515008, 1962515008, 1957816251, 1953125000, 173 1948441249, 1943764992, 1939096223, 1934434936, 1929781125, 1925134784, 174 1920495907, 1915864488, 1915864488, 1911240521, 1906624000, 1902014919, 175 1897413272, 1892819053, 1888232256, 1883652875, 1879080904, 1879080904, 176 1874516337, 1869959168, 1865409391, 1860867000, 1856331989, 1851804352, 177 1851804352, 1847284083, 1842771176, 1838265625, 1833767424, 1829276567, 178 1824793048, 1824793048, 1820316861, 1815848000, 1811386459, 1806932232, 179 1802485313, 1798045696, 1798045696, 1793613375, 1789188344, 1784770597, 180 1780360128, 1775956931, 1775956931, 1771561000, 1767172329, 1762790912, 181 1758416743, 1758416743, 1754049816, 1749690125, 1745337664, 1740992427, 182 1736654408, 1736654408, 1732323601, 1728000000, 1723683599, 1719374392, 183 1719374392, 1715072373, 1710777536, 1706489875, 1702209384, 1702209384, 184 1697936057, 1693669888, 1689410871, 1685159000, 1685159000, 1680914269, 185 1676676672, 1672446203, 1672446203, 1668222856, 1664006625, 1659797504, 186 1655595487, 1655595487, 1651400568, 1647212741, 1643032000, 1643032000, 187 1638858339, 1634691752, 1630532233, 1630532233, 1626379776, 1622234375, 188 1618096024, 1618096024, 1613964717, 1609840448, 1605723211, 1605723211, 189 1601613000, 1597509809, 1593413632, 1593413632, 1589324463, 1585242296, 190 1581167125, 1581167125, 1577098944, 1573037747, 1568983528, 1568983528, 191 1564936281, 1560896000, 1556862679, 1556862679, 1552836312, 1548816893, 192 1548816893, 1544804416, 1540798875, 1536800264, 1536800264, 1532808577, 193 1528823808, 1528823808, 1524845951, 1520875000, 1516910949, 1516910949, 194 1512953792, 1509003523, 1509003523, 1505060136, 1501123625, 1501123625, 195 1497193984, 1493271207, 1489355288, 1489355288, 1485446221, 1481544000, 196 1481544000, 1477648619, 1473760072, 1473760072, 1469878353, 1466003456, 197 1466003456, 1462135375, 1458274104, 1454419637, 1454419637, 1450571968, 198 1446731091, 1446731091, 1442897000, 1439069689, 1439069689, 1435249152, 199 1431435383, 1431435383, 1427628376, 1423828125, 1423828125, 1420034624, 200 1416247867, 1416247867, 1412467848, 1408694561, 1408694561, 1404928000, 201 1401168159, 1401168159, 1397415032, 1397415032, 1393668613, 1389928896, 202 1389928896, 1386195875, 1382469544, 1382469544, 1378749897, 1375036928, 203 1375036928, 1371330631, 1367631000, 1367631000, 1363938029, 1363938029, 204 1360251712, 1356572043, 1356572043, 1352899016, 1349232625, 1349232625, 205 1345572864, 1341919727, 1341919727, 1338273208, 1338273208, 1334633301, 206 1331000000, 1331000000, 1327373299, 1327373299, 1323753192, 1320139673, 207 1320139673, 1316532736, 1312932375, 1312932375, 1309338584, 1309338584, 208 1305751357, 1302170688, 1302170688, 1298596571, 1298596571, 1295029000, 209 1291467969, 1291467969, 1287913472, 1287913472, 1284365503, 1280824056, 210 1280824056, 1277289125, 1277289125, 1273760704, 1270238787, 1270238787, 211 1266723368, 1266723368, 1263214441, 1259712000, 1259712000, 1256216039, 212 1256216039, 1252726552, 1249243533, 1249243533, 1245766976, 1245766976, 213 1242296875, 1242296875, 1238833224, 1235376017, 1235376017, 1231925248, 214 1231925248, 1228480911, 1228480911, 1225043000, 1221611509, 1221611509, 215 1218186432, 1218186432, 1214767763, 1214767763, 1211355496, 1207949625, 216 1207949625, 1204550144, 1204550144, 1201157047, 1201157047, 1197770328, 217 1197770328, 1194389981, 1191016000, 1191016000, 1187648379, 1187648379, 218 1184287112, 1184287112, 1180932193, 1180932193, 1177583616, 1174241375, 219 1174241375, 1170905464, 1170905464, 1167575877, 1167575877, 1164252608, 220 1164252608, 1160935651, 1160935651, 1157625000, 1154320649, 1154320649, 221 1151022592, 1151022592, 1147730823, 1147730823, 1144445336, 1144445336, 222 1141166125, 1141166125, 1137893184, 1137893184, 1134626507, 1134626507, 223 1131366088, 1128111921, 1128111921, 1124864000, 1124864000, 1121622319, 224 1121622319, 1118386872, 1118386872, 1115157653, 1115157653, 1111934656, 225 1111934656, 1108717875, 1108717875, 1105507304, 1105507304, 1102302937, 226 1102302937, 1099104768, 1099104768, 1095912791, 1095912791, 1092727000, 227 1092727000, 1089547389, 1089547389, 1086373952, 1086373952, 1083206683, 228 1083206683, 1080045576, 1080045576, 1076890625, 1076890625, 1073741824, 229 1073741824 }; 230 231 /* umul_hi(h, x, y) puts in h the high part of x*y */ 232 #ifdef HAVE_MULX_U64 233 #include <immintrin.h> 234 #define umul_hi(h, x, y) _mulx_u64 (x, y, (unsigned long long *) &(h)) 235 #else 236 #define umul_hi(h, x, y) \ 237 do { \ 238 mp_limb_t _l; \ 239 umul_ppmm (h, _l, x, y); \ 240 (void) _l; /* unused variable */ \ 241 } while (0) 242 #endif 243 244 /* given 2^62 <= d < 2^64, put in r an approximation of 245 s = floor(2^96/sqrt(d)) - 2^64, with r <= s <= r + 15 */ 246 #define __gmpfr_invsqrt_limb_approx(r, d) \ 247 do { \ 248 mp_limb_t _d, _i, _v0, _e0, _d37, _v1, _e1, _h, _v2, _e2; \ 249 _d = (d); \ 250 _i = (_d >> 54) - 256; \ 251 /* i = d10 - 256 */ \ 252 _v0 = T[_i]; \ 253 _d37 = 1 + (_d >> 27); \ 254 _e0 = T3[_i] * _d37; \ 255 /* the value (_v0 << 57) - _e0 is less than 2^61 */ \ 256 _v1 = (_v0 << 11) + (((_v0 << 57) - _e0) >> 47); \ 257 _e1 = - _v1 * _v1 * _d37; \ 258 umul_hi (_h, _v1, _e1); \ 259 /* _h = floor(e_1*v_1/2^64) */ \ 260 _v2 = (_v1 << 10) + (_h >> 6); \ 261 umul_hi (_h, _v2 * _v2, _d); \ 262 /* in _h + 2, one +1 accounts for the lower neglected part of */ \ 263 /* v2^2*d. the other +1 is to compute ceil((h+1)/2) */ \ 264 _e2 = (MPFR_LIMB_ONE << 61) - ((_h + 2) >> 1); \ 265 _h = _v2 * _e2; \ 266 (r) = (_v2 << 33) + (_h >> 29); \ 267 } while (0) 268 269 /* given 2^62 <= d < 2^64, return a 32-bit approximation r of 270 sqrt(2^126/d) */ 271 #define __gmpfr_invsqrt_halflimb_approx(r, d) \ 272 do { \ 273 mp_limb_t _d, _i, _v0, _e0, _d37, _v1, _e1, _h; \ 274 _d = (d); \ 275 _i = (_d >> 54) - 256; \ 276 /* i = d10 - 256 */ \ 277 _v0 = T[_i]; \ 278 _d37 = 1 + (_d >> 27); \ 279 _e0 = T3[_i] * _d37; \ 280 /* the value (_v0 << 57) - _e0 is less than 2^61 */ \ 281 _v1 = (_v0 << 11) + (((_v0 << 57) - _e0) >> 47); \ 282 _e1 = - _v1 * _v1 * _d37; \ 283 umul_hi (_h, _v1, _e1); \ 284 /* _h = floor(e_1*v_1/2^64) */ \ 285 (r) = (_v1 << 10) + (_h >> 6); \ 286 } while (0) 287 288 /* given 2^62 <= n < 2^64, put in s an approximation of sqrt(2^64*n), 289 with: s <= floor(sqrt(2^64*n)) <= s + 7 */ 290 #define __gmpfr_sqrt_limb_approx(s, n) \ 291 do { \ 292 mp_limb_t _n, _x, _y, _z, _t; \ 293 _n = (n); \ 294 __gmpfr_invsqrt_halflimb_approx (_x, _n); \ 295 MPFR_ASSERTD(_x < MPFR_LIMB_ONE << 32); \ 296 /* x has 32 bits, and is near (by below) sqrt(2^126/n) */ \ 297 _y = (_x * (_n >> 31)) >> 32; \ 298 MPFR_ASSERTD(_y < MPFR_LIMB_ONE << 32); \ 299 /* y is near (by below) sqrt(n) */ \ 300 _z = _n - _y * _y; \ 301 /* reduce _z so that _z <= 2*_y */ \ 302 /* the maximal value of _z is 2*(2^32-1) */ \ 303 while (_z > 2 * ((MPFR_LIMB_ONE << 32) - 1)) \ 304 { \ 305 _z -= (_y + _y) + 1; \ 306 _y ++; \ 307 } \ 308 /* now z <= 2*(2^32-1): one reduction is enough */ \ 309 if (_z > _y + _y) \ 310 { \ 311 _z -= (_y + _y) + 1; \ 312 _y ++; \ 313 } \ 314 /* _x * _z should be < 2^64 */ \ 315 _t = (_x * _z) >> 32; \ 316 (s) = (_y << 32) + _t; \ 317 } while (0) 318 319 /* given 2^62 <= u < 2^64, put in s the value floor(sqrt(2^64*u)), 320 and in rh in rl the remainder: 2^64*u - s^2 = 2^64*rh + rl, with 321 2^64*rh + rl <= 2*s, and in invs the approximation of 2^96/sqrt(u) */ 322 #define __gmpfr_sqrt_limb(s, rh, rl, invs, u) \ 323 do { \ 324 mp_limb_t _u, _invs, _r, _h, _l; \ 325 _u = (u); \ 326 __gmpfr_invsqrt_limb_approx (_invs, _u); \ 327 umul_hi (_h, _invs, _u); \ 328 _r = _h + _u; \ 329 /* make sure _r has its most significant bit set */ \ 330 if (MPFR_UNLIKELY(_r < MPFR_LIMB_HIGHBIT)) \ 331 _r = MPFR_LIMB_HIGHBIT; \ 332 /* we know _r <= sqrt(2^64*u) <= _r + 16 */ \ 333 umul_ppmm (_h, _l, _r, _r); \ 334 sub_ddmmss (_h, _l, _u, 0, _h, _l); \ 335 /* now h:l <= 30*_r */ \ 336 MPFR_ASSERTD(_h < 30); \ 337 if (_h >= 16) \ 338 { /* subtract 16r+64 to h:l, add 8 to _r */ \ 339 sub_ddmmss (_h, _l, _h, _l, _r >> 60, _r << 4); \ 340 sub_ddmmss (_h, _l, _h, _l, 0, 64); \ 341 _r += 8; \ 342 } \ 343 if (_h >= 8) \ 344 { /* subtract 8r+16 to h:l, add 4 to _r */ \ 345 sub_ddmmss (_h, _l, _h, _l, _r >> 61, _r << 3); \ 346 sub_ddmmss (_h, _l, _h, _l, 0, 16); \ 347 _r += 4; \ 348 } \ 349 if (_h >= 4) \ 350 { /* subtract 4r+4 to h:l, add 2 to _r */ \ 351 sub_ddmmss (_h, _l, _h, _l, _r >> 62, _r << 2); \ 352 sub_ddmmss (_h, _l, _h, _l, 0, 4); \ 353 _r += 2; \ 354 } \ 355 while (_h > 1 || ((_h == 1) && (_l > 2 * _r))) \ 356 { /* subtract 2r+1 to h:l, add 1 to _r */ \ 357 sub_ddmmss (_h, _l, _h, _l, _r >> 63, (_r << 1) + 1); \ 358 _r ++; \ 359 } \ 360 (s) = _r; \ 361 (rh) = _h; \ 362 (rl) = _l; \ 363 (invs) = _invs; \ 364 } while (0) 365 366 #endif /* GMP_NUMB_BITS == 64 */ 367