xref: /dpdk/lib/eal/common/rte_reciprocal.c (revision 3d4e27fd7ff050d565c7450930c92fb945706518)
199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
299a2dd95SBruce Richardson  * Copyright(c) 2017 Cavium, Inc
399a2dd95SBruce Richardson  * Copyright(c) Hannes Frederic Sowa
499a2dd95SBruce Richardson  * All rights reserved.
599a2dd95SBruce Richardson  */
699a2dd95SBruce Richardson 
799a2dd95SBruce Richardson #include <stdio.h>
899a2dd95SBruce Richardson #include <stdint.h>
999a2dd95SBruce Richardson 
1099a2dd95SBruce Richardson #include <rte_common.h>
117253e3d2STyler Retzlaff #include <rte_bitops.h>
1299a2dd95SBruce Richardson 
1399a2dd95SBruce Richardson #include "rte_reciprocal.h"
1499a2dd95SBruce Richardson 
rte_reciprocal_value(uint32_t d)1599a2dd95SBruce Richardson struct rte_reciprocal rte_reciprocal_value(uint32_t d)
1699a2dd95SBruce Richardson {
1799a2dd95SBruce Richardson 	struct rte_reciprocal R;
1899a2dd95SBruce Richardson 	uint64_t m;
1999a2dd95SBruce Richardson 	int l;
2099a2dd95SBruce Richardson 
2199a2dd95SBruce Richardson 	l = rte_fls_u32(d - 1);
2299a2dd95SBruce Richardson 	m = ((1ULL << 32) * ((1ULL << l) - d));
2399a2dd95SBruce Richardson 	m /= d;
2499a2dd95SBruce Richardson 
2599a2dd95SBruce Richardson 	++m;
2699a2dd95SBruce Richardson 	R.m = m;
2799a2dd95SBruce Richardson 	R.sh1 = RTE_MIN(l, 1);
2899a2dd95SBruce Richardson 	R.sh2 = RTE_MAX(l - 1, 0);
2999a2dd95SBruce Richardson 
3099a2dd95SBruce Richardson 	return R;
3199a2dd95SBruce Richardson }
3299a2dd95SBruce Richardson 
3399a2dd95SBruce Richardson /*
3499a2dd95SBruce Richardson  * Code taken from Hacker's Delight:
3599a2dd95SBruce Richardson  * http://www.hackersdelight.org/hdcodetxt/divlu.c.txt
3699a2dd95SBruce Richardson  * License permits inclusion here per:
3799a2dd95SBruce Richardson  * http://www.hackersdelight.org/permissions.htm
3899a2dd95SBruce Richardson  */
/*
 * Divide the 128-bit unsigned value u1:u0 (u1 is the high 64 bits, u0 the
 * low 64 bits) by the 64-bit value v using schoolbook long division on
 * 32-bit digits.
 *
 * Returns the 64-bit quotient. If r is not NULL, the remainder is stored
 * in *r.
 *
 * When the quotient does not fit in 64 bits (u1 >= v, which also covers
 * v == 0), the function returns (uint64_t)-1 and, if r is not NULL, sets
 * *r to (uint64_t)-1 as well.
 */
static uint64_t
divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
{
	const uint64_t b = (1ULL << 32); /* Number base (32 bits). */
	uint64_t un1, un0,           /* Norm. dividend LSD's. */
		 vn1, vn0,           /* Norm. divisor digits. */
		 q1, q0,             /* Quotient digits. */
		 un64, un21, un10,   /* Dividend digit pairs. */
		 rhat;               /* A remainder. */
	int s;                       /* Shift amount for norm. */

	/* If overflow, set rem. to an impossible value. */
	if (u1 >= v) {
		if (r != NULL)
			*r = (uint64_t) -1;
		return (uint64_t) -1;
	}

	/*
	 * Normalize: shift divisor and dividend left so that the divisor's
	 * top bit is set. v is non-zero here because u1 >= v was rejected.
	 */
	s = rte_clz64(v);
	if (s > 0) {
		v = v << s;
		/* 0 < s < 64, so both shift counts are in range. */
		un64 = (u1 << s) | (u0 >> (64 - s));
		un10 = u0 << s;
	} else {
		/*
		 * Already normalized: the dividend is used as is. The high
		 * word must be u1 alone; mixing in bits of u0 would corrupt
		 * the quotient whenever u0 is non-zero.
		 */
		un64 = u1;
		un10 = u0;
	}

	/* Break divisor up into two 32-bit digits. */
	vn1 = v >> 32;
	vn0 = v & 0xFFFFFFFF;

	/* Break the low half of the dividend into two 32-bit digits. */
	un1 = un10 >> 32;
	un0 = un10 & 0xFFFFFFFF;

	/* Estimate the high quotient digit, then correct it downwards. */
	q1 = un64/vn1;
	rhat = un64 - q1*vn1;
again1:
	if (q1 >= b || q1*vn0 > b*rhat + un1) {
		q1 = q1 - 1;
		rhat = rhat + vn1;
		if (rhat < b)
			goto again1;
	}

	/* Multiply and subtract (arithmetic is modulo 2^64 by design). */
	un21 = un64*b + un1 - q1*v;

	/* Estimate the low quotient digit, then correct it downwards. */
	q0 = un21/vn1;
	rhat = un21 - q0*vn1;
again2:
	if (q0 >= b || q0*vn0 > b*rhat + un0) {
		q0 = q0 - 1;
		rhat = rhat + vn1;
		if (rhat < b)
			goto again2;
	}

	/* Undo the normalization shift to obtain the true remainder. */
	if (r != NULL)
		*r = (un21*b + un0 - q0*v) >> s;
	return q1*b + q0;
}
10199a2dd95SBruce Richardson 
10299a2dd95SBruce Richardson struct rte_reciprocal_u64
rte_reciprocal_value_u64(uint64_t d)10399a2dd95SBruce Richardson rte_reciprocal_value_u64(uint64_t d)
10499a2dd95SBruce Richardson {
10599a2dd95SBruce Richardson 	struct rte_reciprocal_u64 R;
10699a2dd95SBruce Richardson 	uint64_t m;
10799a2dd95SBruce Richardson 	uint64_t r;
10899a2dd95SBruce Richardson 	int l;
10999a2dd95SBruce Richardson 
110*3d4e27fdSDavid Marchand 	l = 63 - rte_clz64(d);
11199a2dd95SBruce Richardson 
11299a2dd95SBruce Richardson 	m = divide_128_div_64_to_64((1ULL << l), 0, d, &r) << 1;
11399a2dd95SBruce Richardson 	if (r << 1 < r || r << 1 >= d)
11499a2dd95SBruce Richardson 		m++;
11599a2dd95SBruce Richardson 	m = (1ULL << l) - d ? m + 1 : 1;
11699a2dd95SBruce Richardson 	R.m = m;
11799a2dd95SBruce Richardson 
11899a2dd95SBruce Richardson 	R.sh1 = l > 1 ? 1 : l;
11999a2dd95SBruce Richardson 	R.sh2 = (l > 0) ? l : 0;
12099a2dd95SBruce Richardson 	R.sh2 -= R.sh2 && (m == 1) ? 1 : 0;
12199a2dd95SBruce Richardson 
12299a2dd95SBruce Richardson 	return R;
12399a2dd95SBruce Richardson }
124