xref: /dpdk/lib/eal/include/rte_reciprocal.h (revision 99a2dd955fba6e4cc23b77d590a033650ced9c45)
1*99a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
2*99a2dd95SBruce Richardson  * Copyright(c) 2017 Cavium, Inc
3*99a2dd95SBruce Richardson  */
4*99a2dd95SBruce Richardson /*
5*99a2dd95SBruce Richardson  * Reciprocal divide
6*99a2dd95SBruce Richardson  *
7*99a2dd95SBruce Richardson  * Used with permission from original authors
8*99a2dd95SBruce Richardson  *  Hannes Frederic Sowa and Daniel Borkmann
9*99a2dd95SBruce Richardson  *
10*99a2dd95SBruce Richardson  * This algorithm is based on the paper "Division by Invariant
11*99a2dd95SBruce Richardson  * Integers Using Multiplication" by Torbjörn Granlund and Peter
12*99a2dd95SBruce Richardson  * L. Montgomery.
13*99a2dd95SBruce Richardson  *
14*99a2dd95SBruce Richardson  * The assembler implementation from Agner Fog, which this code is
15*99a2dd95SBruce Richardson  * based on, can be found here:
16*99a2dd95SBruce Richardson  * http://www.agner.org/optimize/asmlib.zip
17*99a2dd95SBruce Richardson  *
18*99a2dd95SBruce Richardson  * This optimization for A/B is helpful if the divisor B is mostly
19*99a2dd95SBruce Richardson  * runtime invariant. The reciprocal of B is calculated in the
20*99a2dd95SBruce Richardson  * slow-path with reciprocal_value(). The fast-path can then just use
21*99a2dd95SBruce Richardson  * a much faster multiplication operation with a variable dividend A
22*99a2dd95SBruce Richardson  * to calculate the division A/B.
23*99a2dd95SBruce Richardson  */
24*99a2dd95SBruce Richardson 
25*99a2dd95SBruce Richardson #ifndef _RTE_RECIPROCAL_H_
26*99a2dd95SBruce Richardson #define _RTE_RECIPROCAL_H_
27*99a2dd95SBruce Richardson 
28*99a2dd95SBruce Richardson #include <stdint.h>
29*99a2dd95SBruce Richardson 
30*99a2dd95SBruce Richardson #include <rte_common.h>
31*99a2dd95SBruce Richardson 
32*99a2dd95SBruce Richardson #ifdef __cplusplus
33*99a2dd95SBruce Richardson extern "C" {
34*99a2dd95SBruce Richardson #endif
35*99a2dd95SBruce Richardson 
36*99a2dd95SBruce Richardson struct rte_reciprocal {
37*99a2dd95SBruce Richardson 	uint32_t m;
38*99a2dd95SBruce Richardson 	uint8_t sh1, sh2;
39*99a2dd95SBruce Richardson };
40*99a2dd95SBruce Richardson 
41*99a2dd95SBruce Richardson struct rte_reciprocal_u64 {
42*99a2dd95SBruce Richardson 	uint64_t m;
43*99a2dd95SBruce Richardson 	uint8_t sh1, sh2;
44*99a2dd95SBruce Richardson };
45*99a2dd95SBruce Richardson 
rte_reciprocal_divide(uint32_t a,struct rte_reciprocal R)46*99a2dd95SBruce Richardson static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
47*99a2dd95SBruce Richardson {
48*99a2dd95SBruce Richardson 	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
49*99a2dd95SBruce Richardson 
50*99a2dd95SBruce Richardson 	return (t + ((a - t) >> R.sh1)) >> R.sh2;
51*99a2dd95SBruce Richardson }
52*99a2dd95SBruce Richardson 
53*99a2dd95SBruce Richardson static __rte_always_inline uint64_t
mullhi_u64(uint64_t x,uint64_t y)54*99a2dd95SBruce Richardson mullhi_u64(uint64_t x, uint64_t y)
55*99a2dd95SBruce Richardson {
56*99a2dd95SBruce Richardson #ifdef __SIZEOF_INT128__
57*99a2dd95SBruce Richardson 	__uint128_t xl = x;
58*99a2dd95SBruce Richardson 	__uint128_t rl = xl * y;
59*99a2dd95SBruce Richardson 
60*99a2dd95SBruce Richardson 	return (rl >> 64);
61*99a2dd95SBruce Richardson #else
62*99a2dd95SBruce Richardson 	uint64_t u0, u1, v0, v1, k, t;
63*99a2dd95SBruce Richardson 	uint64_t w1, w2;
64*99a2dd95SBruce Richardson 	uint64_t whi;
65*99a2dd95SBruce Richardson 
66*99a2dd95SBruce Richardson 	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
67*99a2dd95SBruce Richardson 	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
68*99a2dd95SBruce Richardson 
69*99a2dd95SBruce Richardson 	t = u0*v0;
70*99a2dd95SBruce Richardson 	k = t >> 32;
71*99a2dd95SBruce Richardson 
72*99a2dd95SBruce Richardson 	t = u1*v0 + k;
73*99a2dd95SBruce Richardson 	w1 = t & 0xFFFFFFFF;
74*99a2dd95SBruce Richardson 	w2 = t >> 32;
75*99a2dd95SBruce Richardson 
76*99a2dd95SBruce Richardson 	t = u0*v1 + w1;
77*99a2dd95SBruce Richardson 	k = t >> 32;
78*99a2dd95SBruce Richardson 
79*99a2dd95SBruce Richardson 	whi = u1*v1 + w2 + k;
80*99a2dd95SBruce Richardson 
81*99a2dd95SBruce Richardson 	return whi;
82*99a2dd95SBruce Richardson #endif
83*99a2dd95SBruce Richardson }
84*99a2dd95SBruce Richardson 
85*99a2dd95SBruce Richardson static __rte_always_inline uint64_t
rte_reciprocal_divide_u64(uint64_t a,const struct rte_reciprocal_u64 * R)86*99a2dd95SBruce Richardson rte_reciprocal_divide_u64(uint64_t a, const struct rte_reciprocal_u64 *R)
87*99a2dd95SBruce Richardson {
88*99a2dd95SBruce Richardson 	uint64_t t = mullhi_u64(a, R->m);
89*99a2dd95SBruce Richardson 
90*99a2dd95SBruce Richardson 	return (t + ((a - t) >> R->sh1)) >> R->sh2;
91*99a2dd95SBruce Richardson }
92*99a2dd95SBruce Richardson 
93*99a2dd95SBruce Richardson struct rte_reciprocal rte_reciprocal_value(uint32_t d);
94*99a2dd95SBruce Richardson struct rte_reciprocal_u64 rte_reciprocal_value_u64(uint64_t d);
95*99a2dd95SBruce Richardson 
96*99a2dd95SBruce Richardson #ifdef __cplusplus
97*99a2dd95SBruce Richardson }
98*99a2dd95SBruce Richardson #endif
99*99a2dd95SBruce Richardson 
100*99a2dd95SBruce Richardson #endif /* _RTE_RECIPROCAL_H_ */
101