xref: /dpdk/lib/acl/acl_vect.h (revision d5d13ef979c83d33518c70727b5a4ef091bd8134)
1*99a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
2*99a2dd95SBruce Richardson  * Copyright(c) 2010-2014 Intel Corporation
3*99a2dd95SBruce Richardson  */
4*99a2dd95SBruce Richardson 
5*99a2dd95SBruce Richardson #ifndef _RTE_ACL_VECT_H_
6*99a2dd95SBruce Richardson #define _RTE_ACL_VECT_H_
7*99a2dd95SBruce Richardson 
8*99a2dd95SBruce Richardson /**
9*99a2dd95SBruce Richardson  * @file
10*99a2dd95SBruce Richardson  *
11*99a2dd95SBruce Richardson  * RTE ACL SSE/AVX related header.
12*99a2dd95SBruce Richardson  */
13*99a2dd95SBruce Richardson 
14*99a2dd95SBruce Richardson 
/*
 * Takes 2 SIMD registers containing N transitions each (tr0, tr1) and
 * shuffles them into a different representation:
 * lo - receives the low 32 bits of the given transitions.
 * hi - receives the high 32 bits of the given transitions.
 *
 * P      - intrinsic name prefix; pasted to form _<P>_shuffle_ps.
 * TC     - vector type that tr0 and tr1 are cast to before being handed
 *          to _<P>_shuffle_ps.
 * tr0,
 * tr1    - input registers (read only, each evaluated twice).
 * lo, hi - lvalues that are assigned; the shuffle result is cast back to
 *          the type of the respective lvalue.
 *
 * With the x86 shuffle_ps family, selector 0x88 picks 32-bit elements 0 and 2
 * of each source and selector 0xdd picks elements 1 and 3.
 *
 * __typeof__ is used rather than typeof so that the macro also expands
 * correctly in strict ISO C modes (e.g. -std=c11), where typeof is not
 * a keyword.
 */
#define	ACL_TR_HILO(P, TC, tr0, tr1, lo, hi)                              do { \
	(lo) = (__typeof__(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88);  \
	(hi) = (__typeof__(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd);  \
} while (0)
25*99a2dd95SBruce Richardson 
26*99a2dd95SBruce Richardson 
/*
 * Calculate the address of the next transition for
 * all types of nodes and store it in addr. Note that only DFA nodes
 * and range nodes actually transition to another node. Match
 * nodes are not supposed to be encountered here.
 *
 * The bits of tr_lo selected by index_mask form the base address, the
 * remaining bits of tr_lo form the node type. An offset is added to the
 * base address; how it is computed depends on the node type.
 *
 * For DFA nodes (node type 0):
 * The offset is (in >> 24) minus the byte of tr_hi that is selected by
 * (in >> 30) + range_base.
 * For quad range nodes:
 * Calculate the number of range boundaries that are less than the
 * input value. Range boundaries for each node are signed 8-bit values,
 * ordered from -128 to 127.
 * This is effectively a popcount of the boundary bytes in tr_hi that the
 * input byte is greater than.
 * Single nodes are processed in the same way as quad range nodes.
 *
 * P, S          - intrinsic name prefix and register width suffix; pasted
 *                 to form names such as _<P>_xor_si<S> and _<P>_add_epi32.
 * addr          - lvalue receiving the result; its type is also used for
 *                 all temporaries.
 * index_mask    - mask separating the address bits of tr_lo from its
 *                 node type bits.
 * next_input    - register holding the input bytes.
 * shuffle_input - byte-shuffle control applied to next_input to build
 *                 the per-transition input.
 * ones_16       - multiplier for the final madd_epi16 that sums the
 *                 16-bit partial counts (presumably 16-bit ones, judging
 *                 by the name - confirm at the call site).
 * range_base    - added byte-wise to (in >> 30) to form the shuffle
 *                 control that picks the DFA byte out of tr_hi.
 * tr_lo, tr_hi  - low/high 32 bits of the current transitions.
 *
 * Temporaries are named acl_*_ so that caller arguments called in, r, t,
 * etc. are not shadowed by the macro's own locals. __typeof__ is used
 * rather than typeof so the macro also works in strict ISO C modes.
 */
#define ACL_TR_CALC_ADDR(P, S,						\
	addr, index_mask, next_input, shuffle_input,			\
	ones_16, range_base, tr_lo, tr_hi)                    do {	\
									\
	__typeof__(addr) acl_in_, acl_type_, acl_r_, acl_t_;		\
	__typeof__(addr) acl_dfa_msk_, acl_dfa_ofs_, acl_quad_ofs_;	\
									\
	/* x ^ x: all-zero register to compare the node type with */	\
	acl_t_ = _##P##_xor_si##S(index_mask, index_mask);		\
	acl_in_ = _##P##_shuffle_epi8(next_input, shuffle_input);	\
									\
	/* Calc node type and node addr */				\
	acl_type_ = _##P##_andnot_si##S(index_mask, tr_lo);		\
	(addr) = _##P##_and_si##S(index_mask, tr_lo);			\
									\
	/* mask for DFA type(0) nodes */				\
	acl_dfa_msk_ = _##P##_cmpeq_epi32(acl_type_, acl_t_);		\
									\
	/* DFA calculations. */						\
	acl_r_ = _##P##_srli_epi32(acl_in_, 30);			\
	acl_r_ = _##P##_add_epi8(acl_r_, range_base);			\
	acl_t_ = _##P##_srli_epi32(acl_in_, 24);			\
	acl_r_ = _##P##_shuffle_epi8(tr_hi, acl_r_);			\
									\
	acl_dfa_ofs_ = _##P##_sub_epi32(acl_t_, acl_r_);		\
									\
	/* QUAD/SINGLE calculations. */					\
	/* 0xff for each boundary byte that is below the input */	\
	acl_t_ = _##P##_cmpgt_epi8(acl_in_, tr_hi);			\
	/* 0xff (-1) -> 1, 0 stays 0 */					\
	acl_t_ = _##P##_sign_epi8(acl_t_, acl_t_);			\
	/* sum byte pairs into 16-bit counts */				\
	acl_t_ = _##P##_maddubs_epi16(acl_t_, acl_t_);			\
	/* sum 16-bit pairs into one 32-bit count */			\
	acl_quad_ofs_ = _##P##_madd_epi16(acl_t_, ones_16);		\
									\
	/* blend DFA and QUAD/SINGLE. */				\
	acl_t_ = _##P##_blendv_epi8(acl_quad_ofs_, acl_dfa_ofs_,	\
		acl_dfa_msk_);						\
									\
	/* calculate address for next transitions. */			\
	(addr) = _##P##_add_epi32(addr, acl_t_);			\
} while (0)
77*99a2dd95SBruce Richardson 
78*99a2dd95SBruce Richardson 
79*99a2dd95SBruce Richardson #endif /* _RTE_ACL_VECT_H_ */
80