/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef _RTE_ACL_VECT_H_
#define _RTE_ACL_VECT_H_

/**
 * @file
 *
 * RTE ACL SSE/AVX related header.
 */


/*
 * Takes 2 SIMD registers containing N transitions each (tr0, tr1).
 * Shuffles it into different representation:
 * lo - contains low 32 bits of given N transitions.
 * hi - contains high 32 bits of given N transitions.
 *
 * P  - intrinsic-name infix used for token pasting (e.g. "mm" builds
 *      _mm_shuffle_ps, "mm256" builds _mm256_shuffle_ps).
 * TC - cast type expected by the float shuffle intrinsic
 *      (e.g. __m128 / __m256).
 */
#define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi)                        do { \
	lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88); \
	hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd); \
} while (0)


/*
 * Calculate the address of the next transition for
 * all types of nodes. Note that only DFA nodes and range
 * nodes actually transition to another node. Match
 * nodes not supposed to be encountered here.
 * For quad range nodes:
 * Calculate number of range boundaries that are less than the
 * input value. Range boundaries for each node are in signed 8 bit,
 * ordered from -128 to 127.
 * This is effectively a popcnt of bytes that are greater than the
 * input byte.
 * Single nodes are processed in the same ways as quad range nodes.
 *
 * P - intrinsic-name infix ("mm", "mm256", ...) for token pasting.
 * S - integer-intrinsic size suffix (e.g. 128, 256) pasted into
 *     _##P##_and_si##S etc.
 * Remaining arguments are SIMD registers; addr receives the result.
 */
#define	ACL_TR_CALC_ADDR(P, S,					\
	addr, index_mask, next_input, shuffle_input,		\
	ones_16, range_base, tr_lo, tr_hi)			do {	\
								\
	typeof(addr) in, node_type, r, t;			\
	typeof(addr) dfa_msk, dfa_ofs, quad_ofs;		\
								\
	/* t = all-zeroes, used as the DFA-type comparand below. */ \
	t = _##P##_xor_si##S(index_mask, index_mask);		\
	in = _##P##_shuffle_epi8(next_input, shuffle_input);	\
								\
	/* Calc node type and node addr */			\
	node_type = _##P##_andnot_si##S(index_mask, tr_lo);	\
	addr = _##P##_and_si##S(index_mask, tr_lo);		\
								\
	/* mask for DFA type(0) nodes */			\
	dfa_msk = _##P##_cmpeq_epi32(node_type, t);		\
								\
	/* DFA calculations. */					\
	r = _##P##_srli_epi32(in, 30);				\
	r = _##P##_add_epi8(r, range_base);			\
	t = _##P##_srli_epi32(in, 24);				\
	r = _##P##_shuffle_epi8(tr_hi, r);			\
								\
	dfa_ofs = _##P##_sub_epi32(t, r);			\
								\
	/* QUAD/SINGLE calculations. */				\
	t = _##P##_cmpgt_epi8(in, tr_hi);			\
	t = _##P##_sign_epi8(t, t);				\
	t = _##P##_maddubs_epi16(t, t);				\
	quad_ofs = _##P##_madd_epi16(t, ones_16);		\
								\
	/* blend DFA and QUAD/SINGLE. */			\
	t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk);	\
								\
	/* calculate address for next transitions. */		\
	addr = _##P##_add_epi32(addr, t);			\
} while (0)


#endif /* _RTE_ACL_VECT_H_ */