xref: /dpdk/lib/member/rte_member_sketch_avx512.c (revision 373b51ef0270fd2d76f3a94f716eed3cb3b8f2b5)
1db354bd2SLeyi Rong /* SPDX-License-Identifier: BSD-3-Clause
2db354bd2SLeyi Rong  * Copyright(c) 2020 Intel Corporation
3db354bd2SLeyi Rong  */
4db354bd2SLeyi Rong 
5db354bd2SLeyi Rong #include "rte_xxh64_avx512.h"
6db354bd2SLeyi Rong #include "rte_member_sketch_avx512.h"
7db354bd2SLeyi Rong 
8db354bd2SLeyi Rong __rte_always_inline void
sketch_update_avx512(const struct rte_member_setsum * ss,const void * key,uint32_t count)9db354bd2SLeyi Rong sketch_update_avx512(const struct rte_member_setsum *ss,
10db354bd2SLeyi Rong 		     const void *key,
11db354bd2SLeyi Rong 		     uint32_t count)
12db354bd2SLeyi Rong {
13db354bd2SLeyi Rong 	uint64_t *count_array = ss->table;
14db354bd2SLeyi Rong 	uint32_t num_col = ss->num_col;
15db354bd2SLeyi Rong 	uint32_t key_len = ss->key_len;
16db354bd2SLeyi Rong 	__m256i v_row_base;
17db354bd2SLeyi Rong 	__m256i v_hash_result;
18db354bd2SLeyi Rong 	__m512i current_sketch;
19db354bd2SLeyi Rong 	__m512i updated_sketch;
20db354bd2SLeyi Rong 	__m512i v_count;
21db354bd2SLeyi Rong 
22db354bd2SLeyi Rong 	const __m256i v_idx = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
23db354bd2SLeyi Rong 	const __m256i v_col = _mm256_set1_epi32(num_col);
24db354bd2SLeyi Rong 
25db354bd2SLeyi Rong 	/* compute the hash result parallelly */
26db354bd2SLeyi Rong 	v_hash_result = rte_xxh64_sketch_avx512
27db354bd2SLeyi Rong 		(key, key_len, *(__m512i *)ss->hash_seeds, num_col);
28db354bd2SLeyi Rong 	v_row_base = _mm256_mullo_epi32(v_idx, v_col);
29db354bd2SLeyi Rong 	v_hash_result = _mm256_add_epi32(v_row_base, v_hash_result);
30db354bd2SLeyi Rong 
31*373b51efSLeyi Rong 	current_sketch = _mm512_i32gather_epi64
32*373b51efSLeyi Rong 				(v_hash_result, (void *)count_array, 8);
33db354bd2SLeyi Rong 	v_count = _mm512_set1_epi64(count);
34db354bd2SLeyi Rong 	updated_sketch = _mm512_add_epi64(current_sketch, v_count);
35db354bd2SLeyi Rong 	_mm512_i32scatter_epi64
36db354bd2SLeyi Rong 		((void *)count_array, v_hash_result, updated_sketch, 8);
37db354bd2SLeyi Rong }
38db354bd2SLeyi Rong 
39db354bd2SLeyi Rong uint64_t
sketch_lookup_avx512(const struct rte_member_setsum * ss,const void * key)40db354bd2SLeyi Rong sketch_lookup_avx512(const struct rte_member_setsum *ss, const void *key)
41db354bd2SLeyi Rong {
42db354bd2SLeyi Rong 	uint32_t col[ss->num_row];
43db354bd2SLeyi Rong 
44db354bd2SLeyi Rong 	/* currently only for sketch byte count mode */
45db354bd2SLeyi Rong 	__m256i v_hash_result = rte_xxh64_sketch_avx512
46db354bd2SLeyi Rong 		(key, ss->key_len, *(__m512i *)ss->hash_seeds, ss->num_col);
47db354bd2SLeyi Rong 	_mm256_storeu_si256((__m256i *)col, v_hash_result);
48db354bd2SLeyi Rong 
49db354bd2SLeyi Rong 	return count_min(ss, col);
50db354bd2SLeyi Rong }
51db354bd2SLeyi Rong 
52db354bd2SLeyi Rong void
sketch_delete_avx512(const struct rte_member_setsum * ss,const void * key)53db354bd2SLeyi Rong sketch_delete_avx512(const struct rte_member_setsum *ss, const void *key)
54db354bd2SLeyi Rong {
55db354bd2SLeyi Rong 	uint32_t col[ss->num_row];
56db354bd2SLeyi Rong 	uint64_t *count_array = ss->table;
57db354bd2SLeyi Rong 	uint64_t min = UINT64_MAX;
58db354bd2SLeyi Rong 	uint32_t cur_row;
59db354bd2SLeyi Rong 
60db354bd2SLeyi Rong 	__m256i v_hash_result = rte_xxh64_sketch_avx512
61db354bd2SLeyi Rong 		(key, ss->key_len, *(__m512i *)ss->hash_seeds,
62db354bd2SLeyi Rong 		 RTE_ALIGN_FLOOR(ss->num_col, 32));
63db354bd2SLeyi Rong 	_mm256_storeu_si256((__m256i *)col, v_hash_result);
64db354bd2SLeyi Rong 
65db354bd2SLeyi Rong 	min = count_min(ss, col);
66db354bd2SLeyi Rong 
67db354bd2SLeyi Rong 	/* subtract the min value from all the counters */
68db354bd2SLeyi Rong 	for (cur_row = 0; cur_row < ss->num_row; cur_row++)
69db354bd2SLeyi Rong 		count_array[cur_row * ss->num_col + col[cur_row]] -= min;
70db354bd2SLeyi Rong }
71