.. SPDX-License-Identifier: BSD-3-Clause
   Copyright(c) 2024 Arm Limited.

Pointer Compression Library
===========================

Use ``rte_ptr_compress_16_shift()`` to compress pointers into 16-bit offsets
and ``rte_ptr_decompress_16_shift()`` to decompress them.
Use ``rte_ptr_compress_32_shift()`` to compress pointers into 32-bit offsets
and ``rte_ptr_decompress_32_shift()`` to decompress them.

Compression takes advantage of the fact that pointers are usually located in a limited memory region (like a mempool).
By converting them to offsets from a base memory address they can be stored in fewer bytes.
How many bytes are needed to store the offset is dictated by the memory region size and the alignment of the objects the pointers point to.

For example, a pointer which is part of a 4GB memory pool can be stored as a 32-bit offset.
If the pointer points to memory that is 8-byte aligned then 3 bits can be dropped from the offset,
and offsets into a 32GB memory pool now fit in 32 bits.

For performance reasons these requirements are not enforced programmatically.
The programmer is responsible for ensuring that the combination of distance from the base pointer and
memory alignment allows the offset to be stored in the number of bits indicated by the function name (16 or 32).
The start of the mempool memory would be a good candidate for the base pointer.
Otherwise any pointer that precedes all the pointers being compressed, is close enough,
and has the same alignment will work.

Macros present in the ``rte_ptr_compress.h`` header may be used to evaluate whether compression is possible:

* ``RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE``

* ``RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT``

* ``RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT``

* ``RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT``

These will help you calculate compression parameters and determine whether they are legal for a particular compression function.
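
For illustration, the macros could be used at initialization time to confirm that a given
region size and object alignment are compatible with the chosen offset width.
The sketch below is only an example: the region length and alignment values are hypothetical,
and the argument order shown (memory region length, then object alignment) should be verified
against the macro documentation in ``rte_ptr_compress.h``.

.. code-block:: c

   #include <rte_ptr_compress.h>

   /* hypothetical values: a 32GB region holding 8-byte aligned objects */
   #define REGION_LENGTH (32ULL * 1024 * 1024 * 1024)
   #define OBJECT_ALIGNMENT (8)

   static int
   check_compression_feasible(void)
   {
       /* a 32GB range needs 35 bits per offset and 8-byte alignment lets
        * 3 bits be dropped, so offsets fit in 32 bits but not in 16 */
       if (!RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT(REGION_LENGTH,
               OBJECT_ALIGNMENT))
           return -1; /* fall back to storing full pointers */
       return 0;
   }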

.. note::

   Performance gains depend on the batch size of pointers and CPU capabilities such as vector extensions.
   It's important to measure the performance increase on target hardware.

Example usage
-------------

In this example we send pointers between two cores through a ring.
While this is a realistic use case, the code is simplified for demonstration purposes and does not include error handling.

.. code-block:: c

   #include <stdalign.h>
   #include <stdint.h>

   #include <rte_launch.h>
   #include <rte_lcore.h>
   #include <rte_ptr_compress.h>
   #include <rte_ring.h>
   #include <rte_ring_elem.h>

   #define ITEMS_ARRAY_SIZE (1024)
   #define BATCH_SIZE (128)
   #define ALIGN_EXPONENT (3)
   #define ITEM_ALIGN (1 << ALIGN_EXPONENT)
   #define CORE_SEND (1)
   #define CORE_RECV (2)

   struct item {
       alignas(ITEM_ALIGN) int a;
   };

   static struct item items[ITEMS_ARRAY_SIZE] = {0};
   static struct rte_ring *ring = NULL;

   static int
   send_compressed(void *args)
   {
       struct item *ptrs_send[BATCH_SIZE] = {0};
       unsigned int n_send = 0;
       struct rte_ring_zc_data zcd = {0};

       /* in this example we only fill ptrs_send once and reuse it */
       for (; n_send < BATCH_SIZE; n_send++)
           ptrs_send[n_send] = &items[n_send];

       for (;;) {
           n_send = rte_ring_enqueue_zc_burst_elem_start(
                   ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL);

           /* compress ptrs_send into offsets */
           rte_ptr_compress_32_shift(items, /* base pointer */
                   ptrs_send, /* source array to be compressed */
                   zcd.ptr1, /* destination array to store offsets */
                   zcd.n1, /* how many pointers to compress */
                   ALIGN_EXPONENT /* how many bits can we drop from the offset */);

           /* handle the potential secondary buffer (caused by ring boundary) */
           if (zcd.ptr2 != NULL)
               rte_ptr_compress_32_shift(items, ptrs_send + zcd.n1,
                       zcd.ptr2, n_send - zcd.n1, ALIGN_EXPONENT);

           rte_ring_enqueue_zc_finish(ring, n_send);
       }
       return 1;
   }

   static int
   recv_compressed(void *args)
   {
       struct item *ptrs_recv[BATCH_SIZE] = {0};
       unsigned int n_recv;
       struct rte_ring_zc_data zcd = {0};

       for (;;) {
           /* receive compressed pointers from the ring */
           n_recv = rte_ring_dequeue_zc_burst_elem_start(
                   ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL);

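           /* turn the received offsets in zcd.ptr1 back into pointers */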
           rte_ptr_decompress_32_shift(items, /* base pointer */
                   zcd.ptr1, /* source array to decompress */
                   ptrs_recv, /* destination array to store pointers */
                   zcd.n1, /* how many pointers to decompress */
                   ALIGN_EXPONENT /* how many bits were dropped from the offset */);

           /* handle the potential secondary buffer (caused by ring boundary) */
           if (zcd.ptr2 != NULL)
               rte_ptr_decompress_32_shift(items,
                       zcd.ptr2,
                       ptrs_recv + zcd.n1,
                       n_recv - zcd.n1,
                       ALIGN_EXPONENT);

           rte_ring_dequeue_zc_finish(ring, n_recv);

           /* ptrs_recv contains what ptrs_send contained in the other thread */
           /* (...) */
       }
       return 1;
   }

   void
   compression_example(void)
   {
       ring = rte_ring_create_elem(
               "COMPR_PTRS", sizeof(uint32_t),
               1024, rte_socket_id(),
               RING_F_SP_ENQ | RING_F_SC_DEQ);

       rte_eal_remote_launch(send_compressed, NULL, CORE_SEND);
       rte_eal_remote_launch(recv_compressed, NULL, CORE_RECV);

       for (;;) {}
   }
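
If the offsets are known to fit in 16 bits (as they would for the small ``items`` array above),
the same pattern can use the 16-bit functions and halve the per-element ring traffic.
The fragment below is a sketch only: it assumes ``rte_ptr_compress_16_shift()`` takes the same
arguments as the 32-bit variant shown above but writes ``uint16_t`` offsets, and that the ring
has been created with a ``sizeof(uint16_t)`` element size; check ``rte_ptr_compress.h`` for the
exact prototypes.

.. code-block:: c

   /* sketch of a sender using 16-bit offsets; reuses the globals from the
    * example above and assumes the ring element size is sizeof(uint16_t) */
   static int
   send_compressed_16(void *args)
   {
       struct item *ptrs_send[BATCH_SIZE] = {0};
       unsigned int n_send = 0;
       struct rte_ring_zc_data zcd = {0};

       for (; n_send < BATCH_SIZE; n_send++)
           ptrs_send[n_send] = &items[n_send];

       for (;;) {
           n_send = rte_ring_enqueue_zc_burst_elem_start(
                   ring, sizeof(uint16_t), BATCH_SIZE, &zcd, NULL);

           /* offsets into items span at most
            * ITEMS_ARRAY_SIZE * sizeof(struct item) = 8192 bytes,
            * so after dropping ALIGN_EXPONENT bits they fit in 16 bits */
           rte_ptr_compress_16_shift(items, ptrs_send,
                   zcd.ptr1, zcd.n1, ALIGN_EXPONENT);
           if (zcd.ptr2 != NULL)
               rte_ptr_compress_16_shift(items, ptrs_send + zcd.n1,
                       zcd.ptr2, n_send - zcd.n1, ALIGN_EXPONENT);

           rte_ring_enqueue_zc_finish(ring, n_send);
       }
       return 1;
   }

The receiving side changes symmetrically: dequeue ``sizeof(uint16_t)`` elements and call
``rte_ptr_decompress_16_shift()`` instead.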