1.. SPDX-License-Identifier: BSD-3-Clause 2 Copyright(c) 2024 Arm Limited. 3 4Pointer Compression Library 5=========================== 6 7Use ``rte_ptr_compress_16_shift()`` and ``rte_ptr_decompress_16_shift()`` 8to compress and decompress pointers into 16-bit offsets. 9Use ``rte_ptr_compress_32_shift()`` and ``rte_ptr_decompress_32_shift()`` 10to compress and decompress pointers into 32-bit offsets. 11 12Compression takes advantage of the fact that pointers usually point to objects located in a limited memory region (like a mempool). 13By converting them to offsets from a base memory address, they can be stored in fewer bytes. 14How many bytes are needed to store the offset is dictated by the size of the memory region and the alignment of the objects the pointers point to. 15 16For example, a pointer into a 4GB memory pool can be stored as a 32-bit offset. 17If the pointer points to memory that is 8-byte aligned, then 3 bits can be dropped from the offset and 18a 32GB memory pool can now be addressed with 32 bits. 19 20For performance reasons, these requirements are not enforced programmatically. 21The programmer is responsible for ensuring that the combination of distance from the base pointer and 22memory alignment allows the offset to be stored in the number of bits indicated by the function name (16 or 32). 23The start of the mempool memory would be a good candidate for the base pointer. 24Otherwise, any pointer that precedes all the pointers being compressed, is close enough to them, and 25has the same alignment as the pointers being compressed will work. 26 27Macros present in the ``rte_ptr_compress.h`` header may be used to evaluate whether compression is possible: 28 29* ``RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE`` 30 31* ``RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT`` 32 33* ``RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT`` 34 35* ``RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT`` 36 37These help you calculate the compression parameters and check whether they are valid for a particular compression function. 
38 39If using a mempool, you can get the parameters you need to use in the compression macros and functions 40by using ``rte_mempool_get_mem_range()`` and ``rte_mempool_get_obj_alignment()``. 41 42.. note:: 43 44 Performance gains depend on the batch size of pointers and CPU capabilities such as vector extensions. 45 It's important to measure the performance increase on target hardware. 46 A test called ``ring_perf_autotest`` in ``dpdk-test`` can provide the measurements. 47 48Example usage 49------------- 50 51In this example, we send pointers between two cores through a ring. 52While this is a realistic use case, the code is simplified for demonstration purposes and omits error handling. 53 54.. code-block:: c 55 56 #include <rte_launch.h> 57 #include <rte_ptr_compress.h> 58 #include <rte_ring.h> 59 #include <rte_ring_elem.h> 60 61 #define ITEMS_ARRAY_SIZE (1024) 62 #define BATCH_SIZE (128) 63 #define ALIGN_EXPONENT (3) 64 #define ITEM_ALIGN (1<<ALIGN_EXPONENT) 65 #define CORE_SEND (1) 66 #define CORE_RECV (2) 67 68 struct item { 69 alignas(ITEM_ALIGN) int a; 70 }; 71 72 static struct item items[ITEMS_ARRAY_SIZE] = {0}; 73 static struct rte_ring *ring = NULL; 74 75 static int 76 send_compressed(void *args) 77 { 78 struct item *ptrs_send[BATCH_SIZE] = {0}; 79 unsigned int n_send = 0; 80 struct rte_ring_zc_data zcd = {0}; 81 82 /* in this example we only fill the ptrs_send once and reuse */ 83 for (;n_send < BATCH_SIZE; n_send++) 84 ptrs_send[n_send] = &items[n_send]; 85 86 for(;;) { 87 n_send = rte_ring_enqueue_zc_burst_elem_start( 88 ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL); 89 90 /* compress ptrs_send into offsets */ 91 rte_ptr_compress_32_shift(items, /* base pointer */ 92 ptrs_send, /* source array to be compressed */ 93 zcd.ptr1, /* destination array to store offsets */ 94 zcd.n1, /* how many pointers to compress */ 95 ALIGN_EXPONENT /* how many bits can we drop from the offset */); 96 97 if (zcd.ptr2 != NULL) 98 rte_ptr_compress_32_shift(items, 
ptrs_send + zcd.n1, 99 zcd.ptr2, n_send - zcd.n1, ALIGN_EXPONENT); 100 101 rte_ring_enqueue_zc_finish(ring, n_send); 102 } 103 return 1; 104 } 105 106 static int 107 recv_compressed(void *args) 108 { 109 struct item *ptrs_recv[BATCH_SIZE] = {0}; 110 unsigned int n_recv; 111 struct rte_ring_zc_data zcd = {0}; 112 113 for(;;) { 114 /* receive compressed pointers from the ring */ 115 n_recv = rte_ring_dequeue_zc_burst_elem_start( 116 ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL); 117 118 rte_ptr_decompress_32_shift(items, /* base pointer */ 119 zcd.ptr1, /* source array to decompress */ 120 ptrs_recv, /* destination array to store pointers */ 121 zcd.n1, /* how many pointers to decompress */ 122 ALIGN_EXPONENT /* how many bits were dropped from the offset */); 123 124 /* handle the potential secondary buffer (caused by ring boundary) */ 125 if (zcd.ptr2 != NULL) 126 rte_ptr_decompress_32_shift(items, 127 zcd.ptr2, 128 ptrs_recv + zcd.n1, 129 n_recv - zcd.n1, 130 ALIGN_EXPONENT); 131 132 rte_ring_dequeue_zc_finish(ring, n_recv); 133 134 /* ptrs_recv contains what ptrs_send contained in the other thread */ 135 /* (...) */ 136 } 137 return 1; 138 } 139 140 void 141 compression_example(void) 142 { 143 ring = rte_ring_create_elem( 144 "COMPR_PTRS", sizeof(uint32_t), 145 1024, rte_socket_id(), 146 RING_F_SP_ENQ | RING_F_SC_DEQ); 147 148 rte_eal_remote_launch(send_compressed, NULL, CORE_SEND); 149 rte_eal_remote_launch(recv_compressed, NULL, CORE_RECV); 150 151 for(;;) {} 152 } 153