xref: /dpdk/doc/guides/prog_guide/ptr_compress_lib.rst (revision 2f1015d8d56d32465cc260faf469950ebb9cf73b)
1..  SPDX-License-Identifier: BSD-3-Clause
2    Copyright(c) 2024 Arm Limited.
3
4Pointer Compression Library
5===========================
6
7Use ``rte_ptr_compress_16_shift()`` and ``rte_ptr_decompress_16_shift()``
8to compress and decompress pointers into 16-bit offsets.
9Use ``rte_ptr_compress_32_shift()`` and ``rte_ptr_decompress_32_shift()``
10to compress and decompress pointers into 32-bit offsets.
11
12Compression takes advantage of the fact that pointers are usually located in a limited memory region (like a mempool).
13By converting them to offsets from a base memory address they can be stored in fewer bytes.
14How many bytes are needed to store the offset is dictated by the memory region size and alignment of objects the pointers point to.
15
For example, a pointer into a 4GB memory pool can be stored as a 32-bit offset.
If the pointer points to memory that is 8-byte aligned, then the 3 low bits can be dropped from the offset and
a 32GB memory pool can now fit in 32 bits.
19
For performance reasons, these requirements are not enforced programmatically.
The programmer is responsible for ensuring that the combination of distance from the base pointer and
memory alignment allows the offset to be stored in the number of bits indicated by the function name (16 or 32).
The start of the mempool memory is a good candidate for the base pointer.
Otherwise, any pointer that precedes all the pointers being compressed, is close enough to them and
has the same alignment as them will work.
26
27Macros present in the rte_ptr_compress.h header may be used to evaluate whether compression is possible:
28
29*   RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE
30
31*   RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT
32
33*   RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT
34
35*   RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT
36
These macros help you calculate the compression parameters and check whether they are valid for a particular compression function.
38
39If using a mempool you can get the parameters you need to use in the compression macros and functions
40by using ``rte_mempool_get_mem_range()`` and ``rte_mempool_get_obj_alignment()``.
41
42.. note::
43
44    Performance gains depend on the batch size of pointers and CPU capabilities such as vector extensions.
45    It's important to measure the performance increase on target hardware.
46    A test called ``ring_perf_autotest`` in ``dpdk-test`` can provide the measurements.
47
48Example usage
49-------------
50
In this example, we send pointers between two cores through a ring.
While this is a realistic use case, the code is simplified for demonstration purposes and does not have error handling.
53
54.. code-block:: c
55
56    #include <rte_launch.h>
57    #include <rte_ptr_compress.h>
58    #include <rte_ring.h>
59    #include <rte_ring_elem.h>
60
/* Sizing of the example: number of items and number of pointers per burst. */
#define ITEMS_ARRAY_SIZE 1024
#define BATCH_SIZE 128

/*
 * Items are aligned to (1 << ALIGN_EXPONENT) bytes; ALIGN_EXPONENT is also
 * the bit shift handed to the compression and decompression calls.
 */
#define ALIGN_EXPONENT 3
#define ITEM_ALIGN (1 << ALIGN_EXPONENT)

/* Third argument given to rte_eal_remote_launch() for sender and receiver. */
#define CORE_SEND 1
#define CORE_RECV 2

/* Object type whose addresses get compressed; its member is ITEM_ALIGN aligned. */
struct item {
  alignas(ITEM_ALIGN) int a;
};

/* Objects with static storage duration start out zeroed / NULL. */
static struct item items[ITEMS_ARRAY_SIZE];
static struct rte_ring *ring;
74
75    static int
76    send_compressed(void *args)
77    {
78      struct item *ptrs_send[BATCH_SIZE] = {0};
79      unsigned int n_send = 0;
80      struct rte_ring_zc_data zcd = {0};
81
82      /* in this example we only fill the ptrs_send once and reuse */
83      for (;n_send < BATCH_SIZE; n_send++)
84        ptrs_send[n_send] = &items[n_send];
85
86      for(;;) {
87        n_send = rte_ring_enqueue_zc_burst_elem_start(
88          ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL);
89
90        /* compress ptrs_send into offsets */
91        rte_ptr_compress_32_shift(items, /* base pointer */
92          ptrs_send, /* source array to be compressed */
93          zcd.ptr1, /* destination array to store offsets */
94          zcd.n1, /* how many pointers to compress */
95          ALIGN_EXPONENT /* how many bits can we drop from the offset */);
96
97        if (zcd.ptr2 != NULL)
98          rte_ptr_compress_32_shift(items, ptrs_send + zcd.n1,
99            zcd.ptr2, n_send - zcd.n1, ALIGN_EXPONENT);
100
101        rte_ring_enqueue_zc_finish(ring, n_send);
102      }
103      return 1;
104    }
105
106    static int
107    recv_compressed(void *args)
108    {
109      struct item *ptrs_recv[BATCH_SIZE] = {0};
110      unsigned int n_recv;
111      struct rte_ring_zc_data zcd = {0};
112
113      for(;;) {
114        /* receive compressed pointers from the ring */
115        n_recv = rte_ring_dequeue_zc_burst_elem_start(
116          ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL);
117
118        rte_ptr_decompress_32_shift(items, /* base pointer */
119          zcd.ptr1, /* source array to decompress */
120          ptrs_recv, /* destination array to store pointers */
121          zcd.n1, /* how many pointers to decompress */
122          ALIGN_EXPONENT /* how many bits were dropped from the offset */);
123
124        /* handle the potential secondary buffer (caused by ring boundary) */
125        if (zcd.ptr2 != NULL)
126          rte_ptr_decompress_32_shift(items,
127            zcd.ptr2,
128            ptrs_recv + zcd.n1,
129            n_recv - zcd.n1,
130            ALIGN_EXPONENT);
131
132        rte_ring_dequeue_zc_finish(ring, n_recv);
133
134        /* ptrs_recv contains what ptrs_send contained in the other thread */
135        /* (...) */
136      }
137      return 1;
138    }
139
140    void
141    compression_example(void)
142    {
143      ring = rte_ring_create_elem(
144        "COMPR_PTRS", sizeof(uint32_t),
145        1024, rte_socket_id(),
146        RING_F_SP_ENQ | RING_F_SC_DEQ);
147
148      rte_eal_remote_launch(send_compressed, NULL, CORE_SEND);
149      rte_eal_remote_launch(recv_compressed, NULL, CORE_RECV);
150
151      for(;;) {}
152    }
153