xref: /dpdk/doc/guides/prog_guide/ptr_compress_lib.rst (revision 077596a4b0776d98c58787529a4858de69e605c7)
1*077596a4SPaul Szczepanek..  SPDX-License-Identifier: BSD-3-Clause
2*077596a4SPaul Szczepanek    Copyright(c) 2024 Arm Limited.
3*077596a4SPaul Szczepanek
4*077596a4SPaul SzczepanekPointer Compression Library
5*077596a4SPaul Szczepanek===========================
6*077596a4SPaul Szczepanek
7*077596a4SPaul SzczepanekUse ``rte_ptr_compress_16_shift()`` and ``rte_ptr_decompress_16_shift()``
8*077596a4SPaul Szczepanekto compress and decompress pointers into 16-bit offsets.
9*077596a4SPaul SzczepanekUse ``rte_ptr_compress_32_shift()`` and ``rte_ptr_decompress_32_shift()``
10*077596a4SPaul Szczepanekto compress and decompress pointers into 32-bit offsets.
11*077596a4SPaul Szczepanek
12*077596a4SPaul SzczepanekCompression takes advantage of the fact that pointers are usually located in a limited memory region (like a mempool).
13*077596a4SPaul SzczepanekBy converting them to offsets from a base memory address they can be stored in fewer bytes.
14*077596a4SPaul SzczepanekHow many bytes are needed to store the offset is dictated by the memory region size and alignment of objects the pointers point to.
15*077596a4SPaul Szczepanek
16*077596a4SPaul SzczepanekFor example, a pointer which is part of a 4GB memory pool can be stored as a 32-bit offset.
17*077596a4SPaul SzczepanekIf the pointer points to memory that is 8-byte aligned then 3 bits can be dropped from the offset and
18*077596a4SPaul Szczepaneka 32GB memory pool can now fit in 32 bits.
19*077596a4SPaul Szczepanek
20*077596a4SPaul SzczepanekFor performance reasons these requirements are not enforced programmatically.
21*077596a4SPaul SzczepanekThe programmer is responsible for ensuring that the combination of distance from the base pointer and
22*077596a4SPaul Szczepanekmemory alignment allow for storing of the offset in the number of bits indicated by the function name (16 or 32).
23*077596a4SPaul SzczepanekStart of mempool memory would be a good candidate for the base pointer.
24*077596a4SPaul SzczepanekOtherwise, any pointer that precedes all the pointers being compressed,
25*077596a4SPaul Szczepanekis close enough, and has the same alignment will work.
26*077596a4SPaul Szczepanek
27*077596a4SPaul SzczepanekMacros present in the ``rte_ptr_compress.h`` header may be used to evaluate whether compression is possible:
28*077596a4SPaul Szczepanek
29*077596a4SPaul Szczepanek*   RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE
30*077596a4SPaul Szczepanek
31*077596a4SPaul Szczepanek*   RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT
32*077596a4SPaul Szczepanek
33*077596a4SPaul Szczepanek*   RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT
34*077596a4SPaul Szczepanek
35*077596a4SPaul Szczepanek*   RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT
36*077596a4SPaul Szczepanek
37*077596a4SPaul SzczepanekThese will help you calculate compression parameters and determine whether they are legal for a particular compression function.
38*077596a4SPaul Szczepanek
39*077596a4SPaul Szczepanek.. note::
40*077596a4SPaul Szczepanek
41*077596a4SPaul Szczepanek    Performance gains depend on the batch size of pointers and CPU capabilities such as vector extensions.
42*077596a4SPaul Szczepanek    It's important to measure the performance increase on target hardware.
43*077596a4SPaul Szczepanek
44*077596a4SPaul SzczepanekExample usage
45*077596a4SPaul Szczepanek-------------
46*077596a4SPaul Szczepanek
47*077596a4SPaul SzczepanekIn this example we send pointers between two cores through a ring.
48*077596a4SPaul SzczepanekWhile this is a realistic use case the code is simplified for demonstration purposes and does not have error handling.
49*077596a4SPaul Szczepanek
50*077596a4SPaul Szczepanek.. code-block:: c
51*077596a4SPaul Szczepanek
52*077596a4SPaul Szczepanek    #include <rte_launch.h>
53*077596a4SPaul Szczepanek    #include <rte_ptr_compress.h>
54*077596a4SPaul Szczepanek    #include <rte_ring.h>
55*077596a4SPaul Szczepanek    #include <rte_ring_elem.h>
56*077596a4SPaul Szczepanek
57*077596a4SPaul Szczepanek    #define ITEMS_ARRAY_SIZE (1024)
58*077596a4SPaul Szczepanek    #define BATCH_SIZE (128)
59*077596a4SPaul Szczepanek    #define ALIGN_EXPONENT (3)
60*077596a4SPaul Szczepanek    #define ITEM_ALIGN (1<<ALIGN_EXPONENT)
61*077596a4SPaul Szczepanek    #define CORE_SEND (1)
62*077596a4SPaul Szczepanek    #define CORE_RECV (2)
63*077596a4SPaul Szczepanek
64*077596a4SPaul Szczepanek    struct item {
65*077596a4SPaul Szczepanek      alignas(ITEM_ALIGN) int a;
66*077596a4SPaul Szczepanek    };
67*077596a4SPaul Szczepanek
68*077596a4SPaul Szczepanek    static struct item items[ITEMS_ARRAY_SIZE] = {0};
69*077596a4SPaul Szczepanek    static struct rte_ring *ring = NULL;
70*077596a4SPaul Szczepanek
71*077596a4SPaul Szczepanek    static int
72*077596a4SPaul Szczepanek    send_compressed(void *args)
73*077596a4SPaul Szczepanek    {
74*077596a4SPaul Szczepanek      struct item *ptrs_send[BATCH_SIZE] = {0};
75*077596a4SPaul Szczepanek      unsigned int n_send = 0;
76*077596a4SPaul Szczepanek      struct rte_ring_zc_data zcd = {0};
77*077596a4SPaul Szczepanek
78*077596a4SPaul Szczepanek      /* in this example we only fill the ptrs_send once and reuse */
79*077596a4SPaul Szczepanek      for (;n_send < BATCH_SIZE; n_send++)
80*077596a4SPaul Szczepanek        ptrs_send[n_send] = &items[n_send];
81*077596a4SPaul Szczepanek
82*077596a4SPaul Szczepanek      for(;;) {
83*077596a4SPaul Szczepanek        n_send = rte_ring_enqueue_zc_burst_elem_start(
84*077596a4SPaul Szczepanek          ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL);
85*077596a4SPaul Szczepanek
86*077596a4SPaul Szczepanek        /* compress ptrs_send into offsets */
87*077596a4SPaul Szczepanek        rte_ptr_compress_32_shift(items, /* base pointer */
88*077596a4SPaul Szczepanek          ptrs_send, /* source array to be compressed */
89*077596a4SPaul Szczepanek          zcd.ptr1, /* destination array to store offsets */
90*077596a4SPaul Szczepanek          zcd.n1, /* how many pointers to compress */
91*077596a4SPaul Szczepanek          ALIGN_EXPONENT /* how many bits can we drop from the offset */);
92*077596a4SPaul Szczepanek
93*077596a4SPaul Szczepanek        if (zcd.ptr2 != NULL)
94*077596a4SPaul Szczepanek          rte_ptr_compress_32_shift(items, ptrs_send + zcd.n1,
95*077596a4SPaul Szczepanek            zcd.ptr2, n_send - zcd.n1, ALIGN_EXPONENT);
96*077596a4SPaul Szczepanek
97*077596a4SPaul Szczepanek        rte_ring_enqueue_zc_finish(ring, n_send);
98*077596a4SPaul Szczepanek      }
99*077596a4SPaul Szczepanek      return 1;
100*077596a4SPaul Szczepanek    }
101*077596a4SPaul Szczepanek
102*077596a4SPaul Szczepanek    static int
103*077596a4SPaul Szczepanek    recv_compressed(void *args)
104*077596a4SPaul Szczepanek    {
105*077596a4SPaul Szczepanek      struct item *ptrs_recv[BATCH_SIZE] = {0};
106*077596a4SPaul Szczepanek      unsigned int n_recv;
107*077596a4SPaul Szczepanek      struct rte_ring_zc_data zcd = {0};
108*077596a4SPaul Szczepanek
109*077596a4SPaul Szczepanek      for(;;) {
110*077596a4SPaul Szczepanek        /* receive compressed pointers from the ring */
111*077596a4SPaul Szczepanek        n_recv = rte_ring_dequeue_zc_burst_elem_start(
112*077596a4SPaul Szczepanek          ring, sizeof(uint32_t), BATCH_SIZE, &zcd, NULL);
113*077596a4SPaul Szczepanek
114*077596a4SPaul Szczepanek        rte_ptr_decompress_32_shift(items, /* base pointer */
115*077596a4SPaul Szczepanek          zcd.ptr1, /* source array to decompress */
116*077596a4SPaul Szczepanek          ptrs_recv, /* destination array to store pointers */
117*077596a4SPaul Szczepanek          zcd.n1, /* how many pointers to decompress */
118*077596a4SPaul Szczepanek          ALIGN_EXPONENT /* how many bits were dropped from the offset */);
119*077596a4SPaul Szczepanek
120*077596a4SPaul Szczepanek        /* handle the potential secondary buffer (caused by ring boundary) */
121*077596a4SPaul Szczepanek        if (zcd.ptr2 != NULL)
122*077596a4SPaul Szczepanek          rte_ptr_decompress_32_shift(items,
123*077596a4SPaul Szczepanek            zcd.ptr2,
124*077596a4SPaul Szczepanek            ptrs_recv + zcd.n1,
125*077596a4SPaul Szczepanek            n_recv - zcd.n1,
126*077596a4SPaul Szczepanek            ALIGN_EXPONENT);
127*077596a4SPaul Szczepanek
128*077596a4SPaul Szczepanek        rte_ring_dequeue_zc_finish(ring, n_recv);
129*077596a4SPaul Szczepanek
130*077596a4SPaul Szczepanek        /* ptrs_recv contains what ptrs_send contained in the other thread */
131*077596a4SPaul Szczepanek        /* (...) */
132*077596a4SPaul Szczepanek      }
133*077596a4SPaul Szczepanek      return 1;
134*077596a4SPaul Szczepanek    }
135*077596a4SPaul Szczepanek
136*077596a4SPaul Szczepanek    void
137*077596a4SPaul Szczepanek    compression_example(void)
138*077596a4SPaul Szczepanek    {
139*077596a4SPaul Szczepanek      ring = rte_ring_create_elem(
140*077596a4SPaul Szczepanek        "COMPR_PTRS", sizeof(uint32_t),
141*077596a4SPaul Szczepanek        1024, rte_socket_id(),
142*077596a4SPaul Szczepanek        RING_F_SP_ENQ | RING_F_SC_DEQ);
143*077596a4SPaul Szczepanek
144*077596a4SPaul Szczepanek      rte_eal_remote_launch(send_compressed, NULL, CORE_SEND);
145*077596a4SPaul Szczepanek      rte_eal_remote_launch(recv_compressed, NULL, CORE_RECV);
146*077596a4SPaul Szczepanek
147*077596a4SPaul Szczepanek      for(;;) {}
148*077596a4SPaul Szczepanek    }
149