xref: /dpdk/lib/eal/include/generic/rte_vect.h (revision 719834a6849e1daf4a70ff7742bbcc3ae7e25607)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  */
4 
5 #ifndef _RTE_VECT_H_
6 #define _RTE_VECT_H_
7 
8 /**
9  * @file
10  * SIMD vector types and control
11  *
12  * This file defines types to use vector instructions with generic C code
13  * and APIs to enable the code using them.
14  */
15 
16 #include <stdint.h>
17 
18 #ifdef RTE_TOOLCHAIN_MSVC
19 
20 #include <intrin.h>
21 
22 #else
23 
24 /* Unsigned vector types */
25 
/**
 * 64-bit SIMD vector made of eight unsigned 8-bit lanes,
 * aligned on an 8-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v64u8_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef uint8_t rte_v64u8_t
	__attribute__((__vector_size__(8), __aligned__(8)));
32 
/**
 * 64-bit SIMD vector made of four unsigned 16-bit lanes,
 * aligned on an 8-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v64u16_t){ a0, a1, a2, a3 }
 */
typedef uint16_t rte_v64u16_t
	__attribute__((__vector_size__(8), __aligned__(8)));
39 
/**
 * 64-bit SIMD vector made of two unsigned 32-bit lanes,
 * aligned on an 8-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v64u32_t){ a0, a1 }
 */
typedef uint32_t rte_v64u32_t
	__attribute__((__vector_size__(8), __aligned__(8)));
46 
/**
 * 128-bit SIMD vector made of sixteen unsigned 8-bit lanes,
 * aligned on a 16-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v128u8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                       a08, a09, a10, a11, a12, a13, a14, a15 }
 */
typedef uint8_t rte_v128u8_t
	__attribute__((__vector_size__(16), __aligned__(16)));
54 
/**
 * 128-bit SIMD vector made of eight unsigned 16-bit lanes,
 * aligned on a 16-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v128u16_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef uint16_t rte_v128u16_t
	__attribute__((__vector_size__(16), __aligned__(16)));
61 
/**
 * 128-bit SIMD vector made of four unsigned 32-bit lanes,
 * aligned on a 16-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v128u32_t){ a0, a1, a2, a3 }
 */
typedef uint32_t rte_v128u32_t
	__attribute__((__vector_size__(16), __aligned__(16)));
68 
/**
 * 128-bit SIMD vector made of two unsigned 64-bit lanes,
 * aligned on a 16-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v128u64_t){ a0, a1 }
 */
typedef uint64_t rte_v128u64_t
	__attribute__((__vector_size__(16), __aligned__(16)));
75 
/**
 * 256-bit SIMD vector made of thirty-two unsigned 8-bit lanes,
 * aligned on a 32-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v256u8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                       a08, a09, a10, a11, a12, a13, a14, a15,
 *                       a16, a17, a18, a19, a20, a21, a22, a23,
 *                       a24, a25, a26, a27, a28, a29, a30, a31 }
 */
typedef uint8_t rte_v256u8_t
	__attribute__((__vector_size__(32), __aligned__(32)));
85 
/**
 * 256-bit SIMD vector made of sixteen unsigned 16-bit lanes,
 * aligned on a 32-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v256u16_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                        a08, a09, a10, a11, a12, a13, a14, a15 }
 */
typedef uint16_t rte_v256u16_t
	__attribute__((__vector_size__(32), __aligned__(32)));
93 
/**
 * 256-bit SIMD vector made of eight unsigned 32-bit lanes,
 * aligned on a 32-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v256u32_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef uint32_t rte_v256u32_t
	__attribute__((__vector_size__(32), __aligned__(32)));
100 
/**
 * 256-bit SIMD vector made of four unsigned 64-bit lanes,
 * aligned on a 32-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v256u64_t){ a0, a1, a2, a3 }
 */
typedef uint64_t rte_v256u64_t
	__attribute__((__vector_size__(32), __aligned__(32)));
107 
108 
109 /* Signed vector types */
110 
/**
 * 64-bit SIMD vector made of eight signed 8-bit lanes,
 * aligned on an 8-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v64s8_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef int8_t rte_v64s8_t
	__attribute__((__vector_size__(8), __aligned__(8)));
117 
/**
 * 64-bit SIMD vector made of four signed 16-bit lanes,
 * aligned on an 8-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v64s16_t){ a0, a1, a2, a3 }
 */
typedef int16_t rte_v64s16_t
	__attribute__((__vector_size__(8), __aligned__(8)));
124 
/**
 * 64-bit SIMD vector made of two signed 32-bit lanes,
 * aligned on an 8-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v64s32_t){ a0, a1 }
 */
typedef int32_t rte_v64s32_t
	__attribute__((__vector_size__(8), __aligned__(8)));
131 
/**
 * 128-bit SIMD vector made of sixteen signed 8-bit lanes,
 * aligned on a 16-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v128s8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                       a08, a09, a10, a11, a12, a13, a14, a15 }
 */
typedef int8_t rte_v128s8_t
	__attribute__((__vector_size__(16), __aligned__(16)));
139 
/**
 * 128-bit SIMD vector made of eight signed 16-bit lanes,
 * aligned on a 16-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v128s16_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef int16_t rte_v128s16_t
	__attribute__((__vector_size__(16), __aligned__(16)));
146 
/**
 * 128-bit SIMD vector made of four signed 32-bit lanes,
 * aligned on a 16-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v128s32_t){ a0, a1, a2, a3 }
 */
typedef int32_t rte_v128s32_t
	__attribute__((__vector_size__(16), __aligned__(16)));
153 
/**
 * 128-bit SIMD vector made of two signed 64-bit lanes,
 * aligned on a 16-byte boundary.
 *
 * Lanes are numbered from 0, like every other vector type in this file.
 *
 * Initializer layout:
 *   a = (rte_v128s64_t){ a0, a1 }
 */
typedef int64_t rte_v128s64_t
	__attribute__((__vector_size__(16), __aligned__(16)));
160 
/**
 * 256-bit SIMD vector made of thirty-two signed 8-bit lanes,
 * aligned on a 32-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v256s8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                       a08, a09, a10, a11, a12, a13, a14, a15,
 *                       a16, a17, a18, a19, a20, a21, a22, a23,
 *                       a24, a25, a26, a27, a28, a29, a30, a31 }
 */
typedef int8_t rte_v256s8_t
	__attribute__((__vector_size__(32), __aligned__(32)));
170 
/**
 * 256-bit SIMD vector made of sixteen signed 16-bit lanes,
 * aligned on a 32-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v256s16_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                        a08, a09, a10, a11, a12, a13, a14, a15 }
 */
typedef int16_t rte_v256s16_t
	__attribute__((__vector_size__(32), __aligned__(32)));
178 
/**
 * 256-bit SIMD vector made of eight signed 32-bit lanes,
 * aligned on a 32-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v256s32_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef int32_t rte_v256s32_t
	__attribute__((__vector_size__(32), __aligned__(32)));
185 
/**
 * 256-bit SIMD vector made of four signed 64-bit lanes,
 * aligned on a 32-byte boundary.
 *
 * Initializer layout:
 *   a = (rte_v256s64_t){ a0, a1, a2, a3 }
 */
typedef int64_t rte_v256s64_t
	__attribute__((__vector_size__(32), __aligned__(32)));
192 
193 #endif
194 
/**
 * Upper bounds on the SIMD bitwidth, used to restrict vector path selection.
 */
enum rte_vect_max_simd {
	/** Scalar only: path selection is limited so that no vector path is used. */
	RTE_VECT_SIMD_DISABLED = 64,
	/** Path selection is limited to SSE/NEON/Altivec or below. */
	RTE_VECT_SIMD_128 = 128,
	/** Path selection is limited to AVX2 or below. */
	RTE_VECT_SIMD_256 = 256,
	/** Path selection is limited to AVX512 or below. */
	RTE_VECT_SIMD_512 = 512,
	/**
	 * No limit by max SIMD bitwidth: every suitable path is allowed.
	 * The value was picked because it is both large and a power of 2.
	 */
	RTE_VECT_SIMD_MAX = INT16_MAX + 1,
};
211 
212 #ifdef __cplusplus
213 extern "C" {
214 #endif
215 
/**
 * Retrieve the supported SIMD bitwidth.
 *
 * @return
 *   The bitwidth as a uint16_t. See enum rte_vect_max_simd for the
 *   named limits.
 */
uint16_t rte_vect_get_max_simd_bitwidth(void);
223 
/**
 * Configure the supported SIMD bitwidth.
 *
 * Intended to be called a single time during initialization,
 * ahead of EAL init.
 *
 * @param bitwidth
 *   The bitwidth to set, as a uint16_t.
 * @return
 *   - 0 when the bitwidth was set successfully.
 *   - -EINVAL when the bitwidth parameter is invalid.
 *   - -EPERM when the bitwidth is forced.
 */
int rte_vect_set_max_simd_bitwidth(uint16_t bitwidth);
236 
237 #ifdef __cplusplus
238 }
239 #endif
240 
241 #endif /* _RTE_VECT_H_ */
242