/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 */

#ifndef _RTE_VECT_H_
#define _RTE_VECT_H_

/**
 * @file
 * SIMD vector types and control
 *
 * This file defines types to use vector instructions with generic C code
 * and APIs to enable the code using them.
 */

#include <stdint.h>

#ifdef RTE_TOOLCHAIN_MSVC

#include <intrin.h>

#else /* !RTE_TOOLCHAIN_MSVC */

/*
 * The generic vector types below rely on the vector_size/aligned type
 * attributes, so they are only defined when RTE_TOOLCHAIN_MSVC is not set.
 */

/* Unsigned vector types */

/**
 * 64 bits vector size to use with unsigned 8 bits elements.
 *
 * a = (rte_v64u8_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef uint8_t rte_v64u8_t __attribute__((vector_size(8), aligned(8)));

/**
 * 64 bits vector size to use with unsigned 16 bits elements.
 *
 * a = (rte_v64u16_t){ a0, a1, a2, a3 }
 */
typedef uint16_t rte_v64u16_t __attribute__((vector_size(8), aligned(8)));

/**
 * 64 bits vector size to use with unsigned 32 bits elements.
 *
 * a = (rte_v64u32_t){ a0, a1 }
 */
typedef uint32_t rte_v64u32_t __attribute__((vector_size(8), aligned(8)));

/**
 * 128 bits vector size to use with unsigned 8 bits elements.
 *
 * a = (rte_v128u8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                     a08, a09, a10, a11, a12, a13, a14, a15 }
 */
typedef uint8_t rte_v128u8_t __attribute__((vector_size(16), aligned(16)));

/**
 * 128 bits vector size to use with unsigned 16 bits elements.
 *
 * a = (rte_v128u16_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef uint16_t rte_v128u16_t __attribute__((vector_size(16), aligned(16)));

/**
 * 128 bits vector size to use with unsigned 32 bits elements.
 *
 * a = (rte_v128u32_t){ a0, a1, a2, a3 }
 */
typedef uint32_t rte_v128u32_t __attribute__((vector_size(16), aligned(16)));

/**
 * 128 bits vector size to use with unsigned 64 bits elements.
 *
 * a = (rte_v128u64_t){ a0, a1 }
 */
typedef uint64_t rte_v128u64_t __attribute__((vector_size(16), aligned(16)));

/**
 * 256 bits vector size to use with unsigned 8 bits elements.
 *
 * a = (rte_v256u8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                     a08, a09, a10, a11, a12, a13, a14, a15,
 *                     a16, a17, a18, a19, a20, a21, a22, a23,
 *                     a24, a25, a26, a27, a28, a29, a30, a31 }
 */
typedef uint8_t rte_v256u8_t __attribute__((vector_size(32), aligned(32)));

/**
 * 256 bits vector size to use with unsigned 16 bits elements.
 *
 * a = (rte_v256u16_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                      a08, a09, a10, a11, a12, a13, a14, a15 }
 */
typedef uint16_t rte_v256u16_t __attribute__((vector_size(32), aligned(32)));

/**
 * 256 bits vector size to use with unsigned 32 bits elements.
 *
 * a = (rte_v256u32_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef uint32_t rte_v256u32_t __attribute__((vector_size(32), aligned(32)));

/**
 * 256 bits vector size to use with unsigned 64 bits elements.
 *
 * a = (rte_v256u64_t){ a0, a1, a2, a3 }
 */
typedef uint64_t rte_v256u64_t __attribute__((vector_size(32), aligned(32)));


/* Signed vector types */

/**
 * 64 bits vector size to use with signed 8 bits elements.
 *
 * a = (rte_v64s8_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef int8_t rte_v64s8_t __attribute__((vector_size(8), aligned(8)));

/**
 * 64 bits vector size to use with signed 16 bits elements.
 *
 * a = (rte_v64s16_t){ a0, a1, a2, a3 }
 */
typedef int16_t rte_v64s16_t __attribute__((vector_size(8), aligned(8)));

/**
 * 64 bits vector size to use with signed 32 bits elements.
 *
 * a = (rte_v64s32_t){ a0, a1 }
 */
typedef int32_t rte_v64s32_t __attribute__((vector_size(8), aligned(8)));

/**
 * 128 bits vector size to use with signed 8 bits elements.
 *
 * a = (rte_v128s8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                     a08, a09, a10, a11, a12, a13, a14, a15 }
 */
typedef int8_t rte_v128s8_t __attribute__((vector_size(16), aligned(16)));

/**
 * 128 bits vector size to use with signed 16 bits elements.
 *
 * a = (rte_v128s16_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef int16_t rte_v128s16_t __attribute__((vector_size(16), aligned(16)));

/**
 * 128 bits vector size to use with signed 32 bits elements.
 *
 * a = (rte_v128s32_t){ a0, a1, a2, a3 }
 */
typedef int32_t rte_v128s32_t __attribute__((vector_size(16), aligned(16)));

/**
 * 128 bits vector size to use with signed 64 bits elements.
 *
 * a = (rte_v128s64_t){ a0, a1 }
 */
typedef int64_t rte_v128s64_t __attribute__((vector_size(16), aligned(16)));

/**
 * 256 bits vector size to use with signed 8 bits elements.
 *
 * a = (rte_v256s8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                     a08, a09, a10, a11, a12, a13, a14, a15,
 *                     a16, a17, a18, a19, a20, a21, a22, a23,
 *                     a24, a25, a26, a27, a28, a29, a30, a31 }
 */
typedef int8_t rte_v256s8_t __attribute__((vector_size(32), aligned(32)));

/**
 * 256 bits vector size to use with signed 16 bits elements.
 *
 * a = (rte_v256s16_t){ a00, a01, a02, a03, a04, a05, a06, a07,
 *                      a08, a09, a10, a11, a12, a13, a14, a15 }
 */
typedef int16_t rte_v256s16_t __attribute__((vector_size(32), aligned(32)));

/**
 * 256 bits vector size to use with signed 32 bits elements.
 *
 * a = (rte_v256s32_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
 */
typedef int32_t rte_v256s32_t __attribute__((vector_size(32), aligned(32)));

/**
 * 256 bits vector size to use with signed 64 bits elements.
 *
 * a = (rte_v256s64_t){ a0, a1, a2, a3 }
 */
typedef int64_t rte_v256s64_t __attribute__((vector_size(32), aligned(32)));

#endif /* !RTE_TOOLCHAIN_MSVC */

/**
 * The max SIMD bitwidth value to limit vector path selection.
 */
enum rte_vect_max_simd {
	RTE_VECT_SIMD_DISABLED = 64,
	/**< Limits path selection to scalar, disables all vector paths. */
	RTE_VECT_SIMD_128 = 128,
	/**< Limits path selection to SSE/NEON/Altivec or below. */
	RTE_VECT_SIMD_256 = 256, /**< Limits path selection to AVX2 or below. */
	RTE_VECT_SIMD_512 = 512, /**< Limits path selection to AVX512 or below. */
	RTE_VECT_SIMD_MAX = INT16_MAX + 1,
	/**<
	 * Disables limiting by max SIMD bitwidth, allows all suitable paths.
	 * This value is used as it is a large number and a power of 2.
	 * (INT16_MAX + 1 is 32768, which still fits in the uint16_t bitwidth
	 * taken and returned by the functions declared below.)
	 */
};

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Get the supported SIMD bitwidth.
 *
 * @return
 *   uint16_t bitwidth.
 */
uint16_t rte_vect_get_max_simd_bitwidth(void);

/**
 * Set the supported SIMD bitwidth.
 * This API should only be called once at initialization, before EAL init.
 *
 * @param bitwidth
 *   uint16_t bitwidth.
 * @return
 *   - 0 on success.
 *   - -EINVAL on invalid bitwidth parameter.
 *   - -EPERM if bitwidth is forced.
 */
int rte_vect_set_max_simd_bitwidth(uint16_t bitwidth);

#ifdef __cplusplus
}
#endif

#endif /* _RTE_VECT_H_ */