xref: /dpdk/lib/eal/x86/include/rte_vect.h (revision 5b856206c74bbcf19e12cafa15382a7e15b0a1b5)
199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
299a2dd95SBruce Richardson  * Copyright(c) 2010-2015 Intel Corporation
399a2dd95SBruce Richardson  */
499a2dd95SBruce Richardson 
599a2dd95SBruce Richardson #ifndef _RTE_VECT_X86_H_
699a2dd95SBruce Richardson #define _RTE_VECT_X86_H_
799a2dd95SBruce Richardson 
/**
 * @file
 *
 * RTE SSE/AVX/AVX-512 vector types and helper macros for x86.
 */
1399a2dd95SBruce Richardson 
1445f1004fSTyler Retzlaff #include <assert.h>
1599a2dd95SBruce Richardson #include <stdint.h>
1699a2dd95SBruce Richardson #include <rte_config.h>
1799a2dd95SBruce Richardson #include <rte_common.h>
1899a2dd95SBruce Richardson #include "generic/rte_vect.h"
1999a2dd95SBruce Richardson 
20ada2839cSDavid Marchand #if defined(__ICC) || defined(_WIN64)
2199a2dd95SBruce Richardson #include <smmintrin.h> /* SSE4 */
2299a2dd95SBruce Richardson #include <immintrin.h>
2399a2dd95SBruce Richardson #else
2499a2dd95SBruce Richardson #include <x86intrin.h>
2599a2dd95SBruce Richardson #endif
2699a2dd95SBruce Richardson 
2799a2dd95SBruce Richardson #ifdef __cplusplus
2899a2dd95SBruce Richardson extern "C" {
2999a2dd95SBruce Richardson #endif
3099a2dd95SBruce Richardson 
3199a2dd95SBruce Richardson #define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_256
3299a2dd95SBruce Richardson 
3399a2dd95SBruce Richardson typedef __m128i xmm_t;
3499a2dd95SBruce Richardson 
3545f1004fSTyler Retzlaff #define	XMM_SIZE	16
3699a2dd95SBruce Richardson #define	XMM_MASK	(XMM_SIZE - 1)
3799a2dd95SBruce Richardson 
3845f1004fSTyler Retzlaff static_assert(sizeof(xmm_t) == XMM_SIZE, "");
3945f1004fSTyler Retzlaff 
4099a2dd95SBruce Richardson typedef union rte_xmm {
4199a2dd95SBruce Richardson 	xmm_t    x;
4299a2dd95SBruce Richardson 	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
4399a2dd95SBruce Richardson 	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
4499a2dd95SBruce Richardson 	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
4599a2dd95SBruce Richardson 	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
4699a2dd95SBruce Richardson 	double   pd[XMM_SIZE / sizeof(double)];
4799a2dd95SBruce Richardson } rte_xmm_t;
4899a2dd95SBruce Richardson 
#ifdef __AVX__

/** Integer vector type used for AVX registers. */
typedef __m256i ymm_t;

/* Size of ymm_t in bytes, and the mask covering the offsets within it. */
#define	YMM_SIZE	(sizeof(ymm_t))
#define	YMM_MASK	(YMM_SIZE - 1)

/**
 * Overlay of a single ymm_t with xmm_t halves and with arrays of
 * fixed-width integers and doubles covering the same YMM_SIZE bytes.
 */
union rte_ymm {
	ymm_t    y;                               /* the vector as a whole */
	xmm_t    x[YMM_SIZE / sizeof(xmm_t)];      /* xmm_t-sized pieces */
	uint8_t  u8[YMM_SIZE / sizeof(uint8_t)];   /* byte view */
	uint16_t u16[YMM_SIZE / sizeof(uint16_t)]; /* 16-bit view */
	uint32_t u32[YMM_SIZE / sizeof(uint32_t)]; /* 32-bit view */
	uint64_t u64[YMM_SIZE / sizeof(uint64_t)]; /* 64-bit view */
	double   pd[YMM_SIZE / sizeof(double)];    /* double view */
};

typedef union rte_ymm rte_ymm_t;

#endif /* __AVX__ */
6799a2dd95SBruce Richardson 
#ifdef RTE_ARCH_I686
/*
 * Defined only when RTE_ARCH_I686 is set: stores the vector a into an
 * rte_xmm_t and yields its first 64-bit element (u64[0]).
 *
 * The temporary carries a deliberately unusual name so that an argument
 * expression mentioning a caller variable (for instance one called "m")
 * is not captured by the macro's own local.
 */
#define _mm_cvtsi128_si64(a)            \
__extension__ ({                        \
	rte_xmm_t rte_vect_cvt_tmp_;    \
	rte_vect_cvt_tmp_.x = (a);      \
	(rte_vect_cvt_tmp_.u64[0]);     \
})
#endif
7699a2dd95SBruce Richardson 
/*
 * Prior to version 12.1 icc doesn't support _mm_set_epi64x, so provide a
 * replacement: b is stored in u64[0], a in u64[1], and the resulting
 * vector is yielded.
 *
 * Both arguments are parenthesised, and the temporary carries a
 * deliberately unusual name so that an argument expression mentioning a
 * caller variable (for instance one called "m") is not captured by the
 * macro's own local.
 */
#if (defined(__ICC) && __ICC < 1210)
#define _mm_set_epi64x(a, b)            \
__extension__ ({                        \
	rte_xmm_t rte_vect_set_tmp_;    \
	rte_vect_set_tmp_.u64[0] = (b); \
	rte_vect_set_tmp_.u64[1] = (a); \
	(rte_vect_set_tmp_.x);          \
})
#endif /* (defined(__ICC) && __ICC < 1210) */
8999a2dd95SBruce Richardson 
#ifdef __AVX512F__

/* Size of __m512i in bytes, and the mask covering the offsets within it. */
#define RTE_X86_ZMM_SIZE	(sizeof(__m512i))
#define RTE_X86_ZMM_MASK	(RTE_X86_ZMM_SIZE - 1)

/**
 * Overlay of a single __m512i with ymm_t and xmm_t pieces and with arrays
 * of fixed-width integers and doubles covering the same bytes. The union
 * is aligned to RTE_X86_ZMM_SIZE.
 */
union __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm {
	__m512i	 z;                                        /* the vector as a whole */
	ymm_t    y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)];      /* ymm_t-sized pieces */
	xmm_t    x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)];      /* xmm_t-sized pieces */
	uint8_t  u8[RTE_X86_ZMM_SIZE / sizeof(uint8_t)];   /* byte view */
	uint16_t u16[RTE_X86_ZMM_SIZE / sizeof(uint16_t)]; /* 16-bit view */
	uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)]; /* 32-bit view */
	uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)]; /* 64-bit view */
	double   pd[RTE_X86_ZMM_SIZE / sizeof(double)];    /* double view */
};

typedef union __rte_x86_zmm __rte_x86_zmm_t;

#endif /* __AVX512F__ */
10799a2dd95SBruce Richardson 
10899a2dd95SBruce Richardson #ifdef __cplusplus
10999a2dd95SBruce Richardson }
11099a2dd95SBruce Richardson #endif
11199a2dd95SBruce Richardson 
11299a2dd95SBruce Richardson #endif /* _RTE_VECT_X86_H_ */
113