xref: /dpdk/lib/eal/arm/include/rte_memcpy_64.h (revision 719834a6849e1daf4a70ff7742bbcc3ae7e25607)
199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
299a2dd95SBruce Richardson  * Copyright(c) 2015 Cavium, Inc
399a2dd95SBruce Richardson  */
499a2dd95SBruce Richardson 
599a2dd95SBruce Richardson #ifndef _RTE_MEMCPY_ARM64_H_
699a2dd95SBruce Richardson #define _RTE_MEMCPY_ARM64_H_
799a2dd95SBruce Richardson 
899a2dd95SBruce Richardson #include <stdint.h>
999a2dd95SBruce Richardson #include <string.h>
1099a2dd95SBruce Richardson 
1199a2dd95SBruce Richardson #include "generic/rte_memcpy.h"
1299a2dd95SBruce Richardson 
1399a2dd95SBruce Richardson #ifdef RTE_ARCH_ARM64_MEMCPY
1499a2dd95SBruce Richardson #include <rte_common.h>
1599a2dd95SBruce Richardson #include <rte_branch_prediction.h>
1699a2dd95SBruce Richardson 
17*719834a6SMattias Rönnblom #ifdef __cplusplus
18*719834a6SMattias Rönnblom extern "C" {
19*719834a6SMattias Rönnblom #endif
20*719834a6SMattias Rönnblom 
/*
 * Memory copy performance differs across AArch64 micro-architectures.
 * Recent glibc versions (e.g. 2.23 or later) provide better memcpy()
 * performance than older ones. Using a recent glibc is always recommended
 * where possible, since the entire system benefits from it.
 *
 * This implementation improves memory copy on some AArch64 micro-architectures
 * when an old glibc (e.g. 2.19, 2.17...) is in use. It is disabled by
 * default and requires "RTE_ARCH_ARM64_MEMCPY" to be defined to activate it.
 * It does not always outperform memcpy(), so users need to run the unit
 * test "memcpy_perf_autotest" and tune the parameters in the customization
 * section below for best performance.
 *
 * The compiler version also affects rte_memcpy() performance. It has been
 * observed that, on some platforms and with the same code, binaries compiled
 * with GCC 7.2.0 perform better than binaries compiled with GCC 4.8.5.
 */
3899a2dd95SBruce Richardson 
3999a2dd95SBruce Richardson /**************************************
4099a2dd95SBruce Richardson  * Beginning of customization section
4199a2dd95SBruce Richardson  **************************************/
/*
 * Address bits tested when classifying a copy as aligned or unaligned.
 * May be overridden by defining RTE_ARM64_MEMCPY_ALIGN_MASK beforehand.
 */
#if !defined(RTE_ARM64_MEMCPY_ALIGN_MASK)
#define RTE_ARM64_MEMCPY_ALIGN_MASK ((RTE_CACHE_LINE_SIZE >> 3) - 1)
#endif

#if defined(RTE_ARM64_MEMCPY_STRICT_ALIGN)
/* Strict mode: a copy is unaligned if either dst or src is unaligned */
#define RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) \
	(((uintptr_t)(dst) | (uintptr_t)(src)) & RTE_ARM64_MEMCPY_ALIGN_MASK)
#else
/* Default mode: only the alignment of src is considered; dst is ignored */
#define RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) \
	((uintptr_t)(src) & RTE_ARM64_MEMCPY_ALIGN_MASK)
#endif
5599a2dd95SBruce Richardson 
5699a2dd95SBruce Richardson 
/*
 * If copy size is larger than threshold, memcpy() will be used.
 * Run "memcpy_perf_autotest" to determine the proper threshold.
 *
 * USE_ALIGNED_RTE_MEMCPY(dst, src, n) is nonzero when the copy is classified
 * as aligned by RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY() and, if
 * RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD is defined, n does not exceed it.
 * USE_UNALIGNED_RTE_MEMCPY(dst, src, n) is the counterpart for copies
 * classified as unaligned, bounded by RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD.
 *
 * The size argument and the threshold macros are parenthesized on expansion
 * so that arbitrary expressions (e.g. "a + b" or "c ? x : y") keep their
 * intended meaning next to the "<=" comparison and the size_t cast.
 */
#ifdef RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD
#define USE_ALIGNED_RTE_MEMCPY(dst, src, n) \
(!RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) && \
(n) <= (size_t)(RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD))
#else
#define USE_ALIGNED_RTE_MEMCPY(dst, src, n) \
(!RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src))
#endif
#ifdef RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD
#define USE_UNALIGNED_RTE_MEMCPY(dst, src, n) \
(RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) && \
(n) <= (size_t)(RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD))
#else
#define USE_UNALIGNED_RTE_MEMCPY(dst, src, n) \
(RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src))
#endif
/*
 * The logic of USE_RTE_MEMCPY() can also be modified to best fit platform.
 * When neither threshold is defined it is the constant 1.
 */
#if defined(RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD) \
|| defined(RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD)
#define USE_RTE_MEMCPY(dst, src, n) \
(USE_ALIGNED_RTE_MEMCPY(dst, src, n) || USE_UNALIGNED_RTE_MEMCPY(dst, src, n))
#else
#define USE_RTE_MEMCPY(dst, src, n) (1)
#endif
8799a2dd95SBruce Richardson /**************************************
8899a2dd95SBruce Richardson  * End of customization section
8999a2dd95SBruce Richardson  **************************************/
9099a2dd95SBruce Richardson 
9199a2dd95SBruce Richardson 
/*
 * Compile-time diagnostic: when RTE_CC_IS_GNU is nonzero and GCC_VERSION is
 * below 50400, emit a warning recommending GCC 5.4.0 or newer. Defining
 * RTE_ARM64_MEMCPY_SKIP_GCC_VER_CHECK disables the check entirely.
 */
#if RTE_CC_IS_GNU && !defined RTE_ARM64_MEMCPY_SKIP_GCC_VER_CHECK
#if (GCC_VERSION < 50400)
#warning "The GCC version is quite old, which may result in sub-optimal \
performance of the compiled code. It is suggested that at least GCC 5.4.0 \
be used."
#endif
#endif
9999a2dd95SBruce Richardson 
10099a2dd95SBruce Richardson static __rte_always_inline
10199a2dd95SBruce Richardson void rte_mov16(uint8_t *dst, const uint8_t *src)
10299a2dd95SBruce Richardson {
10399a2dd95SBruce Richardson 	__uint128_t *dst128 = (__uint128_t *)dst;
10499a2dd95SBruce Richardson 	const __uint128_t *src128 = (const __uint128_t *)src;
10599a2dd95SBruce Richardson 	*dst128 = *src128;
10699a2dd95SBruce Richardson }
10799a2dd95SBruce Richardson 
10899a2dd95SBruce Richardson static __rte_always_inline
10999a2dd95SBruce Richardson void rte_mov32(uint8_t *dst, const uint8_t *src)
11099a2dd95SBruce Richardson {
11199a2dd95SBruce Richardson 	__uint128_t *dst128 = (__uint128_t *)dst;
11299a2dd95SBruce Richardson 	const __uint128_t *src128 = (const __uint128_t *)src;
11399a2dd95SBruce Richardson 	const __uint128_t x0 = src128[0], x1 = src128[1];
11499a2dd95SBruce Richardson 	dst128[0] = x0;
11599a2dd95SBruce Richardson 	dst128[1] = x1;
11699a2dd95SBruce Richardson }
11799a2dd95SBruce Richardson 
11899a2dd95SBruce Richardson static __rte_always_inline
11999a2dd95SBruce Richardson void rte_mov48(uint8_t *dst, const uint8_t *src)
12099a2dd95SBruce Richardson {
12199a2dd95SBruce Richardson 	__uint128_t *dst128 = (__uint128_t *)dst;
12299a2dd95SBruce Richardson 	const __uint128_t *src128 = (const __uint128_t *)src;
12399a2dd95SBruce Richardson 	const __uint128_t x0 = src128[0], x1 = src128[1], x2 = src128[2];
12499a2dd95SBruce Richardson 	dst128[0] = x0;
12599a2dd95SBruce Richardson 	dst128[1] = x1;
12699a2dd95SBruce Richardson 	dst128[2] = x2;
12799a2dd95SBruce Richardson }
12899a2dd95SBruce Richardson 
12999a2dd95SBruce Richardson static __rte_always_inline
13099a2dd95SBruce Richardson void rte_mov64(uint8_t *dst, const uint8_t *src)
13199a2dd95SBruce Richardson {
13299a2dd95SBruce Richardson 	__uint128_t *dst128 = (__uint128_t *)dst;
13399a2dd95SBruce Richardson 	const __uint128_t *src128 = (const __uint128_t *)src;
13499a2dd95SBruce Richardson 	const __uint128_t
13599a2dd95SBruce Richardson 		x0 = src128[0], x1 = src128[1], x2 = src128[2], x3 = src128[3];
13699a2dd95SBruce Richardson 	dst128[0] = x0;
13799a2dd95SBruce Richardson 	dst128[1] = x1;
13899a2dd95SBruce Richardson 	dst128[2] = x2;
13999a2dd95SBruce Richardson 	dst128[3] = x3;
14099a2dd95SBruce Richardson }
14199a2dd95SBruce Richardson 
/**
 * Copy 128 bytes from src to dst as eight __uint128_t words.
 *
 * The first four words are loaded up front; the loads of words 4..7 are then
 * interleaved with the stores of words 0..3, and the last four stores follow.
 * The statement order is deliberate (see the note in the body) and should not
 * be rearranged.
 *
 * @param dst
 *   Destination; 128 bytes are written.
 * @param src
 *   Source; 128 bytes are read.
 */
static __rte_always_inline
void rte_mov128(uint8_t *dst, const uint8_t *src)
{
	__uint128_t *dst128 = (__uint128_t *)dst;
	const __uint128_t *src128 = (const __uint128_t *)src;
	/* Keep below declaration & copy sequence for optimized instructions */
	const __uint128_t
		x0 = src128[0], x1 = src128[1], x2 = src128[2], x3 = src128[3];
	/* From here on each store of an early word is paired with a load of a later one. */
	dst128[0] = x0;
	__uint128_t x4 = src128[4];
	dst128[1] = x1;
	__uint128_t x5 = src128[5];
	dst128[2] = x2;
	__uint128_t x6 = src128[6];
	dst128[3] = x3;
	__uint128_t x7 = src128[7];
	/* All eight words have been read; flush the remaining four. */
	dst128[4] = x4;
	dst128[5] = x5;
	dst128[6] = x6;
	dst128[7] = x7;
}
16399a2dd95SBruce Richardson 
16499a2dd95SBruce Richardson static __rte_always_inline
16599a2dd95SBruce Richardson void rte_mov256(uint8_t *dst, const uint8_t *src)
16699a2dd95SBruce Richardson {
16799a2dd95SBruce Richardson 	rte_mov128(dst, src);
16899a2dd95SBruce Richardson 	rte_mov128(dst + 128, src + 128);
16999a2dd95SBruce Richardson }
17099a2dd95SBruce Richardson 
17199a2dd95SBruce Richardson static __rte_always_inline void
17299a2dd95SBruce Richardson rte_memcpy_lt16(uint8_t *dst, const uint8_t *src, size_t n)
17399a2dd95SBruce Richardson {
17499a2dd95SBruce Richardson 	if (n & 0x08) {
17599a2dd95SBruce Richardson 		/* copy 8 ~ 15 bytes */
17699a2dd95SBruce Richardson 		*(uint64_t *)dst = *(const uint64_t *)src;
17799a2dd95SBruce Richardson 		*(uint64_t *)(dst - 8 + n) = *(const uint64_t *)(src - 8 + n);
17899a2dd95SBruce Richardson 	} else if (n & 0x04) {
17999a2dd95SBruce Richardson 		/* copy 4 ~ 7 bytes */
18099a2dd95SBruce Richardson 		*(uint32_t *)dst = *(const uint32_t *)src;
18199a2dd95SBruce Richardson 		*(uint32_t *)(dst - 4 + n) = *(const uint32_t *)(src - 4 + n);
18299a2dd95SBruce Richardson 	} else if (n & 0x02) {
18399a2dd95SBruce Richardson 		/* copy 2 ~ 3 bytes */
18499a2dd95SBruce Richardson 		*(uint16_t *)dst = *(const uint16_t *)src;
18599a2dd95SBruce Richardson 		*(uint16_t *)(dst - 2 + n) = *(const uint16_t *)(src - 2 + n);
18699a2dd95SBruce Richardson 	} else if (n & 0x01) {
18799a2dd95SBruce Richardson 		/* copy 1 byte */
18899a2dd95SBruce Richardson 		*dst = *src;
18999a2dd95SBruce Richardson 	}
19099a2dd95SBruce Richardson }
19199a2dd95SBruce Richardson 
19299a2dd95SBruce Richardson static __rte_always_inline
19399a2dd95SBruce Richardson void rte_memcpy_ge16_lt128(uint8_t *dst, const uint8_t *src, size_t n)
19499a2dd95SBruce Richardson {
19599a2dd95SBruce Richardson 	if (n < 64) {
19699a2dd95SBruce Richardson 		if (n == 16) {
19799a2dd95SBruce Richardson 			rte_mov16(dst, src);
19899a2dd95SBruce Richardson 		} else if (n <= 32) {
19999a2dd95SBruce Richardson 			rte_mov16(dst, src);
20099a2dd95SBruce Richardson 			rte_mov16(dst - 16 + n, src - 16 + n);
20199a2dd95SBruce Richardson 		} else if (n <= 48) {
20299a2dd95SBruce Richardson 			rte_mov32(dst, src);
20399a2dd95SBruce Richardson 			rte_mov16(dst - 16 + n, src - 16 + n);
20499a2dd95SBruce Richardson 		} else {
20599a2dd95SBruce Richardson 			rte_mov48(dst, src);
20699a2dd95SBruce Richardson 			rte_mov16(dst - 16 + n, src - 16 + n);
20799a2dd95SBruce Richardson 		}
20899a2dd95SBruce Richardson 	} else {
20999a2dd95SBruce Richardson 		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
21099a2dd95SBruce Richardson 		if (n > 48 + 64)
21199a2dd95SBruce Richardson 			rte_mov64(dst - 64 + n, src - 64 + n);
21299a2dd95SBruce Richardson 		else if (n > 32 + 64)
21399a2dd95SBruce Richardson 			rte_mov48(dst - 48 + n, src - 48 + n);
21499a2dd95SBruce Richardson 		else if (n > 16 + 64)
21599a2dd95SBruce Richardson 			rte_mov32(dst - 32 + n, src - 32 + n);
21699a2dd95SBruce Richardson 		else if (n > 64)
21799a2dd95SBruce Richardson 			rte_mov16(dst - 16 + n, src - 16 + n);
21899a2dd95SBruce Richardson 	}
21999a2dd95SBruce Richardson }
22099a2dd95SBruce Richardson 
22199a2dd95SBruce Richardson static __rte_always_inline
22299a2dd95SBruce Richardson void rte_memcpy_ge128(uint8_t *dst, const uint8_t *src, size_t n)
22399a2dd95SBruce Richardson {
22499a2dd95SBruce Richardson 	do {
22599a2dd95SBruce Richardson 		rte_mov128(dst, src);
22699a2dd95SBruce Richardson 		src += 128;
22799a2dd95SBruce Richardson 		dst += 128;
22899a2dd95SBruce Richardson 		n -= 128;
22999a2dd95SBruce Richardson 	} while (likely(n >= 128));
23099a2dd95SBruce Richardson 
23199a2dd95SBruce Richardson 	if (likely(n)) {
23299a2dd95SBruce Richardson 		if (n <= 16)
23399a2dd95SBruce Richardson 			rte_mov16(dst - 16 + n, src - 16 + n);
23499a2dd95SBruce Richardson 		else if (n <= 32)
23599a2dd95SBruce Richardson 			rte_mov32(dst - 32 + n, src - 32 + n);
23699a2dd95SBruce Richardson 		else if (n <= 48)
23799a2dd95SBruce Richardson 			rte_mov48(dst - 48 + n, src - 48 + n);
23899a2dd95SBruce Richardson 		else if (n <= 64)
23999a2dd95SBruce Richardson 			rte_mov64(dst - 64 + n, src - 64 + n);
24099a2dd95SBruce Richardson 		else
24199a2dd95SBruce Richardson 			rte_memcpy_ge16_lt128(dst, src, n);
24299a2dd95SBruce Richardson 	}
24399a2dd95SBruce Richardson }
24499a2dd95SBruce Richardson 
24599a2dd95SBruce Richardson static __rte_always_inline
24699a2dd95SBruce Richardson void rte_memcpy_ge16_lt64(uint8_t *dst, const uint8_t *src, size_t n)
24799a2dd95SBruce Richardson {
24899a2dd95SBruce Richardson 	if (n == 16) {
24999a2dd95SBruce Richardson 		rte_mov16(dst, src);
25099a2dd95SBruce Richardson 	} else if (n <= 32) {
25199a2dd95SBruce Richardson 		rte_mov16(dst, src);
25299a2dd95SBruce Richardson 		rte_mov16(dst - 16 + n, src - 16 + n);
25399a2dd95SBruce Richardson 	} else if (n <= 48) {
25499a2dd95SBruce Richardson 		rte_mov32(dst, src);
25599a2dd95SBruce Richardson 		rte_mov16(dst - 16 + n, src - 16 + n);
25699a2dd95SBruce Richardson 	} else {
25799a2dd95SBruce Richardson 		rte_mov48(dst, src);
25899a2dd95SBruce Richardson 		rte_mov16(dst - 16 + n, src - 16 + n);
25999a2dd95SBruce Richardson 	}
26099a2dd95SBruce Richardson }
26199a2dd95SBruce Richardson 
26299a2dd95SBruce Richardson static __rte_always_inline
26399a2dd95SBruce Richardson void rte_memcpy_ge64(uint8_t *dst, const uint8_t *src, size_t n)
26499a2dd95SBruce Richardson {
26599a2dd95SBruce Richardson 	do {
26699a2dd95SBruce Richardson 		rte_mov64(dst, src);
26799a2dd95SBruce Richardson 		src += 64;
26899a2dd95SBruce Richardson 		dst += 64;
26999a2dd95SBruce Richardson 		n -= 64;
27099a2dd95SBruce Richardson 	} while (likely(n >= 64));
27199a2dd95SBruce Richardson 
27299a2dd95SBruce Richardson 	if (likely(n)) {
27399a2dd95SBruce Richardson 		if (n <= 16)
27499a2dd95SBruce Richardson 			rte_mov16(dst - 16 + n, src - 16 + n);
27599a2dd95SBruce Richardson 		else if (n <= 32)
27699a2dd95SBruce Richardson 			rte_mov32(dst - 32 + n, src - 32 + n);
27799a2dd95SBruce Richardson 		else if (n <= 48)
27899a2dd95SBruce Richardson 			rte_mov48(dst - 48 + n, src - 48 + n);
27999a2dd95SBruce Richardson 		else
28099a2dd95SBruce Richardson 			rte_mov64(dst - 64 + n, src - 64 + n);
28199a2dd95SBruce Richardson 	}
28299a2dd95SBruce Richardson }
28399a2dd95SBruce Richardson 
28499a2dd95SBruce Richardson #if RTE_CACHE_LINE_SIZE >= 128
28599a2dd95SBruce Richardson static __rte_always_inline
28699a2dd95SBruce Richardson void *rte_memcpy(void *dst, const void *src, size_t n)
28799a2dd95SBruce Richardson {
28899a2dd95SBruce Richardson 	if (n < 16) {
28999a2dd95SBruce Richardson 		rte_memcpy_lt16((uint8_t *)dst, (const uint8_t *)src, n);
29099a2dd95SBruce Richardson 		return dst;
29199a2dd95SBruce Richardson 	}
29299a2dd95SBruce Richardson 	if (n < 128) {
29399a2dd95SBruce Richardson 		rte_memcpy_ge16_lt128((uint8_t *)dst, (const uint8_t *)src, n);
29499a2dd95SBruce Richardson 		return dst;
29599a2dd95SBruce Richardson 	}
29699a2dd95SBruce Richardson 	__builtin_prefetch(src, 0, 0);
29799a2dd95SBruce Richardson 	__builtin_prefetch(dst, 1, 0);
29899a2dd95SBruce Richardson 	if (likely(USE_RTE_MEMCPY(dst, src, n))) {
29999a2dd95SBruce Richardson 		rte_memcpy_ge128((uint8_t *)dst, (const uint8_t *)src, n);
30099a2dd95SBruce Richardson 		return dst;
30199a2dd95SBruce Richardson 	} else
30299a2dd95SBruce Richardson 		return memcpy(dst, src, n);
30399a2dd95SBruce Richardson }
30499a2dd95SBruce Richardson 
30599a2dd95SBruce Richardson #else
30699a2dd95SBruce Richardson static __rte_always_inline
30799a2dd95SBruce Richardson void *rte_memcpy(void *dst, const void *src, size_t n)
30899a2dd95SBruce Richardson {
30999a2dd95SBruce Richardson 	if (n < 16) {
31099a2dd95SBruce Richardson 		rte_memcpy_lt16((uint8_t *)dst, (const uint8_t *)src, n);
31199a2dd95SBruce Richardson 		return dst;
31299a2dd95SBruce Richardson 	}
31399a2dd95SBruce Richardson 	if (n < 64) {
31499a2dd95SBruce Richardson 		rte_memcpy_ge16_lt64((uint8_t *)dst, (const uint8_t *)src, n);
31599a2dd95SBruce Richardson 		return dst;
31699a2dd95SBruce Richardson 	}
31799a2dd95SBruce Richardson 	__builtin_prefetch(src, 0, 0);
31899a2dd95SBruce Richardson 	__builtin_prefetch(dst, 1, 0);
31999a2dd95SBruce Richardson 	if (likely(USE_RTE_MEMCPY(dst, src, n))) {
32099a2dd95SBruce Richardson 		rte_memcpy_ge64((uint8_t *)dst, (const uint8_t *)src, n);
32199a2dd95SBruce Richardson 		return dst;
32299a2dd95SBruce Richardson 	} else
32399a2dd95SBruce Richardson 		return memcpy(dst, src, n);
32499a2dd95SBruce Richardson }
32599a2dd95SBruce Richardson #endif /* RTE_CACHE_LINE_SIZE >= 128 */
32699a2dd95SBruce Richardson 
327*719834a6SMattias Rönnblom #ifdef __cplusplus
328*719834a6SMattias Rönnblom }
329*719834a6SMattias Rönnblom #endif
330*719834a6SMattias Rönnblom 
331*719834a6SMattias Rönnblom #else /* RTE_ARCH_ARM64_MEMCPY */
332*719834a6SMattias Rönnblom 
333*719834a6SMattias Rönnblom #ifdef __cplusplus
334*719834a6SMattias Rönnblom extern "C" {
335*719834a6SMattias Rönnblom #endif
336*719834a6SMattias Rönnblom 
/**
 * Copy exactly 16 bytes from src to dst via the C library memcpy().
 *
 * @param dst
 *   Destination; 16 bytes are written.
 * @param src
 *   Source; 16 bytes are read.
 */
static inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
	enum { MOV_BYTES = 16 };

	(void)memcpy(dst, src, MOV_BYTES);
}
34299a2dd95SBruce Richardson 
/**
 * Copy exactly 32 bytes from src to dst via the C library memcpy().
 *
 * @param dst
 *   Destination; 32 bytes are written.
 * @param src
 *   Source; 32 bytes are read.
 */
static inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
	enum { MOV_BYTES = 32 };

	(void)memcpy(dst, src, MOV_BYTES);
}
34899a2dd95SBruce Richardson 
/**
 * Copy exactly 48 bytes from src to dst via the C library memcpy().
 *
 * @param dst
 *   Destination; 48 bytes are written.
 * @param src
 *   Source; 48 bytes are read.
 */
static inline void
rte_mov48(uint8_t *dst, const uint8_t *src)
{
	enum { MOV_BYTES = 48 };

	(void)memcpy(dst, src, MOV_BYTES);
}
35499a2dd95SBruce Richardson 
/**
 * Copy exactly 64 bytes from src to dst via the C library memcpy().
 *
 * @param dst
 *   Destination; 64 bytes are written.
 * @param src
 *   Source; 64 bytes are read.
 */
static inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
	enum { MOV_BYTES = 64 };

	(void)memcpy(dst, src, MOV_BYTES);
}
36099a2dd95SBruce Richardson 
/**
 * Copy exactly 128 bytes from src to dst via the C library memcpy().
 *
 * @param dst
 *   Destination; 128 bytes are written.
 * @param src
 *   Source; 128 bytes are read.
 */
static inline void
rte_mov128(uint8_t *dst, const uint8_t *src)
{
	enum { MOV_BYTES = 128 };

	(void)memcpy(dst, src, MOV_BYTES);
}
36699a2dd95SBruce Richardson 
/**
 * Copy exactly 256 bytes from src to dst via the C library memcpy().
 *
 * @param dst
 *   Destination; 256 bytes are written.
 * @param src
 *   Source; 256 bytes are read.
 */
static inline void
rte_mov256(uint8_t *dst, const uint8_t *src)
{
	enum { MOV_BYTES = 256 };

	(void)memcpy(dst, src, MOV_BYTES);
}
37299a2dd95SBruce Richardson 
/* Without the custom implementation, rte_memcpy() is the C library memcpy(). */
#define rte_memcpy(dst, src, len)	memcpy((dst), (src), (len))
37499a2dd95SBruce Richardson 
37599a2dd95SBruce Richardson #ifdef __cplusplus
37699a2dd95SBruce Richardson }
37799a2dd95SBruce Richardson #endif
37899a2dd95SBruce Richardson 
379*719834a6SMattias Rönnblom #endif /* RTE_ARCH_ARM64_MEMCPY */
380*719834a6SMattias Rönnblom 
#endif /* _RTE_MEMCPY_ARM64_H_ */
382