/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2015 Cavium, Inc
 */

#ifndef _RTE_MEMCPY_ARM64_H_
#define _RTE_MEMCPY_ARM64_H_

#include <stdint.h>
#include <string.h>

#include "generic/rte_memcpy.h"

#ifdef RTE_ARCH_ARM64_MEMCPY
#include <rte_common.h>
#include <rte_branch_prediction.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * The memory copy performance differs on different AArch64 micro-architectures.
 * And the most recent glibc (e.g. 2.23 or later) can provide a better memcpy()
 * performance compared to old glibc versions. It's always suggested to use a
 * more recent glibc if possible, from which the entire system can get benefit.
 *
 * This implementation improves memory copy on some aarch64 micro-architectures,
 * when an old glibc (e.g. 2.19, 2.17...) is being used. It is disabled by
 * default and needs "RTE_ARCH_ARM64_MEMCPY" defined to activate. It's not
 * always providing better performance than memcpy() so users need to run unit
 * test "memcpy_perf_autotest" and customize parameters in customization section
 * below for best performance.
 *
 * Compiler version will also impact the rte_memcpy() performance. It's observed
 * on some platforms and with the same code, GCC 7.2.0 compiled binaries can
 * provide better performance than GCC 4.8.5 compiled binaries.
 */

/**************************************
 * Beginning of customization section
 **************************************/
/*
 * An address is considered aligned when none of the bits covered by this
 * mask are set. The default is one eighth of a cache line, e.g. mask 0x7
 * (8-byte alignment) for a 64-byte cache line.
 */
#ifndef RTE_ARM64_MEMCPY_ALIGN_MASK
#define RTE_ARM64_MEMCPY_ALIGN_MASK ((RTE_CACHE_LINE_SIZE >> 3) - 1)
#endif

#ifndef RTE_ARM64_MEMCPY_STRICT_ALIGN
/* Only src unalignment will be treated as unaligned copy */
#define RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) \
((uintptr_t)(src) & RTE_ARM64_MEMCPY_ALIGN_MASK)
#else
/* Both dst and src unalignment will be treated as unaligned copy */
#define RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) \
(((uintptr_t)(dst) | (uintptr_t)(src)) & RTE_ARM64_MEMCPY_ALIGN_MASK)
#endif


/*
 * If copy size is larger than threshold, memcpy() will be used.
 * Run "memcpy_perf_autotest" to determine the proper threshold.
 */
#ifdef RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD
#define USE_ALIGNED_RTE_MEMCPY(dst, src, n) \
(!RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) && \
n <= (size_t)RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD)
#else
#define USE_ALIGNED_RTE_MEMCPY(dst, src, n) \
(!RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src))
#endif
#ifdef RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD
#define USE_UNALIGNED_RTE_MEMCPY(dst, src, n) \
(RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) && \
n <= (size_t)RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD)
#else
#define USE_UNALIGNED_RTE_MEMCPY(dst, src, n) \
(RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src))
#endif
/*
 * The logic of USE_RTE_MEMCPY() can also be modified to best fit platform.
 */
#if defined(RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD) \
|| defined(RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD)
#define USE_RTE_MEMCPY(dst, src, n) \
(USE_ALIGNED_RTE_MEMCPY(dst, src, n) || USE_UNALIGNED_RTE_MEMCPY(dst, src, n))
#else
/* No threshold configured: always take the inline implementation. */
#define USE_RTE_MEMCPY(dst, src, n) (1)
#endif
/**************************************
 * End of customization section
 **************************************/


#if RTE_CC_IS_GNU && !defined RTE_ARM64_MEMCPY_SKIP_GCC_VER_CHECK
#if (GCC_VERSION < 50400)
#warning "The GCC version is quite old, which may result in sub-optimal \
performance of the compiled code. It is suggested that at least GCC 5.4.0 \
be used."
#endif
#endif

/*
 * Copy exactly 16 bytes with one 128-bit load/store.
 *
 * NOTE(review): the __uint128_t accesses in the rte_mov*() helpers below
 * are potentially unaligned and type-punned. This relies on AArch64
 * permitting unaligned loads/stores and on the compiler lowering these to
 * ldp/stp pairs — presumably fine with the GCC versions discussed above,
 * but technically outside strict-aliasing rules; confirm per toolchain.
 */
static __rte_always_inline
void rte_mov16(uint8_t *dst, const uint8_t *src)
{
	__uint128_t *dst128 = (__uint128_t *)dst;
	const __uint128_t *src128 = (const __uint128_t *)src;
	*dst128 = *src128;
}

/* Copy exactly 32 bytes. All loads are issued before any store. */
static __rte_always_inline
void rte_mov32(uint8_t *dst, const uint8_t *src)
{
	__uint128_t *dst128 = (__uint128_t *)dst;
	const __uint128_t *src128 = (const __uint128_t *)src;
	const __uint128_t x0 = src128[0], x1 = src128[1];
	dst128[0] = x0;
	dst128[1] = x1;
}

/* Copy exactly 48 bytes. All loads are issued before any store. */
static __rte_always_inline
void rte_mov48(uint8_t *dst, const uint8_t *src)
{
	__uint128_t *dst128 = (__uint128_t *)dst;
	const __uint128_t *src128 = (const __uint128_t *)src;
	const __uint128_t x0 = src128[0], x1 = src128[1], x2 = src128[2];
	dst128[0] = x0;
	dst128[1] = x1;
	dst128[2] = x2;
}

/* Copy exactly 64 bytes. All loads are issued before any store. */
static __rte_always_inline
void rte_mov64(uint8_t *dst, const uint8_t *src)
{
	__uint128_t *dst128 = (__uint128_t *)dst;
	const __uint128_t *src128 = (const __uint128_t *)src;
	const __uint128_t
		x0 = src128[0], x1 = src128[1], x2 = src128[2], x3 = src128[3];
	dst128[0] = x0;
	dst128[1] = x1;
	dst128[2] = x2;
	dst128[3] = x3;
}

/*
 * Copy exactly 128 bytes. Loads and stores are deliberately interleaved
 * four registers deep; do not reorder these statements.
 */
static __rte_always_inline
void rte_mov128(uint8_t *dst, const uint8_t *src)
{
	__uint128_t *dst128 = (__uint128_t *)dst;
	const __uint128_t *src128 = (const __uint128_t *)src;
	/* Keep below declaration & copy sequence for optimized instructions */
	const __uint128_t
		x0 = src128[0], x1 = src128[1], x2 = src128[2], x3 = src128[3];
	dst128[0] = x0;
	__uint128_t x4 = src128[4];
	dst128[1] = x1;
	__uint128_t x5 = src128[5];
	dst128[2] = x2;
	__uint128_t x6 = src128[6];
	dst128[3] = x3;
	__uint128_t x7 = src128[7];
	dst128[4] = x4;
	dst128[5] = x5;
	dst128[6] = x6;
	dst128[7] = x7;
}

/* Copy exactly 256 bytes as two 128-byte copies. */
static __rte_always_inline
void rte_mov256(uint8_t *dst, const uint8_t *src)
{
	rte_mov128(dst, src);
	rte_mov128(dst + 128, src + 128);
}

/*
 * Copy 0 to 15 bytes. Each size class copies a head chunk and a tail
 * chunk that may overlap, so any length in the class is handled with two
 * fixed-size accesses. n == 0 takes no branch and copies nothing.
 */
static __rte_always_inline void
rte_memcpy_lt16(uint8_t *dst, const uint8_t *src, size_t n)
{
	if (n & 0x08) {
		/* copy 8 ~ 15 bytes */
		*(uint64_t *)dst = *(const uint64_t *)src;
		*(uint64_t *)(dst - 8 + n) = *(const uint64_t *)(src - 8 + n);
	} else if (n & 0x04) {
		/* copy 4 ~ 7 bytes */
		*(uint32_t *)dst = *(const uint32_t *)src;
		*(uint32_t *)(dst - 4 + n) = *(const uint32_t *)(src - 4 + n);
	} else if (n & 0x02) {
		/* copy 2 ~ 3 bytes */
		*(uint16_t *)dst = *(const uint16_t *)src;
		*(uint16_t *)(dst - 2 + n) = *(const uint16_t *)(src - 2 + n);
	} else if (n & 0x01) {
		/* copy 1 byte */
		*dst = *src;
	}
}

/*
 * Copy 16 to 127 bytes: a fixed-size head copy plus a (possibly
 * overlapping) copy aligned to the end of the buffer.
 */
static __rte_always_inline
void rte_memcpy_ge16_lt128(uint8_t *dst, const uint8_t *src, size_t n)
{
	if (n < 64) {
		if (n == 16) {
			rte_mov16(dst, src);
		} else if (n <= 32) {
			rte_mov16(dst, src);
			rte_mov16(dst - 16 + n, src - 16 + n);
		} else if (n <= 48) {
			rte_mov32(dst, src);
			rte_mov16(dst - 16 + n, src - 16 + n);
		} else {
			rte_mov48(dst, src);
			rte_mov16(dst - 16 + n, src - 16 + n);
		}
	} else {
		/* 64 <= n < 128: 64-byte head, then an end-aligned tail. */
		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
		if (n > 48 + 64)
			rte_mov64(dst - 64 + n, src - 64 + n);
		else if (n > 32 + 64)
			rte_mov48(dst - 48 + n, src - 48 + n);
		else if (n > 16 + 64)
			rte_mov32(dst - 32 + n, src - 32 + n);
		else if (n > 64)
			rte_mov16(dst - 16 + n, src - 16 + n);
	}
}

/*
 * Copy n >= 128 bytes: 128-byte main loop, then an end-aligned
 * (possibly overlapping) copy of the remaining 0 to 127 bytes.
 */
static __rte_always_inline
void rte_memcpy_ge128(uint8_t *dst, const uint8_t *src, size_t n)
{
	do {
		rte_mov128(dst, src);
		src += 128;
		dst += 128;
		n -= 128;
	} while (likely(n >= 128));

	if (likely(n)) {
		if (n <= 16)
			rte_mov16(dst - 16 + n, src - 16 + n);
		else if (n <= 32)
			rte_mov32(dst - 32 + n, src - 32 + n);
		else if (n <= 48)
			rte_mov48(dst - 48 + n, src - 48 + n);
		else if (n <= 64)
			rte_mov64(dst - 64 + n, src - 64 + n);
		else
			rte_memcpy_ge16_lt128(dst, src, n);
	}
}

/*
 * Copy 16 to 63 bytes: fixed-size head copy plus a (possibly
 * overlapping) 16-byte copy aligned to the end of the buffer.
 */
static __rte_always_inline
void rte_memcpy_ge16_lt64(uint8_t *dst, const uint8_t *src, size_t n)
{
	if (n == 16) {
		rte_mov16(dst, src);
	} else if (n <= 32) {
		rte_mov16(dst, src);
		rte_mov16(dst - 16 + n, src - 16 + n);
	} else if (n <= 48) {
		rte_mov32(dst, src);
		rte_mov16(dst - 16 + n, src - 16 + n);
	} else {
		rte_mov48(dst, src);
		rte_mov16(dst - 16 + n, src - 16 + n);
	}
}

/*
 * Copy n >= 64 bytes: 64-byte main loop, then an end-aligned
 * (possibly overlapping) copy of the remaining 0 to 63 bytes.
 */
static __rte_always_inline
void rte_memcpy_ge64(uint8_t *dst, const uint8_t *src, size_t n)
{
	do {
		rte_mov64(dst, src);
		src += 64;
		dst += 64;
		n -= 64;
	} while (likely(n >= 64));

	if (likely(n)) {
		if (n <= 16)
			rte_mov16(dst - 16 + n, src - 16 + n);
		else if (n <= 32)
			rte_mov32(dst - 32 + n, src - 32 + n);
		else if (n <= 48)
			rte_mov48(dst - 48 + n, src - 48 + n);
		else
			rte_mov64(dst - 64 + n, src - 64 + n);
	}
}

#if RTE_CACHE_LINE_SIZE >= 128
/*
 * rte_memcpy() for 128-byte (or larger) cache line targets.
 * Dispatches on size; for large copies, prefetches the first lines
 * (read for src, write for dst, non-temporal locality hint 0) and falls
 * back to libc memcpy() when USE_RTE_MEMCPY() rejects the copy per the
 * customization section above. Returns dst, like memcpy().
 */
static __rte_always_inline
void *rte_memcpy(void *dst, const void *src, size_t n)
{
	if (n < 16) {
		rte_memcpy_lt16((uint8_t *)dst, (const uint8_t *)src, n);
		return dst;
	}
	if (n < 128) {
		rte_memcpy_ge16_lt128((uint8_t *)dst, (const uint8_t *)src, n);
		return dst;
	}
	__builtin_prefetch(src, 0, 0);
	__builtin_prefetch(dst, 1, 0);
	if (likely(USE_RTE_MEMCPY(dst, src, n))) {
		rte_memcpy_ge128((uint8_t *)dst, (const uint8_t *)src, n);
		return dst;
	} else
		return memcpy(dst, src, n);
}

#else
/*
 * rte_memcpy() for cache lines smaller than 128 bytes: same structure as
 * above but with a 64-byte inline-copy cutoff. Returns dst, like memcpy().
 */
static __rte_always_inline
void *rte_memcpy(void *dst, const void *src, size_t n)
{
	if (n < 16) {
		rte_memcpy_lt16((uint8_t *)dst, (const uint8_t *)src, n);
		return dst;
	}
	if (n < 64) {
		rte_memcpy_ge16_lt64((uint8_t *)dst, (const uint8_t *)src, n);
		return dst;
	}
	__builtin_prefetch(src, 0, 0);
	__builtin_prefetch(dst, 1, 0);
	if (likely(USE_RTE_MEMCPY(dst, src, n))) {
		rte_memcpy_ge64((uint8_t *)dst, (const uint8_t *)src, n);
		return dst;
	} else
		return memcpy(dst, src, n);
}
#endif /* RTE_CACHE_LINE_SIZE >= 128 */

#ifdef __cplusplus
}
#endif

#else /* RTE_ARCH_ARM64_MEMCPY */

#ifdef __cplusplus
extern "C" {
#endif

/* Fallback path: plain libc memcpy() wrappers for each fixed size. */
static inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
	memcpy(dst, src, 16);
}

static inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
	memcpy(dst, src, 32);
}

static inline void
rte_mov48(uint8_t *dst, const uint8_t *src)
{
	memcpy(dst, src, 48);
}

static inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
	memcpy(dst, src, 64);
}

static inline void
rte_mov128(uint8_t *dst, const uint8_t *src)
{
	memcpy(dst, src, 128);
}

static inline void
rte_mov256(uint8_t *dst, const uint8_t *src)
{
	memcpy(dst, src, 256);
}

#define rte_memcpy(d, s, n)	memcpy((d), (s), (n))

#ifdef __cplusplus
}
#endif

#endif /* RTE_ARCH_ARM64_MEMCPY */

#endif /* _RTE_MEMCPY_ARM64_H_ */