/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#ifndef __OCTEONTX_IO_H__
#define __OCTEONTX_IO_H__

#include <stddef.h>
#include <stdint.h>

#include <rte_common.h>
#include <rte_io.h>

/* In the Cavium OCTEON TX SoC, all accesses to device registers are
 * implicitly strongly ordered, so the relaxed versions of the I/O
 * operations are safe to use without any I/O memory barriers.
 */
#define octeontx_read64 rte_read64_relaxed
#define octeontx_write64 rte_write64_relaxed

/* ARM64-specific functions */
#if defined(RTE_ARCH_ARM64)
#define octeontx_prefetch_store_keep(_ptr) ({\
	asm volatile("prfm pstl1keep, %a0\n" : : "p" (_ptr)); })

#define octeontx_load_pair(val0, val1, addr) ({		\
	asm volatile(					\
	"ldp %x[x0], %x[x1], [%x[p1]]"			\
	:[x0]"=r"(val0), [x1]"=r"(val1)			\
	:[p1]"r"(addr)					\
	); })

#define octeontx_store_pair(val0, val1, addr) ({	\
	asm volatile(					\
	"stp %x[x0], %x[x1], [%x[p1]]"			\
	::[x0]"r"(val0), [x1]"r"(val1), [p1]"r"(addr)	\
	); })
#else /* Unoptimized fallbacks for building on non-ARM64 architectures */

#define octeontx_prefetch_store_keep(_ptr) do {} while (0)

#define octeontx_load_pair(val0, val1, addr)		\
do {							\
	val0 = rte_read64(addr);			\
	val1 = rte_read64(((uint8_t *)addr) + 8);	\
} while (0)

#define octeontx_store_pair(val0, val1, addr)		\
do {							\
	rte_write64(val0, addr);			\
	rte_write64(val1, (((uint8_t *)addr) + 8));	\
} while (0)
#endif

#if defined(RTE_ARCH_ARM64)
/**
 * Perform an atomic fetch-and-add operation.
 *
 * @param addr is the address of the 64-bit location to update
 * @param off is the value to add
 *
 * @return the value at 'addr' before the addition
 */
static inline uint64_t
octeontx_reg_ldadd_u64(void *addr, int64_t off)
{
	uint64_t old_val;

	__asm__ volatile(
		" .cpu generic+lse\n"
		" ldadd %1, %0, [%2]\n"
		: "=r" (old_val) : "r" (off), "r" (addr) : "memory");

	return old_val;
}

/**
 * Perform an LMTST operation - an atomic write of up to 128 bytes to
 * an I/O block that supports this operation type.
 *
 * @param lmtline_va is the address where LMTLINE is mapped
 * @param ioreg_va is the virtual address of the device register
 * @param cmdbuf is the array of peripheral commands to execute
 * @param cmdsize is the number of 64-bit words in 'cmdbuf'
 *
 * @return N/A
 */
static inline void
octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[],
		   uint64_t cmdsize)
{
	uint64_t result;
	uint64_t word_count;
	uint64_t *lmtline = lmtline_va;

	word_count = cmdsize;

	do {
		/* Copy commands to LMTLINE, two 64-bit words at a time */
		for (result = 0; result < word_count; result += 2) {
			lmtline[result + 0] = cmdbuf[result + 0];
			lmtline[result + 1] = cmdbuf[result + 1];
		}

		/* LDEOR initiates the atomic transfer to the I/O device;
		 * a zero result means the transfer did not complete, so
		 * the whole sequence must be retried.
		 */
		__asm__ volatile(
			" .cpu generic+lse\n"
			" ldeor xzr, %0, [%1]\n"
			: "=r" (result) : "r" (ioreg_va) : "memory");
	} while (!result);
}
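
/* Usage sketch for octeontx_reg_ldadd_u64() (illustrative only; the
 * mapped register address 'cnt_reg_va' is a hypothetical name, not
 * part of this API): sample a device counter and advance it by one
 * in a single atomic step.
 *
 *	uint64_t old;
 *
 *	old = octeontx_reg_ldadd_u64(cnt_reg_va, 1);
 *
 * Here 'old' holds the counter value from before the increment.
 */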
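
/* Usage sketch for octeontx_reg_lmtst() (illustrative only; the
 * command words and the mapped 'lmtline_va'/'ioreg_va' addresses are
 * hypothetical): submit a two-word (16-byte) command atomically.
 *
 *	uint64_t cmd[2];
 *
 *	cmd[0] = first_command_word;
 *	cmd[1] = second_command_word;
 *	octeontx_reg_lmtst(lmtline_va, ioreg_va, cmd, 2);
 *
 * The call copies 'cmd' into the LMTLINE and retries the LDEOR until
 * the device accepts the transfer.
 */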

#else

/* Stubs so that common code compiles on non-ARM64 architectures; the
 * LDADD/LMTST primitives are only meaningful on OCTEON TX hardware.
 */
static inline uint64_t
octeontx_reg_ldadd_u64(void *addr, int64_t off)
{
	RTE_SET_USED(addr);
	RTE_SET_USED(off);
	return 0;
}

static inline void
octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[],
		   uint64_t cmdsize)
{
	RTE_SET_USED(lmtline_va);
	RTE_SET_USED(ioreg_va);
	RTE_SET_USED(cmdbuf);
	RTE_SET_USED(cmdsize);
}

#endif
#endif /* __OCTEONTX_IO_H__ */