12e99ea80SHyong Youb Kim /* SPDX-License-Identifier: BSD-3-Clause 22e99ea80SHyong Youb Kim * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved. 372f3de30SBruce Richardson * Copyright 2007 Nuova Systems, Inc. All rights reserved. 472f3de30SBruce Richardson */ 572f3de30SBruce Richardson 672f3de30SBruce Richardson #ifndef _VNIC_WQ_H_ 772f3de30SBruce Richardson #define _VNIC_WQ_H_ 872f3de30SBruce Richardson 972f3de30SBruce Richardson 1072f3de30SBruce Richardson #include "vnic_dev.h" 1172f3de30SBruce Richardson #include "vnic_cq.h" 12fc2c8c06SJohn Daley #include <rte_memzone.h> 1372f3de30SBruce Richardson 1472f3de30SBruce Richardson /* Work queue control */ 1572f3de30SBruce Richardson struct vnic_wq_ctrl { 1604e8ec74SJohn Daley uint64_t ring_base; /* 0x00 */ 1704e8ec74SJohn Daley uint32_t ring_size; /* 0x08 */ 1804e8ec74SJohn Daley uint32_t pad0; 1904e8ec74SJohn Daley uint32_t posted_index; /* 0x10 */ 2004e8ec74SJohn Daley uint32_t pad1; 2104e8ec74SJohn Daley uint32_t cq_index; /* 0x18 */ 2204e8ec74SJohn Daley uint32_t pad2; 2304e8ec74SJohn Daley uint32_t enable; /* 0x20 */ 2404e8ec74SJohn Daley uint32_t pad3; 2504e8ec74SJohn Daley uint32_t running; /* 0x28 */ 2604e8ec74SJohn Daley uint32_t pad4; 2704e8ec74SJohn Daley uint32_t fetch_index; /* 0x30 */ 2804e8ec74SJohn Daley uint32_t pad5; 2904e8ec74SJohn Daley uint32_t dca_value; /* 0x38 */ 3004e8ec74SJohn Daley uint32_t pad6; 3104e8ec74SJohn Daley uint32_t error_interrupt_enable; /* 0x40 */ 3204e8ec74SJohn Daley uint32_t pad7; 3304e8ec74SJohn Daley uint32_t error_interrupt_offset; /* 0x48 */ 3404e8ec74SJohn Daley uint32_t pad8; 3504e8ec74SJohn Daley uint32_t error_status; /* 0x50 */ 3604e8ec74SJohn Daley uint32_t pad9; 3772f3de30SBruce Richardson }; 3872f3de30SBruce Richardson 3972f3de30SBruce Richardson struct vnic_wq { 4072f3de30SBruce Richardson unsigned int index; 4193fb21fdSHyong Youb Kim uint64_t tx_offload_notsup_mask; 4272f3de30SBruce Richardson struct vnic_dev *vdev; 4372f3de30SBruce Richardson struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */ 4472f3de30SBruce Richardson struct vnic_dev_ring ring; 45d355a942SHyong Youb Kim struct rte_mbuf **bufs; 46a3b1e955SJohn Daley unsigned int head_idx; 47c55614d1SHyong Youb Kim unsigned int cq_pend; 48a3b1e955SJohn Daley unsigned int tail_idx; 4972f3de30SBruce Richardson unsigned int socket_id; 50fc2c8c06SJohn Daley const struct rte_memzone *cqmsg_rz; 51fc2c8c06SJohn Daley uint16_t last_completed_index; 52bcaa54c1SHyong Youb Kim uint64_t offloads; 53*00ce4311SHyong Youb Kim bool admin_chan; 54*00ce4311SHyong Youb Kim const struct rte_memzone *admin_msg_rz; 55*00ce4311SHyong Youb Kim uint64_t soft_stats_tx; 5672f3de30SBruce Richardson }; 5772f3de30SBruce Richardson 5872f3de30SBruce Richardson static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq) 5972f3de30SBruce Richardson { 6072f3de30SBruce Richardson /* how many does SW own? */ 6172f3de30SBruce Richardson return wq->ring.desc_avail; 6272f3de30SBruce Richardson } 6372f3de30SBruce Richardson 6472f3de30SBruce Richardson static inline unsigned int vnic_wq_desc_used(struct vnic_wq *wq) 6572f3de30SBruce Richardson { 6672f3de30SBruce Richardson /* how many does HW own? */ 6772f3de30SBruce Richardson return wq->ring.desc_count - wq->ring.desc_avail - 1; 6872f3de30SBruce Richardson } 6972f3de30SBruce Richardson 7072f3de30SBruce Richardson #define PI_LOG2_CACHE_LINE_SIZE 5 7172f3de30SBruce Richardson #define PI_INDEX_BITS 12 7272f3de30SBruce Richardson #define PI_INDEX_MASK ((1U << PI_INDEX_BITS) - 1) 7372f3de30SBruce Richardson #define PI_PREFETCH_LEN_MASK ((1U << PI_LOG2_CACHE_LINE_SIZE) - 1) 7472f3de30SBruce Richardson #define PI_PREFETCH_LEN_OFF 16 7572f3de30SBruce Richardson #define PI_PREFETCH_ADDR_BITS 43 7672f3de30SBruce Richardson #define PI_PREFETCH_ADDR_MASK ((1ULL << PI_PREFETCH_ADDR_BITS) - 1) 7772f3de30SBruce Richardson #define PI_PREFETCH_ADDR_OFF 21 7872f3de30SBruce Richardson 7972f3de30SBruce Richardson /** How many cache lines are touched by buffer (addr, len). */ 8072f3de30SBruce Richardson static inline unsigned int num_cache_lines_touched(dma_addr_t addr, 8172f3de30SBruce Richardson unsigned int len) 8272f3de30SBruce Richardson { 8372f3de30SBruce Richardson const unsigned long mask = PI_PREFETCH_LEN_MASK; 8472f3de30SBruce Richardson const unsigned long laddr = (unsigned long)addr; 8572f3de30SBruce Richardson unsigned long lines, equiv_len; 8672f3de30SBruce Richardson /* A. If addr is aligned, our solution is just to round up len to the 8772f3de30SBruce Richardson next boundary. 8872f3de30SBruce Richardson 8972f3de30SBruce Richardson e.g. addr = 0, len = 48 9072f3de30SBruce Richardson +--------------------+ 9172f3de30SBruce Richardson |XXXXXXXXXXXXXXXXXXXX| 32-byte cacheline a 9272f3de30SBruce Richardson +--------------------+ 9372f3de30SBruce Richardson |XXXXXXXXXX | cacheline b 9472f3de30SBruce Richardson +--------------------+ 9572f3de30SBruce Richardson 9672f3de30SBruce Richardson B. If addr is not aligned, however, we may use an extra 9772f3de30SBruce Richardson cacheline. e.g. addr = 12, len = 22 9872f3de30SBruce Richardson 9972f3de30SBruce Richardson +--------------------+ 10072f3de30SBruce Richardson | XXXXXXXXXXXXX| 10172f3de30SBruce Richardson +--------------------+ 10272f3de30SBruce Richardson |XX | 10372f3de30SBruce Richardson +--------------------+ 10472f3de30SBruce Richardson 10572f3de30SBruce Richardson Our solution is to make the problem equivalent to case A 10672f3de30SBruce Richardson above by adding the empty space in the first cacheline to the length: 10772f3de30SBruce Richardson unsigned long len; 10872f3de30SBruce Richardson 10972f3de30SBruce Richardson +--------------------+ 11072f3de30SBruce Richardson |eeeeeeeXXXXXXXXXXXXX| "e" is empty space, which we add to len 11172f3de30SBruce Richardson +--------------------+ 11272f3de30SBruce Richardson |XX | 11372f3de30SBruce Richardson +--------------------+ 11472f3de30SBruce Richardson 11572f3de30SBruce Richardson */ 11672f3de30SBruce Richardson equiv_len = len + (laddr & mask); 11772f3de30SBruce Richardson 11872f3de30SBruce Richardson /* Now we can just round up this len to the next 32-byte boundary. */ 11972f3de30SBruce Richardson lines = (equiv_len + mask) & (~mask); 12072f3de30SBruce Richardson 12172f3de30SBruce Richardson /* Scale bytes -> cachelines. */ 12272f3de30SBruce Richardson return lines >> PI_LOG2_CACHE_LINE_SIZE; 12372f3de30SBruce Richardson } 12472f3de30SBruce Richardson 12504e8ec74SJohn Daley static inline uint64_t vnic_cached_posted_index(dma_addr_t addr, 12604e8ec74SJohn Daley unsigned int len, 12772f3de30SBruce Richardson unsigned int index) 12872f3de30SBruce Richardson { 12972f3de30SBruce Richardson unsigned int num_cache_lines = num_cache_lines_touched(addr, len); 13072f3de30SBruce Richardson /* Wish we could avoid a branch here. We could have separate 13172f3de30SBruce Richardson * vnic_wq_post() and vinc_wq_post_inline(), the latter 13272f3de30SBruce Richardson * only supporting < 1k (2^5 * 2^5) sends, I suppose. This would 13372f3de30SBruce Richardson * eliminate the if (eop) branch as well. 13472f3de30SBruce Richardson */ 13572f3de30SBruce Richardson if (num_cache_lines > PI_PREFETCH_LEN_MASK) 13672f3de30SBruce Richardson num_cache_lines = 0; 13772f3de30SBruce Richardson return (index & PI_INDEX_MASK) | 13872f3de30SBruce Richardson ((num_cache_lines & PI_PREFETCH_LEN_MASK) << PI_PREFETCH_LEN_OFF) | 13972f3de30SBruce Richardson (((addr >> PI_LOG2_CACHE_LINE_SIZE) & 14072f3de30SBruce Richardson PI_PREFETCH_ADDR_MASK) << PI_PREFETCH_ADDR_OFF); 14172f3de30SBruce Richardson } 14272f3de30SBruce Richardson 143a3b1e955SJohn Daley static inline uint32_t 144a3b1e955SJohn Daley buf_idx_incr(uint32_t n_descriptors, uint32_t idx) 145a3b1e955SJohn Daley { 146a3b1e955SJohn Daley idx++; 147a3b1e955SJohn Daley if (unlikely(idx == n_descriptors)) 148a3b1e955SJohn Daley idx = 0; 149a3b1e955SJohn Daley return idx; 150a3b1e955SJohn Daley } 151a3b1e955SJohn Daley 15272f3de30SBruce Richardson void vnic_wq_free(struct vnic_wq *wq); 15372f3de30SBruce Richardson int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, 15472f3de30SBruce Richardson unsigned int desc_count, unsigned int desc_size); 155*00ce4311SHyong Youb Kim int vnic_admin_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, 156*00ce4311SHyong Youb Kim unsigned int desc_count, unsigned int desc_size); 15772f3de30SBruce Richardson void vnic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index, 15872f3de30SBruce Richardson unsigned int fetch_index, unsigned int posted_index, 15972f3de30SBruce Richardson unsigned int error_interrupt_enable, 16072f3de30SBruce Richardson unsigned int error_interrupt_offset); 16172f3de30SBruce Richardson void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, 16272f3de30SBruce Richardson unsigned int error_interrupt_enable, 16372f3de30SBruce Richardson unsigned int error_interrupt_offset); 16472f3de30SBruce Richardson void vnic_wq_error_out(struct vnic_wq *wq, unsigned int error); 16572f3de30SBruce Richardson unsigned int vnic_wq_error_status(struct vnic_wq *wq); 16672f3de30SBruce Richardson void vnic_wq_enable(struct vnic_wq *wq); 16772f3de30SBruce Richardson int vnic_wq_disable(struct vnic_wq *wq); 16872f3de30SBruce Richardson void vnic_wq_clean(struct vnic_wq *wq, 169d355a942SHyong Youb Kim void (*buf_clean)(struct rte_mbuf **buf)); 17072f3de30SBruce Richardson #endif /* _VNIC_WQ_H_ */ 171