/*	$NetBSD: if_enavar.h,v 1.8 2021/07/19 21:16:33 jmcneill Exp $	*/

/*-
 * BSD LICENSE
 *
 * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/dev/ena/ena.h 333450 2018-05-10 09:06:21Z mw $
 *
 */

#ifndef ENA_H
#define ENA_H

#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/pcq.h>

#include "external/bsd/ena-com/ena_com.h"
#include "external/bsd/ena-com/ena_eth_com.h"

/* Driver version, stitched into DRV_MODULE_VERSION as "major.minor.sub". */
#define DRV_MODULE_VER_MAJOR	0
#define DRV_MODULE_VER_MINOR	8
#define DRV_MODULE_VER_SUBMINOR	1

#define DRV_MODULE_NAME		"ena"

#ifndef DRV_MODULE_VERSION
#define DRV_MODULE_VERSION				\
	___STRING(DRV_MODULE_VER_MAJOR) "."		\
	___STRING(DRV_MODULE_VER_MINOR) "."		\
	___STRING(DRV_MODULE_VER_SUBMINOR)
#endif
#define DEVICE_NAME	"Elastic Network Adapter (ENA)"
#define DEVICE_DESC	"ENA adapter"

/* Calculate DMA mask - width for ena cannot exceed 48, so it is safe */
#define ENA_DMA_BIT_MASK(x)	((1ULL << (x)) - 1ULL)

/* 1 for AENQ + ADMIN */
#define ENA_ADMIN_MSIX_VEC		1
#define ENA_MAX_MSIX_VEC(io_queues)	(ENA_ADMIN_MSIX_VEC + (io_queues))

/* PCI BARs: BAR0 carries device registers, BAR2 the LLQ memory window. */
#define ENA_REG_BAR	PCI_BAR(0)
#define ENA_MEM_BAR	PCI_BAR(2)

/* Maximum DMA segments per mapped mbuf chain. */
#define ENA_BUS_DMA_SEGS	32

#define ENA_DEFAULT_RING_SIZE	1024

/* Refill RX ring when free descriptors fall below ring_size / divider. */
#define ENA_RX_REFILL_THRESH_DIVIDER	8

/* Buffer size for per-interrupt name strings. */
#define ENA_IRQNAME_SIZE	40

/* Maximum device buffer descriptors per packet. */
#define ENA_PKT_MAX_BUFS	19

/* RSS indirection table: 2^7 = 128 entries. */
#define ENA_RX_RSS_TABLE_LOG_SIZE	7
#define ENA_RX_RSS_TABLE_SIZE	(1 << ENA_RX_RSS_TABLE_LOG_SIZE)

#define ENA_HASH_KEY_SIZE	40

#define ENA_MAX_FRAME_LEN	10000
#define ENA_MIN_FRAME_LEN	60

/* Clean completed TX descriptors once this many have accumulated. */
#define ENA_TX_CLEANUP_THRESHOLD	128

/* Ring the TX doorbell after this many pending descriptors. */
#define DB_THRESHOLD	64

#define TX_COMMIT	32
/*
 * TX budget for cleaning. It should be half of the RX budget to reduce amount
 * of TCP retransmissions.
 */
#define TX_BUDGET	128
/* RX cleanup budget. -1 stands for infinity. */
#define RX_BUDGET	256
/*
 * How many times we can repeat cleanup in the io irq handling routine if the
 * RX or TX budget was depleted.
 */
#define CLEAN_BUDGET	8

/* Interrupt moderation intervals (usec) for RX and TX completions. */
#define RX_IRQ_INTERVAL	20
#define TX_IRQ_INTERVAL	50

#define ENA_MIN_MTU	128

#define ENA_TSO_MAXSIZE	65536

#define ENA_MMIO_DISABLE_REG_READ	BIT(0)

/* Ring indices wrap via masking, so ring_size must be a power of two. */
#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

/* TX and RX I/O queues of pair q are interleaved: 2q and 2q+1. */
#define ENA_IO_TXQ_IDX(q)	(2 * (q))
#define ENA_IO_RXQ_IDX(q)	(2 * (q) + 1)

/* MSI-X vector 0 is management (admin queue + AENQ); I/O vectors follow. */
#define ENA_MGMNT_IRQ_IDX	0
#define ENA_IO_IRQ_FIRST_IDX	1
#define ENA_IO_IRQ_IDX(q)	(ENA_IO_IRQ_FIRST_IDX + (q))

/*
 * ENA device should send keep alive msg every 1 sec.
 * We wait for 6 sec just to be on the safe side.
 */
#define DEFAULT_KEEP_ALIVE_TO	(SBT_1S * 6)

/* Time in jiffies before concluding the transmitter is hung. */
#define DEFAULT_TX_CMP_TO	(SBT_1S * 5)

/* Number of queues to check for missing queues per timer tick */
#define DEFAULT_TX_MONITORED_QUEUES	(4)

/* Max number of timeouted packets before device reset */
#define DEFAULT_TX_CMP_THRESHOLD	(128)

/*
 * Supported PCI vendor and devices IDs
 */
#define PCI_VENDOR_ID_AMAZON	0x1d0f

#define PCI_DEV_ID_ENA_PF	0x0ec2
#define PCI_DEV_ID_ENA_LLQ_PF	0x1ec2
#define PCI_DEV_ID_ENA_VF	0xec20
#define PCI_DEV_ID_ENA_LLQ_VF	0xec21

/*
 * Flags indicating current ENA driver state
 * (bit indices into ena_adapter::flags, manipulated via the
 * ENA_FLAG_* macros below).
 */
enum ena_flags_t {
	ENA_FLAG_DEVICE_RUNNING,
	ENA_FLAG_DEV_UP,
	ENA_FLAG_LINK_UP,
	ENA_FLAG_MSIX_ENABLED,
	ENA_FLAG_TRIGGER_RESET,
	ENA_FLAG_ONGOING_RESET,
	ENA_FLAG_DEV_UP_BEFORE_RESET,
	ENA_FLAG_RSS_ACTIVE,
	ENA_FLAGS_NUMBER = ENA_FLAG_RSS_ACTIVE
};

/* All bits set except `bit' - used by ENA_FLAG_CLEAR_ATOMIC. */
#define ENA_FLAG_BITMASK(bit)	(~(uint32_t)__BIT(bit))
#define ENA_FLAG_ZERO(adapter)	(adapter)->flags = 0;
#define ENA_FLAG_ISSET(bit, adapter)	((adapter)->flags & __BIT(bit))
#define ENA_FLAG_SET_ATOMIC(bit, adapter)	\
	atomic_or_32(&(adapter)->flags, __BIT(bit))
#define ENA_FLAG_CLEAR_ATOMIC(bit, adapter)	\
	atomic_and_32(&(adapter)->flags, ENA_FLAG_BITMASK(bit))

/* FreeBSD-compatible signed binary time (int64, 32.32 fixed point). */
typedef __int64_t sbintime_t;

struct msix_entry {
	int entry;
	int vector;
};

/* PCI vendor/device match table entry. */
typedef struct _ena_vendor_info_t {
	unsigned int vendor_id;
	unsigned int device_id;
	unsigned int index;
} ena_vendor_info_t;

/* One TX/RX ring pair together with its interrupt/CPU binding. */
struct ena_que {
	struct ena_adapter *adapter;
	struct ena_ring *tx_ring;
	struct ena_ring *rx_ring;
	uint32_t id;
	int cpu;
};

/* Per-descriptor TX bookkeeping (one per in-flight TX mbuf chain). */
struct ena_tx_buffer {
	struct mbuf *mbuf;
	/* # of ena desc for this specific mbuf
	 * (includes data desc and metadata desc) */
	unsigned int tx_descs;
	/* # of buffers used by this mbuf */
	unsigned int num_of_bufs;
	bus_dmamap_t map;

	/* Used to detect missing tx packets */
	struct bintime timestamp;
	bool print_once;

	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
} __aligned(CACHE_LINE_SIZE);

/* Per-descriptor RX bookkeeping. */
struct ena_rx_buffer {
	struct mbuf *mbuf;
	bus_dmamap_t map;
	struct ena_com_buf ena_buf;
} __aligned(CACHE_LINE_SIZE);

/* Per-TX-ring event counters (attached/detached as evcnt). */
struct ena_stats_tx {
	char name[16];
	struct evcnt cnt;
	struct evcnt bytes;
	struct evcnt prepare_ctx_err;
	struct evcnt dma_mapping_err;
	struct evcnt doorbells;
	struct evcnt missing_tx_comp;
	struct evcnt bad_req_id;
	struct evcnt collapse;
	struct evcnt collapse_err;
	struct evcnt pcq_drops;
};

/* Per-RX-ring event counters. */
struct ena_stats_rx {
	char name[16];
	struct evcnt cnt;
	struct evcnt bytes;
	struct evcnt refil_partial;
	struct evcnt bad_csum;
	struct evcnt mbuf_alloc_fail;
	struct evcnt dma_mapping_err;
	struct evcnt bad_desc_num;
	struct evcnt bad_req_id;
	struct evcnt empty_rx_ring;
};

/*
 * Locking notes:
 * + For TX, a field in ena_ring is protected by ring_mtx (a spin mutex).
 *   - protect them only when I/F is up.
 *   - when I/F is down or attaching, detaching, no need to protect them.
 * + For RX, a field "stopping" is protected by ring_mtx (a spin mutex).
 *   - other fields in ena_ring are not protected.
 * + a fields in ena_adapter is protected by global_mtx (a adaptive mutex).
 *
 * + a field marked "stable" is unlocked.
 * + a field marked "atomic" is unlocked,
 *   but must use atomic ops to read/write.
 *
 * Lock order:
 * + global_mtx -> ring_mtx
 */
/* One TX or RX descriptor ring; the unions select the TX or RX variant. */
struct ena_ring {
	/* Holds the empty requests for TX/RX out of order completions */
	union {
		uint16_t *free_tx_ids;
		uint16_t *free_rx_ids;
	};
	struct ena_com_dev *ena_dev;
	struct ena_adapter *adapter;
	struct ena_com_io_cq *ena_com_io_cq;
	struct ena_com_io_sq *ena_com_io_sq;

	uint16_t qid;

	/* Determines if device will use LLQ or normal mode for TX */
	enum ena_admin_placement_policy_type tx_mem_queue_type;
	/* The maximum length the driver can push to the device (For LLQ) */
	uint8_t tx_max_header_size;

	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];

	/*
	 * Fields used for Adaptive Interrupt Modulation - to be implemented in
	 * the future releases
	 */
	uint32_t smoothed_interval;
	enum ena_intr_moder_level moder_tbl_idx;

	struct ena_que *que;
#ifdef LRO
	struct lro_ctrl lro;
#endif

	/* Producer/consumer cursors; wrap via ENA_*_RING_IDX_NEXT. */
	uint16_t next_to_use;
	uint16_t next_to_clean;

	union {
		struct ena_tx_buffer *tx_buffer_info; /* contex of tx packet */
		struct ena_rx_buffer *rx_buffer_info; /* contex of rx packet */
	};
	int ring_size; /* number of tx/rx_buffer_info's entries */

	pcq_t *br; /* only for TX */

	kmutex_t ring_mtx;
	char mtx_name[16];

	/* Deferred-work state: enqueue_* is TX-only, cleanup_* RX-only. */
	union {
		struct {
			struct work enqueue_task;
			struct workqueue *enqueue_tq;
		};
		struct {
			struct work cleanup_task;
			struct workqueue *cleanup_tq;
		};
	};
	u_int task_pending; /* atomic */
	bool stopping;

	union {
		struct ena_stats_tx tx_stats;
		struct ena_stats_rx rx_stats;
	};

	int empty_rx_queue;
} __aligned(CACHE_LINE_SIZE);

/* Adapter-wide driver event counters. */
struct ena_stats_dev {
	char name[16];
	struct evcnt wd_expired;
	struct evcnt interface_up;
	struct evcnt interface_down;
	struct evcnt admin_q_pause;
};

/* Counters mirroring device-reported (hardware) statistics. */
struct ena_hw_stats {
	char name[16];
	struct evcnt rx_packets;
	struct evcnt tx_packets;

	struct evcnt rx_bytes;
	struct evcnt tx_bytes;

	struct evcnt rx_drops;
};

/* Board specific private data structure */
struct ena_adapter {
	struct ena_com_dev *ena_dev;

	/* OS defined structs */
	device_t pdev;
	struct ethercom sc_ec;
	struct ifnet *ifp;		/* set to point to sc_ec */
	struct ifmedia media;

	/* OS resources */
	kmutex_t global_mtx;

	void *sc_ihs[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
	pci_intr_handle_t *sc_intrs;
	int sc_nintrs;
	struct pci_attach_args sc_pa;

	/* Registers */
	bus_space_handle_t sc_bhandle;
	bus_space_tag_t sc_btag;
	bus_addr_t sc_memaddr;
	bus_size_t sc_mapsize;

	/* DMA tag used throughout the driver adapter for Tx and Rx */
	bus_dma_tag_t sc_dmat;
	int dma_width;

	uint32_t max_mtu;

	uint16_t max_tx_sgl_size;
	uint16_t max_rx_sgl_size;

	uint32_t tx_offload_cap;

	/* Tx fast path data */
	int num_queues;

	unsigned int tx_ring_size;
	unsigned int rx_ring_size;

	/* RSS*/
	uint8_t rss_ind_tbl[ENA_RX_RSS_TABLE_SIZE];
	bool rss_support;

	uint8_t mac_addr[ETHER_ADDR_LEN];
	/* mdio and phy*/

	uint32_t flags;	/* atomic */

	/* Queue will represent one TX and one RX ring */
	struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE); /* stable */

	/* TX */
	struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* RX */
	struct ena_ring
	    rx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* Timer service */
	struct callout timer_service;
	sbintime_t keep_alive_timestamp;
	uint32_t next_monitored_tx_qid;
	struct work reset_task;
	struct workqueue *reset_tq;
	int wd_active;
	sbintime_t keep_alive_timeout;
	sbintime_t missing_tx_timeout;
	uint32_t missing_tx_max_queues;
	uint32_t missing_tx_threshold;

	/* Statistics */
	struct ena_stats_dev dev_stats;
	struct ena_hw_stats hw_stats;

	enum ena_regs_reset_reason_types reset_reason;
};

/* Per-ring spin mutex helpers (see "Locking notes" above). */
#define ENA_RING_MTX_LOCK(_ring)	mutex_enter(&(_ring)->ring_mtx)
#define ENA_RING_MTX_TRYLOCK(_ring)	mutex_tryenter(&(_ring)->ring_mtx)
#define ENA_RING_MTX_UNLOCK(_ring)	mutex_exit(&(_ring)->ring_mtx)
#define ENA_RING_MTX_OWNED(_ring)	mutex_owned(&(_ring)->ring_mtx)

/* Adapter-wide adaptive mutex helpers. */
#define ENA_CORE_MTX_LOCK(_adapter)	mutex_enter(&(_adapter)->global_mtx)
#define ENA_CORE_MTX_TRYLOCK(_adapter)	mutex_tryenter(&(_adapter)->global_mtx)
#define ENA_CORE_MTX_UNLOCK(_adapter)	mutex_exit(&(_adapter)->global_mtx)
#define ENA_CORE_MTX_OWNED(_adapter)	mutex_owned(&(_adapter)->global_mtx)

/*
 * Count the mbufs in a chain by walking m_next.
 * Caller must pass a non-NULL chain head.
 */
static inline int ena_mbuf_count(struct mbuf *mbuf)
{
	int count = 1;

	while ((mbuf = mbuf->m_next) != NULL)
		++count;

	return count;
}

/* provide FreeBSD-compatible macros */
#define if_getcapenable(ifp)		(ifp)->if_capenable
#define if_setcapenable(ifp, s)		SET((ifp)->if_capenable, s)
#define if_getcapabilities(ifp)		(ifp)->if_capabilities
#define if_setcapabilities(ifp, s)	SET((ifp)->if_capabilities, s)
#define if_setcapabilitiesbit(ifp, s, c) do {	\
	CLR((ifp)->if_capabilities, c);		\
	SET((ifp)->if_capabilities, s);		\
} while (0)
#define if_getsoftc(ifp)		(ifp)->if_softc
#define if_setmtu(ifp, new_mtu)		(ifp)->if_mtu = (new_mtu)
#define if_getdrvflags(ifp)		(ifp)->if_flags
#define if_setdrvflagbits(ifp, s, c) do {	\
	CLR((ifp)->if_flags, c);	\
	SET((ifp)->if_flags, s);	\
} while (0)
#define if_setflags(ifp, s)		SET((ifp)->if_flags, s)
#define if_sethwassistbits(ifp, s, c) do {	\
	CLR((ifp)->if_csum_flags_rx, c);	\
	SET((ifp)->if_csum_flags_rx, s);	\
} while (0)
#define if_clearhwassist(ifp)	(ifp)->if_csum_flags_rx = 0
#define if_setbaudrate(ifp, r)	(ifp)->if_baudrate = (r)
#define if_setdev(ifp, dev)	do { } while (0)
#define if_setsoftc(ifp, softc)	(ifp)->if_softc = (softc)
#define if_setinitfn(ifp, initfn)	(ifp)->if_init = (initfn)
#define if_settransmitfn(ifp, txfn)	(ifp)->if_transmit = (txfn)
#define if_setioctlfn(ifp, ioctlfn)	(ifp)->if_ioctl = (ioctlfn)
#define if_setsendqlen(ifp, sqlen)	\
	IFQ_SET_MAXLEN(&(ifp)->if_snd, uimax(sqlen, IFQ_MAXLEN))
#define if_setsendqready(ifp)	IFQ_SET_READY(&(ifp)->if_snd)
#define if_setifheaderlen(ifp, len)	(ifp)->if_hdrlen = (len)

/* One second expressed in sbintime_t's 32.32 fixed-point format. */
#define SBT_1S	((sbintime_t)1 << 32)
#define bintime_clear(a)	((a)->sec = (a)->frac = 0)
#define bintime_isset(a)	((a)->sec || (a)->frac)

/* Convert struct bintime (sec + 64-bit frac) to 32.32 sbintime_t. */
static __inline sbintime_t
bttosbt(const struct bintime _bt)
{
	return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32));
}

/* System uptime as sbintime_t, via the low-cost getbinuptime(). */
static __inline sbintime_t
getsbinuptime(void)
{
	struct bintime _bt;

	getbinuptime(&_bt);
	return (bttosbt(_bt));
}

/* Intentionally non-atomic, it's just unnecessary overhead */
#define counter_u64_add(x, cnt)	(x).ev_count += (cnt)
#define counter_u64_zero(x)	(x).ev_count = 0
#define counter_u64_free(x)	evcnt_detach(&(x))

#define counter_u64_add_protected(x, cnt)	(x).ev_count += (cnt)
#define counter_enter()		do {} while (0)
#define counter_exit()		do {} while (0)

/* Misc other constants */
#define mp_ncpus	ncpu
#define osreldate	__NetBSD_Version__

/*
 * XXX XXX XXX just to make compile, must provide replacement XXX XXX XXX
 * Other than that, TODO:
- decide whether to import <sys/buf_ring.h> 517 * - recheck the M_CSUM/IPCAP mapping 518 * - recheck workqueue use - FreeBSD taskqueues might have different semantics 519 */ 520 #define buf_ring_alloc(a, b, c, d) (void *)&a 521 #define drbr_free(ifp, b) do { } while (0) 522 #define drbr_flush(ifp, b) IFQ_PURGE(&(ifp)->if_snd) 523 #define drbr_advance(ifp, b) \ 524 ({ \ 525 struct mbuf *__m; \ 526 IFQ_DEQUEUE(&(ifp)->if_snd, __m); \ 527 __m; \ 528 }) 529 #define drbr_putback(ifp, b, m) do { } while (0) 530 #define drbr_empty(ifp, b) IFQ_IS_EMPTY(&(ifp)->if_snd) 531 #define drbr_peek(ifp, b) \ 532 ({ \ 533 struct mbuf *__m; \ 534 IFQ_POLL(&(ifp)->if_snd, __m); \ 535 __m; \ 536 }) 537 #define drbr_enqueue(ifp, b, m) \ 538 ({ \ 539 int __err; \ 540 IFQ_ENQUEUE(&(ifp)->if_snd, m, __err); \ 541 __err; \ 542 }) 543 #define m_getjcl(a, b, c, d) NULL 544 #define MJUM16BYTES MCLBYTES 545 #define m_append(m, len, cp) ena_m_append(m, len, cp) 546 #define m_collapse(m, how, maxfrags) m_defrag(m, how) /* XXX */ 547 /* XXX XXX XXX */ 548 549 static inline int 550 ena_m_append(struct mbuf *m0, int len, const void *cpv) 551 { 552 struct mbuf *m, *n; 553 int remainder, space; 554 const char *cp = cpv; 555 556 KASSERT(len != M_COPYALL); 557 for (m = m0; m->m_next != NULL; m = m->m_next) 558 continue; 559 remainder = len; 560 space = M_TRAILINGSPACE(m); 561 if (space > 0) { 562 /* 563 * Copy into available space. 564 */ 565 if (space > remainder) 566 space = remainder; 567 memmove(mtod(m, char *) + m->m_len, cp, space); 568 m->m_len += space; 569 cp = cp + space, remainder -= space; 570 } 571 while (remainder > 0) { 572 /* 573 * Allocate a new mbuf; could check space 574 * and allocate a cluster instead. 
575 */ 576 n = m_get(M_DONTWAIT, m->m_type); 577 if (n == NULL) 578 break; 579 n->m_len = uimin(MLEN, remainder); 580 memmove(mtod(n, void *), cp, n->m_len); 581 cp += n->m_len, remainder -= n->m_len; 582 m->m_next = n; 583 m = n; 584 } 585 if (m0->m_flags & M_PKTHDR) 586 m0->m_pkthdr.len += len - remainder; 587 return (remainder == 0); 588 } 589 #endif /* !(ENA_H) */ 590