1 /* $NetBSD: if_enavar.h,v 1.10 2024/02/09 22:08:35 andvar Exp $ */
2
3 /*-
4 * BSD LICENSE
5 *
6 * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * $FreeBSD: head/sys/dev/ena/ena.h 333450 2018-05-10 09:06:21Z mw $
33 *
34 */
35
36 #ifndef ENA_H
37 #define ENA_H
38
39 #include <sys/types.h>
40 #include <sys/atomic.h>
41 #include <sys/pcq.h>
42
43 #include "external/bsd/ena-com/ena_com.h"
44 #include "external/bsd/ena-com/ena_eth_com.h"
45
#define DRV_MODULE_VER_MAJOR	0
#define DRV_MODULE_VER_MINOR	8
#define DRV_MODULE_VER_SUBMINOR 1

#define DRV_MODULE_NAME		"ena"

#ifndef DRV_MODULE_VERSION
/* Driver version string "major.minor.subminor" built from the parts above. */
#define DRV_MODULE_VERSION				\
	___STRING(DRV_MODULE_VER_MAJOR) "."		\
	___STRING(DRV_MODULE_VER_MINOR) "."		\
	___STRING(DRV_MODULE_VER_SUBMINOR)
#endif
#define DEVICE_NAME	"Elastic Network Adapter (ENA)"
#define DEVICE_DESC	"ENA adapter"

/* Calculate DMA mask - width for ena cannot exceed 48, so it is safe */
#define ENA_DMA_BIT_MASK(x)	((1ULL << (x)) - 1ULL)

/* 1 for AENQ + ADMIN */
#define ENA_ADMIN_MSIX_VEC		1
#define ENA_MAX_MSIX_VEC(io_queues)	(ENA_ADMIN_MSIX_VEC + (io_queues))

/* PCI BARs: BAR0 holds the device registers, BAR2 the memory window. */
#define ENA_REG_BAR	PCI_BAR(0)
#define ENA_MEM_BAR	PCI_BAR(2)

/* Maximum number of DMA segments per bus_dma mapping. */
#define ENA_BUS_DMA_SEGS	32

#define ENA_DEFAULT_RING_SIZE	1024

/* RX refill threshold divider (threshold = ring size / divider). */
#define ENA_RX_REFILL_THRESH_DIVIDER	8

/* Size of the buffers holding interrupt handler names. */
#define ENA_IRQNAME_SIZE	40

/* Max ena_com_buf entries a single packet may occupy (see ena_tx_buffer). */
#define ENA_PKT_MAX_BUFS	19

#define ENA_RX_RSS_TABLE_LOG_SIZE	7
#define ENA_RX_RSS_TABLE_SIZE	(1 << ENA_RX_RSS_TABLE_LOG_SIZE)

/* RSS hash key size in bytes. */
#define ENA_HASH_KEY_SIZE	40

#define ENA_MAX_FRAME_LEN	10000
#define ENA_MIN_FRAME_LEN	60

#define ENA_TX_CLEANUP_THRESHOLD	128

/* Pending-descriptor count that triggers a TX doorbell -- XXX confirm */
#define DB_THRESHOLD	64

#define TX_COMMIT	32
/*
 * TX budget for cleaning. It should be half of the RX budget to reduce amount
 * of TCP retransmissions.
 */
#define TX_BUDGET	128
/* RX cleanup budget. -1 stands for infinity. */
#define RX_BUDGET	256
/*
 * How many times we can repeat cleanup in the io irq handling routine if the
 * RX or TX budget was depleted.
 */
#define CLEAN_BUDGET	8

/* Interrupt moderation intervals -- XXX confirm units (microseconds?) */
#define RX_IRQ_INTERVAL	20
#define TX_IRQ_INTERVAL	50

#define ENA_MIN_MTU	128

#define ENA_TSO_MAXSIZE	65536

#define ENA_MMIO_DISABLE_REG_READ	BIT(0)

/* Ring sizes are powers of two, so wrap-around is a simple mask. */
#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

/* Device queue numbering interleaves pairs: even = TX, odd = RX. */
#define ENA_IO_TXQ_IDX(q)	(2 * (q))
#define ENA_IO_RXQ_IDX(q)	(2 * (q) + 1)

/* MSI-X vector 0 is management (admin + AENQ); I/O vectors follow. */
#define ENA_MGMNT_IRQ_IDX		0
#define ENA_IO_IRQ_FIRST_IDX		1
#define ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))

/*
 * ENA device should send keep alive msg every 1 sec.
 * We wait for 6 sec just to be on the safe side.
 */
#define DEFAULT_KEEP_ALIVE_TO	(SBT_1S * 6)

/* Time in jiffies before concluding the transmitter is hung. */
#define DEFAULT_TX_CMP_TO	(SBT_1S * 5)

/* Number of queues to check for missing queues per timer tick */
#define DEFAULT_TX_MONITORED_QUEUES	(4)

/* Max number of timeouted packets before device reset */
#define DEFAULT_TX_CMP_THRESHOLD	(128)

/*
 * Supported PCI vendor and devices IDs
 */
#define PCI_VENDOR_ID_AMAZON	0x1d0f

#define PCI_DEV_ID_ENA_PF	0x0ec2
#define PCI_DEV_ID_ENA_LLQ_PF	0x1ec2
#define PCI_DEV_ID_ENA_VF	0xec20
#define PCI_DEV_ID_ENA_LLQ_VF	0xec21
154 */
enum ena_flags_t {
	ENA_FLAG_DEVICE_RUNNING,	/* device attached and initialized */
	ENA_FLAG_DEV_UP,		/* interface is up */
	ENA_FLAG_LINK_UP,		/* link reported up by the device */
	ENA_FLAG_MSIX_ENABLED,		/* MSI-X vectors allocated */
	ENA_FLAG_TRIGGER_RESET,		/* reset requested */
	ENA_FLAG_ONGOING_RESET,		/* reset in progress */
	ENA_FLAG_DEV_UP_BEFORE_RESET,	/* restore UP state after reset */
	ENA_FLAG_RSS_ACTIVE,		/* RSS configured on the device */
	/*
	 * NOTE(review): this equals the index of the last flag (7), not
	 * the number of flags (8).  It matches the FreeBSD original;
	 * confirm before using it as a flag count.
	 */
	ENA_FLAGS_NUMBER = ENA_FLAG_RSS_ACTIVE
};
166
/*
 * Accessors for the atomic "flags" word in struct ena_adapter, indexed
 * by enum ena_flags_t.  Reads and the full reset are plain accesses;
 * set/clear of individual bits use atomic ops (the word is shared).
 */
#define ENA_FLAG_BITMASK(bit)		(~(uint32_t)__BIT(bit))
/*
 * Parenthesized expression, deliberately without a trailing semicolon:
 * the previous statement-style definition ended in ";", which made
 * "ENA_FLAG_ZERO(a);" expand to two statements and would break an
 * unbraced if/else.  Non-atomic; only safe when no concurrent access.
 */
#define ENA_FLAG_ZERO(adapter)		((adapter)->flags = 0)
#define ENA_FLAG_ISSET(bit, adapter)	((adapter)->flags & __BIT(bit))
#define ENA_FLAG_SET_ATOMIC(bit, adapter)	\
	atomic_or_32(&(adapter)->flags, __BIT(bit))
#define ENA_FLAG_CLEAR_ATOMIC(bit, adapter)	\
	atomic_and_32(&(adapter)->flags, ENA_FLAG_BITMASK(bit))
174
/* FreeBSD compat: signed 32.32 fixed-point time (see SBT_1S below). */
typedef __int64_t sbintime_t;

/* Linux/FreeBSD compat shim: one MSI-X table entry -- XXX confirm use */
struct msix_entry {
	int entry;	/* index into the MSI-X table */
	int vector;	/* assigned interrupt vector */
};

/* PCI match-table entry; compared against PCI_DEV_ID_ENA_* above. */
typedef struct _ena_vendor_info_t {
	unsigned int vendor_id;
	unsigned int device_id;
	unsigned int index;
} ena_vendor_info_t;
187
/* One I/O queue pair (a TX ring and an RX ring sharing an interrupt). */
struct ena_que {
	struct ena_adapter *adapter;	/* back pointer to the softc */
	struct ena_ring *tx_ring;
	struct ena_ring *rx_ring;
	uint32_t id;			/* queue pair index */
	int cpu;			/* CPU affinity -- XXX confirm binding */
};
195
/* Per-slot bookkeeping for a TX ring entry. */
struct ena_tx_buffer {
	struct mbuf *mbuf;		/* packet owned by this slot */
	/* # of ena desc for this specific mbuf
	 * (includes data desc and metadata desc) */
	unsigned int tx_descs;
	/* # of buffers used by this mbuf */
	unsigned int num_of_bufs;
	bus_dmamap_t map;		/* DMA map for the mbuf chain */

	/* Used to detect missing tx packets */
	struct bintime timestamp;
	bool print_once;		/* rate-limit missing-TX warnings -- XXX confirm */

	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
} __aligned(CACHE_LINE_SIZE);

/* Per-slot bookkeeping for an RX ring entry. */
struct ena_rx_buffer {
	struct mbuf *mbuf;		/* receive buffer handed to the device */
	bus_dmamap_t map;		/* DMA map for the mbuf */
	struct ena_com_buf ena_buf;	/* address/length as posted to hardware */
} __aligned(CACHE_LINE_SIZE);
217
/*
 * Software TX statistics, one instance per TX ring.  "name" is storage
 * for the evcnt group name -- XXX confirm against attach code.
 */
struct ena_stats_tx {
	char name[16];
	struct evcnt cnt;		/* packets sent */
	struct evcnt bytes;		/* bytes sent */
	struct evcnt prepare_ctx_err;
	struct evcnt dma_mapping_err;
	struct evcnt doorbells;
	struct evcnt missing_tx_comp;
	struct evcnt bad_req_id;
	struct evcnt collapse;		/* mbuf chains collapsed (m_defrag) */
	struct evcnt collapse_err;
	struct evcnt pcq_drops;		/* drops from the per-ring pcq */
};

/* Software RX statistics, one instance per RX ring. */
struct ena_stats_rx {
	char name[16];
	struct evcnt cnt;		/* packets received */
	struct evcnt bytes;		/* bytes received */
	struct evcnt refil_partial;	/* ring refills that posted fewer than asked */
	struct evcnt bad_csum;
	struct evcnt mbuf_alloc_fail;
	struct evcnt dma_mapping_err;
	struct evcnt bad_desc_num;
	struct evcnt bad_req_id;
	struct evcnt empty_rx_ring;
};
244
/*
 * Locking notes:
 * + For TX, fields in ena_ring are protected by ring_mtx (a spin mutex).
 *	- protect them only when I/F is up.
 *	- when I/F is down or attaching, detaching, no need to protect them.
 * + For RX, the field "stopping" is protected by ring_mtx (a spin mutex).
 *	- other fields in ena_ring are not protected.
 * + Fields in ena_adapter are protected by global_mtx (an adaptive mutex).
 *
 * + a field marked "stable" is unlocked.
 * + a field marked "atomic" is unlocked,
 *   but must use atomic ops to read/write.
 *
 * Lock order:
 *   + global_mtx -> ring_mtx
 */
/* State of one TX or RX ring; the unions select the TX or RX variant. */
struct ena_ring {
	/* Holds the empty requests for TX/RX out of order completions */
	union {
		uint16_t *free_tx_ids;
		uint16_t *free_rx_ids;
	};
	struct ena_com_dev *ena_dev;
	struct ena_adapter *adapter;
	struct ena_com_io_cq *ena_com_io_cq;	/* completion queue (ena-com) */
	struct ena_com_io_sq *ena_com_io_sq;	/* submission queue (ena-com) */

	uint16_t qid;

	/* Determines if device will use LLQ or normal mode for TX */
	enum ena_admin_placement_policy_type tx_mem_queue_type;
	/* The maximum length the driver can push to the device (For LLQ) */
	uint8_t tx_max_header_size;

	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];

	/*
	 * Fields used for Adaptive Interrupt Modulation - to be implemented in
	 * the future releases
	 */
	uint32_t smoothed_interval;
	enum ena_intr_moder_level moder_tbl_idx;

	struct ena_que *que;		/* back pointer to the queue pair */
#ifdef LRO
	struct lro_ctrl lro;
#endif

	uint16_t next_to_use;		/* producer index */
	uint16_t next_to_clean;		/* consumer index */

	union {
		struct ena_tx_buffer *tx_buffer_info; /* context of tx packet */
		struct ena_rx_buffer *rx_buffer_info; /* context of rx packet */
	};
	int ring_size; /* number of tx/rx_buffer_info's entries */

	pcq_t *br; /* only for TX */

	kmutex_t ring_mtx;		/* spin mutex; see locking notes above */
	char mtx_name[16];

	/* Deferred-work state: TX rings enqueue, RX rings clean up. */
	union {
		struct {
			struct work enqueue_task;
			struct workqueue *enqueue_tq;
		};
		struct {
			struct work cleanup_task;
			struct workqueue *cleanup_tq;
		};
	};
	u_int task_pending;		/* atomic */
	bool stopping;			/* ring is being torn down (ring_mtx) */

	union {
		struct ena_stats_tx tx_stats;
		struct ena_stats_rx rx_stats;
	};

	int empty_rx_queue;
} __aligned(CACHE_LINE_SIZE);
327
/* Adapter-wide software statistics. */
struct ena_stats_dev {
	char name[16];			/* evcnt group name storage */
	struct evcnt wd_expired;	/* watchdog (keep-alive) expirations */
	struct evcnt interface_up;
	struct evcnt interface_down;
	struct evcnt admin_q_pause;
};

/* Counters mirrored from device-reported (hardware) statistics. */
struct ena_hw_stats {
	char name[16];			/* evcnt group name storage */
	struct evcnt rx_packets;
	struct evcnt tx_packets;

	struct evcnt rx_bytes;
	struct evcnt tx_bytes;

	struct evcnt rx_drops;
};
346
/* Board specific private data structure (the per-device softc). */
struct ena_adapter {
	struct ena_com_dev *ena_dev;	/* shared ena-com device context */

	/* OS defined structs */
	device_t pdev;
	struct ethercom sc_ec;
	struct ifnet *ifp;		/* set to point to sc_ec */
	struct ifmedia media;

	/* OS resources */
	kmutex_t global_mtx;		/* adaptive; see locking notes above */

	void *sc_ihs[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
	pci_intr_handle_t *sc_intrs;
	int sc_nintrs;			/* number of established interrupts */
	struct pci_attach_args sc_pa;

	/* Registers */
	bus_space_handle_t sc_bhandle;
	bus_space_tag_t sc_btag;
	bus_addr_t sc_memaddr;
	bus_size_t sc_mapsize;

	/* DMA tag used throughout the driver adapter for Tx and Rx */
	bus_dma_tag_t sc_dmat;
	int dma_width;			/* DMA address width in bits (<= 48) */

	uint32_t max_mtu;

	uint16_t max_tx_sgl_size;	/* max scatter/gather entries, TX */
	uint16_t max_rx_sgl_size;	/* max scatter/gather entries, RX */

	uint32_t tx_offload_cap;

	/* Tx fast path data */
	int num_queues;			/* number of active queue pairs */

	unsigned int tx_ring_size;
	unsigned int rx_ring_size;

	/* RSS */
	uint8_t rss_ind_tbl[ENA_RX_RSS_TABLE_SIZE];
	bool rss_support;
	int initialized;

	uint8_t mac_addr[ETHER_ADDR_LEN];
	/* mdio and phy */

	uint32_t flags;			/* atomic; bits from enum ena_flags_t */

	/* Queue will represent one TX and one RX ring */
	struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE); /* stable */

	/* TX */
	struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* RX */
	struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* Timer service */
	struct callout timer_service;
	sbintime_t keep_alive_timestamp;	/* last keep-alive from device */
	uint32_t next_monitored_tx_qid;
	struct work reset_task;
	struct workqueue *reset_tq;
	int wd_active;				/* watchdog enabled */
	/* Presumably seeded from the DEFAULT_* constants above -- verify. */
	sbintime_t keep_alive_timeout;
	sbintime_t missing_tx_timeout;
	uint32_t missing_tx_max_queues;
	uint32_t missing_tx_threshold;

	/* Statistics */
	struct ena_stats_dev dev_stats;
	struct ena_hw_stats hw_stats;

	enum ena_regs_reset_reason_types reset_reason;
};
428
/* Per-ring spin mutex (TX fast path; RX "stopping" flag). */
#define ENA_RING_MTX_LOCK(_ring)	mutex_enter(&(_ring)->ring_mtx)
#define ENA_RING_MTX_TRYLOCK(_ring)	mutex_tryenter(&(_ring)->ring_mtx)
#define ENA_RING_MTX_UNLOCK(_ring)	mutex_exit(&(_ring)->ring_mtx)
#define ENA_RING_MTX_OWNED(_ring)	mutex_owned(&(_ring)->ring_mtx)

/* Adapter-wide adaptive mutex; acquire before any ring_mtx (lock order). */
#define ENA_CORE_MTX_LOCK(_adapter)	mutex_enter(&(_adapter)->global_mtx)
#define ENA_CORE_MTX_TRYLOCK(_adapter)	mutex_tryenter(&(_adapter)->global_mtx)
#define ENA_CORE_MTX_UNLOCK(_adapter)	mutex_exit(&(_adapter)->global_mtx)
#define ENA_CORE_MTX_OWNED(_adapter)	mutex_owned(&(_adapter)->global_mtx)
438
ena_mbuf_count(struct mbuf * mbuf)439 static inline int ena_mbuf_count(struct mbuf *mbuf)
440 {
441 int count = 1;
442
443 while ((mbuf = mbuf->m_next) != NULL)
444 ++count;
445
446 return count;
447 }
448
/* provide FreeBSD-compatible macros */
#define if_getcapenable(ifp)		(ifp)->if_capenable
/*
 * NOTE(review): unlike FreeBSD's if_setcapenable()/if_setcapabilities(),
 * which assign the whole value, these OR bits in (SET()).  Verify that
 * callers only ever pass bits to add.
 */
#define if_setcapenable(ifp, s)		SET((ifp)->if_capenable, s)
#define if_getcapabilities(ifp)		(ifp)->if_capabilities
#define if_setcapabilities(ifp, s)	SET((ifp)->if_capabilities, s)
/* Clear bits "c", then set bits "s". */
#define if_setcapabilitiesbit(ifp, s, c) do { \
		CLR((ifp)->if_capabilities, c); \
		SET((ifp)->if_capabilities, s); \
	} while (0)
#define if_getsoftc(ifp)		(ifp)->if_softc
#define if_setmtu(ifp, new_mtu)		(ifp)->if_mtu = (new_mtu)
#define if_getdrvflags(ifp)		(ifp)->if_flags
#define if_setdrvflagbits(ifp, s, c) do { \
		CLR((ifp)->if_flags, c); \
		SET((ifp)->if_flags, s); \
	} while (0)
#define if_setflags(ifp, s)		SET((ifp)->if_flags, s)
/* Hardware-assist (checksum) flags map onto the RX csum flags here. */
#define if_sethwassistbits(ifp, s, c) do { \
		CLR((ifp)->if_csum_flags_rx, c); \
		SET((ifp)->if_csum_flags_rx, s); \
	} while (0)
#define if_clearhwassist(ifp)		(ifp)->if_csum_flags_rx = 0
#define if_setbaudrate(ifp, r)		(ifp)->if_baudrate = (r)
#define if_setdev(ifp, dev)		do { } while (0)
#define if_setsoftc(ifp, softc)		(ifp)->if_softc = (softc)
#define if_setinitfn(ifp, initfn)	(ifp)->if_init = (initfn)
#define if_settransmitfn(ifp, txfn)	(ifp)->if_transmit = (txfn)
#define if_setioctlfn(ifp, ioctlfn)	(ifp)->if_ioctl = (ioctlfn)
/* Never shrink the send queue below the system default IFQ_MAXLEN. */
#define if_setsendqlen(ifp, sqlen) \
	IFQ_SET_MAXLEN(&(ifp)->if_snd, uimax(sqlen, IFQ_MAXLEN))
#define if_setsendqready(ifp)		IFQ_SET_READY(&(ifp)->if_snd)
#define if_setifheaderlen(ifp, len)	(ifp)->if_hdrlen = (len)

/* sbintime_t is 32.32 fixed point, so one second is 1 << 32. */
#define SBT_1S	((sbintime_t)1 << 32)
#define bintime_clear(a)	((a)->sec = (a)->frac = 0)
#define bintime_isset(a)	((a)->sec || (a)->frac)
485
486 static __inline sbintime_t
bttosbt(const struct bintime _bt)487 bttosbt(const struct bintime _bt)
488 {
489 return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32));
490 }
491
/*
 * Return the system uptime as a 32.32 fixed-point sbintime_t.
 * Compat wrapper around getbinuptime(9) (the cached, lower-resolution
 * uptime snapshot) plus bttosbt().
 */
static __inline sbintime_t
getsbinuptime(void)
{
	struct bintime _bt;

	getbinuptime(&_bt);
	return (bttosbt(_bt));
}
500
/*
 * counter_u64 compat: FreeBSD per-CPU counters are mapped to evcnt.
 * Intentionally non-atomic, it's just unnecessary overhead.
 */
#define counter_u64_add(x, cnt)		(x).ev_count += (cnt)
#define counter_u64_zero(x)		(x).ev_count = 0
#define counter_u64_free(x)		evcnt_detach(&(x))

#define counter_u64_add_protected(x, cnt) (x).ev_count += (cnt)
#define counter_enter()			do {} while (0)
#define counter_exit()			do {} while (0)

/* Misc other constants */
#define	mp_ncpus			ncpu
#define osreldate			__NetBSD_Version__

/*
 * XXX XXX XXX just to make compile, must provide replacement XXX XXX XXX
 * Other than that, TODO:
 * - decide whether to import <sys/buf_ring.h>
 * - recheck the M_CSUM/IPCAP mapping
 * - recheck workqueue use - FreeBSD taskqueues might have different semantics
 *
 * The drbr_* shims route everything through the ifnet send queue and
 * ignore the buf_ring argument "b".  drbr_peek() uses IFQ_POLL (non-
 * destructive), so drbr_putback() can be a no-op and drbr_advance()
 * performs the actual dequeue.
 */
/* Returns a dummy non-NULL token; the ring itself is never used. */
#define buf_ring_alloc(a, b, c, d)	(void *)&a
#define drbr_free(ifp, b)		do { } while (0)
#define drbr_flush(ifp, b)		IFQ_PURGE(&(ifp)->if_snd)
#define drbr_advance(ifp, b)		\
	({				\
		struct mbuf *__m;	\
		IFQ_DEQUEUE(&(ifp)->if_snd, __m);	\
		__m;			\
	})
#define drbr_putback(ifp, b, m)		do { } while (0)
#define drbr_empty(ifp, b)		IFQ_IS_EMPTY(&(ifp)->if_snd)
#define drbr_peek(ifp, b)		\
	({				\
		struct mbuf *__m;	\
		IFQ_POLL(&(ifp)->if_snd, __m);	\
		__m;			\
	})
#define drbr_enqueue(ifp, b, m)		\
	({				\
		int __err;		\
		IFQ_ENQUEUE(&(ifp)->if_snd, m, __err);	\
		__err;			\
	})
/* No jumbo-cluster allocator mapping yet: always fails (see TODO above). */
#define m_getjcl(a, b, c, d)		NULL
#define MJUM16BYTES			MCLBYTES
#define m_append(m, len, cp)		ena_m_append(m, len, cp)
#define m_collapse(m, how, maxfrags)	m_defrag(m, how) /* XXX */
/* XXX XXX XXX */
549
/*
 * Append "len" bytes from "cpv" to the end of mbuf chain "m0",
 * allocating plain (non-cluster) mbufs as needed.  Local stand-in for
 * m_append().  Returns 1 if all bytes were appended, 0 if an mbuf
 * allocation failed part way; in either case m0 remains valid and its
 * pkthdr length (if any) reflects exactly the bytes actually copied.
 */
static inline int
ena_m_append(struct mbuf *m0, int len, const void *cpv)
{
	struct mbuf *m, *n;
	int remainder, space;
	const char *cp = cpv;

	KASSERT(len != M_COPYALL);
	/* Walk to the last mbuf in the chain. */
	for (m = m0; m->m_next != NULL; m = m->m_next)
		continue;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		memmove(mtod(m, char *) + m->m_len, cp, space);
		m->m_len += space;
		cp = cp + space, remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_DONTWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = uimin(MLEN, remainder);
		memmove(mtod(n, void *), cp, n->m_len);
		cp += n->m_len, remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	/* Account only for the bytes actually appended. */
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}
590 #endif /* !(ENA_H) */
591