xref: /netbsd-src/sys/dev/pci/if_enavar.h (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1 /*	$NetBSD: if_enavar.h,v 1.7 2018/12/23 12:32:33 jmcneill Exp $	*/
2 
3 /*-
4  * BSD LICENSE
5  *
6  * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  *
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  * $FreeBSD: head/sys/dev/ena/ena.h 333450 2018-05-10 09:06:21Z mw $
33  *
34  */
35 
36 #ifndef ENA_H
37 #define ENA_H
38 
39 #include <sys/types.h>
40 
41 #include "external/bsd/ena-com/ena_com.h"
42 #include "external/bsd/ena-com/ena_eth_com.h"
43 
/* Driver version, stitched together below as DRV_MODULE_VERSION ("0.8.1"). */
#define DRV_MODULE_VER_MAJOR	0
#define DRV_MODULE_VER_MINOR	8
#define DRV_MODULE_VER_SUBMINOR 1

#define DRV_MODULE_NAME		"ena"

#ifndef DRV_MODULE_VERSION
#define DRV_MODULE_VERSION				\
	___STRING(DRV_MODULE_VER_MAJOR) "."		\
	___STRING(DRV_MODULE_VER_MINOR) "."		\
	___STRING(DRV_MODULE_VER_SUBMINOR)
#endif
#define DEVICE_NAME	"Elastic Network Adapter (ENA)"
#define DEVICE_DESC	"ENA adapter"

/* Calculate DMA mask - width for ena cannot exceed 48, so it is safe */
#define ENA_DMA_BIT_MASK(x)		((1ULL << (x)) - 1ULL)

/* 1 for AENQ + ADMIN */
#define	ENA_ADMIN_MSIX_VEC		1
#define	ENA_MAX_MSIX_VEC(io_queues)	(ENA_ADMIN_MSIX_VEC + (io_queues))

/* PCI BARs: device registers live in BAR0, device memory in BAR2. */
#define	ENA_REG_BAR			PCI_BAR(0)
#define	ENA_MEM_BAR			PCI_BAR(2)

/* Maximum DMA segments per bus_dmamap. */
#define	ENA_BUS_DMA_SEGS		32

#define	ENA_DEFAULT_RING_SIZE		1024

/* Refill RX ring once free descriptors fall below ring_size / this divider. */
#define	ENA_RX_REFILL_THRESH_DIVIDER	8

/* Buffer size for formatted interrupt names. */
#define	ENA_IRQNAME_SIZE		40

/* Maximum number of device buffers (segments) a single packet may use. */
#define	ENA_PKT_MAX_BUFS 		19

/* RSS indirection table: 2^7 = 128 entries. */
#define	ENA_RX_RSS_TABLE_LOG_SIZE	7
#define	ENA_RX_RSS_TABLE_SIZE		(1 << ENA_RX_RSS_TABLE_LOG_SIZE)

#define	ENA_HASH_KEY_SIZE		40

#define	ENA_MAX_FRAME_LEN		10000
#define	ENA_MIN_FRAME_LEN 		60

#define ENA_TX_CLEANUP_THRESHOLD	128

/* Ring the TX doorbell after this many pending descriptors. */
#define DB_THRESHOLD	64

#define TX_COMMIT	32
/*
 * TX budget for cleaning. It should be half of the RX budget to reduce amount
 * of TCP retransmissions.
 */
#define TX_BUDGET	128
/* RX cleanup budget. -1 stands for infinity. */
#define RX_BUDGET	256
/*
 * How many times we can repeat cleanup in the io irq handling routine if the
 * RX or TX budget was depleted.
 */
#define CLEAN_BUDGET	8

/* Interrupt moderation intervals (units unspecified here -- TODO confirm). */
#define RX_IRQ_INTERVAL 20
#define TX_IRQ_INTERVAL 50

#define	ENA_MIN_MTU		128

#define	ENA_TSO_MAXSIZE		65536

#define	ENA_MMIO_DISABLE_REG_READ	BIT(0)

/* Advance a ring index; ring_size must be a power of two for the mask. */
#define	ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

#define	ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

/* IO queues for queue pair q interleave: even indexes TX, odd indexes RX. */
#define	ENA_IO_TXQ_IDX(q)		(2 * (q))
#define	ENA_IO_RXQ_IDX(q)		(2 * (q) + 1)

/* MSI-X vector 0 is management (admin/AENQ); IO vectors follow. */
#define	ENA_MGMNT_IRQ_IDX		0
#define	ENA_IO_IRQ_FIRST_IDX		1
#define	ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))

/*
 * ENA device should send keep alive msg every 1 sec.
 * We wait for 6 sec just to be on the safe side.
 */
#define DEFAULT_KEEP_ALIVE_TO		(SBT_1S * 6)

/* Time (sbintime units) before concluding the transmitter is hung. */
#define DEFAULT_TX_CMP_TO		(SBT_1S * 5)

/* Number of queues to check for missing queues per timer tick */
#define DEFAULT_TX_MONITORED_QUEUES	(4)

/* Max number of timeouted packets before device reset */
#define DEFAULT_TX_CMP_THRESHOLD	(128)

/*
 * Supported PCI vendor and devices IDs
 */
#define	PCI_VENDOR_ID_AMAZON	0x1d0f

#define	PCI_DEV_ID_ENA_PF	0x0ec2
#define	PCI_DEV_ID_ENA_LLQ_PF	0x1ec2
#define	PCI_DEV_ID_ENA_VF	0xec20
#define	PCI_DEV_ID_ENA_LLQ_VF	0xec21
/*
 * FreeBSD sbintime_t compatibility: 32.32 fixed-point time -- whole seconds
 * in the upper 32 bits, binary fraction in the lower (see SBT_1S/bttosbt).
 */
typedef __int64_t sbintime_t;

/* Minimal stand-in for FreeBSD's struct msix_entry. */
struct msix_entry {
	int entry;
	int vector;
};

/* PCI match-table entry: vendor/device ID pair plus its table index. */
typedef struct _ena_vendor_info_t {
	unsigned int vendor_id;
	unsigned int device_id;
	unsigned int index;
} ena_vendor_info_t;
162 
/* One TX/RX queue pair and its placement. */
struct ena_que {
	struct ena_adapter *adapter;	/* back pointer to the softc */
	struct ena_ring *tx_ring;
	struct ena_ring *rx_ring;
	uint32_t id;			/* queue pair index */
	int cpu;			/* presumably the CPU this queue is bound to -- confirm in ena.c */
};
170 
/* Per-descriptor TX bookkeeping: the in-flight mbuf and its DMA mapping. */
struct ena_tx_buffer {
	struct mbuf *mbuf;
	/* # of ena desc for this specific mbuf
	 * (includes data desc and metadata desc) */
	unsigned int tx_descs;
	/* # of buffers used by this mbuf */
	unsigned int num_of_bufs;
	bus_dmamap_t map;	/* DMA map covering the mbuf chain */

	/* Used to detect missing tx packets */
	struct bintime timestamp;
	bool print_once;	/* presumably rate-limits a warning -- confirm in ena.c */

	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
} __aligned(CACHE_LINE_SIZE);
186 
/* Per-descriptor RX bookkeeping: receive mbuf, its DMA map and device buffer. */
struct ena_rx_buffer {
	struct mbuf *mbuf;
	bus_dmamap_t map;
	struct ena_com_buf ena_buf;	/* buffer descriptor handed to the device */
} __aligned(CACHE_LINE_SIZE);
192 
/* Per-TX-ring event counters (evcnt(9)); updated via the counter_u64_* macros below. */
struct ena_stats_tx {
	char name[16];			/* presumably the evcnt group name -- TODO confirm */
	struct evcnt cnt;		/* packets sent */
	struct evcnt bytes;		/* bytes sent */
	struct evcnt prepare_ctx_err;
	struct evcnt dma_mapping_err;
	struct evcnt doorbells;
	struct evcnt missing_tx_comp;
	struct evcnt bad_req_id;
	struct evcnt collapse;		/* mbuf chains collapsed (see m_collapse) */
	struct evcnt collapse_err;
};
205 
/* Per-RX-ring event counters (evcnt(9)). */
struct ena_stats_rx {
	char name[16];			/* presumably the evcnt group name -- TODO confirm */
	struct evcnt cnt;		/* packets received */
	struct evcnt bytes;		/* bytes received */
	struct evcnt refil_partial;	/* ring refill did not complete */
	struct evcnt bad_csum;
	struct evcnt mjum_alloc_fail;	/* jumbo cluster allocation failures */
	struct evcnt mbuf_alloc_fail;
	struct evcnt dma_mapping_err;
	struct evcnt bad_desc_num;
	struct evcnt bad_req_id;
	struct evcnt empty_rx_ring;
};
219 
/*
 * A single TX or RX ring and its driver-side state.  TX and RX share this
 * structure; the anonymous unions select the role-specific members.
 */
struct ena_ring {
	/* Holds the empty requests for TX/RX out of order completions */
	union {
		uint16_t *free_tx_ids;
		uint16_t *free_rx_ids;
	};
	struct ena_com_dev *ena_dev;
	struct ena_adapter *adapter;
	struct ena_com_io_cq *ena_com_io_cq;	/* ena-com completion queue */
	struct ena_com_io_sq *ena_com_io_sq;	/* ena-com submission queue */

	uint16_t qid;

	/* Determines if device will use LLQ or normal mode for TX */
	enum ena_admin_placement_policy_type tx_mem_queue_type;
	/* The maximum length the driver can push to the device (For LLQ) */
	uint8_t tx_max_header_size;

	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];

	/*
	 * Fields used for Adaptive Interrupt Modulation - to be implemented in
	 * the future releases
	 */
	uint32_t  smoothed_interval;
	enum ena_intr_moder_level moder_tbl_idx;

	struct ena_que *que;	/* back pointer to the owning queue pair */
#ifdef LRO
	struct lro_ctrl lro;
#endif

	/* Producer/consumer indices into the buffer_info arrays. */
	uint16_t next_to_use;
	uint16_t next_to_clean;

	union {
		struct ena_tx_buffer *tx_buffer_info; /* context of tx packet */
		struct ena_rx_buffer *rx_buffer_info; /* context of rx packet */
	};
	int ring_size; /* number of tx/rx_buffer_info's entries */

	struct buf_ring *br; /* only for TX */

	kmutex_t ring_mtx;
	char mtx_name[16];

	/* Deferred work: TX rings use enqueue_*, RX rings use cmpl_*. */
	union {
		struct {
			struct work enqueue_task;
			struct workqueue *enqueue_tq;
		};
		struct {
			struct work cmpl_task;
			struct workqueue *cmpl_tq;
		};
	};
	u_int task_pending;	/* presumably nonzero while work is queued -- confirm in ena.c */

	union {
		struct ena_stats_tx tx_stats;
		struct ena_stats_rx rx_stats;
	};

	int empty_rx_queue;
} __aligned(CACHE_LINE_SIZE);
285 
/* Device-wide (non-ring) event counters. */
struct ena_stats_dev {
	char name[16];			/* presumably the evcnt group name -- TODO confirm */
	struct evcnt wd_expired;	/* watchdog (keep-alive) expirations */
	struct evcnt interface_up;
	struct evcnt interface_down;
	struct evcnt admin_q_pause;
};
293 
/* Counters mirrored from the device's hardware statistics. */
struct ena_hw_stats {
	char name[16];			/* presumably the evcnt group name -- TODO confirm */
	struct evcnt rx_packets;
	struct evcnt tx_packets;

	struct evcnt rx_bytes;
	struct evcnt tx_bytes;

	struct evcnt rx_drops;
};
304 
305 /* Board specific private data structure */
/* Board specific private data structure */
struct ena_adapter {
	struct ena_com_dev *ena_dev;	/* shared ena-com device context */

	/* OS defined structs */
	device_t pdev;
        struct ethercom sc_ec;
	struct ifnet *ifp;		/* set to point to sc_ec */
	struct ifmedia	media;

	/* OS resources */
	kmutex_t global_mtx;
	krwlock_t ioctl_sx;

	/* MSI-X: one management vector plus one per IO queue pair. */
	void *sc_ihs[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
	pci_intr_handle_t *sc_intrs;
	int sc_nintrs;
	struct pci_attach_args sc_pa;

	/* Registers */
	bus_space_handle_t sc_bhandle;
	bus_space_tag_t	sc_btag;

	/* DMA tag used throughout the driver adapter for Tx and Rx */
	bus_dma_tag_t sc_dmat;
	int dma_width;			/* DMA address width in bits (<= 48) */

	uint32_t max_mtu;

	uint16_t max_tx_sgl_size;	/* max scatter-gather entries, TX */
	uint16_t max_rx_sgl_size;	/* max scatter-gather entries, RX */

	uint32_t tx_offload_cap;

	/* Tx fast path data */
	int num_queues;

	unsigned int tx_ring_size;
	unsigned int rx_ring_size;

	/* RSS*/
	uint8_t	rss_ind_tbl[ENA_RX_RSS_TABLE_SIZE];
	bool rss_support;

	uint8_t mac_addr[ETHER_ADDR_LEN];
	/* mdio and phy*/

	/* Driver state flags. */
	bool link_status;
	bool trigger_reset;
	bool up;
	bool running;

	/* Queue will represent one TX and one RX ring */
	struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* TX */
	struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* RX */
	struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* Timer service */
	struct callout timer_service;
	sbintime_t keep_alive_timestamp;	/* last device keep-alive seen */
	uint32_t next_monitored_tx_qid;
	struct work reset_task;
	struct workqueue *reset_tq;
	int wd_active;				/* watchdog active flag -- TODO confirm */
	sbintime_t keep_alive_timeout;
	sbintime_t missing_tx_timeout;
	uint32_t missing_tx_max_queues;
	uint32_t missing_tx_threshold;

	/* Statistics */
	struct ena_stats_dev dev_stats;
	struct ena_hw_stats hw_stats;

	enum ena_regs_reset_reason_types reset_reason;
};
387 
/* Per-ring mutex helpers wrapping the NetBSD mutex(9) API. */
#define	ENA_RING_MTX_LOCK(_ring)	mutex_enter(&(_ring)->ring_mtx)
#define	ENA_RING_MTX_TRYLOCK(_ring)	mutex_tryenter(&(_ring)->ring_mtx)
#define	ENA_RING_MTX_UNLOCK(_ring)	mutex_exit(&(_ring)->ring_mtx)
391 
392 static inline int ena_mbuf_count(struct mbuf *mbuf)
393 {
394 	int count = 1;
395 
396 	while ((mbuf = mbuf->m_next) != NULL)
397 		++count;
398 
399 	return count;
400 }
401 
/* provide FreeBSD-compatible macros */
#define	if_getcapenable(ifp)		(ifp)->if_capenable
/*
 * NOTE(review): SET() ORs the bits in, whereas FreeBSD's if_setcapenable()
 * assigns -- confirm callers only ever enable capabilities.
 */
#define	if_setcapenable(ifp, s)		SET((ifp)->if_capenable, s)
#define if_getcapabilities(ifp)		(ifp)->if_capabilities
#define if_setcapabilities(ifp, s)	SET((ifp)->if_capabilities, s)
/* Clear the bits in c, then set the bits in s. */
#define if_setcapabilitiesbit(ifp, s, c) do {	\
		CLR((ifp)->if_capabilities, c);	\
		SET((ifp)->if_capabilities, s);	\
	} while (0)
#define	if_getsoftc(ifp)		(ifp)->if_softc
#define if_setmtu(ifp, new_mtu)		(ifp)->if_mtu = (new_mtu)
#define if_getdrvflags(ifp)		(ifp)->if_flags
#define if_setdrvflagbits(ifp, s, c)	do {	\
		CLR((ifp)->if_flags, c);	\
		SET((ifp)->if_flags, s);	\
	} while (0)
#define	if_setflags(ifp, s)		SET((ifp)->if_flags, s)
/*
 * NOTE(review): maps FreeBSD's hwassist onto the RX checksum flags only;
 * recheck whether if_csum_flags_tx should be involved (see TODO below).
 */
#define if_sethwassistbits(ifp, s, c)	do {		\
		CLR((ifp)->if_csum_flags_rx, c);	\
		SET((ifp)->if_csum_flags_rx, s);	\
	} while (0)
#define if_clearhwassist(ifp)		(ifp)->if_csum_flags_rx = 0
#define if_setbaudrate(ifp, r)		(ifp)->if_baudrate = (r)
#define if_setdev(ifp, dev)		do { } while (0)	/* no NetBSD equivalent */
#define if_setsoftc(ifp, softc)		(ifp)->if_softc = (softc)
#define if_setinitfn(ifp, initfn)	(ifp)->if_init = (initfn)
#define if_settransmitfn(ifp, txfn)	(ifp)->if_transmit = (txfn)
#define if_setioctlfn(ifp, ioctlfn)	(ifp)->if_ioctl = (ioctlfn)
/* Send-queue length is clamped to at least IFQ_MAXLEN. */
#define if_setsendqlen(ifp, sqlen)	\
	IFQ_SET_MAXLEN(&(ifp)->if_snd, uimax(sqlen, IFQ_MAXLEN))
#define if_setsendqready(ifp)		IFQ_SET_READY(&(ifp)->if_snd)
#define if_setifheaderlen(ifp, len)	(ifp)->if_hdrlen = (len)

/* sbintime_t is 32.32 fixed point, so one second is 1 << 32. */
#define	SBT_1S	((sbintime_t)1 << 32)
#define bintime_clear(a)	((a)->sec = (a)->frac = 0)
#define	bintime_isset(a)	((a)->sec || (a)->frac)
438 
/*
 * Convert a struct bintime (seconds + 64-bit binary fraction) to sbintime_t
 * (32.32 fixed point): low 32 bits of sec, top 32 bits of the fraction.
 */
static __inline sbintime_t
bttosbt(const struct bintime _bt)
{
	return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32));
}
444 
/* Return the system uptime as an sbintime_t (wraps getbinuptime(9)). */
static __inline sbintime_t
getsbinuptime(void)
{
	struct bintime _bt;

	getbinuptime(&_bt);
	return (bttosbt(_bt));
}
453 
/* Intentionally non-atomic, it's just unnecessary overhead */
/* Map FreeBSD counter(9) onto evcnt(9); x is a struct evcnt, not a pointer. */
#define counter_u64_add(x, cnt)			(x).ev_count += (cnt)
#define counter_u64_zero(x)			(x).ev_count = 0
#define counter_u64_free(x)			evcnt_detach(&(x))

#define counter_u64_add_protected(x, cnt)	(x).ev_count += (cnt)
#define counter_enter()				do {} while (0)
#define counter_exit()				do {} while (0)

/* Misc other constants */
#define	mp_ncpus			ncpu
#define osreldate			__NetBSD_Version__
466 
/*
 * XXX XXX XXX just to make compile, must provide replacement XXX XXX XXX
 * Other than that, TODO:
 * - decide whether to import <sys/buf_ring.h>
 * - recheck the M_CSUM/IPCAP mapping
 * - recheck workqueue use - FreeBSD taskqueues might have different semantics
 *
 * These map FreeBSD's drbr_* (buf_ring) API onto the interface send queue
 * (ifp->if_snd).  There is no real ring: buf_ring_alloc() just returns the
 * address of its first argument.
 */
#define buf_ring_alloc(a, b, c, d)	(void *)&a
#define drbr_free(ifp, b)		do { } while (0)
#define drbr_flush(ifp, b)		IFQ_PURGE(&(ifp)->if_snd)
/* Dequeue (and yield) the head previously returned by drbr_peek(). */
#define drbr_advance(ifp, b)					\
	({							\
		struct mbuf *__m;				\
		IFQ_DEQUEUE(&(ifp)->if_snd, __m);		\
		__m;						\
	})
/*
 * No-op: drbr_peek() below uses IFQ_POLL, which does not dequeue, so there
 * is nothing to put back.
 */
#define drbr_putback(ifp, b, m)		do { } while (0)
#define drbr_empty(ifp, b)		IFQ_IS_EMPTY(&(ifp)->if_snd)
/* Return the head of the send queue without removing it. */
#define drbr_peek(ifp, b)					\
	({							\
		struct mbuf *__m;				\
		IFQ_POLL(&(ifp)->if_snd, __m);			\
		__m;						\
	})
/* Enqueue m; evaluates to 0 on success or an error per IFQ_ENQUEUE(9). */
#define drbr_enqueue(ifp, b, m)					\
	({							\
		int __err;					\
		IFQ_ENQUEUE(&(ifp)->if_snd, m, __err);		\
		__err;						\
	})
/* XXX: jumbo clusters not provided -- m_getjcl() always fails. */
#define m_getjcl(a, b, c, d)		NULL
#define MJUM16BYTES			MCLBYTES
#define m_append(m, len, cp)		ena_m_append(m, len, cp)
#define m_collapse(m, how, maxfrags)	m_defrag(m, how)	/* XXX */
/* XXX XXX XXX */
502 
/*
 * NetBSD replacement for FreeBSD's m_append(9): append len bytes from cpv
 * to the end of the mbuf chain m0, first filling trailing space in the last
 * mbuf, then allocating plain (non-cluster) mbufs as needed.  Returns 1 if
 * all bytes were appended, 0 if mbuf allocation failed part way.  If m0 has
 * a packet header, pkthdr.len is bumped by the bytes actually appended.
 */
static inline int
ena_m_append(struct mbuf *m0, int len, const void *cpv)
{
	struct mbuf *m, *n;
	int remainder, space;
	const char *cp = cpv;

	KASSERT(len != M_COPYALL);
	/* Walk to the last mbuf in the chain. */
	for (m = m0; m->m_next != NULL; m = m->m_next)
		continue;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		memmove(mtod(m, char *) + m->m_len, cp, space);
		m->m_len += space;
		cp = cp + space, remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_DONTWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = uimin(MLEN, remainder);
		memmove(mtod(n, void *), cp, n->m_len);
		cp += n->m_len, remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}
543 #endif /* !(ENA_H) */
544