/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */

#ifndef __MANA_H__
#define __MANA_H__

#define	PCI_VENDOR_ID_MICROSOFT		0x1414
#define PCI_DEVICE_ID_MICROSOFT_MANA	0x00ba

/* Shared data between primary/secondary processes */
struct mana_shared_data {
	rte_spinlock_t lock;
	int init_done;
	unsigned int primary_cnt;
	unsigned int secondary_cnt;
};

#define MIN_RX_BUF_SIZE	1024
#define MAX_FRAME_SIZE	RTE_ETHER_MAX_LEN
#define MANA_MAX_MAC_ADDR 1

#define MANA_DEV_RX_OFFLOAD_SUPPORT ( \
		RTE_ETH_RX_OFFLOAD_CHECKSUM | \
		RTE_ETH_RX_OFFLOAD_RSS_HASH)

#define MANA_DEV_TX_OFFLOAD_SUPPORT ( \
		RTE_ETH_TX_OFFLOAD_MULTI_SEGS | \
		RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \
		RTE_ETH_TX_OFFLOAD_TCP_CKSUM | \
		RTE_ETH_TX_OFFLOAD_UDP_CKSUM)

#define INDIRECTION_TABLE_NUM_ELEMENTS 64
#define TOEPLITZ_HASH_KEY_SIZE_IN_BYTES 40
#define MANA_ETH_RSS_SUPPORT ( \
	RTE_ETH_RSS_IPV4 |	     \
	RTE_ETH_RSS_NONFRAG_IPV4_TCP | \
	RTE_ETH_RSS_NONFRAG_IPV4_UDP | \
	RTE_ETH_RSS_IPV6 |	     \
	RTE_ETH_RSS_NONFRAG_IPV6_TCP | \
	RTE_ETH_RSS_NONFRAG_IPV6_UDP)

#define MIN_BUFFERS_PER_QUEUE		64
#define MAX_RECEIVE_BUFFERS_PER_QUEUE	256
#define MAX_SEND_BUFFERS_PER_QUEUE	256

#define GDMA_WQE_ALIGNMENT_UNIT_SIZE 32

#define COMP_ENTRY_SIZE 64
#define MAX_TX_WQE_SIZE 512
#define MAX_RX_WQE_SIZE 256

/* Values from the GDMA specification document, WQE format description */
#define INLINE_OOB_SMALL_SIZE_IN_BYTES 8
#define INLINE_OOB_LARGE_SIZE_IN_BYTES 24

#define NOT_USING_CLIENT_DATA_UNIT 0

enum tx_packet_format_v2 {
	SHORT_PACKET_FORMAT = 0,
	LONG_PACKET_FORMAT = 1
};

struct transmit_short_oob_v2 {
	enum tx_packet_format_v2 packet_format : 2;
	uint32_t tx_is_outer_ipv4 : 1;
	uint32_t tx_is_outer_ipv6 : 1;
	uint32_t tx_compute_IP_header_checksum : 1;
	uint32_t tx_compute_TCP_checksum : 1;
	uint32_t tx_compute_UDP_checksum : 1;
	uint32_t suppress_tx_CQE_generation : 1;
	uint32_t VCQ_number : 24;
	uint32_t tx_transport_header_offset : 10;
	uint32_t VSQ_frame_num : 14;
	uint32_t short_vport_offset : 8;
};

struct transmit_long_oob_v2 {
	uint32_t tx_is_encapsulated_packet : 1;
	uint32_t tx_inner_is_ipv6 : 1;
	uint32_t tx_inner_TCP_options_present : 1;
	uint32_t inject_vlan_prior_tag : 1;
	uint32_t reserved1 : 12;
	uint32_t priority_code_point : 3;
	uint32_t drop_eligible_indicator : 1;
	uint32_t vlan_identifier : 12;
	uint32_t tx_inner_frame_offset : 10;
	uint32_t tx_inner_IP_header_relative_offset : 6;
	uint32_t long_vport_offset : 12;
	uint32_t reserved3 : 4;
	uint32_t reserved4 : 32;
	uint32_t reserved5 : 32;
};

struct transmit_oob_v2 {
	struct transmit_short_oob_v2 short_oob;
	struct transmit_long_oob_v2 long_oob;
};

enum gdma_queue_types {
	GDMA_QUEUE_TYPE_INVALID  = 0,
	GDMA_QUEUE_SEND,
	GDMA_QUEUE_RECEIVE,
	GDMA_QUEUE_COMPLETION,
	GDMA_QUEUE_EVENT,
	GDMA_QUEUE_TYPE_MAX = 16,
	/* Room for expansion */

	/* This enum can be expanded to add more queue types, but it is
	 * expected to be done in a contiguous manner.
	 * Failing to do so will result in unexpected behavior.
	 */
};

#define WORK_QUEUE_NUMBER_BASE_BITS 10

struct gdma_header {
	/* Size of the entire gdma structure, including the full length of
	 * any struct formed by extending another gdma struct, e.g.
	 * GDMA_BASE_SPEC extends gdma_header and GDMA_EVENT_QUEUE_SPEC
	 * extends GDMA_BASE_SPEC, so struct_size for GDMA_EVENT_QUEUE_SPEC
	 * is the size of GDMA_EVENT_QUEUE_SPEC, which includes the size of
	 * GDMA_BASE_SPEC and the size of gdma_header.
	 * The above example is for illustration only and is not in the code.
	 */
	size_t struct_size;
};

/* The following macros are from GDMA SPEC 3.6, "Table 2: CQE data structure"
 * and "Table 4: Event Queue Entry (EQE) data format"
 */
#define GDMA_COMP_DATA_SIZE 0x3C /* Must be a multiple of 4 */
#define GDMA_COMP_DATA_SIZE_IN_UINT32 (GDMA_COMP_DATA_SIZE / 4)

#define COMPLETION_QUEUE_ENTRY_WORK_QUEUE_INDEX 0
#define COMPLETION_QUEUE_ENTRY_WORK_QUEUE_SIZE 24
#define COMPLETION_QUEUE_ENTRY_SEND_WORK_QUEUE_INDEX 24
#define COMPLETION_QUEUE_ENTRY_SEND_WORK_QUEUE_SIZE 1
#define COMPLETION_QUEUE_ENTRY_OWNER_BITS_INDEX 29
#define COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE 3

#define COMPLETION_QUEUE_OWNER_MASK \
	((1 << (COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE)) - 1)
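
/* Usage sketch (illustrative, not from the driver): the index/size macros
 * above describe the last 32-bit word of a hardware CQE. Assuming a local
 * variable "cqe" of type struct gdma_hardware_completion_entry (defined
 * below), the owner bits could be recovered manually as:
 *
 *	uint32_t owner = (cqe.work_queue_owner_bits >>
 *			  COMPLETION_QUEUE_ENTRY_OWNER_BITS_INDEX) &
 *			 COMPLETION_QUEUE_OWNER_MASK;
 *
 * which, on the usual little-endian bit-field layout, is equivalent to
 * reading cqe.owner_bits directly.
 */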

struct gdma_comp {
	struct gdma_header gdma_header;

	/* Filled by GDMA core */
	uint32_t completion_data[GDMA_COMP_DATA_SIZE_IN_UINT32];

	/* Filled by GDMA core */
	uint32_t work_queue_number;

	/* Filled by GDMA core */
	bool send_work_queue;
};

struct gdma_hardware_completion_entry {
	char dma_client_data[GDMA_COMP_DATA_SIZE];
	union {
		uint32_t work_queue_owner_bits;
		struct {
			uint32_t wq_num		: 24;
			uint32_t is_sq		: 1;
			uint32_t reserved	: 4;
			uint32_t owner_bits	: 3;
		};
	};
}; /* HW DATA */

struct gdma_posted_wqe_info {
	struct gdma_header gdma_header;

	/* Size of the written wqe in basic units (32B), filled by GDMA core.
	 * Use this value to progress the work queue after the wqe is
	 * processed by hardware.
	 */
	uint32_t wqe_size_in_bu;

	/* At the time of writing the wqe to the work queue, the offset in
	 * the work queue buffer where the wqe will be written. Each unit
	 * represents 32B of buffer space.
	 */
	uint32_t wqe_index;

	/* Unmasked offset in the queue to which the WQE was written,
	 * in 32-byte units.
	 */
	uint32_t unmasked_queue_offset;
};
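
/* Usage sketch (illustrative, not from the driver): the three fields above
 * are all expressed in 32-byte basic units (GDMA_WQE_ALIGNMENT_UNIT_SIZE),
 * so converting a posted WQE size back to bytes would look like:
 *
 *	uint32_t wqe_bytes =
 *		wqe_info.wqe_size_in_bu * GDMA_WQE_ALIGNMENT_UNIT_SIZE;
 *
 * "wqe_info" is a hypothetical struct gdma_posted_wqe_info filled in by
 * gdma_post_work_request(), declared later in this header.
 */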

struct gdma_sgl_element {
	uint64_t address;
	uint32_t memory_key;
	uint32_t size;
};

#define MAX_SGL_ENTRIES_FOR_TRANSMIT 30

struct one_sgl {
	struct gdma_sgl_element gdma_sgl[MAX_SGL_ENTRIES_FOR_TRANSMIT];
};

struct gdma_work_request {
	struct gdma_header gdma_header;
	struct gdma_sgl_element *sgl;
	uint32_t num_sgl_elements;
	uint32_t inline_oob_size_in_bytes;
	void *inline_oob_data;
	uint32_t flags; /* From _gdma_work_request_FLAGS */
	uint32_t client_data_unit; /* For LSO, this is the MTU of the data */
};

enum mana_cqe_type {
	CQE_INVALID                     = 0,

	CQE_RX_OKAY                     = 1,
	CQE_RX_COALESCED_4              = 2,
	CQE_RX_OBJECT_FENCE             = 3,
	CQE_RX_TRUNCATED                = 4,

	CQE_TX_OKAY                     = 32,
	CQE_TX_SA_DROP                  = 33,
	CQE_TX_MTU_DROP                 = 34,
	CQE_TX_INVALID_OOB              = 35,
	CQE_TX_INVALID_ETH_TYPE         = 36,
	CQE_TX_HDR_PROCESSING_ERROR     = 37,
	CQE_TX_VF_DISABLED              = 38,
	CQE_TX_VPORT_IDX_OUT_OF_RANGE   = 39,
	CQE_TX_VPORT_DISABLED           = 40,
	CQE_TX_VLAN_TAGGING_VIOLATION   = 41,
};

struct mana_cqe_header {
	uint32_t cqe_type    : 6;
	uint32_t client_type : 2;
	uint32_t vendor_err  : 24;
}; /* HW DATA */

struct mana_tx_comp_oob {
	struct mana_cqe_header cqe_hdr;

	uint32_t tx_data_offset;

	uint32_t tx_sgl_offset       : 5;
	uint32_t tx_wqe_offset       : 27;

	uint32_t reserved[12];
}; /* HW DATA */

/* NDIS HASH Types */
#define NDIS_HASH_IPV4          RTE_BIT32(0)
#define NDIS_HASH_TCP_IPV4      RTE_BIT32(1)
#define NDIS_HASH_UDP_IPV4      RTE_BIT32(2)
#define NDIS_HASH_IPV6          RTE_BIT32(3)
#define NDIS_HASH_TCP_IPV6      RTE_BIT32(4)
#define NDIS_HASH_UDP_IPV6      RTE_BIT32(5)
#define NDIS_HASH_IPV6_EX       RTE_BIT32(6)
#define NDIS_HASH_TCP_IPV6_EX   RTE_BIT32(7)
#define NDIS_HASH_UDP_IPV6_EX   RTE_BIT32(8)

#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX)
#define MANA_HASH_L4                                                         \
	(NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 |      \
	 NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX)
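
/* Usage sketch (illustrative, not from the driver): the NDIS hash type
 * reported in mana_rx_comp_oob::rx_hash_type (defined below) can be
 * classified with the two masks above, e.g. when deciding how to report an
 * RSS hash to the application:
 *
 *	bool l3_hash = (oob->rx_hash_type & MANA_HASH_L3) != 0;
 *	bool l4_hash = (oob->rx_hash_type & MANA_HASH_L4) != 0;
 *
 * "oob" is a hypothetical pointer to a received struct mana_rx_comp_oob.
 */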

struct mana_rx_comp_per_packet_info {
	uint32_t packet_length	: 16;
	uint32_t reserved0	: 16;
	uint32_t reserved1;
	uint32_t packet_hash;
}; /* HW DATA */
#define RX_COM_OOB_NUM_PACKETINFO_SEGMENTS 4

struct mana_rx_comp_oob {
	struct mana_cqe_header cqe_hdr;

	uint32_t rx_vlan_id				: 12;
	uint32_t rx_vlan_tag_present			: 1;
	uint32_t rx_outer_ip_header_checksum_succeeded	: 1;
	uint32_t rx_outer_ip_header_checksum_failed	: 1;
	uint32_t reserved				: 1;
	uint32_t rx_hash_type				: 9;
	uint32_t rx_ip_header_checksum_succeeded	: 1;
	uint32_t rx_ip_header_checksum_failed		: 1;
	uint32_t rx_tcp_checksum_succeeded		: 1;
	uint32_t rx_tcp_checksum_failed			: 1;
	uint32_t rx_udp_checksum_succeeded		: 1;
	uint32_t rx_udp_checksum_failed			: 1;
	uint32_t reserved1				: 1;
	struct mana_rx_comp_per_packet_info
		packet_info[RX_COM_OOB_NUM_PACKETINFO_SEGMENTS];
	uint32_t received_wqe_offset;
}; /* HW DATA */

struct gdma_wqe_dma_oob {
	uint32_t reserved:24;
	uint32_t last_v_bytes:8;
	union {
		uint32_t flags;
		struct {
			uint32_t num_sgl_entries:8;
			uint32_t inline_client_oob_size_in_dwords:3;
			uint32_t client_oob_in_sgl:1;
			uint32_t consume_credit:1;
			uint32_t fence:1;
			uint32_t reserved1:2;
			uint32_t client_data_unit:14;
			uint32_t check_sn:1;
			uint32_t sgl_direct:1;
		};
	};
};

struct mana_mr_cache {
	uint32_t	lkey;
	uintptr_t	addr;
	size_t		len;
	void		*verb_obj;
};

#define MANA_MR_BTREE_CACHE_N	512
struct mana_mr_btree {
	uint16_t	len;	/* Used entries */
	uint16_t	size;	/* Total entries */
	int		overflow;
	int		socket;
	struct mana_mr_cache *table;
};

struct mana_process_priv {
	void *db_page;
};

struct mana_priv {
	struct rte_eth_dev_data *dev_data;
	struct mana_process_priv *process_priv;
	int num_queues;

	/* DPDK port */
	uint16_t port_id;

	/* IB device port */
	uint8_t dev_port;

	struct ibv_context *ib_ctx;
	struct ibv_pd *ib_pd;
	struct ibv_pd *ib_parent_pd;
	struct ibv_rwq_ind_table *ind_table;
	struct ibv_qp *rwq_qp;
	void *db_page;
	struct rte_eth_rss_conf rss_conf;
	struct rte_intr_handle *intr_handle;
	int max_rx_queues;
	int max_tx_queues;
	int max_rx_desc;
	int max_tx_desc;
	int max_send_sge;
	int max_recv_sge;
	int max_mr;
	uint64_t max_mr_size;
	struct mana_mr_btree mr_btree;
	rte_spinlock_t	mr_btree_lock;
};

struct mana_txq_desc {
	struct rte_mbuf *pkt;
	uint32_t wqe_size_in_bu;
};

struct mana_rxq_desc {
	struct rte_mbuf *pkt;
	uint32_t wqe_size_in_bu;
};

struct mana_stats {
	uint64_t packets;
	uint64_t bytes;
	uint64_t errors;
	uint64_t nombuf;
};

struct mana_gdma_queue {
	void *buffer;
	uint32_t count;	/* in entries */
	uint32_t size;	/* in bytes */
	uint32_t id;
	uint32_t head;
	uint32_t tail;
};

#define MANA_MR_BTREE_PER_QUEUE_N	64

struct mana_txq {
	struct mana_priv *priv;
	uint32_t num_desc;
	struct ibv_cq *cq;
	struct ibv_qp *qp;

	struct mana_gdma_queue gdma_sq;
	struct mana_gdma_queue gdma_cq;

	uint32_t tx_vp_offset;

	/* For storing pending requests */
	struct mana_txq_desc *desc_ring;

	/* desc_ring_head is where pending requests are added to the ring;
	 * completions are pulled off at desc_ring_tail.
	 */
	uint32_t desc_ring_head, desc_ring_tail;

	struct mana_mr_btree mr_btree;
	struct mana_stats stats;
	unsigned int socket;
};
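
/* Usage sketch (illustrative, not from the driver): desc_ring acts as a
 * circular buffer of num_desc entries. A transmit path following the comment
 * above might record a posted packet as:
 *
 *	txq->desc_ring[txq->desc_ring_head] =
 *		(struct mana_txq_desc){ .pkt = mbuf, .wqe_size_in_bu = bu };
 *	txq->desc_ring_head = (txq->desc_ring_head + 1) % txq->num_desc;
 *
 * while the completion path frees desc_ring[desc_ring_tail].pkt and
 * advances desc_ring_tail the same way. "mbuf" and "bu" are hypothetical
 * locals.
 */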

struct mana_rxq {
	struct mana_priv *priv;
	uint32_t num_desc;
	struct rte_mempool *mp;
	struct ibv_cq *cq;
	struct ibv_comp_channel *channel;
	struct ibv_wq *wq;

	/* For storing pending requests */
	struct mana_rxq_desc *desc_ring;

	/* desc_ring_head is where pending requests are added to the ring;
	 * completions are pulled off at desc_ring_tail.
	 */
	uint32_t desc_ring_head, desc_ring_tail;

	struct mana_gdma_queue gdma_rq;
	struct mana_gdma_queue gdma_cq;

	struct mana_stats stats;
	struct mana_mr_btree mr_btree;

	unsigned int socket;
};

extern int mana_logtype_driver;
extern int mana_logtype_init;

#define DRV_LOG(level, fmt, args...) \
	rte_log(RTE_LOG_ ## level, mana_logtype_driver, "%s(): " fmt "\n", \
		__func__, ## args)

#define PMD_INIT_LOG(level, fmt, args...) \
	rte_log(RTE_LOG_ ## level, mana_logtype_init, "%s(): " fmt "\n", \
		__func__, ## args)

#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
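
/* Usage sketch (illustrative only): the logging macros expand to rte_log()
 * calls prefixed with the calling function name, so a driver routine could
 * report an error as:
 *
 *	DRV_LOG(ERR, "failed to create qp on port %u", priv->port_id);
 *
 * which logs under the mana_logtype_driver log type at RTE_LOG_ERR level.
 */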

int mana_ring_doorbell(void *db_page, enum gdma_queue_types queue_type,
		       uint32_t queue_id, uint32_t tail, uint8_t arm);
int mana_rq_ring_doorbell(struct mana_rxq *rxq, uint8_t arm);

int gdma_post_work_request(struct mana_gdma_queue *queue,
			   struct gdma_work_request *work_req,
			   struct gdma_posted_wqe_info *wqe_info);
uint8_t *gdma_get_wqe_pointer(struct mana_gdma_queue *queue);

uint16_t mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **rx_pkts,
		       uint16_t pkts_n);
uint16_t mana_tx_burst(void *dpdk_txq, struct rte_mbuf **tx_pkts,
		       uint16_t pkts_n);

uint16_t mana_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
			       uint16_t pkts_n);

uint16_t mana_tx_burst_removed(void *dpdk_txq, struct rte_mbuf **pkts,
			       uint16_t pkts_n);

int gdma_poll_completion_queue(struct mana_gdma_queue *cq,
			       struct gdma_comp *comp);

int mana_start_rx_queues(struct rte_eth_dev *dev);
int mana_start_tx_queues(struct rte_eth_dev *dev);

int mana_stop_rx_queues(struct rte_eth_dev *dev);
int mana_stop_tx_queues(struct rte_eth_dev *dev);

struct mana_mr_cache *mana_find_pmd_mr(struct mana_mr_btree *local_tree,
				       struct mana_priv *priv,
				       struct rte_mbuf *mbuf);
int mana_new_pmd_mr(struct mana_mr_btree *local_tree, struct mana_priv *priv,
		    struct rte_mempool *pool);
void mana_remove_all_mr(struct mana_priv *priv);
void mana_del_pmd_mr(struct mana_mr_cache *mr);
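
/* Usage sketch (illustrative, not from the driver): the per-queue MR btree
 * is typically consulted on the data path to translate an mbuf's buffer into
 * a registered memory key. Assuming a TX queue "txq" and an mbuf "m":
 *
 *	struct mana_mr_cache *mr =
 *		mana_find_pmd_mr(&txq->mr_btree, txq->priv, m);
 *	if (mr)
 *		sge->memory_key = mr->lkey;
 *
 * "sge" is a hypothetical struct gdma_sgl_element being filled for a WQE.
 */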

void mana_mempool_chunk_cb(struct rte_mempool *mp, void *opaque,
			   struct rte_mempool_memhdr *memhdr, unsigned int idx);

struct mana_mr_cache *mana_mr_btree_lookup(struct mana_mr_btree *bt,
					   uint16_t *idx,
					   uintptr_t addr, size_t len);
int mana_mr_btree_insert(struct mana_mr_btree *bt, struct mana_mr_cache *entry);
int mana_mr_btree_init(struct mana_mr_btree *bt, int n, int socket);
void mana_mr_btree_free(struct mana_mr_btree *bt);

/** Request timeout for IPC. */
#define MANA_MP_REQ_TIMEOUT_SEC 5

/* Request types for IPC. */
enum mana_mp_req_type {
	MANA_MP_REQ_VERBS_CMD_FD = 1,
	MANA_MP_REQ_CREATE_MR,
	MANA_MP_REQ_START_RXTX,
	MANA_MP_REQ_STOP_RXTX,
};

/* Parameters for IPC. */
struct mana_mp_param {
	enum mana_mp_req_type type;
	int port_id;
	int result;

	/* MANA_MP_REQ_CREATE_MR */
	uintptr_t addr;
	uint32_t len;
};

#define MANA_MP_NAME	"net_mana_mp"
int mana_mp_init_primary(void);
int mana_mp_init_secondary(void);
void mana_mp_uninit_primary(void);
void mana_mp_uninit_secondary(void);
int mana_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
int mana_mp_req_mr_create(struct mana_priv *priv, uintptr_t addr, uint32_t len);

void mana_mp_req_on_rxtx(struct rte_eth_dev *dev, enum mana_mp_req_type type);

void *mana_alloc_verbs_buf(size_t size, void *data);
void mana_free_verbs_buf(void *ptr, void *data __rte_unused);

int mana_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int mana_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int mana_fd_set_non_blocking(int fd);

#endif