xref: /dpdk/drivers/net/memif/rte_eth_memif.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
3  */
4 
5 #include <stdlib.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/socket.h>
10 #include <sys/un.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <linux/if_ether.h>
14 #include <errno.h>
15 #include <sys/eventfd.h>
16 
17 #include <rte_version.h>
18 #include <rte_mbuf.h>
19 #include <rte_ether.h>
20 #include <ethdev_driver.h>
21 #include <ethdev_vdev.h>
22 #include <rte_malloc.h>
23 #include <rte_kvargs.h>
24 #include <rte_bus_vdev.h>
25 #include <rte_string_fns.h>
26 #include <rte_errno.h>
27 #include <rte_memory.h>
28 #include <rte_memzone.h>
29 #include <rte_eal_memconfig.h>
30 
31 #include "rte_eth_memif.h"
32 #include "memif_socket.h"
33 
34 #define ETH_MEMIF_ID_ARG		"id"
35 #define ETH_MEMIF_ROLE_ARG		"role"
36 #define ETH_MEMIF_PKT_BUFFER_SIZE_ARG	"bsize"
37 #define ETH_MEMIF_RING_SIZE_ARG		"rsize"
38 #define ETH_MEMIF_SOCKET_ARG		"socket"
39 #define ETH_MEMIF_SOCKET_ABSTRACT_ARG	"socket-abstract"
40 #define ETH_MEMIF_MAC_ARG		"mac"
41 #define ETH_MEMIF_ZC_ARG		"zero-copy"
42 #define ETH_MEMIF_SECRET_ARG		"secret"
43 
44 static const char * const valid_arguments[] = {
45 	ETH_MEMIF_ID_ARG,
46 	ETH_MEMIF_ROLE_ARG,
47 	ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
48 	ETH_MEMIF_RING_SIZE_ARG,
49 	ETH_MEMIF_SOCKET_ARG,
50 	ETH_MEMIF_SOCKET_ABSTRACT_ARG,
51 	ETH_MEMIF_MAC_ARG,
52 	ETH_MEMIF_ZC_ARG,
53 	ETH_MEMIF_SECRET_ARG,
54 	NULL
55 };
56 
57 static const struct rte_eth_link pmd_link = {
58 	.link_speed = RTE_ETH_SPEED_NUM_10G,
59 	.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
60 	.link_status = RTE_ETH_LINK_DOWN,
61 	.link_autoneg = RTE_ETH_LINK_AUTONEG
62 };
63 
64 #define MEMIF_MP_SEND_REGION		"memif_mp_send_region"
65 
66 
67 static int memif_region_init_zc(const struct rte_memseg_list *msl,
68 				const struct rte_memseg *ms, void *arg);
69 
70 const char *
71 memif_version(void)
72 {
73 	return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
74 }
75 
76 /* Message header to synchronize regions */
77 struct mp_region_msg {
78 	char port_name[RTE_DEV_NAME_MAX_LEN];
79 	memif_region_index_t idx;
80 	memif_region_size_t size;
81 };
82 
83 static int
84 memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
85 {
86 	struct rte_eth_dev *dev;
87 	struct pmd_process_private *proc_private;
88 	const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
89 	struct rte_mp_msg reply;
90 	struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
91 	uint16_t port_id;
92 	int ret;
93 
94 	/* Get requested port */
95 	ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
96 	if (ret) {
97 		MIF_LOG(ERR, "Failed to get port id for %s",
98 			msg_param->port_name);
99 		return -1;
100 	}
101 	dev = &rte_eth_devices[port_id];
102 	proc_private = dev->process_private;
103 
104 	memset(&reply, 0, sizeof(reply));
105 	strlcpy(reply.name, msg->name, sizeof(reply.name));
106 	reply_param->idx = msg_param->idx;
107 	if (proc_private->regions[msg_param->idx] != NULL) {
108 		reply_param->size = proc_private->regions[msg_param->idx]->region_size;
109 		reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
110 		reply.num_fds = 1;
111 	}
112 	reply.len_param = sizeof(*reply_param);
113 	if (rte_mp_reply(&reply, peer) < 0) {
114 		MIF_LOG(ERR, "Failed to reply to an add region request");
115 		return -1;
116 	}
117 
118 	return 0;
119 }
120 
121 /*
122  * Request regions
123  * Called by a secondary process when the port's link status goes up.
124  */
125 static int
126 memif_mp_request_regions(struct rte_eth_dev *dev)
127 {
128 	int ret, i;
129 	struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
130 	struct rte_mp_msg msg, *reply;
131 	struct rte_mp_reply replies;
132 	struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
133 	struct mp_region_msg *reply_param;
134 	struct memif_region *r;
135 	struct pmd_process_private *proc_private = dev->process_private;
136 	struct pmd_internals *pmd = dev->data->dev_private;
137 	/* in case of zero-copy client, only request region 0 */
138 	uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
139 				   1 : ETH_MEMIF_MAX_REGION_NUM;
140 
141 	MIF_LOG(DEBUG, "Requesting memory regions");
142 
143 	for (i = 0; i < max_region_num; i++) {
144 		/* Prepare the message */
145 		memset(&msg, 0, sizeof(msg));
146 		strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
147 		strlcpy(msg_param->port_name, dev->data->name,
148 			sizeof(msg_param->port_name));
149 		msg_param->idx = i;
150 		msg.len_param = sizeof(*msg_param);
151 
152 		/* Send message */
153 		ret = rte_mp_request_sync(&msg, &replies, &timeout);
154 		if (ret < 0 || replies.nb_received != 1) {
155 			MIF_LOG(ERR, "Failed to send mp msg: %d",
156 				rte_errno);
157 			return -1;
158 		}
159 
160 		reply = &replies.msgs[0];
161 		reply_param = (struct mp_region_msg *)reply->param;
162 
163 		if (reply_param->size > 0) {
164 			r = rte_zmalloc("region", sizeof(struct memif_region), 0);
165 			if (r == NULL) {
166 				MIF_LOG(ERR, "Failed to alloc memif region.");
167 				free(reply);
168 				return -ENOMEM;
169 			}
170 			r->region_size = reply_param->size;
171 			if (reply->num_fds < 1) {
172 				MIF_LOG(ERR, "Missing file descriptor.");
				rte_free(r);
173 				free(reply);
174 				return -1;
175 			}
176 			r->fd = reply->fds[0];
177 			r->addr = NULL;
178 
179 			proc_private->regions[reply_param->idx] = r;
180 			proc_private->regions_num++;
181 		}
182 		free(reply);
183 	}
184 
185 	if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
186 		ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
187 		if (ret < 0)
188 			return ret;
189 	}
190 
191 	return memif_connect(dev);
192 }
193 
194 static int
195 memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
196 {
197 	dev_info->max_mac_addrs = 1;
198 	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
199 	dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
200 	dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
201 	dev_info->min_rx_bufsize = 0;
202 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
203 
204 	return 0;
205 }
206 
207 static memif_ring_t *
208 memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
209 	       memif_ring_type_t type, uint16_t ring_num)
210 {
211 	/* rings only in region 0 */
212 	void *p = proc_private->regions[0]->addr;
213 	int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
214 	    (1 << pmd->run.log2_ring_size);
215 
216 	p = (uint8_t *)p + (ring_num + type * pmd->run.num_c2s_rings) * ring_size;
217 
218 	return (memif_ring_t *)p;
219 }
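
/*
 * Illustrative sketch (not part of the driver): region 0 lays out all C2S
 * rings first, then all S2C rings, each occupying sizeof(memif_ring_t)
 * plus (1 << log2_ring_size) descriptors. The helper below mirrors the
 * pointer arithmetic of memif_get_ring() as a plain byte offset; the
 * function name is a local assumption, not part of the memif ABI.
 */
static size_t __rte_unused
memif_example_ring_byte_offset(memif_ring_type_t type, uint16_t ring_num,
			       uint16_t num_c2s_rings,
			       memif_log2_ring_size_t log2_ring_size)
{
	size_t ring_size = sizeof(memif_ring_t) +
	    sizeof(memif_desc_t) * ((size_t)1 << log2_ring_size);

	/* an S2C ring (type == 1) starts after all num_c2s_rings C2S rings */
	return ((size_t)ring_num + (size_t)type * num_c2s_rings) * ring_size;
}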
220 
221 static memif_region_offset_t
222 memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
223 		      memif_ring_type_t type, uint16_t num)
224 {
225 	struct pmd_internals *pmd = dev->data->dev_private;
226 	struct pmd_process_private *proc_private = dev->process_private;
227 
228 	return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
229 		(uint8_t *)proc_private->regions[mq->region]->addr);
230 }
231 
232 static memif_ring_t *
233 memif_get_ring_from_queue(struct pmd_process_private *proc_private,
234 			  struct memif_queue *mq)
235 {
236 	struct memif_region *r;
237 
238 	r = proc_private->regions[mq->region];
239 	if (r == NULL)
240 		return NULL;
241 
242 	return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
243 }
244 
245 static void *
246 memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
247 {
248 	return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
249 }
250 
251 /* Free mbufs received by server */
252 static void
253 memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
254 {
255 	uint16_t cur_tail;
256 	uint16_t mask = (1 << mq->log2_ring_size) - 1;
257 	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
258 
259 	/* FIXME: improve performance */
260 	/* The ring->tail acts as a guard variable between Tx and Rx
261 	 * threads, so this load-acquire pairs with the store-release
262 	 * in eth_memif_rx() for C2S queues.
263 	 */
264 	cur_tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
265 	while (mq->last_tail != cur_tail) {
266 		RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
267 		/* Decrement refcnt and free mbuf. (current segment) */
268 		rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
269 		rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
270 		mq->last_tail++;
271 	}
272 }
273 
274 static int
275 memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
276 		    struct rte_mbuf *tail)
277 {
278 	/* Check for number-of-segments-overflow */
279 	if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
280 		return -EOVERFLOW;
281 
282 	/* Chain 'tail' onto the old tail */
283 	cur_tail->next = tail;
284 
285 	/* accumulate number of segments and total length. */
286 	head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
287 
288 	tail->pkt_len = tail->data_len;
289 	head->pkt_len += tail->pkt_len;
290 
291 	return 0;
292 }
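
/*
 * Usage sketch (hypothetical helper, not driver code): append a freshly
 * allocated segment to an existing packet, the same way eth_memif_rx()
 * grows a packet that spans several chained ring descriptors.
 */
static int __rte_unused
memif_example_append_seg(struct rte_mempool *mp, struct rte_mbuf *head,
			 struct rte_mbuf *cur_tail)
{
	struct rte_mbuf *seg = rte_pktmbuf_alloc(mp);

	if (seg == NULL)
		return -ENOMEM;
	if (memif_pktmbuf_chain(head, cur_tail, seg) < 0) {
		/* head->nb_segs would exceed RTE_MBUF_MAX_NB_SEGS */
		rte_pktmbuf_free(seg);
		return -EOVERFLOW;
	}
	return 0;
}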
293 
294 static uint16_t
295 eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
296 {
297 	struct memif_queue *mq = queue;
298 	struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
299 	struct pmd_process_private *proc_private =
300 		rte_eth_devices[mq->in_port].process_private;
301 	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
302 	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
303 	uint16_t n_rx_pkts = 0;
304 	uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
305 		RTE_PKTMBUF_HEADROOM;
306 	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
307 	memif_ring_type_t type = mq->type;
308 	memif_desc_t *d0;
309 	struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
310 	uint64_t b;
311 	ssize_t size __rte_unused;
312 	uint16_t head;
313 	int ret;
314 	struct rte_eth_link link;
315 
316 	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
317 		return 0;
318 	if (unlikely(ring == NULL)) {
319 		/* Secondary process will attempt to request regions. */
320 		ret = rte_eth_link_get(mq->in_port, &link);
321 		if (ret < 0)
322 			MIF_LOG(ERR, "Failed to get port %u link info: %s",
323 				mq->in_port, rte_strerror(-ret));
324 		return 0;
325 	}
326 
327 	/* consume interrupt */
328 	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
329 		size = read(mq->intr_handle.fd, &b, sizeof(b));
330 
331 	ring_size = 1 << mq->log2_ring_size;
332 	mask = ring_size - 1;
333 
334 	if (type == MEMIF_RING_C2S) {
335 		cur_slot = mq->last_head;
336 		last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
337 	} else {
338 		cur_slot = mq->last_tail;
339 		last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
340 	}
341 
342 	if (cur_slot == last_slot)
343 		goto refill;
344 	n_slots = last_slot - cur_slot;
345 
346 	while (n_slots && n_rx_pkts < nb_pkts) {
347 		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
348 		if (unlikely(mbuf_head == NULL))
349 			goto no_free_bufs;
350 		mbuf = mbuf_head;
351 		mbuf->port = mq->in_port;
352 
353 next_slot:
354 		s0 = cur_slot & mask;
355 		d0 = &ring->desc[s0];
356 
357 		src_len = d0->length;
358 		dst_off = 0;
359 		src_off = 0;
360 
361 		do {
362 			dst_len = mbuf_size - dst_off;
363 			if (dst_len == 0) {
364 				dst_off = 0;
365 				dst_len = mbuf_size;
366 
367 				/* store pointer to tail */
368 				mbuf_tail = mbuf;
369 				mbuf = rte_pktmbuf_alloc(mq->mempool);
370 				if (unlikely(mbuf == NULL))
371 					goto no_free_bufs;
372 				mbuf->port = mq->in_port;
373 				ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
374 				if (unlikely(ret < 0)) {
375 					MIF_LOG(ERR, "number-of-segments-overflow");
376 					rte_pktmbuf_free(mbuf);
377 					goto no_free_bufs;
378 				}
379 			}
380 			cp_len = RTE_MIN(dst_len, src_len);
381 
382 			rte_pktmbuf_data_len(mbuf) += cp_len;
383 			rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
384 			if (mbuf != mbuf_head)
385 				rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
386 
387 			rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
388 							   dst_off),
389 				(uint8_t *)memif_get_buffer(proc_private, d0) +
390 				src_off, cp_len);
391 
392 			src_off += cp_len;
393 			dst_off += cp_len;
394 			src_len -= cp_len;
395 		} while (src_len);
396 
397 		cur_slot++;
398 		n_slots--;
399 
400 		if (d0->flags & MEMIF_DESC_FLAG_NEXT)
401 			goto next_slot;
402 
403 		mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
404 		*bufs++ = mbuf_head;
405 		n_rx_pkts++;
406 	}
407 
408 no_free_bufs:
409 	if (type == MEMIF_RING_C2S) {
410 		__atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
411 		mq->last_head = cur_slot;
412 	} else {
413 		mq->last_tail = cur_slot;
414 	}
415 
416 refill:
417 	if (type == MEMIF_RING_S2C) {
418 		/* ring->head is updated by the receiver and this function
419 		 * is called in the context of the receiver thread. The loads in
420 		 * the receiver do not need to synchronize with its own stores.
421 		 */
422 		head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
423 		n_slots = ring_size - head + mq->last_tail;
424 
425 		while (n_slots--) {
426 			s0 = head++ & mask;
427 			d0 = &ring->desc[s0];
428 			d0->length = pmd->run.pkt_buffer_size;
429 		}
430 		__atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
431 	}
432 
433 	mq->n_pkts += n_rx_pkts;
434 	return n_rx_pkts;
435 }
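
/*
 * Minimal sketch (assuming a single consumer) of the C2S consumer protocol
 * implemented above: the producer publishes new slots with a store-release
 * on ring->head, the consumer pairs it with a load-acquire, drains
 * [last_head, head) and publishes completion with a store-release on
 * ring->tail, which pairs with the producer's load-acquire in turn.
 */
static uint16_t __rte_unused
memif_example_consume_c2s(memif_ring_t *ring, uint16_t last_head,
			  uint16_t mask)
{
	uint16_t head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);

	while (last_head != head) {
		/* ring->desc[last_head & mask] is owned by the consumer here */
		rte_prefetch0(&ring->desc[(last_head + 1) & mask]);
		last_head++;
	}
	__atomic_store_n(&ring->tail, last_head, __ATOMIC_RELEASE);
	return last_head;
}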
436 
437 static uint16_t
438 eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
439 {
440 	struct memif_queue *mq = queue;
441 	struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
442 	struct pmd_process_private *proc_private =
443 		rte_eth_devices[mq->in_port].process_private;
444 	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
445 	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
446 	uint16_t n_rx_pkts = 0;
447 	memif_desc_t *d0;
448 	struct rte_mbuf *mbuf, *mbuf_tail;
449 	struct rte_mbuf *mbuf_head = NULL;
450 	int ret;
451 	struct rte_eth_link link;
452 
453 	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
454 		return 0;
455 	if (unlikely(ring == NULL)) {
456 		/* Secondary process will attempt to request regions. */
457 		rte_eth_link_get(mq->in_port, &link);
458 		return 0;
459 	}
460 
461 	/* consume interrupt */
462 	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
463 		uint64_t b;
464 		ssize_t size __rte_unused;
465 		size = read(mq->intr_handle.fd, &b, sizeof(b));
466 	}
467 
468 	ring_size = 1 << mq->log2_ring_size;
469 	mask = ring_size - 1;
470 
471 	cur_slot = mq->last_tail;
472 	/* The ring->tail acts as a guard variable between Tx and Rx
473 	 * threads, so this load-acquire pairs with the peer's store-release
474 	 * to synchronize it between threads.
475 	 */
476 	last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
477 	if (cur_slot == last_slot)
478 		goto refill;
479 	n_slots = last_slot - cur_slot;
480 
481 	while (n_slots && n_rx_pkts < nb_pkts) {
482 		s0 = cur_slot & mask;
483 
484 		d0 = &ring->desc[s0];
485 		mbuf_head = mq->buffers[s0];
486 		mbuf = mbuf_head;
487 
488 next_slot:
489 		/* prefetch next descriptor */
490 		if (n_rx_pkts + 1 < nb_pkts)
491 			rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);
492 
493 		mbuf->port = mq->in_port;
494 		rte_pktmbuf_data_len(mbuf) = d0->length;
495 		rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
496 
497 		mq->n_bytes += rte_pktmbuf_data_len(mbuf);
498 
499 		cur_slot++;
500 		n_slots--;
501 		if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
502 			s0 = cur_slot & mask;
503 			d0 = &ring->desc[s0];
504 			mbuf_tail = mbuf;
505 			mbuf = mq->buffers[s0];
506 			ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
507 			if (unlikely(ret < 0)) {
508 				MIF_LOG(ERR, "number-of-segments-overflow");
509 				goto refill;
510 			}
511 			goto next_slot;
512 		}
513 
514 		*bufs++ = mbuf_head;
515 		n_rx_pkts++;
516 	}
517 
518 	mq->last_tail = cur_slot;
519 
520 /* Supply server with new buffers */
521 refill:
522 	/* ring->head is updated by the receiver and this function
523 	 * is called in the context of the receiver thread. The loads in
524 	 * the receiver do not need to synchronize with its own stores.
525 	 */
526 	head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
527 	n_slots = ring_size - head + mq->last_tail;
528 
	/* wait for at least 32 free slots to amortize the bulk allocation */
529 	if (n_slots < 32)
530 		goto no_free_mbufs;
531 
532 	ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
533 	if (unlikely(ret < 0))
534 		goto no_free_mbufs;
535 
536 	while (n_slots--) {
537 		s0 = head++ & mask;
538 		if (n_slots > 0)
539 			rte_prefetch0(mq->buffers[head & mask]);
540 		d0 = &ring->desc[s0];
541 		/* store buffer header */
542 		mbuf = mq->buffers[s0];
543 		/* populate descriptor */
544 		d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
545 				RTE_PKTMBUF_HEADROOM;
546 		d0->region = 1;
547 		d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
548 			(uint8_t *)proc_private->regions[d0->region]->addr;
549 	}
550 no_free_mbufs:
551 	/* The ring->head acts as a guard variable between Tx and Rx
552 	 * threads, so this store-release pairs with the load-acquire
553 	 * in eth_memif_tx().
554 	 */
555 	__atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
556 
557 	mq->n_pkts += n_rx_pkts;
558 
559 	return n_rx_pkts;
560 }
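
/*
 * Sketch of how the refill loop above turns an mbuf into the (region,
 * offset) pair the server translates back into a pointer on its side.
 * Region 1 is the first memseg-backed region (region 0 holds only the
 * rings); the helper name and mempool parameter are local assumptions.
 */
static void __rte_unused
memif_example_fill_zc_desc(struct pmd_process_private *proc_private,
			   memif_desc_t *d, struct rte_mbuf *mbuf,
			   struct rte_mempool *mp)
{
	d->region = 1;
	d->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
		(uint8_t *)proc_private->regions[d->region]->addr;
	/* full writable room of a fresh mbuf, as in the refill loop above */
	d->length = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
}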
561 
562 static uint16_t
563 eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
564 {
565 	struct memif_queue *mq = queue;
566 	struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
567 	struct pmd_process_private *proc_private =
568 		rte_eth_devices[mq->in_port].process_private;
569 	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
570 	uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
571 	uint16_t src_len, src_off, dst_len, dst_off, cp_len, nb_segs;
572 	memif_ring_type_t type = mq->type;
573 	memif_desc_t *d0;
574 	struct rte_mbuf *mbuf;
575 	struct rte_mbuf *mbuf_head;
576 	uint64_t a;
577 	ssize_t size;
578 	struct rte_eth_link link;
579 
580 	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
581 		return 0;
582 	if (unlikely(ring == NULL)) {
583 		int ret;
584 
585 		/* Secondary process will attempt to request regions. */
586 		ret = rte_eth_link_get(mq->in_port, &link);
587 		if (ret < 0)
588 			MIF_LOG(ERR, "Failed to get port %u link info: %s",
589 				mq->in_port, rte_strerror(-ret));
590 		return 0;
591 	}
592 
593 	ring_size = 1 << mq->log2_ring_size;
594 	mask = ring_size - 1;
595 
596 	if (type == MEMIF_RING_C2S) {
597 		/* For C2S queues ring->head is updated by the sender and
598 		 * this function is called in the context of the sending thread.
599 		 * The loads in the sender do not need to synchronize with
600 		 * its own stores. Hence, the following load can be a
601 		 * relaxed load.
602 		 */
603 		slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
604 		n_free = ring_size - slot +
605 				__atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
606 	} else {
607 		/* For S2C queues ring->tail is updated by the sender and
608 		 * this function is called in the context of the sending thread.
609 		 * The loads in the sender do not need to synchronize with
610 		 * its own stores. Hence, the following load can be a
611 		 * relaxed load.
612 		 */
613 		slot = __atomic_load_n(&ring->tail, __ATOMIC_RELAXED);
614 		n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
615 	}
616 
617 	while (n_tx_pkts < nb_pkts && n_free) {
618 		mbuf_head = *bufs++;
619 		nb_segs = mbuf_head->nb_segs;
620 		mbuf = mbuf_head;
621 
622 		saved_slot = slot;
623 		d0 = &ring->desc[slot & mask];
624 		dst_off = 0;
625 		dst_len = (type == MEMIF_RING_C2S) ?
626 			pmd->run.pkt_buffer_size : d0->length;
627 
628 next_in_chain:
629 		src_off = 0;
630 		src_len = rte_pktmbuf_data_len(mbuf);
631 
632 		while (src_len) {
633 			if (dst_len == 0) {
634 				if (n_free) {
635 					slot++;
636 					n_free--;
637 					d0->flags |= MEMIF_DESC_FLAG_NEXT;
638 					d0 = &ring->desc[slot & mask];
639 					dst_off = 0;
640 					dst_len = (type == MEMIF_RING_C2S) ?
641 					    pmd->run.pkt_buffer_size : d0->length;
642 					d0->flags = 0;
643 				} else {
644 					slot = saved_slot;
645 					goto no_free_slots;
646 				}
647 			}
648 			cp_len = RTE_MIN(dst_len, src_len);
649 
650 			rte_memcpy((uint8_t *)memif_get_buffer(proc_private,
651 							       d0) + dst_off,
652 				rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
653 				cp_len);
654 
655 			mq->n_bytes += cp_len;
656 			src_off += cp_len;
657 			dst_off += cp_len;
658 			src_len -= cp_len;
659 			dst_len -= cp_len;
660 
661 			d0->length = dst_off;
662 		}
663 
664 		if (--nb_segs > 0) {
665 			mbuf = mbuf->next;
666 			goto next_in_chain;
667 		}
668 
669 		n_tx_pkts++;
670 		slot++;
671 		n_free--;
672 		rte_pktmbuf_free(mbuf_head);
673 	}
674 
675 no_free_slots:
676 	if (type == MEMIF_RING_C2S)
677 		__atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
678 	else
679 		__atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
680 
681 	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
682 		a = 1;
683 		size = write(mq->intr_handle.fd, &a, sizeof(a));
684 		if (unlikely(size < 0)) {
685 			MIF_LOG(WARNING,
686 				"Failed to send interrupt. %s", strerror(errno));
687 		}
688 	}
689 
690 	mq->n_pkts += n_tx_pkts;
691 	return n_tx_pkts;
692 }
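
/*
 * Sketch of the eventfd handshake used by eth_memif_tx() above (and by
 * eth_memif_tx_zc() below) and consumed at the top of the Rx paths: the
 * sender writes an 8-byte counter value to signal the peer, the receiver
 * reads the fd to rearm it. Assumes the non-blocking eventfd created in
 * memif_init_queues().
 */
static void __rte_unused
memif_example_signal_peer(int efd)
{
	uint64_t one = 1;

	if (write(efd, &one, sizeof(one)) < 0)
		MIF_LOG(WARNING, "Failed to send interrupt. %s",
			strerror(errno));
}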
693 
694 
695 static int
696 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
697 		memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
698 		uint16_t slot, uint16_t n_free)
699 {
700 	memif_desc_t *d0;
701 	uint16_t nb_segs = mbuf->nb_segs;
702 	int used_slots = 1;
703 
704 next_in_chain:
705 	/* store pointer to mbuf to free it later */
706 	mq->buffers[slot & mask] = mbuf;
707 	/* Increment refcnt to make sure the buffer is not freed before server
708 	 * receives it. (current segment)
709 	 */
710 	rte_mbuf_refcnt_update(mbuf, 1);
711 	/* populate descriptor */
712 	d0 = &ring->desc[slot & mask];
713 	d0->length = rte_pktmbuf_data_len(mbuf);
714 	mq->n_bytes += rte_pktmbuf_data_len(mbuf);
715 	/* FIXME: get region index */
716 	d0->region = 1;
717 	d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
718 		(uint8_t *)proc_private->regions[d0->region]->addr;
719 	d0->flags = 0;
720 
721 	/* check if buffer is chained */
722 	if (--nb_segs > 0) {
723 		if (n_free < 2)
724 			return 0;
725 		/* mark buffer as chained */
726 		d0->flags |= MEMIF_DESC_FLAG_NEXT;
727 		/* advance mbuf */
728 		mbuf = mbuf->next;
729 		/* update counters */
730 		used_slots++;
731 		slot++;
732 		n_free--;
733 		goto next_in_chain;
734 	}
735 	return used_slots;
736 }
737 
738 static uint16_t
739 eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
740 {
741 	struct memif_queue *mq = queue;
742 	struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
743 	struct pmd_process_private *proc_private =
744 		rte_eth_devices[mq->in_port].process_private;
745 	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
746 	uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
747 	struct rte_eth_link link;
748 
749 	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
750 		return 0;
751 	if (unlikely(ring == NULL)) {
752 		/* Secondary process will attempt to request regions. */
753 		rte_eth_link_get(mq->in_port, &link);
754 		return 0;
755 	}
756 
757 	ring_size = 1 << mq->log2_ring_size;
758 	mask = ring_size - 1;
759 
760 	/* free mbufs received by server */
761 	memif_free_stored_mbufs(proc_private, mq);
762 
763 	/* ring type always MEMIF_RING_C2S */
764 	/* For C2S queues ring->head is updated by the sender and
765 	 * this function is called in the context of the sending thread.
766 	 * The loads in the sender do not need to synchronize with
767 	 * its own stores. Hence, the following load can be a
768 	 * relaxed load.
769 	 */
770 	slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
771 	n_free = ring_size - slot + mq->last_tail;
772 
773 	int used_slots;
774 
775 	while (n_free && (n_tx_pkts < nb_pkts)) {
776 		while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
777 			if ((nb_pkts - n_tx_pkts) > 8) {
778 				rte_prefetch0(*bufs + 4);
779 				rte_prefetch0(*bufs + 5);
780 				rte_prefetch0(*bufs + 6);
781 				rte_prefetch0(*bufs + 7);
782 			}
783 			used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
784 				mask, slot, n_free);
785 			if (unlikely(used_slots < 1))
786 				goto no_free_slots;
787 			n_tx_pkts++;
788 			slot += used_slots;
789 			n_free -= used_slots;
790 
791 			used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
792 				mask, slot, n_free);
793 			if (unlikely(used_slots < 1))
794 				goto no_free_slots;
795 			n_tx_pkts++;
796 			slot += used_slots;
797 			n_free -= used_slots;
798 
799 			used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
800 				mask, slot, n_free);
801 			if (unlikely(used_slots < 1))
802 				goto no_free_slots;
803 			n_tx_pkts++;
804 			slot += used_slots;
805 			n_free -= used_slots;
806 
807 			used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
808 				mask, slot, n_free);
809 			if (unlikely(used_slots < 1))
810 				goto no_free_slots;
811 			n_tx_pkts++;
812 			slot += used_slots;
813 			n_free -= used_slots;
814 		}
815 		used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
816 			mask, slot, n_free);
817 		if (unlikely(used_slots < 1))
818 			goto no_free_slots;
819 		n_tx_pkts++;
820 		slot += used_slots;
821 		n_free -= used_slots;
822 	}
823 
824 no_free_slots:
825 	/* ring type always MEMIF_RING_C2S */
826 	/* The ring->head acts as a guard variable between Tx and Rx
827 	 * threads, so this store-release pairs with the load-acquire
828 	 * in eth_memif_rx() for C2S rings.
829 	 */
830 	__atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
831 
832 	/* Send interrupt, if enabled. */
833 	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
834 		uint64_t a = 1;
835 		ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
836 		if (unlikely(size < 0)) {
837 			MIF_LOG(WARNING,
838 				"Failed to send interrupt. %s", strerror(errno));
839 		}
840 	}
841 
842 	/* increment queue counters */
843 	mq->n_pkts += n_tx_pkts;
844 
845 	return n_tx_pkts;
846 }
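
/*
 * Note on the free-slot computation in eth_memif_tx_zc() above: unlike
 * eth_memif_tx(), the zero-copy path derives n_free from mq->last_tail
 * rather than reading ring->tail directly, because a slot only becomes
 * reusable once memif_free_stored_mbufs() has dropped the extra mbuf
 * reference held for it, and that is exactly what advances mq->last_tail.
 */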
847 
848 void
849 memif_free_regions(struct rte_eth_dev *dev)
850 {
851 	struct pmd_process_private *proc_private = dev->process_private;
852 	struct pmd_internals *pmd = dev->data->dev_private;
853 	int i;
854 	struct memif_region *r;
855 
856 	/* regions are allocated contiguously, so it's
857 	 * enough to loop until 'proc_private->regions_num'
858 	 */
859 	for (i = 0; i < proc_private->regions_num; i++) {
860 		r = proc_private->regions[i];
861 		if (r != NULL) {
862 			/* This is memzone */
863 			if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
864 				r->addr = NULL;
865 				if (r->fd > 0)
866 					close(r->fd);
867 			}
868 			if (r->addr != NULL) {
869 				munmap(r->addr, r->region_size);
870 				if (r->fd > 0) {
871 					close(r->fd);
872 					r->fd = -1;
873 				}
874 			}
875 			rte_free(r);
876 			proc_private->regions[i] = NULL;
877 		}
878 	}
879 	proc_private->regions_num = 0;
880 }
881 
882 static int
883 memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
884 		     void *arg)
885 {
886 	struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
887 	struct memif_region *r;
888 
889 	if (proc_private->regions_num < 1) {
890 		MIF_LOG(ERR, "Missing descriptor region");
891 		return -1;
892 	}
893 
894 	r = proc_private->regions[proc_private->regions_num - 1];
895 
896 	if (r->addr != msl->base_va)
897 		r = proc_private->regions[++proc_private->regions_num - 1];
898 
899 	if (r == NULL) {
900 		r = rte_zmalloc("region", sizeof(struct memif_region), 0);
901 		if (r == NULL) {
902 			MIF_LOG(ERR, "Failed to alloc memif region.");
903 			return -ENOMEM;
904 		}
905 
906 		r->addr = msl->base_va;
907 		r->region_size = ms->len;
908 		r->fd = rte_memseg_get_fd(ms);
909 		if (r->fd < 0)
910 			return -1;
911 		r->pkt_buffer_offset = 0;
912 
913 		proc_private->regions[proc_private->regions_num - 1] = r;
914 	} else {
915 		r->region_size += ms->len;
916 	}
917 
918 	return 0;
919 }
920 
921 static int
922 memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
923 {
924 	struct pmd_internals *pmd = dev->data->dev_private;
925 	struct pmd_process_private *proc_private = dev->process_private;
926 	char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
927 	int ret = 0;
928 	struct memif_region *r;
929 
930 	if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
931 		MIF_LOG(ERR, "Too many regions.");
932 		return -1;
933 	}
934 
935 	r = rte_zmalloc("region", sizeof(struct memif_region), 0);
936 	if (r == NULL) {
937 		MIF_LOG(ERR, "Failed to alloc memif region.");
938 		return -ENOMEM;
939 	}
940 
941 	/* calculate buffer offset */
942 	r->pkt_buffer_offset = (pmd->run.num_c2s_rings + pmd->run.num_s2c_rings) *
943 	    (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
944 	    (1 << pmd->run.log2_ring_size));
945 
946 	r->region_size = r->pkt_buffer_offset;
947 	/* if region has buffers, add buffers size to region_size */
948 	if (has_buffers == 1)
949 		r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
950 			(1 << pmd->run.log2_ring_size) *
951 			(pmd->run.num_c2s_rings +
952 			 pmd->run.num_s2c_rings));
953 
954 	memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
955 	snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
956 		 proc_private->regions_num);
957 
958 	r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
959 	if (r->fd < 0) {
960 		MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
961 		ret = -1;
962 		goto error;
963 	}
964 
965 	ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
966 	if (ret < 0) {
967 		MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
968 		goto error;
969 	}
970 
971 	ret = ftruncate(r->fd, r->region_size);
972 	if (ret < 0) {
973 		MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
974 		goto error;
975 	}
976 
977 	r->addr = mmap(NULL, r->region_size, PROT_READ |
978 		       PROT_WRITE, MAP_SHARED, r->fd, 0);
979 	if (r->addr == MAP_FAILED) {
980 		MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(ret));
981 		ret = -1;
982 		goto error;
983 	}
984 
985 	proc_private->regions[proc_private->regions_num] = r;
986 	proc_private->regions_num++;
987 
988 	return ret;
989 
990 error:
991 	if (r->fd > 0)
992 		close(r->fd);
993 	r->fd = -1;
994 
995 	return ret;
996 }
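
/*
 * Worked example (hypothetical numbers) of the sizing above: with one C2S
 * and one S2C ring, log2_ring_size = 10 and 2048-byte buffers, the
 * descriptor area takes 2 * (sizeof(memif_ring_t) +
 * 1024 * sizeof(memif_desc_t)) bytes and, when the region carries buffers,
 * the buffer area adds 2 * 1024 * 2048 bytes on top.
 */
static uint32_t __rte_unused
memif_example_region_size(uint16_t num_rings,
			  memif_log2_ring_size_t log2_ring_size,
			  uint16_t pkt_buffer_size, uint8_t has_buffers)
{
	uint32_t size = num_rings * (sizeof(memif_ring_t) +
	    sizeof(memif_desc_t) * (1 << log2_ring_size));

	if (has_buffers)
		size += (uint32_t)num_rings * (1 << log2_ring_size) *
			pkt_buffer_size;
	return size;
}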
997 
998 static int
999 memif_regions_init(struct rte_eth_dev *dev)
1000 {
1001 	struct pmd_internals *pmd = dev->data->dev_private;
1002 	int ret;
1003 
1004 	/*
1005 	 * Zero-copy exposes DPDK memory directly.
1006 	 * Each memseg list is represented by a memif region.
1007 	 * Zero-copy regions are indexed as memseg list idx + 1,
1008 	 * since region 0 is already reserved for descriptors.
1009 	 */
1010 	if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1011 		/* create region idx 0 containing descriptors */
1012 		ret = memif_region_init_shm(dev, 0);
1013 		if (ret < 0)
1014 			return ret;
1015 		ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
1016 		if (ret < 0)
1017 			return ret;
1018 	} else {
1019 		/* create one memory region containing rings and buffers */
1020 		ret = memif_region_init_shm(dev, /* has buffers */ 1);
1021 		if (ret < 0)
1022 			return ret;
1023 	}
1024 
1025 	return 0;
1026 }
1027 
1028 static void
1029 memif_init_rings(struct rte_eth_dev *dev)
1030 {
1031 	struct pmd_internals *pmd = dev->data->dev_private;
1032 	struct pmd_process_private *proc_private = dev->process_private;
1033 	memif_ring_t *ring;
1034 	int i, j;
1035 	uint16_t slot;
1036 
1037 	for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1038 		ring = memif_get_ring(pmd, proc_private, MEMIF_RING_C2S, i);
1039 		__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1040 		__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1041 		ring->cookie = MEMIF_COOKIE;
1042 		ring->flags = 0;
1043 
1044 		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1045 			continue;
1046 
1047 		for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1048 			slot = i * (1 << pmd->run.log2_ring_size) + j;
1049 			ring->desc[j].region = 0;
1050 			ring->desc[j].offset =
1051 				proc_private->regions[0]->pkt_buffer_offset +
1052 				(uint32_t)(slot * pmd->run.pkt_buffer_size);
1053 			ring->desc[j].length = pmd->run.pkt_buffer_size;
1054 		}
1055 	}
1056 
1057 	for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1058 		ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2C, i);
1059 		__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1060 		__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1061 		ring->cookie = MEMIF_COOKIE;
1062 		ring->flags = 0;
1063 
1064 		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1065 			continue;
1066 
1067 		for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1068 			slot = (i + pmd->run.num_c2s_rings) *
1069 			    (1 << pmd->run.log2_ring_size) + j;
1070 			ring->desc[j].region = 0;
1071 			ring->desc[j].offset =
1072 				proc_private->regions[0]->pkt_buffer_offset +
1073 				(uint32_t)(slot * pmd->run.pkt_buffer_size);
1074 			ring->desc[j].length = pmd->run.pkt_buffer_size;
1075 		}
1076 	}
1077 }
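
/*
 * Layout recap for the non-zero-copy case initialized above: region 0 is
 * [C2S rings][S2C rings][packet buffers], and the buffer for ring i,
 * slot j sits at pkt_buffer_offset + slot * pkt_buffer_size, where slot
 * counts S2C rings after the num_c2s_rings C2S rings, exactly as the two
 * loops above compute it.
 */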
1078 
1079 /* called only by client */
1080 static int
1081 memif_init_queues(struct rte_eth_dev *dev)
1082 {
1083 	struct pmd_internals *pmd = dev->data->dev_private;
1084 	struct memif_queue *mq;
1085 	int i;
1086 
1087 	for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1088 		mq = dev->data->tx_queues[i];
1089 		mq->log2_ring_size = pmd->run.log2_ring_size;
1090 		/* queues located only in region 0 */
1091 		mq->region = 0;
1092 		mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_C2S, i);
1093 		mq->last_head = 0;
1094 		mq->last_tail = 0;
1095 		mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1096 		if (mq->intr_handle.fd < 0) {
1097 			MIF_LOG(WARNING,
1098 				"Failed to create eventfd for tx queue %d: %s.", i,
1099 				strerror(errno));
1100 		}
1101 		mq->buffers = NULL;
1102 		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1103 			mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1104 						  (1 << mq->log2_ring_size), 0);
1105 			if (mq->buffers == NULL)
1106 				return -ENOMEM;
1107 		}
1108 	}
1109 
1110 	for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1111 		mq = dev->data->rx_queues[i];
1112 		mq->log2_ring_size = pmd->run.log2_ring_size;
1113 		/* queues located only in region 0 */
1114 		mq->region = 0;
1115 		mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2C, i);
1116 		mq->last_head = 0;
1117 		mq->last_tail = 0;
1118 		mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1119 		if (mq->intr_handle.fd < 0) {
1120 			MIF_LOG(WARNING,
1121 				"Failed to create eventfd for rx queue %d: %s.", i,
1122 				strerror(errno));
1123 		}
1124 		mq->buffers = NULL;
1125 		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1126 			mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1127 						  (1 << mq->log2_ring_size), 0);
1128 			if (mq->buffers == NULL)
1129 				return -ENOMEM;
1130 		}
1131 	}
1132 	return 0;
1133 }
1134 
1135 int
1136 memif_init_regions_and_queues(struct rte_eth_dev *dev)
1137 {
1138 	int ret;
1139 
1140 	ret = memif_regions_init(dev);
1141 	if (ret < 0)
1142 		return ret;
1143 
1144 	memif_init_rings(dev);
1145 
1146 	ret = memif_init_queues(dev);
1147 	if (ret < 0)
1148 		return ret;
1149 
1150 	return 0;
1151 }
1152 
1153 int
1154 memif_connect(struct rte_eth_dev *dev)
1155 {
1156 	struct pmd_internals *pmd = dev->data->dev_private;
1157 	struct pmd_process_private *proc_private = dev->process_private;
1158 	struct memif_region *mr;
1159 	struct memif_queue *mq;
1160 	memif_ring_t *ring;
1161 	int i;
1162 
1163 	for (i = 0; i < proc_private->regions_num; i++) {
1164 		mr = proc_private->regions[i];
1165 		if (mr != NULL) {
1166 			if (mr->addr == NULL) {
1167 				if (mr->fd < 0)
1168 					return -1;
1169 				mr->addr = mmap(NULL, mr->region_size,
1170 						PROT_READ | PROT_WRITE,
1171 						MAP_SHARED, mr->fd, 0);
1172 				if (mr->addr == MAP_FAILED) {
1173 					MIF_LOG(ERR, "mmap failed: %s\n",
1174 						strerror(errno));
1175 					return -1;
1176 				}
1177 			}
1178 			if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
1179 				/* close memseg file */
1180 				close(mr->fd);
1181 				mr->fd = -1;
1182 			}
1183 		}
1184 	}
1185 
1186 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1187 		for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1188 			mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
1189 			    dev->data->tx_queues[i] : dev->data->rx_queues[i];
1190 			ring = memif_get_ring_from_queue(proc_private, mq);
1191 			if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1192 				MIF_LOG(ERR, "Wrong ring");
1193 				return -1;
1194 			}
1195 			__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1196 			__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1197 			mq->last_head = 0;
1198 			mq->last_tail = 0;
1199 			/* enable polling mode */
1200 			if (pmd->role == MEMIF_ROLE_SERVER)
1201 				ring->flags = MEMIF_RING_FLAG_MASK_INT;
1202 		}
1203 		for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1204 			mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
1205 			    dev->data->rx_queues[i] : dev->data->tx_queues[i];
1206 			ring = memif_get_ring_from_queue(proc_private, mq);
1207 			if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1208 				MIF_LOG(ERR, "Wrong ring");
1209 				return -1;
1210 			}
1211 			__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1212 			__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1213 			mq->last_head = 0;
1214 			mq->last_tail = 0;
1215 			/* enable polling mode */
1216 			if (pmd->role == MEMIF_ROLE_CLIENT)
1217 				ring->flags = MEMIF_RING_FLAG_MASK_INT;
1218 		}
1219 
1220 		pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
1221 		pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
1222 		dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
1223 	}
1224 	MIF_LOG(INFO, "Connected.");
1225 	return 0;
1226 }
1227 
1228 static int
1229 memif_dev_start(struct rte_eth_dev *dev)
1230 {
1231 	struct pmd_internals *pmd = dev->data->dev_private;
1232 	int ret = 0;
1233 
1234 	switch (pmd->role) {
1235 	case MEMIF_ROLE_CLIENT:
1236 		ret = memif_connect_client(dev);
1237 		break;
1238 	case MEMIF_ROLE_SERVER:
1239 		ret = memif_connect_server(dev);
1240 		break;
1241 	default:
1242 		MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
1243 		ret = -1;
1244 		break;
1245 	}
1246 
1247 	return ret;
1248 }
1249 
1250 static int
1251 memif_dev_close(struct rte_eth_dev *dev)
1252 {
1253 	struct pmd_internals *pmd = dev->data->dev_private;
1254 	int i;
1255 
1256 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1257 		memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
1258 		memif_disconnect(dev);
1259 
1260 		for (i = 0; i < dev->data->nb_rx_queues; i++)
1261 			(*dev->dev_ops->rx_queue_release)(dev, i);
1262 		for (i = 0; i < dev->data->nb_tx_queues; i++)
1263 			(*dev->dev_ops->tx_queue_release)(dev, i);
1264 
1265 		memif_socket_remove_device(dev);
1266 	} else {
1267 		memif_disconnect(dev);
1268 	}
1269 
1270 	rte_free(dev->process_private);
1271 
1272 	return 0;
1273 }
1274 
1275 static int
1276 memif_dev_configure(struct rte_eth_dev *dev)
1277 {
1278 	struct pmd_internals *pmd = dev->data->dev_private;
1279 
1280 	/*
1281 	 * CLIENT - TXQ
1282 	 * SERVER - RXQ
1283 	 */
1284 	pmd->cfg.num_c2s_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
1285 				  dev->data->nb_tx_queues : dev->data->nb_rx_queues;
1286 
1287 	/*
1288 	 * CLIENT - RXQ
1289 	 * SERVER - TXQ
1290 	 */
1291 	pmd->cfg.num_s2c_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
1292 				  dev->data->nb_rx_queues : dev->data->nb_tx_queues;
1293 
1294 	return 0;
1295 }
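
/*
 * Example: a client configured with 2 Tx and 1 Rx ethdev queues sets
 * num_c2s_rings = 2 and num_s2c_rings = 1; a server facing it maps the
 * same rings the other way around (RXQ <-> C2S, TXQ <-> S2C), so both
 * ends agree on the number of rings in each direction.
 */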
1296 
1297 static int
1298 memif_tx_queue_setup(struct rte_eth_dev *dev,
1299 		     uint16_t qid,
1300 		     uint16_t nb_tx_desc __rte_unused,
1301 		     unsigned int socket_id __rte_unused,
1302 		     const struct rte_eth_txconf *tx_conf __rte_unused)
1303 {
1304 	struct pmd_internals *pmd = dev->data->dev_private;
1305 	struct memif_queue *mq;
1306 
1307 	mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
1308 	if (mq == NULL) {
1309 		MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
1310 		return -ENOMEM;
1311 	}
1312 
1313 	mq->type =
1314 	    (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_C2S : MEMIF_RING_S2C;
1315 	mq->n_pkts = 0;
1316 	mq->n_bytes = 0;
1317 	mq->intr_handle.fd = -1;
1318 	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1319 	mq->in_port = dev->data->port_id;
1320 	dev->data->tx_queues[qid] = mq;
1321 
1322 	return 0;
1323 }
1324 
1325 static int
1326 memif_rx_queue_setup(struct rte_eth_dev *dev,
1327 		     uint16_t qid,
1328 		     uint16_t nb_rx_desc __rte_unused,
1329 		     unsigned int socket_id __rte_unused,
1330 		     const struct rte_eth_rxconf *rx_conf __rte_unused,
1331 		     struct rte_mempool *mb_pool)
1332 {
1333 	struct pmd_internals *pmd = dev->data->dev_private;
1334 	struct memif_queue *mq;
1335 
1336 	mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
1337 	if (mq == NULL) {
1338 		MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
1339 		return -ENOMEM;
1340 	}
1341 
1342 	mq->type = (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_S2C : MEMIF_RING_C2S;
1343 	mq->n_pkts = 0;
1344 	mq->n_bytes = 0;
1345 	mq->intr_handle.fd = -1;
1346 	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1347 	mq->mempool = mb_pool;
1348 	mq->in_port = dev->data->port_id;
1349 	dev->data->rx_queues[qid] = mq;
1350 
1351 	return 0;
1352 }
1353 
1354 static void
1355 memif_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1356 {
1357 	struct memif_queue *mq = dev->data->rx_queues[qid];
1358 
1359 	if (!mq)
1360 		return;
1361 
1362 	rte_free(mq);
1363 }
1364 
1365 static void
1366 memif_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1367 {
1368 	struct memif_queue *mq = dev->data->tx_queues[qid];
1369 
1370 	if (!mq)
1371 		return;
1372 
1373 	rte_free(mq);
1374 }
1375 
1376 static int
1377 memif_link_update(struct rte_eth_dev *dev,
1378 		  int wait_to_complete __rte_unused)
1379 {
1380 	struct pmd_process_private *proc_private;
1381 
1382 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1383 		proc_private = dev->process_private;
1384 		if (dev->data->dev_link.link_status == RTE_ETH_LINK_UP &&
1385 				proc_private->regions_num == 0) {
1386 			memif_mp_request_regions(dev);
1387 		} else if (dev->data->dev_link.link_status == RTE_ETH_LINK_DOWN &&
1388 				proc_private->regions_num > 0) {
1389 			memif_free_regions(dev);
1390 		}
1391 	}
1392 	return 0;
1393 }
1394 
1395 static int
1396 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1397 {
1398 	struct pmd_internals *pmd = dev->data->dev_private;
1399 	struct memif_queue *mq;
1400 	int i;
1401 	uint8_t tmp, nq;
1402 
1403 	stats->ipackets = 0;
1404 	stats->ibytes = 0;
1405 	stats->opackets = 0;
1406 	stats->obytes = 0;
1407 
1408 	tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_c2s_rings :
1409 	    pmd->run.num_s2c_rings;
1410 	nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1411 	    RTE_ETHDEV_QUEUE_STAT_CNTRS;
1412 
1413 	/* RX stats */
1414 	for (i = 0; i < nq; i++) {
1415 		mq = dev->data->rx_queues[i];
1416 		stats->q_ipackets[i] = mq->n_pkts;
1417 		stats->q_ibytes[i] = mq->n_bytes;
1418 		stats->ipackets += mq->n_pkts;
1419 		stats->ibytes += mq->n_bytes;
1420 	}
1421 
1422 	tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_s2c_rings :
1423 	    pmd->run.num_c2s_rings;
1424 	nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1425 	    RTE_ETHDEV_QUEUE_STAT_CNTRS;
1426 
1427 	/* TX stats */
1428 	for (i = 0; i < nq; i++) {
1429 		mq = dev->data->tx_queues[i];
1430 		stats->q_opackets[i] = mq->n_pkts;
1431 		stats->q_obytes[i] = mq->n_bytes;
1432 		stats->opackets += mq->n_pkts;
1433 		stats->obytes += mq->n_bytes;
1434 	}
1435 	return 0;
1436 }
1437 
1438 static int
1439 memif_stats_reset(struct rte_eth_dev *dev)
1440 {
1441 	struct pmd_internals *pmd = dev->data->dev_private;
1442 	int i;
1443 	struct memif_queue *mq;
1444 
1445 	for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1446 		mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->tx_queues[i] :
1447 		    dev->data->rx_queues[i];
1448 		mq->n_pkts = 0;
1449 		mq->n_bytes = 0;
1450 	}
1451 	for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1452 		mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->rx_queues[i] :
1453 		    dev->data->tx_queues[i];
1454 		mq->n_pkts = 0;
1455 		mq->n_bytes = 0;
1456 	}
1457 
1458 	return 0;
1459 }
1460 
1461 static int
1462 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1463 			   uint16_t qid __rte_unused)
1464 {
1465 	MIF_LOG(WARNING, "Interrupt mode not supported.");
1466 
1467 	return -1;
1468 }
1469 
1470 static int
1471 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1472 {
1473 	struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1474 
1475 	return 0;
1476 }
1477 
1478 static const struct eth_dev_ops ops = {
1479 	.dev_start = memif_dev_start,
1480 	.dev_close = memif_dev_close,
1481 	.dev_infos_get = memif_dev_info,
1482 	.dev_configure = memif_dev_configure,
1483 	.tx_queue_setup = memif_tx_queue_setup,
1484 	.rx_queue_setup = memif_rx_queue_setup,
1485 	.rx_queue_release = memif_rx_queue_release,
1486 	.tx_queue_release = memif_tx_queue_release,
1487 	.rx_queue_intr_enable = memif_rx_queue_intr_enable,
1488 	.rx_queue_intr_disable = memif_rx_queue_intr_disable,
1489 	.link_update = memif_link_update,
1490 	.stats_get = memif_stats_get,
1491 	.stats_reset = memif_stats_reset,
1492 };
1493 
1494 static int
1495 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1496 	     memif_interface_id_t id, uint32_t flags,
1497 	     const char *socket_filename,
1498 	     memif_log2_ring_size_t log2_ring_size,
1499 	     uint16_t pkt_buffer_size, const char *secret,
1500 	     struct rte_ether_addr *ether_addr)
1501 {
1502 	int ret = 0;
1503 	struct rte_eth_dev *eth_dev;
1504 	struct rte_eth_dev_data *data;
1505 	struct pmd_internals *pmd;
1506 	struct pmd_process_private *process_private;
1507 	const unsigned int numa_node = vdev->device.numa_node;
1508 	const char *name = rte_vdev_device_name(vdev);
1509 
1510 	eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1511 	if (eth_dev == NULL) {
1512 		MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1513 		return -1;
1514 	}
1515 
1516 	process_private = (struct pmd_process_private *)
1517 		rte_zmalloc(name, sizeof(struct pmd_process_private),
1518 			    RTE_CACHE_LINE_SIZE);
1519 
1520 	if (process_private == NULL) {
1521 		MIF_LOG(ERR, "Failed to alloc memory for process private");
1522 		return -1;
1523 	}
1524 	eth_dev->process_private = process_private;
1525 
1526 	pmd = eth_dev->data->dev_private;
1527 	memset(pmd, 0, sizeof(*pmd));
1528 
1529 	pmd->id = id;
1530 	pmd->flags = flags;
1531 	pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1532 	pmd->role = role;
1533 	/* Zero-copy flag is irrelevant to the server. */
1534 	if (pmd->role == MEMIF_ROLE_SERVER)
1535 		pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1536 
1537 	ret = memif_socket_init(eth_dev, socket_filename);
1538 	if (ret < 0)
1539 		return ret;
1540 
1541 	memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
1542 	if (secret != NULL)
1543 		strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1544 
1545 	pmd->cfg.log2_ring_size = log2_ring_size;
1546 	/* set in .dev_configure() */
1547 	pmd->cfg.num_c2s_rings = 0;
1548 	pmd->cfg.num_s2c_rings = 0;
1549 
1550 	pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1551 	rte_spinlock_init(&pmd->cc_lock);
1552 
1553 	data = eth_dev->data;
1554 	data->dev_private = pmd;
1555 	data->numa_node = numa_node;
1556 	data->dev_link = pmd_link;
1557 	data->mac_addrs = ether_addr;
1558 	data->promiscuous = 1;
1559 	data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1560 
1561 	eth_dev->dev_ops = &ops;
1562 	eth_dev->device = &vdev->device;
1563 	if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1564 		eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1565 		eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1566 	} else {
1567 		eth_dev->rx_pkt_burst = eth_memif_rx;
1568 		eth_dev->tx_pkt_burst = eth_memif_tx;
1569 	}
1570 
1571 	rte_eth_dev_probing_finish(eth_dev);
1572 
1573 	return 0;
1574 }
1575 
1576 static int
1577 memif_set_role(const char *key __rte_unused, const char *value,
1578 	       void *extra_args)
1579 {
1580 	enum memif_role_t *role = (enum memif_role_t *)extra_args;
1581 
1582 	if (strstr(value, "server") != NULL) {
1583 		*role = MEMIF_ROLE_SERVER;
1584 	} else if (strstr(value, "client") != NULL) {
1585 		*role = MEMIF_ROLE_CLIENT;
1586 	} else if (strstr(value, "master") != NULL) {
1587 		MIF_LOG(NOTICE, "Role argument \"master\" is deprecated, use \"server\"");
1588 		*role = MEMIF_ROLE_SERVER;
1589 	} else if (strstr(value, "slave") != NULL) {
1590 		MIF_LOG(NOTICE, "Role argument \"slave\" is deprecated, use \"client\"");
1591 		*role = MEMIF_ROLE_CLIENT;
1592 	} else {
1593 		MIF_LOG(ERR, "Unknown role: %s.", value);
1594 		return -EINVAL;
1595 	}
1596 	return 0;
1597 }
1598 
1599 static int
1600 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1601 {
1602 	uint32_t *flags = (uint32_t *)extra_args;
1603 
1604 	if (strstr(value, "yes") != NULL) {
1605 		if (!rte_mcfg_get_single_file_segments()) {
1606 			MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1607 			return -ENOTSUP;
1608 		}
1609 		*flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1610 	} else if (strstr(value, "no") != NULL) {
1611 		*flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1612 	} else {
1613 		MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1614 		return -EINVAL;
1615 	}
1616 	return 0;
1617 }
1618 
1619 static int
1620 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1621 {
1622 	memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1623 
1624 	/* even if parsing fails, 0 is a valid id */
1625 	*id = strtoul(value, NULL, 10);
1626 	return 0;
1627 }
1628 
1629 static int
1630 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1631 {
1632 	unsigned long tmp;
1633 	uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1634 
1635 	tmp = strtoul(value, NULL, 10);
1636 	if (tmp == 0 || tmp > 0xFFFF) {
1637 		MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1638 		return -EINVAL;
1639 	}
1640 	*pkt_buffer_size = tmp;
1641 	return 0;
1642 }
1643 
1644 static int
1645 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1646 {
1647 	unsigned long tmp;
1648 	memif_log2_ring_size_t *log2_ring_size =
1649 	    (memif_log2_ring_size_t *)extra_args;
1650 
1651 	tmp = strtoul(value, NULL, 10);
1652 	if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1653 		MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1654 			value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1655 		return -EINVAL;
1656 	}
1657 	*log2_ring_size = tmp;
1658 	return 0;
1659 }
1660 
1661 /* check if directory exists and if we have permission to read/write */
1662 static int
1663 memif_check_socket_filename(const char *filename)
1664 {
1665 	char *dir = NULL, *tmp;
1666 	uint32_t idx;
1667 	int ret = 0;
1668 
1669 	if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1670 		MIF_LOG(ERR, "Unix socket address too long (max 108).");
1671 		return -1;
1672 	}
1673 
1674 	tmp = strrchr(filename, '/');
1675 	if (tmp != NULL) {
1676 		idx = tmp - filename;
1677 		dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1678 		if (dir == NULL) {
1679 			MIF_LOG(ERR, "Failed to allocate memory.");
1680 			return -1;
1681 		}
1682 		strlcpy(dir, filename, sizeof(char) * (idx + 1));
1683 	}
1684 
1685 	if (dir == NULL || (faccessat(AT_FDCWD, dir, F_OK | R_OK |
1686 					W_OK, AT_EACCESS) < 0)) {
1687 		MIF_LOG(ERR, "Invalid socket directory.");
1688 		ret = -EINVAL;
1689 	}
1690 
1691 	if (dir != NULL)
1692 		rte_free(dir);
1693 
1694 	return ret;
1695 }
1696 
1697 static int
1698 memif_set_socket_filename(const char *key __rte_unused, const char *value,
1699 			  void *extra_args)
1700 {
1701 	const char **socket_filename = (const char **)extra_args;
1702 
1703 	*socket_filename = value;
1704 	return 0;
1705 }
1706 
1707 static int
1708 memif_set_is_socket_abstract(const char *key __rte_unused, const char *value, void *extra_args)
1709 {
1710 	uint32_t *flags = (uint32_t *)extra_args;
1711 
1712 	if (strstr(value, "yes") != NULL) {
1713 		*flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1714 	} else if (strstr(value, "no") != NULL) {
1715 		*flags &= ~ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1716 	} else {
1717 		MIF_LOG(ERR, "Failed to parse socket-abstract param: %s.", value);
1718 		return -EINVAL;
1719 	}
1720 	return 0;
1721 }
1722 
1723 static int
1724 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1725 {
1726 	struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1727 
1728 	if (rte_ether_unformat_addr(value, ether_addr) < 0)
1729 		MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1730 	return 0;
1731 }
1732 
1733 static int
1734 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1735 {
1736 	const char **secret = (const char **)extra_args;
1737 
1738 	*secret = value;
1739 	return 0;
1740 }
1741 
1742 static int
1743 rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1744 {
1745 	RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1746 	RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1747 	int ret = 0;
1748 	struct rte_kvargs *kvlist;
1749 	const char *name = rte_vdev_device_name(vdev);
1750 	enum memif_role_t role = MEMIF_ROLE_CLIENT;
1751 	memif_interface_id_t id = 0;
1752 	uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1753 	memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1754 	const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1755 	uint32_t flags = 0;
1756 	const char *secret = NULL;
1757 	struct rte_ether_addr *ether_addr = rte_zmalloc("",
1758 		sizeof(struct rte_ether_addr), 0);
1759 	struct rte_eth_dev *eth_dev;
1760 
1761 	rte_eth_random_addr(ether_addr->addr_bytes);
1762 
1763 	MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1764 
1765 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1766 		eth_dev = rte_eth_dev_attach_secondary(name);
1767 		if (!eth_dev) {
1768 			MIF_LOG(ERR, "Failed to probe %s", name);
1769 			return -1;
1770 		}
1771 
1772 		eth_dev->dev_ops = &ops;
1773 		eth_dev->device = &vdev->device;
1774 		eth_dev->rx_pkt_burst = eth_memif_rx;
1775 		eth_dev->tx_pkt_burst = eth_memif_tx;
1776 
1777 		if (!rte_eal_primary_proc_alive(NULL)) {
1778 			MIF_LOG(ERR, "Primary process is missing");
1779 			return -1;
1780 		}
1781 
1782 		eth_dev->process_private = (struct pmd_process_private *)
1783 			rte_zmalloc(name,
1784 				sizeof(struct pmd_process_private),
1785 				RTE_CACHE_LINE_SIZE);
1786 		if (eth_dev->process_private == NULL) {
1787 			MIF_LOG(ERR,
1788 				"Failed to alloc memory for process private");
1789 			return -1;
1790 		}
1791 
1792 		rte_eth_dev_probing_finish(eth_dev);
1793 
1794 		return 0;
1795 	}
1796 
1797 	ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1798 	/*
1799 	 * The primary process can continue probing, but secondary processes
1800 	 * won't be able to get memory region information.
1801 	 */
1802 	if (ret < 0 && rte_errno != EEXIST)
1803 		MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1804 			strerror(rte_errno));
1805 
1806 	/* use abstract address by default */
1807 	flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1808 
1809 	kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1810 
1811 	/* parse parameters */
1812 	if (kvlist != NULL) {
1813 		ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1814 					 &memif_set_role, &role);
1815 		if (ret < 0)
1816 			goto exit;
1817 		ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1818 					 &memif_set_id, &id);
1819 		if (ret < 0)
1820 			goto exit;
1821 		ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1822 					 &memif_set_bs, &pkt_buffer_size);
1823 		if (ret < 0)
1824 			goto exit;
1825 		ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1826 					 &memif_set_rs, &log2_ring_size);
1827 		if (ret < 0)
1828 			goto exit;
1829 		ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1830 					 &memif_set_socket_filename,
1831 					 (void *)(&socket_filename));
1832 		if (ret < 0)
1833 			goto exit;
1834 		ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ABSTRACT_ARG,
1835 					 &memif_set_is_socket_abstract, &flags);
1836 		if (ret < 0)
1837 			goto exit;
1838 		ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1839 					 &memif_set_mac, ether_addr);
1840 		if (ret < 0)
1841 			goto exit;
1842 		ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1843 					 &memif_set_zc, &flags);
1844 		if (ret < 0)
1845 			goto exit;
1846 		ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1847 					 &memif_set_secret, (void *)(&secret));
1848 		if (ret < 0)
1849 			goto exit;
1850 	}
1851 
1852 	if (!(flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT)) {
1853 		ret = memif_check_socket_filename(socket_filename);
1854 		if (ret < 0)
1855 			goto exit;
1856 	}
1857 
1858 	/* create interface */
1859 	ret = memif_create(vdev, role, id, flags, socket_filename,
1860 			   log2_ring_size, pkt_buffer_size, secret, ether_addr);
1861 
1862 exit:
1863 	if (kvlist != NULL)
1864 		rte_kvargs_free(kvlist);
1865 	return ret;
1866 }
1867 
1868 static int
1869 rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1870 {
1871 	struct rte_eth_dev *eth_dev;
1872 
1873 	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1874 	if (eth_dev == NULL)
1875 		return 0;
1876 
1877 	return rte_eth_dev_close(eth_dev->data->port_id);
1878 }
1879 
1880 static struct rte_vdev_driver pmd_memif_drv = {
1881 	.probe = rte_pmd_memif_probe,
1882 	.remove = rte_pmd_memif_remove,
1883 };
1884 
1885 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1886 
1887 RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1888 			      ETH_MEMIF_ID_ARG "=<int>"
1889 			      ETH_MEMIF_ROLE_ARG "=server|client"
1890 			      ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1891 			      ETH_MEMIF_RING_SIZE_ARG "=<int>"
1892 			      ETH_MEMIF_SOCKET_ARG "=<string>"
1893 			      ETH_MEMIF_SOCKET_ABSTRACT_ARG "=yes|no"
1894 			      ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1895 			      ETH_MEMIF_ZC_ARG "=yes|no"
1896 			      ETH_MEMIF_SECRET_ARG "=<string>");
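
/*
 * Usage sketch (hypothetical command lines): connect two processes by
 * running a server on one end and a client on the other, e.g.
 *   app1 --vdev=net_memif0,role=server,id=0,socket=/run/memif.sock
 *   app2 --vdev=net_memif0,role=client,id=0,socket=/run/memif.sock
 * A zero-copy client (zero-copy=yes) additionally requires the EAL option
 * --single-file-segments, as enforced in memif_set_zc().
 */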
1897 
1898 RTE_LOG_REGISTER_DEFAULT(memif_logtype, NOTICE);
1899