xref: /dpdk/drivers/net/memif/memif_socket.c (revision b53d106d34b5c638f5a2cbdfee0da5bd42d4383f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
3  */
4 
5 #include <stdlib.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/socket.h>
10 #include <sys/ioctl.h>
11 #include <errno.h>
12 
13 #include <rte_version.h>
14 #include <rte_mbuf.h>
15 #include <rte_ether.h>
16 #include <ethdev_driver.h>
17 #include <ethdev_vdev.h>
18 #include <rte_malloc.h>
19 #include <rte_kvargs.h>
20 #include <rte_bus_vdev.h>
21 #include <rte_hash.h>
22 #include <rte_jhash.h>
23 #include <rte_string_fns.h>
24 
25 #include "rte_eth_memif.h"
26 #include "memif_socket.h"
27 
/*
 * Control channel interrupt callback. Declared ahead of its definition
 * because memif_disconnect() names it when unregistering the callback.
 */
static void memif_intr_handler(void *arg);
29 
30 static ssize_t
31 memif_msg_send(int fd, memif_msg_t *msg, int afd)
32 {
33 	struct msghdr mh = { 0 };
34 	struct iovec iov[1];
35 	struct cmsghdr *cmsg;
36 	char ctl[CMSG_SPACE(sizeof(int))];
37 
38 	iov[0].iov_base = msg;
39 	iov[0].iov_len = sizeof(memif_msg_t);
40 	mh.msg_iov = iov;
41 	mh.msg_iovlen = 1;
42 
43 	if (afd > 0) {
44 		memset(&ctl, 0, sizeof(ctl));
45 		mh.msg_control = ctl;
46 		mh.msg_controllen = sizeof(ctl);
47 		cmsg = CMSG_FIRSTHDR(&mh);
48 		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
49 		cmsg->cmsg_level = SOL_SOCKET;
50 		cmsg->cmsg_type = SCM_RIGHTS;
51 		rte_memcpy(CMSG_DATA(cmsg), &afd, sizeof(int));
52 	}
53 
54 	return sendmsg(fd, &mh, 0);
55 }
56 
57 static int
58 memif_msg_send_from_queue(struct memif_control_channel *cc)
59 {
60 	ssize_t size;
61 	int ret = 0;
62 	struct memif_msg_queue_elt *e;
63 
64 	e = TAILQ_FIRST(&cc->msg_queue);
65 	if (e == NULL)
66 		return 0;
67 
68 	if (rte_intr_fd_get(cc->intr_handle) < 0)
69 		return -1;
70 
71 	size = memif_msg_send(rte_intr_fd_get(cc->intr_handle), &e->msg,
72 			      e->fd);
73 	if (size != sizeof(memif_msg_t)) {
74 		MIF_LOG(ERR, "sendmsg fail: %s.", strerror(errno));
75 		ret = -1;
76 	} else {
77 		MIF_LOG(DEBUG, "Sent msg type %u.", e->msg.type);
78 	}
79 	TAILQ_REMOVE(&cc->msg_queue, e, next);
80 	rte_free(e);
81 
82 	return ret;
83 }
84 
85 static struct memif_msg_queue_elt *
86 memif_msg_enq(struct memif_control_channel *cc)
87 {
88 	struct memif_msg_queue_elt *e;
89 
90 	e = rte_zmalloc("memif_msg", sizeof(struct memif_msg_queue_elt), 0);
91 	if (e == NULL) {
92 		MIF_LOG(ERR, "Failed to allocate control message.");
93 		return NULL;
94 	}
95 
96 	e->fd = -1;
97 	TAILQ_INSERT_TAIL(&cc->msg_queue, e, next);
98 
99 	return e;
100 }
101 
102 void
103 memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason,
104 			 int err_code)
105 {
106 	struct memif_msg_queue_elt *e;
107 	struct pmd_internals *pmd;
108 	memif_msg_disconnect_t *d;
109 
110 	if (cc == NULL) {
111 		MIF_LOG(DEBUG, "Missing control channel.");
112 		return;
113 	}
114 
115 	e = memif_msg_enq(cc);
116 	if (e == NULL) {
117 		MIF_LOG(WARNING, "Failed to enqueue disconnect message.");
118 		return;
119 	}
120 
121 	d = &e->msg.disconnect;
122 
123 	e->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
124 	d->code = err_code;
125 
126 	if (reason != NULL) {
127 		strlcpy((char *)d->string, reason, sizeof(d->string));
128 		if (cc->dev != NULL) {
129 			pmd = cc->dev->data->dev_private;
130 			strlcpy(pmd->local_disc_string, reason,
131 				sizeof(pmd->local_disc_string));
132 		}
133 	}
134 }
135 
136 static int
137 memif_msg_enq_hello(struct memif_control_channel *cc)
138 {
139 	struct memif_msg_queue_elt *e = memif_msg_enq(cc);
140 	memif_msg_hello_t *h;
141 
142 	if (e == NULL)
143 		return -1;
144 
145 	h = &e->msg.hello;
146 
147 	e->msg.type = MEMIF_MSG_TYPE_HELLO;
148 	h->min_version = MEMIF_VERSION;
149 	h->max_version = MEMIF_VERSION;
150 	h->max_c2s_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
151 	h->max_s2c_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
152 	h->max_region = ETH_MEMIF_MAX_REGION_NUM - 1;
153 	h->max_log2_ring_size = ETH_MEMIF_MAX_LOG2_RING_SIZE;
154 
155 	strlcpy((char *)h->name, rte_version(), sizeof(h->name));
156 
157 	return 0;
158 }
159 
160 static int
161 memif_msg_receive_hello(struct rte_eth_dev *dev, memif_msg_t *msg)
162 {
163 	struct pmd_internals *pmd = dev->data->dev_private;
164 	memif_msg_hello_t *h = &msg->hello;
165 
166 	if (h->min_version > MEMIF_VERSION || h->max_version < MEMIF_VERSION) {
167 		memif_msg_enq_disconnect(pmd->cc, "Incompatible memif version", 0);
168 		return -1;
169 	}
170 
171 	/* Set parameters for active connection */
172 	pmd->run.num_c2s_rings = RTE_MIN(h->max_c2s_ring + 1,
173 					   pmd->cfg.num_c2s_rings);
174 	pmd->run.num_s2c_rings = RTE_MIN(h->max_s2c_ring + 1,
175 					   pmd->cfg.num_s2c_rings);
176 	pmd->run.log2_ring_size = RTE_MIN(h->max_log2_ring_size,
177 					    pmd->cfg.log2_ring_size);
178 	pmd->run.pkt_buffer_size = pmd->cfg.pkt_buffer_size;
179 
180 	strlcpy(pmd->remote_name, (char *)h->name, sizeof(pmd->remote_name));
181 
182 	MIF_LOG(DEBUG, "Connecting to %s.", pmd->remote_name);
183 
184 	return 0;
185 }
186 
187 static int
188 memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t *msg)
189 {
190 	memif_msg_init_t *i = &msg->init;
191 	struct memif_socket_dev_list_elt *elt;
192 	struct pmd_internals *pmd;
193 	struct rte_eth_dev *dev;
194 
195 	if (i->version != MEMIF_VERSION) {
196 		memif_msg_enq_disconnect(cc, "Incompatible memif version", 0);
197 		return -1;
198 	}
199 
200 	if (cc->socket == NULL) {
201 		memif_msg_enq_disconnect(cc, "Device error", 0);
202 		return -1;
203 	}
204 
205 	/* Find device with requested ID */
206 	TAILQ_FOREACH(elt, &cc->socket->dev_queue, next) {
207 		dev = elt->dev;
208 		pmd = dev->data->dev_private;
209 		if (((pmd->flags & ETH_MEMIF_FLAG_DISABLED) == 0) &&
210 		    (pmd->id == i->id) && (pmd->role == MEMIF_ROLE_SERVER)) {
211 			if (pmd->flags & (ETH_MEMIF_FLAG_CONNECTING |
212 					   ETH_MEMIF_FLAG_CONNECTED)) {
213 				memif_msg_enq_disconnect(cc,
214 							 "Already connected", 0);
215 				return -1;
216 			}
217 
218 			/* assign control channel to device */
219 			cc->dev = dev;
220 			pmd->cc = cc;
221 
222 			if (i->mode != MEMIF_INTERFACE_MODE_ETHERNET) {
223 				memif_msg_enq_disconnect(pmd->cc,
224 							 "Only ethernet mode supported",
225 							 0);
226 				return -1;
227 			}
228 
229 			strlcpy(pmd->remote_name, (char *)i->name,
230 				sizeof(pmd->remote_name));
231 
232 			if (*pmd->secret != '\0') {
233 				if (*i->secret == '\0') {
234 					memif_msg_enq_disconnect(pmd->cc,
235 								 "Secret required", 0);
236 					return -1;
237 				}
238 				if (strncmp(pmd->secret, (char *)i->secret,
239 						ETH_MEMIF_SECRET_SIZE) != 0) {
240 					memif_msg_enq_disconnect(pmd->cc,
241 								 "Incorrect secret", 0);
242 					return -1;
243 				}
244 			}
245 
246 			pmd->flags |= ETH_MEMIF_FLAG_CONNECTING;
247 			return 0;
248 		}
249 	}
250 
251 	/* ID not found on this socket */
252 	MIF_LOG(DEBUG, "ID %u not found.", i->id);
253 	memif_msg_enq_disconnect(cc, "ID not found", 0);
254 	return -1;
255 }
256 
257 static int
258 memif_msg_receive_add_region(struct rte_eth_dev *dev, memif_msg_t *msg,
259 			     int fd)
260 {
261 	struct pmd_internals *pmd = dev->data->dev_private;
262 	struct pmd_process_private *proc_private = dev->process_private;
263 	memif_msg_add_region_t *ar = &msg->add_region;
264 	struct memif_region *r;
265 
266 	if (fd < 0) {
267 		memif_msg_enq_disconnect(pmd->cc, "Missing region fd", 0);
268 		return -1;
269 	}
270 
271 	if (ar->index >= ETH_MEMIF_MAX_REGION_NUM ||
272 			ar->index != proc_private->regions_num ||
273 			proc_private->regions[ar->index] != NULL) {
274 		memif_msg_enq_disconnect(pmd->cc, "Invalid region index", 0);
275 		return -1;
276 	}
277 
278 	r = rte_zmalloc("region", sizeof(struct memif_region), 0);
279 	if (r == NULL) {
280 		memif_msg_enq_disconnect(pmd->cc, "Failed to alloc memif region.", 0);
281 		return -ENOMEM;
282 	}
283 
284 	r->fd = fd;
285 	r->region_size = ar->size;
286 	r->addr = NULL;
287 
288 	proc_private->regions[ar->index] = r;
289 	proc_private->regions_num++;
290 
291 	return 0;
292 }
293 
294 static int
295 memif_msg_receive_add_ring(struct rte_eth_dev *dev, memif_msg_t *msg, int fd)
296 {
297 	struct pmd_internals *pmd = dev->data->dev_private;
298 	memif_msg_add_ring_t *ar = &msg->add_ring;
299 	struct memif_queue *mq;
300 
301 	if (fd < 0) {
302 		memif_msg_enq_disconnect(pmd->cc, "Missing interrupt fd", 0);
303 		return -1;
304 	}
305 
306 	/* check if we have enough queues */
307 	if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_C2S) {
308 		if (ar->index >= pmd->cfg.num_c2s_rings) {
309 			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0);
310 			return -1;
311 		}
312 		pmd->run.num_c2s_rings++;
313 	} else {
314 		if (ar->index >= pmd->cfg.num_s2c_rings) {
315 			memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0);
316 			return -1;
317 		}
318 		pmd->run.num_s2c_rings++;
319 	}
320 
321 	mq = (ar->flags & MEMIF_MSG_ADD_RING_FLAG_C2S) ?
322 	    dev->data->rx_queues[ar->index] : dev->data->tx_queues[ar->index];
323 
324 	if (rte_intr_fd_set(mq->intr_handle, fd))
325 		return -1;
326 
327 	mq->log2_ring_size = ar->log2_ring_size;
328 	mq->region = ar->region;
329 	mq->ring_offset = ar->offset;
330 
331 	return 0;
332 }
333 
334 static int
335 memif_msg_receive_connect(struct rte_eth_dev *dev, memif_msg_t *msg)
336 {
337 	struct pmd_internals *pmd = dev->data->dev_private;
338 	memif_msg_connect_t *c = &msg->connect;
339 	int ret;
340 
341 	ret = memif_connect(dev);
342 	if (ret < 0)
343 		return ret;
344 
345 	strlcpy(pmd->remote_if_name, (char *)c->if_name,
346 		sizeof(pmd->remote_if_name));
347 	MIF_LOG(INFO, "Remote interface %s connected.", pmd->remote_if_name);
348 
349 	return 0;
350 }
351 
352 static int
353 memif_msg_receive_connected(struct rte_eth_dev *dev, memif_msg_t *msg)
354 {
355 	struct pmd_internals *pmd = dev->data->dev_private;
356 	memif_msg_connected_t *c = &msg->connected;
357 	int ret;
358 
359 	ret = memif_connect(dev);
360 	if (ret < 0)
361 		return ret;
362 
363 	strlcpy(pmd->remote_if_name, (char *)c->if_name,
364 		sizeof(pmd->remote_if_name));
365 	MIF_LOG(INFO, "Remote interface %s connected.", pmd->remote_if_name);
366 
367 	return 0;
368 }
369 
370 static int
371 memif_msg_receive_disconnect(struct rte_eth_dev *dev, memif_msg_t *msg)
372 {
373 	struct pmd_internals *pmd = dev->data->dev_private;
374 	memif_msg_disconnect_t *d = &msg->disconnect;
375 
376 	memset(pmd->remote_disc_string, 0, sizeof(pmd->remote_disc_string));
377 	strlcpy(pmd->remote_disc_string, (char *)d->string,
378 		sizeof(pmd->remote_disc_string));
379 
380 	MIF_LOG(INFO, "Disconnect received: %s", pmd->remote_disc_string);
381 
382 	memset(pmd->local_disc_string, 0, 96);
383 	memif_disconnect(dev);
384 	return 0;
385 }
386 
387 static int
388 memif_msg_enq_ack(struct rte_eth_dev *dev)
389 {
390 	struct pmd_internals *pmd = dev->data->dev_private;
391 	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
392 	if (e == NULL)
393 		return -1;
394 
395 	e->msg.type = MEMIF_MSG_TYPE_ACK;
396 
397 	return 0;
398 }
399 
400 static int
401 memif_msg_enq_init(struct rte_eth_dev *dev)
402 {
403 	struct pmd_internals *pmd = dev->data->dev_private;
404 	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
405 	memif_msg_init_t *i = &e->msg.init;
406 
407 	if (e == NULL)
408 		return -1;
409 
410 	i = &e->msg.init;
411 	e->msg.type = MEMIF_MSG_TYPE_INIT;
412 	i->version = MEMIF_VERSION;
413 	i->id = pmd->id;
414 	i->mode = MEMIF_INTERFACE_MODE_ETHERNET;
415 
416 	strlcpy((char *)i->name, rte_version(), sizeof(i->name));
417 
418 	if (*pmd->secret != '\0')
419 		strlcpy((char *)i->secret, pmd->secret, sizeof(i->secret));
420 
421 	return 0;
422 }
423 
424 static int
425 memif_msg_enq_add_region(struct rte_eth_dev *dev, uint8_t idx)
426 {
427 	struct pmd_internals *pmd = dev->data->dev_private;
428 	struct pmd_process_private *proc_private = dev->process_private;
429 	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
430 	memif_msg_add_region_t *ar;
431 	struct memif_region *mr = proc_private->regions[idx];
432 
433 	if (e == NULL)
434 		return -1;
435 
436 	ar = &e->msg.add_region;
437 	e->msg.type = MEMIF_MSG_TYPE_ADD_REGION;
438 	e->fd = mr->fd;
439 	ar->index = idx;
440 	ar->size = mr->region_size;
441 
442 	return 0;
443 }
444 
445 static int
446 memif_msg_enq_add_ring(struct rte_eth_dev *dev, uint8_t idx,
447 		       memif_ring_type_t type)
448 {
449 	struct pmd_internals *pmd = dev->data->dev_private;
450 	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
451 	struct memif_queue *mq;
452 	memif_msg_add_ring_t *ar;
453 
454 	if (e == NULL)
455 		return -1;
456 
457 	ar = &e->msg.add_ring;
458 	mq = (type == MEMIF_RING_C2S) ? dev->data->tx_queues[idx] :
459 	    dev->data->rx_queues[idx];
460 
461 	e->msg.type = MEMIF_MSG_TYPE_ADD_RING;
462 	e->fd = rte_intr_fd_get(mq->intr_handle);
463 	ar->index = idx;
464 	ar->offset = mq->ring_offset;
465 	ar->region = mq->region;
466 	ar->log2_ring_size = mq->log2_ring_size;
467 	ar->flags = (type == MEMIF_RING_C2S) ? MEMIF_MSG_ADD_RING_FLAG_C2S : 0;
468 	ar->private_hdr_size = 0;
469 
470 	return 0;
471 }
472 
473 static int
474 memif_msg_enq_connect(struct rte_eth_dev *dev)
475 {
476 	struct pmd_internals *pmd = dev->data->dev_private;
477 	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
478 	memif_msg_connect_t *c;
479 
480 	if (e == NULL)
481 		return -1;
482 
483 	c = &e->msg.connect;
484 	e->msg.type = MEMIF_MSG_TYPE_CONNECT;
485 	strlcpy((char *)c->if_name, dev->data->name, sizeof(c->if_name));
486 
487 	return 0;
488 }
489 
490 static int
491 memif_msg_enq_connected(struct rte_eth_dev *dev)
492 {
493 	struct pmd_internals *pmd = dev->data->dev_private;
494 	struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
495 	memif_msg_connected_t *c;
496 
497 	if (e == NULL)
498 		return -1;
499 
500 	c = &e->msg.connected;
501 	e->msg.type = MEMIF_MSG_TYPE_CONNECTED;
502 	strlcpy((char *)c->if_name, dev->data->name, sizeof(c->if_name));
503 
504 	return 0;
505 }
506 
507 static void
508 memif_intr_unregister_handler(struct rte_intr_handle *intr_handle, void *arg)
509 {
510 	struct memif_msg_queue_elt *elt;
511 	struct memif_control_channel *cc = arg;
512 
513 	/* close control channel fd */
514 	if (rte_intr_fd_get(intr_handle) >= 0)
515 		close(rte_intr_fd_get(intr_handle));
516 	/* clear message queue */
517 	while ((elt = TAILQ_FIRST(&cc->msg_queue)) != NULL) {
518 		TAILQ_REMOVE(&cc->msg_queue, elt, next);
519 		rte_free(elt);
520 	}
521 	rte_intr_instance_free(cc->intr_handle);
522 	/* free control channel */
523 	rte_free(cc);
524 }
525 
526 void
527 memif_disconnect(struct rte_eth_dev *dev)
528 {
529 	struct pmd_internals *pmd = dev->data->dev_private;
530 	struct memif_msg_queue_elt *elt, *next;
531 	struct memif_queue *mq;
532 	struct rte_intr_handle *ih;
533 	int i;
534 	int ret;
535 
536 	dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
537 	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
538 	pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTED;
539 
540 	rte_spinlock_lock(&pmd->cc_lock);
541 	if (pmd->cc != NULL) {
542 		/* Clear control message queue (except disconnect message if any). */
543 		for (elt = TAILQ_FIRST(&pmd->cc->msg_queue); elt != NULL; elt = next) {
544 			next = TAILQ_NEXT(elt, next);
545 			if (elt->msg.type != MEMIF_MSG_TYPE_DISCONNECT) {
546 				TAILQ_REMOVE(&pmd->cc->msg_queue, elt, next);
547 				rte_free(elt);
548 			}
549 		}
550 		/* send disconnect message (if there is any in queue) */
551 		memif_msg_send_from_queue(pmd->cc);
552 
553 		/* at this point, there should be no more messages in queue */
554 		if (TAILQ_FIRST(&pmd->cc->msg_queue) != NULL) {
555 			MIF_LOG(WARNING,
556 				"Unexpected message(s) in message queue.");
557 		}
558 
559 		ih = pmd->cc->intr_handle;
560 		if (rte_intr_fd_get(ih) > 0) {
561 			ret = rte_intr_callback_unregister(ih,
562 							memif_intr_handler,
563 							pmd->cc);
564 			/*
565 			 * If callback is active (disconnecting based on
566 			 * received control message).
567 			 */
568 			if (ret == -EAGAIN) {
569 				ret = rte_intr_callback_unregister_pending(ih,
570 							memif_intr_handler,
571 							pmd->cc,
572 							memif_intr_unregister_handler);
573 			} else if (ret > 0) {
574 				close(rte_intr_fd_get(ih));
575 				rte_intr_instance_free(ih);
576 				rte_free(pmd->cc);
577 			}
578 			pmd->cc = NULL;
579 			if (ret <= 0)
580 				MIF_LOG(WARNING,
581 					"Failed to unregister control channel callback.");
582 		}
583 	}
584 	rte_spinlock_unlock(&pmd->cc_lock);
585 
586 	/* unconfig interrupts */
587 	for (i = 0; i < pmd->cfg.num_c2s_rings; i++) {
588 		if (pmd->role == MEMIF_ROLE_CLIENT) {
589 			if (dev->data->tx_queues != NULL)
590 				mq = dev->data->tx_queues[i];
591 			else
592 				continue;
593 		} else {
594 			if (dev->data->rx_queues != NULL)
595 				mq = dev->data->rx_queues[i];
596 			else
597 				continue;
598 		}
599 
600 		if (rte_intr_fd_get(mq->intr_handle) > 0) {
601 			close(rte_intr_fd_get(mq->intr_handle));
602 			rte_intr_fd_set(mq->intr_handle, -1);
603 		}
604 	}
605 	for (i = 0; i < pmd->cfg.num_s2c_rings; i++) {
606 		if (pmd->role == MEMIF_ROLE_SERVER) {
607 			if (dev->data->tx_queues != NULL)
608 				mq = dev->data->tx_queues[i];
609 			else
610 				continue;
611 		} else {
612 			if (dev->data->rx_queues != NULL)
613 				mq = dev->data->rx_queues[i];
614 			else
615 				continue;
616 		}
617 
618 		if (rte_intr_fd_get(mq->intr_handle) > 0) {
619 			close(rte_intr_fd_get(mq->intr_handle));
620 			rte_intr_fd_set(mq->intr_handle, -1);
621 		}
622 	}
623 
624 	memif_free_regions(dev);
625 
626 	/* reset connection configuration */
627 	memset(&pmd->run, 0, sizeof(pmd->run));
628 
629 	MIF_LOG(DEBUG, "Disconnected, id: %d, role: %s.", pmd->id,
630 		(pmd->role == MEMIF_ROLE_SERVER) ? "server" : "client");
631 }
632 
633 static int
634 memif_msg_receive(struct memif_control_channel *cc)
635 {
636 	char ctl[CMSG_SPACE(sizeof(int)) +
637 		 CMSG_SPACE(sizeof(struct ucred))] = { 0 };
638 	struct msghdr mh = { 0 };
639 	struct iovec iov[1];
640 	memif_msg_t msg = { 0 };
641 	ssize_t size;
642 	int ret = 0;
643 	struct ucred *cr __rte_unused;
644 	cr = 0;
645 	struct cmsghdr *cmsg;
646 	int afd = -1;
647 	int i;
648 	struct pmd_internals *pmd;
649 	struct pmd_process_private *proc_private;
650 
651 	iov[0].iov_base = (void *)&msg;
652 	iov[0].iov_len = sizeof(memif_msg_t);
653 	mh.msg_iov = iov;
654 	mh.msg_iovlen = 1;
655 	mh.msg_control = ctl;
656 	mh.msg_controllen = sizeof(ctl);
657 
658 	if (rte_intr_fd_get(cc->intr_handle) < 0)
659 		return -1;
660 
661 	size = recvmsg(rte_intr_fd_get(cc->intr_handle), &mh, 0);
662 	if (size != sizeof(memif_msg_t)) {
663 		MIF_LOG(DEBUG, "Invalid message size = %zd", size);
664 		if (size > 0)
665 			/* 0 means end-of-file, negative size means error,
666 			 * don't send further disconnect message in such cases.
667 			 */
668 			memif_msg_enq_disconnect(cc, "Invalid message size", 0);
669 		return -1;
670 	}
671 	MIF_LOG(DEBUG, "Received msg type: %u.", msg.type);
672 
673 	cmsg = CMSG_FIRSTHDR(&mh);
674 	while (cmsg) {
675 		if (cmsg->cmsg_level == SOL_SOCKET) {
676 			if (cmsg->cmsg_type == SCM_CREDENTIALS)
677 				cr = (struct ucred *)CMSG_DATA(cmsg);
678 			else if (cmsg->cmsg_type == SCM_RIGHTS)
679 				rte_memcpy(&afd, CMSG_DATA(cmsg), sizeof(int));
680 		}
681 		cmsg = CMSG_NXTHDR(&mh, cmsg);
682 	}
683 
684 	if (cc->dev == NULL && msg.type != MEMIF_MSG_TYPE_INIT) {
685 		MIF_LOG(DEBUG, "Unexpected message.");
686 		memif_msg_enq_disconnect(cc, "Unexpected message", 0);
687 		return -1;
688 	}
689 
690 	/* get device from hash data */
691 	switch (msg.type) {
692 	case MEMIF_MSG_TYPE_ACK:
693 		break;
694 	case MEMIF_MSG_TYPE_HELLO:
695 		ret = memif_msg_receive_hello(cc->dev, &msg);
696 		if (ret < 0)
697 			goto exit;
698 		ret = memif_init_regions_and_queues(cc->dev);
699 		if (ret < 0)
700 			goto exit;
701 		ret = memif_msg_enq_init(cc->dev);
702 		if (ret < 0)
703 			goto exit;
704 		pmd = cc->dev->data->dev_private;
705 		proc_private = cc->dev->process_private;
706 		for (i = 0; i < proc_private->regions_num; i++) {
707 			ret = memif_msg_enq_add_region(cc->dev, i);
708 			if (ret < 0)
709 				goto exit;
710 		}
711 		for (i = 0; i < pmd->run.num_c2s_rings; i++) {
712 			ret = memif_msg_enq_add_ring(cc->dev, i,
713 						     MEMIF_RING_C2S);
714 			if (ret < 0)
715 				goto exit;
716 		}
717 		for (i = 0; i < pmd->run.num_s2c_rings; i++) {
718 			ret = memif_msg_enq_add_ring(cc->dev, i,
719 						     MEMIF_RING_S2C);
720 			if (ret < 0)
721 				goto exit;
722 		}
723 		ret = memif_msg_enq_connect(cc->dev);
724 		if (ret < 0)
725 			goto exit;
726 		break;
727 	case MEMIF_MSG_TYPE_INIT:
728 		/*
729 		 * This cc does not have an interface asociated with it.
730 		 * If suitable interface is found it will be assigned here.
731 		 */
732 		ret = memif_msg_receive_init(cc, &msg);
733 		if (ret < 0)
734 			goto exit;
735 		ret = memif_msg_enq_ack(cc->dev);
736 		if (ret < 0)
737 			goto exit;
738 		break;
739 	case MEMIF_MSG_TYPE_ADD_REGION:
740 		ret = memif_msg_receive_add_region(cc->dev, &msg, afd);
741 		if (ret < 0)
742 			goto exit;
743 		ret = memif_msg_enq_ack(cc->dev);
744 		if (ret < 0)
745 			goto exit;
746 		break;
747 	case MEMIF_MSG_TYPE_ADD_RING:
748 		ret = memif_msg_receive_add_ring(cc->dev, &msg, afd);
749 		if (ret < 0)
750 			goto exit;
751 		ret = memif_msg_enq_ack(cc->dev);
752 		if (ret < 0)
753 			goto exit;
754 		break;
755 	case MEMIF_MSG_TYPE_CONNECT:
756 		ret = memif_msg_receive_connect(cc->dev, &msg);
757 		if (ret < 0)
758 			goto exit;
759 		ret = memif_msg_enq_connected(cc->dev);
760 		if (ret < 0)
761 			goto exit;
762 		break;
763 	case MEMIF_MSG_TYPE_CONNECTED:
764 		ret = memif_msg_receive_connected(cc->dev, &msg);
765 		break;
766 	case MEMIF_MSG_TYPE_DISCONNECT:
767 		ret = memif_msg_receive_disconnect(cc->dev, &msg);
768 		if (ret < 0)
769 			goto exit;
770 		break;
771 	default:
772 		memif_msg_enq_disconnect(cc, "Unknown message type", 0);
773 		ret = -1;
774 		goto exit;
775 	}
776 
777  exit:
778 	return ret;
779 }
780 
781 static void
782 memif_intr_handler(void *arg)
783 {
784 	struct memif_control_channel *cc = arg;
785 	int ret;
786 
787 	ret = memif_msg_receive(cc);
788 	/* if driver failed to assign device */
789 	if (cc->dev == NULL) {
790 		memif_msg_send_from_queue(cc);
791 		ret = rte_intr_callback_unregister_pending(cc->intr_handle,
792 							   memif_intr_handler,
793 							   cc,
794 							   memif_intr_unregister_handler);
795 		if (ret < 0)
796 			MIF_LOG(WARNING,
797 				"Failed to unregister control channel callback.");
798 		return;
799 	}
800 	/* if memif_msg_receive failed */
801 	if (ret < 0)
802 		goto disconnect;
803 
804 	ret = memif_msg_send_from_queue(cc);
805 	if (ret < 0)
806 		goto disconnect;
807 
808 	return;
809 
810  disconnect:
811 	if (cc->dev == NULL) {
812 		MIF_LOG(WARNING, "eth dev not allocated");
813 		return;
814 	}
815 	memif_disconnect(cc->dev);
816 }
817 
818 static void
819 memif_listener_handler(void *arg)
820 {
821 	struct memif_socket *socket = arg;
822 	int sockfd;
823 	int addr_len;
824 	struct sockaddr_un client;
825 	struct memif_control_channel *cc;
826 	int ret;
827 
828 	addr_len = sizeof(client);
829 	sockfd = accept(rte_intr_fd_get(socket->intr_handle),
830 			(struct sockaddr *)&client, (socklen_t *)&addr_len);
831 	if (sockfd < 0) {
832 		MIF_LOG(ERR,
833 			"Failed to accept connection request on socket fd %d",
834 			rte_intr_fd_get(socket->intr_handle));
835 		return;
836 	}
837 
838 	MIF_LOG(DEBUG, "%s: Connection request accepted.", socket->filename);
839 
840 	cc = rte_zmalloc("memif-cc", sizeof(struct memif_control_channel), 0);
841 	if (cc == NULL) {
842 		MIF_LOG(ERR, "Failed to allocate control channel.");
843 		goto error;
844 	}
845 
846 	/* Allocate interrupt instance */
847 	cc->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
848 	if (cc->intr_handle == NULL) {
849 		MIF_LOG(ERR, "Failed to allocate intr handle");
850 		goto error;
851 	}
852 
853 	if (rte_intr_fd_set(cc->intr_handle, sockfd))
854 		goto error;
855 
856 	if (rte_intr_type_set(cc->intr_handle, RTE_INTR_HANDLE_EXT))
857 		goto error;
858 
859 	cc->socket = socket;
860 	cc->dev = NULL;
861 	TAILQ_INIT(&cc->msg_queue);
862 
863 	ret = rte_intr_callback_register(cc->intr_handle, memif_intr_handler,
864 					 cc);
865 	if (ret < 0) {
866 		MIF_LOG(ERR, "Failed to register control channel callback.");
867 		goto error;
868 	}
869 
870 	ret = memif_msg_enq_hello(cc);
871 	if (ret < 0) {
872 		MIF_LOG(ERR, "Failed to enqueue hello message.");
873 		goto error;
874 	}
875 	ret = memif_msg_send_from_queue(cc);
876 	if (ret < 0)
877 		goto error;
878 
879 	return;
880 
881  error:
882 	if (sockfd >= 0) {
883 		close(sockfd);
884 		sockfd = -1;
885 	}
886 	if (cc != NULL) {
887 		rte_intr_instance_free(cc->intr_handle);
888 		rte_free(cc);
889 	}
890 }
891 
892 static struct memif_socket *
893 memif_socket_create(char *key, uint8_t listener, bool is_abstract)
894 {
895 	struct memif_socket *sock;
896 	struct sockaddr_un un = { 0 };
897 	uint32_t sunlen;
898 	int sockfd;
899 	int ret;
900 	int on = 1;
901 
902 	sock = rte_zmalloc("memif-socket", sizeof(struct memif_socket), 0);
903 	if (sock == NULL) {
904 		MIF_LOG(ERR, "Failed to allocate memory for memif socket");
905 		return NULL;
906 	}
907 
908 	sock->listener = listener;
909 	strlcpy(sock->filename, key, MEMIF_SOCKET_UN_SIZE);
910 	TAILQ_INIT(&sock->dev_queue);
911 
912 	if (listener != 0) {
913 		sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
914 		if (sockfd < 0)
915 			goto error;
916 
917 		un.sun_family = AF_UNIX;
918 		if (is_abstract) {
919 			/* abstract address */
920 			un.sun_path[0] = '\0';
921 			strlcpy(un.sun_path + 1, sock->filename, MEMIF_SOCKET_UN_SIZE - 1);
922 			sunlen = RTE_MIN(1 + strlen(sock->filename),
923 					 MEMIF_SOCKET_UN_SIZE) +
924 				 sizeof(un) - sizeof(un.sun_path);
925 		} else {
926 			sunlen = sizeof(un);
927 			strlcpy(un.sun_path, sock->filename, MEMIF_SOCKET_UN_SIZE);
928 		}
929 
930 		ret = setsockopt(sockfd, SOL_SOCKET, SO_PASSCRED, &on,
931 				 sizeof(on));
932 		if (ret < 0)
933 			goto error;
934 
935 		ret = bind(sockfd, (struct sockaddr *)&un, sunlen);
936 		if (ret < 0)
937 			goto error;
938 
939 		ret = listen(sockfd, 1);
940 		if (ret < 0)
941 			goto error;
942 
943 		MIF_LOG(DEBUG, "Memif listener socket %s created.", sock->filename);
944 
945 		/* Allocate interrupt instance */
946 		sock->intr_handle =
947 			rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
948 		if (sock->intr_handle == NULL) {
949 			MIF_LOG(ERR, "Failed to allocate intr handle");
950 			goto error;
951 		}
952 
953 		if (rte_intr_fd_set(sock->intr_handle, sockfd))
954 			goto error;
955 
956 		if (rte_intr_type_set(sock->intr_handle, RTE_INTR_HANDLE_EXT))
957 			goto error;
958 
959 		ret = rte_intr_callback_register(sock->intr_handle,
960 						 memif_listener_handler, sock);
961 		if (ret < 0) {
962 			MIF_LOG(ERR, "Failed to register interrupt "
963 				"callback for listener socket");
964 			return NULL;
965 		}
966 	}
967 
968 	return sock;
969 
970  error:
971 	MIF_LOG(ERR, "Failed to setup socket %s: %s", key, strerror(errno));
972 	if (sock != NULL) {
973 		rte_intr_instance_free(sock->intr_handle);
974 		rte_free(sock);
975 	}
976 	if (sockfd >= 0)
977 		close(sockfd);
978 	return NULL;
979 }
980 
981 static struct rte_hash *
982 memif_create_socket_hash(void)
983 {
984 	struct rte_hash_parameters params = { 0 };
985 
986 	params.name = MEMIF_SOCKET_HASH_NAME;
987 	params.entries = 256;
988 	params.key_len = MEMIF_SOCKET_UN_SIZE;
989 	params.hash_func = rte_jhash;
990 	params.hash_func_init_val = 0;
991 	params.socket_id = SOCKET_ID_ANY;
992 	return rte_hash_create(&params);
993 }
994 
995 int
996 memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename)
997 {
998 	struct pmd_internals *pmd = dev->data->dev_private;
999 	struct memif_socket *socket = NULL;
1000 	struct memif_socket_dev_list_elt *elt;
1001 	struct pmd_internals *tmp_pmd;
1002 	struct rte_hash *hash;
1003 	int ret;
1004 	char key[MEMIF_SOCKET_UN_SIZE];
1005 
1006 	hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
1007 	if (hash == NULL) {
1008 		hash = memif_create_socket_hash();
1009 		if (hash == NULL) {
1010 			MIF_LOG(ERR, "Failed to create memif socket hash.");
1011 			return -1;
1012 		}
1013 	}
1014 
1015 	memset(key, 0, MEMIF_SOCKET_UN_SIZE);
1016 	strlcpy(key, socket_filename, MEMIF_SOCKET_UN_SIZE);
1017 	ret = rte_hash_lookup_data(hash, key, (void **)&socket);
1018 	if (ret < 0) {
1019 		socket = memif_socket_create(key,
1020 			(pmd->role == MEMIF_ROLE_CLIENT) ? 0 : 1,
1021 			pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT);
1022 		if (socket == NULL)
1023 			return -1;
1024 		ret = rte_hash_add_key_data(hash, key, socket);
1025 		if (ret < 0) {
1026 			MIF_LOG(ERR, "Failed to add socket to socket hash.");
1027 			return ret;
1028 		}
1029 	}
1030 	pmd->socket_filename = socket->filename;
1031 
1032 	TAILQ_FOREACH(elt, &socket->dev_queue, next) {
1033 		tmp_pmd = elt->dev->data->dev_private;
1034 		if (tmp_pmd->id == pmd->id && tmp_pmd->role == pmd->role) {
1035 			MIF_LOG(ERR, "Two interfaces with the same id (%d) can "
1036 				"not have the same role.", pmd->id);
1037 			return -1;
1038 		}
1039 	}
1040 
1041 	elt = rte_malloc("pmd-queue", sizeof(struct memif_socket_dev_list_elt), 0);
1042 	if (elt == NULL) {
1043 		MIF_LOG(ERR, "Failed to add device to socket device list.");
1044 		return -1;
1045 	}
1046 	elt->dev = dev;
1047 	TAILQ_INSERT_TAIL(&socket->dev_queue, elt, next);
1048 
1049 	return 0;
1050 }
1051 
1052 void
1053 memif_socket_remove_device(struct rte_eth_dev *dev)
1054 {
1055 	struct pmd_internals *pmd = dev->data->dev_private;
1056 	struct memif_socket *socket = NULL;
1057 	struct memif_socket_dev_list_elt *elt, *next;
1058 	struct rte_hash *hash;
1059 	int ret;
1060 
1061 	hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
1062 	if (hash == NULL)
1063 		return;
1064 
1065 	if (pmd->socket_filename == NULL)
1066 		return;
1067 
1068 	if (rte_hash_lookup_data(hash, pmd->socket_filename, (void **)&socket) < 0)
1069 		return;
1070 
1071 	for (elt = TAILQ_FIRST(&socket->dev_queue); elt != NULL; elt = next) {
1072 		next = TAILQ_NEXT(elt, next);
1073 		if (elt->dev == dev) {
1074 			TAILQ_REMOVE(&socket->dev_queue, elt, next);
1075 			rte_free(elt);
1076 			pmd->socket_filename = NULL;
1077 		}
1078 	}
1079 
1080 	/* remove socket, if this was the last device using it */
1081 	if (TAILQ_EMPTY(&socket->dev_queue)) {
1082 		rte_hash_del_key(hash, socket->filename);
1083 		if (socket->listener && !(pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT)) {
1084 			/* remove listener socket file,
1085 			 * so we can create new one later.
1086 			 */
1087 			ret = remove(socket->filename);
1088 			if (ret < 0)
1089 				MIF_LOG(ERR, "Failed to remove socket file: %s",
1090 					socket->filename);
1091 		}
1092 		if (pmd->role != MEMIF_ROLE_CLIENT)
1093 			rte_intr_instance_free(socket->intr_handle);
1094 		rte_free(socket);
1095 	}
1096 }
1097 
1098 int
1099 memif_connect_server(struct rte_eth_dev *dev)
1100 {
1101 	struct pmd_internals *pmd = dev->data->dev_private;
1102 
1103 	memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
1104 	memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
1105 	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
1106 	return 0;
1107 }
1108 
1109 int
1110 memif_connect_client(struct rte_eth_dev *dev)
1111 {
1112 	int sockfd;
1113 	int ret;
1114 	uint32_t sunlen;
1115 	struct sockaddr_un sun = { 0 };
1116 	struct pmd_internals *pmd = dev->data->dev_private;
1117 
1118 	memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
1119 	memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
1120 	pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
1121 
1122 	sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
1123 	if (sockfd < 0) {
1124 		MIF_LOG(ERR, "Failed to open socket.");
1125 		return -1;
1126 	}
1127 
1128 	sun.sun_family = AF_UNIX;
1129 	sunlen = sizeof(struct sockaddr_un);
1130 	if (pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT) {
1131 		/* abstract address */
1132 		sun.sun_path[0] = '\0';
1133 		strlcpy(sun.sun_path + 1,  pmd->socket_filename, MEMIF_SOCKET_UN_SIZE - 1);
1134 		sunlen = RTE_MIN(strlen(pmd->socket_filename) + 1,
1135 				 MEMIF_SOCKET_UN_SIZE) +
1136 			 sizeof(sun) - sizeof(sun.sun_path);
1137 	} else {
1138 		strlcpy(sun.sun_path,  pmd->socket_filename, MEMIF_SOCKET_UN_SIZE);
1139 	}
1140 
1141 	ret = connect(sockfd, (struct sockaddr *)&sun, sunlen);
1142 	if (ret < 0) {
1143 		MIF_LOG(ERR, "Failed to connect socket: %s.", pmd->socket_filename);
1144 		goto error;
1145 	}
1146 
1147 	MIF_LOG(DEBUG, "Memif socket: %s connected.", pmd->socket_filename);
1148 
1149 	pmd->cc = rte_zmalloc("memif-cc",
1150 			      sizeof(struct memif_control_channel), 0);
1151 	if (pmd->cc == NULL) {
1152 		MIF_LOG(ERR, "Failed to allocate control channel.");
1153 		goto error;
1154 	}
1155 
1156 	/* Allocate interrupt instance */
1157 	pmd->cc->intr_handle =
1158 		rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
1159 	if (pmd->cc->intr_handle == NULL) {
1160 		MIF_LOG(ERR, "Failed to allocate intr handle");
1161 		goto error;
1162 	}
1163 
1164 	if (rte_intr_fd_set(pmd->cc->intr_handle, sockfd))
1165 		goto error;
1166 
1167 	if (rte_intr_type_set(pmd->cc->intr_handle, RTE_INTR_HANDLE_EXT))
1168 		goto error;
1169 
1170 	pmd->cc->socket = NULL;
1171 	pmd->cc->dev = dev;
1172 	TAILQ_INIT(&pmd->cc->msg_queue);
1173 
1174 	ret = rte_intr_callback_register(pmd->cc->intr_handle,
1175 					 memif_intr_handler, pmd->cc);
1176 	if (ret < 0) {
1177 		MIF_LOG(ERR, "Failed to register interrupt callback for control fd");
1178 		goto error;
1179 	}
1180 
1181 	return 0;
1182 
1183  error:
1184 	if (sockfd >= 0) {
1185 		close(sockfd);
1186 		sockfd = -1;
1187 	}
1188 	if (pmd->cc != NULL) {
1189 		rte_intr_instance_free(pmd->cc->intr_handle);
1190 		rte_free(pmd->cc);
1191 		pmd->cc = NULL;
1192 	}
1193 	return -1;
1194 }
1195