xref: /dpdk/drivers/net/failsafe/failsafe_ops.c (revision 89f0711f9ddfb5822da9d34f384b92f72a61c4dc)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2017 6WIND S.A.
5  *   Copyright 2017 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <stdbool.h>
35 #include <stdint.h>
36 #include <unistd.h>
37 
38 #include <rte_debug.h>
39 #include <rte_atomic.h>
40 #include <rte_ethdev_driver.h>
41 #include <rte_malloc.h>
42 #include <rte_flow.h>
43 #include <rte_cycles.h>
44 
45 #include "failsafe_private.h"
46 
47 static struct rte_eth_dev_info default_infos = {
48 	/* Max possible number of elements */
49 	.max_rx_pktlen = UINT32_MAX,
50 	.max_rx_queues = RTE_MAX_QUEUES_PER_PORT,
51 	.max_tx_queues = RTE_MAX_QUEUES_PER_PORT,
52 	.max_mac_addrs = FAILSAFE_MAX_ETHADDR,
53 	.max_hash_mac_addrs = UINT32_MAX,
54 	.max_vfs = UINT16_MAX,
55 	.max_vmdq_pools = UINT16_MAX,
56 	.rx_desc_lim = {
57 		.nb_max = UINT16_MAX,
58 		.nb_min = 0,
59 		.nb_align = 1,
60 		.nb_seg_max = UINT16_MAX,
61 		.nb_mtu_seg_max = UINT16_MAX,
62 	},
63 	.tx_desc_lim = {
64 		.nb_max = UINT16_MAX,
65 		.nb_min = 0,
66 		.nb_align = 1,
67 		.nb_seg_max = UINT16_MAX,
68 		.nb_mtu_seg_max = UINT16_MAX,
69 	},
70 	/*
71 	 * Set of capabilities that can be verified upon
72 	 * configuring a sub-device.
73 	 */
74 	.rx_offload_capa =
75 		DEV_RX_OFFLOAD_VLAN_STRIP |
76 		DEV_RX_OFFLOAD_IPV4_CKSUM |
77 		DEV_RX_OFFLOAD_UDP_CKSUM |
78 		DEV_RX_OFFLOAD_TCP_CKSUM |
79 		DEV_RX_OFFLOAD_TCP_LRO |
80 		DEV_RX_OFFLOAD_QINQ_STRIP |
81 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
82 		DEV_RX_OFFLOAD_MACSEC_STRIP |
83 		DEV_RX_OFFLOAD_HEADER_SPLIT |
84 		DEV_RX_OFFLOAD_VLAN_FILTER |
85 		DEV_RX_OFFLOAD_VLAN_EXTEND |
86 		DEV_RX_OFFLOAD_JUMBO_FRAME |
87 		DEV_RX_OFFLOAD_CRC_STRIP |
88 		DEV_RX_OFFLOAD_SCATTER |
89 		DEV_RX_OFFLOAD_TIMESTAMP |
90 		DEV_RX_OFFLOAD_SECURITY,
91 	.rx_queue_offload_capa =
92 		DEV_RX_OFFLOAD_VLAN_STRIP |
93 		DEV_RX_OFFLOAD_IPV4_CKSUM |
94 		DEV_RX_OFFLOAD_UDP_CKSUM |
95 		DEV_RX_OFFLOAD_TCP_CKSUM |
96 		DEV_RX_OFFLOAD_TCP_LRO |
97 		DEV_RX_OFFLOAD_QINQ_STRIP |
98 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
99 		DEV_RX_OFFLOAD_MACSEC_STRIP |
100 		DEV_RX_OFFLOAD_HEADER_SPLIT |
101 		DEV_RX_OFFLOAD_VLAN_FILTER |
102 		DEV_RX_OFFLOAD_VLAN_EXTEND |
103 		DEV_RX_OFFLOAD_JUMBO_FRAME |
104 		DEV_RX_OFFLOAD_CRC_STRIP |
105 		DEV_RX_OFFLOAD_SCATTER |
106 		DEV_RX_OFFLOAD_TIMESTAMP |
107 		DEV_RX_OFFLOAD_SECURITY,
108 	.tx_offload_capa = 0x0,
109 	.flow_type_rss_offloads = 0x0,
110 };
111 
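/*
 * Validate the requested Tx offloads against the capabilities gathered by
 * dev_infos_get, then apply the fail-safe configuration to every probed
 * sub_device. The RMV and LSC interrupt flags are toggled per sub_device
 * so that no sub_device is asked for an event type it cannot deliver.
 */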
112 static int
113 fs_dev_configure(struct rte_eth_dev *dev)
114 {
115 	struct sub_device *sdev;
116 	uint64_t supp_tx_offloads;
117 	uint64_t tx_offloads;
118 	uint8_t i;
119 	int ret;
120 
121 	supp_tx_offloads = PRIV(dev)->infos.tx_offload_capa;
122 	tx_offloads = dev->data->dev_conf.txmode.offloads;
123 	if ((tx_offloads & supp_tx_offloads) != tx_offloads) {
124 		rte_errno = ENOTSUP;
125 		ERROR("Some Tx offloads are not supported, "
126 		      "requested 0x%" PRIx64 " supported 0x%" PRIx64,
127 		      tx_offloads, supp_tx_offloads);
128 		return -rte_errno;
129 	}
130 	FOREACH_SUBDEV(sdev, i, dev) {
131 		int rmv_interrupt = 0;
132 		int lsc_interrupt = 0;
133 		int lsc_enabled;
134 
135 		if (sdev->state != DEV_PROBED)
136 			continue;
137 
138 		rmv_interrupt = ETH(sdev)->data->dev_flags &
139 				RTE_ETH_DEV_INTR_RMV;
140 		if (rmv_interrupt) {
141 			DEBUG("Enabling RMV interrupts for sub_device %d", i);
142 			dev->data->dev_conf.intr_conf.rmv = 1;
143 		} else {
144 			DEBUG("sub_device %d does not support RMV event", i);
145 		}
146 		lsc_enabled = dev->data->dev_conf.intr_conf.lsc;
147 		lsc_interrupt = lsc_enabled &&
148 				(ETH(sdev)->data->dev_flags &
149 				 RTE_ETH_DEV_INTR_LSC);
150 		if (lsc_interrupt) {
151 			DEBUG("Enabling LSC interrupts for sub_device %d", i);
152 			dev->data->dev_conf.intr_conf.lsc = 1;
153 		} else if (lsc_enabled && !lsc_interrupt) {
154 			DEBUG("Disabling LSC interrupts for sub_device %d", i);
155 			dev->data->dev_conf.intr_conf.lsc = 0;
156 		}
157 		DEBUG("Configuring sub-device %d", i);
158 		sdev->remove = 0;
159 		ret = rte_eth_dev_configure(PORT_ID(sdev),
160 					dev->data->nb_rx_queues,
161 					dev->data->nb_tx_queues,
162 					&dev->data->dev_conf);
163 		if (ret) {
164 			if (!fs_err(sdev, ret))
165 				continue;
166 			ERROR("Could not configure sub_device %d", i);
167 			return ret;
168 		}
169 		if (rmv_interrupt) {
170 			ret = rte_eth_dev_callback_register(PORT_ID(sdev),
171 					RTE_ETH_EVENT_INTR_RMV,
172 					failsafe_eth_rmv_event_callback,
173 					sdev);
174 			if (ret)
175 				WARN("Failed to register RMV callback for sub_device %d",
176 				     SUB_ID(sdev));
177 		}
178 		dev->data->dev_conf.intr_conf.rmv = 0;
179 		if (lsc_interrupt) {
180 			ret = rte_eth_dev_callback_register(PORT_ID(sdev),
181 						RTE_ETH_EVENT_INTR_LSC,
182 						failsafe_eth_lsc_event_callback,
183 						dev);
184 			if (ret)
185 				WARN("Failed to register LSC callback for sub_device %d",
186 				     SUB_ID(sdev));
187 		}
188 		dev->data->dev_conf.intr_conf.lsc = lsc_enabled;
189 		sdev->state = DEV_ACTIVE;
190 	}
191 	if (PRIV(dev)->state < DEV_ACTIVE)
192 		PRIV(dev)->state = DEV_ACTIVE;
193 	return 0;
194 }
195 
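/*
 * Start every configured sub_device and install its Rx interrupt proxying.
 * Once the sub_devices are started, fs_switch_dev() selects the sub_device
 * that will carry the data path.
 */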
196 static int
197 fs_dev_start(struct rte_eth_dev *dev)
198 {
199 	struct sub_device *sdev;
200 	uint8_t i;
201 	int ret;
202 
203 	ret = failsafe_rx_intr_install(dev);
204 	if (ret)
205 		return ret;
206 	FOREACH_SUBDEV(sdev, i, dev) {
207 		if (sdev->state != DEV_ACTIVE)
208 			continue;
209 		DEBUG("Starting sub_device %d", i);
210 		ret = rte_eth_dev_start(PORT_ID(sdev));
211 		if (ret) {
212 			if (!fs_err(sdev, ret))
213 				continue;
214 			return ret;
215 		}
216 		ret = failsafe_rx_intr_install_subdevice(sdev);
217 		if (ret) {
218 			if (!fs_err(sdev, ret))
219 				continue;
220 			rte_eth_dev_stop(PORT_ID(sdev));
221 			return ret;
222 		}
223 		sdev->state = DEV_STARTED;
224 	}
225 	if (PRIV(dev)->state < DEV_STARTED)
226 		PRIV(dev)->state = DEV_STARTED;
227 	fs_switch_dev(dev, NULL);
228 	return 0;
229 }
230 
231 static void
232 fs_dev_stop(struct rte_eth_dev *dev)
233 {
234 	struct sub_device *sdev;
235 	uint8_t i;
236 
237 	PRIV(dev)->state = DEV_STARTED - 1;
238 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED) {
239 		rte_eth_dev_stop(PORT_ID(sdev));
240 		failsafe_rx_intr_uninstall_subdevice(sdev);
241 		sdev->state = DEV_STARTED - 1;
242 	}
243 	failsafe_rx_intr_uninstall(dev);
244 }
245 
246 static int
247 fs_dev_set_link_up(struct rte_eth_dev *dev)
248 {
249 	struct sub_device *sdev;
250 	uint8_t i;
251 	int ret;
252 
253 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
254 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
255 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
256 		if ((ret = fs_err(sdev, ret))) {
257 			ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
258 			      " with error %d", i, ret);
259 			return ret;
260 		}
261 	}
262 	return 0;
263 }
264 
265 static int
266 fs_dev_set_link_down(struct rte_eth_dev *dev)
267 {
268 	struct sub_device *sdev;
269 	uint8_t i;
270 	int ret;
271 
272 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
273 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
274 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
275 		if ((ret = fs_err(sdev, ret))) {
276 			ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
277 			      " with error %d", i, ret);
278 			return ret;
279 		}
280 	}
281 	return 0;
282 }
283 
284 static void fs_dev_free_queues(struct rte_eth_dev *dev);
285 static void
286 fs_dev_close(struct rte_eth_dev *dev)
287 {
288 	struct sub_device *sdev;
289 	uint8_t i;
290 
291 	failsafe_hotplug_alarm_cancel(dev);
292 	if (PRIV(dev)->state == DEV_STARTED)
293 		dev->dev_ops->dev_stop(dev);
294 	PRIV(dev)->state = DEV_ACTIVE - 1;
295 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
296 		DEBUG("Closing sub_device %d", i);
297 		rte_eth_dev_close(PORT_ID(sdev));
298 		sdev->state = DEV_ACTIVE - 1;
299 	}
300 	fs_dev_free_queues(dev);
301 }
302 
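/*
 * Check that the Rx offloads requested for a queue are a subset of the
 * advertised port and queue capabilities and do not conflict with the
 * offloads already configured at the port level.
 */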
303 static bool
304 fs_rxq_offloads_valid(struct rte_eth_dev *dev, uint64_t offloads)
305 {
306 	uint64_t port_offloads;
307 	uint64_t queue_supp_offloads;
308 	uint64_t port_supp_offloads;
309 
310 	port_offloads = dev->data->dev_conf.rxmode.offloads;
311 	queue_supp_offloads = PRIV(dev)->infos.rx_queue_offload_capa;
312 	port_supp_offloads = PRIV(dev)->infos.rx_offload_capa;
313 	if ((offloads & (queue_supp_offloads | port_supp_offloads)) !=
314 	     offloads)
315 		return false;
316 	/* Verify we have no conflict with port offloads */
317 	if ((port_offloads ^ offloads) & port_supp_offloads)
318 		return false;
319 	return true;
320 }
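/*
 * Illustrative example of the check above (made-up values): with port
 * offloads = VLAN_STRIP, port capabilities = VLAN_STRIP | TCP_CKSUM and
 * queue capabilities = SCATTER, requesting VLAN_STRIP | SCATTER on a queue
 * is accepted, whereas requesting SCATTER alone is rejected: VLAN_STRIP is
 * a port-level offload already enabled on the port, so omitting it in the
 * per-queue request is a conflict.
 */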
321 
322 static void
323 fs_rx_queue_release(void *queue)
324 {
325 	struct rte_eth_dev *dev;
326 	struct sub_device *sdev;
327 	uint8_t i;
328 	struct rxq *rxq;
329 
330 	if (queue == NULL)
331 		return;
332 	rxq = queue;
333 	if (rxq->event_fd > 0)
334 		close(rxq->event_fd);
335 	dev = rxq->priv->dev;
336 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
337 		SUBOPS(sdev, rx_queue_release)
338 			(ETH(sdev)->data->rx_queues[rxq->qid]);
339 	dev->data->rx_queues[rxq->qid] = NULL;
340 	rte_free(rxq);
341 }
342 
343 static int
344 fs_rx_queue_setup(struct rte_eth_dev *dev,
345 		uint16_t rx_queue_id,
346 		uint16_t nb_rx_desc,
347 		unsigned int socket_id,
348 		const struct rte_eth_rxconf *rx_conf,
349 		struct rte_mempool *mb_pool)
350 {
351 	/*
352 	 * FIXME: Add a proper interface in rte_eal_interrupts for
353 	 * allocating eventfd as an interrupt vector.
354 	 * For the time being, pretend we are using MSIX interrupts;
355 	 * this causes rte_intr_efd_enable to allocate an eventfd for us.
356 	 */
357 	struct rte_intr_handle intr_handle = {
358 		.type = RTE_INTR_HANDLE_VFIO_MSIX,
359 		.efds = { -1, },
360 	};
361 	struct sub_device *sdev;
362 	struct rxq *rxq;
363 	uint8_t i;
364 	int ret;
365 
366 	rxq = dev->data->rx_queues[rx_queue_id];
367 	if (rxq != NULL) {
368 		fs_rx_queue_release(rxq);
369 		dev->data->rx_queues[rx_queue_id] = NULL;
370 	}
371 	/* Verify application offloads are valid for our port and queue. */
372 	if (fs_rxq_offloads_valid(dev, rx_conf->offloads) == false) {
373 		rte_errno = ENOTSUP;
374 		ERROR("Rx queue offloads 0x%" PRIx64
375 		      " don't match port offloads 0x%" PRIx64
376 		      " or supported offloads 0x%" PRIx64,
377 		      rx_conf->offloads,
378 		      dev->data->dev_conf.rxmode.offloads,
379 		      PRIV(dev)->infos.rx_offload_capa |
380 		      PRIV(dev)->infos.rx_queue_offload_capa);
381 		return -rte_errno;
382 	}
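	/*
	 * Allocate the fail-safe Rx queue followed by one reference counter
	 * per potential sub_device (PRIV(dev)->subs_tail entries).
	 */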
383 	rxq = rte_zmalloc(NULL,
384 			  sizeof(*rxq) +
385 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
386 			  RTE_CACHE_LINE_SIZE);
387 	if (rxq == NULL)
388 		return -ENOMEM;
389 	FOREACH_SUBDEV(sdev, i, dev)
390 		rte_atomic64_init(&rxq->refcnt[i]);
391 	rxq->qid = rx_queue_id;
392 	rxq->socket_id = socket_id;
393 	rxq->info.mp = mb_pool;
394 	rxq->info.conf = *rx_conf;
395 	rxq->info.nb_desc = nb_rx_desc;
396 	rxq->priv = PRIV(dev);
397 	rxq->sdev = PRIV(dev)->subs;
398 	ret = rte_intr_efd_enable(&intr_handle, 1);
399 	if (ret < 0)
400 		return ret;
401 	rxq->event_fd = intr_handle.efds[0];
402 	dev->data->rx_queues[rx_queue_id] = rxq;
403 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
404 		ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
405 				rx_queue_id,
406 				nb_rx_desc, socket_id,
407 				rx_conf, mb_pool);
408 		if ((ret = fs_err(sdev, ret))) {
409 			ERROR("RX queue setup failed for sub_device %d", i);
410 			goto free_rxq;
411 		}
412 	}
413 	return 0;
414 free_rxq:
415 	fs_rx_queue_release(rxq);
416 	return ret;
417 }
418 
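/*
 * Enable Rx interrupt events on a queue: mark the queue for the event
 * proxy service and forward the request to every active sub_device.
 * A sub_device failure is recorded but does not stop the iteration.
 */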
419 static int
420 fs_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
421 {
422 	struct rxq *rxq;
423 	struct sub_device *sdev;
424 	uint8_t i;
425 	int ret;
426 	int rc = 0;
427 
428 	if (idx >= dev->data->nb_rx_queues) {
429 		rte_errno = EINVAL;
430 		return -rte_errno;
431 	}
432 	rxq = dev->data->rx_queues[idx];
433 	if (rxq == NULL || rxq->event_fd <= 0) {
434 		rte_errno = EINVAL;
435 		return -rte_errno;
436 	}
437 	/* Fail if the proxy service is not running. */
438 	if (PRIV(dev)->rxp.sstate != SS_RUNNING) {
439 		ERROR("failsafe interrupt services are not running");
440 		rte_errno = EAGAIN;
441 		return -rte_errno;
442 	}
443 	rxq->enable_events = 1;
444 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
445 		ret = rte_eth_dev_rx_intr_enable(PORT_ID(sdev), idx);
446 		ret = fs_err(sdev, ret);
447 		if (ret)
448 			rc = ret;
449 	}
450 	if (rc)
451 		rte_errno = -rc;
452 	return rc;
453 }
454 
455 static int
456 fs_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
457 {
458 	struct rxq *rxq;
459 	struct sub_device *sdev;
460 	uint64_t u64;
461 	uint8_t i;
462 	int rc = 0;
463 	int ret;
464 
465 	if (idx >= dev->data->nb_rx_queues) {
466 		rte_errno = EINVAL;
467 		return -rte_errno;
468 	}
469 	rxq = dev->data->rx_queues[idx];
470 	if (rxq == NULL || rxq->event_fd <= 0) {
471 		rte_errno = EINVAL;
472 		return -rte_errno;
473 	}
474 	rxq->enable_events = 0;
475 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
476 		ret = rte_eth_dev_rx_intr_disable(PORT_ID(sdev), idx);
477 		ret = fs_err(sdev, ret);
478 		if (ret)
479 			rc = ret;
480 	}
481 	/* Clear pending events */
482 	while (read(rxq->event_fd, &u64, sizeof(uint64_t)) > 0)
483 		;
484 	if (rc)
485 		rte_errno = -rc;
486 	return rc;
487 }
488 
489 static bool
490 fs_txq_offloads_valid(struct rte_eth_dev *dev, uint64_t offloads)
491 {
492 	uint64_t port_offloads;
493 	uint64_t queue_supp_offloads;
494 	uint64_t port_supp_offloads;
495 
496 	port_offloads = dev->data->dev_conf.txmode.offloads;
497 	queue_supp_offloads = PRIV(dev)->infos.tx_queue_offload_capa;
498 	port_supp_offloads = PRIV(dev)->infos.tx_offload_capa;
499 	if ((offloads & (queue_supp_offloads | port_supp_offloads)) !=
500 	     offloads)
501 		return false;
502 	/* Verify we have no conflict with port offloads */
503 	if ((port_offloads ^ offloads) & port_supp_offloads)
504 		return false;
505 	return true;
506 }
507 
508 static void
509 fs_tx_queue_release(void *queue)
510 {
511 	struct rte_eth_dev *dev;
512 	struct sub_device *sdev;
513 	uint8_t i;
514 	struct txq *txq;
515 
516 	if (queue == NULL)
517 		return;
518 	txq = queue;
519 	dev = txq->priv->dev;
520 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
521 		SUBOPS(sdev, tx_queue_release)
522 			(ETH(sdev)->data->tx_queues[txq->qid]);
523 	dev->data->tx_queues[txq->qid] = NULL;
524 	rte_free(txq);
525 }
526 
527 static int
528 fs_tx_queue_setup(struct rte_eth_dev *dev,
529 		uint16_t tx_queue_id,
530 		uint16_t nb_tx_desc,
531 		unsigned int socket_id,
532 		const struct rte_eth_txconf *tx_conf)
533 {
534 	struct sub_device *sdev;
535 	struct txq *txq;
536 	uint8_t i;
537 	int ret;
538 
539 	txq = dev->data->tx_queues[tx_queue_id];
540 	if (txq != NULL) {
541 		fs_tx_queue_release(txq);
542 		dev->data->tx_queues[tx_queue_id] = NULL;
543 	}
544 	/*
545 	 * Don't verify queue offloads for applications which
546 	 * use the old API.
547 	 */
548 	if (tx_conf != NULL &&
549 	    (tx_conf->txq_flags & ETH_TXQ_FLAGS_IGNORE) &&
550 	    fs_txq_offloads_valid(dev, tx_conf->offloads) == false) {
551 		rte_errno = ENOTSUP;
552 		ERROR("Tx queue offloads 0x%" PRIx64
553 		      " don't match port offloads 0x%" PRIx64
554 		      " or supported offloads 0x%" PRIx64,
555 		      tx_conf->offloads,
556 		      dev->data->dev_conf.txmode.offloads,
557 		      PRIV(dev)->infos.tx_offload_capa |
558 		      PRIV(dev)->infos.tx_queue_offload_capa);
559 		return -rte_errno;
560 	}
561 	txq = rte_zmalloc("ethdev TX queue",
562 			  sizeof(*txq) +
563 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
564 			  RTE_CACHE_LINE_SIZE);
565 	if (txq == NULL)
566 		return -ENOMEM;
567 	FOREACH_SUBDEV(sdev, i, dev)
568 		rte_atomic64_init(&txq->refcnt[i]);
569 	txq->qid = tx_queue_id;
570 	txq->socket_id = socket_id;
571 	txq->info.conf = *tx_conf;
572 	txq->info.nb_desc = nb_tx_desc;
573 	txq->priv = PRIV(dev);
574 	dev->data->tx_queues[tx_queue_id] = txq;
575 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
576 		ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
577 				tx_queue_id,
578 				nb_tx_desc, socket_id,
579 				tx_conf);
580 		if ((ret = fs_err(sdev, ret))) {
581 			ERROR("TX queue setup failed for sub_device %d", i);
582 			goto free_txq;
583 		}
584 	}
585 	return 0;
586 free_txq:
587 	fs_tx_queue_release(txq);
588 	return ret;
589 }
590 
591 static void
592 fs_dev_free_queues(struct rte_eth_dev *dev)
593 {
594 	uint16_t i;
595 
596 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
597 		fs_rx_queue_release(dev->data->rx_queues[i]);
598 		dev->data->rx_queues[i] = NULL;
599 	}
600 	dev->data->nb_rx_queues = 0;
601 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
602 		fs_tx_queue_release(dev->data->tx_queues[i]);
603 		dev->data->tx_queues[i] = NULL;
604 	}
605 	dev->data->nb_tx_queues = 0;
606 }
607 
608 static void
609 fs_promiscuous_enable(struct rte_eth_dev *dev)
610 {
611 	struct sub_device *sdev;
612 	uint8_t i;
613 
614 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
615 		rte_eth_promiscuous_enable(PORT_ID(sdev));
616 }
617 
618 static void
619 fs_promiscuous_disable(struct rte_eth_dev *dev)
620 {
621 	struct sub_device *sdev;
622 	uint8_t i;
623 
624 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
625 		rte_eth_promiscuous_disable(PORT_ID(sdev));
626 }
627 
628 static void
629 fs_allmulticast_enable(struct rte_eth_dev *dev)
630 {
631 	struct sub_device *sdev;
632 	uint8_t i;
633 
634 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
635 		rte_eth_allmulticast_enable(PORT_ID(sdev));
636 }
637 
638 static void
639 fs_allmulticast_disable(struct rte_eth_dev *dev)
640 {
641 	struct sub_device *sdev;
642 	uint8_t i;
643 
644 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
645 		rte_eth_allmulticast_disable(PORT_ID(sdev));
646 }
647 
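/*
 * Refresh the link status of every active sub_device, then mirror the Tx
 * sub_device's link state into the fail-safe port. Returns 0 when the
 * reported link state changed, -1 otherwise.
 */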
648 static int
649 fs_link_update(struct rte_eth_dev *dev,
650 		int wait_to_complete)
651 {
652 	struct sub_device *sdev;
653 	uint8_t i;
654 	int ret;
655 
656 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
657 		DEBUG("Calling link_update on sub_device %d", i);
658 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
659 		if (ret && ret != -1 && sdev->remove == 0 &&
660 		    rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) {
661 			ERROR("Link update failed for sub_device %d with error %d",
662 			      i, ret);
663 			return ret;
664 		}
665 	}
666 	if (TX_SUBDEV(dev)) {
667 		struct rte_eth_link *l1;
668 		struct rte_eth_link *l2;
669 
670 		l1 = &dev->data->dev_link;
671 		l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
672 		if (memcmp(l1, l2, sizeof(*l1))) {
673 			*l1 = *l2;
674 			return 0;
675 		}
676 	}
677 	return -1;
678 }
679 
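/*
 * Aggregate statistics: start from the fail-safe's internal accumulator,
 * then add a fresh snapshot from each active sub_device. If reading a
 * sub_device fails transiently, its previous snapshot is reused so the
 * reported counters never go backwards.
 */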
680 static int
681 fs_stats_get(struct rte_eth_dev *dev,
682 	     struct rte_eth_stats *stats)
683 {
684 	struct rte_eth_stats backup;
685 	struct sub_device *sdev;
686 	uint8_t i;
687 	int ret;
688 
689 	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
690 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
691 		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
692 		uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
693 
694 		rte_memcpy(&backup, snapshot, sizeof(backup));
695 		ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
696 		if (ret) {
697 			if (!fs_err(sdev, ret)) {
698 				rte_memcpy(snapshot, &backup, sizeof(backup));
699 				goto inc;
700 			}
701 			ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
702 				  i, ret);
703 			*timestamp = 0;
704 			return ret;
705 		}
706 		*timestamp = rte_rdtsc();
707 inc:
708 		failsafe_stats_increment(stats, snapshot);
709 	}
710 	return 0;
711 }
712 
713 static void
714 fs_stats_reset(struct rte_eth_dev *dev)
715 {
716 	struct sub_device *sdev;
717 	uint8_t i;
718 
719 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
720 		rte_eth_stats_reset(PORT_ID(sdev));
721 		memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
722 	}
723 	memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats));
724 }
725 
726 /**
727  * Fail-safe dev_infos_get rules:
728  *
729  * No sub_device:
730  *   Numeric limits:
731  *      Use the maximum possible values for any field, so as not
732  *      to impede any further configuration effort.
733  *   Capabilities:
734  *      Limit capabilities to those that are understood by the
735  *      fail-safe PMD. This understanding stems from the fail-safe
736  *      being capable of verifying that the related capability is
737  *      expressed within the device configuration (struct rte_eth_conf).
738  *
739  * At least one probed sub_device:
740  *   Numeric limits:
741  *      Use the values reported by the active probed sub_device.
742  *      The rationale is that if any sub_device is less capable
743  *      (for example concerning the number of queues) than the active
744  *      sub_device, then its subsequent configuration will fail.
745  *      It is impossible to foresee this failure when the failing sub_device
746  *      is supposed to be plugged in later on, so the configuration process
747  *      is the single point of failure and error reporting.
748  *   Capabilities:
749  *      Use a logical AND of RX capabilities among
750  *      all sub_devices and the default capabilities.
751  *      Use a logical AND of TX capabilities among
752  *      the active probed sub_device and the default capabilities.
753  *
754  */
755 static void
756 fs_dev_infos_get(struct rte_eth_dev *dev,
757 		  struct rte_eth_dev_info *infos)
758 {
759 	struct sub_device *sdev;
760 	uint8_t i;
761 
762 	sdev = TX_SUBDEV(dev);
763 	if (sdev == NULL) {
764 		DEBUG("No probed device, using default infos");
765 		rte_memcpy(&PRIV(dev)->infos, &default_infos,
766 			   sizeof(default_infos));
767 	} else {
768 		uint64_t rx_offload_capa;
769 		uint64_t rxq_offload_capa;
770 
771 		rx_offload_capa = default_infos.rx_offload_capa;
772 		rxq_offload_capa = default_infos.rx_queue_offload_capa;
773 		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
774 			rte_eth_dev_info_get(PORT_ID(sdev),
775 					&PRIV(dev)->infos);
776 			rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
777 			rxq_offload_capa &=
778 					PRIV(dev)->infos.rx_queue_offload_capa;
779 		}
780 		sdev = TX_SUBDEV(dev);
781 		rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
782 		PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
783 		PRIV(dev)->infos.rx_queue_offload_capa = rxq_offload_capa;
784 		PRIV(dev)->infos.tx_offload_capa &=
785 					default_infos.tx_offload_capa;
786 		PRIV(dev)->infos.tx_queue_offload_capa &=
787 					default_infos.tx_queue_offload_capa;
788 		PRIV(dev)->infos.flow_type_rss_offloads &=
789 					default_infos.flow_type_rss_offloads;
790 	}
791 	rte_memcpy(infos, &PRIV(dev)->infos, sizeof(*infos));
792 }
793 
794 static const uint32_t *
795 fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
796 {
797 	struct sub_device *sdev;
798 	struct rte_eth_dev *edev;
799 
800 	sdev = TX_SUBDEV(dev);
801 	if (sdev == NULL)
802 		return NULL;
803 	edev = ETH(sdev);
804 	/* ENOTSUP: counts as no supported ptypes */
805 	if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
806 		return NULL;
807 	/*
808 	 * The API does not permit doing a clean AND of all ptypes.
809 	 * It is also incomplete by design, and we do not really need
810 	 * the best possible value in this context.
811 	 * We just return the ptypes of the highest-priority device,
812 	 * usually the PREFERRED device.
813 	 */
814 	return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
815 }
816 
817 static int
818 fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
819 {
820 	struct sub_device *sdev;
821 	uint8_t i;
822 	int ret;
823 
824 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
825 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
826 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
827 		if ((ret = fs_err(sdev, ret))) {
828 			ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d",
829 			      i, ret);
830 			return ret;
831 		}
832 	}
833 	return 0;
834 }
835 
836 static int
837 fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
838 {
839 	struct sub_device *sdev;
840 	uint8_t i;
841 	int ret;
842 
843 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
844 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
845 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
846 		if ((ret = fs_err(sdev, ret))) {
847 			ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
848 			      " with error %d", i, ret);
849 			return ret;
850 		}
851 	}
852 	return 0;
853 }
854 
855 static int
856 fs_flow_ctrl_get(struct rte_eth_dev *dev,
857 		struct rte_eth_fc_conf *fc_conf)
858 {
859 	struct sub_device *sdev;
860 
861 	sdev = TX_SUBDEV(dev);
862 	if (sdev == NULL)
863 		return 0;
864 	if (SUBOPS(sdev, flow_ctrl_get) == NULL)
865 		return -ENOTSUP;
866 	return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
867 }
868 
869 static int
870 fs_flow_ctrl_set(struct rte_eth_dev *dev,
871 		struct rte_eth_fc_conf *fc_conf)
872 {
873 	struct sub_device *sdev;
874 	uint8_t i;
875 	int ret;
876 
877 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
878 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
879 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
880 		if ((ret = fs_err(sdev, ret))) {
881 			ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
882 			      " with error %d", i, ret);
883 			return ret;
884 		}
885 	}
886 	return 0;
887 }
888 
889 static void
890 fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
891 {
892 	struct sub_device *sdev;
893 	uint8_t i;
894 
895 	/* No check: already done within the rte_eth_dev_mac_addr_remove
896 	 * call for the fail-safe device.
897 	 */
898 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
899 		rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
900 				&dev->data->mac_addrs[index]);
901 	PRIV(dev)->mac_addr_pool[index] = 0;
902 }
903 
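/*
 * Add a MAC address on every active sub_device and record the VMDq pool
 * index in mac_addr_pool[] so the entry can be re-applied to sub_devices
 * that are plugged in later.
 */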
904 static int
905 fs_mac_addr_add(struct rte_eth_dev *dev,
906 		struct ether_addr *mac_addr,
907 		uint32_t index,
908 		uint32_t vmdq)
909 {
910 	struct sub_device *sdev;
911 	int ret;
912 	uint8_t i;
913 
914 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
915 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
916 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
917 		if ((ret = fs_err(sdev, ret))) {
918 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
919 			      PRIu8 " with error %d", i, ret);
920 			return ret;
921 		}
922 	}
923 	if (index >= PRIV(dev)->nb_mac_addr) {
924 		DEBUG("Growing mac_addrs array");
925 		PRIV(dev)->nb_mac_addr = index;
926 	}
927 	PRIV(dev)->mac_addr_pool[index] = vmdq;
928 	return 0;
929 }
930 
931 static void
932 fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
933 {
934 	struct sub_device *sdev;
935 	uint8_t i;
936 
937 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
938 		rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
939 }
940 
941 static int
942 fs_filter_ctrl(struct rte_eth_dev *dev,
943 		enum rte_filter_type type,
944 		enum rte_filter_op op,
945 		void *arg)
946 {
947 	struct sub_device *sdev;
948 	uint8_t i;
949 	int ret;
950 
951 	if (type == RTE_ETH_FILTER_GENERIC &&
952 	    op == RTE_ETH_FILTER_GET) {
953 		*(const void **)arg = &fs_flow_ops;
954 		return 0;
955 	}
956 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
957 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
958 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
959 		if ((ret = fs_err(sdev, ret))) {
960 			ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
961 			      " with error %d", i, ret);
962 			return ret;
963 		}
964 	}
965 	return 0;
966 }
967 
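/*
 * Control path operations exported by the fail-safe PMD. The Rx/Tx burst
 * handlers are not part of this table; they are installed directly on the
 * rte_eth_dev structure.
 */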
968 const struct eth_dev_ops failsafe_ops = {
969 	.dev_configure = fs_dev_configure,
970 	.dev_start = fs_dev_start,
971 	.dev_stop = fs_dev_stop,
972 	.dev_set_link_down = fs_dev_set_link_down,
973 	.dev_set_link_up = fs_dev_set_link_up,
974 	.dev_close = fs_dev_close,
975 	.promiscuous_enable = fs_promiscuous_enable,
976 	.promiscuous_disable = fs_promiscuous_disable,
977 	.allmulticast_enable = fs_allmulticast_enable,
978 	.allmulticast_disable = fs_allmulticast_disable,
979 	.link_update = fs_link_update,
980 	.stats_get = fs_stats_get,
981 	.stats_reset = fs_stats_reset,
982 	.dev_infos_get = fs_dev_infos_get,
983 	.dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
984 	.mtu_set = fs_mtu_set,
985 	.vlan_filter_set = fs_vlan_filter_set,
986 	.rx_queue_setup = fs_rx_queue_setup,
987 	.tx_queue_setup = fs_tx_queue_setup,
988 	.rx_queue_release = fs_rx_queue_release,
989 	.tx_queue_release = fs_tx_queue_release,
990 	.rx_queue_intr_enable = fs_rx_intr_enable,
991 	.rx_queue_intr_disable = fs_rx_intr_disable,
992 	.flow_ctrl_get = fs_flow_ctrl_get,
993 	.flow_ctrl_set = fs_flow_ctrl_set,
994 	.mac_addr_remove = fs_mac_addr_remove,
995 	.mac_addr_add = fs_mac_addr_add,
996 	.mac_addr_set = fs_mac_addr_set,
997 	.filter_ctrl = fs_filter_ctrl,
998 };
999