xref: /dpdk/drivers/net/failsafe/failsafe_ops.c (revision d429cc0b53735cc7b1e304ec1d0f35ae06ace7d0)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2017 6WIND S.A.
3  * Copyright 2017 Mellanox.
4  */
5 
6 #include <stdbool.h>
7 #include <stdint.h>
8 #include <unistd.h>
9 
10 #include <rte_debug.h>
11 #include <rte_atomic.h>
12 #include <rte_ethdev_driver.h>
13 #include <rte_malloc.h>
14 #include <rte_flow.h>
15 #include <rte_cycles.h>
16 
17 #include "failsafe_private.h"
18 
19 static struct rte_eth_dev_info default_infos = {
20 	/* Max possible number of elements */
21 	.max_rx_pktlen = UINT32_MAX,
22 	.max_rx_queues = RTE_MAX_QUEUES_PER_PORT,
23 	.max_tx_queues = RTE_MAX_QUEUES_PER_PORT,
24 	.max_mac_addrs = FAILSAFE_MAX_ETHADDR,
25 	.max_hash_mac_addrs = UINT32_MAX,
26 	.max_vfs = UINT16_MAX,
27 	.max_vmdq_pools = UINT16_MAX,
28 	.rx_desc_lim = {
29 		.nb_max = UINT16_MAX,
30 		.nb_min = 0,
31 		.nb_align = 1,
32 		.nb_seg_max = UINT16_MAX,
33 		.nb_mtu_seg_max = UINT16_MAX,
34 	},
35 	.tx_desc_lim = {
36 		.nb_max = UINT16_MAX,
37 		.nb_min = 0,
38 		.nb_align = 1,
39 		.nb_seg_max = UINT16_MAX,
40 		.nb_mtu_seg_max = UINT16_MAX,
41 	},
42 	/*
43 	 * Set of capabilities that can be verified upon
44 	 * configuring a sub-device.
45 	 */
46 	.rx_offload_capa =
47 		DEV_RX_OFFLOAD_VLAN_STRIP |
48 		DEV_RX_OFFLOAD_IPV4_CKSUM |
49 		DEV_RX_OFFLOAD_UDP_CKSUM |
50 		DEV_RX_OFFLOAD_TCP_CKSUM |
51 		DEV_RX_OFFLOAD_TCP_LRO |
52 		DEV_RX_OFFLOAD_QINQ_STRIP |
53 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
54 		DEV_RX_OFFLOAD_MACSEC_STRIP |
55 		DEV_RX_OFFLOAD_HEADER_SPLIT |
56 		DEV_RX_OFFLOAD_VLAN_FILTER |
57 		DEV_RX_OFFLOAD_VLAN_EXTEND |
58 		DEV_RX_OFFLOAD_JUMBO_FRAME |
59 		DEV_RX_OFFLOAD_CRC_STRIP |
60 		DEV_RX_OFFLOAD_SCATTER |
61 		DEV_RX_OFFLOAD_TIMESTAMP |
62 		DEV_RX_OFFLOAD_SECURITY,
63 	.rx_queue_offload_capa =
64 		DEV_RX_OFFLOAD_VLAN_STRIP |
65 		DEV_RX_OFFLOAD_IPV4_CKSUM |
66 		DEV_RX_OFFLOAD_UDP_CKSUM |
67 		DEV_RX_OFFLOAD_TCP_CKSUM |
68 		DEV_RX_OFFLOAD_TCP_LRO |
69 		DEV_RX_OFFLOAD_QINQ_STRIP |
70 		DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
71 		DEV_RX_OFFLOAD_MACSEC_STRIP |
72 		DEV_RX_OFFLOAD_HEADER_SPLIT |
73 		DEV_RX_OFFLOAD_VLAN_FILTER |
74 		DEV_RX_OFFLOAD_VLAN_EXTEND |
75 		DEV_RX_OFFLOAD_JUMBO_FRAME |
76 		DEV_RX_OFFLOAD_CRC_STRIP |
77 		DEV_RX_OFFLOAD_SCATTER |
78 		DEV_RX_OFFLOAD_TIMESTAMP |
79 		DEV_RX_OFFLOAD_SECURITY,
80 	.tx_offload_capa =
81 		DEV_TX_OFFLOAD_MULTI_SEGS |
82 		DEV_TX_OFFLOAD_IPV4_CKSUM |
83 		DEV_TX_OFFLOAD_UDP_CKSUM |
84 		DEV_TX_OFFLOAD_TCP_CKSUM,
85 	.flow_type_rss_offloads = 0x0,
86 };
87 
88 static int
89 fs_dev_configure(struct rte_eth_dev *dev)
90 {
91 	struct sub_device *sdev;
92 	uint64_t supp_tx_offloads;
93 	uint64_t tx_offloads;
94 	uint8_t i;
95 	int ret;
96 
97 	fs_lock(dev, 0);
98 	supp_tx_offloads = PRIV(dev)->infos.tx_offload_capa;
99 	tx_offloads = dev->data->dev_conf.txmode.offloads;
100 	if ((tx_offloads & supp_tx_offloads) != tx_offloads) {
101 		rte_errno = ENOTSUP;
102 		ERROR("Some Tx offloads are not supported, "
103 		      "requested 0x%" PRIx64 " supported 0x%" PRIx64,
104 		      tx_offloads, supp_tx_offloads);
105 		fs_unlock(dev, 0);
106 		return -rte_errno;
107 	}
108 	FOREACH_SUBDEV(sdev, i, dev) {
109 		int rmv_interrupt = 0;
110 		int lsc_interrupt = 0;
111 		int lsc_enabled;
112 
113 		if (sdev->state != DEV_PROBED &&
114 		    !(PRIV(dev)->alarm_lock == 0 && sdev->state == DEV_ACTIVE))
115 			continue;
116 
117 		rmv_interrupt = ETH(sdev)->data->dev_flags &
118 				RTE_ETH_DEV_INTR_RMV;
119 		if (rmv_interrupt) {
120 			DEBUG("Enabling RMV interrupts for sub_device %d", i);
121 			dev->data->dev_conf.intr_conf.rmv = 1;
122 		} else {
123 			DEBUG("sub_device %d does not support RMV event", i);
124 		}
125 		lsc_enabled = dev->data->dev_conf.intr_conf.lsc;
126 		lsc_interrupt = lsc_enabled &&
127 				(ETH(sdev)->data->dev_flags &
128 				 RTE_ETH_DEV_INTR_LSC);
129 		if (lsc_interrupt) {
130 			DEBUG("Enabling LSC interrupts for sub_device %d", i);
131 			dev->data->dev_conf.intr_conf.lsc = 1;
132 		} else if (lsc_enabled && !lsc_interrupt) {
133 			DEBUG("Disabling LSC interrupts for sub_device %d", i);
134 			dev->data->dev_conf.intr_conf.lsc = 0;
135 		}
136 		DEBUG("Configuring sub-device %d", i);
137 		ret = rte_eth_dev_configure(PORT_ID(sdev),
138 					dev->data->nb_rx_queues,
139 					dev->data->nb_tx_queues,
140 					&dev->data->dev_conf);
141 		if (ret) {
142 			if (!fs_err(sdev, ret))
143 				continue;
144 			ERROR("Could not configure sub_device %d", i);
145 			fs_unlock(dev, 0);
146 			return ret;
147 		}
148 		if (rmv_interrupt) {
149 			ret = rte_eth_dev_callback_register(PORT_ID(sdev),
150 					RTE_ETH_EVENT_INTR_RMV,
151 					failsafe_eth_rmv_event_callback,
152 					sdev);
153 			if (ret)
154 				WARN("Failed to register RMV callback for sub_device %d",
155 				     SUB_ID(sdev));
156 		}
157 		dev->data->dev_conf.intr_conf.rmv = 0;
158 		if (lsc_interrupt) {
159 			ret = rte_eth_dev_callback_register(PORT_ID(sdev),
160 						RTE_ETH_EVENT_INTR_LSC,
161 						failsafe_eth_lsc_event_callback,
162 						dev);
163 			if (ret)
164 				WARN("Failed to register LSC callback for sub_device %d",
165 				     SUB_ID(sdev));
166 		}
167 		dev->data->dev_conf.intr_conf.lsc = lsc_enabled;
168 		sdev->state = DEV_ACTIVE;
169 	}
170 	if (PRIV(dev)->state < DEV_ACTIVE)
171 		PRIV(dev)->state = DEV_ACTIVE;
172 	fs_unlock(dev, 0);
173 	return 0;
174 }
175 
176 static int
177 fs_dev_start(struct rte_eth_dev *dev)
178 {
179 	struct sub_device *sdev;
180 	uint8_t i;
181 	int ret;
182 
183 	fs_lock(dev, 0);
184 	ret = failsafe_rx_intr_install(dev);
185 	if (ret) {
186 		fs_unlock(dev, 0);
187 		return ret;
188 	}
189 	FOREACH_SUBDEV(sdev, i, dev) {
190 		if (sdev->state != DEV_ACTIVE)
191 			continue;
192 		DEBUG("Starting sub_device %d", i);
193 		ret = rte_eth_dev_start(PORT_ID(sdev));
194 		if (ret) {
195 			if (!fs_err(sdev, ret))
196 				continue;
197 			fs_unlock(dev, 0);
198 			return ret;
199 		}
200 		ret = failsafe_rx_intr_install_subdevice(sdev);
201 		if (ret) {
202 			if (!fs_err(sdev, ret))
203 				continue;
204 			rte_eth_dev_stop(PORT_ID(sdev));
205 			fs_unlock(dev, 0);
206 			return ret;
207 		}
208 		sdev->state = DEV_STARTED;
209 	}
210 	if (PRIV(dev)->state < DEV_STARTED)
211 		PRIV(dev)->state = DEV_STARTED;
212 	fs_switch_dev(dev, NULL);
213 	fs_unlock(dev, 0);
214 	return 0;
215 }
216 
217 static void
218 fs_dev_stop(struct rte_eth_dev *dev)
219 {
220 	struct sub_device *sdev;
221 	uint8_t i;
222 
223 	fs_lock(dev, 0);
224 	PRIV(dev)->state = DEV_STARTED - 1;
225 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED) {
226 		rte_eth_dev_stop(PORT_ID(sdev));
227 		failsafe_rx_intr_uninstall_subdevice(sdev);
228 		sdev->state = DEV_STARTED - 1;
229 	}
230 	failsafe_rx_intr_uninstall(dev);
231 	fs_unlock(dev, 0);
232 }
233 
234 static int
235 fs_dev_set_link_up(struct rte_eth_dev *dev)
236 {
237 	struct sub_device *sdev;
238 	uint8_t i;
239 	int ret;
240 
241 	fs_lock(dev, 0);
242 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
243 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
244 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
245 		if ((ret = fs_err(sdev, ret))) {
246 			ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
247 			      " with error %d", i, ret);
248 			fs_unlock(dev, 0);
249 			return ret;
250 		}
251 	}
252 	fs_unlock(dev, 0);
253 	return 0;
254 }
255 
256 static int
257 fs_dev_set_link_down(struct rte_eth_dev *dev)
258 {
259 	struct sub_device *sdev;
260 	uint8_t i;
261 	int ret;
262 
263 	fs_lock(dev, 0);
264 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
265 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
266 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
267 		if ((ret = fs_err(sdev, ret))) {
268 			ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
269 			      " with error %d", i, ret);
270 			fs_unlock(dev, 0);
271 			return ret;
272 		}
273 	}
274 	fs_unlock(dev, 0);
275 	return 0;
276 }
277 
278 static void fs_dev_free_queues(struct rte_eth_dev *dev);
279 static void
280 fs_dev_close(struct rte_eth_dev *dev)
281 {
282 	struct sub_device *sdev;
283 	uint8_t i;
284 
285 	fs_lock(dev, 0);
286 	failsafe_hotplug_alarm_cancel(dev);
287 	if (PRIV(dev)->state == DEV_STARTED)
288 		dev->dev_ops->dev_stop(dev);
289 	PRIV(dev)->state = DEV_ACTIVE - 1;
290 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
291 		DEBUG("Closing sub_device %d", i);
292 		rte_eth_dev_close(PORT_ID(sdev));
293 		sdev->state = DEV_ACTIVE - 1;
294 	}
295 	fs_dev_free_queues(dev);
296 	fs_unlock(dev, 0);
297 }
298 
299 static bool
300 fs_rxq_offloads_valid(struct rte_eth_dev *dev, uint64_t offloads)
301 {
302 	uint64_t port_offloads;
303 	uint64_t queue_supp_offloads;
304 	uint64_t port_supp_offloads;
305 
306 	port_offloads = dev->data->dev_conf.rxmode.offloads;
307 	queue_supp_offloads = PRIV(dev)->infos.rx_queue_offload_capa;
308 	port_supp_offloads = PRIV(dev)->infos.rx_offload_capa;
309 	if ((offloads & (queue_supp_offloads | port_supp_offloads)) !=
310 	     offloads)
311 		return false;
312 	/* Verify we have no conflict with port offloads */
313 	if ((port_offloads ^ offloads) & port_supp_offloads)
314 		return false;
315 	return true;
316 }
317 
318 static void
319 fs_rx_queue_release(void *queue)
320 {
321 	struct rte_eth_dev *dev;
322 	struct sub_device *sdev;
323 	uint8_t i;
324 	struct rxq *rxq;
325 
326 	if (queue == NULL)
327 		return;
328 	rxq = queue;
329 	dev = rxq->priv->dev;
330 	fs_lock(dev, 0);
331 	if (rxq->event_fd > 0)
332 		close(rxq->event_fd);
333 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
334 		SUBOPS(sdev, rx_queue_release)
335 			(ETH(sdev)->data->rx_queues[rxq->qid]);
336 	dev->data->rx_queues[rxq->qid] = NULL;
337 	rte_free(rxq);
338 	fs_unlock(dev, 0);
339 }
340 
341 static int
342 fs_rx_queue_setup(struct rte_eth_dev *dev,
343 		uint16_t rx_queue_id,
344 		uint16_t nb_rx_desc,
345 		unsigned int socket_id,
346 		const struct rte_eth_rxconf *rx_conf,
347 		struct rte_mempool *mb_pool)
348 {
349 	/*
350 	 * FIXME: Add a proper interface in rte_eal_interrupts for
351 	 * allocating eventfd as an interrupt vector.
352 	 * For the time being, fake as if we are using MSIX interrupts,
353 	 * this will cause rte_intr_efd_enable to allocate an eventfd for us.
354 	 */
355 	struct rte_intr_handle intr_handle = {
356 		.type = RTE_INTR_HANDLE_VFIO_MSIX,
357 		.efds = { -1, },
358 	};
359 	struct sub_device *sdev;
360 	struct rxq *rxq;
361 	uint8_t i;
362 	int ret;
363 
364 	fs_lock(dev, 0);
365 	rxq = dev->data->rx_queues[rx_queue_id];
366 	if (rxq != NULL) {
367 		fs_rx_queue_release(rxq);
368 		dev->data->rx_queues[rx_queue_id] = NULL;
369 	}
370 	/* Verify application offloads are valid for our port and queue. */
371 	if (fs_rxq_offloads_valid(dev, rx_conf->offloads) == false) {
372 		rte_errno = ENOTSUP;
373 		ERROR("Rx queue offloads 0x%" PRIx64
374 		      " don't match port offloads 0x%" PRIx64
375 		      " or supported offloads 0x%" PRIx64,
376 		      rx_conf->offloads,
377 		      dev->data->dev_conf.rxmode.offloads,
378 		      PRIV(dev)->infos.rx_offload_capa |
379 		      PRIV(dev)->infos.rx_queue_offload_capa);
380 		fs_unlock(dev, 0);
381 		return -rte_errno;
382 	}
383 	rxq = rte_zmalloc(NULL,
384 			  sizeof(*rxq) +
385 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
386 			  RTE_CACHE_LINE_SIZE);
387 	if (rxq == NULL) {
388 		fs_unlock(dev, 0);
389 		return -ENOMEM;
390 	}
391 	FOREACH_SUBDEV(sdev, i, dev)
392 		rte_atomic64_init(&rxq->refcnt[i]);
393 	rxq->qid = rx_queue_id;
394 	rxq->socket_id = socket_id;
395 	rxq->info.mp = mb_pool;
396 	rxq->info.conf = *rx_conf;
397 	rxq->info.nb_desc = nb_rx_desc;
398 	rxq->priv = PRIV(dev);
399 	rxq->sdev = PRIV(dev)->subs;
400 	ret = rte_intr_efd_enable(&intr_handle, 1);
401 	if (ret < 0) {
402 		fs_unlock(dev, 0);
403 		return ret;
404 	}
405 	rxq->event_fd = intr_handle.efds[0];
406 	dev->data->rx_queues[rx_queue_id] = rxq;
407 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
408 		ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
409 				rx_queue_id,
410 				nb_rx_desc, socket_id,
411 				rx_conf, mb_pool);
412 		if ((ret = fs_err(sdev, ret))) {
413 			ERROR("RX queue setup failed for sub_device %d", i);
414 			goto free_rxq;
415 		}
416 	}
417 	fs_unlock(dev, 0);
418 	return 0;
419 free_rxq:
420 	fs_rx_queue_release(rxq);
421 	fs_unlock(dev, 0);
422 	return ret;
423 }
424 
425 static int
426 fs_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
427 {
428 	struct rxq *rxq;
429 	struct sub_device *sdev;
430 	uint8_t i;
431 	int ret;
432 	int rc = 0;
433 
434 	fs_lock(dev, 0);
435 	if (idx >= dev->data->nb_rx_queues) {
436 		rc = -EINVAL;
437 		goto unlock;
438 	}
439 	rxq = dev->data->rx_queues[idx];
440 	if (rxq == NULL || rxq->event_fd <= 0) {
441 		rc = -EINVAL;
442 		goto unlock;
443 	}
444 	/* Fail if proxy service is nor running. */
445 	if (PRIV(dev)->rxp.sstate != SS_RUNNING) {
446 		ERROR("failsafe interrupt services are not running");
447 		rc = -EAGAIN;
448 		goto unlock;
449 	}
450 	rxq->enable_events = 1;
451 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
452 		ret = rte_eth_dev_rx_intr_enable(PORT_ID(sdev), idx);
453 		ret = fs_err(sdev, ret);
454 		if (ret)
455 			rc = ret;
456 	}
457 unlock:
458 	fs_unlock(dev, 0);
459 	if (rc)
460 		rte_errno = -rc;
461 	return rc;
462 }
463 
464 static int
465 fs_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
466 {
467 	struct rxq *rxq;
468 	struct sub_device *sdev;
469 	uint64_t u64;
470 	uint8_t i;
471 	int rc = 0;
472 	int ret;
473 
474 	fs_lock(dev, 0);
475 	if (idx >= dev->data->nb_rx_queues) {
476 		rc = -EINVAL;
477 		goto unlock;
478 	}
479 	rxq = dev->data->rx_queues[idx];
480 	if (rxq == NULL || rxq->event_fd <= 0) {
481 		rc = -EINVAL;
482 		goto unlock;
483 	}
484 	rxq->enable_events = 0;
485 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
486 		ret = rte_eth_dev_rx_intr_disable(PORT_ID(sdev), idx);
487 		ret = fs_err(sdev, ret);
488 		if (ret)
489 			rc = ret;
490 	}
491 	/* Clear pending events */
492 	while (read(rxq->event_fd, &u64, sizeof(uint64_t)) >  0)
493 		;
494 unlock:
495 	fs_unlock(dev, 0);
496 	if (rc)
497 		rte_errno = -rc;
498 	return rc;
499 }
500 
501 static bool
502 fs_txq_offloads_valid(struct rte_eth_dev *dev, uint64_t offloads)
503 {
504 	uint64_t port_offloads;
505 	uint64_t queue_supp_offloads;
506 	uint64_t port_supp_offloads;
507 
508 	port_offloads = dev->data->dev_conf.txmode.offloads;
509 	queue_supp_offloads = PRIV(dev)->infos.tx_queue_offload_capa;
510 	port_supp_offloads = PRIV(dev)->infos.tx_offload_capa;
511 	if ((offloads & (queue_supp_offloads | port_supp_offloads)) !=
512 	     offloads)
513 		return false;
514 	/* Verify we have no conflict with port offloads */
515 	if ((port_offloads ^ offloads) & port_supp_offloads)
516 		return false;
517 	return true;
518 }
519 
520 static void
521 fs_tx_queue_release(void *queue)
522 {
523 	struct rte_eth_dev *dev;
524 	struct sub_device *sdev;
525 	uint8_t i;
526 	struct txq *txq;
527 
528 	if (queue == NULL)
529 		return;
530 	txq = queue;
531 	dev = txq->priv->dev;
532 	fs_lock(dev, 0);
533 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
534 		SUBOPS(sdev, tx_queue_release)
535 			(ETH(sdev)->data->tx_queues[txq->qid]);
536 	dev->data->tx_queues[txq->qid] = NULL;
537 	rte_free(txq);
538 	fs_unlock(dev, 0);
539 }
540 
541 static int
542 fs_tx_queue_setup(struct rte_eth_dev *dev,
543 		uint16_t tx_queue_id,
544 		uint16_t nb_tx_desc,
545 		unsigned int socket_id,
546 		const struct rte_eth_txconf *tx_conf)
547 {
548 	struct sub_device *sdev;
549 	struct txq *txq;
550 	uint8_t i;
551 	int ret;
552 
553 	fs_lock(dev, 0);
554 	txq = dev->data->tx_queues[tx_queue_id];
555 	if (txq != NULL) {
556 		fs_tx_queue_release(txq);
557 		dev->data->tx_queues[tx_queue_id] = NULL;
558 	}
559 	/*
560 	 * Don't verify queue offloads for applications which
561 	 * use the old API.
562 	 */
563 	if (tx_conf != NULL &&
564 	    (tx_conf->txq_flags & ETH_TXQ_FLAGS_IGNORE) &&
565 	    fs_txq_offloads_valid(dev, tx_conf->offloads) == false) {
566 		rte_errno = ENOTSUP;
567 		ERROR("Tx queue offloads 0x%" PRIx64
568 		      " don't match port offloads 0x%" PRIx64
569 		      " or supported offloads 0x%" PRIx64,
570 		      tx_conf->offloads,
571 		      dev->data->dev_conf.txmode.offloads,
572 		      PRIV(dev)->infos.tx_offload_capa |
573 		      PRIV(dev)->infos.tx_queue_offload_capa);
574 		fs_unlock(dev, 0);
575 		return -rte_errno;
576 	}
577 	txq = rte_zmalloc("ethdev TX queue",
578 			  sizeof(*txq) +
579 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
580 			  RTE_CACHE_LINE_SIZE);
581 	if (txq == NULL) {
582 		fs_unlock(dev, 0);
583 		return -ENOMEM;
584 	}
585 	FOREACH_SUBDEV(sdev, i, dev)
586 		rte_atomic64_init(&txq->refcnt[i]);
587 	txq->qid = tx_queue_id;
588 	txq->socket_id = socket_id;
589 	txq->info.conf = *tx_conf;
590 	txq->info.nb_desc = nb_tx_desc;
591 	txq->priv = PRIV(dev);
592 	dev->data->tx_queues[tx_queue_id] = txq;
593 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
594 		ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
595 				tx_queue_id,
596 				nb_tx_desc, socket_id,
597 				tx_conf);
598 		if ((ret = fs_err(sdev, ret))) {
599 			ERROR("TX queue setup failed for sub_device %d", i);
600 			goto free_txq;
601 		}
602 	}
603 	fs_unlock(dev, 0);
604 	return 0;
605 free_txq:
606 	fs_tx_queue_release(txq);
607 	fs_unlock(dev, 0);
608 	return ret;
609 }
610 
611 static void
612 fs_dev_free_queues(struct rte_eth_dev *dev)
613 {
614 	uint16_t i;
615 
616 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
617 		fs_rx_queue_release(dev->data->rx_queues[i]);
618 		dev->data->rx_queues[i] = NULL;
619 	}
620 	dev->data->nb_rx_queues = 0;
621 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
622 		fs_tx_queue_release(dev->data->tx_queues[i]);
623 		dev->data->tx_queues[i] = NULL;
624 	}
625 	dev->data->nb_tx_queues = 0;
626 }
627 
628 static void
629 fs_promiscuous_enable(struct rte_eth_dev *dev)
630 {
631 	struct sub_device *sdev;
632 	uint8_t i;
633 
634 	fs_lock(dev, 0);
635 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
636 		rte_eth_promiscuous_enable(PORT_ID(sdev));
637 	fs_unlock(dev, 0);
638 }
639 
640 static void
641 fs_promiscuous_disable(struct rte_eth_dev *dev)
642 {
643 	struct sub_device *sdev;
644 	uint8_t i;
645 
646 	fs_lock(dev, 0);
647 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
648 		rte_eth_promiscuous_disable(PORT_ID(sdev));
649 	fs_unlock(dev, 0);
650 }
651 
652 static void
653 fs_allmulticast_enable(struct rte_eth_dev *dev)
654 {
655 	struct sub_device *sdev;
656 	uint8_t i;
657 
658 	fs_lock(dev, 0);
659 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
660 		rte_eth_allmulticast_enable(PORT_ID(sdev));
661 	fs_unlock(dev, 0);
662 }
663 
664 static void
665 fs_allmulticast_disable(struct rte_eth_dev *dev)
666 {
667 	struct sub_device *sdev;
668 	uint8_t i;
669 
670 	fs_lock(dev, 0);
671 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
672 		rte_eth_allmulticast_disable(PORT_ID(sdev));
673 	fs_unlock(dev, 0);
674 }
675 
676 static int
677 fs_link_update(struct rte_eth_dev *dev,
678 		int wait_to_complete)
679 {
680 	struct sub_device *sdev;
681 	uint8_t i;
682 	int ret;
683 
684 	fs_lock(dev, 0);
685 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
686 		DEBUG("Calling link_update on sub_device %d", i);
687 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
688 		if (ret && ret != -1 && sdev->remove == 0 &&
689 		    rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) {
690 			ERROR("Link update failed for sub_device %d with error %d",
691 			      i, ret);
692 			fs_unlock(dev, 0);
693 			return ret;
694 		}
695 	}
696 	if (TX_SUBDEV(dev)) {
697 		struct rte_eth_link *l1;
698 		struct rte_eth_link *l2;
699 
700 		l1 = &dev->data->dev_link;
701 		l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
702 		if (memcmp(l1, l2, sizeof(*l1))) {
703 			*l1 = *l2;
704 			fs_unlock(dev, 0);
705 			return 0;
706 		}
707 	}
708 	fs_unlock(dev, 0);
709 	return -1;
710 }
711 
712 static int
713 fs_stats_get(struct rte_eth_dev *dev,
714 	     struct rte_eth_stats *stats)
715 {
716 	struct rte_eth_stats backup;
717 	struct sub_device *sdev;
718 	uint8_t i;
719 	int ret;
720 
721 	fs_lock(dev, 0);
722 	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
723 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
724 		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
725 		uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
726 
727 		rte_memcpy(&backup, snapshot, sizeof(backup));
728 		ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
729 		if (ret) {
730 			if (!fs_err(sdev, ret)) {
731 				rte_memcpy(snapshot, &backup, sizeof(backup));
732 				goto inc;
733 			}
734 			ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
735 				  i, ret);
736 			*timestamp = 0;
737 			fs_unlock(dev, 0);
738 			return ret;
739 		}
740 		*timestamp = rte_rdtsc();
741 inc:
742 		failsafe_stats_increment(stats, snapshot);
743 	}
744 	fs_unlock(dev, 0);
745 	return 0;
746 }
747 
748 static void
749 fs_stats_reset(struct rte_eth_dev *dev)
750 {
751 	struct sub_device *sdev;
752 	uint8_t i;
753 
754 	fs_lock(dev, 0);
755 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
756 		rte_eth_stats_reset(PORT_ID(sdev));
757 		memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
758 	}
759 	memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats));
760 	fs_unlock(dev, 0);
761 }
762 
763 /**
764  * Fail-safe dev_infos_get rules:
765  *
766  * No sub_device:
767  *   Numerables:
768  *      Use the maximum possible values for any field, so as not
769  *      to impede any further configuration effort.
770  *   Capabilities:
771  *      Limits capabilities to those that are understood by the
772  *      fail-safe PMD. This understanding stems from the fail-safe
773  *      being capable of verifying that the related capability is
774  *      expressed within the device configuration (struct rte_eth_conf).
775  *
776  * At least one probed sub_device:
777  *   Numerables:
778  *      Uses values from the active probed sub_device
779  *      The rationale here is that if any sub_device is less capable
780  *      (for example concerning the number of queues) than the active
781  *      sub_device, then its subsequent configuration will fail.
782  *      It is impossible to foresee this failure when the failing sub_device
783  *      is supposed to be plugged-in later on, so the configuration process
784  *      is the single point of failure and error reporting.
785  *   Capabilities:
786  *      Uses a logical AND of RX capabilities among
787  *      all sub_devices and the default capabilities.
788  *      Uses a logical AND of TX capabilities among
789  *      the active probed sub_device and the default capabilities.
790  *
791  */
792 static void
793 fs_dev_infos_get(struct rte_eth_dev *dev,
794 		  struct rte_eth_dev_info *infos)
795 {
796 	struct sub_device *sdev;
797 	uint8_t i;
798 
799 	sdev = TX_SUBDEV(dev);
800 	if (sdev == NULL) {
801 		DEBUG("No probed device, using default infos");
802 		rte_memcpy(&PRIV(dev)->infos, &default_infos,
803 			   sizeof(default_infos));
804 	} else {
805 		uint64_t rx_offload_capa;
806 		uint64_t rxq_offload_capa;
807 
808 		rx_offload_capa = default_infos.rx_offload_capa;
809 		rxq_offload_capa = default_infos.rx_queue_offload_capa;
810 		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
811 			rte_eth_dev_info_get(PORT_ID(sdev),
812 					&PRIV(dev)->infos);
813 			rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
814 			rxq_offload_capa &=
815 					PRIV(dev)->infos.rx_queue_offload_capa;
816 		}
817 		sdev = TX_SUBDEV(dev);
818 		rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
819 		PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
820 		PRIV(dev)->infos.rx_queue_offload_capa = rxq_offload_capa;
821 		PRIV(dev)->infos.tx_offload_capa &=
822 					default_infos.tx_offload_capa;
823 		PRIV(dev)->infos.tx_queue_offload_capa &=
824 					default_infos.tx_queue_offload_capa;
825 		PRIV(dev)->infos.flow_type_rss_offloads &=
826 					default_infos.flow_type_rss_offloads;
827 	}
828 	rte_memcpy(infos, &PRIV(dev)->infos, sizeof(*infos));
829 }
830 
831 static const uint32_t *
832 fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
833 {
834 	struct sub_device *sdev;
835 	struct rte_eth_dev *edev;
836 	const uint32_t *ret;
837 
838 	fs_lock(dev, 0);
839 	sdev = TX_SUBDEV(dev);
840 	if (sdev == NULL) {
841 		ret = NULL;
842 		goto unlock;
843 	}
844 	edev = ETH(sdev);
845 	/* ENOTSUP: counts as no supported ptypes */
846 	if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL) {
847 		ret = NULL;
848 		goto unlock;
849 	}
850 	/*
851 	 * The API does not permit to do a clean AND of all ptypes,
852 	 * It is also incomplete by design and we do not really care
853 	 * to have a best possible value in this context.
854 	 * We just return the ptypes of the device of highest
855 	 * priority, usually the PREFERRED device.
856 	 */
857 	ret = SUBOPS(sdev, dev_supported_ptypes_get)(edev);
858 unlock:
859 	fs_unlock(dev, 0);
860 	return ret;
861 }
862 
863 static int
864 fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
865 {
866 	struct sub_device *sdev;
867 	uint8_t i;
868 	int ret;
869 
870 	fs_lock(dev, 0);
871 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
872 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
873 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
874 		if ((ret = fs_err(sdev, ret))) {
875 			ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d",
876 			      i, ret);
877 			fs_unlock(dev, 0);
878 			return ret;
879 		}
880 	}
881 	fs_unlock(dev, 0);
882 	return 0;
883 }
884 
885 static int
886 fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
887 {
888 	struct sub_device *sdev;
889 	uint8_t i;
890 	int ret;
891 
892 	fs_lock(dev, 0);
893 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
894 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
895 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
896 		if ((ret = fs_err(sdev, ret))) {
897 			ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
898 			      " with error %d", i, ret);
899 			fs_unlock(dev, 0);
900 			return ret;
901 		}
902 	}
903 	fs_unlock(dev, 0);
904 	return 0;
905 }
906 
907 static int
908 fs_flow_ctrl_get(struct rte_eth_dev *dev,
909 		struct rte_eth_fc_conf *fc_conf)
910 {
911 	struct sub_device *sdev;
912 	int ret;
913 
914 	fs_lock(dev, 0);
915 	sdev = TX_SUBDEV(dev);
916 	if (sdev == NULL) {
917 		ret = 0;
918 		goto unlock;
919 	}
920 	if (SUBOPS(sdev, flow_ctrl_get) == NULL) {
921 		ret = -ENOTSUP;
922 		goto unlock;
923 	}
924 	ret = SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
925 unlock:
926 	fs_unlock(dev, 0);
927 	return ret;
928 }
929 
930 static int
931 fs_flow_ctrl_set(struct rte_eth_dev *dev,
932 		struct rte_eth_fc_conf *fc_conf)
933 {
934 	struct sub_device *sdev;
935 	uint8_t i;
936 	int ret;
937 
938 	fs_lock(dev, 0);
939 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
940 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
941 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
942 		if ((ret = fs_err(sdev, ret))) {
943 			ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
944 			      " with error %d", i, ret);
945 			fs_unlock(dev, 0);
946 			return ret;
947 		}
948 	}
949 	fs_unlock(dev, 0);
950 	return 0;
951 }
952 
953 static void
954 fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
955 {
956 	struct sub_device *sdev;
957 	uint8_t i;
958 
959 	fs_lock(dev, 0);
960 	/* No check: already done within the rte_eth_dev_mac_addr_remove
961 	 * call for the fail-safe device.
962 	 */
963 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
964 		rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
965 				&dev->data->mac_addrs[index]);
966 	PRIV(dev)->mac_addr_pool[index] = 0;
967 	fs_unlock(dev, 0);
968 }
969 
970 static int
971 fs_mac_addr_add(struct rte_eth_dev *dev,
972 		struct ether_addr *mac_addr,
973 		uint32_t index,
974 		uint32_t vmdq)
975 {
976 	struct sub_device *sdev;
977 	int ret;
978 	uint8_t i;
979 
980 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
981 	fs_lock(dev, 0);
982 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
983 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
984 		if ((ret = fs_err(sdev, ret))) {
985 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
986 			      PRIu8 " with error %d", i, ret);
987 			fs_unlock(dev, 0);
988 			return ret;
989 		}
990 	}
991 	if (index >= PRIV(dev)->nb_mac_addr) {
992 		DEBUG("Growing mac_addrs array");
993 		PRIV(dev)->nb_mac_addr = index;
994 	}
995 	PRIV(dev)->mac_addr_pool[index] = vmdq;
996 	fs_unlock(dev, 0);
997 	return 0;
998 }
999 
1000 static void
1001 fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
1002 {
1003 	struct sub_device *sdev;
1004 	uint8_t i;
1005 
1006 	fs_lock(dev, 0);
1007 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
1008 		rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
1009 	fs_unlock(dev, 0);
1010 }
1011 
1012 static int
1013 fs_filter_ctrl(struct rte_eth_dev *dev,
1014 		enum rte_filter_type type,
1015 		enum rte_filter_op op,
1016 		void *arg)
1017 {
1018 	struct sub_device *sdev;
1019 	uint8_t i;
1020 	int ret;
1021 
1022 	if (type == RTE_ETH_FILTER_GENERIC &&
1023 	    op == RTE_ETH_FILTER_GET) {
1024 		*(const void **)arg = &fs_flow_ops;
1025 		return 0;
1026 	}
1027 	fs_lock(dev, 0);
1028 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
1029 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
1030 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
1031 		if ((ret = fs_err(sdev, ret))) {
1032 			ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
1033 			      " with error %d", i, ret);
1034 			fs_unlock(dev, 0);
1035 			return ret;
1036 		}
1037 	}
1038 	fs_unlock(dev, 0);
1039 	return 0;
1040 }
1041 
1042 const struct eth_dev_ops failsafe_ops = {
1043 	.dev_configure = fs_dev_configure,
1044 	.dev_start = fs_dev_start,
1045 	.dev_stop = fs_dev_stop,
1046 	.dev_set_link_down = fs_dev_set_link_down,
1047 	.dev_set_link_up = fs_dev_set_link_up,
1048 	.dev_close = fs_dev_close,
1049 	.promiscuous_enable = fs_promiscuous_enable,
1050 	.promiscuous_disable = fs_promiscuous_disable,
1051 	.allmulticast_enable = fs_allmulticast_enable,
1052 	.allmulticast_disable = fs_allmulticast_disable,
1053 	.link_update = fs_link_update,
1054 	.stats_get = fs_stats_get,
1055 	.stats_reset = fs_stats_reset,
1056 	.dev_infos_get = fs_dev_infos_get,
1057 	.dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
1058 	.mtu_set = fs_mtu_set,
1059 	.vlan_filter_set = fs_vlan_filter_set,
1060 	.rx_queue_setup = fs_rx_queue_setup,
1061 	.tx_queue_setup = fs_tx_queue_setup,
1062 	.rx_queue_release = fs_rx_queue_release,
1063 	.tx_queue_release = fs_tx_queue_release,
1064 	.rx_queue_intr_enable = fs_rx_intr_enable,
1065 	.rx_queue_intr_disable = fs_rx_intr_disable,
1066 	.flow_ctrl_get = fs_flow_ctrl_get,
1067 	.flow_ctrl_set = fs_flow_ctrl_set,
1068 	.mac_addr_remove = fs_mac_addr_remove,
1069 	.mac_addr_add = fs_mac_addr_add,
1070 	.mac_addr_set = fs_mac_addr_set,
1071 	.filter_ctrl = fs_filter_ctrl,
1072 };
1073