xref: /dpdk/drivers/net/failsafe/failsafe_ether.c (revision 55509e3a49fb28317c1e56a534cdcc4a3849df79)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2017 6WIND S.A.
3  * Copyright 2017 Mellanox Technologies, Ltd
4  */
5 
#include <errno.h>
#include <unistd.h>

#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_cycles.h>

#include "failsafe_private.h"
13 
14 /** Print a message out of a flow error. */
15 static int
16 fs_flow_complain(struct rte_flow_error *error)
17 {
18 	static const char *const errstrlist[] = {
19 		[RTE_FLOW_ERROR_TYPE_NONE] = "no error",
20 		[RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
21 		[RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
22 		[RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
23 		[RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
24 		[RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
25 		[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
26 		[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
27 		[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
28 		[RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
29 		[RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
30 		[RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
31 	};
32 	const char *errstr;
33 	char buf[32];
34 	int err = rte_errno;
35 
36 	if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
37 			!errstrlist[error->type])
38 		errstr = "unknown type";
39 	else
40 		errstr = errstrlist[error->type];
41 	ERROR("Caught error type %d (%s): %s%s\n",
42 		error->type, errstr,
43 		error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
44 				error->cause), buf) : "",
45 		error->message ? error->message : "(no stated reason)");
46 	return -err;
47 }
48 
49 static int
50 eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
51 			 struct sub_device *sdev)
52 {
53 	struct rte_flow_error ferror;
54 	int ret;
55 
56 	if (!PRIV(dev)->flow_isolated) {
57 		DEBUG("Flow isolation already disabled");
58 	} else {
59 		DEBUG("Enabling flow isolation");
60 		ret = rte_flow_isolate(PORT_ID(sdev),
61 				       PRIV(dev)->flow_isolated,
62 				       &ferror);
63 		if (ret) {
64 			fs_flow_complain(&ferror);
65 			return ret;
66 		}
67 	}
68 	return 0;
69 }
70 
71 static int
72 fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
73 		struct sub_device *sdev)
74 {
75 	struct rte_eth_dev *edev;
76 	struct rte_vlan_filter_conf *vfc1;
77 	struct rte_vlan_filter_conf *vfc2;
78 	struct rte_flow *flow;
79 	struct rte_flow_error ferror;
80 	uint32_t i;
81 	int ret;
82 
83 	edev = ETH(sdev);
84 	/* RX queue setup */
85 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
86 		struct rxq *rxq;
87 
88 		rxq = dev->data->rx_queues[i];
89 		ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
90 				rxq->info.nb_desc, rxq->socket_id,
91 				&rxq->info.conf, rxq->info.mp);
92 		if (ret) {
93 			ERROR("rx_queue_setup failed");
94 			return ret;
95 		}
96 	}
97 	/* TX queue setup */
98 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
99 		struct txq *txq;
100 
101 		txq = dev->data->tx_queues[i];
102 		ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
103 				txq->info.nb_desc, txq->socket_id,
104 				&txq->info.conf);
105 		if (ret) {
106 			ERROR("tx_queue_setup failed");
107 			return ret;
108 		}
109 	}
110 	/* dev_link.link_status */
111 	if (dev->data->dev_link.link_status !=
112 	    edev->data->dev_link.link_status) {
113 		DEBUG("Configuring link_status");
114 		if (dev->data->dev_link.link_status)
115 			ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
116 		else
117 			ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
118 		if (ret) {
119 			ERROR("Failed to apply link_status");
120 			return ret;
121 		}
122 	} else {
123 		DEBUG("link_status already set");
124 	}
125 	/* promiscuous */
126 	if (dev->data->promiscuous != edev->data->promiscuous) {
127 		DEBUG("Configuring promiscuous");
128 		if (dev->data->promiscuous)
129 			ret = rte_eth_promiscuous_enable(PORT_ID(sdev));
130 		else
131 			ret = rte_eth_promiscuous_disable(PORT_ID(sdev));
132 		if (ret != 0) {
133 			ERROR("Failed to apply promiscuous mode");
134 			return ret;
135 		}
136 	} else {
137 		DEBUG("promiscuous already set");
138 	}
139 	/* all_multicast */
140 	if (dev->data->all_multicast != edev->data->all_multicast) {
141 		DEBUG("Configuring all_multicast");
142 		if (dev->data->all_multicast)
143 			ret = rte_eth_allmulticast_enable(PORT_ID(sdev));
144 		else
145 			ret = rte_eth_allmulticast_disable(PORT_ID(sdev));
146 		if (ret != 0) {
147 			ERROR("Failed to apply allmulticast mode");
148 			return ret;
149 		}
150 	} else {
151 		DEBUG("all_multicast already set");
152 	}
153 	/* MTU */
154 	if (dev->data->mtu != edev->data->mtu) {
155 		DEBUG("Configuring MTU");
156 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
157 		if (ret) {
158 			ERROR("Failed to apply MTU");
159 			return ret;
160 		}
161 	} else {
162 		DEBUG("MTU already set");
163 	}
164 	/* default MAC */
165 	DEBUG("Configuring default MAC address");
166 	ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
167 			&dev->data->mac_addrs[0]);
168 	if (ret) {
169 		ERROR("Setting default MAC address failed");
170 		return ret;
171 	}
172 	/* additional MAC */
173 	if (PRIV(dev)->nb_mac_addr > 1)
174 		DEBUG("Configure additional MAC address%s",
175 			(PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
176 	for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
177 		struct rte_ether_addr *ea;
178 
179 		ea = &dev->data->mac_addrs[i];
180 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
181 				PRIV(dev)->mac_addr_pool[i]);
182 		if (ret) {
183 			char ea_fmt[RTE_ETHER_ADDR_FMT_SIZE];
184 
185 			rte_ether_format_addr(ea_fmt,
186 					RTE_ETHER_ADDR_FMT_SIZE, ea);
187 			ERROR("Adding MAC address %s failed", ea_fmt);
188 			return ret;
189 		}
190 	}
191 	/*
192 	 * Propagate multicast MAC addresses to sub-devices,
193 	 * if non zero number of addresses is set.
194 	 * The condition is required to avoid breakage of failsafe
195 	 * for sub-devices which do not support the operation
196 	 * if the feature is really not used.
197 	 */
198 	if (PRIV(dev)->nb_mcast_addr > 0) {
199 		DEBUG("Configuring multicast MAC addresses");
200 		ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
201 						   PRIV(dev)->mcast_addrs,
202 						   PRIV(dev)->nb_mcast_addr);
203 		if (ret) {
204 			ERROR("Failed to apply multicast MAC addresses");
205 			return ret;
206 		}
207 	}
208 	/* VLAN filter */
209 	vfc1 = &dev->data->vlan_filter_conf;
210 	vfc2 = &edev->data->vlan_filter_conf;
211 	if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
212 		uint64_t vbit;
213 		uint64_t ids;
214 		size_t i;
215 		uint16_t vlan_id;
216 
217 		DEBUG("Configuring VLAN filter");
218 		for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
219 			if (vfc1->ids[i] == 0)
220 				continue;
221 			ids = vfc1->ids[i];
222 			while (ids) {
223 				vlan_id = 64 * i;
224 				/* count trailing zeroes */
225 				vbit = ~ids & (ids - 1);
226 				/* clear least significant bit set */
227 				ids ^= (ids ^ (ids - 1)) ^ vbit;
228 				for (; vbit; vlan_id++)
229 					vbit >>= 1;
230 				ret = rte_eth_dev_vlan_filter(
231 					PORT_ID(sdev), vlan_id, 1);
232 				if (ret) {
233 					ERROR("Failed to apply VLAN filter %hu",
234 						vlan_id);
235 					return ret;
236 				}
237 			}
238 		}
239 	} else {
240 		DEBUG("VLAN filter already set");
241 	}
242 	/* rte_flow */
243 	if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
244 		DEBUG("rte_flow already set");
245 	} else {
246 		DEBUG("Resetting rte_flow configuration");
247 		ret = rte_flow_flush(PORT_ID(sdev), &ferror);
248 		if (ret) {
249 			fs_flow_complain(&ferror);
250 			return ret;
251 		}
252 		i = 0;
253 		rte_errno = 0;
254 		DEBUG("Configuring rte_flow");
255 		TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
256 			DEBUG("Creating flow #%" PRIu32, i++);
257 			flow->flows[SUB_ID(sdev)] =
258 				rte_flow_create(PORT_ID(sdev),
259 						flow->rule.attr,
260 						flow->rule.pattern,
261 						flow->rule.actions,
262 						&ferror);
263 			ret = rte_errno;
264 			if (ret)
265 				break;
266 		}
267 		if (ret) {
268 			fs_flow_complain(&ferror);
269 			return ret;
270 		}
271 	}
272 	return 0;
273 }
274 
275 static void
276 fs_dev_remove(struct sub_device *sdev)
277 {
278 	int ret;
279 
280 	if (sdev == NULL)
281 		return;
282 	switch (sdev->state) {
283 	case DEV_STARTED:
284 		failsafe_rx_intr_uninstall_subdevice(sdev);
285 		rte_eth_dev_stop(PORT_ID(sdev));
286 		sdev->state = DEV_ACTIVE;
287 		/* fallthrough */
288 	case DEV_ACTIVE:
289 		failsafe_eth_dev_unregister_callbacks(sdev);
290 		rte_eth_dev_close(PORT_ID(sdev));
291 		sdev->state = DEV_PROBED;
292 		/* fallthrough */
293 	case DEV_PROBED:
294 		ret = rte_dev_remove(sdev->dev);
295 		if (ret < 0) {
296 			ERROR("Bus detach failed for sub_device %u",
297 			      SUB_ID(sdev));
298 		} else {
299 			rte_eth_dev_release_port(ETH(sdev));
300 		}
301 		sdev->state = DEV_PARSED;
302 		/* fallthrough */
303 	case DEV_PARSED:
304 	case DEV_UNDEFINED:
305 		sdev->state = DEV_UNDEFINED;
306 		sdev->sdev_port_id = RTE_MAX_ETHPORTS;
307 		/* the end */
308 		break;
309 	}
310 	sdev->remove = 0;
311 	failsafe_hotplug_alarm_install(fs_dev(sdev));
312 }
313 
314 static void
315 fs_dev_stats_save(struct sub_device *sdev)
316 {
317 	struct rte_eth_stats stats;
318 	int err;
319 
320 	/* Attempt to read current stats. */
321 	err = rte_eth_stats_get(PORT_ID(sdev), &stats);
322 	if (err) {
323 		uint64_t timestamp = sdev->stats_snapshot.timestamp;
324 
325 		WARN("Could not access latest statistics from sub-device %d.",
326 			 SUB_ID(sdev));
327 		if (timestamp != 0)
328 			WARN("Using latest snapshot taken before %"PRIu64" seconds.",
329 				 (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
330 	}
331 	failsafe_stats_increment
332 		(&PRIV(fs_dev(sdev))->stats_accumulator,
333 		err ? &sdev->stats_snapshot.stats : &stats);
334 	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
335 }
336 
337 static inline int
338 fs_rxtx_clean(struct sub_device *sdev)
339 {
340 	uint16_t i;
341 
342 	for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
343 		if (FS_ATOMIC_RX(sdev, i))
344 			return 0;
345 	for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
346 		if (FS_ATOMIC_TX(sdev, i))
347 			return 0;
348 	return 1;
349 }
350 
351 void
352 failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev)
353 {
354 	int ret;
355 
356 	if (sdev == NULL)
357 		return;
358 	if (sdev->rmv_callback) {
359 		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
360 						RTE_ETH_EVENT_INTR_RMV,
361 						failsafe_eth_rmv_event_callback,
362 						sdev);
363 		if (ret)
364 			WARN("Failed to unregister RMV callback for sub_device"
365 			     " %d", SUB_ID(sdev));
366 		sdev->rmv_callback = 0;
367 	}
368 	if (sdev->lsc_callback) {
369 		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
370 						RTE_ETH_EVENT_INTR_LSC,
371 						failsafe_eth_lsc_event_callback,
372 						sdev);
373 		if (ret)
374 			WARN("Failed to unregister LSC callback for sub_device"
375 			     " %d", SUB_ID(sdev));
376 		sdev->lsc_callback = 0;
377 	}
378 }
379 
380 void
381 failsafe_dev_remove(struct rte_eth_dev *dev)
382 {
383 	struct sub_device *sdev;
384 	uint8_t i;
385 
386 	FOREACH_SUBDEV(sdev, i, dev) {
387 		if (!sdev->remove)
388 			continue;
389 
390 		/* Active devices must have finished their burst and
391 		 * their stats must be saved.
392 		 */
393 		if (sdev->state >= DEV_ACTIVE &&
394 		    fs_rxtx_clean(sdev) == 0)
395 			continue;
396 		if (fs_lock(dev, 1) != 0)
397 			return;
398 		if (sdev->state >= DEV_ACTIVE)
399 			fs_dev_stats_save(sdev);
400 		fs_dev_remove(sdev);
401 		fs_unlock(dev, 1);
402 	}
403 }
404 
405 static int
406 failsafe_eth_dev_rx_queues_sync(struct rte_eth_dev *dev)
407 {
408 	struct rxq *rxq;
409 	int ret;
410 	uint16_t i;
411 
412 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
413 		rxq = dev->data->rx_queues[i];
414 
415 		if (rxq->info.conf.rx_deferred_start &&
416 		    dev->data->rx_queue_state[i] ==
417 						RTE_ETH_QUEUE_STATE_STARTED) {
418 			/*
419 			 * The subdevice Rx queue does not launch on device
420 			 * start if deferred start flag is set. It needs to be
421 			 * started manually in case an appropriate failsafe Rx
422 			 * queue has been started earlier.
423 			 */
424 			ret = dev->dev_ops->rx_queue_start(dev, i);
425 			if (ret) {
426 				ERROR("Could not synchronize Rx queue %d", i);
427 				return ret;
428 			}
429 		} else if (dev->data->rx_queue_state[i] ==
430 						RTE_ETH_QUEUE_STATE_STOPPED) {
431 			/*
432 			 * The subdevice Rx queue needs to be stopped manually
433 			 * in case an appropriate failsafe Rx queue has been
434 			 * stopped earlier.
435 			 */
436 			ret = dev->dev_ops->rx_queue_stop(dev, i);
437 			if (ret) {
438 				ERROR("Could not synchronize Rx queue %d", i);
439 				return ret;
440 			}
441 		}
442 	}
443 	return 0;
444 }
445 
446 static int
447 failsafe_eth_dev_tx_queues_sync(struct rte_eth_dev *dev)
448 {
449 	struct txq *txq;
450 	int ret;
451 	uint16_t i;
452 
453 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
454 		txq = dev->data->tx_queues[i];
455 
456 		if (txq->info.conf.tx_deferred_start &&
457 		    dev->data->tx_queue_state[i] ==
458 						RTE_ETH_QUEUE_STATE_STARTED) {
459 			/*
460 			 * The subdevice Tx queue does not launch on device
461 			 * start if deferred start flag is set. It needs to be
462 			 * started manually in case an appropriate failsafe Tx
463 			 * queue has been started earlier.
464 			 */
465 			ret = dev->dev_ops->tx_queue_start(dev, i);
466 			if (ret) {
467 				ERROR("Could not synchronize Tx queue %d", i);
468 				return ret;
469 			}
470 		} else if (dev->data->tx_queue_state[i] ==
471 						RTE_ETH_QUEUE_STATE_STOPPED) {
472 			/*
473 			 * The subdevice Tx queue needs to be stopped manually
474 			 * in case an appropriate failsafe Tx queue has been
475 			 * stopped earlier.
476 			 */
477 			ret = dev->dev_ops->tx_queue_stop(dev, i);
478 			if (ret) {
479 				ERROR("Could not synchronize Tx queue %d", i);
480 				return ret;
481 			}
482 		}
483 	}
484 	return 0;
485 }
486 
487 int
488 failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
489 {
490 	struct sub_device *sdev;
491 	uint32_t inactive;
492 	int ret;
493 	uint8_t i;
494 
495 	if (PRIV(dev)->state < DEV_PARSED)
496 		return 0;
497 
498 	ret = failsafe_args_parse_subs(dev);
499 	if (ret)
500 		goto err_remove;
501 
502 	if (PRIV(dev)->state < DEV_PROBED)
503 		return 0;
504 	ret = failsafe_eal_init(dev);
505 	if (ret)
506 		goto err_remove;
507 	if (PRIV(dev)->state < DEV_ACTIVE)
508 		return 0;
509 	inactive = 0;
510 	FOREACH_SUBDEV(sdev, i, dev) {
511 		if (sdev->state == DEV_PROBED) {
512 			inactive |= UINT32_C(1) << i;
513 			ret = eth_dev_flow_isolate_set(dev, sdev);
514 			if (ret) {
515 				ERROR("Could not apply configuration to sub_device %d",
516 				      i);
517 				goto err_remove;
518 			}
519 		}
520 	}
521 	ret = dev->dev_ops->dev_configure(dev);
522 	if (ret)
523 		goto err_remove;
524 	FOREACH_SUBDEV(sdev, i, dev) {
525 		if (inactive & (UINT32_C(1) << i)) {
526 			ret = fs_eth_dev_conf_apply(dev, sdev);
527 			if (ret) {
528 				ERROR("Could not apply configuration to sub_device %d",
529 				      i);
530 				goto err_remove;
531 			}
532 		}
533 	}
534 	/*
535 	 * If new devices have been configured, check if
536 	 * the link state has changed.
537 	 */
538 	if (inactive)
539 		dev->dev_ops->link_update(dev, 1);
540 	if (PRIV(dev)->state < DEV_STARTED)
541 		return 0;
542 	ret = dev->dev_ops->dev_start(dev);
543 	if (ret)
544 		goto err_remove;
545 	ret = failsafe_eth_dev_rx_queues_sync(dev);
546 	if (ret)
547 		goto err_remove;
548 	ret = failsafe_eth_dev_tx_queues_sync(dev);
549 	if (ret)
550 		goto err_remove;
551 	return 0;
552 err_remove:
553 	FOREACH_SUBDEV(sdev, i, dev)
554 		if (sdev->state != PRIV(dev)->state)
555 			sdev->remove = 1;
556 	return ret;
557 }
558 
559 void
560 failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from)
561 {
562 	uint32_t i;
563 
564 	RTE_ASSERT(to != NULL && from != NULL);
565 	to->ipackets += from->ipackets;
566 	to->opackets += from->opackets;
567 	to->ibytes += from->ibytes;
568 	to->obytes += from->obytes;
569 	to->imissed += from->imissed;
570 	to->ierrors += from->ierrors;
571 	to->oerrors += from->oerrors;
572 	to->rx_nombuf += from->rx_nombuf;
573 	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
574 		to->q_ipackets[i] += from->q_ipackets[i];
575 		to->q_opackets[i] += from->q_opackets[i];
576 		to->q_ibytes[i] += from->q_ibytes[i];
577 		to->q_obytes[i] += from->q_obytes[i];
578 		to->q_errors[i] += from->q_errors[i];
579 	}
580 }
581 
582 int
583 failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
584 				enum rte_eth_event_type event __rte_unused,
585 				void *cb_arg, void *out __rte_unused)
586 {
587 	struct sub_device *sdev = cb_arg;
588 
589 	fs_lock(fs_dev(sdev), 0);
590 	/* Switch as soon as possible tx_dev. */
591 	fs_switch_dev(fs_dev(sdev), sdev);
592 	/* Use safe bursts in any case. */
593 	failsafe_set_burst_fn(fs_dev(sdev), 1);
594 	/*
595 	 * Async removal, the sub-PMD will try to unregister
596 	 * the callback at the source of the current thread context.
597 	 */
598 	sdev->remove = 1;
599 	fs_unlock(fs_dev(sdev), 0);
600 	return 0;
601 }
602 
603 int
604 failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
605 				enum rte_eth_event_type event __rte_unused,
606 				void *cb_arg, void *out __rte_unused)
607 {
608 	struct rte_eth_dev *dev = cb_arg;
609 	int ret;
610 
611 	ret = dev->dev_ops->link_update(dev, 0);
612 	/* We must pass on the LSC event */
613 	if (ret)
614 		return rte_eth_dev_callback_process(dev,
615 						    RTE_ETH_EVENT_INTR_LSC,
616 						    NULL);
617 	else
618 		return 0;
619 }
620 
621 /* Take sub-device ownership before it becomes exposed to the application. */
622 int
623 failsafe_eth_new_event_callback(uint16_t port_id,
624 				enum rte_eth_event_type event __rte_unused,
625 				void *cb_arg, void *out __rte_unused)
626 {
627 	struct rte_eth_dev *fs_dev = cb_arg;
628 	struct sub_device *sdev;
629 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
630 	uint8_t i;
631 
632 	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
633 		if (sdev->state >= DEV_PROBED)
634 			continue;
635 		if (dev->device == NULL) {
636 			WARN("Trying to probe malformed device %s.\n",
637 			     sdev->devargs.name);
638 			continue;
639 		}
640 		if (strcmp(sdev->devargs.name, dev->device->name) != 0)
641 			continue;
642 		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
643 		/* The actual owner will be checked after the port probing. */
644 		break;
645 	}
646 	return 0;
647 }
648