xref: /dpdk/drivers/net/failsafe/failsafe_ether.c (revision 2d0c29a37a9c080c1cccb1ad7941aba2ccf5437e)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_cycles.h>

#include "failsafe_private.h"

/** Print a message out of a flow error. */
static int
fs_flow_complain(struct rte_flow_error *error)
{
	static const char *const errstrlist[] = {
		[RTE_FLOW_ERROR_TYPE_NONE] = "no error",
		[RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
		[RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
		[RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
		[RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
		[RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
		[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
		[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
		[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
		[RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
		[RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
		[RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
	};
	const char *errstr;
	char buf[32];
	int err = rte_errno;

	if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
			!errstrlist[error->type])
		errstr = "unknown type";
	else
		errstr = errstrlist[error->type];
	ERROR("Caught error type %d (%s): %s%s\n",
		error->type, errstr,
		error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
				error->cause), buf) : "",
		error->message ? error->message : "(no stated reason)");
	return -err;
}

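/** Apply flow isolation mode to a sub-device when it is enabled on the fail-safe port. */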
static int
eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
			 struct sub_device *sdev)
{
	struct rte_flow_error ferror;
	int ret;

	if (!PRIV(dev)->flow_isolated) {
		DEBUG("Flow isolation already disabled");
	} else {
		DEBUG("Enabling flow isolation");
		ret = rte_flow_isolate(PORT_ID(sdev),
				       PRIV(dev)->flow_isolated,
				       &ferror);
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
	}
	return 0;
}

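/**
 * Apply the current fail-safe port configuration to a sub-device:
 * Rx/Tx queue setup, link status, promiscuous and all-multicast modes,
 * MTU, MAC addresses, multicast addresses, VLAN filters and flow rules.
 */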
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
		struct sub_device *sdev)
{
	struct rte_eth_dev *edev;
	struct rte_vlan_filter_conf *vfc1;
	struct rte_vlan_filter_conf *vfc2;
	struct rte_flow *flow;
	struct rte_flow_error ferror;
	uint32_t i;
	int ret;

	edev = ETH(sdev);
	/* RX queue setup */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct rxq *rxq;

		rxq = dev->data->rx_queues[i];
		ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
				rxq->info.nb_desc, rxq->socket_id,
				&rxq->info.conf, rxq->info.mp);
		if (ret) {
			ERROR("rx_queue_setup failed");
			return ret;
		}
	}
	/* TX queue setup */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct txq *txq;

		txq = dev->data->tx_queues[i];
		ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
				txq->info.nb_desc, txq->socket_id,
				&txq->info.conf);
		if (ret) {
			ERROR("tx_queue_setup failed");
			return ret;
		}
	}
	/* dev_link.link_status */
	if (dev->data->dev_link.link_status !=
	    edev->data->dev_link.link_status) {
		DEBUG("Configuring link_status");
		if (dev->data->dev_link.link_status)
			ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
		else
			ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
		if (ret) {
			ERROR("Failed to apply link_status");
			return ret;
		}
	} else {
		DEBUG("link_status already set");
	}
	/* promiscuous */
	if (dev->data->promiscuous != edev->data->promiscuous) {
		DEBUG("Configuring promiscuous");
		if (dev->data->promiscuous)
			rte_eth_promiscuous_enable(PORT_ID(sdev));
		else
			rte_eth_promiscuous_disable(PORT_ID(sdev));
	} else {
		DEBUG("promiscuous already set");
	}
	/* all_multicast */
	if (dev->data->all_multicast != edev->data->all_multicast) {
		DEBUG("Configuring all_multicast");
		if (dev->data->all_multicast)
			rte_eth_allmulticast_enable(PORT_ID(sdev));
		else
			rte_eth_allmulticast_disable(PORT_ID(sdev));
	} else {
		DEBUG("all_multicast already set");
	}
	/* MTU */
	if (dev->data->mtu != edev->data->mtu) {
		DEBUG("Configuring MTU");
		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
		if (ret) {
			ERROR("Failed to apply MTU");
			return ret;
		}
	} else {
		DEBUG("MTU already set");
	}
	/* default MAC */
	DEBUG("Configuring default MAC address");
	ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
			&dev->data->mac_addrs[0]);
	if (ret) {
		ERROR("Setting default MAC address failed");
		return ret;
	}
	/* additional MAC */
	if (PRIV(dev)->nb_mac_addr > 1)
		DEBUG("Configure additional MAC address%s",
			(PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
	for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
		struct ether_addr *ea;

		ea = &dev->data->mac_addrs[i];
		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
				PRIV(dev)->mac_addr_pool[i]);
		if (ret) {
			char ea_fmt[ETHER_ADDR_FMT_SIZE];

			ether_format_addr(ea_fmt, ETHER_ADDR_FMT_SIZE, ea);
			ERROR("Adding MAC address %s failed", ea_fmt);
			return ret;
		}
	}
	/*
	 * Propagate multicast MAC addresses to sub-devices only if at
	 * least one address is set. The check avoids breaking fail-safe
	 * for sub-devices that do not support the operation when the
	 * feature is not actually used.
	 */
	if (PRIV(dev)->nb_mcast_addr > 0) {
		DEBUG("Configuring multicast MAC addresses");
		ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
						   PRIV(dev)->mcast_addrs,
						   PRIV(dev)->nb_mcast_addr);
		if (ret) {
			ERROR("Failed to apply multicast MAC addresses");
			return ret;
		}
	}
	/* VLAN filter */
	vfc1 = &dev->data->vlan_filter_conf;
	vfc2 = &edev->data->vlan_filter_conf;
	if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
		uint64_t vbit;
		uint64_t ids;
		size_t i;
		uint16_t vlan_id;

		DEBUG("Configuring VLAN filter");
		for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
			if (vfc1->ids[i] == 0)
				continue;
			ids = vfc1->ids[i];
			while (ids) {
				vlan_id = 64 * i;
				/* count trailing zeroes */
				vbit = ~ids & (ids - 1);
				/* clear least significant bit set */
				ids ^= (ids ^ (ids - 1)) ^ vbit;
				for (; vbit; vlan_id++)
					vbit >>= 1;
				ret = rte_eth_dev_vlan_filter(
					PORT_ID(sdev), vlan_id, 1);
				if (ret) {
					ERROR("Failed to apply VLAN filter %hu",
						vlan_id);
					return ret;
				}
			}
		}
	} else {
		DEBUG("VLAN filter already set");
	}
	/* rte_flow */
	if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
		DEBUG("rte_flow already set");
	} else {
		DEBUG("Resetting rte_flow configuration");
		ret = rte_flow_flush(PORT_ID(sdev), &ferror);
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
		i = 0;
		rte_errno = 0;
		DEBUG("Configuring rte_flow");
		TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
			DEBUG("Creating flow #%" PRIu32, i++);
			flow->flows[SUB_ID(sdev)] =
				rte_flow_create(PORT_ID(sdev),
						flow->rule.attr,
						flow->rule.pattern,
						flow->rule.actions,
						&ferror);
			ret = rte_errno;
			if (ret)
				break;
		}
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
	}
	return 0;
}

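/** Stop, close and detach a sub-device, then re-arm the hotplug alarm. */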
static void
fs_dev_remove(struct sub_device *sdev)
{
	int ret;

	if (sdev == NULL)
		return;
	switch (sdev->state) {
	case DEV_STARTED:
		failsafe_rx_intr_uninstall_subdevice(sdev);
		rte_eth_dev_stop(PORT_ID(sdev));
		sdev->state = DEV_ACTIVE;
		/* fallthrough */
	case DEV_ACTIVE:
		failsafe_eth_dev_unregister_callbacks(sdev);
		rte_eth_dev_close(PORT_ID(sdev));
		sdev->state = DEV_PROBED;
		/* fallthrough */
	case DEV_PROBED:
		ret = rte_dev_remove(sdev->dev);
		if (ret) {
			ERROR("Bus detach failed for sub_device %u",
			      SUB_ID(sdev));
		} else {
			rte_eth_dev_release_port(ETH(sdev));
		}
		sdev->state = DEV_PARSED;
		/* fallthrough */
	case DEV_PARSED:
	case DEV_UNDEFINED:
		sdev->state = DEV_UNDEFINED;
		sdev->sdev_port_id = RTE_MAX_ETHPORTS;
		/* the end */
		break;
	}
	sdev->remove = 0;
	failsafe_hotplug_alarm_install(fs_dev(sdev));
}

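/** Add the latest sub-device statistics (or its last snapshot) to the fail-safe accumulator. */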
static void
fs_dev_stats_save(struct sub_device *sdev)
{
	struct rte_eth_stats stats;
	int err;

	/* Attempt to read current stats. */
	err = rte_eth_stats_get(PORT_ID(sdev), &stats);
	if (err) {
		uint64_t timestamp = sdev->stats_snapshot.timestamp;

		WARN("Could not access latest statistics from sub-device %d.\n",
			 SUB_ID(sdev));
		if (timestamp != 0)
			WARN("Using latest snapshot taken %"PRIu64" seconds ago.\n",
				 (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
	}
	failsafe_stats_increment
		(&PRIV(fs_dev(sdev))->stats_accumulator,
		err ? &sdev->stats_snapshot.stats : &stats);
	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
}

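/** Return 1 when no Rx/Tx burst is in flight on the sub-device, 0 otherwise. */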
static inline int
fs_rxtx_clean(struct sub_device *sdev)
{
	uint16_t i;

	for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
		if (FS_ATOMIC_RX(sdev, i))
			return 0;
	for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
		if (FS_ATOMIC_TX(sdev, i))
			return 0;
	return 1;
}

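/** Unregister the RMV and LSC event callbacks of a sub-device. */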
void
failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev)
{
	int ret;

	if (sdev == NULL)
		return;
	if (sdev->rmv_callback) {
		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
						RTE_ETH_EVENT_INTR_RMV,
						failsafe_eth_rmv_event_callback,
						sdev);
		if (ret)
			WARN("Failed to unregister RMV callback for sub_device"
			     " %d", SUB_ID(sdev));
		sdev->rmv_callback = 0;
	}
	if (sdev->lsc_callback) {
		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
						RTE_ETH_EVENT_INTR_LSC,
						failsafe_eth_lsc_event_callback,
						sdev);
		if (ret)
			WARN("Failed to unregister LSC callback for sub_device"
			     " %d", SUB_ID(sdev));
		sdev->lsc_callback = 0;
	}
}

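/** Remove sub-devices flagged for removal once their datapath is quiescent. */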
void
failsafe_dev_remove(struct rte_eth_dev *dev)
{
	struct sub_device *sdev;
	uint8_t i;

	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
		if (sdev->remove && fs_rxtx_clean(sdev)) {
			if (fs_lock(dev, 1) != 0)
				return;
			fs_dev_stats_save(sdev);
			fs_dev_remove(sdev);
			fs_unlock(dev, 1);
		}
}

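/** Align the started/stopped state of sub-device Rx queues with the fail-safe port. */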
static int
failsafe_eth_dev_rx_queues_sync(struct rte_eth_dev *dev)
{
	struct rxq *rxq;
	int ret;
	uint16_t i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];

		if (rxq->info.conf.rx_deferred_start &&
		    dev->data->rx_queue_state[i] ==
						RTE_ETH_QUEUE_STATE_STARTED) {
			/*
			 * The sub-device Rx queue is not started on device
			 * start when the deferred start flag is set. It must
			 * be started manually here because the corresponding
			 * fail-safe Rx queue has been started earlier.
			 */
			ret = dev->dev_ops->rx_queue_start(dev, i);
			if (ret) {
				ERROR("Could not synchronize Rx queue %d", i);
				return ret;
			}
		} else if (dev->data->rx_queue_state[i] ==
						RTE_ETH_QUEUE_STATE_STOPPED) {
			/*
			 * The sub-device Rx queue must be stopped manually
			 * here because the corresponding fail-safe Rx queue
			 * has been stopped earlier.
			 */
			ret = dev->dev_ops->rx_queue_stop(dev, i);
			if (ret) {
				ERROR("Could not synchronize Rx queue %d", i);
				return ret;
			}
		}
	}
	return 0;
}

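/** Align the started/stopped state of sub-device Tx queues with the fail-safe port. */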
static int
failsafe_eth_dev_tx_queues_sync(struct rte_eth_dev *dev)
{
	struct txq *txq;
	int ret;
	uint16_t i;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];

		if (txq->info.conf.tx_deferred_start &&
		    dev->data->tx_queue_state[i] ==
						RTE_ETH_QUEUE_STATE_STARTED) {
			/*
			 * The sub-device Tx queue is not started on device
			 * start when the deferred start flag is set. It must
			 * be started manually here because the corresponding
			 * fail-safe Tx queue has been started earlier.
			 */
			ret = dev->dev_ops->tx_queue_start(dev, i);
			if (ret) {
				ERROR("Could not synchronize Tx queue %d", i);
				return ret;
			}
		} else if (dev->data->tx_queue_state[i] ==
						RTE_ETH_QUEUE_STATE_STOPPED) {
			/*
			 * The sub-device Tx queue must be stopped manually
			 * here because the corresponding fail-safe Tx queue
			 * has been stopped earlier.
			 */
			ret = dev->dev_ops->tx_queue_stop(dev, i);
			if (ret) {
				ERROR("Could not synchronize Tx queue %d", i);
				return ret;
			}
		}
	}
	return 0;
}

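/**
 * Bring every sub-device up to the fail-safe port state:
 * parse arguments, probe, configure and start as required.
 */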
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
	struct sub_device *sdev;
	uint32_t inactive;
	int ret;
	uint8_t i;

	if (PRIV(dev)->state < DEV_PARSED)
		return 0;

	ret = failsafe_args_parse_subs(dev);
	if (ret)
		goto err_remove;

	if (PRIV(dev)->state < DEV_PROBED)
		return 0;
	ret = failsafe_eal_init(dev);
	if (ret)
		goto err_remove;
	if (PRIV(dev)->state < DEV_ACTIVE)
		return 0;
	inactive = 0;
	FOREACH_SUBDEV(sdev, i, dev) {
		if (sdev->state == DEV_PROBED) {
			inactive |= UINT32_C(1) << i;
			ret = eth_dev_flow_isolate_set(dev, sdev);
			if (ret) {
				ERROR("Could not apply configuration to sub_device %d",
				      i);
				goto err_remove;
			}
		}
	}
	ret = dev->dev_ops->dev_configure(dev);
	if (ret)
		goto err_remove;
	FOREACH_SUBDEV(sdev, i, dev) {
		if (inactive & (UINT32_C(1) << i)) {
			ret = fs_eth_dev_conf_apply(dev, sdev);
			if (ret) {
				ERROR("Could not apply configuration to sub_device %d",
				      i);
				goto err_remove;
			}
		}
	}
	/*
	 * If new devices have been configured, check if
	 * the link state has changed.
	 */
	if (inactive)
		dev->dev_ops->link_update(dev, 1);
	if (PRIV(dev)->state < DEV_STARTED)
		return 0;
	ret = dev->dev_ops->dev_start(dev);
	if (ret)
		goto err_remove;
	ret = failsafe_eth_dev_rx_queues_sync(dev);
	if (ret)
		goto err_remove;
	ret = failsafe_eth_dev_tx_queues_sync(dev);
	if (ret)
		goto err_remove;
	return 0;
err_remove:
	FOREACH_SUBDEV(sdev, i, dev)
		if (sdev->state != PRIV(dev)->state)
			sdev->remove = 1;
	return ret;
}

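/** Accumulate the statistics of "from" into "to", including per-queue counters. */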
void
failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from)
{
	uint32_t i;

	RTE_ASSERT(to != NULL && from != NULL);
	to->ipackets += from->ipackets;
	to->opackets += from->opackets;
	to->ibytes += from->ibytes;
	to->obytes += from->obytes;
	to->imissed += from->imissed;
	to->ierrors += from->ierrors;
	to->oerrors += from->oerrors;
	to->rx_nombuf += from->rx_nombuf;
	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
		to->q_ipackets[i] += from->q_ipackets[i];
		to->q_opackets[i] += from->q_opackets[i];
		to->q_ibytes[i] += from->q_ibytes[i];
		to->q_obytes[i] += from->q_obytes[i];
		to->q_errors[i] += from->q_errors[i];
	}
}

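/** RMV event callback: switch the datapath away from the sub-device and flag it for removal. */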
int
failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct sub_device *sdev = cb_arg;

	fs_lock(fs_dev(sdev), 0);
	/* Switch the tx_dev away from this sub-device as soon as possible. */
	fs_switch_dev(fs_dev(sdev), sdev);
	/* Use safe bursts in any case. */
	failsafe_set_burst_fn(fs_dev(sdev), 1);
	/*
	 * Removal is asynchronous: the sub-PMD callback is unregistered
	 * later, outside of the thread context delivering this event.
	 */
	sdev->remove = 1;
	fs_unlock(fs_dev(sdev), 0);
	return 0;
}

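/** LSC event callback: update the fail-safe link and forward the event when the link changed. */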
int
failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct rte_eth_dev *dev = cb_arg;
	int ret;

	ret = dev->dev_ops->link_update(dev, 0);
	/* We must pass on the LSC event */
	if (ret)
		return _rte_eth_dev_callback_process(dev,
						     RTE_ETH_EVENT_INTR_LSC,
						     NULL);
	else
		return 0;
}

/* Take sub-device ownership before it becomes exposed to the application. */
int
failsafe_eth_new_event_callback(uint16_t port_id,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct rte_eth_dev *fs_dev = cb_arg;
	struct sub_device *sdev;
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
	uint8_t i;

	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
		if (sdev->state >= DEV_PROBED)
			continue;
		if (strcmp(sdev->devargs.name, dev->device->name) != 0)
			continue;
		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
		/* The actual owner will be checked after the port probing. */
		break;
	}
	return 0;
}