/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_cycles.h>

#include "failsafe_private.h"

/** Print a message out of a flow error. */
static int
fs_flow_complain(struct rte_flow_error *error)
{
	static const char *const errstrlist[] = {
		[RTE_FLOW_ERROR_TYPE_NONE] = "no error",
		[RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
		[RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
		[RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
		[RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
		[RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
		[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
		[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
		[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
		[RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
		[RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
		[RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
	};
	const char *errstr;
	char buf[32];
	int err = rte_errno;

	if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
			!errstrlist[error->type])
		errstr = "unknown type";
	else
		errstr = errstrlist[error->type];
	ERROR("Caught error type %d (%s): %s%s",
		error->type, errstr,
		error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
				error->cause), buf) : "",
		error->message ? error->message : "(no stated reason)");
	return -err;
}

static int
eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
			 struct sub_device *sdev)
{
	struct rte_flow_error ferror;
	int ret;

	if (!PRIV(dev)->flow_isolated) {
		DEBUG("Flow isolation already disabled");
	} else {
		DEBUG("Enabling flow isolation");
		ret = rte_flow_isolate(PORT_ID(sdev),
				       PRIV(dev)->flow_isolated,
				       &ferror);
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
	}
	return 0;
}

/*
 * Replay the fail-safe port's current configuration on a sub-device:
 * Rx/Tx queue setup, link state, promiscuous and all-multicast modes,
 * MTU, MAC addresses, VLAN filters and flow rules.
 */
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
		struct sub_device *sdev)
{
	struct rte_eth_dev *edev;
	struct rte_vlan_filter_conf *vfc1;
	struct rte_vlan_filter_conf *vfc2;
	struct rte_flow *flow;
	struct rte_flow_error ferror;
	uint32_t i;
	int ret;

	edev = ETH(sdev);
	/* RX queue setup */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct rxq *rxq;

		rxq = dev->data->rx_queues[i];
		ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
				rxq->info.nb_desc, rxq->socket_id,
				&rxq->info.conf, rxq->info.mp);
		if (ret) {
			ERROR("rx_queue_setup failed");
			return ret;
		}
	}
	/* TX queue setup */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct txq *txq;

		txq = dev->data->tx_queues[i];
		ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
				txq->info.nb_desc, txq->socket_id,
				&txq->info.conf);
		if (ret) {
			ERROR("tx_queue_setup failed");
			return ret;
		}
	}
	/* dev_link.link_status */
	if (dev->data->dev_link.link_status !=
	    edev->data->dev_link.link_status) {
		DEBUG("Configuring link_status");
		if (dev->data->dev_link.link_status)
			ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
		else
			ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
		if (ret) {
			ERROR("Failed to apply link_status");
			return ret;
		}
	} else {
		DEBUG("link_status already set");
	}
	/* promiscuous */
	if (dev->data->promiscuous != edev->data->promiscuous) {
		DEBUG("Configuring promiscuous");
		if (dev->data->promiscuous)
			ret = rte_eth_promiscuous_enable(PORT_ID(sdev));
		else
			ret = rte_eth_promiscuous_disable(PORT_ID(sdev));
		if (ret != 0) {
			ERROR("Failed to apply promiscuous mode");
			return ret;
		}
	} else {
		DEBUG("promiscuous already set");
	}
	/* all_multicast */
	if (dev->data->all_multicast != edev->data->all_multicast) {
		DEBUG("Configuring all_multicast");
		if (dev->data->all_multicast)
			ret = rte_eth_allmulticast_enable(PORT_ID(sdev));
		else
			ret = rte_eth_allmulticast_disable(PORT_ID(sdev));
		if (ret != 0) {
			ERROR("Failed to apply allmulticast mode");
			return ret;
		}
	} else {
		DEBUG("all_multicast already set");
	}
	/* MTU */
	if (dev->data->mtu != edev->data->mtu) {
		DEBUG("Configuring MTU");
		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
		if (ret) {
			ERROR("Failed to apply MTU");
			return ret;
		}
	} else {
		DEBUG("MTU already set");
	}
	/* default MAC */
	DEBUG("Configuring default MAC address");
	ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
			&dev->data->mac_addrs[0]);
	if (ret) {
		ERROR("Setting default MAC address failed");
		return ret;
	}
	/* additional MAC */
	if (PRIV(dev)->nb_mac_addr > 1)
		DEBUG("Configure additional MAC address%s",
			(PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
	for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
		struct rte_ether_addr *ea;

		ea = &dev->data->mac_addrs[i];
		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
				PRIV(dev)->mac_addr_pool[i]);
		if (ret) {
			char ea_fmt[RTE_ETHER_ADDR_FMT_SIZE];

			rte_ether_format_addr(ea_fmt,
					RTE_ETHER_ADDR_FMT_SIZE, ea);
			ERROR("Adding MAC address %s failed", ea_fmt);
			return ret;
		}
	}
	/*
	 * Propagate multicast MAC addresses to sub-devices,
	 * if a non-zero number of addresses is set.
	 * The condition avoids breaking failsafe for sub-devices
	 * which do not support the operation when the feature is
	 * not actually used.
	 */
	if (PRIV(dev)->nb_mcast_addr > 0) {
		DEBUG("Configuring multicast MAC addresses");
		ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
						   PRIV(dev)->mcast_addrs,
						   PRIV(dev)->nb_mcast_addr);
		if (ret) {
			ERROR("Failed to apply multicast MAC addresses");
			return ret;
		}
	}
	/* VLAN filter */
	vfc1 = &dev->data->vlan_filter_conf;
	vfc2 = &edev->data->vlan_filter_conf;
	if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
		uint64_t vbit;
		uint64_t ids;
		size_t i;
		uint16_t vlan_id;

		DEBUG("Configuring VLAN filter");
		for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
			if (vfc1->ids[i] == 0)
				continue;
			ids = vfc1->ids[i];
			while (ids) {
				vlan_id = 64 * i;
				/* count trailing zeroes */
				vbit = ~ids & (ids - 1);
				/* clear least significant bit set */
				ids ^= (ids ^ (ids - 1)) ^ vbit;
				for (; vbit; vlan_id++)
					vbit >>= 1;
				ret = rte_eth_dev_vlan_filter(
					PORT_ID(sdev), vlan_id, 1);
				if (ret) {
					ERROR("Failed to apply VLAN filter %hu",
						vlan_id);
					return ret;
				}
			}
		}
	} else {
		DEBUG("VLAN filter already set");
	}
	/* rte_flow */
	if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
		DEBUG("rte_flow already set");
	} else {
		DEBUG("Resetting rte_flow configuration");
		ret = rte_flow_flush(PORT_ID(sdev), &ferror);
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
		i = 0;
		rte_errno = 0;
		DEBUG("Configuring rte_flow");
		TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
			DEBUG("Creating flow #%" PRIu32, i++);
			flow->flows[SUB_ID(sdev)] =
				rte_flow_create(PORT_ID(sdev),
						flow->rule.attr,
						flow->rule.pattern,
						flow->rule.actions,
						&ferror);
			ret = rte_errno;
			if (ret)
				break;
		}
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
	}
	return 0;
}

static void
fs_dev_remove(struct sub_device *sdev)
{
	int ret;

	if (sdev == NULL)
		return;
	switch (sdev->state) {
	case DEV_STARTED:
		failsafe_rx_intr_uninstall_subdevice(sdev);
		ret = rte_eth_dev_stop(PORT_ID(sdev));
		if (ret < 0)
			ERROR("Failed to stop sub-device %u", SUB_ID(sdev));
		sdev->state = DEV_ACTIVE;
		/* fallthrough */
	case DEV_ACTIVE:
		failsafe_eth_dev_unregister_callbacks(sdev);
		ret = rte_eth_dev_close(PORT_ID(sdev));
		if (ret < 0) {
			ERROR("Port close failed for sub-device %u",
			      PORT_ID(sdev));
		}
		sdev->state = DEV_PROBED;
		/* fallthrough */
	case DEV_PROBED:
		ret = rte_dev_remove(sdev->dev);
		if (ret < 0) {
			ERROR("Bus detach failed for sub_device %u",
			      SUB_ID(sdev));
		} else {
			rte_eth_dev_release_port(ETH(sdev));
		}
		sdev->state = DEV_PARSED;
		/* fallthrough */
	case DEV_PARSED:
	case DEV_UNDEFINED:
		sdev->state = DEV_UNDEFINED;
		sdev->sdev_port_id = RTE_MAX_ETHPORTS;
		/* the end */
		break;
	}
	sdev->remove = 0;
	failsafe_hotplug_alarm_install(fs_dev(sdev));
}

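/*
 * Fold the sub-device's current statistics into the fail-safe
 * accumulator, falling back to the last stored snapshot when the
 * port can no longer be queried.
 */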
static void
fs_dev_stats_save(struct sub_device *sdev)
{
	struct rte_eth_stats stats;
	int err;

	/* Attempt to read current stats. */
	err = rte_eth_stats_get(PORT_ID(sdev), &stats);
	if (err) {
		uint64_t timestamp = sdev->stats_snapshot.timestamp;

		WARN("Could not access latest statistics from sub-device %d.",
			 SUB_ID(sdev));
		if (timestamp != 0)
			WARN("Using latest snapshot taken before %"PRIu64" seconds.",
				 (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
	}
	failsafe_stats_increment
		(&PRIV(fs_dev(sdev))->stats_accumulator,
		err ? &sdev->stats_snapshot.stats : &stats);
	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
}

static inline int
fs_rxtx_clean(struct sub_device *sdev)
{
	uint16_t i;

	for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
		if (FS_ATOMIC_RX(sdev, i))
			return 0;
	for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
		if (FS_ATOMIC_TX(sdev, i))
			return 0;
	return 1;
}

void
failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev)
{
	int ret;

	if (sdev == NULL)
		return;
	if (sdev->rmv_callback) {
		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
						RTE_ETH_EVENT_INTR_RMV,
						failsafe_eth_rmv_event_callback,
						sdev);
		if (ret)
			WARN("Failed to unregister RMV callback for sub_device"
			     " %d", SUB_ID(sdev));
		sdev->rmv_callback = 0;
	}
	if (sdev->lsc_callback) {
		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
						RTE_ETH_EVENT_INTR_LSC,
						failsafe_eth_lsc_event_callback,
						sdev);
		if (ret)
			WARN("Failed to unregister LSC callback for sub_device"
			     " %d", SUB_ID(sdev));
		sdev->lsc_callback = 0;
	}
}

void
failsafe_dev_remove(struct rte_eth_dev *dev)
{
	struct sub_device *sdev;
	uint8_t i;

	FOREACH_SUBDEV(sdev, i, dev) {
		if (!sdev->remove)
			continue;

		/* Active devices must have finished their burst and
		 * their stats must be saved.
		 */
		if (sdev->state >= DEV_ACTIVE &&
		    fs_rxtx_clean(sdev) == 0)
			continue;
		if (fs_lock(dev, 1) != 0)
			return;
		if (sdev->state >= DEV_ACTIVE)
			fs_dev_stats_save(sdev);
		fs_dev_remove(sdev);
		fs_unlock(dev, 1);
	}
}

static int
failsafe_eth_dev_rx_queues_sync(struct rte_eth_dev *dev)
{
	struct rxq *rxq;
	int ret;
	uint16_t i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];

		if (rxq->info.conf.rx_deferred_start &&
		    dev->data->rx_queue_state[i] ==
						RTE_ETH_QUEUE_STATE_STARTED) {
			/*
			 * The subdevice Rx queue does not launch on device
			 * start if deferred start flag is set. It needs to be
			 * started manually in case an appropriate failsafe Rx
			 * queue has been started earlier.
			 */
			ret = dev->dev_ops->rx_queue_start(dev, i);
			if (ret) {
				ERROR("Could not synchronize Rx queue %d", i);
				return ret;
			}
		} else if (dev->data->rx_queue_state[i] ==
						RTE_ETH_QUEUE_STATE_STOPPED) {
			/*
			 * The subdevice Rx queue needs to be stopped manually
			 * in case an appropriate failsafe Rx queue has been
			 * stopped earlier.
			 */
			ret = dev->dev_ops->rx_queue_stop(dev, i);
			if (ret) {
				ERROR("Could not synchronize Rx queue %d", i);
				return ret;
			}
		}
	}
	return 0;
}

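/*
 * Re-apply the fail-safe port's Tx queue states to the sub-devices:
 * deferred-start queues already started by the application are started
 * manually, and queues stopped on the fail-safe port are stopped again.
 */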
static int
failsafe_eth_dev_tx_queues_sync(struct rte_eth_dev *dev)
{
	struct txq *txq;
	int ret;
	uint16_t i;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];

		if (txq->info.conf.tx_deferred_start &&
		    dev->data->tx_queue_state[i] ==
						RTE_ETH_QUEUE_STATE_STARTED) {
			/*
			 * The subdevice Tx queue does not launch on device
			 * start if deferred start flag is set. It needs to be
			 * started manually in case an appropriate failsafe Tx
			 * queue has been started earlier.
			 */
			ret = dev->dev_ops->tx_queue_start(dev, i);
			if (ret) {
				ERROR("Could not synchronize Tx queue %d", i);
				return ret;
			}
		} else if (dev->data->tx_queue_state[i] ==
						RTE_ETH_QUEUE_STATE_STOPPED) {
			/*
			 * The subdevice Tx queue needs to be stopped manually
			 * in case an appropriate failsafe Tx queue has been
			 * stopped earlier.
			 */
			ret = dev->dev_ops->tx_queue_stop(dev, i);
			if (ret) {
				ERROR("Could not synchronize Tx queue %d", i);
				return ret;
			}
		}
	}
	return 0;
}

/*
 * Bring every sub-device up to the fail-safe device's own state
 * (DEV_PARSED -> DEV_PROBED -> DEV_ACTIVE -> DEV_STARTED), re-applying
 * the port configuration to sub-devices that were only probed.
 * On failure, sub-devices whose state differs are marked for removal.
 */
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
	struct sub_device *sdev;
	uint32_t inactive;
	int ret;
	uint8_t i;

	if (PRIV(dev)->state < DEV_PARSED)
		return 0;

	ret = failsafe_args_parse_subs(dev);
	if (ret)
		goto err_remove;

	if (PRIV(dev)->state < DEV_PROBED)
		return 0;
	ret = failsafe_eal_init(dev);
	if (ret)
		goto err_remove;
	if (PRIV(dev)->state < DEV_ACTIVE)
		return 0;
	inactive = 0;
	FOREACH_SUBDEV(sdev, i, dev) {
		if (sdev->state == DEV_PROBED) {
			inactive |= UINT32_C(1) << i;
			ret = eth_dev_flow_isolate_set(dev, sdev);
			if (ret) {
				ERROR("Could not apply configuration to sub_device %d",
				      i);
				goto err_remove;
			}
		}
	}
	ret = dev->dev_ops->dev_configure(dev);
	if (ret)
		goto err_remove;
	FOREACH_SUBDEV(sdev, i, dev) {
		if (inactive & (UINT32_C(1) << i)) {
			ret = fs_eth_dev_conf_apply(dev, sdev);
			if (ret) {
				ERROR("Could not apply configuration to sub_device %d",
				      i);
				goto err_remove;
			}
		}
	}
	/*
	 * If new devices have been configured, check if
	 * the link state has changed.
	 */
	if (inactive)
		dev->dev_ops->link_update(dev, 1);
	if (PRIV(dev)->state < DEV_STARTED)
		return 0;
	ret = dev->dev_ops->dev_start(dev);
	if (ret)
		goto err_remove;
	ret = failsafe_eth_dev_rx_queues_sync(dev);
	if (ret)
		goto err_remove;
	ret = failsafe_eth_dev_tx_queues_sync(dev);
	if (ret)
		goto err_remove;
	return 0;
err_remove:
	FOREACH_SUBDEV(sdev, i, dev)
		if (sdev->state != PRIV(dev)->state)
			sdev->remove = 1;
	return ret;
}

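/* Add each counter of "from", including per-queue counters, to "to". */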
void
failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from)
{
	uint32_t i;

	RTE_ASSERT(to != NULL && from != NULL);
	to->ipackets += from->ipackets;
	to->opackets += from->opackets;
	to->ibytes += from->ibytes;
	to->obytes += from->obytes;
	to->imissed += from->imissed;
	to->ierrors += from->ierrors;
	to->oerrors += from->oerrors;
	to->rx_nombuf += from->rx_nombuf;
	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
		to->q_ipackets[i] += from->q_ipackets[i];
		to->q_opackets[i] += from->q_opackets[i];
		to->q_ibytes[i] += from->q_ibytes[i];
		to->q_obytes[i] += from->q_obytes[i];
		to->q_errors[i] += from->q_errors[i];
	}
}

int
failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct sub_device *sdev = cb_arg;

	if (fs_lock(fs_dev(sdev), 0) != 0)
		return -1;
	/* Switch the tx_dev as soon as possible. */
	fs_switch_dev(fs_dev(sdev), sdev);
	/* Use safe bursts in any case. */
	failsafe_set_burst_fn(fs_dev(sdev), 1);
	/*
	 * Async removal, the sub-PMD will try to unregister
	 * the callback at the source of the current thread context.
	 */
	sdev->remove = 1;
	fs_unlock(fs_dev(sdev), 0);
	return 0;
}

int
failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct rte_eth_dev *dev = cb_arg;
	int ret;

	ret = dev->dev_ops->link_update(dev, 0);
	/* We must pass on the LSC event */
	if (ret)
		return rte_eth_dev_callback_process(dev,
						    RTE_ETH_EVENT_INTR_LSC,
						    NULL);
	else
		return 0;
}

/* Take sub-device ownership before it becomes exposed to the application. */
int
failsafe_eth_new_event_callback(uint16_t port_id,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct rte_eth_dev *fs_dev = cb_arg;
	struct sub_device *sdev;
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
	uint8_t i;

	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
		if (sdev->state >= DEV_PROBED)
			continue;
		if (dev->device == NULL) {
			WARN("Trying to probe malformed device %s.",
			     sdev->devargs.name);
			continue;
		}
		if (strcmp(sdev->devargs.name, dev->device->name) != 0)
			continue;
		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
		/* The actual owner will be checked after the port probing. */
		break;
	}
	return 0;
}