/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_cycles.h>

#include "failsafe_private.h"

/** Print a message out of a flow error. */
static int
fs_flow_complain(struct rte_flow_error *error)
{
	static const char *const errstrlist[] = {
		[RTE_FLOW_ERROR_TYPE_NONE] = "no error",
		[RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
		[RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
		[RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
		[RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
		[RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
		[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
		[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
		[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
		[RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
		[RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
		[RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
	};
	const char *errstr;
	char buf[32];
	int err = rte_errno;

	if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
	    !errstrlist[error->type])
		errstr = "unknown type";
	else
		errstr = errstrlist[error->type];
	ERROR("Caught error type %d (%s): %s%s\n",
	      error->type, errstr,
	      error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
				       error->cause), buf) : "",
	      error->message ? error->message : "(no stated reason)");
	return -err;
}

static int
eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
			 struct sub_device *sdev)
{
	struct rte_flow_error ferror;
	int ret;

	if (!PRIV(dev)->flow_isolated) {
		DEBUG("Flow isolation already disabled");
	} else {
		DEBUG("Enabling flow isolation");
		ret = rte_flow_isolate(PORT_ID(sdev),
				       PRIV(dev)->flow_isolated,
				       &ferror);
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
	}
	return 0;
}
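/*
 * Replay the failsafe port configuration on a freshly probed sub-device,
 * in the order the function below applies it: Rx/Tx queue setup, link
 * status, promiscuous and all-multicast modes, MTU, default and
 * additional MAC addresses, multicast MAC filters, VLAN filters and,
 * finally, flow rules.
 */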
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
		      struct sub_device *sdev)
{
	struct rte_eth_dev *edev;
	struct rte_vlan_filter_conf *vfc1;
	struct rte_vlan_filter_conf *vfc2;
	struct rte_flow *flow;
	struct rte_flow_error ferror;
	uint32_t i;
	int ret;

	edev = ETH(sdev);
	/* RX queue setup */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct rxq *rxq;

		rxq = dev->data->rx_queues[i];
		ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
					     rxq->info.nb_desc, rxq->socket_id,
					     &rxq->info.conf, rxq->info.mp);
		if (ret) {
			ERROR("rx_queue_setup failed");
			return ret;
		}
	}
	/* TX queue setup */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct txq *txq;

		txq = dev->data->tx_queues[i];
		ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
					     txq->info.nb_desc, txq->socket_id,
					     &txq->info.conf);
		if (ret) {
			ERROR("tx_queue_setup failed");
			return ret;
		}
	}
	/* dev_link.link_status */
	if (dev->data->dev_link.link_status !=
	    edev->data->dev_link.link_status) {
		DEBUG("Configuring link_status");
		if (dev->data->dev_link.link_status)
			ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
		else
			ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
		if (ret) {
			ERROR("Failed to apply link_status");
			return ret;
		}
	} else {
		DEBUG("link_status already set");
	}
	/* promiscuous */
	if (dev->data->promiscuous != edev->data->promiscuous) {
		DEBUG("Configuring promiscuous");
		if (dev->data->promiscuous)
			rte_eth_promiscuous_enable(PORT_ID(sdev));
		else
			rte_eth_promiscuous_disable(PORT_ID(sdev));
	} else {
		DEBUG("promiscuous already set");
	}
	/* all_multicast */
	if (dev->data->all_multicast != edev->data->all_multicast) {
		DEBUG("Configuring all_multicast");
		if (dev->data->all_multicast)
			rte_eth_allmulticast_enable(PORT_ID(sdev));
		else
			rte_eth_allmulticast_disable(PORT_ID(sdev));
	} else {
		DEBUG("all_multicast already set");
	}
	/* MTU */
	if (dev->data->mtu != edev->data->mtu) {
		DEBUG("Configuring MTU");
		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
		if (ret) {
			ERROR("Failed to apply MTU");
			return ret;
		}
	} else {
		DEBUG("MTU already set");
	}
	/* default MAC */
	DEBUG("Configuring default MAC address");
	ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
					       &dev->data->mac_addrs[0]);
	if (ret) {
		ERROR("Setting default MAC address failed");
		return ret;
	}
	/* additional MAC */
	if (PRIV(dev)->nb_mac_addr > 1)
		DEBUG("Configure additional MAC address%s",
		      (PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
	for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
		struct rte_ether_addr *ea;

		ea = &dev->data->mac_addrs[i];
		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
					       PRIV(dev)->mac_addr_pool[i]);
		if (ret) {
			char ea_fmt[RTE_ETHER_ADDR_FMT_SIZE];

			rte_ether_format_addr(ea_fmt,
					      RTE_ETHER_ADDR_FMT_SIZE, ea);
			ERROR("Adding MAC address %s failed", ea_fmt);
			return ret;
		}
	}
	/*
	 * Propagate multicast MAC addresses to sub-devices,
	 * if a non-zero number of addresses is set.
	 * The check avoids breaking failsafe for sub-devices
	 * which do not support the operation when the feature
	 * is not actually used.
	 */
	if (PRIV(dev)->nb_mcast_addr > 0) {
		DEBUG("Configuring multicast MAC addresses");
		ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
						   PRIV(dev)->mcast_addrs,
						   PRIV(dev)->nb_mcast_addr);
		if (ret) {
			ERROR("Failed to apply multicast MAC addresses");
			return ret;
		}
	}
	/* VLAN filter */
	vfc1 = &dev->data->vlan_filter_conf;
	vfc2 = &edev->data->vlan_filter_conf;
	if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
		uint64_t vbit;
		uint64_t ids;
		size_t i;
		uint16_t vlan_id;

		DEBUG("Configuring VLAN filter");
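		/*
		 * Each vfc1->ids[] word is a 64-bit VLAN id bitmap; for
		 * every set bit, the loop below recovers its VLAN id and
		 * re-applies the filter. Illustrative walk-through,
		 * assuming ids == 0x6 in word i == 0:
		 *   vbit = ~ids & (ids - 1)  -> 0x1 (bits below the
		 *                               lowest set bit)
		 *   ids ^ (ids - 1)          -> 0x3; XOR with vbit
		 *                               isolates the lowest set
		 *                               bit (0x2), which is then
		 *                               cleared from ids
		 *   shifting vbit to zero    -> vlan_id = 64 * 0 + 1 = 1
		 */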
		for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
			if (vfc1->ids[i] == 0)
				continue;
			ids = vfc1->ids[i];
			while (ids) {
				vlan_id = 64 * i;
				/* count trailing zeroes */
				vbit = ~ids & (ids - 1);
				/* clear least significant bit set */
				ids ^= (ids ^ (ids - 1)) ^ vbit;
				for (; vbit; vlan_id++)
					vbit >>= 1;
				ret = rte_eth_dev_vlan_filter(
					PORT_ID(sdev), vlan_id, 1);
				if (ret) {
					ERROR("Failed to apply VLAN filter %hu",
					      vlan_id);
					return ret;
				}
			}
		}
	} else {
		DEBUG("VLAN filter already set");
	}
	/* rte_flow */
	if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
		DEBUG("rte_flow already set");
	} else {
		DEBUG("Resetting rte_flow configuration");
		ret = rte_flow_flush(PORT_ID(sdev), &ferror);
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
		i = 0;
		rte_errno = 0;
		DEBUG("Configuring rte_flow");
		TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
			DEBUG("Creating flow #%" PRIu32, i++);
			flow->flows[SUB_ID(sdev)] =
				rte_flow_create(PORT_ID(sdev),
						flow->rule.attr,
						flow->rule.pattern,
						flow->rule.actions,
						&ferror);
			ret = rte_errno;
			if (ret)
				break;
		}
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
	}
	return 0;
}
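/*
 * Tear a sub-device down one state at a time; each case in the switch
 * below intentionally falls through to the next, so a DEV_STARTED
 * sub-device is stopped, closed and detached in a single call:
 *   DEV_STARTED -> DEV_ACTIVE -> DEV_PROBED -> DEV_PARSED -> DEV_UNDEFINED
 */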
static void
fs_dev_remove(struct sub_device *sdev)
{
	int ret;

	if (sdev == NULL)
		return;
	switch (sdev->state) {
	case DEV_STARTED:
		failsafe_rx_intr_uninstall_subdevice(sdev);
		rte_eth_dev_stop(PORT_ID(sdev));
		sdev->state = DEV_ACTIVE;
		/* fallthrough */
	case DEV_ACTIVE:
		failsafe_eth_dev_unregister_callbacks(sdev);
		rte_eth_dev_close(PORT_ID(sdev));
		sdev->state = DEV_PROBED;
		/* fallthrough */
	case DEV_PROBED:
		ret = rte_dev_remove(sdev->dev);
		if (ret < 0) {
			ERROR("Bus detach failed for sub_device %u",
			      SUB_ID(sdev));
		} else {
			rte_eth_dev_release_port(ETH(sdev));
		}
		sdev->state = DEV_PARSED;
		/* fallthrough */
	case DEV_PARSED:
	case DEV_UNDEFINED:
		sdev->state = DEV_UNDEFINED;
		sdev->sdev_port_id = RTE_MAX_ETHPORTS;
		/* the end */
		break;
	}
	sdev->remove = 0;
	failsafe_hotplug_alarm_install(fs_dev(sdev));
}

static void
fs_dev_stats_save(struct sub_device *sdev)
{
	struct rte_eth_stats stats;
	int err;

	/* Attempt to read current stats. */
	err = rte_eth_stats_get(PORT_ID(sdev), &stats);
	if (err) {
		uint64_t timestamp = sdev->stats_snapshot.timestamp;

		WARN("Could not access latest statistics from sub-device %d.\n",
		     SUB_ID(sdev));
		if (timestamp != 0)
			WARN("Using latest snapshot taken %"PRIu64" seconds ago.\n",
			     (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
	}
	/* Fall back to the last snapshot when current stats are unreadable. */
	failsafe_stats_increment
		(&PRIV(fs_dev(sdev))->stats_accumulator,
		 err ? &sdev->stats_snapshot.stats : &stats);
	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
}

static inline int
fs_rxtx_clean(struct sub_device *sdev)
{
	uint16_t i;

	for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
		if (FS_ATOMIC_RX(sdev, i))
			return 0;
	for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
		if (FS_ATOMIC_TX(sdev, i))
			return 0;
	return 1;
}

void
failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev)
{
	int ret;

	if (sdev == NULL)
		return;
	if (sdev->rmv_callback) {
		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
						      RTE_ETH_EVENT_INTR_RMV,
						      failsafe_eth_rmv_event_callback,
						      sdev);
		if (ret)
			WARN("Failed to unregister RMV callback for sub_device"
			     " %d", SUB_ID(sdev));
		sdev->rmv_callback = 0;
	}
	if (sdev->lsc_callback) {
		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
						      RTE_ETH_EVENT_INTR_LSC,
						      failsafe_eth_lsc_event_callback,
						      sdev);
		if (ret)
			WARN("Failed to unregister LSC callback for sub_device"
			     " %d", SUB_ID(sdev));
		sdev->lsc_callback = 0;
	}
}

void
failsafe_dev_remove(struct rte_eth_dev *dev)
{
	struct sub_device *sdev;
	uint8_t i;

	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
		if (sdev->remove && fs_rxtx_clean(sdev)) {
			if (fs_lock(dev, 1) != 0)
				return;
			fs_dev_stats_save(sdev);
			fs_dev_remove(sdev);
			fs_unlock(dev, 1);
		}
}
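/*
 * After dev_start(), align each sub-device queue with the failsafe
 * port's recorded per-queue state: deferred-start queues that the
 * application already started must be started by hand, and queues the
 * application stopped must be stopped by hand. The Rx variant follows;
 * the Tx variant below it is symmetrical.
 */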
static int
failsafe_eth_dev_rx_queues_sync(struct rte_eth_dev *dev)
{
	struct rxq *rxq;
	int ret;
	uint16_t i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];

		if (rxq->info.conf.rx_deferred_start &&
		    dev->data->rx_queue_state[i] ==
		    RTE_ETH_QUEUE_STATE_STARTED) {
			/*
			 * The subdevice Rx queue does not launch on device
			 * start if the deferred start flag is set. It needs
			 * to be started manually in case the corresponding
			 * failsafe Rx queue was started earlier.
			 */
			ret = dev->dev_ops->rx_queue_start(dev, i);
			if (ret) {
				ERROR("Could not synchronize Rx queue %d", i);
				return ret;
			}
		} else if (dev->data->rx_queue_state[i] ==
			   RTE_ETH_QUEUE_STATE_STOPPED) {
			/*
			 * The subdevice Rx queue needs to be stopped manually
			 * in case the corresponding failsafe Rx queue was
			 * stopped earlier.
			 */
			ret = dev->dev_ops->rx_queue_stop(dev, i);
			if (ret) {
				ERROR("Could not synchronize Rx queue %d", i);
				return ret;
			}
		}
	}
	return 0;
}

static int
failsafe_eth_dev_tx_queues_sync(struct rte_eth_dev *dev)
{
	struct txq *txq;
	int ret;
	uint16_t i;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];

		if (txq->info.conf.tx_deferred_start &&
		    dev->data->tx_queue_state[i] ==
		    RTE_ETH_QUEUE_STATE_STARTED) {
			/*
			 * The subdevice Tx queue does not launch on device
			 * start if the deferred start flag is set. It needs
			 * to be started manually in case the corresponding
			 * failsafe Tx queue was started earlier.
			 */
			ret = dev->dev_ops->tx_queue_start(dev, i);
			if (ret) {
				ERROR("Could not synchronize Tx queue %d", i);
				return ret;
			}
		} else if (dev->data->tx_queue_state[i] ==
			   RTE_ETH_QUEUE_STATE_STOPPED) {
			/*
			 * The subdevice Tx queue needs to be stopped manually
			 * in case the corresponding failsafe Tx queue was
			 * stopped earlier.
			 */
			ret = dev->dev_ops->tx_queue_stop(dev, i);
			if (ret) {
				ERROR("Could not synchronize Tx queue %d", i);
				return ret;
			}
		}
	}
	return 0;
}
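/*
 * Bring every sub-device up to the failsafe port's own state, one
 * stage at a time: parse devargs (DEV_PARSED), probe on the EAL
 * (DEV_PROBED), configure (DEV_ACTIVE), then start and synchronize
 * queue states (DEV_STARTED). Each stage returns early when the
 * failsafe port itself has not reached it; on error, every sub-device
 * whose state diverges from the failsafe state is flagged for removal.
 */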
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
	struct sub_device *sdev;
	uint32_t inactive;
	int ret;
	uint8_t i;

	if (PRIV(dev)->state < DEV_PARSED)
		return 0;

	ret = failsafe_args_parse_subs(dev);
	if (ret)
		goto err_remove;

	if (PRIV(dev)->state < DEV_PROBED)
		return 0;
	ret = failsafe_eal_init(dev);
	if (ret)
		goto err_remove;
	if (PRIV(dev)->state < DEV_ACTIVE)
		return 0;
	inactive = 0;
	FOREACH_SUBDEV(sdev, i, dev) {
		if (sdev->state == DEV_PROBED) {
			inactive |= UINT32_C(1) << i;
			ret = eth_dev_flow_isolate_set(dev, sdev);
			if (ret) {
				ERROR("Could not apply configuration to sub_device %d",
				      i);
				goto err_remove;
			}
		}
	}
	ret = dev->dev_ops->dev_configure(dev);
	if (ret)
		goto err_remove;
	FOREACH_SUBDEV(sdev, i, dev) {
		if (inactive & (UINT32_C(1) << i)) {
			ret = fs_eth_dev_conf_apply(dev, sdev);
			if (ret) {
				ERROR("Could not apply configuration to sub_device %d",
				      i);
				goto err_remove;
			}
		}
	}
	/*
	 * If new devices have been configured, check if
	 * the link state has changed.
	 */
	if (inactive)
		dev->dev_ops->link_update(dev, 1);
	if (PRIV(dev)->state < DEV_STARTED)
		return 0;
	ret = dev->dev_ops->dev_start(dev);
	if (ret)
		goto err_remove;
	ret = failsafe_eth_dev_rx_queues_sync(dev);
	if (ret)
		goto err_remove;
	ret = failsafe_eth_dev_tx_queues_sync(dev);
	if (ret)
		goto err_remove;
	return 0;
err_remove:
	FOREACH_SUBDEV(sdev, i, dev)
		if (sdev->state != PRIV(dev)->state)
			sdev->remove = 1;
	return ret;
}

void
failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from)
{
	uint32_t i;

	RTE_ASSERT(to != NULL && from != NULL);
	to->ipackets += from->ipackets;
	to->opackets += from->opackets;
	to->ibytes += from->ibytes;
	to->obytes += from->obytes;
	to->imissed += from->imissed;
	to->ierrors += from->ierrors;
	to->oerrors += from->oerrors;
	to->rx_nombuf += from->rx_nombuf;
	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
		to->q_ipackets[i] += from->q_ipackets[i];
		to->q_opackets[i] += from->q_opackets[i];
		to->q_ibytes[i] += from->q_ibytes[i];
		to->q_obytes[i] += from->q_obytes[i];
		to->q_errors[i] += from->q_errors[i];
	}
}

int
failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct sub_device *sdev = cb_arg;

	fs_lock(fs_dev(sdev), 0);
	/* Switch tx_dev as soon as possible. */
	fs_switch_dev(fs_dev(sdev), sdev);
	/* Use safe bursts in any case. */
	failsafe_set_burst_fn(fs_dev(sdev), 1);
	/*
	 * Async removal, the sub-PMD will try to unregister
	 * the callback at the source of the current thread context.
	 */
	sdev->remove = 1;
	fs_unlock(fs_dev(sdev), 0);
	return 0;
}

int
failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct rte_eth_dev *dev = cb_arg;
	int ret;

	ret = dev->dev_ops->link_update(dev, 0);
	/* We must pass on the LSC event */
	if (ret)
		return _rte_eth_dev_callback_process(dev,
						     RTE_ETH_EVENT_INTR_LSC,
						     NULL);
	else
		return 0;
}

/* Take sub-device ownership before it becomes exposed to the application. */
int
failsafe_eth_new_event_callback(uint16_t port_id,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct rte_eth_dev *fs_dev = cb_arg;
	struct sub_device *sdev;
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
	uint8_t i;

	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
		if (sdev->state >= DEV_PROBED)
			continue;
		if (strcmp(sdev->devargs.name, dev->device->name) != 0)
			continue;
		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
		/* The actual owner will be checked after the port probing. */
		break;
	}
	return 0;
}