1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018 6WIND S.A. 3 * Copyright 2018 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <linux/if_link.h> 8 #include <linux/rtnetlink.h> 9 #include <linux/genetlink.h> 10 #include <net/if.h> 11 #include <rdma/rdma_netlink.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <stdlib.h> 15 #include <stdalign.h> 16 #include <string.h> 17 #include <sys/socket.h> 18 #include <unistd.h> 19 20 #include <rte_errno.h> 21 22 #include "mlx5_nl.h" 23 #include "../mlx5_common_log.h" 24 #include "mlx5_malloc.h" 25 #ifdef HAVE_DEVLINK 26 #include <linux/devlink.h> 27 #endif 28 29 30 /* Size of the buffer to receive kernel messages */ 31 #define MLX5_NL_BUF_SIZE (32 * 1024) 32 /* Send buffer size for the Netlink socket */ 33 #define MLX5_SEND_BUF_SIZE 32768 34 /* Receive buffer size for the Netlink socket */ 35 #define MLX5_RECV_BUF_SIZE 32768 36 /* Maximal physical port name length. */ 37 #define MLX5_PHYS_PORT_NAME_MAX 128 38 39 /** Parameters of VLAN devices created by driver. */ 40 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" 41 /* 42 * Define NDA_RTA as defined in iproute2 sources. 43 * 44 * see in iproute2 sources file include/libnetlink.h 45 */ 46 #ifndef MLX5_NDA_RTA 47 #define MLX5_NDA_RTA(r) \ 48 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 49 #endif 50 /* 51 * Define NLMSG_TAIL as defined in iproute2 sources. 52 * 53 * see in iproute2 sources file include/libnetlink.h 54 */ 55 #ifndef NLMSG_TAIL 56 #define NLMSG_TAIL(nmsg) \ 57 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 58 #endif 59 /* 60 * The following definitions are normally found in rdma/rdma_netlink.h, 61 * however they are so recent that most systems do not expose them yet. 62 */ 63 #ifndef HAVE_RDMA_NL_NLDEV 64 #define RDMA_NL_NLDEV 5 65 #endif 66 #ifndef HAVE_RDMA_NLDEV_CMD_GET 67 #define RDMA_NLDEV_CMD_GET 1 68 #endif 69 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET 70 #define RDMA_NLDEV_CMD_PORT_GET 5 71 #endif 72 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX 73 #define RDMA_NLDEV_ATTR_DEV_INDEX 1 74 #endif 75 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME 76 #define RDMA_NLDEV_ATTR_DEV_NAME 2 77 #endif 78 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX 79 #define RDMA_NLDEV_ATTR_PORT_INDEX 3 80 #endif 81 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_STATE 82 #define RDMA_NLDEV_ATTR_PORT_STATE 12 83 #endif 84 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX 85 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50 86 #endif 87 88 /* These are normally found in linux/if_link.h. */ 89 #ifndef HAVE_IFLA_NUM_VF 90 #define IFLA_NUM_VF 21 91 #endif 92 #ifndef HAVE_IFLA_EXT_MASK 93 #define IFLA_EXT_MASK 29 94 #endif 95 #ifndef HAVE_IFLA_PHYS_SWITCH_ID 96 #define IFLA_PHYS_SWITCH_ID 36 97 #endif 98 #ifndef HAVE_IFLA_PHYS_PORT_NAME 99 #define IFLA_PHYS_PORT_NAME 38 100 #endif 101 102 /* 103 * Some Devlink defines may be missed in old kernel versions, 104 * adjust used defines. 105 */ 106 #ifndef DEVLINK_GENL_NAME 107 #define DEVLINK_GENL_NAME "devlink" 108 #endif 109 #ifndef DEVLINK_GENL_VERSION 110 #define DEVLINK_GENL_VERSION 1 111 #endif 112 #ifndef DEVLINK_ATTR_BUS_NAME 113 #define DEVLINK_ATTR_BUS_NAME 1 114 #endif 115 #ifndef DEVLINK_ATTR_DEV_NAME 116 #define DEVLINK_ATTR_DEV_NAME 2 117 #endif 118 #ifndef DEVLINK_ATTR_PARAM 119 #define DEVLINK_ATTR_PARAM 80 120 #endif 121 #ifndef DEVLINK_ATTR_PARAM_NAME 122 #define DEVLINK_ATTR_PARAM_NAME 81 123 #endif 124 #ifndef DEVLINK_ATTR_PARAM_TYPE 125 #define DEVLINK_ATTR_PARAM_TYPE 83 126 #endif 127 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST 128 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84 129 #endif 130 #ifndef DEVLINK_ATTR_PARAM_VALUE 131 #define DEVLINK_ATTR_PARAM_VALUE 85 132 #endif 133 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA 134 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86 135 #endif 136 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE 137 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87 138 #endif 139 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT 140 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1 141 #endif 142 #ifndef DEVLINK_CMD_RELOAD 143 #define DEVLINK_CMD_RELOAD 37 144 #endif 145 #ifndef DEVLINK_CMD_PARAM_GET 146 #define DEVLINK_CMD_PARAM_GET 38 147 #endif 148 #ifndef DEVLINK_CMD_PARAM_SET 149 #define DEVLINK_CMD_PARAM_SET 39 150 #endif 151 #ifndef NLA_FLAG 152 #define NLA_FLAG 6 153 #endif 154 155 /* Add/remove MAC address through Netlink */ 156 struct mlx5_nl_mac_addr { 157 struct rte_ether_addr (*mac)[]; 158 /**< MAC address handled by the device. */ 159 int mac_n; /**< Number of addresses in the array. */ 160 }; 161 162 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0) 163 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) 164 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) 165 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) 166 #define MLX5_NL_CMD_GET_PORT_STATE (1 << 4) 167 168 /** Data structure used by mlx5_nl_cmdget_cb(). */ 169 struct mlx5_nl_port_info { 170 const char *name; /**< IB device name (in). */ 171 uint32_t flags; /**< found attribute flags (out). */ 172 uint32_t ibindex; /**< IB device index (out). */ 173 uint32_t ifindex; /**< Network interface index (out). */ 174 uint32_t portnum; /**< IB device max port number (out). */ 175 uint16_t state; /**< IB device port state (out). */ 176 }; 177 178 RTE_ATOMIC(uint32_t) atomic_sn; 179 180 /* Generate Netlink sequence number. */ 181 #define MLX5_NL_SN_GENERATE (rte_atomic_fetch_add_explicit(&atomic_sn, 1, \ 182 rte_memory_order_relaxed) + 1) 183 184 /** 185 * Opens a Netlink socket. 186 * 187 * @param protocol 188 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). 189 * @param groups 190 * Groups to listen (e.g. RTMGRP_LINK), can be 0. 191 * 192 * @return 193 * A file descriptor on success, a negative errno value otherwise and 194 * rte_errno is set. 195 */ 196 int 197 mlx5_nl_init(int protocol, int groups) 198 { 199 int fd; 200 int buf_size; 201 socklen_t opt_size; 202 struct sockaddr_nl local = { 203 .nl_family = AF_NETLINK, 204 .nl_groups = groups, 205 }; 206 int ret; 207 208 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); 209 if (fd == -1) { 210 rte_errno = errno; 211 return -rte_errno; 212 } 213 opt_size = sizeof(buf_size); 214 ret = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf_size, &opt_size); 215 if (ret == -1) { 216 rte_errno = errno; 217 goto error; 218 } 219 DRV_LOG(DEBUG, "Netlink socket send buffer: %d", buf_size); 220 if (buf_size < MLX5_SEND_BUF_SIZE) { 221 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 222 &buf_size, sizeof(buf_size)); 223 if (ret == -1) { 224 rte_errno = errno; 225 goto error; 226 } 227 } 228 opt_size = sizeof(buf_size); 229 ret = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf_size, &opt_size); 230 if (ret == -1) { 231 rte_errno = errno; 232 goto error; 233 } 234 DRV_LOG(DEBUG, "Netlink socket recv buffer: %d", buf_size); 235 if (buf_size < MLX5_RECV_BUF_SIZE) { 236 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 237 &buf_size, sizeof(buf_size)); 238 if (ret == -1) { 239 rte_errno = errno; 240 goto error; 241 } 242 } 243 ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); 244 if (ret == -1) { 245 rte_errno = errno; 246 goto error; 247 } 248 return fd; 249 error: 250 close(fd); 251 return -rte_errno; 252 } 253 254 /** 255 * Send a request message to the kernel on the Netlink socket. 256 * 257 * @param[in] nlsk_fd 258 * Netlink socket file descriptor. 259 * @param[in] nh 260 * The Netlink message send to the kernel. 261 * @param[in] ssn 262 * Sequence number. 263 * @param[in] req 264 * Pointer to the request structure. 265 * @param[in] len 266 * Length of the request in bytes. 267 * 268 * @return 269 * The number of sent bytes on success, a negative errno value otherwise and 270 * rte_errno is set. 271 */ 272 static int 273 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, 274 int len) 275 { 276 struct sockaddr_nl sa = { 277 .nl_family = AF_NETLINK, 278 }; 279 struct iovec iov[2] = { 280 { .iov_base = nh, .iov_len = sizeof(*nh), }, 281 { .iov_base = req, .iov_len = len, }, 282 }; 283 struct msghdr msg = { 284 .msg_name = &sa, 285 .msg_namelen = sizeof(sa), 286 .msg_iov = iov, 287 .msg_iovlen = 2, 288 }; 289 int send_bytes; 290 291 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 292 nh->nlmsg_seq = sn; 293 send_bytes = sendmsg(nlsk_fd, &msg, 0); 294 if (send_bytes < 0) { 295 rte_errno = errno; 296 return -rte_errno; 297 } 298 return send_bytes; 299 } 300 301 /** 302 * Send a message to the kernel on the Netlink socket. 303 * 304 * @param[in] nlsk_fd 305 * The Netlink socket file descriptor used for communication. 306 * @param[in] nh 307 * The Netlink message send to the kernel. 308 * @param[in] sn 309 * Sequence number. 310 * 311 * @return 312 * The number of sent bytes on success, a negative errno value otherwise and 313 * rte_errno is set. 314 */ 315 static int 316 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) 317 { 318 struct sockaddr_nl sa = { 319 .nl_family = AF_NETLINK, 320 }; 321 struct iovec iov = { 322 .iov_base = nh, 323 .iov_len = nh->nlmsg_len, 324 }; 325 struct msghdr msg = { 326 .msg_name = &sa, 327 .msg_namelen = sizeof(sa), 328 .msg_iov = &iov, 329 .msg_iovlen = 1, 330 }; 331 int send_bytes; 332 333 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 334 nh->nlmsg_seq = sn; 335 send_bytes = sendmsg(nlsk_fd, &msg, 0); 336 if (send_bytes < 0) { 337 rte_errno = errno; 338 return -rte_errno; 339 } 340 return send_bytes; 341 } 342 343 /** 344 * Receive a message from the kernel on the Netlink socket, following 345 * mlx5_nl_send(). 346 * 347 * @param[in] nlsk_fd 348 * The Netlink socket file descriptor used for communication. 349 * @param[in] sn 350 * Sequence number. 351 * @param[in] cb 352 * The callback function to call for each Netlink message received. 353 * @param[in, out] arg 354 * Custom arguments for the callback. 355 * 356 * @return 357 * 0 on success, a negative errno value otherwise and rte_errno is set. 358 */ 359 static int 360 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), 361 void *arg) 362 { 363 struct sockaddr_nl sa; 364 struct iovec iov; 365 struct msghdr msg = { 366 .msg_name = &sa, 367 .msg_namelen = sizeof(sa), 368 .msg_iov = &iov, 369 /* One message at a time */ 370 .msg_iovlen = 1, 371 }; 372 void *buf = NULL; 373 int multipart = 0; 374 int ret = 0; 375 376 do { 377 struct nlmsghdr *nh; 378 int recv_bytes; 379 380 do { 381 /* Query length of incoming message. */ 382 iov.iov_base = NULL; 383 iov.iov_len = 0; 384 recv_bytes = recvmsg(nlsk_fd, &msg, 385 MSG_PEEK | MSG_TRUNC); 386 if (recv_bytes < 0) { 387 rte_errno = errno; 388 ret = -rte_errno; 389 goto exit; 390 } 391 if (recv_bytes == 0) { 392 rte_errno = ENODATA; 393 ret = -rte_errno; 394 goto exit; 395 } 396 /* Allocate buffer to fetch the message. */ 397 if (recv_bytes < MLX5_RECV_BUF_SIZE) 398 recv_bytes = MLX5_RECV_BUF_SIZE; 399 mlx5_free(buf); 400 buf = mlx5_malloc(0, recv_bytes, 0, SOCKET_ID_ANY); 401 if (!buf) { 402 rte_errno = ENOMEM; 403 ret = -rte_errno; 404 goto exit; 405 } 406 /* Fetch the message. */ 407 iov.iov_base = buf; 408 iov.iov_len = recv_bytes; 409 recv_bytes = recvmsg(nlsk_fd, &msg, 0); 410 if (recv_bytes == -1) { 411 rte_errno = errno; 412 ret = -rte_errno; 413 goto exit; 414 } 415 nh = (struct nlmsghdr *)buf; 416 } while (nh->nlmsg_seq != sn); 417 for (; 418 NLMSG_OK(nh, (unsigned int)recv_bytes); 419 nh = NLMSG_NEXT(nh, recv_bytes)) { 420 if (nh->nlmsg_type == NLMSG_ERROR) { 421 struct nlmsgerr *err_data = NLMSG_DATA(nh); 422 423 if (err_data->error < 0) { 424 rte_errno = -err_data->error; 425 ret = -rte_errno; 426 goto exit; 427 } 428 /* Ack message. */ 429 ret = 0; 430 goto exit; 431 } 432 /* Multi-part msgs and their trailing DONE message. */ 433 if (nh->nlmsg_flags & NLM_F_MULTI) { 434 if (nh->nlmsg_type == NLMSG_DONE) { 435 ret = 0; 436 goto exit; 437 } 438 multipart = 1; 439 } 440 if (cb) { 441 ret = cb(nh, arg); 442 if (ret < 0) 443 goto exit; 444 } 445 } 446 } while (multipart); 447 exit: 448 mlx5_free(buf); 449 return ret; 450 } 451 452 /** 453 * Parse Netlink message to retrieve the bridge MAC address. 454 * 455 * @param nh 456 * Pointer to Netlink Message Header. 457 * @param arg 458 * PMD data register with this callback. 459 * 460 * @return 461 * 0 on success, a negative errno value otherwise and rte_errno is set. 462 */ 463 static int 464 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) 465 { 466 struct mlx5_nl_mac_addr *data = arg; 467 struct ndmsg *r = NLMSG_DATA(nh); 468 struct rtattr *attribute; 469 int len; 470 471 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); 472 for (attribute = MLX5_NDA_RTA(r); 473 RTA_OK(attribute, len); 474 attribute = RTA_NEXT(attribute, len)) { 475 if (attribute->rta_type == NDA_LLADDR) { 476 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { 477 DRV_LOG(WARNING, 478 "not enough room to finalize the" 479 " request"); 480 rte_errno = ENOMEM; 481 return -rte_errno; 482 } 483 #ifdef RTE_LIBRTE_MLX5_DEBUG 484 char m[RTE_ETHER_ADDR_FMT_SIZE]; 485 486 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, 487 RTA_DATA(attribute)); 488 DRV_LOG(DEBUG, "bridge MAC address %s", m); 489 #endif 490 memcpy(&(*data->mac)[data->mac_n++], 491 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); 492 } 493 } 494 return 0; 495 } 496 497 /** 498 * Get bridge MAC addresses. 499 * 500 * @param[in] nlsk_fd 501 * Netlink socket file descriptor. 502 * @param[in] iface_idx 503 * Net device interface index. 504 * @param mac[out] 505 * Pointer to the array table of MAC addresses to fill. 506 * Its size should be of MLX5_MAX_MAC_ADDRESSES. 507 * @param mac_n[out] 508 * Number of entries filled in MAC array. 509 * 510 * @return 511 * 0 on success, a negative errno value otherwise and rte_errno is set. 512 */ 513 static int 514 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, 515 struct rte_ether_addr (*mac)[], int *mac_n) 516 { 517 struct { 518 struct nlmsghdr hdr; 519 struct ifinfomsg ifm; 520 } req = { 521 .hdr = { 522 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 523 .nlmsg_type = RTM_GETNEIGH, 524 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, 525 }, 526 .ifm = { 527 .ifi_family = PF_BRIDGE, 528 .ifi_index = iface_idx, 529 }, 530 }; 531 struct mlx5_nl_mac_addr data = { 532 .mac = mac, 533 .mac_n = 0, 534 }; 535 uint32_t sn = MLX5_NL_SN_GENERATE; 536 int ret; 537 538 if (nlsk_fd == -1) 539 return 0; 540 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, 541 sizeof(struct ifinfomsg)); 542 if (ret < 0) 543 goto error; 544 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); 545 if (ret < 0) 546 goto error; 547 *mac_n = data.mac_n; 548 return 0; 549 error: 550 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", 551 iface_idx, strerror(rte_errno)); 552 return -rte_errno; 553 } 554 555 /** 556 * Modify the MAC address neighbour table with Netlink. 557 * 558 * @param[in] nlsk_fd 559 * Netlink socket file descriptor. 560 * @param[in] iface_idx 561 * Net device interface index. 562 * @param mac 563 * MAC address to consider. 564 * @param add 565 * 1 to add the MAC address, 0 to remove the MAC address. 566 * 567 * @return 568 * 0 on success, a negative errno value otherwise and rte_errno is set. 569 */ 570 static int 571 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 572 struct rte_ether_addr *mac, int add) 573 { 574 struct { 575 struct nlmsghdr hdr; 576 struct ndmsg ndm; 577 struct rtattr rta; 578 uint8_t buffer[RTE_ETHER_ADDR_LEN]; 579 } req = { 580 .hdr = { 581 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), 582 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 583 NLM_F_EXCL | NLM_F_ACK, 584 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH, 585 }, 586 .ndm = { 587 .ndm_family = PF_BRIDGE, 588 .ndm_state = NUD_NOARP | NUD_PERMANENT, 589 .ndm_ifindex = iface_idx, 590 .ndm_flags = NTF_SELF, 591 }, 592 .rta = { 593 .rta_type = NDA_LLADDR, 594 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), 595 }, 596 }; 597 uint32_t sn = MLX5_NL_SN_GENERATE; 598 int ret; 599 600 if (nlsk_fd == -1) 601 return 0; 602 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); 603 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 604 RTA_ALIGN(req.rta.rta_len); 605 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 606 if (ret < 0) 607 goto error; 608 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 609 if (ret < 0) 610 goto error; 611 return 0; 612 error: 613 #ifdef RTE_LIBRTE_MLX5_DEBUG 614 { 615 char m[RTE_ETHER_ADDR_FMT_SIZE]; 616 617 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac); 618 DRV_LOG(DEBUG, 619 "Interface %u cannot %s MAC address %s %s", 620 iface_idx, 621 add ? "add" : "remove", m, strerror(rte_errno)); 622 } 623 #endif 624 return -rte_errno; 625 } 626 627 /** 628 * Modify the VF MAC address neighbour table with Netlink. 629 * 630 * @param[in] nlsk_fd 631 * Netlink socket file descriptor. 632 * @param[in] iface_idx 633 * Net device interface index. 634 * @param mac 635 * MAC address to consider. 636 * @param vf_index 637 * VF index. 638 * 639 * @return 640 * 0 on success, a negative errno value otherwise and rte_errno is set. 641 */ 642 int 643 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 644 struct rte_ether_addr *mac, int vf_index) 645 { 646 int ret; 647 struct { 648 struct nlmsghdr hdr; 649 struct ifinfomsg ifm; 650 struct rtattr vf_list_rta; 651 struct rtattr vf_info_rta; 652 struct rtattr vf_mac_rta; 653 struct ifla_vf_mac ivm; 654 } req = { 655 .hdr = { 656 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 657 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 658 .nlmsg_type = RTM_BASE, 659 }, 660 .ifm = { 661 .ifi_index = iface_idx, 662 }, 663 .vf_list_rta = { 664 .rta_type = IFLA_VFINFO_LIST, 665 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 666 }, 667 .vf_info_rta = { 668 .rta_type = IFLA_VF_INFO, 669 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 670 }, 671 .vf_mac_rta = { 672 .rta_type = IFLA_VF_MAC, 673 }, 674 }; 675 struct ifla_vf_mac ivm = { 676 .vf = vf_index, 677 }; 678 uint32_t sn = MLX5_NL_SN_GENERATE; 679 680 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); 681 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); 682 683 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); 684 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 685 RTA_ALIGN(req.vf_list_rta.rta_len) + 686 RTA_ALIGN(req.vf_info_rta.rta_len) + 687 RTA_ALIGN(req.vf_mac_rta.rta_len); 688 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 689 &req.vf_list_rta); 690 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 691 &req.vf_info_rta); 692 693 if (nlsk_fd < 0) 694 return -1; 695 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 696 if (ret < 0) 697 goto error; 698 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 699 if (ret < 0) 700 goto error; 701 return 0; 702 error: 703 DRV_LOG(ERR, 704 "representor %u cannot set VF MAC address " 705 RTE_ETHER_ADDR_PRT_FMT " : %s", 706 vf_index, 707 RTE_ETHER_ADDR_BYTES(mac), 708 strerror(rte_errno)); 709 return -rte_errno; 710 } 711 712 /** 713 * Add a MAC address. 714 * 715 * @param[in] nlsk_fd 716 * Netlink socket file descriptor. 717 * @param[in] iface_idx 718 * Net device interface index. 719 * @param mac_own 720 * BITFIELD_DECLARE array to store the mac. 721 * @param mac 722 * MAC address to register. 723 * @param index 724 * MAC address index. 725 * 726 * @return 727 * 0 on success, a negative errno value otherwise and rte_errno is set. 728 */ 729 int 730 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, 731 uint64_t *mac_own, struct rte_ether_addr *mac, 732 uint32_t index) 733 { 734 int ret; 735 736 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); 737 if (!ret) { 738 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 739 if (index >= MLX5_MAX_MAC_ADDRESSES) 740 return -EINVAL; 741 742 BITFIELD_SET(mac_own, index); 743 } 744 if (ret == -EEXIST) 745 return 0; 746 return ret; 747 } 748 749 /** 750 * Remove a MAC address. 751 * 752 * @param[in] nlsk_fd 753 * Netlink socket file descriptor. 754 * @param[in] iface_idx 755 * Net device interface index. 756 * @param mac_own 757 * BITFIELD_DECLARE array to store the mac. 758 * @param mac 759 * MAC address to remove. 760 * @param index 761 * MAC address index. 762 * 763 * @return 764 * 0 on success, a negative errno value otherwise and rte_errno is set. 765 */ 766 int 767 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, 768 struct rte_ether_addr *mac, uint32_t index) 769 { 770 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 771 if (index >= MLX5_MAX_MAC_ADDRESSES) 772 return -EINVAL; 773 774 BITFIELD_RESET(mac_own, index); 775 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); 776 } 777 778 /** 779 * Synchronize Netlink bridge table to the internal table. 780 * 781 * @param[in] nlsk_fd 782 * Netlink socket file descriptor. 783 * @param[in] iface_idx 784 * Net device interface index. 785 * @param mac_addrs 786 * Mac addresses array to sync. 787 * @param n 788 * @p mac_addrs array size. 789 */ 790 void 791 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, 792 struct rte_ether_addr *mac_addrs, int n) 793 { 794 struct rte_ether_addr macs[n]; 795 int macs_n = 0; 796 int i; 797 int ret; 798 799 memset(macs, 0, n * sizeof(macs[0])); 800 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); 801 if (ret) 802 return; 803 for (i = 0; i != macs_n; ++i) { 804 int j; 805 806 /* Verify the address is not in the array yet. */ 807 for (j = 0; j != n; ++j) 808 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) 809 break; 810 if (j != n) 811 continue; 812 if (rte_is_multicast_ether_addr(&macs[i])) { 813 /* Find the first entry available. */ 814 for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) { 815 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 816 mac_addrs[j] = macs[i]; 817 break; 818 } 819 } 820 } else { 821 /* Find the first entry available. */ 822 for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) { 823 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 824 mac_addrs[j] = macs[i]; 825 break; 826 } 827 } 828 } 829 } 830 } 831 832 /** 833 * Flush all added MAC addresses. 834 * 835 * @param[in] nlsk_fd 836 * Netlink socket file descriptor. 837 * @param[in] iface_idx 838 * Net device interface index. 839 * @param[in] mac_addrs 840 * Mac addresses array to flush. 841 * @param n 842 * @p mac_addrs array size. 843 * @param mac_own 844 * BITFIELD_DECLARE array to store the mac. 845 */ 846 void 847 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, 848 struct rte_ether_addr *mac_addrs, int n, 849 uint64_t *mac_own) 850 { 851 int i; 852 853 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES) 854 return; 855 856 for (i = n - 1; i >= 0; --i) { 857 struct rte_ether_addr *m = &mac_addrs[i]; 858 859 if (BITFIELD_ISSET(mac_own, i)) 860 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, 861 i); 862 } 863 } 864 865 /** 866 * Enable promiscuous / all multicast mode through Netlink. 867 * 868 * @param[in] nlsk_fd 869 * Netlink socket file descriptor. 870 * @param[in] iface_idx 871 * Net device interface index. 872 * @param flags 873 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. 874 * @param enable 875 * Nonzero to enable, disable otherwise. 876 * 877 * @return 878 * 0 on success, a negative errno value otherwise and rte_errno is set. 879 */ 880 static int 881 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, 882 int enable) 883 { 884 struct { 885 struct nlmsghdr hdr; 886 struct ifinfomsg ifi; 887 } req = { 888 .hdr = { 889 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 890 .nlmsg_type = RTM_NEWLINK, 891 .nlmsg_flags = NLM_F_REQUEST, 892 }, 893 .ifi = { 894 .ifi_flags = enable ? flags : 0, 895 .ifi_change = flags, 896 .ifi_index = iface_idx, 897 }, 898 }; 899 uint32_t sn = MLX5_NL_SN_GENERATE; 900 int ret; 901 902 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); 903 if (nlsk_fd < 0) 904 return 0; 905 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 906 if (ret < 0) 907 return ret; 908 return 0; 909 } 910 911 /** 912 * Enable promiscuous mode through Netlink. 913 * 914 * @param[in] nlsk_fd 915 * Netlink socket file descriptor. 916 * @param[in] iface_idx 917 * Net device interface index. 918 * @param enable 919 * Nonzero to enable, disable otherwise. 920 * 921 * @return 922 * 0 on success, a negative errno value otherwise and rte_errno is set. 923 */ 924 int 925 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) 926 { 927 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); 928 929 if (ret) 930 DRV_LOG(DEBUG, 931 "Interface %u cannot %s promisc mode: Netlink error %s", 932 iface_idx, enable ? "enable" : "disable", 933 strerror(rte_errno)); 934 return ret; 935 } 936 937 /** 938 * Enable all multicast mode through Netlink. 939 * 940 * @param[in] nlsk_fd 941 * Netlink socket file descriptor. 942 * @param[in] iface_idx 943 * Net device interface index. 944 * @param enable 945 * Nonzero to enable, disable otherwise. 946 * 947 * @return 948 * 0 on success, a negative errno value otherwise and rte_errno is set. 949 */ 950 int 951 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) 952 { 953 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, 954 enable); 955 956 if (ret) 957 DRV_LOG(DEBUG, 958 "Interface %u cannot %s allmulti : Netlink error %s", 959 iface_idx, enable ? "enable" : "disable", 960 strerror(rte_errno)); 961 return ret; 962 } 963 964 /** 965 * Process network interface information from Netlink message. 966 * 967 * @param nh 968 * Pointer to Netlink message header. 969 * @param arg 970 * Opaque data pointer for this callback. 971 * 972 * @return 973 * 0 on success, a negative errno value otherwise and rte_errno is set. 974 */ 975 static int 976 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) 977 { 978 struct mlx5_nl_port_info *data = arg; 979 struct mlx5_nl_port_info local = { 980 .flags = 0, 981 }; 982 size_t off = NLMSG_HDRLEN; 983 984 if (nh->nlmsg_type != 985 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && 986 nh->nlmsg_type != 987 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) 988 goto error; 989 while (off < nh->nlmsg_len) { 990 struct nlattr *na = (void *)((uintptr_t)nh + off); 991 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); 992 993 if (na->nla_len > nh->nlmsg_len - off) 994 goto error; 995 switch (na->nla_type) { 996 case RDMA_NLDEV_ATTR_DEV_INDEX: 997 local.ibindex = *(uint32_t *)payload; 998 local.flags |= MLX5_NL_CMD_GET_IB_INDEX; 999 break; 1000 case RDMA_NLDEV_ATTR_DEV_NAME: 1001 if (!strcmp(payload, data->name)) 1002 local.flags |= MLX5_NL_CMD_GET_IB_NAME; 1003 break; 1004 case RDMA_NLDEV_ATTR_NDEV_INDEX: 1005 local.ifindex = *(uint32_t *)payload; 1006 local.flags |= MLX5_NL_CMD_GET_NET_INDEX; 1007 break; 1008 case RDMA_NLDEV_ATTR_PORT_INDEX: 1009 local.portnum = *(uint32_t *)payload; 1010 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; 1011 break; 1012 case RDMA_NLDEV_ATTR_PORT_STATE: 1013 local.state = *(uint8_t *)payload; 1014 local.flags |= MLX5_NL_CMD_GET_PORT_STATE; 1015 break; 1016 default: 1017 break; 1018 } 1019 off += NLA_ALIGN(na->nla_len); 1020 } 1021 /* 1022 * It is possible to have multiple messages for all 1023 * Infiniband devices in the system with appropriate name. 1024 * So we should gather parameters locally and copy to 1025 * query context only in case of coinciding device name. 1026 */ 1027 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { 1028 data->flags = local.flags; 1029 data->ibindex = local.ibindex; 1030 data->ifindex = local.ifindex; 1031 data->portnum = local.portnum; 1032 data->state = local.state; 1033 } 1034 return 0; 1035 error: 1036 rte_errno = EINVAL; 1037 return -rte_errno; 1038 } 1039 1040 /** 1041 * Get port info of network interface associated with some IB device. 1042 * 1043 * This is the only somewhat safe method to avoid resorting to heuristics 1044 * when faced with port representors. Unfortunately it requires at least 1045 * Linux 4.17. 1046 * 1047 * @param nl 1048 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1049 * @param[in] pindex 1050 * IB device port index, starting from 1 1051 * @param[out] data 1052 * Pointer to port info. 1053 * @return 1054 * 0 on success, negative on error and rte_errno is set. 1055 */ 1056 static int 1057 mlx5_nl_port_info(int nl, uint32_t pindex, struct mlx5_nl_port_info *data) 1058 { 1059 union { 1060 struct nlmsghdr nh; 1061 uint8_t buf[NLMSG_HDRLEN + 1062 NLA_HDRLEN + NLA_ALIGN(sizeof(data->ibindex)) + 1063 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; 1064 } req = { 1065 .nh = { 1066 .nlmsg_len = NLMSG_LENGTH(0), 1067 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1068 RDMA_NLDEV_CMD_GET), 1069 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1070 }, 1071 }; 1072 struct nlattr *na; 1073 uint32_t sn = MLX5_NL_SN_GENERATE; 1074 int ret; 1075 1076 ret = mlx5_nl_send(nl, &req.nh, sn); 1077 if (ret < 0) 1078 return ret; 1079 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data); 1080 if (ret < 0) 1081 return ret; 1082 if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) || 1083 !(data->flags & MLX5_NL_CMD_GET_IB_INDEX)) 1084 goto error; 1085 data->flags = 0; 1086 sn = MLX5_NL_SN_GENERATE; 1087 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1088 RDMA_NLDEV_CMD_PORT_GET); 1089 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1090 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); 1091 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); 1092 na->nla_len = NLA_HDRLEN + sizeof(data->ibindex); 1093 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; 1094 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1095 &data->ibindex, sizeof(data->ibindex)); 1096 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); 1097 na->nla_len = NLA_HDRLEN + sizeof(pindex); 1098 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; 1099 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1100 &pindex, sizeof(pindex)); 1101 ret = mlx5_nl_send(nl, &req.nh, sn); 1102 if (ret < 0) 1103 return ret; 1104 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data); 1105 if (ret < 0) 1106 return ret; 1107 if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) || 1108 !(data->flags & MLX5_NL_CMD_GET_IB_INDEX) || 1109 !(data->flags & MLX5_NL_CMD_GET_NET_INDEX) || 1110 !data->ifindex) 1111 goto error; 1112 return 1; 1113 error: 1114 rte_errno = ENODEV; 1115 return -rte_errno; 1116 } 1117 1118 /** 1119 * Get index of network interface associated with some IB device. 1120 * 1121 * This is the only somewhat safe method to avoid resorting to heuristics 1122 * when faced with port representors. Unfortunately it requires at least 1123 * Linux 4.17. 1124 * 1125 * @param nl 1126 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1127 * @param[in] name 1128 * IB device name. 1129 * @param[in] pindex 1130 * IB device port index, starting from 1 1131 * @return 1132 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno 1133 * is set. 1134 */ 1135 unsigned int 1136 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) 1137 { 1138 struct mlx5_nl_port_info data = { 1139 .ifindex = 0, 1140 .name = name, 1141 }; 1142 1143 if (mlx5_nl_port_info(nl, pindex, &data) < 0) 1144 return 0; 1145 return data.ifindex; 1146 } 1147 1148 /** 1149 * Get IB device port state. 1150 * 1151 * This is the only somewhat safe method to get info for port number >= 255. 1152 * Unfortunately it requires at least Linux 4.17. 1153 * 1154 * @param nl 1155 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1156 * @param[in] name 1157 * IB device name. 1158 * @param[in] pindex 1159 * IB device port index, starting from 1 1160 * @return 1161 * Port state (ibv_port_state) on success, negative on error 1162 * and rte_errno is set. 1163 */ 1164 int 1165 mlx5_nl_port_state(int nl, const char *name, uint32_t pindex) 1166 { 1167 struct mlx5_nl_port_info data = { 1168 .state = 0, 1169 .name = name, 1170 }; 1171 1172 if (mlx5_nl_port_info(nl, pindex, &data) < 0) 1173 return -rte_errno; 1174 if ((data.flags & MLX5_NL_CMD_GET_PORT_STATE) == 0) { 1175 rte_errno = ENOTSUP; 1176 return -rte_errno; 1177 } 1178 return (int)data.state; 1179 } 1180 1181 /** 1182 * Get the number of physical ports of given IB device. 1183 * 1184 * @param nl 1185 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1186 * @param[in] name 1187 * IB device name. 1188 * 1189 * @return 1190 * A valid (nonzero) number of ports on success, 0 otherwise 1191 * and rte_errno is set. 1192 */ 1193 unsigned int 1194 mlx5_nl_portnum(int nl, const char *name) 1195 { 1196 struct mlx5_nl_port_info data = { 1197 .flags = 0, 1198 .name = name, 1199 .ifindex = 0, 1200 .portnum = 0, 1201 }; 1202 struct nlmsghdr req = { 1203 .nlmsg_len = NLMSG_LENGTH(0), 1204 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1205 RDMA_NLDEV_CMD_GET), 1206 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1207 }; 1208 uint32_t sn = MLX5_NL_SN_GENERATE; 1209 int ret; 1210 1211 ret = mlx5_nl_send(nl, &req, sn); 1212 if (ret < 0) 1213 return 0; 1214 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1215 if (ret < 0) 1216 return 0; 1217 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1218 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1219 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { 1220 rte_errno = ENODEV; 1221 return 0; 1222 } 1223 if (!data.portnum) 1224 rte_errno = EINVAL; 1225 return data.portnum; 1226 } 1227 1228 /** 1229 * Analyze gathered port parameters via Netlink to recognize master 1230 * and representor devices for E-Switch configuration. 1231 * 1232 * @param[in] num_vf_set 1233 * flag of presence of number of VFs port attribute. 1234 * @param[inout] switch_info 1235 * Port information, including port name as a number and port name 1236 * type if recognized 1237 * 1238 * @return 1239 * master and representor flags are set in switch_info according to 1240 * recognized parameters (if any). 1241 */ 1242 static void 1243 mlx5_nl_check_switch_info(bool num_vf_set, 1244 struct mlx5_switch_info *switch_info) 1245 { 1246 switch (switch_info->name_type) { 1247 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1248 /* 1249 * Name is not recognized, assume the master, 1250 * check the number of VFs key presence. 1251 */ 1252 switch_info->master = num_vf_set; 1253 break; 1254 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1255 /* 1256 * Name is not set, this assumes the legacy naming 1257 * schema for master, just check if there is a 1258 * number of VFs key. 1259 */ 1260 switch_info->master = num_vf_set; 1261 break; 1262 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1263 /* New uplink naming schema recognized. */ 1264 switch_info->master = 1; 1265 break; 1266 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1267 /* Legacy representors naming schema. */ 1268 switch_info->representor = !num_vf_set; 1269 break; 1270 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: 1271 /* Fallthrough */ 1272 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1273 /* Fallthrough */ 1274 case MLX5_PHYS_PORT_NAME_TYPE_PFSF: 1275 /* New representors naming schema. */ 1276 switch_info->representor = 1; 1277 break; 1278 } 1279 } 1280 1281 /** 1282 * Process switch information from Netlink message. 1283 * 1284 * @param nh 1285 * Pointer to Netlink message header. 1286 * @param arg 1287 * Opaque data pointer for this callback. 1288 * 1289 * @return 1290 * 0 on success, a negative errno value otherwise and rte_errno is set. 1291 */ 1292 static int 1293 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) 1294 { 1295 struct mlx5_switch_info info = { 1296 .master = 0, 1297 .representor = 0, 1298 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1299 .port_name = 0, 1300 .switch_id = 0, 1301 }; 1302 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 1303 bool switch_id_set = false; 1304 bool num_vf_set = false; 1305 int len; 1306 1307 if (nh->nlmsg_type != RTM_NEWLINK) 1308 goto error; 1309 while (off < nh->nlmsg_len) { 1310 struct rtattr *ra = (void *)((uintptr_t)nh + off); 1311 void *payload = RTA_DATA(ra); 1312 unsigned int i; 1313 1314 if (ra->rta_len > nh->nlmsg_len - off) 1315 goto error; 1316 switch (ra->rta_type) { 1317 case IFLA_NUM_VF: 1318 num_vf_set = true; 1319 break; 1320 case IFLA_PHYS_PORT_NAME: 1321 len = RTA_PAYLOAD(ra); 1322 /* Some kernels do not pad attributes with zero. */ 1323 if (len > 0 && len < MLX5_PHYS_PORT_NAME_MAX) { 1324 char name[MLX5_PHYS_PORT_NAME_MAX]; 1325 1326 /* 1327 * We can't just patch the message with padding 1328 * zero - it might corrupt the following items 1329 * in the message, we have to copy the string 1330 * by attribute length and pad the copied one. 1331 */ 1332 memcpy(name, payload, len); 1333 name[len] = 0; 1334 mlx5_translate_port_name(name, &info); 1335 } else { 1336 info.name_type = 1337 MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 1338 } 1339 break; 1340 case IFLA_PHYS_SWITCH_ID: 1341 info.switch_id = 0; 1342 for (i = 0; i < RTA_PAYLOAD(ra); ++i) { 1343 info.switch_id <<= 8; 1344 info.switch_id |= ((uint8_t *)payload)[i]; 1345 } 1346 switch_id_set = true; 1347 break; 1348 } 1349 off += RTA_ALIGN(ra->rta_len); 1350 } 1351 if (switch_id_set) { 1352 /* We have some E-Switch configuration. */ 1353 mlx5_nl_check_switch_info(num_vf_set, &info); 1354 } 1355 MLX5_ASSERT(!(info.master && info.representor)); 1356 memcpy(arg, &info, sizeof(info)); 1357 return 0; 1358 error: 1359 rte_errno = EINVAL; 1360 return -rte_errno; 1361 } 1362 1363 /** 1364 * Get switch information associated with network interface. 1365 * 1366 * @param nl 1367 * Netlink socket of the ROUTE kind (NETLINK_ROUTE). 1368 * @param ifindex 1369 * Network interface index. 1370 * @param[out] info 1371 * Switch information object, populated in case of success. 1372 * 1373 * @return 1374 * 0 on success, a negative errno value otherwise and rte_errno is set. 1375 */ 1376 int 1377 mlx5_nl_switch_info(int nl, unsigned int ifindex, 1378 struct mlx5_switch_info *info) 1379 { 1380 struct { 1381 struct nlmsghdr nh; 1382 struct ifinfomsg info; 1383 struct rtattr rta; 1384 uint32_t extmask; 1385 } req = { 1386 .nh = { 1387 .nlmsg_len = NLMSG_LENGTH 1388 (sizeof(req.info) + 1389 RTA_LENGTH(sizeof(uint32_t))), 1390 .nlmsg_type = RTM_GETLINK, 1391 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1392 }, 1393 .info = { 1394 .ifi_family = AF_UNSPEC, 1395 .ifi_index = ifindex, 1396 }, 1397 .rta = { 1398 .rta_type = IFLA_EXT_MASK, 1399 .rta_len = RTA_LENGTH(sizeof(int32_t)), 1400 }, 1401 .extmask = RTE_LE32(1), 1402 }; 1403 uint32_t sn = MLX5_NL_SN_GENERATE; 1404 int ret; 1405 1406 ret = mlx5_nl_send(nl, &req.nh, sn); 1407 if (ret >= 0) 1408 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); 1409 if (info->master && info->representor) { 1410 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1411 " and as representor", ifindex); 1412 rte_errno = ENODEV; 1413 ret = -rte_errno; 1414 } 1415 return ret; 1416 } 1417 1418 /* 1419 * Delete VLAN network device by ifindex. 1420 * 1421 * @param[in] tcf 1422 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1423 * @param[in] ifindex 1424 * Interface index of network device to delete. 1425 */ 1426 void 1427 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, 1428 uint32_t ifindex) 1429 { 1430 uint32_t sn = MLX5_NL_SN_GENERATE; 1431 int ret; 1432 struct { 1433 struct nlmsghdr nh; 1434 struct ifinfomsg info; 1435 } req = { 1436 .nh = { 1437 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 1438 .nlmsg_type = RTM_DELLINK, 1439 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1440 }, 1441 .info = { 1442 .ifi_family = AF_UNSPEC, 1443 .ifi_index = ifindex, 1444 }, 1445 }; 1446 1447 if (ifindex) { 1448 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn); 1449 if (ret >= 0) 1450 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1451 if (ret < 0) 1452 DRV_LOG(WARNING, "netlink: error deleting VLAN WA" 1453 " ifindex %u, %d", ifindex, ret); 1454 } 1455 } 1456 1457 /* Set of subroutines to build Netlink message. */ 1458 static struct nlattr * 1459 nl_msg_tail(struct nlmsghdr *nlh) 1460 { 1461 return (struct nlattr *) 1462 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); 1463 } 1464 1465 static void 1466 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) 1467 { 1468 struct nlattr *nla = nl_msg_tail(nlh); 1469 1470 nla->nla_type = type; 1471 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen; 1472 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len); 1473 1474 if (alen) 1475 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); 1476 } 1477 1478 static struct nlattr * 1479 nl_attr_nest_start(struct nlmsghdr *nlh, int type) 1480 { 1481 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); 1482 1483 nl_attr_put(nlh, type, NULL, 0); 1484 return nest; 1485 } 1486 1487 static void 1488 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) 1489 { 1490 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; 1491 } 1492 1493 /* 1494 * Create network VLAN device with specified VLAN tag. 1495 * 1496 * @param[in] tcf 1497 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1498 * @param[in] ifindex 1499 * Base network interface index. 1500 * @param[in] tag 1501 * VLAN tag for VLAN network device to create. 1502 */ 1503 uint32_t 1504 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, 1505 uint32_t ifindex, uint16_t tag) 1506 { 1507 struct nlmsghdr *nlh; 1508 struct ifinfomsg *ifm; 1509 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; 1510 1511 alignas(RTE_CACHE_LINE_SIZE) 1512 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1513 NLMSG_ALIGN(sizeof(struct ifinfomsg)) + 1514 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + 1515 NLMSG_ALIGN(sizeof(uint32_t)) + 1516 NLMSG_ALIGN(sizeof(name)) + 1517 NLMSG_ALIGN(sizeof("vlan")) + 1518 NLMSG_ALIGN(sizeof(uint32_t)) + 1519 NLMSG_ALIGN(sizeof(uint16_t)) + 16]; 1520 struct nlattr *na_info; 1521 struct nlattr *na_vlan; 1522 uint32_t sn = MLX5_NL_SN_GENERATE; 1523 int ret; 1524 1525 memset(buf, 0, sizeof(buf)); 1526 nlh = (struct nlmsghdr *)buf; 1527 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1528 nlh->nlmsg_type = RTM_NEWLINK; 1529 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 1530 NLM_F_EXCL | NLM_F_ACK; 1531 ifm = (struct ifinfomsg *)nl_msg_tail(nlh); 1532 nlh->nlmsg_len += sizeof(struct ifinfomsg); 1533 ifm->ifi_family = AF_UNSPEC; 1534 ifm->ifi_type = 0; 1535 ifm->ifi_index = 0; 1536 ifm->ifi_flags = IFF_UP; 1537 ifm->ifi_change = 0xffffffff; 1538 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); 1539 ret = snprintf(name, sizeof(name), "%s.%u.%u", 1540 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); 1541 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); 1542 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); 1543 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); 1544 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); 1545 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); 1546 nl_attr_nest_end(nlh, na_vlan); 1547 nl_attr_nest_end(nlh, na_info); 1548 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len); 1549 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn); 1550 if (ret >= 0) 1551 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1552 if (ret < 0) { 1553 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name, 1554 ret); 1555 } 1556 /* Try to get ifindex of created or pre-existing device. */ 1557 ret = if_nametoindex(name); 1558 if (!ret) { 1559 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name, 1560 errno); 1561 return 0; 1562 } 1563 return ret; 1564 } 1565 1566 /** 1567 * Parse Netlink message to retrieve the general family ID. 1568 * 1569 * @param nh 1570 * Pointer to Netlink Message Header. 1571 * @param arg 1572 * PMD data register with this callback. 1573 * 1574 * @return 1575 * 0 on success, a negative errno value otherwise and rte_errno is set. 1576 */ 1577 static int 1578 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg) 1579 { 1580 1581 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1582 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1583 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1584 1585 for (; nla->nla_len && nla < tail; 1586 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) { 1587 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) { 1588 *(uint16_t *)arg = *(uint16_t *)(nla + 1); 1589 return 0; 1590 } 1591 } 1592 return -EINVAL; 1593 } 1594 1595 #define MLX5_NL_MAX_ATTR_SIZE 100 1596 /** 1597 * Get generic netlink family ID. 1598 * 1599 * @param[in] nlsk_fd 1600 * Netlink socket file descriptor. 1601 * @param[in] name 1602 * The family name. 1603 * 1604 * @return 1605 * ID >= 0 on success and @p enable is updated, a negative errno value 1606 * otherwise and rte_errno is set. 1607 */ 1608 static int 1609 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name) 1610 { 1611 struct nlmsghdr *nlh; 1612 struct genlmsghdr *genl; 1613 uint32_t sn = MLX5_NL_SN_GENERATE; 1614 int name_size = strlen(name) + 1; 1615 int ret; 1616 uint16_t id = -1; 1617 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1618 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1619 NLMSG_ALIGN(sizeof(struct nlattr)) + 1620 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)]; 1621 1622 memset(buf, 0, sizeof(buf)); 1623 nlh = (struct nlmsghdr *)buf; 1624 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1625 nlh->nlmsg_type = GENL_ID_CTRL; 1626 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1627 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1628 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1629 genl->cmd = CTRL_CMD_GETFAMILY; 1630 genl->version = 1; 1631 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size); 1632 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1633 if (ret >= 0) 1634 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id); 1635 if (ret < 0) { 1636 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name, 1637 ret); 1638 return ret; 1639 } 1640 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id); 1641 return (int)id; 1642 } 1643 1644 /** 1645 * Get Devlink family ID. 1646 * 1647 * @param[in] nlsk_fd 1648 * Netlink socket file descriptor. 1649 * 1650 * @return 1651 * ID >= 0 on success and @p enable is updated, a negative errno value 1652 * otherwise and rte_errno is set. 1653 */ 1654 1655 int 1656 mlx5_nl_devlink_family_id_get(int nlsk_fd) 1657 { 1658 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME); 1659 } 1660 1661 /** 1662 * Parse Netlink message to retrieve the ROCE enable status. 1663 * 1664 * @param nh 1665 * Pointer to Netlink Message Header. 1666 * @param arg 1667 * PMD data register with this callback. 1668 * 1669 * @return 1670 * 0 on success, a negative errno value otherwise and rte_errno is set. 1671 */ 1672 static int 1673 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) 1674 { 1675 1676 int ret = -EINVAL; 1677 int *enable = arg; 1678 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1679 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1680 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1681 1682 while (nla->nla_len && nla < tail) { 1683 switch (nla->nla_type) { 1684 /* Expected nested attributes case. */ 1685 case DEVLINK_ATTR_PARAM: 1686 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1687 case DEVLINK_ATTR_PARAM_VALUE: 1688 ret = 0; 1689 nla += 1; 1690 break; 1691 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1692 *enable = 1; 1693 return 0; 1694 default: 1695 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1696 } 1697 } 1698 *enable = 0; 1699 return ret; 1700 } 1701 1702 /** 1703 * Get ROCE enable status through Netlink. 1704 * 1705 * @param[in] nlsk_fd 1706 * Netlink socket file descriptor. 1707 * @param[in] family_id 1708 * the Devlink family ID. 1709 * @param pci_addr 1710 * The device PCI address. 1711 * @param[out] enable 1712 * Where to store the enable status. 1713 * 1714 * @return 1715 * 0 on success and @p enable is updated, a negative errno value otherwise 1716 * and rte_errno is set. 1717 */ 1718 int 1719 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, 1720 int *enable) 1721 { 1722 struct nlmsghdr *nlh; 1723 struct genlmsghdr *genl; 1724 uint32_t sn = MLX5_NL_SN_GENERATE; 1725 int ret; 1726 int cur_en = 0; 1727 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1728 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1729 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 1730 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 1731 1732 memset(buf, 0, sizeof(buf)); 1733 nlh = (struct nlmsghdr *)buf; 1734 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1735 nlh->nlmsg_type = family_id; 1736 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1737 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1738 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1739 genl->cmd = DEVLINK_CMD_PARAM_GET; 1740 genl->version = DEVLINK_GENL_VERSION; 1741 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1742 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1743 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1744 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1745 if (ret >= 0) 1746 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en); 1747 if (ret < 0) { 1748 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.", 1749 pci_addr, ret); 1750 return ret; 1751 } 1752 *enable = cur_en; 1753 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".", 1754 cur_en ? "en" : "dis", pci_addr); 1755 return ret; 1756 } 1757 1758 /** 1759 * Reload mlx5 device kernel driver through Netlink. 1760 * 1761 * @param[in] nlsk_fd 1762 * Netlink socket file descriptor. 1763 * @param[in] family_id 1764 * the Devlink family ID. 1765 * @param pci_addr 1766 * The device PCI address. 1767 * @param[out] enable 1768 * The enable status to set. 1769 * 1770 * @return 1771 * 0 on success, a negative errno value otherwise and rte_errno is set. 1772 */ 1773 static int 1774 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) 1775 { 1776 struct nlmsghdr *nlh; 1777 struct genlmsghdr *genl; 1778 uint32_t sn = MLX5_NL_SN_GENERATE; 1779 int ret; 1780 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1781 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1782 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 + 1783 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2]; 1784 1785 memset(buf, 0, sizeof(buf)); 1786 nlh = (struct nlmsghdr *)buf; 1787 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1788 nlh->nlmsg_type = family_id; 1789 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1790 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1791 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1792 genl->cmd = DEVLINK_CMD_RELOAD; 1793 genl->version = DEVLINK_GENL_VERSION; 1794 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1795 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1796 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1797 if (ret >= 0) 1798 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1799 if (ret < 0) { 1800 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d", 1801 pci_addr, ret); 1802 return ret; 1803 } 1804 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.", 1805 pci_addr); 1806 return 0; 1807 } 1808 1809 /** 1810 * Set ROCE enable status through Netlink. 1811 * 1812 * @param[in] nlsk_fd 1813 * Netlink socket file descriptor. 1814 * @param[in] family_id 1815 * the Devlink family ID. 1816 * @param pci_addr 1817 * The device PCI address. 1818 * @param[out] enable 1819 * The enable status to set. 1820 * 1821 * @return 1822 * 0 on success, a negative errno value otherwise and rte_errno is set. 1823 */ 1824 int 1825 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, 1826 int enable) 1827 { 1828 struct nlmsghdr *nlh; 1829 struct genlmsghdr *genl; 1830 uint32_t sn = MLX5_NL_SN_GENERATE; 1831 int ret; 1832 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1833 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1834 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 + 1835 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6]; 1836 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; 1837 uint8_t ptype = NLA_FLAG; 1838 ; 1839 1840 memset(buf, 0, sizeof(buf)); 1841 nlh = (struct nlmsghdr *)buf; 1842 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1843 nlh->nlmsg_type = family_id; 1844 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1845 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1846 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1847 genl->cmd = DEVLINK_CMD_PARAM_SET; 1848 genl->version = DEVLINK_GENL_VERSION; 1849 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1850 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1851 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1852 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode)); 1853 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype)); 1854 if (enable) 1855 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0); 1856 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1857 if (ret >= 0) 1858 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1859 if (ret < 0) { 1860 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:" 1861 " %d.", enable ? "en" : "dis", pci_addr, ret); 1862 return ret; 1863 } 1864 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.", 1865 pci_addr, enable ? "en" : "dis"); 1866 /* Now, need to reload the driver. */ 1867 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); 1868 } 1869 1870 /** 1871 * Try to parse a Netlink message as a link status update. 1872 * 1873 * @param hdr 1874 * Netlink message header. 1875 * @param[out] ifindex 1876 * Index of the updated interface. 1877 * 1878 * @return 1879 * 0 on success, negative on failure. 1880 */ 1881 int 1882 mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex) 1883 { 1884 struct ifinfomsg *info; 1885 1886 switch (hdr->nlmsg_type) { 1887 case RTM_NEWLINK: 1888 case RTM_DELLINK: 1889 case RTM_GETLINK: 1890 case RTM_SETLINK: 1891 info = NLMSG_DATA(hdr); 1892 *ifindex = info->ifi_index; 1893 return 0; 1894 } 1895 return -1; 1896 } 1897 1898 /** 1899 * Read pending events from a Netlink socket. 1900 * 1901 * @param nlsk_fd 1902 * Netlink socket. 1903 * @param cb 1904 * Callback invoked for each of the events. 1905 * @param cb_arg 1906 * User data for the callback. 1907 * 1908 * @return 1909 * 0 on success, including the case when there are no events. 1910 * Negative on failure and rte_errno is set. 1911 */ 1912 int 1913 mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg) 1914 { 1915 char buf[8192]; 1916 struct sockaddr_nl addr; 1917 struct iovec iov = { 1918 .iov_base = buf, 1919 .iov_len = sizeof(buf), 1920 }; 1921 struct msghdr msg = { 1922 .msg_name = &addr, 1923 .msg_namelen = sizeof(addr), 1924 .msg_iov = &iov, 1925 .msg_iovlen = 1, 1926 }; 1927 struct nlmsghdr *hdr; 1928 ssize_t size; 1929 1930 while (1) { 1931 size = recvmsg(nlsk_fd, &msg, MSG_DONTWAIT); 1932 if (size < 0) { 1933 if (errno == EAGAIN) 1934 return 0; 1935 if (errno == EINTR) 1936 continue; 1937 DRV_LOG(DEBUG, "Failed to receive netlink message: %s", 1938 strerror(errno)); 1939 rte_errno = errno; 1940 return -rte_errno; 1941 } 1942 hdr = (struct nlmsghdr *)buf; 1943 while (size >= (ssize_t)sizeof(*hdr)) { 1944 ssize_t msg_len = hdr->nlmsg_len; 1945 ssize_t data_len = msg_len - sizeof(*hdr); 1946 ssize_t aligned_len; 1947 1948 if (data_len < 0) { 1949 DRV_LOG(DEBUG, "Netlink message too short"); 1950 rte_errno = EINVAL; 1951 return -rte_errno; 1952 } 1953 aligned_len = NLMSG_ALIGN(msg_len); 1954 if (aligned_len > size) { 1955 DRV_LOG(DEBUG, "Netlink message too long"); 1956 rte_errno = EINVAL; 1957 return -rte_errno; 1958 } 1959 cb(hdr, cb_arg); 1960 hdr = RTE_PTR_ADD(hdr, aligned_len); 1961 size -= aligned_len; 1962 } 1963 } 1964 return 0; 1965 } 1966 1967 static int 1968 mlx5_nl_esw_multiport_cb(struct nlmsghdr *nh, void *arg) 1969 { 1970 1971 int ret = -EINVAL; 1972 int *enable = arg; 1973 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1974 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1975 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1976 1977 while (nla->nla_len && nla < tail) { 1978 switch (nla->nla_type) { 1979 /* Expected nested attributes case. */ 1980 case DEVLINK_ATTR_PARAM: 1981 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1982 case DEVLINK_ATTR_PARAM_VALUE: 1983 ret = 0; 1984 nla += 1; 1985 break; 1986 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1987 *enable = 1; 1988 return 0; 1989 default: 1990 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1991 } 1992 } 1993 *enable = 0; 1994 return ret; 1995 } 1996 1997 #define NL_ESW_MULTIPORT_PARAM "esw_multiport" 1998 1999 int 2000 mlx5_nl_devlink_esw_multiport_get(int nlsk_fd, int family_id, const char *pci_addr, int *enable) 2001 { 2002 struct nlmsghdr *nlh; 2003 struct genlmsghdr *genl; 2004 uint32_t sn = MLX5_NL_SN_GENERATE; 2005 int ret; 2006 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 2007 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 2008 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 2009 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 2010 2011 memset(buf, 0, sizeof(buf)); 2012 nlh = (struct nlmsghdr *)buf; 2013 nlh->nlmsg_len = sizeof(struct nlmsghdr); 2014 nlh->nlmsg_type = family_id; 2015 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 2016 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 2017 nlh->nlmsg_len += sizeof(struct genlmsghdr); 2018 genl->cmd = DEVLINK_CMD_PARAM_GET; 2019 genl->version = DEVLINK_GENL_VERSION; 2020 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 2021 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 2022 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, 2023 NL_ESW_MULTIPORT_PARAM, sizeof(NL_ESW_MULTIPORT_PARAM)); 2024 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 2025 if (ret >= 0) 2026 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_esw_multiport_cb, enable); 2027 if (ret < 0) { 2028 DRV_LOG(DEBUG, "Failed to get Multiport E-Switch enable on device %s: %d.", 2029 pci_addr, ret); 2030 return ret; 2031 } 2032 DRV_LOG(DEBUG, "Multiport E-Switch is %sabled for device \"%s\".", 2033 *enable ? "en" : "dis", pci_addr); 2034 return ret; 2035 } 2036