1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018 6WIND S.A. 3 * Copyright 2018 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <linux/if_link.h> 8 #include <linux/rtnetlink.h> 9 #include <linux/genetlink.h> 10 #include <net/if.h> 11 #include <rdma/rdma_netlink.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <stdlib.h> 15 #include <stdalign.h> 16 #include <string.h> 17 #include <sys/socket.h> 18 #include <unistd.h> 19 20 #include <rte_errno.h> 21 22 #include "mlx5_nl.h" 23 #include "../mlx5_common_log.h" 24 #include "mlx5_malloc.h" 25 #ifdef HAVE_DEVLINK 26 #include <linux/devlink.h> 27 #endif 28 29 30 /* Size of the buffer to receive kernel messages */ 31 #define MLX5_NL_BUF_SIZE (32 * 1024) 32 /* Send buffer size for the Netlink socket */ 33 #define MLX5_SEND_BUF_SIZE 32768 34 /* Receive buffer size for the Netlink socket */ 35 #define MLX5_RECV_BUF_SIZE 32768 36 /* Maximal physical port name length. */ 37 #define MLX5_PHYS_PORT_NAME_MAX 128 38 39 /** Parameters of VLAN devices created by driver. */ 40 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" 41 /* 42 * Define NDA_RTA as defined in iproute2 sources. 43 * 44 * see in iproute2 sources file include/libnetlink.h 45 */ 46 #ifndef MLX5_NDA_RTA 47 #define MLX5_NDA_RTA(r) \ 48 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 49 #endif 50 /* 51 * Define NLMSG_TAIL as defined in iproute2 sources. 52 * 53 * see in iproute2 sources file include/libnetlink.h 54 */ 55 #ifndef NLMSG_TAIL 56 #define NLMSG_TAIL(nmsg) \ 57 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 58 #endif 59 /* 60 * The following definitions are normally found in rdma/rdma_netlink.h, 61 * however they are so recent that most systems do not expose them yet. 62 */ 63 #ifndef HAVE_RDMA_NL_NLDEV 64 #define RDMA_NL_NLDEV 5 65 #endif 66 #ifndef HAVE_RDMA_NLDEV_CMD_GET 67 #define RDMA_NLDEV_CMD_GET 1 68 #endif 69 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET 70 #define RDMA_NLDEV_CMD_PORT_GET 5 71 #endif 72 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX 73 #define RDMA_NLDEV_ATTR_DEV_INDEX 1 74 #endif 75 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME 76 #define RDMA_NLDEV_ATTR_DEV_NAME 2 77 #endif 78 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX 79 #define RDMA_NLDEV_ATTR_PORT_INDEX 3 80 #endif 81 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_STATE 82 #define RDMA_NLDEV_ATTR_PORT_STATE 12 83 #endif 84 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX 85 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50 86 #endif 87 88 /* These are normally found in linux/if_link.h. */ 89 #ifndef HAVE_IFLA_NUM_VF 90 #define IFLA_NUM_VF 21 91 #endif 92 #ifndef HAVE_IFLA_EXT_MASK 93 #define IFLA_EXT_MASK 29 94 #endif 95 #ifndef HAVE_IFLA_PHYS_SWITCH_ID 96 #define IFLA_PHYS_SWITCH_ID 36 97 #endif 98 #ifndef HAVE_IFLA_PHYS_PORT_NAME 99 #define IFLA_PHYS_PORT_NAME 38 100 #endif 101 102 /* 103 * Some Devlink defines may be missed in old kernel versions, 104 * adjust used defines. 105 */ 106 #ifndef DEVLINK_GENL_NAME 107 #define DEVLINK_GENL_NAME "devlink" 108 #endif 109 #ifndef DEVLINK_GENL_VERSION 110 #define DEVLINK_GENL_VERSION 1 111 #endif 112 #ifndef DEVLINK_ATTR_BUS_NAME 113 #define DEVLINK_ATTR_BUS_NAME 1 114 #endif 115 #ifndef DEVLINK_ATTR_DEV_NAME 116 #define DEVLINK_ATTR_DEV_NAME 2 117 #endif 118 #ifndef DEVLINK_ATTR_PARAM 119 #define DEVLINK_ATTR_PARAM 80 120 #endif 121 #ifndef DEVLINK_ATTR_PARAM_NAME 122 #define DEVLINK_ATTR_PARAM_NAME 81 123 #endif 124 #ifndef DEVLINK_ATTR_PARAM_TYPE 125 #define DEVLINK_ATTR_PARAM_TYPE 83 126 #endif 127 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST 128 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84 129 #endif 130 #ifndef DEVLINK_ATTR_PARAM_VALUE 131 #define DEVLINK_ATTR_PARAM_VALUE 85 132 #endif 133 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA 134 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86 135 #endif 136 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE 137 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87 138 #endif 139 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT 140 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1 141 #endif 142 #ifndef DEVLINK_CMD_RELOAD 143 #define DEVLINK_CMD_RELOAD 37 144 #endif 145 #ifndef DEVLINK_CMD_PARAM_GET 146 #define DEVLINK_CMD_PARAM_GET 38 147 #endif 148 #ifndef DEVLINK_CMD_PARAM_SET 149 #define DEVLINK_CMD_PARAM_SET 39 150 #endif 151 #ifndef NLA_FLAG 152 #define NLA_FLAG 6 153 #endif 154 155 /* Add/remove MAC address through Netlink */ 156 struct mlx5_nl_mac_addr { 157 struct rte_ether_addr (*mac)[]; 158 /**< MAC address handled by the device. */ 159 int mac_n; /**< Number of addresses in the array. */ 160 }; 161 162 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0) 163 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) 164 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) 165 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) 166 #define MLX5_NL_CMD_GET_PORT_STATE (1 << 4) 167 168 /** Data structure used by mlx5_nl_cmdget_cb(). */ 169 struct mlx5_nl_port_info { 170 const char *name; /**< IB device name (in). */ 171 uint32_t flags; /**< found attribute flags (out). */ 172 uint32_t ibindex; /**< IB device index (out). */ 173 uint32_t ifindex; /**< Network interface index (out). */ 174 uint32_t portnum; /**< IB device max port number (out). */ 175 uint16_t state; /**< IB device port state (out). */ 176 }; 177 178 uint32_t atomic_sn; 179 180 /* Generate Netlink sequence number. */ 181 #define MLX5_NL_SN_GENERATE (__atomic_fetch_add(&atomic_sn, 1, __ATOMIC_RELAXED) + 1) 182 183 /** 184 * Opens a Netlink socket. 185 * 186 * @param protocol 187 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). 188 * @param groups 189 * Groups to listen (e.g. RTMGRP_LINK), can be 0. 190 * 191 * @return 192 * A file descriptor on success, a negative errno value otherwise and 193 * rte_errno is set. 194 */ 195 int 196 mlx5_nl_init(int protocol, int groups) 197 { 198 int fd; 199 int buf_size; 200 socklen_t opt_size; 201 struct sockaddr_nl local = { 202 .nl_family = AF_NETLINK, 203 .nl_groups = groups, 204 }; 205 int ret; 206 207 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); 208 if (fd == -1) { 209 rte_errno = errno; 210 return -rte_errno; 211 } 212 opt_size = sizeof(buf_size); 213 ret = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf_size, &opt_size); 214 if (ret == -1) { 215 rte_errno = errno; 216 goto error; 217 } 218 DRV_LOG(DEBUG, "Netlink socket send buffer: %d", buf_size); 219 if (buf_size < MLX5_SEND_BUF_SIZE) { 220 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 221 &buf_size, sizeof(buf_size)); 222 if (ret == -1) { 223 rte_errno = errno; 224 goto error; 225 } 226 } 227 opt_size = sizeof(buf_size); 228 ret = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf_size, &opt_size); 229 if (ret == -1) { 230 rte_errno = errno; 231 goto error; 232 } 233 DRV_LOG(DEBUG, "Netlink socket recv buffer: %d", buf_size); 234 if (buf_size < MLX5_RECV_BUF_SIZE) { 235 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 236 &buf_size, sizeof(buf_size)); 237 if (ret == -1) { 238 rte_errno = errno; 239 goto error; 240 } 241 } 242 ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); 243 if (ret == -1) { 244 rte_errno = errno; 245 goto error; 246 } 247 return fd; 248 error: 249 close(fd); 250 return -rte_errno; 251 } 252 253 /** 254 * Send a request message to the kernel on the Netlink socket. 255 * 256 * @param[in] nlsk_fd 257 * Netlink socket file descriptor. 258 * @param[in] nh 259 * The Netlink message send to the kernel. 260 * @param[in] ssn 261 * Sequence number. 262 * @param[in] req 263 * Pointer to the request structure. 264 * @param[in] len 265 * Length of the request in bytes. 266 * 267 * @return 268 * The number of sent bytes on success, a negative errno value otherwise and 269 * rte_errno is set. 270 */ 271 static int 272 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, 273 int len) 274 { 275 struct sockaddr_nl sa = { 276 .nl_family = AF_NETLINK, 277 }; 278 struct iovec iov[2] = { 279 { .iov_base = nh, .iov_len = sizeof(*nh), }, 280 { .iov_base = req, .iov_len = len, }, 281 }; 282 struct msghdr msg = { 283 .msg_name = &sa, 284 .msg_namelen = sizeof(sa), 285 .msg_iov = iov, 286 .msg_iovlen = 2, 287 }; 288 int send_bytes; 289 290 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 291 nh->nlmsg_seq = sn; 292 send_bytes = sendmsg(nlsk_fd, &msg, 0); 293 if (send_bytes < 0) { 294 rte_errno = errno; 295 return -rte_errno; 296 } 297 return send_bytes; 298 } 299 300 /** 301 * Send a message to the kernel on the Netlink socket. 302 * 303 * @param[in] nlsk_fd 304 * The Netlink socket file descriptor used for communication. 305 * @param[in] nh 306 * The Netlink message send to the kernel. 307 * @param[in] sn 308 * Sequence number. 309 * 310 * @return 311 * The number of sent bytes on success, a negative errno value otherwise and 312 * rte_errno is set. 313 */ 314 static int 315 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) 316 { 317 struct sockaddr_nl sa = { 318 .nl_family = AF_NETLINK, 319 }; 320 struct iovec iov = { 321 .iov_base = nh, 322 .iov_len = nh->nlmsg_len, 323 }; 324 struct msghdr msg = { 325 .msg_name = &sa, 326 .msg_namelen = sizeof(sa), 327 .msg_iov = &iov, 328 .msg_iovlen = 1, 329 }; 330 int send_bytes; 331 332 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 333 nh->nlmsg_seq = sn; 334 send_bytes = sendmsg(nlsk_fd, &msg, 0); 335 if (send_bytes < 0) { 336 rte_errno = errno; 337 return -rte_errno; 338 } 339 return send_bytes; 340 } 341 342 /** 343 * Receive a message from the kernel on the Netlink socket, following 344 * mlx5_nl_send(). 345 * 346 * @param[in] nlsk_fd 347 * The Netlink socket file descriptor used for communication. 348 * @param[in] sn 349 * Sequence number. 350 * @param[in] cb 351 * The callback function to call for each Netlink message received. 352 * @param[in, out] arg 353 * Custom arguments for the callback. 354 * 355 * @return 356 * 0 on success, a negative errno value otherwise and rte_errno is set. 357 */ 358 static int 359 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), 360 void *arg) 361 { 362 struct sockaddr_nl sa; 363 struct iovec iov; 364 struct msghdr msg = { 365 .msg_name = &sa, 366 .msg_namelen = sizeof(sa), 367 .msg_iov = &iov, 368 /* One message at a time */ 369 .msg_iovlen = 1, 370 }; 371 void *buf = NULL; 372 int multipart = 0; 373 int ret = 0; 374 375 do { 376 struct nlmsghdr *nh; 377 int recv_bytes; 378 379 do { 380 /* Query length of incoming message. */ 381 iov.iov_base = NULL; 382 iov.iov_len = 0; 383 recv_bytes = recvmsg(nlsk_fd, &msg, 384 MSG_PEEK | MSG_TRUNC); 385 if (recv_bytes < 0) { 386 rte_errno = errno; 387 ret = -rte_errno; 388 goto exit; 389 } 390 if (recv_bytes == 0) { 391 rte_errno = ENODATA; 392 ret = -rte_errno; 393 goto exit; 394 } 395 /* Allocate buffer to fetch the message. */ 396 if (recv_bytes < MLX5_RECV_BUF_SIZE) 397 recv_bytes = MLX5_RECV_BUF_SIZE; 398 mlx5_free(buf); 399 buf = mlx5_malloc(0, recv_bytes, 0, SOCKET_ID_ANY); 400 if (!buf) { 401 rte_errno = ENOMEM; 402 ret = -rte_errno; 403 goto exit; 404 } 405 /* Fetch the message. */ 406 iov.iov_base = buf; 407 iov.iov_len = recv_bytes; 408 recv_bytes = recvmsg(nlsk_fd, &msg, 0); 409 if (recv_bytes == -1) { 410 rte_errno = errno; 411 ret = -rte_errno; 412 goto exit; 413 } 414 nh = (struct nlmsghdr *)buf; 415 } while (nh->nlmsg_seq != sn); 416 for (; 417 NLMSG_OK(nh, (unsigned int)recv_bytes); 418 nh = NLMSG_NEXT(nh, recv_bytes)) { 419 if (nh->nlmsg_type == NLMSG_ERROR) { 420 struct nlmsgerr *err_data = NLMSG_DATA(nh); 421 422 if (err_data->error < 0) { 423 rte_errno = -err_data->error; 424 ret = -rte_errno; 425 goto exit; 426 } 427 /* Ack message. */ 428 ret = 0; 429 goto exit; 430 } 431 /* Multi-part msgs and their trailing DONE message. */ 432 if (nh->nlmsg_flags & NLM_F_MULTI) { 433 if (nh->nlmsg_type == NLMSG_DONE) { 434 ret = 0; 435 goto exit; 436 } 437 multipart = 1; 438 } 439 if (cb) { 440 ret = cb(nh, arg); 441 if (ret < 0) 442 goto exit; 443 } 444 } 445 } while (multipart); 446 exit: 447 mlx5_free(buf); 448 return ret; 449 } 450 451 /** 452 * Parse Netlink message to retrieve the bridge MAC address. 453 * 454 * @param nh 455 * Pointer to Netlink Message Header. 456 * @param arg 457 * PMD data register with this callback. 458 * 459 * @return 460 * 0 on success, a negative errno value otherwise and rte_errno is set. 461 */ 462 static int 463 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) 464 { 465 struct mlx5_nl_mac_addr *data = arg; 466 struct ndmsg *r = NLMSG_DATA(nh); 467 struct rtattr *attribute; 468 int len; 469 470 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); 471 for (attribute = MLX5_NDA_RTA(r); 472 RTA_OK(attribute, len); 473 attribute = RTA_NEXT(attribute, len)) { 474 if (attribute->rta_type == NDA_LLADDR) { 475 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { 476 DRV_LOG(WARNING, 477 "not enough room to finalize the" 478 " request"); 479 rte_errno = ENOMEM; 480 return -rte_errno; 481 } 482 #ifdef RTE_LIBRTE_MLX5_DEBUG 483 char m[RTE_ETHER_ADDR_FMT_SIZE]; 484 485 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, 486 RTA_DATA(attribute)); 487 DRV_LOG(DEBUG, "bridge MAC address %s", m); 488 #endif 489 memcpy(&(*data->mac)[data->mac_n++], 490 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); 491 } 492 } 493 return 0; 494 } 495 496 /** 497 * Get bridge MAC addresses. 498 * 499 * @param[in] nlsk_fd 500 * Netlink socket file descriptor. 501 * @param[in] iface_idx 502 * Net device interface index. 503 * @param mac[out] 504 * Pointer to the array table of MAC addresses to fill. 505 * Its size should be of MLX5_MAX_MAC_ADDRESSES. 506 * @param mac_n[out] 507 * Number of entries filled in MAC array. 508 * 509 * @return 510 * 0 on success, a negative errno value otherwise and rte_errno is set. 511 */ 512 static int 513 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, 514 struct rte_ether_addr (*mac)[], int *mac_n) 515 { 516 struct { 517 struct nlmsghdr hdr; 518 struct ifinfomsg ifm; 519 } req = { 520 .hdr = { 521 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 522 .nlmsg_type = RTM_GETNEIGH, 523 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, 524 }, 525 .ifm = { 526 .ifi_family = PF_BRIDGE, 527 .ifi_index = iface_idx, 528 }, 529 }; 530 struct mlx5_nl_mac_addr data = { 531 .mac = mac, 532 .mac_n = 0, 533 }; 534 uint32_t sn = MLX5_NL_SN_GENERATE; 535 int ret; 536 537 if (nlsk_fd == -1) 538 return 0; 539 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, 540 sizeof(struct ifinfomsg)); 541 if (ret < 0) 542 goto error; 543 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); 544 if (ret < 0) 545 goto error; 546 *mac_n = data.mac_n; 547 return 0; 548 error: 549 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", 550 iface_idx, strerror(rte_errno)); 551 return -rte_errno; 552 } 553 554 /** 555 * Modify the MAC address neighbour table with Netlink. 556 * 557 * @param[in] nlsk_fd 558 * Netlink socket file descriptor. 559 * @param[in] iface_idx 560 * Net device interface index. 561 * @param mac 562 * MAC address to consider. 563 * @param add 564 * 1 to add the MAC address, 0 to remove the MAC address. 565 * 566 * @return 567 * 0 on success, a negative errno value otherwise and rte_errno is set. 568 */ 569 static int 570 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 571 struct rte_ether_addr *mac, int add) 572 { 573 struct { 574 struct nlmsghdr hdr; 575 struct ndmsg ndm; 576 struct rtattr rta; 577 uint8_t buffer[RTE_ETHER_ADDR_LEN]; 578 } req = { 579 .hdr = { 580 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), 581 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 582 NLM_F_EXCL | NLM_F_ACK, 583 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH, 584 }, 585 .ndm = { 586 .ndm_family = PF_BRIDGE, 587 .ndm_state = NUD_NOARP | NUD_PERMANENT, 588 .ndm_ifindex = iface_idx, 589 .ndm_flags = NTF_SELF, 590 }, 591 .rta = { 592 .rta_type = NDA_LLADDR, 593 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), 594 }, 595 }; 596 uint32_t sn = MLX5_NL_SN_GENERATE; 597 int ret; 598 599 if (nlsk_fd == -1) 600 return 0; 601 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); 602 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 603 RTA_ALIGN(req.rta.rta_len); 604 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 605 if (ret < 0) 606 goto error; 607 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 608 if (ret < 0) 609 goto error; 610 return 0; 611 error: 612 #ifdef RTE_LIBRTE_MLX5_DEBUG 613 { 614 char m[RTE_ETHER_ADDR_FMT_SIZE]; 615 616 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac); 617 DRV_LOG(DEBUG, 618 "Interface %u cannot %s MAC address %s %s", 619 iface_idx, 620 add ? "add" : "remove", m, strerror(rte_errno)); 621 } 622 #endif 623 return -rte_errno; 624 } 625 626 /** 627 * Modify the VF MAC address neighbour table with Netlink. 628 * 629 * @param[in] nlsk_fd 630 * Netlink socket file descriptor. 631 * @param[in] iface_idx 632 * Net device interface index. 633 * @param mac 634 * MAC address to consider. 635 * @param vf_index 636 * VF index. 637 * 638 * @return 639 * 0 on success, a negative errno value otherwise and rte_errno is set. 640 */ 641 int 642 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 643 struct rte_ether_addr *mac, int vf_index) 644 { 645 int ret; 646 struct { 647 struct nlmsghdr hdr; 648 struct ifinfomsg ifm; 649 struct rtattr vf_list_rta; 650 struct rtattr vf_info_rta; 651 struct rtattr vf_mac_rta; 652 struct ifla_vf_mac ivm; 653 } req = { 654 .hdr = { 655 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 656 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 657 .nlmsg_type = RTM_BASE, 658 }, 659 .ifm = { 660 .ifi_index = iface_idx, 661 }, 662 .vf_list_rta = { 663 .rta_type = IFLA_VFINFO_LIST, 664 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 665 }, 666 .vf_info_rta = { 667 .rta_type = IFLA_VF_INFO, 668 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 669 }, 670 .vf_mac_rta = { 671 .rta_type = IFLA_VF_MAC, 672 }, 673 }; 674 struct ifla_vf_mac ivm = { 675 .vf = vf_index, 676 }; 677 uint32_t sn = MLX5_NL_SN_GENERATE; 678 679 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); 680 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); 681 682 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); 683 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 684 RTA_ALIGN(req.vf_list_rta.rta_len) + 685 RTA_ALIGN(req.vf_info_rta.rta_len) + 686 RTA_ALIGN(req.vf_mac_rta.rta_len); 687 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 688 &req.vf_list_rta); 689 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 690 &req.vf_info_rta); 691 692 if (nlsk_fd < 0) 693 return -1; 694 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 695 if (ret < 0) 696 goto error; 697 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 698 if (ret < 0) 699 goto error; 700 return 0; 701 error: 702 DRV_LOG(ERR, 703 "representor %u cannot set VF MAC address " 704 RTE_ETHER_ADDR_PRT_FMT " : %s", 705 vf_index, 706 RTE_ETHER_ADDR_BYTES(mac), 707 strerror(rte_errno)); 708 return -rte_errno; 709 } 710 711 /** 712 * Add a MAC address. 713 * 714 * @param[in] nlsk_fd 715 * Netlink socket file descriptor. 716 * @param[in] iface_idx 717 * Net device interface index. 718 * @param mac_own 719 * BITFIELD_DECLARE array to store the mac. 720 * @param mac 721 * MAC address to register. 722 * @param index 723 * MAC address index. 724 * 725 * @return 726 * 0 on success, a negative errno value otherwise and rte_errno is set. 727 */ 728 int 729 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, 730 uint64_t *mac_own, struct rte_ether_addr *mac, 731 uint32_t index) 732 { 733 int ret; 734 735 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); 736 if (!ret) { 737 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 738 if (index >= MLX5_MAX_MAC_ADDRESSES) 739 return -EINVAL; 740 741 BITFIELD_SET(mac_own, index); 742 } 743 if (ret == -EEXIST) 744 return 0; 745 return ret; 746 } 747 748 /** 749 * Remove a MAC address. 750 * 751 * @param[in] nlsk_fd 752 * Netlink socket file descriptor. 753 * @param[in] iface_idx 754 * Net device interface index. 755 * @param mac_own 756 * BITFIELD_DECLARE array to store the mac. 757 * @param mac 758 * MAC address to remove. 759 * @param index 760 * MAC address index. 761 * 762 * @return 763 * 0 on success, a negative errno value otherwise and rte_errno is set. 764 */ 765 int 766 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, 767 struct rte_ether_addr *mac, uint32_t index) 768 { 769 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 770 if (index >= MLX5_MAX_MAC_ADDRESSES) 771 return -EINVAL; 772 773 BITFIELD_RESET(mac_own, index); 774 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); 775 } 776 777 /** 778 * Synchronize Netlink bridge table to the internal table. 779 * 780 * @param[in] nlsk_fd 781 * Netlink socket file descriptor. 782 * @param[in] iface_idx 783 * Net device interface index. 784 * @param mac_addrs 785 * Mac addresses array to sync. 786 * @param n 787 * @p mac_addrs array size. 788 */ 789 void 790 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, 791 struct rte_ether_addr *mac_addrs, int n) 792 { 793 struct rte_ether_addr macs[n]; 794 int macs_n = 0; 795 int i; 796 int ret; 797 798 memset(macs, 0, n * sizeof(macs[0])); 799 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); 800 if (ret) 801 return; 802 for (i = 0; i != macs_n; ++i) { 803 int j; 804 805 /* Verify the address is not in the array yet. */ 806 for (j = 0; j != n; ++j) 807 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) 808 break; 809 if (j != n) 810 continue; 811 if (rte_is_multicast_ether_addr(&macs[i])) { 812 /* Find the first entry available. */ 813 for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) { 814 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 815 mac_addrs[j] = macs[i]; 816 break; 817 } 818 } 819 } else { 820 /* Find the first entry available. */ 821 for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) { 822 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 823 mac_addrs[j] = macs[i]; 824 break; 825 } 826 } 827 } 828 } 829 } 830 831 /** 832 * Flush all added MAC addresses. 833 * 834 * @param[in] nlsk_fd 835 * Netlink socket file descriptor. 836 * @param[in] iface_idx 837 * Net device interface index. 838 * @param[in] mac_addrs 839 * Mac addresses array to flush. 840 * @param n 841 * @p mac_addrs array size. 842 * @param mac_own 843 * BITFIELD_DECLARE array to store the mac. 844 */ 845 void 846 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, 847 struct rte_ether_addr *mac_addrs, int n, 848 uint64_t *mac_own) 849 { 850 int i; 851 852 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES) 853 return; 854 855 for (i = n - 1; i >= 0; --i) { 856 struct rte_ether_addr *m = &mac_addrs[i]; 857 858 if (BITFIELD_ISSET(mac_own, i)) 859 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, 860 i); 861 } 862 } 863 864 /** 865 * Enable promiscuous / all multicast mode through Netlink. 866 * 867 * @param[in] nlsk_fd 868 * Netlink socket file descriptor. 869 * @param[in] iface_idx 870 * Net device interface index. 871 * @param flags 872 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. 873 * @param enable 874 * Nonzero to enable, disable otherwise. 875 * 876 * @return 877 * 0 on success, a negative errno value otherwise and rte_errno is set. 878 */ 879 static int 880 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, 881 int enable) 882 { 883 struct { 884 struct nlmsghdr hdr; 885 struct ifinfomsg ifi; 886 } req = { 887 .hdr = { 888 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 889 .nlmsg_type = RTM_NEWLINK, 890 .nlmsg_flags = NLM_F_REQUEST, 891 }, 892 .ifi = { 893 .ifi_flags = enable ? flags : 0, 894 .ifi_change = flags, 895 .ifi_index = iface_idx, 896 }, 897 }; 898 uint32_t sn = MLX5_NL_SN_GENERATE; 899 int ret; 900 901 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); 902 if (nlsk_fd < 0) 903 return 0; 904 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 905 if (ret < 0) 906 return ret; 907 return 0; 908 } 909 910 /** 911 * Enable promiscuous mode through Netlink. 912 * 913 * @param[in] nlsk_fd 914 * Netlink socket file descriptor. 915 * @param[in] iface_idx 916 * Net device interface index. 917 * @param enable 918 * Nonzero to enable, disable otherwise. 919 * 920 * @return 921 * 0 on success, a negative errno value otherwise and rte_errno is set. 922 */ 923 int 924 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) 925 { 926 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); 927 928 if (ret) 929 DRV_LOG(DEBUG, 930 "Interface %u cannot %s promisc mode: Netlink error %s", 931 iface_idx, enable ? "enable" : "disable", 932 strerror(rte_errno)); 933 return ret; 934 } 935 936 /** 937 * Enable all multicast mode through Netlink. 938 * 939 * @param[in] nlsk_fd 940 * Netlink socket file descriptor. 941 * @param[in] iface_idx 942 * Net device interface index. 943 * @param enable 944 * Nonzero to enable, disable otherwise. 945 * 946 * @return 947 * 0 on success, a negative errno value otherwise and rte_errno is set. 948 */ 949 int 950 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) 951 { 952 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, 953 enable); 954 955 if (ret) 956 DRV_LOG(DEBUG, 957 "Interface %u cannot %s allmulti : Netlink error %s", 958 iface_idx, enable ? "enable" : "disable", 959 strerror(rte_errno)); 960 return ret; 961 } 962 963 /** 964 * Process network interface information from Netlink message. 965 * 966 * @param nh 967 * Pointer to Netlink message header. 968 * @param arg 969 * Opaque data pointer for this callback. 970 * 971 * @return 972 * 0 on success, a negative errno value otherwise and rte_errno is set. 973 */ 974 static int 975 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) 976 { 977 struct mlx5_nl_port_info *data = arg; 978 struct mlx5_nl_port_info local = { 979 .flags = 0, 980 }; 981 size_t off = NLMSG_HDRLEN; 982 983 if (nh->nlmsg_type != 984 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && 985 nh->nlmsg_type != 986 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) 987 goto error; 988 while (off < nh->nlmsg_len) { 989 struct nlattr *na = (void *)((uintptr_t)nh + off); 990 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); 991 992 if (na->nla_len > nh->nlmsg_len - off) 993 goto error; 994 switch (na->nla_type) { 995 case RDMA_NLDEV_ATTR_DEV_INDEX: 996 local.ibindex = *(uint32_t *)payload; 997 local.flags |= MLX5_NL_CMD_GET_IB_INDEX; 998 break; 999 case RDMA_NLDEV_ATTR_DEV_NAME: 1000 if (!strcmp(payload, data->name)) 1001 local.flags |= MLX5_NL_CMD_GET_IB_NAME; 1002 break; 1003 case RDMA_NLDEV_ATTR_NDEV_INDEX: 1004 local.ifindex = *(uint32_t *)payload; 1005 local.flags |= MLX5_NL_CMD_GET_NET_INDEX; 1006 break; 1007 case RDMA_NLDEV_ATTR_PORT_INDEX: 1008 local.portnum = *(uint32_t *)payload; 1009 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; 1010 break; 1011 case RDMA_NLDEV_ATTR_PORT_STATE: 1012 local.state = *(uint8_t *)payload; 1013 local.flags |= MLX5_NL_CMD_GET_PORT_STATE; 1014 break; 1015 default: 1016 break; 1017 } 1018 off += NLA_ALIGN(na->nla_len); 1019 } 1020 /* 1021 * It is possible to have multiple messages for all 1022 * Infiniband devices in the system with appropriate name. 1023 * So we should gather parameters locally and copy to 1024 * query context only in case of coinciding device name. 1025 */ 1026 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { 1027 data->flags = local.flags; 1028 data->ibindex = local.ibindex; 1029 data->ifindex = local.ifindex; 1030 data->portnum = local.portnum; 1031 data->state = local.state; 1032 } 1033 return 0; 1034 error: 1035 rte_errno = EINVAL; 1036 return -rte_errno; 1037 } 1038 1039 /** 1040 * Get port info of network interface associated with some IB device. 1041 * 1042 * This is the only somewhat safe method to avoid resorting to heuristics 1043 * when faced with port representors. Unfortunately it requires at least 1044 * Linux 4.17. 1045 * 1046 * @param nl 1047 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1048 * @param[in] pindex 1049 * IB device port index, starting from 1 1050 * @param[out] data 1051 * Pointer to port info. 1052 * @return 1053 * 0 on success, negative on error and rte_errno is set. 1054 */ 1055 static int 1056 mlx5_nl_port_info(int nl, uint32_t pindex, struct mlx5_nl_port_info *data) 1057 { 1058 union { 1059 struct nlmsghdr nh; 1060 uint8_t buf[NLMSG_HDRLEN + 1061 NLA_HDRLEN + NLA_ALIGN(sizeof(data->ibindex)) + 1062 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; 1063 } req = { 1064 .nh = { 1065 .nlmsg_len = NLMSG_LENGTH(0), 1066 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1067 RDMA_NLDEV_CMD_GET), 1068 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1069 }, 1070 }; 1071 struct nlattr *na; 1072 uint32_t sn = MLX5_NL_SN_GENERATE; 1073 int ret; 1074 1075 ret = mlx5_nl_send(nl, &req.nh, sn); 1076 if (ret < 0) 1077 return ret; 1078 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data); 1079 if (ret < 0) 1080 return ret; 1081 if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) || 1082 !(data->flags & MLX5_NL_CMD_GET_IB_INDEX)) 1083 goto error; 1084 data->flags = 0; 1085 sn = MLX5_NL_SN_GENERATE; 1086 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1087 RDMA_NLDEV_CMD_PORT_GET); 1088 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1089 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); 1090 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); 1091 na->nla_len = NLA_HDRLEN + sizeof(data->ibindex); 1092 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; 1093 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1094 &data->ibindex, sizeof(data->ibindex)); 1095 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); 1096 na->nla_len = NLA_HDRLEN + sizeof(pindex); 1097 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; 1098 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1099 &pindex, sizeof(pindex)); 1100 ret = mlx5_nl_send(nl, &req.nh, sn); 1101 if (ret < 0) 1102 return ret; 1103 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data); 1104 if (ret < 0) 1105 return ret; 1106 if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) || 1107 !(data->flags & MLX5_NL_CMD_GET_IB_INDEX) || 1108 !(data->flags & MLX5_NL_CMD_GET_NET_INDEX) || 1109 !data->ifindex) 1110 goto error; 1111 return 1; 1112 error: 1113 rte_errno = ENODEV; 1114 return -rte_errno; 1115 } 1116 1117 /** 1118 * Get index of network interface associated with some IB device. 1119 * 1120 * This is the only somewhat safe method to avoid resorting to heuristics 1121 * when faced with port representors. Unfortunately it requires at least 1122 * Linux 4.17. 1123 * 1124 * @param nl 1125 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1126 * @param[in] name 1127 * IB device name. 1128 * @param[in] pindex 1129 * IB device port index, starting from 1 1130 * @return 1131 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno 1132 * is set. 1133 */ 1134 unsigned int 1135 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) 1136 { 1137 struct mlx5_nl_port_info data = { 1138 .ifindex = 0, 1139 .name = name, 1140 }; 1141 1142 if (mlx5_nl_port_info(nl, pindex, &data) < 0) 1143 return 0; 1144 return data.ifindex; 1145 } 1146 1147 /** 1148 * Get IB device port state. 1149 * 1150 * This is the only somewhat safe method to get info for port number >= 255. 1151 * Unfortunately it requires at least Linux 4.17. 1152 * 1153 * @param nl 1154 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1155 * @param[in] name 1156 * IB device name. 1157 * @param[in] pindex 1158 * IB device port index, starting from 1 1159 * @return 1160 * Port state (ibv_port_state) on success, negative on error 1161 * and rte_errno is set. 1162 */ 1163 int 1164 mlx5_nl_port_state(int nl, const char *name, uint32_t pindex) 1165 { 1166 struct mlx5_nl_port_info data = { 1167 .state = 0, 1168 .name = name, 1169 }; 1170 1171 if (mlx5_nl_port_info(nl, pindex, &data) < 0) 1172 return -rte_errno; 1173 if ((data.flags & MLX5_NL_CMD_GET_PORT_STATE) == 0) { 1174 rte_errno = ENOTSUP; 1175 return -rte_errno; 1176 } 1177 return (int)data.state; 1178 } 1179 1180 /** 1181 * Get the number of physical ports of given IB device. 1182 * 1183 * @param nl 1184 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1185 * @param[in] name 1186 * IB device name. 1187 * 1188 * @return 1189 * A valid (nonzero) number of ports on success, 0 otherwise 1190 * and rte_errno is set. 1191 */ 1192 unsigned int 1193 mlx5_nl_portnum(int nl, const char *name) 1194 { 1195 struct mlx5_nl_port_info data = { 1196 .flags = 0, 1197 .name = name, 1198 .ifindex = 0, 1199 .portnum = 0, 1200 }; 1201 struct nlmsghdr req = { 1202 .nlmsg_len = NLMSG_LENGTH(0), 1203 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1204 RDMA_NLDEV_CMD_GET), 1205 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1206 }; 1207 uint32_t sn = MLX5_NL_SN_GENERATE; 1208 int ret; 1209 1210 ret = mlx5_nl_send(nl, &req, sn); 1211 if (ret < 0) 1212 return 0; 1213 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1214 if (ret < 0) 1215 return 0; 1216 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1217 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1218 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { 1219 rte_errno = ENODEV; 1220 return 0; 1221 } 1222 if (!data.portnum) 1223 rte_errno = EINVAL; 1224 return data.portnum; 1225 } 1226 1227 /** 1228 * Analyze gathered port parameters via Netlink to recognize master 1229 * and representor devices for E-Switch configuration. 1230 * 1231 * @param[in] num_vf_set 1232 * flag of presence of number of VFs port attribute. 1233 * @param[inout] switch_info 1234 * Port information, including port name as a number and port name 1235 * type if recognized 1236 * 1237 * @return 1238 * master and representor flags are set in switch_info according to 1239 * recognized parameters (if any). 1240 */ 1241 static void 1242 mlx5_nl_check_switch_info(bool num_vf_set, 1243 struct mlx5_switch_info *switch_info) 1244 { 1245 switch (switch_info->name_type) { 1246 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1247 /* 1248 * Name is not recognized, assume the master, 1249 * check the number of VFs key presence. 1250 */ 1251 switch_info->master = num_vf_set; 1252 break; 1253 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1254 /* 1255 * Name is not set, this assumes the legacy naming 1256 * schema for master, just check if there is a 1257 * number of VFs key. 1258 */ 1259 switch_info->master = num_vf_set; 1260 break; 1261 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1262 /* New uplink naming schema recognized. */ 1263 switch_info->master = 1; 1264 break; 1265 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1266 /* Legacy representors naming schema. */ 1267 switch_info->representor = !num_vf_set; 1268 break; 1269 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: 1270 /* Fallthrough */ 1271 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1272 /* Fallthrough */ 1273 case MLX5_PHYS_PORT_NAME_TYPE_PFSF: 1274 /* New representors naming schema. */ 1275 switch_info->representor = 1; 1276 break; 1277 } 1278 } 1279 1280 /** 1281 * Process switch information from Netlink message. 1282 * 1283 * @param nh 1284 * Pointer to Netlink message header. 1285 * @param arg 1286 * Opaque data pointer for this callback. 1287 * 1288 * @return 1289 * 0 on success, a negative errno value otherwise and rte_errno is set. 1290 */ 1291 static int 1292 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) 1293 { 1294 struct mlx5_switch_info info = { 1295 .master = 0, 1296 .representor = 0, 1297 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1298 .port_name = 0, 1299 .switch_id = 0, 1300 }; 1301 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 1302 bool switch_id_set = false; 1303 bool num_vf_set = false; 1304 int len; 1305 1306 if (nh->nlmsg_type != RTM_NEWLINK) 1307 goto error; 1308 while (off < nh->nlmsg_len) { 1309 struct rtattr *ra = (void *)((uintptr_t)nh + off); 1310 void *payload = RTA_DATA(ra); 1311 unsigned int i; 1312 1313 if (ra->rta_len > nh->nlmsg_len - off) 1314 goto error; 1315 switch (ra->rta_type) { 1316 case IFLA_NUM_VF: 1317 num_vf_set = true; 1318 break; 1319 case IFLA_PHYS_PORT_NAME: 1320 len = RTA_PAYLOAD(ra); 1321 /* Some kernels do not pad attributes with zero. */ 1322 if (len > 0 && len < MLX5_PHYS_PORT_NAME_MAX) { 1323 char name[MLX5_PHYS_PORT_NAME_MAX]; 1324 1325 /* 1326 * We can't just patch the message with padding 1327 * zero - it might corrupt the following items 1328 * in the message, we have to copy the string 1329 * by attribute length and pad the copied one. 1330 */ 1331 memcpy(name, payload, len); 1332 name[len] = 0; 1333 mlx5_translate_port_name(name, &info); 1334 } else { 1335 info.name_type = 1336 MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 1337 } 1338 break; 1339 case IFLA_PHYS_SWITCH_ID: 1340 info.switch_id = 0; 1341 for (i = 0; i < RTA_PAYLOAD(ra); ++i) { 1342 info.switch_id <<= 8; 1343 info.switch_id |= ((uint8_t *)payload)[i]; 1344 } 1345 switch_id_set = true; 1346 break; 1347 } 1348 off += RTA_ALIGN(ra->rta_len); 1349 } 1350 if (switch_id_set) { 1351 /* We have some E-Switch configuration. */ 1352 mlx5_nl_check_switch_info(num_vf_set, &info); 1353 } 1354 MLX5_ASSERT(!(info.master && info.representor)); 1355 memcpy(arg, &info, sizeof(info)); 1356 return 0; 1357 error: 1358 rte_errno = EINVAL; 1359 return -rte_errno; 1360 } 1361 1362 /** 1363 * Get switch information associated with network interface. 1364 * 1365 * @param nl 1366 * Netlink socket of the ROUTE kind (NETLINK_ROUTE). 1367 * @param ifindex 1368 * Network interface index. 1369 * @param[out] info 1370 * Switch information object, populated in case of success. 1371 * 1372 * @return 1373 * 0 on success, a negative errno value otherwise and rte_errno is set. 1374 */ 1375 int 1376 mlx5_nl_switch_info(int nl, unsigned int ifindex, 1377 struct mlx5_switch_info *info) 1378 { 1379 struct { 1380 struct nlmsghdr nh; 1381 struct ifinfomsg info; 1382 struct rtattr rta; 1383 uint32_t extmask; 1384 } req = { 1385 .nh = { 1386 .nlmsg_len = NLMSG_LENGTH 1387 (sizeof(req.info) + 1388 RTA_LENGTH(sizeof(uint32_t))), 1389 .nlmsg_type = RTM_GETLINK, 1390 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1391 }, 1392 .info = { 1393 .ifi_family = AF_UNSPEC, 1394 .ifi_index = ifindex, 1395 }, 1396 .rta = { 1397 .rta_type = IFLA_EXT_MASK, 1398 .rta_len = RTA_LENGTH(sizeof(int32_t)), 1399 }, 1400 .extmask = RTE_LE32(1), 1401 }; 1402 uint32_t sn = MLX5_NL_SN_GENERATE; 1403 int ret; 1404 1405 ret = mlx5_nl_send(nl, &req.nh, sn); 1406 if (ret >= 0) 1407 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); 1408 if (info->master && info->representor) { 1409 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1410 " and as representor", ifindex); 1411 rte_errno = ENODEV; 1412 ret = -rte_errno; 1413 } 1414 return ret; 1415 } 1416 1417 /* 1418 * Delete VLAN network device by ifindex. 1419 * 1420 * @param[in] tcf 1421 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1422 * @param[in] ifindex 1423 * Interface index of network device to delete. 1424 */ 1425 void 1426 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, 1427 uint32_t ifindex) 1428 { 1429 uint32_t sn = MLX5_NL_SN_GENERATE; 1430 int ret; 1431 struct { 1432 struct nlmsghdr nh; 1433 struct ifinfomsg info; 1434 } req = { 1435 .nh = { 1436 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 1437 .nlmsg_type = RTM_DELLINK, 1438 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1439 }, 1440 .info = { 1441 .ifi_family = AF_UNSPEC, 1442 .ifi_index = ifindex, 1443 }, 1444 }; 1445 1446 if (ifindex) { 1447 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn); 1448 if (ret >= 0) 1449 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1450 if (ret < 0) 1451 DRV_LOG(WARNING, "netlink: error deleting VLAN WA" 1452 " ifindex %u, %d", ifindex, ret); 1453 } 1454 } 1455 1456 /* Set of subroutines to build Netlink message. */ 1457 static struct nlattr * 1458 nl_msg_tail(struct nlmsghdr *nlh) 1459 { 1460 return (struct nlattr *) 1461 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); 1462 } 1463 1464 static void 1465 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) 1466 { 1467 struct nlattr *nla = nl_msg_tail(nlh); 1468 1469 nla->nla_type = type; 1470 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen; 1471 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len); 1472 1473 if (alen) 1474 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); 1475 } 1476 1477 static struct nlattr * 1478 nl_attr_nest_start(struct nlmsghdr *nlh, int type) 1479 { 1480 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); 1481 1482 nl_attr_put(nlh, type, NULL, 0); 1483 return nest; 1484 } 1485 1486 static void 1487 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) 1488 { 1489 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; 1490 } 1491 1492 /* 1493 * Create network VLAN device with specified VLAN tag. 1494 * 1495 * @param[in] tcf 1496 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1497 * @param[in] ifindex 1498 * Base network interface index. 1499 * @param[in] tag 1500 * VLAN tag for VLAN network device to create. 1501 */ 1502 uint32_t 1503 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, 1504 uint32_t ifindex, uint16_t tag) 1505 { 1506 struct nlmsghdr *nlh; 1507 struct ifinfomsg *ifm; 1508 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; 1509 1510 __rte_cache_aligned 1511 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1512 NLMSG_ALIGN(sizeof(struct ifinfomsg)) + 1513 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + 1514 NLMSG_ALIGN(sizeof(uint32_t)) + 1515 NLMSG_ALIGN(sizeof(name)) + 1516 NLMSG_ALIGN(sizeof("vlan")) + 1517 NLMSG_ALIGN(sizeof(uint32_t)) + 1518 NLMSG_ALIGN(sizeof(uint16_t)) + 16]; 1519 struct nlattr *na_info; 1520 struct nlattr *na_vlan; 1521 uint32_t sn = MLX5_NL_SN_GENERATE; 1522 int ret; 1523 1524 memset(buf, 0, sizeof(buf)); 1525 nlh = (struct nlmsghdr *)buf; 1526 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1527 nlh->nlmsg_type = RTM_NEWLINK; 1528 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 1529 NLM_F_EXCL | NLM_F_ACK; 1530 ifm = (struct ifinfomsg *)nl_msg_tail(nlh); 1531 nlh->nlmsg_len += sizeof(struct ifinfomsg); 1532 ifm->ifi_family = AF_UNSPEC; 1533 ifm->ifi_type = 0; 1534 ifm->ifi_index = 0; 1535 ifm->ifi_flags = IFF_UP; 1536 ifm->ifi_change = 0xffffffff; 1537 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); 1538 ret = snprintf(name, sizeof(name), "%s.%u.%u", 1539 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); 1540 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); 1541 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); 1542 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); 1543 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); 1544 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); 1545 nl_attr_nest_end(nlh, na_vlan); 1546 nl_attr_nest_end(nlh, na_info); 1547 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len); 1548 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn); 1549 if (ret >= 0) 1550 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1551 if (ret < 0) { 1552 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name, 1553 ret); 1554 } 1555 /* Try to get ifindex of created or pre-existing device. */ 1556 ret = if_nametoindex(name); 1557 if (!ret) { 1558 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name, 1559 errno); 1560 return 0; 1561 } 1562 return ret; 1563 } 1564 1565 /** 1566 * Parse Netlink message to retrieve the general family ID. 1567 * 1568 * @param nh 1569 * Pointer to Netlink Message Header. 1570 * @param arg 1571 * PMD data register with this callback. 1572 * 1573 * @return 1574 * 0 on success, a negative errno value otherwise and rte_errno is set. 1575 */ 1576 static int 1577 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg) 1578 { 1579 1580 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1581 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1582 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1583 1584 for (; nla->nla_len && nla < tail; 1585 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) { 1586 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) { 1587 *(uint16_t *)arg = *(uint16_t *)(nla + 1); 1588 return 0; 1589 } 1590 } 1591 return -EINVAL; 1592 } 1593 1594 #define MLX5_NL_MAX_ATTR_SIZE 100 1595 /** 1596 * Get generic netlink family ID. 1597 * 1598 * @param[in] nlsk_fd 1599 * Netlink socket file descriptor. 1600 * @param[in] name 1601 * The family name. 1602 * 1603 * @return 1604 * ID >= 0 on success and @p enable is updated, a negative errno value 1605 * otherwise and rte_errno is set. 1606 */ 1607 static int 1608 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name) 1609 { 1610 struct nlmsghdr *nlh; 1611 struct genlmsghdr *genl; 1612 uint32_t sn = MLX5_NL_SN_GENERATE; 1613 int name_size = strlen(name) + 1; 1614 int ret; 1615 uint16_t id = -1; 1616 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1617 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1618 NLMSG_ALIGN(sizeof(struct nlattr)) + 1619 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)]; 1620 1621 memset(buf, 0, sizeof(buf)); 1622 nlh = (struct nlmsghdr *)buf; 1623 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1624 nlh->nlmsg_type = GENL_ID_CTRL; 1625 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1626 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1627 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1628 genl->cmd = CTRL_CMD_GETFAMILY; 1629 genl->version = 1; 1630 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size); 1631 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1632 if (ret >= 0) 1633 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id); 1634 if (ret < 0) { 1635 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name, 1636 ret); 1637 return ret; 1638 } 1639 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id); 1640 return (int)id; 1641 } 1642 1643 /** 1644 * Get Devlink family ID. 1645 * 1646 * @param[in] nlsk_fd 1647 * Netlink socket file descriptor. 1648 * 1649 * @return 1650 * ID >= 0 on success and @p enable is updated, a negative errno value 1651 * otherwise and rte_errno is set. 1652 */ 1653 1654 int 1655 mlx5_nl_devlink_family_id_get(int nlsk_fd) 1656 { 1657 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME); 1658 } 1659 1660 /** 1661 * Parse Netlink message to retrieve the ROCE enable status. 1662 * 1663 * @param nh 1664 * Pointer to Netlink Message Header. 1665 * @param arg 1666 * PMD data register with this callback. 1667 * 1668 * @return 1669 * 0 on success, a negative errno value otherwise and rte_errno is set. 1670 */ 1671 static int 1672 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) 1673 { 1674 1675 int ret = -EINVAL; 1676 int *enable = arg; 1677 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1678 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1679 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1680 1681 while (nla->nla_len && nla < tail) { 1682 switch (nla->nla_type) { 1683 /* Expected nested attributes case. */ 1684 case DEVLINK_ATTR_PARAM: 1685 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1686 case DEVLINK_ATTR_PARAM_VALUE: 1687 ret = 0; 1688 nla += 1; 1689 break; 1690 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1691 *enable = 1; 1692 return 0; 1693 default: 1694 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1695 } 1696 } 1697 *enable = 0; 1698 return ret; 1699 } 1700 1701 /** 1702 * Get ROCE enable status through Netlink. 1703 * 1704 * @param[in] nlsk_fd 1705 * Netlink socket file descriptor. 1706 * @param[in] family_id 1707 * the Devlink family ID. 1708 * @param pci_addr 1709 * The device PCI address. 1710 * @param[out] enable 1711 * Where to store the enable status. 1712 * 1713 * @return 1714 * 0 on success and @p enable is updated, a negative errno value otherwise 1715 * and rte_errno is set. 1716 */ 1717 int 1718 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, 1719 int *enable) 1720 { 1721 struct nlmsghdr *nlh; 1722 struct genlmsghdr *genl; 1723 uint32_t sn = MLX5_NL_SN_GENERATE; 1724 int ret; 1725 int cur_en = 0; 1726 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1727 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1728 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 1729 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 1730 1731 memset(buf, 0, sizeof(buf)); 1732 nlh = (struct nlmsghdr *)buf; 1733 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1734 nlh->nlmsg_type = family_id; 1735 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1736 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1737 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1738 genl->cmd = DEVLINK_CMD_PARAM_GET; 1739 genl->version = DEVLINK_GENL_VERSION; 1740 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1741 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1742 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1743 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1744 if (ret >= 0) 1745 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en); 1746 if (ret < 0) { 1747 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.", 1748 pci_addr, ret); 1749 return ret; 1750 } 1751 *enable = cur_en; 1752 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".", 1753 cur_en ? "en" : "dis", pci_addr); 1754 return ret; 1755 } 1756 1757 /** 1758 * Reload mlx5 device kernel driver through Netlink. 1759 * 1760 * @param[in] nlsk_fd 1761 * Netlink socket file descriptor. 1762 * @param[in] family_id 1763 * the Devlink family ID. 1764 * @param pci_addr 1765 * The device PCI address. 1766 * @param[out] enable 1767 * The enable status to set. 1768 * 1769 * @return 1770 * 0 on success, a negative errno value otherwise and rte_errno is set. 1771 */ 1772 static int 1773 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) 1774 { 1775 struct nlmsghdr *nlh; 1776 struct genlmsghdr *genl; 1777 uint32_t sn = MLX5_NL_SN_GENERATE; 1778 int ret; 1779 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1780 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1781 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 + 1782 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2]; 1783 1784 memset(buf, 0, sizeof(buf)); 1785 nlh = (struct nlmsghdr *)buf; 1786 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1787 nlh->nlmsg_type = family_id; 1788 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1789 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1790 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1791 genl->cmd = DEVLINK_CMD_RELOAD; 1792 genl->version = DEVLINK_GENL_VERSION; 1793 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1794 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1795 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1796 if (ret >= 0) 1797 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1798 if (ret < 0) { 1799 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d", 1800 pci_addr, ret); 1801 return ret; 1802 } 1803 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.", 1804 pci_addr); 1805 return 0; 1806 } 1807 1808 /** 1809 * Set ROCE enable status through Netlink. 1810 * 1811 * @param[in] nlsk_fd 1812 * Netlink socket file descriptor. 1813 * @param[in] family_id 1814 * the Devlink family ID. 1815 * @param pci_addr 1816 * The device PCI address. 1817 * @param[out] enable 1818 * The enable status to set. 1819 * 1820 * @return 1821 * 0 on success, a negative errno value otherwise and rte_errno is set. 1822 */ 1823 int 1824 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, 1825 int enable) 1826 { 1827 struct nlmsghdr *nlh; 1828 struct genlmsghdr *genl; 1829 uint32_t sn = MLX5_NL_SN_GENERATE; 1830 int ret; 1831 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1832 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1833 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 + 1834 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6]; 1835 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; 1836 uint8_t ptype = NLA_FLAG; 1837 ; 1838 1839 memset(buf, 0, sizeof(buf)); 1840 nlh = (struct nlmsghdr *)buf; 1841 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1842 nlh->nlmsg_type = family_id; 1843 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1844 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1845 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1846 genl->cmd = DEVLINK_CMD_PARAM_SET; 1847 genl->version = DEVLINK_GENL_VERSION; 1848 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1849 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1850 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1851 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode)); 1852 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype)); 1853 if (enable) 1854 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0); 1855 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1856 if (ret >= 0) 1857 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1858 if (ret < 0) { 1859 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:" 1860 " %d.", enable ? "en" : "dis", pci_addr, ret); 1861 return ret; 1862 } 1863 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.", 1864 pci_addr, enable ? "en" : "dis"); 1865 /* Now, need to reload the driver. */ 1866 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); 1867 } 1868 1869 /** 1870 * Try to parse a Netlink message as a link status update. 1871 * 1872 * @param hdr 1873 * Netlink message header. 1874 * @param[out] ifindex 1875 * Index of the updated interface. 1876 * 1877 * @return 1878 * 0 on success, negative on failure. 1879 */ 1880 int 1881 mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex) 1882 { 1883 struct ifinfomsg *info; 1884 1885 switch (hdr->nlmsg_type) { 1886 case RTM_NEWLINK: 1887 case RTM_DELLINK: 1888 case RTM_GETLINK: 1889 case RTM_SETLINK: 1890 info = NLMSG_DATA(hdr); 1891 *ifindex = info->ifi_index; 1892 return 0; 1893 } 1894 return -1; 1895 } 1896 1897 /** 1898 * Read pending events from a Netlink socket. 1899 * 1900 * @param nlsk_fd 1901 * Netlink socket. 1902 * @param cb 1903 * Callback invoked for each of the events. 1904 * @param cb_arg 1905 * User data for the callback. 1906 * 1907 * @return 1908 * 0 on success, including the case when there are no events. 1909 * Negative on failure and rte_errno is set. 1910 */ 1911 int 1912 mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg) 1913 { 1914 char buf[8192]; 1915 struct sockaddr_nl addr; 1916 struct iovec iov = { 1917 .iov_base = buf, 1918 .iov_len = sizeof(buf), 1919 }; 1920 struct msghdr msg = { 1921 .msg_name = &addr, 1922 .msg_namelen = sizeof(addr), 1923 .msg_iov = &iov, 1924 .msg_iovlen = 1, 1925 }; 1926 struct nlmsghdr *hdr; 1927 ssize_t size; 1928 1929 while (1) { 1930 size = recvmsg(nlsk_fd, &msg, MSG_DONTWAIT); 1931 if (size < 0) { 1932 if (errno == EAGAIN) 1933 return 0; 1934 if (errno == EINTR) 1935 continue; 1936 DRV_LOG(DEBUG, "Failed to receive netlink message: %s", 1937 strerror(errno)); 1938 rte_errno = errno; 1939 return -rte_errno; 1940 } 1941 hdr = (struct nlmsghdr *)buf; 1942 while (size >= (ssize_t)sizeof(*hdr)) { 1943 ssize_t msg_len = hdr->nlmsg_len; 1944 ssize_t data_len = msg_len - sizeof(*hdr); 1945 ssize_t aligned_len; 1946 1947 if (data_len < 0) { 1948 DRV_LOG(DEBUG, "Netlink message too short"); 1949 rte_errno = EINVAL; 1950 return -rte_errno; 1951 } 1952 aligned_len = NLMSG_ALIGN(msg_len); 1953 if (aligned_len > size) { 1954 DRV_LOG(DEBUG, "Netlink message too long"); 1955 rte_errno = EINVAL; 1956 return -rte_errno; 1957 } 1958 cb(hdr, cb_arg); 1959 hdr = RTE_PTR_ADD(hdr, aligned_len); 1960 size -= aligned_len; 1961 } 1962 } 1963 return 0; 1964 } 1965 1966 static int 1967 mlx5_nl_esw_multiport_cb(struct nlmsghdr *nh, void *arg) 1968 { 1969 1970 int ret = -EINVAL; 1971 int *enable = arg; 1972 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1973 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1974 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1975 1976 while (nla->nla_len && nla < tail) { 1977 switch (nla->nla_type) { 1978 /* Expected nested attributes case. */ 1979 case DEVLINK_ATTR_PARAM: 1980 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1981 case DEVLINK_ATTR_PARAM_VALUE: 1982 ret = 0; 1983 nla += 1; 1984 break; 1985 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1986 *enable = 1; 1987 return 0; 1988 default: 1989 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1990 } 1991 } 1992 *enable = 0; 1993 return ret; 1994 } 1995 1996 #define NL_ESW_MULTIPORT_PARAM "esw_multiport" 1997 1998 int 1999 mlx5_nl_devlink_esw_multiport_get(int nlsk_fd, int family_id, const char *pci_addr, int *enable) 2000 { 2001 struct nlmsghdr *nlh; 2002 struct genlmsghdr *genl; 2003 uint32_t sn = MLX5_NL_SN_GENERATE; 2004 int ret; 2005 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 2006 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 2007 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 2008 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 2009 2010 memset(buf, 0, sizeof(buf)); 2011 nlh = (struct nlmsghdr *)buf; 2012 nlh->nlmsg_len = sizeof(struct nlmsghdr); 2013 nlh->nlmsg_type = family_id; 2014 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 2015 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 2016 nlh->nlmsg_len += sizeof(struct genlmsghdr); 2017 genl->cmd = DEVLINK_CMD_PARAM_GET; 2018 genl->version = DEVLINK_GENL_VERSION; 2019 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 2020 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 2021 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, 2022 NL_ESW_MULTIPORT_PARAM, sizeof(NL_ESW_MULTIPORT_PARAM)); 2023 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 2024 if (ret >= 0) 2025 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_esw_multiport_cb, enable); 2026 if (ret < 0) { 2027 DRV_LOG(DEBUG, "Failed to get Multiport E-Switch enable on device %s: %d.", 2028 pci_addr, ret); 2029 return ret; 2030 } 2031 DRV_LOG(DEBUG, "Multiport E-Switch is %sabled for device \"%s\".", 2032 *enable ? "en" : "dis", pci_addr); 2033 return ret; 2034 } 2035