1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018 6WIND S.A. 3 * Copyright 2018 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <linux/if_link.h> 8 #include <linux/rtnetlink.h> 9 #include <linux/genetlink.h> 10 #include <net/if.h> 11 #include <rdma/rdma_netlink.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <stdlib.h> 15 #include <stdalign.h> 16 #include <string.h> 17 #include <sys/socket.h> 18 #include <unistd.h> 19 20 #include <rte_errno.h> 21 22 #include "mlx5_nl.h" 23 #include "../mlx5_common_log.h" 24 #include "mlx5_malloc.h" 25 #ifdef HAVE_DEVLINK 26 #include <linux/devlink.h> 27 #endif 28 29 30 /* Size of the buffer to receive kernel messages */ 31 #define MLX5_NL_BUF_SIZE (32 * 1024) 32 /* Send buffer size for the Netlink socket */ 33 #define MLX5_SEND_BUF_SIZE 32768 34 /* Receive buffer size for the Netlink socket */ 35 #define MLX5_RECV_BUF_SIZE 32768 36 /* Maximal physical port name length. */ 37 #define MLX5_PHYS_PORT_NAME_MAX 128 38 39 /** Parameters of VLAN devices created by driver. */ 40 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" 41 /* 42 * Define NDA_RTA as defined in iproute2 sources. 43 * 44 * see in iproute2 sources file include/libnetlink.h 45 */ 46 #ifndef MLX5_NDA_RTA 47 #define MLX5_NDA_RTA(r) \ 48 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 49 #endif 50 /* 51 * Define NLMSG_TAIL as defined in iproute2 sources. 52 * 53 * see in iproute2 sources file include/libnetlink.h 54 */ 55 #ifndef NLMSG_TAIL 56 #define NLMSG_TAIL(nmsg) \ 57 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 58 #endif 59 /* 60 * The following definitions are normally found in rdma/rdma_netlink.h, 61 * however they are so recent that most systems do not expose them yet. 62 */ 63 #ifndef HAVE_RDMA_NL_NLDEV 64 #define RDMA_NL_NLDEV 5 65 #endif 66 #ifndef HAVE_RDMA_NLDEV_CMD_GET 67 #define RDMA_NLDEV_CMD_GET 1 68 #endif 69 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET 70 #define RDMA_NLDEV_CMD_PORT_GET 5 71 #endif 72 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX 73 #define RDMA_NLDEV_ATTR_DEV_INDEX 1 74 #endif 75 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME 76 #define RDMA_NLDEV_ATTR_DEV_NAME 2 77 #endif 78 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX 79 #define RDMA_NLDEV_ATTR_PORT_INDEX 3 80 #endif 81 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX 82 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50 83 #endif 84 85 /* These are normally found in linux/if_link.h. */ 86 #ifndef HAVE_IFLA_NUM_VF 87 #define IFLA_NUM_VF 21 88 #endif 89 #ifndef HAVE_IFLA_EXT_MASK 90 #define IFLA_EXT_MASK 29 91 #endif 92 #ifndef HAVE_IFLA_PHYS_SWITCH_ID 93 #define IFLA_PHYS_SWITCH_ID 36 94 #endif 95 #ifndef HAVE_IFLA_PHYS_PORT_NAME 96 #define IFLA_PHYS_PORT_NAME 38 97 #endif 98 99 /* 100 * Some Devlink defines may be missed in old kernel versions, 101 * adjust used defines. 102 */ 103 #ifndef DEVLINK_GENL_NAME 104 #define DEVLINK_GENL_NAME "devlink" 105 #endif 106 #ifndef DEVLINK_GENL_VERSION 107 #define DEVLINK_GENL_VERSION 1 108 #endif 109 #ifndef DEVLINK_ATTR_BUS_NAME 110 #define DEVLINK_ATTR_BUS_NAME 1 111 #endif 112 #ifndef DEVLINK_ATTR_DEV_NAME 113 #define DEVLINK_ATTR_DEV_NAME 2 114 #endif 115 #ifndef DEVLINK_ATTR_PARAM 116 #define DEVLINK_ATTR_PARAM 80 117 #endif 118 #ifndef DEVLINK_ATTR_PARAM_NAME 119 #define DEVLINK_ATTR_PARAM_NAME 81 120 #endif 121 #ifndef DEVLINK_ATTR_PARAM_TYPE 122 #define DEVLINK_ATTR_PARAM_TYPE 83 123 #endif 124 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST 125 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84 126 #endif 127 #ifndef DEVLINK_ATTR_PARAM_VALUE 128 #define DEVLINK_ATTR_PARAM_VALUE 85 129 #endif 130 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA 131 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86 132 #endif 133 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE 134 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87 135 #endif 136 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT 137 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1 138 #endif 139 #ifndef DEVLINK_CMD_RELOAD 140 #define DEVLINK_CMD_RELOAD 37 141 #endif 142 #ifndef DEVLINK_CMD_PARAM_GET 143 #define DEVLINK_CMD_PARAM_GET 38 144 #endif 145 #ifndef DEVLINK_CMD_PARAM_SET 146 #define DEVLINK_CMD_PARAM_SET 39 147 #endif 148 #ifndef NLA_FLAG 149 #define NLA_FLAG 6 150 #endif 151 152 /* Add/remove MAC address through Netlink */ 153 struct mlx5_nl_mac_addr { 154 struct rte_ether_addr (*mac)[]; 155 /**< MAC address handled by the device. */ 156 int mac_n; /**< Number of addresses in the array. */ 157 }; 158 159 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0) 160 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) 161 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) 162 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) 163 164 /** Data structure used by mlx5_nl_cmdget_cb(). */ 165 struct mlx5_nl_ifindex_data { 166 const char *name; /**< IB device name (in). */ 167 uint32_t flags; /**< found attribute flags (out). */ 168 uint32_t ibindex; /**< IB device index (out). */ 169 uint32_t ifindex; /**< Network interface index (out). */ 170 uint32_t portnum; /**< IB device max port number (out). */ 171 }; 172 173 uint32_t atomic_sn; 174 175 /* Generate Netlink sequence number. */ 176 #define MLX5_NL_SN_GENERATE __atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED) 177 178 /** 179 * Opens a Netlink socket. 180 * 181 * @param protocol 182 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). 183 * 184 * @return 185 * A file descriptor on success, a negative errno value otherwise and 186 * rte_errno is set. 187 */ 188 int 189 mlx5_nl_init(int protocol) 190 { 191 int fd; 192 int sndbuf_size = MLX5_SEND_BUF_SIZE; 193 int rcvbuf_size = MLX5_RECV_BUF_SIZE; 194 struct sockaddr_nl local = { 195 .nl_family = AF_NETLINK, 196 }; 197 int ret; 198 199 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); 200 if (fd == -1) { 201 rte_errno = errno; 202 return -rte_errno; 203 } 204 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int)); 205 if (ret == -1) { 206 rte_errno = errno; 207 goto error; 208 } 209 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int)); 210 if (ret == -1) { 211 rte_errno = errno; 212 goto error; 213 } 214 ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); 215 if (ret == -1) { 216 rte_errno = errno; 217 goto error; 218 } 219 return fd; 220 error: 221 close(fd); 222 return -rte_errno; 223 } 224 225 /** 226 * Send a request message to the kernel on the Netlink socket. 227 * 228 * @param[in] nlsk_fd 229 * Netlink socket file descriptor. 230 * @param[in] nh 231 * The Netlink message send to the kernel. 232 * @param[in] ssn 233 * Sequence number. 234 * @param[in] req 235 * Pointer to the request structure. 236 * @param[in] len 237 * Length of the request in bytes. 238 * 239 * @return 240 * The number of sent bytes on success, a negative errno value otherwise and 241 * rte_errno is set. 242 */ 243 static int 244 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, 245 int len) 246 { 247 struct sockaddr_nl sa = { 248 .nl_family = AF_NETLINK, 249 }; 250 struct iovec iov[2] = { 251 { .iov_base = nh, .iov_len = sizeof(*nh), }, 252 { .iov_base = req, .iov_len = len, }, 253 }; 254 struct msghdr msg = { 255 .msg_name = &sa, 256 .msg_namelen = sizeof(sa), 257 .msg_iov = iov, 258 .msg_iovlen = 2, 259 }; 260 int send_bytes; 261 262 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 263 nh->nlmsg_seq = sn; 264 send_bytes = sendmsg(nlsk_fd, &msg, 0); 265 if (send_bytes < 0) { 266 rte_errno = errno; 267 return -rte_errno; 268 } 269 return send_bytes; 270 } 271 272 /** 273 * Send a message to the kernel on the Netlink socket. 274 * 275 * @param[in] nlsk_fd 276 * The Netlink socket file descriptor used for communication. 277 * @param[in] nh 278 * The Netlink message send to the kernel. 279 * @param[in] sn 280 * Sequence number. 281 * 282 * @return 283 * The number of sent bytes on success, a negative errno value otherwise and 284 * rte_errno is set. 285 */ 286 static int 287 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) 288 { 289 struct sockaddr_nl sa = { 290 .nl_family = AF_NETLINK, 291 }; 292 struct iovec iov = { 293 .iov_base = nh, 294 .iov_len = nh->nlmsg_len, 295 }; 296 struct msghdr msg = { 297 .msg_name = &sa, 298 .msg_namelen = sizeof(sa), 299 .msg_iov = &iov, 300 .msg_iovlen = 1, 301 }; 302 int send_bytes; 303 304 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 305 nh->nlmsg_seq = sn; 306 send_bytes = sendmsg(nlsk_fd, &msg, 0); 307 if (send_bytes < 0) { 308 rte_errno = errno; 309 return -rte_errno; 310 } 311 return send_bytes; 312 } 313 314 /** 315 * Receive a message from the kernel on the Netlink socket, following 316 * mlx5_nl_send(). 317 * 318 * @param[in] nlsk_fd 319 * The Netlink socket file descriptor used for communication. 320 * @param[in] sn 321 * Sequence number. 322 * @param[in] cb 323 * The callback function to call for each Netlink message received. 324 * @param[in, out] arg 325 * Custom arguments for the callback. 326 * 327 * @return 328 * 0 on success, a negative errno value otherwise and rte_errno is set. 329 */ 330 static int 331 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), 332 void *arg) 333 { 334 struct sockaddr_nl sa; 335 void *buf = mlx5_malloc(0, MLX5_RECV_BUF_SIZE, 0, SOCKET_ID_ANY); 336 struct iovec iov = { 337 .iov_base = buf, 338 .iov_len = MLX5_RECV_BUF_SIZE, 339 }; 340 struct msghdr msg = { 341 .msg_name = &sa, 342 .msg_namelen = sizeof(sa), 343 .msg_iov = &iov, 344 /* One message at a time */ 345 .msg_iovlen = 1, 346 }; 347 int multipart = 0; 348 int ret = 0; 349 350 if (!buf) { 351 rte_errno = ENOMEM; 352 return -rte_errno; 353 } 354 do { 355 struct nlmsghdr *nh; 356 int recv_bytes = 0; 357 358 do { 359 recv_bytes = recvmsg(nlsk_fd, &msg, 0); 360 if (recv_bytes == -1) { 361 rte_errno = errno; 362 ret = -rte_errno; 363 goto exit; 364 } 365 nh = (struct nlmsghdr *)buf; 366 } while (nh->nlmsg_seq != sn); 367 for (; 368 NLMSG_OK(nh, (unsigned int)recv_bytes); 369 nh = NLMSG_NEXT(nh, recv_bytes)) { 370 if (nh->nlmsg_type == NLMSG_ERROR) { 371 struct nlmsgerr *err_data = NLMSG_DATA(nh); 372 373 if (err_data->error < 0) { 374 rte_errno = -err_data->error; 375 ret = -rte_errno; 376 goto exit; 377 } 378 /* Ack message. */ 379 ret = 0; 380 goto exit; 381 } 382 /* Multi-part msgs and their trailing DONE message. */ 383 if (nh->nlmsg_flags & NLM_F_MULTI) { 384 if (nh->nlmsg_type == NLMSG_DONE) { 385 ret = 0; 386 goto exit; 387 } 388 multipart = 1; 389 } 390 if (cb) { 391 ret = cb(nh, arg); 392 if (ret < 0) 393 goto exit; 394 } 395 } 396 } while (multipart); 397 exit: 398 mlx5_free(buf); 399 return ret; 400 } 401 402 /** 403 * Parse Netlink message to retrieve the bridge MAC address. 404 * 405 * @param nh 406 * Pointer to Netlink Message Header. 407 * @param arg 408 * PMD data register with this callback. 409 * 410 * @return 411 * 0 on success, a negative errno value otherwise and rte_errno is set. 412 */ 413 static int 414 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) 415 { 416 struct mlx5_nl_mac_addr *data = arg; 417 struct ndmsg *r = NLMSG_DATA(nh); 418 struct rtattr *attribute; 419 int len; 420 421 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); 422 for (attribute = MLX5_NDA_RTA(r); 423 RTA_OK(attribute, len); 424 attribute = RTA_NEXT(attribute, len)) { 425 if (attribute->rta_type == NDA_LLADDR) { 426 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { 427 DRV_LOG(WARNING, 428 "not enough room to finalize the" 429 " request"); 430 rte_errno = ENOMEM; 431 return -rte_errno; 432 } 433 #ifdef RTE_LIBRTE_MLX5_DEBUG 434 char m[RTE_ETHER_ADDR_FMT_SIZE]; 435 436 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, 437 RTA_DATA(attribute)); 438 DRV_LOG(DEBUG, "bridge MAC address %s", m); 439 #endif 440 memcpy(&(*data->mac)[data->mac_n++], 441 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); 442 } 443 } 444 return 0; 445 } 446 447 /** 448 * Get bridge MAC addresses. 449 * 450 * @param[in] nlsk_fd 451 * Netlink socket file descriptor. 452 * @param[in] iface_idx 453 * Net device interface index. 454 * @param mac[out] 455 * Pointer to the array table of MAC addresses to fill. 456 * Its size should be of MLX5_MAX_MAC_ADDRESSES. 457 * @param mac_n[out] 458 * Number of entries filled in MAC array. 459 * 460 * @return 461 * 0 on success, a negative errno value otherwise and rte_errno is set. 462 */ 463 static int 464 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, 465 struct rte_ether_addr (*mac)[], int *mac_n) 466 { 467 struct { 468 struct nlmsghdr hdr; 469 struct ifinfomsg ifm; 470 } req = { 471 .hdr = { 472 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 473 .nlmsg_type = RTM_GETNEIGH, 474 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, 475 }, 476 .ifm = { 477 .ifi_family = PF_BRIDGE, 478 .ifi_index = iface_idx, 479 }, 480 }; 481 struct mlx5_nl_mac_addr data = { 482 .mac = mac, 483 .mac_n = 0, 484 }; 485 uint32_t sn = MLX5_NL_SN_GENERATE; 486 int ret; 487 488 if (nlsk_fd == -1) 489 return 0; 490 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, 491 sizeof(struct ifinfomsg)); 492 if (ret < 0) 493 goto error; 494 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); 495 if (ret < 0) 496 goto error; 497 *mac_n = data.mac_n; 498 return 0; 499 error: 500 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", 501 iface_idx, strerror(rte_errno)); 502 return -rte_errno; 503 } 504 505 /** 506 * Modify the MAC address neighbour table with Netlink. 507 * 508 * @param[in] nlsk_fd 509 * Netlink socket file descriptor. 510 * @param[in] iface_idx 511 * Net device interface index. 512 * @param mac 513 * MAC address to consider. 514 * @param add 515 * 1 to add the MAC address, 0 to remove the MAC address. 516 * 517 * @return 518 * 0 on success, a negative errno value otherwise and rte_errno is set. 519 */ 520 static int 521 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 522 struct rte_ether_addr *mac, int add) 523 { 524 struct { 525 struct nlmsghdr hdr; 526 struct ndmsg ndm; 527 struct rtattr rta; 528 uint8_t buffer[RTE_ETHER_ADDR_LEN]; 529 } req = { 530 .hdr = { 531 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), 532 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 533 NLM_F_EXCL | NLM_F_ACK, 534 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH, 535 }, 536 .ndm = { 537 .ndm_family = PF_BRIDGE, 538 .ndm_state = NUD_NOARP | NUD_PERMANENT, 539 .ndm_ifindex = iface_idx, 540 .ndm_flags = NTF_SELF, 541 }, 542 .rta = { 543 .rta_type = NDA_LLADDR, 544 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), 545 }, 546 }; 547 uint32_t sn = MLX5_NL_SN_GENERATE; 548 int ret; 549 550 if (nlsk_fd == -1) 551 return 0; 552 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); 553 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 554 RTA_ALIGN(req.rta.rta_len); 555 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 556 if (ret < 0) 557 goto error; 558 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 559 if (ret < 0) 560 goto error; 561 return 0; 562 error: 563 #ifdef RTE_LIBRTE_MLX5_DEBUG 564 { 565 char m[RTE_ETHER_ADDR_FMT_SIZE]; 566 567 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac); 568 DRV_LOG(DEBUG, 569 "Interface %u cannot %s MAC address %s %s", 570 iface_idx, 571 add ? "add" : "remove", m, strerror(rte_errno)); 572 } 573 #endif 574 return -rte_errno; 575 } 576 577 /** 578 * Modify the VF MAC address neighbour table with Netlink. 579 * 580 * @param[in] nlsk_fd 581 * Netlink socket file descriptor. 582 * @param[in] iface_idx 583 * Net device interface index. 584 * @param mac 585 * MAC address to consider. 586 * @param vf_index 587 * VF index. 588 * 589 * @return 590 * 0 on success, a negative errno value otherwise and rte_errno is set. 591 */ 592 int 593 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 594 struct rte_ether_addr *mac, int vf_index) 595 { 596 int ret; 597 struct { 598 struct nlmsghdr hdr; 599 struct ifinfomsg ifm; 600 struct rtattr vf_list_rta; 601 struct rtattr vf_info_rta; 602 struct rtattr vf_mac_rta; 603 struct ifla_vf_mac ivm; 604 } req = { 605 .hdr = { 606 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 607 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 608 .nlmsg_type = RTM_BASE, 609 }, 610 .ifm = { 611 .ifi_index = iface_idx, 612 }, 613 .vf_list_rta = { 614 .rta_type = IFLA_VFINFO_LIST, 615 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 616 }, 617 .vf_info_rta = { 618 .rta_type = IFLA_VF_INFO, 619 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 620 }, 621 .vf_mac_rta = { 622 .rta_type = IFLA_VF_MAC, 623 }, 624 }; 625 struct ifla_vf_mac ivm = { 626 .vf = vf_index, 627 }; 628 uint32_t sn = MLX5_NL_SN_GENERATE; 629 630 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); 631 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); 632 633 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); 634 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 635 RTA_ALIGN(req.vf_list_rta.rta_len) + 636 RTA_ALIGN(req.vf_info_rta.rta_len) + 637 RTA_ALIGN(req.vf_mac_rta.rta_len); 638 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 639 &req.vf_list_rta); 640 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 641 &req.vf_info_rta); 642 643 if (nlsk_fd < 0) 644 return -1; 645 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 646 if (ret < 0) 647 goto error; 648 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 649 if (ret < 0) 650 goto error; 651 return 0; 652 error: 653 DRV_LOG(ERR, 654 "representor %u cannot set VF MAC address " 655 "%02X:%02X:%02X:%02X:%02X:%02X : %s", 656 vf_index, 657 mac->addr_bytes[0], mac->addr_bytes[1], 658 mac->addr_bytes[2], mac->addr_bytes[3], 659 mac->addr_bytes[4], mac->addr_bytes[5], 660 strerror(rte_errno)); 661 return -rte_errno; 662 } 663 664 /** 665 * Add a MAC address. 666 * 667 * @param[in] nlsk_fd 668 * Netlink socket file descriptor. 669 * @param[in] iface_idx 670 * Net device interface index. 671 * @param mac_own 672 * BITFIELD_DECLARE array to store the mac. 673 * @param mac 674 * MAC address to register. 675 * @param index 676 * MAC address index. 677 * 678 * @return 679 * 0 on success, a negative errno value otherwise and rte_errno is set. 680 */ 681 int 682 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, 683 uint64_t *mac_own, struct rte_ether_addr *mac, 684 uint32_t index) 685 { 686 int ret; 687 688 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); 689 if (!ret) { 690 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 691 if (index >= MLX5_MAX_MAC_ADDRESSES) 692 return -EINVAL; 693 694 BITFIELD_SET(mac_own, index); 695 } 696 if (ret == -EEXIST) 697 return 0; 698 return ret; 699 } 700 701 /** 702 * Remove a MAC address. 703 * 704 * @param[in] nlsk_fd 705 * Netlink socket file descriptor. 706 * @param[in] iface_idx 707 * Net device interface index. 708 * @param mac_own 709 * BITFIELD_DECLARE array to store the mac. 710 * @param mac 711 * MAC address to remove. 712 * @param index 713 * MAC address index. 714 * 715 * @return 716 * 0 on success, a negative errno value otherwise and rte_errno is set. 717 */ 718 int 719 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, 720 struct rte_ether_addr *mac, uint32_t index) 721 { 722 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 723 if (index >= MLX5_MAX_MAC_ADDRESSES) 724 return -EINVAL; 725 726 BITFIELD_RESET(mac_own, index); 727 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); 728 } 729 730 /** 731 * Synchronize Netlink bridge table to the internal table. 732 * 733 * @param[in] nlsk_fd 734 * Netlink socket file descriptor. 735 * @param[in] iface_idx 736 * Net device interface index. 737 * @param mac_addrs 738 * Mac addresses array to sync. 739 * @param n 740 * @p mac_addrs array size. 741 */ 742 void 743 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, 744 struct rte_ether_addr *mac_addrs, int n) 745 { 746 struct rte_ether_addr macs[n]; 747 int macs_n = 0; 748 int i; 749 int ret; 750 751 memset(macs, 0, n * sizeof(macs[0])); 752 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); 753 if (ret) 754 return; 755 for (i = 0; i != macs_n; ++i) { 756 int j; 757 758 /* Verify the address is not in the array yet. */ 759 for (j = 0; j != n; ++j) 760 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) 761 break; 762 if (j != n) 763 continue; 764 if (rte_is_multicast_ether_addr(&macs[i])) { 765 /* Find the first entry available. */ 766 for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) { 767 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 768 mac_addrs[j] = macs[i]; 769 break; 770 } 771 } 772 } else { 773 /* Find the first entry available. */ 774 for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) { 775 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 776 mac_addrs[j] = macs[i]; 777 break; 778 } 779 } 780 } 781 } 782 } 783 784 /** 785 * Flush all added MAC addresses. 786 * 787 * @param[in] nlsk_fd 788 * Netlink socket file descriptor. 789 * @param[in] iface_idx 790 * Net device interface index. 791 * @param[in] mac_addrs 792 * Mac addresses array to flush. 793 * @param n 794 * @p mac_addrs array size. 795 * @param mac_own 796 * BITFIELD_DECLARE array to store the mac. 797 */ 798 void 799 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, 800 struct rte_ether_addr *mac_addrs, int n, 801 uint64_t *mac_own) 802 { 803 int i; 804 805 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES) 806 return; 807 808 for (i = n - 1; i >= 0; --i) { 809 struct rte_ether_addr *m = &mac_addrs[i]; 810 811 if (BITFIELD_ISSET(mac_own, i)) 812 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, 813 i); 814 } 815 } 816 817 /** 818 * Enable promiscuous / all multicast mode through Netlink. 819 * 820 * @param[in] nlsk_fd 821 * Netlink socket file descriptor. 822 * @param[in] iface_idx 823 * Net device interface index. 824 * @param flags 825 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. 826 * @param enable 827 * Nonzero to enable, disable otherwise. 828 * 829 * @return 830 * 0 on success, a negative errno value otherwise and rte_errno is set. 831 */ 832 static int 833 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, 834 int enable) 835 { 836 struct { 837 struct nlmsghdr hdr; 838 struct ifinfomsg ifi; 839 } req = { 840 .hdr = { 841 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 842 .nlmsg_type = RTM_NEWLINK, 843 .nlmsg_flags = NLM_F_REQUEST, 844 }, 845 .ifi = { 846 .ifi_flags = enable ? flags : 0, 847 .ifi_change = flags, 848 .ifi_index = iface_idx, 849 }, 850 }; 851 uint32_t sn = MLX5_NL_SN_GENERATE; 852 int ret; 853 854 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); 855 if (nlsk_fd < 0) 856 return 0; 857 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 858 if (ret < 0) 859 return ret; 860 return 0; 861 } 862 863 /** 864 * Enable promiscuous mode through Netlink. 865 * 866 * @param[in] nlsk_fd 867 * Netlink socket file descriptor. 868 * @param[in] iface_idx 869 * Net device interface index. 870 * @param enable 871 * Nonzero to enable, disable otherwise. 872 * 873 * @return 874 * 0 on success, a negative errno value otherwise and rte_errno is set. 875 */ 876 int 877 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) 878 { 879 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); 880 881 if (ret) 882 DRV_LOG(DEBUG, 883 "Interface %u cannot %s promisc mode: Netlink error %s", 884 iface_idx, enable ? "enable" : "disable", 885 strerror(rte_errno)); 886 return ret; 887 } 888 889 /** 890 * Enable all multicast mode through Netlink. 891 * 892 * @param[in] nlsk_fd 893 * Netlink socket file descriptor. 894 * @param[in] iface_idx 895 * Net device interface index. 896 * @param enable 897 * Nonzero to enable, disable otherwise. 898 * 899 * @return 900 * 0 on success, a negative errno value otherwise and rte_errno is set. 901 */ 902 int 903 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) 904 { 905 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, 906 enable); 907 908 if (ret) 909 DRV_LOG(DEBUG, 910 "Interface %u cannot %s allmulti : Netlink error %s", 911 iface_idx, enable ? "enable" : "disable", 912 strerror(rte_errno)); 913 return ret; 914 } 915 916 /** 917 * Process network interface information from Netlink message. 918 * 919 * @param nh 920 * Pointer to Netlink message header. 921 * @param arg 922 * Opaque data pointer for this callback. 923 * 924 * @return 925 * 0 on success, a negative errno value otherwise and rte_errno is set. 926 */ 927 static int 928 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) 929 { 930 struct mlx5_nl_ifindex_data *data = arg; 931 struct mlx5_nl_ifindex_data local = { 932 .flags = 0, 933 }; 934 size_t off = NLMSG_HDRLEN; 935 936 if (nh->nlmsg_type != 937 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && 938 nh->nlmsg_type != 939 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) 940 goto error; 941 while (off < nh->nlmsg_len) { 942 struct nlattr *na = (void *)((uintptr_t)nh + off); 943 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); 944 945 if (na->nla_len > nh->nlmsg_len - off) 946 goto error; 947 switch (na->nla_type) { 948 case RDMA_NLDEV_ATTR_DEV_INDEX: 949 local.ibindex = *(uint32_t *)payload; 950 local.flags |= MLX5_NL_CMD_GET_IB_INDEX; 951 break; 952 case RDMA_NLDEV_ATTR_DEV_NAME: 953 if (!strcmp(payload, data->name)) 954 local.flags |= MLX5_NL_CMD_GET_IB_NAME; 955 break; 956 case RDMA_NLDEV_ATTR_NDEV_INDEX: 957 local.ifindex = *(uint32_t *)payload; 958 local.flags |= MLX5_NL_CMD_GET_NET_INDEX; 959 break; 960 case RDMA_NLDEV_ATTR_PORT_INDEX: 961 local.portnum = *(uint32_t *)payload; 962 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; 963 break; 964 default: 965 break; 966 } 967 off += NLA_ALIGN(na->nla_len); 968 } 969 /* 970 * It is possible to have multiple messages for all 971 * Infiniband devices in the system with appropriate name. 972 * So we should gather parameters locally and copy to 973 * query context only in case of coinciding device name. 974 */ 975 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { 976 data->flags = local.flags; 977 data->ibindex = local.ibindex; 978 data->ifindex = local.ifindex; 979 data->portnum = local.portnum; 980 } 981 return 0; 982 error: 983 rte_errno = EINVAL; 984 return -rte_errno; 985 } 986 987 /** 988 * Get index of network interface associated with some IB device. 989 * 990 * This is the only somewhat safe method to avoid resorting to heuristics 991 * when faced with port representors. Unfortunately it requires at least 992 * Linux 4.17. 993 * 994 * @param nl 995 * Netlink socket of the RDMA kind (NETLINK_RDMA). 996 * @param[in] name 997 * IB device name. 998 * @param[in] pindex 999 * IB device port index, starting from 1 1000 * @return 1001 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno 1002 * is set. 1003 */ 1004 unsigned int 1005 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) 1006 { 1007 struct mlx5_nl_ifindex_data data = { 1008 .name = name, 1009 .flags = 0, 1010 .ibindex = 0, /* Determined during first pass. */ 1011 .ifindex = 0, /* Determined during second pass. */ 1012 }; 1013 union { 1014 struct nlmsghdr nh; 1015 uint8_t buf[NLMSG_HDRLEN + 1016 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) + 1017 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; 1018 } req = { 1019 .nh = { 1020 .nlmsg_len = NLMSG_LENGTH(0), 1021 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1022 RDMA_NLDEV_CMD_GET), 1023 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1024 }, 1025 }; 1026 struct nlattr *na; 1027 uint32_t sn = MLX5_NL_SN_GENERATE; 1028 int ret; 1029 1030 ret = mlx5_nl_send(nl, &req.nh, sn); 1031 if (ret < 0) 1032 return 0; 1033 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1034 if (ret < 0) 1035 return 0; 1036 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1037 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) 1038 goto error; 1039 data.flags = 0; 1040 sn = MLX5_NL_SN_GENERATE; 1041 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1042 RDMA_NLDEV_CMD_PORT_GET); 1043 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1044 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); 1045 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); 1046 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex); 1047 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; 1048 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1049 &data.ibindex, sizeof(data.ibindex)); 1050 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); 1051 na->nla_len = NLA_HDRLEN + sizeof(pindex); 1052 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; 1053 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1054 &pindex, sizeof(pindex)); 1055 ret = mlx5_nl_send(nl, &req.nh, sn); 1056 if (ret < 0) 1057 return 0; 1058 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1059 if (ret < 0) 1060 return 0; 1061 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1062 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1063 !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || 1064 !data.ifindex) 1065 goto error; 1066 return data.ifindex; 1067 error: 1068 rte_errno = ENODEV; 1069 return 0; 1070 } 1071 1072 /** 1073 * Get the number of physical ports of given IB device. 1074 * 1075 * @param nl 1076 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1077 * @param[in] name 1078 * IB device name. 1079 * 1080 * @return 1081 * A valid (nonzero) number of ports on success, 0 otherwise 1082 * and rte_errno is set. 1083 */ 1084 unsigned int 1085 mlx5_nl_portnum(int nl, const char *name) 1086 { 1087 struct mlx5_nl_ifindex_data data = { 1088 .flags = 0, 1089 .name = name, 1090 .ifindex = 0, 1091 .portnum = 0, 1092 }; 1093 struct nlmsghdr req = { 1094 .nlmsg_len = NLMSG_LENGTH(0), 1095 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1096 RDMA_NLDEV_CMD_GET), 1097 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1098 }; 1099 uint32_t sn = MLX5_NL_SN_GENERATE; 1100 int ret; 1101 1102 ret = mlx5_nl_send(nl, &req, sn); 1103 if (ret < 0) 1104 return 0; 1105 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1106 if (ret < 0) 1107 return 0; 1108 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1109 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1110 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { 1111 rte_errno = ENODEV; 1112 return 0; 1113 } 1114 if (!data.portnum) 1115 rte_errno = EINVAL; 1116 return data.portnum; 1117 } 1118 1119 /** 1120 * Analyze gathered port parameters via Netlink to recognize master 1121 * and representor devices for E-Switch configuration. 1122 * 1123 * @param[in] num_vf_set 1124 * flag of presence of number of VFs port attribute. 1125 * @param[inout] switch_info 1126 * Port information, including port name as a number and port name 1127 * type if recognized 1128 * 1129 * @return 1130 * master and representor flags are set in switch_info according to 1131 * recognized parameters (if any). 1132 */ 1133 static void 1134 mlx5_nl_check_switch_info(bool num_vf_set, 1135 struct mlx5_switch_info *switch_info) 1136 { 1137 switch (switch_info->name_type) { 1138 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1139 /* 1140 * Name is not recognized, assume the master, 1141 * check the number of VFs key presence. 1142 */ 1143 switch_info->master = num_vf_set; 1144 break; 1145 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1146 /* 1147 * Name is not set, this assumes the legacy naming 1148 * schema for master, just check if there is a 1149 * number of VFs key. 1150 */ 1151 switch_info->master = num_vf_set; 1152 break; 1153 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1154 /* New uplink naming schema recognized. */ 1155 switch_info->master = 1; 1156 break; 1157 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1158 /* Legacy representors naming schema. */ 1159 switch_info->representor = !num_vf_set; 1160 break; 1161 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: 1162 /* Fallthrough */ 1163 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1164 /* Fallthrough */ 1165 case MLX5_PHYS_PORT_NAME_TYPE_PFSF: 1166 /* New representors naming schema. */ 1167 switch_info->representor = 1; 1168 break; 1169 } 1170 } 1171 1172 /** 1173 * Process switch information from Netlink message. 1174 * 1175 * @param nh 1176 * Pointer to Netlink message header. 1177 * @param arg 1178 * Opaque data pointer for this callback. 1179 * 1180 * @return 1181 * 0 on success, a negative errno value otherwise and rte_errno is set. 1182 */ 1183 static int 1184 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) 1185 { 1186 struct mlx5_switch_info info = { 1187 .master = 0, 1188 .representor = 0, 1189 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1190 .port_name = 0, 1191 .switch_id = 0, 1192 }; 1193 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 1194 bool switch_id_set = false; 1195 bool num_vf_set = false; 1196 int len; 1197 1198 if (nh->nlmsg_type != RTM_NEWLINK) 1199 goto error; 1200 while (off < nh->nlmsg_len) { 1201 struct rtattr *ra = (void *)((uintptr_t)nh + off); 1202 void *payload = RTA_DATA(ra); 1203 unsigned int i; 1204 1205 if (ra->rta_len > nh->nlmsg_len - off) 1206 goto error; 1207 switch (ra->rta_type) { 1208 case IFLA_NUM_VF: 1209 num_vf_set = true; 1210 break; 1211 case IFLA_PHYS_PORT_NAME: 1212 len = RTA_PAYLOAD(ra); 1213 /* Some kernels do not pad attributes with zero. */ 1214 if (len > 0 && len < MLX5_PHYS_PORT_NAME_MAX) { 1215 char name[MLX5_PHYS_PORT_NAME_MAX]; 1216 1217 /* 1218 * We can't just patch the message with padding 1219 * zero - it might corrupt the following items 1220 * in the message, we have to copy the string 1221 * by attribute length and pad the copied one. 1222 */ 1223 memcpy(name, payload, len); 1224 name[len] = 0; 1225 mlx5_translate_port_name(name, &info); 1226 } else { 1227 info.name_type = 1228 MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 1229 } 1230 break; 1231 case IFLA_PHYS_SWITCH_ID: 1232 info.switch_id = 0; 1233 for (i = 0; i < RTA_PAYLOAD(ra); ++i) { 1234 info.switch_id <<= 8; 1235 info.switch_id |= ((uint8_t *)payload)[i]; 1236 } 1237 switch_id_set = true; 1238 break; 1239 } 1240 off += RTA_ALIGN(ra->rta_len); 1241 } 1242 if (switch_id_set) { 1243 /* We have some E-Switch configuration. */ 1244 mlx5_nl_check_switch_info(num_vf_set, &info); 1245 } 1246 MLX5_ASSERT(!(info.master && info.representor)); 1247 memcpy(arg, &info, sizeof(info)); 1248 return 0; 1249 error: 1250 rte_errno = EINVAL; 1251 return -rte_errno; 1252 } 1253 1254 /** 1255 * Get switch information associated with network interface. 1256 * 1257 * @param nl 1258 * Netlink socket of the ROUTE kind (NETLINK_ROUTE). 1259 * @param ifindex 1260 * Network interface index. 1261 * @param[out] info 1262 * Switch information object, populated in case of success. 1263 * 1264 * @return 1265 * 0 on success, a negative errno value otherwise and rte_errno is set. 1266 */ 1267 int 1268 mlx5_nl_switch_info(int nl, unsigned int ifindex, 1269 struct mlx5_switch_info *info) 1270 { 1271 struct { 1272 struct nlmsghdr nh; 1273 struct ifinfomsg info; 1274 struct rtattr rta; 1275 uint32_t extmask; 1276 } req = { 1277 .nh = { 1278 .nlmsg_len = NLMSG_LENGTH 1279 (sizeof(req.info) + 1280 RTA_LENGTH(sizeof(uint32_t))), 1281 .nlmsg_type = RTM_GETLINK, 1282 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1283 }, 1284 .info = { 1285 .ifi_family = AF_UNSPEC, 1286 .ifi_index = ifindex, 1287 }, 1288 .rta = { 1289 .rta_type = IFLA_EXT_MASK, 1290 .rta_len = RTA_LENGTH(sizeof(int32_t)), 1291 }, 1292 .extmask = RTE_LE32(1), 1293 }; 1294 uint32_t sn = MLX5_NL_SN_GENERATE; 1295 int ret; 1296 1297 ret = mlx5_nl_send(nl, &req.nh, sn); 1298 if (ret >= 0) 1299 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); 1300 if (info->master && info->representor) { 1301 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1302 " and as representor", ifindex); 1303 rte_errno = ENODEV; 1304 ret = -rte_errno; 1305 } 1306 return ret; 1307 } 1308 1309 /* 1310 * Delete VLAN network device by ifindex. 1311 * 1312 * @param[in] tcf 1313 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1314 * @param[in] ifindex 1315 * Interface index of network device to delete. 1316 */ 1317 void 1318 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, 1319 uint32_t ifindex) 1320 { 1321 uint32_t sn = MLX5_NL_SN_GENERATE; 1322 int ret; 1323 struct { 1324 struct nlmsghdr nh; 1325 struct ifinfomsg info; 1326 } req = { 1327 .nh = { 1328 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 1329 .nlmsg_type = RTM_DELLINK, 1330 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1331 }, 1332 .info = { 1333 .ifi_family = AF_UNSPEC, 1334 .ifi_index = ifindex, 1335 }, 1336 }; 1337 1338 if (ifindex) { 1339 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn); 1340 if (ret >= 0) 1341 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1342 if (ret < 0) 1343 DRV_LOG(WARNING, "netlink: error deleting VLAN WA" 1344 " ifindex %u, %d", ifindex, ret); 1345 } 1346 } 1347 1348 /* Set of subroutines to build Netlink message. */ 1349 static struct nlattr * 1350 nl_msg_tail(struct nlmsghdr *nlh) 1351 { 1352 return (struct nlattr *) 1353 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); 1354 } 1355 1356 static void 1357 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) 1358 { 1359 struct nlattr *nla = nl_msg_tail(nlh); 1360 1361 nla->nla_type = type; 1362 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen; 1363 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len); 1364 1365 if (alen) 1366 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); 1367 } 1368 1369 static struct nlattr * 1370 nl_attr_nest_start(struct nlmsghdr *nlh, int type) 1371 { 1372 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); 1373 1374 nl_attr_put(nlh, type, NULL, 0); 1375 return nest; 1376 } 1377 1378 static void 1379 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) 1380 { 1381 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; 1382 } 1383 1384 /* 1385 * Create network VLAN device with specified VLAN tag. 1386 * 1387 * @param[in] tcf 1388 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1389 * @param[in] ifindex 1390 * Base network interface index. 1391 * @param[in] tag 1392 * VLAN tag for VLAN network device to create. 1393 */ 1394 uint32_t 1395 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, 1396 uint32_t ifindex, uint16_t tag) 1397 { 1398 struct nlmsghdr *nlh; 1399 struct ifinfomsg *ifm; 1400 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; 1401 1402 __rte_cache_aligned 1403 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1404 NLMSG_ALIGN(sizeof(struct ifinfomsg)) + 1405 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + 1406 NLMSG_ALIGN(sizeof(uint32_t)) + 1407 NLMSG_ALIGN(sizeof(name)) + 1408 NLMSG_ALIGN(sizeof("vlan")) + 1409 NLMSG_ALIGN(sizeof(uint32_t)) + 1410 NLMSG_ALIGN(sizeof(uint16_t)) + 16]; 1411 struct nlattr *na_info; 1412 struct nlattr *na_vlan; 1413 uint32_t sn = MLX5_NL_SN_GENERATE; 1414 int ret; 1415 1416 memset(buf, 0, sizeof(buf)); 1417 nlh = (struct nlmsghdr *)buf; 1418 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1419 nlh->nlmsg_type = RTM_NEWLINK; 1420 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 1421 NLM_F_EXCL | NLM_F_ACK; 1422 ifm = (struct ifinfomsg *)nl_msg_tail(nlh); 1423 nlh->nlmsg_len += sizeof(struct ifinfomsg); 1424 ifm->ifi_family = AF_UNSPEC; 1425 ifm->ifi_type = 0; 1426 ifm->ifi_index = 0; 1427 ifm->ifi_flags = IFF_UP; 1428 ifm->ifi_change = 0xffffffff; 1429 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); 1430 ret = snprintf(name, sizeof(name), "%s.%u.%u", 1431 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); 1432 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); 1433 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); 1434 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); 1435 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); 1436 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); 1437 nl_attr_nest_end(nlh, na_vlan); 1438 nl_attr_nest_end(nlh, na_info); 1439 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len); 1440 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn); 1441 if (ret >= 0) 1442 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1443 if (ret < 0) { 1444 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name, 1445 ret); 1446 } 1447 /* Try to get ifindex of created or pre-existing device. */ 1448 ret = if_nametoindex(name); 1449 if (!ret) { 1450 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name, 1451 errno); 1452 return 0; 1453 } 1454 return ret; 1455 } 1456 1457 /** 1458 * Parse Netlink message to retrieve the general family ID. 1459 * 1460 * @param nh 1461 * Pointer to Netlink Message Header. 1462 * @param arg 1463 * PMD data register with this callback. 1464 * 1465 * @return 1466 * 0 on success, a negative errno value otherwise and rte_errno is set. 1467 */ 1468 static int 1469 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg) 1470 { 1471 1472 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1473 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1474 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1475 1476 for (; nla->nla_len && nla < tail; 1477 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) { 1478 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) { 1479 *(uint16_t *)arg = *(uint16_t *)(nla + 1); 1480 return 0; 1481 } 1482 } 1483 return -EINVAL; 1484 } 1485 1486 #define MLX5_NL_MAX_ATTR_SIZE 100 1487 /** 1488 * Get generic netlink family ID. 1489 * 1490 * @param[in] nlsk_fd 1491 * Netlink socket file descriptor. 1492 * @param[in] name 1493 * The family name. 1494 * 1495 * @return 1496 * ID >= 0 on success and @p enable is updated, a negative errno value 1497 * otherwise and rte_errno is set. 1498 */ 1499 static int 1500 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name) 1501 { 1502 struct nlmsghdr *nlh; 1503 struct genlmsghdr *genl; 1504 uint32_t sn = MLX5_NL_SN_GENERATE; 1505 int name_size = strlen(name) + 1; 1506 int ret; 1507 uint16_t id = -1; 1508 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1509 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1510 NLMSG_ALIGN(sizeof(struct nlattr)) + 1511 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)]; 1512 1513 memset(buf, 0, sizeof(buf)); 1514 nlh = (struct nlmsghdr *)buf; 1515 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1516 nlh->nlmsg_type = GENL_ID_CTRL; 1517 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1518 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1519 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1520 genl->cmd = CTRL_CMD_GETFAMILY; 1521 genl->version = 1; 1522 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size); 1523 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1524 if (ret >= 0) 1525 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id); 1526 if (ret < 0) { 1527 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name, 1528 ret); 1529 return ret; 1530 } 1531 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id); 1532 return (int)id; 1533 } 1534 1535 /** 1536 * Get Devlink family ID. 1537 * 1538 * @param[in] nlsk_fd 1539 * Netlink socket file descriptor. 1540 * 1541 * @return 1542 * ID >= 0 on success and @p enable is updated, a negative errno value 1543 * otherwise and rte_errno is set. 1544 */ 1545 1546 int 1547 mlx5_nl_devlink_family_id_get(int nlsk_fd) 1548 { 1549 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME); 1550 } 1551 1552 /** 1553 * Parse Netlink message to retrieve the ROCE enable status. 1554 * 1555 * @param nh 1556 * Pointer to Netlink Message Header. 1557 * @param arg 1558 * PMD data register with this callback. 1559 * 1560 * @return 1561 * 0 on success, a negative errno value otherwise and rte_errno is set. 1562 */ 1563 static int 1564 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) 1565 { 1566 1567 int ret = -EINVAL; 1568 int *enable = arg; 1569 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1570 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1571 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1572 1573 while (nla->nla_len && nla < tail) { 1574 switch (nla->nla_type) { 1575 /* Expected nested attributes case. */ 1576 case DEVLINK_ATTR_PARAM: 1577 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1578 case DEVLINK_ATTR_PARAM_VALUE: 1579 ret = 0; 1580 nla += 1; 1581 break; 1582 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1583 *enable = 1; 1584 return 0; 1585 default: 1586 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1587 } 1588 } 1589 *enable = 0; 1590 return ret; 1591 } 1592 1593 /** 1594 * Get ROCE enable status through Netlink. 1595 * 1596 * @param[in] nlsk_fd 1597 * Netlink socket file descriptor. 1598 * @param[in] family_id 1599 * the Devlink family ID. 1600 * @param pci_addr 1601 * The device PCI address. 1602 * @param[out] enable 1603 * Where to store the enable status. 1604 * 1605 * @return 1606 * 0 on success and @p enable is updated, a negative errno value otherwise 1607 * and rte_errno is set. 1608 */ 1609 int 1610 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, 1611 int *enable) 1612 { 1613 struct nlmsghdr *nlh; 1614 struct genlmsghdr *genl; 1615 uint32_t sn = MLX5_NL_SN_GENERATE; 1616 int ret; 1617 int cur_en = 0; 1618 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1619 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1620 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 1621 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 1622 1623 memset(buf, 0, sizeof(buf)); 1624 nlh = (struct nlmsghdr *)buf; 1625 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1626 nlh->nlmsg_type = family_id; 1627 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1628 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1629 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1630 genl->cmd = DEVLINK_CMD_PARAM_GET; 1631 genl->version = DEVLINK_GENL_VERSION; 1632 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1633 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1634 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1635 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1636 if (ret >= 0) 1637 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en); 1638 if (ret < 0) { 1639 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.", 1640 pci_addr, ret); 1641 return ret; 1642 } 1643 *enable = cur_en; 1644 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".", 1645 cur_en ? "en" : "dis", pci_addr); 1646 return ret; 1647 } 1648 1649 /** 1650 * Reload mlx5 device kernel driver through Netlink. 1651 * 1652 * @param[in] nlsk_fd 1653 * Netlink socket file descriptor. 1654 * @param[in] family_id 1655 * the Devlink family ID. 1656 * @param pci_addr 1657 * The device PCI address. 1658 * @param[out] enable 1659 * The enable status to set. 1660 * 1661 * @return 1662 * 0 on success, a negative errno value otherwise and rte_errno is set. 1663 */ 1664 int 1665 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) 1666 { 1667 struct nlmsghdr *nlh; 1668 struct genlmsghdr *genl; 1669 uint32_t sn = MLX5_NL_SN_GENERATE; 1670 int ret; 1671 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1672 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1673 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 + 1674 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2]; 1675 1676 memset(buf, 0, sizeof(buf)); 1677 nlh = (struct nlmsghdr *)buf; 1678 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1679 nlh->nlmsg_type = family_id; 1680 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1681 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1682 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1683 genl->cmd = DEVLINK_CMD_RELOAD; 1684 genl->version = DEVLINK_GENL_VERSION; 1685 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1686 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1687 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1688 if (ret >= 0) 1689 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1690 if (ret < 0) { 1691 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d", 1692 pci_addr, ret); 1693 return ret; 1694 } 1695 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.", 1696 pci_addr); 1697 return 0; 1698 } 1699 1700 /** 1701 * Set ROCE enable status through Netlink. 1702 * 1703 * @param[in] nlsk_fd 1704 * Netlink socket file descriptor. 1705 * @param[in] family_id 1706 * the Devlink family ID. 1707 * @param pci_addr 1708 * The device PCI address. 1709 * @param[out] enable 1710 * The enable status to set. 1711 * 1712 * @return 1713 * 0 on success, a negative errno value otherwise and rte_errno is set. 1714 */ 1715 int 1716 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, 1717 int enable) 1718 { 1719 struct nlmsghdr *nlh; 1720 struct genlmsghdr *genl; 1721 uint32_t sn = MLX5_NL_SN_GENERATE; 1722 int ret; 1723 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1724 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1725 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 + 1726 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6]; 1727 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; 1728 uint8_t ptype = NLA_FLAG; 1729 ; 1730 1731 memset(buf, 0, sizeof(buf)); 1732 nlh = (struct nlmsghdr *)buf; 1733 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1734 nlh->nlmsg_type = family_id; 1735 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1736 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1737 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1738 genl->cmd = DEVLINK_CMD_PARAM_SET; 1739 genl->version = DEVLINK_GENL_VERSION; 1740 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1741 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1742 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1743 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode)); 1744 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype)); 1745 if (enable) 1746 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0); 1747 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1748 if (ret >= 0) 1749 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1750 if (ret < 0) { 1751 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:" 1752 " %d.", enable ? "en" : "dis", pci_addr, ret); 1753 return ret; 1754 } 1755 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.", 1756 pci_addr, enable ? "en" : "dis"); 1757 /* Now, need to reload the driver. */ 1758 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); 1759 } 1760