1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018 6WIND S.A. 3 * Copyright 2018 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <linux/if_link.h> 8 #include <linux/rtnetlink.h> 9 #include <linux/genetlink.h> 10 #include <net/if.h> 11 #include <rdma/rdma_netlink.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <stdlib.h> 15 #include <stdalign.h> 16 #include <string.h> 17 #include <sys/socket.h> 18 #include <unistd.h> 19 20 #include <rte_errno.h> 21 22 #include "mlx5_nl.h" 23 #include "mlx5_common_utils.h" 24 #include "mlx5_malloc.h" 25 #ifdef HAVE_DEVLINK 26 #include <linux/devlink.h> 27 #endif 28 29 30 /* Size of the buffer to receive kernel messages */ 31 #define MLX5_NL_BUF_SIZE (32 * 1024) 32 /* Send buffer size for the Netlink socket */ 33 #define MLX5_SEND_BUF_SIZE 32768 34 /* Receive buffer size for the Netlink socket */ 35 #define MLX5_RECV_BUF_SIZE 32768 36 37 /** Parameters of VLAN devices created by driver. */ 38 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" 39 /* 40 * Define NDA_RTA as defined in iproute2 sources. 41 * 42 * see in iproute2 sources file include/libnetlink.h 43 */ 44 #ifndef MLX5_NDA_RTA 45 #define MLX5_NDA_RTA(r) \ 46 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 47 #endif 48 /* 49 * Define NLMSG_TAIL as defined in iproute2 sources. 50 * 51 * see in iproute2 sources file include/libnetlink.h 52 */ 53 #ifndef NLMSG_TAIL 54 #define NLMSG_TAIL(nmsg) \ 55 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 56 #endif 57 /* 58 * The following definitions are normally found in rdma/rdma_netlink.h, 59 * however they are so recent that most systems do not expose them yet. 60 */ 61 #ifndef HAVE_RDMA_NL_NLDEV 62 #define RDMA_NL_NLDEV 5 63 #endif 64 #ifndef HAVE_RDMA_NLDEV_CMD_GET 65 #define RDMA_NLDEV_CMD_GET 1 66 #endif 67 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET 68 #define RDMA_NLDEV_CMD_PORT_GET 5 69 #endif 70 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX 71 #define RDMA_NLDEV_ATTR_DEV_INDEX 1 72 #endif 73 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME 74 #define RDMA_NLDEV_ATTR_DEV_NAME 2 75 #endif 76 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX 77 #define RDMA_NLDEV_ATTR_PORT_INDEX 3 78 #endif 79 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX 80 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50 81 #endif 82 83 /* These are normally found in linux/if_link.h. */ 84 #ifndef HAVE_IFLA_NUM_VF 85 #define IFLA_NUM_VF 21 86 #endif 87 #ifndef HAVE_IFLA_EXT_MASK 88 #define IFLA_EXT_MASK 29 89 #endif 90 #ifndef HAVE_IFLA_PHYS_SWITCH_ID 91 #define IFLA_PHYS_SWITCH_ID 36 92 #endif 93 #ifndef HAVE_IFLA_PHYS_PORT_NAME 94 #define IFLA_PHYS_PORT_NAME 38 95 #endif 96 97 /* 98 * Some Devlink defines may be missed in old kernel versions, 99 * adjust used defines. 100 */ 101 #ifndef DEVLINK_GENL_NAME 102 #define DEVLINK_GENL_NAME "devlink" 103 #endif 104 #ifndef DEVLINK_GENL_VERSION 105 #define DEVLINK_GENL_VERSION 1 106 #endif 107 #ifndef DEVLINK_ATTR_BUS_NAME 108 #define DEVLINK_ATTR_BUS_NAME 1 109 #endif 110 #ifndef DEVLINK_ATTR_DEV_NAME 111 #define DEVLINK_ATTR_DEV_NAME 2 112 #endif 113 #ifndef DEVLINK_ATTR_PARAM 114 #define DEVLINK_ATTR_PARAM 80 115 #endif 116 #ifndef DEVLINK_ATTR_PARAM_NAME 117 #define DEVLINK_ATTR_PARAM_NAME 81 118 #endif 119 #ifndef DEVLINK_ATTR_PARAM_TYPE 120 #define DEVLINK_ATTR_PARAM_TYPE 83 121 #endif 122 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST 123 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84 124 #endif 125 #ifndef DEVLINK_ATTR_PARAM_VALUE 126 #define DEVLINK_ATTR_PARAM_VALUE 85 127 #endif 128 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA 129 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86 130 #endif 131 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE 132 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87 133 #endif 134 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT 135 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1 136 #endif 137 #ifndef DEVLINK_CMD_RELOAD 138 #define DEVLINK_CMD_RELOAD 37 139 #endif 140 #ifndef DEVLINK_CMD_PARAM_GET 141 #define DEVLINK_CMD_PARAM_GET 38 142 #endif 143 #ifndef DEVLINK_CMD_PARAM_SET 144 #define DEVLINK_CMD_PARAM_SET 39 145 #endif 146 #ifndef NLA_FLAG 147 #define NLA_FLAG 6 148 #endif 149 150 /* Add/remove MAC address through Netlink */ 151 struct mlx5_nl_mac_addr { 152 struct rte_ether_addr (*mac)[]; 153 /**< MAC address handled by the device. */ 154 int mac_n; /**< Number of addresses in the array. */ 155 }; 156 157 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0) 158 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) 159 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) 160 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) 161 162 /** Data structure used by mlx5_nl_cmdget_cb(). */ 163 struct mlx5_nl_ifindex_data { 164 const char *name; /**< IB device name (in). */ 165 uint32_t flags; /**< found attribute flags (out). */ 166 uint32_t ibindex; /**< IB device index (out). */ 167 uint32_t ifindex; /**< Network interface index (out). */ 168 uint32_t portnum; /**< IB device max port number (out). */ 169 }; 170 171 uint32_t atomic_sn; 172 173 /* Generate Netlink sequence number. */ 174 #define MLX5_NL_SN_GENERATE __atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED) 175 176 /** 177 * Opens a Netlink socket. 178 * 179 * @param protocol 180 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). 181 * 182 * @return 183 * A file descriptor on success, a negative errno value otherwise and 184 * rte_errno is set. 185 */ 186 int 187 mlx5_nl_init(int protocol) 188 { 189 int fd; 190 int sndbuf_size = MLX5_SEND_BUF_SIZE; 191 int rcvbuf_size = MLX5_RECV_BUF_SIZE; 192 struct sockaddr_nl local = { 193 .nl_family = AF_NETLINK, 194 }; 195 int ret; 196 197 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); 198 if (fd == -1) { 199 rte_errno = errno; 200 return -rte_errno; 201 } 202 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int)); 203 if (ret == -1) { 204 rte_errno = errno; 205 goto error; 206 } 207 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int)); 208 if (ret == -1) { 209 rte_errno = errno; 210 goto error; 211 } 212 ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); 213 if (ret == -1) { 214 rte_errno = errno; 215 goto error; 216 } 217 return fd; 218 error: 219 close(fd); 220 return -rte_errno; 221 } 222 223 /** 224 * Send a request message to the kernel on the Netlink socket. 225 * 226 * @param[in] nlsk_fd 227 * Netlink socket file descriptor. 228 * @param[in] nh 229 * The Netlink message send to the kernel. 230 * @param[in] ssn 231 * Sequence number. 232 * @param[in] req 233 * Pointer to the request structure. 234 * @param[in] len 235 * Length of the request in bytes. 236 * 237 * @return 238 * The number of sent bytes on success, a negative errno value otherwise and 239 * rte_errno is set. 240 */ 241 static int 242 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, 243 int len) 244 { 245 struct sockaddr_nl sa = { 246 .nl_family = AF_NETLINK, 247 }; 248 struct iovec iov[2] = { 249 { .iov_base = nh, .iov_len = sizeof(*nh), }, 250 { .iov_base = req, .iov_len = len, }, 251 }; 252 struct msghdr msg = { 253 .msg_name = &sa, 254 .msg_namelen = sizeof(sa), 255 .msg_iov = iov, 256 .msg_iovlen = 2, 257 }; 258 int send_bytes; 259 260 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 261 nh->nlmsg_seq = sn; 262 send_bytes = sendmsg(nlsk_fd, &msg, 0); 263 if (send_bytes < 0) { 264 rte_errno = errno; 265 return -rte_errno; 266 } 267 return send_bytes; 268 } 269 270 /** 271 * Send a message to the kernel on the Netlink socket. 272 * 273 * @param[in] nlsk_fd 274 * The Netlink socket file descriptor used for communication. 275 * @param[in] nh 276 * The Netlink message send to the kernel. 277 * @param[in] sn 278 * Sequence number. 279 * 280 * @return 281 * The number of sent bytes on success, a negative errno value otherwise and 282 * rte_errno is set. 283 */ 284 static int 285 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) 286 { 287 struct sockaddr_nl sa = { 288 .nl_family = AF_NETLINK, 289 }; 290 struct iovec iov = { 291 .iov_base = nh, 292 .iov_len = nh->nlmsg_len, 293 }; 294 struct msghdr msg = { 295 .msg_name = &sa, 296 .msg_namelen = sizeof(sa), 297 .msg_iov = &iov, 298 .msg_iovlen = 1, 299 }; 300 int send_bytes; 301 302 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 303 nh->nlmsg_seq = sn; 304 send_bytes = sendmsg(nlsk_fd, &msg, 0); 305 if (send_bytes < 0) { 306 rte_errno = errno; 307 return -rte_errno; 308 } 309 return send_bytes; 310 } 311 312 /** 313 * Receive a message from the kernel on the Netlink socket, following 314 * mlx5_nl_send(). 315 * 316 * @param[in] nlsk_fd 317 * The Netlink socket file descriptor used for communication. 318 * @param[in] sn 319 * Sequence number. 320 * @param[in] cb 321 * The callback function to call for each Netlink message received. 322 * @param[in, out] arg 323 * Custom arguments for the callback. 324 * 325 * @return 326 * 0 on success, a negative errno value otherwise and rte_errno is set. 327 */ 328 static int 329 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), 330 void *arg) 331 { 332 struct sockaddr_nl sa; 333 void *buf = mlx5_malloc(0, MLX5_RECV_BUF_SIZE, 0, SOCKET_ID_ANY); 334 struct iovec iov = { 335 .iov_base = buf, 336 .iov_len = MLX5_RECV_BUF_SIZE, 337 }; 338 struct msghdr msg = { 339 .msg_name = &sa, 340 .msg_namelen = sizeof(sa), 341 .msg_iov = &iov, 342 /* One message at a time */ 343 .msg_iovlen = 1, 344 }; 345 int multipart = 0; 346 int ret = 0; 347 348 if (!buf) { 349 rte_errno = ENOMEM; 350 return -rte_errno; 351 } 352 do { 353 struct nlmsghdr *nh; 354 int recv_bytes = 0; 355 356 do { 357 recv_bytes = recvmsg(nlsk_fd, &msg, 0); 358 if (recv_bytes == -1) { 359 rte_errno = errno; 360 ret = -rte_errno; 361 goto exit; 362 } 363 nh = (struct nlmsghdr *)buf; 364 } while (nh->nlmsg_seq != sn); 365 for (; 366 NLMSG_OK(nh, (unsigned int)recv_bytes); 367 nh = NLMSG_NEXT(nh, recv_bytes)) { 368 if (nh->nlmsg_type == NLMSG_ERROR) { 369 struct nlmsgerr *err_data = NLMSG_DATA(nh); 370 371 if (err_data->error < 0) { 372 rte_errno = -err_data->error; 373 ret = -rte_errno; 374 goto exit; 375 } 376 /* Ack message. */ 377 ret = 0; 378 goto exit; 379 } 380 /* Multi-part msgs and their trailing DONE message. */ 381 if (nh->nlmsg_flags & NLM_F_MULTI) { 382 if (nh->nlmsg_type == NLMSG_DONE) { 383 ret = 0; 384 goto exit; 385 } 386 multipart = 1; 387 } 388 if (cb) { 389 ret = cb(nh, arg); 390 if (ret < 0) 391 goto exit; 392 } 393 } 394 } while (multipart); 395 exit: 396 mlx5_free(buf); 397 return ret; 398 } 399 400 /** 401 * Parse Netlink message to retrieve the bridge MAC address. 402 * 403 * @param nh 404 * Pointer to Netlink Message Header. 405 * @param arg 406 * PMD data register with this callback. 407 * 408 * @return 409 * 0 on success, a negative errno value otherwise and rte_errno is set. 410 */ 411 static int 412 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) 413 { 414 struct mlx5_nl_mac_addr *data = arg; 415 struct ndmsg *r = NLMSG_DATA(nh); 416 struct rtattr *attribute; 417 int len; 418 419 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); 420 for (attribute = MLX5_NDA_RTA(r); 421 RTA_OK(attribute, len); 422 attribute = RTA_NEXT(attribute, len)) { 423 if (attribute->rta_type == NDA_LLADDR) { 424 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { 425 DRV_LOG(WARNING, 426 "not enough room to finalize the" 427 " request"); 428 rte_errno = ENOMEM; 429 return -rte_errno; 430 } 431 #ifdef RTE_LIBRTE_MLX5_DEBUG 432 char m[RTE_ETHER_ADDR_FMT_SIZE]; 433 434 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, 435 RTA_DATA(attribute)); 436 DRV_LOG(DEBUG, "bridge MAC address %s", m); 437 #endif 438 memcpy(&(*data->mac)[data->mac_n++], 439 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); 440 } 441 } 442 return 0; 443 } 444 445 /** 446 * Get bridge MAC addresses. 447 * 448 * @param[in] nlsk_fd 449 * Netlink socket file descriptor. 450 * @param[in] iface_idx 451 * Net device interface index. 452 * @param mac[out] 453 * Pointer to the array table of MAC addresses to fill. 454 * Its size should be of MLX5_MAX_MAC_ADDRESSES. 455 * @param mac_n[out] 456 * Number of entries filled in MAC array. 457 * 458 * @return 459 * 0 on success, a negative errno value otherwise and rte_errno is set. 460 */ 461 static int 462 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, 463 struct rte_ether_addr (*mac)[], int *mac_n) 464 { 465 struct { 466 struct nlmsghdr hdr; 467 struct ifinfomsg ifm; 468 } req = { 469 .hdr = { 470 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 471 .nlmsg_type = RTM_GETNEIGH, 472 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, 473 }, 474 .ifm = { 475 .ifi_family = PF_BRIDGE, 476 .ifi_index = iface_idx, 477 }, 478 }; 479 struct mlx5_nl_mac_addr data = { 480 .mac = mac, 481 .mac_n = 0, 482 }; 483 uint32_t sn = MLX5_NL_SN_GENERATE; 484 int ret; 485 486 if (nlsk_fd == -1) 487 return 0; 488 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, 489 sizeof(struct ifinfomsg)); 490 if (ret < 0) 491 goto error; 492 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); 493 if (ret < 0) 494 goto error; 495 *mac_n = data.mac_n; 496 return 0; 497 error: 498 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", 499 iface_idx, strerror(rte_errno)); 500 return -rte_errno; 501 } 502 503 /** 504 * Modify the MAC address neighbour table with Netlink. 505 * 506 * @param[in] nlsk_fd 507 * Netlink socket file descriptor. 508 * @param[in] iface_idx 509 * Net device interface index. 510 * @param mac 511 * MAC address to consider. 512 * @param add 513 * 1 to add the MAC address, 0 to remove the MAC address. 514 * 515 * @return 516 * 0 on success, a negative errno value otherwise and rte_errno is set. 517 */ 518 static int 519 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 520 struct rte_ether_addr *mac, int add) 521 { 522 struct { 523 struct nlmsghdr hdr; 524 struct ndmsg ndm; 525 struct rtattr rta; 526 uint8_t buffer[RTE_ETHER_ADDR_LEN]; 527 } req = { 528 .hdr = { 529 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), 530 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 531 NLM_F_EXCL | NLM_F_ACK, 532 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH, 533 }, 534 .ndm = { 535 .ndm_family = PF_BRIDGE, 536 .ndm_state = NUD_NOARP | NUD_PERMANENT, 537 .ndm_ifindex = iface_idx, 538 .ndm_flags = NTF_SELF, 539 }, 540 .rta = { 541 .rta_type = NDA_LLADDR, 542 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), 543 }, 544 }; 545 uint32_t sn = MLX5_NL_SN_GENERATE; 546 int ret; 547 548 if (nlsk_fd == -1) 549 return 0; 550 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); 551 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 552 RTA_ALIGN(req.rta.rta_len); 553 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 554 if (ret < 0) 555 goto error; 556 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 557 if (ret < 0) 558 goto error; 559 return 0; 560 error: 561 #ifdef RTE_LIBRTE_MLX5_DEBUG 562 { 563 char m[RTE_ETHER_ADDR_FMT_SIZE]; 564 565 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac); 566 DRV_LOG(DEBUG, 567 "Interface %u cannot %s MAC address %s %s", 568 iface_idx, 569 add ? "add" : "remove", m, strerror(rte_errno)); 570 } 571 #endif 572 return -rte_errno; 573 } 574 575 /** 576 * Modify the VF MAC address neighbour table with Netlink. 577 * 578 * @param[in] nlsk_fd 579 * Netlink socket file descriptor. 580 * @param[in] iface_idx 581 * Net device interface index. 582 * @param mac 583 * MAC address to consider. 584 * @param vf_index 585 * VF index. 586 * 587 * @return 588 * 0 on success, a negative errno value otherwise and rte_errno is set. 589 */ 590 int 591 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 592 struct rte_ether_addr *mac, int vf_index) 593 { 594 int ret; 595 struct { 596 struct nlmsghdr hdr; 597 struct ifinfomsg ifm; 598 struct rtattr vf_list_rta; 599 struct rtattr vf_info_rta; 600 struct rtattr vf_mac_rta; 601 struct ifla_vf_mac ivm; 602 } req = { 603 .hdr = { 604 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 605 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 606 .nlmsg_type = RTM_BASE, 607 }, 608 .ifm = { 609 .ifi_index = iface_idx, 610 }, 611 .vf_list_rta = { 612 .rta_type = IFLA_VFINFO_LIST, 613 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 614 }, 615 .vf_info_rta = { 616 .rta_type = IFLA_VF_INFO, 617 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 618 }, 619 .vf_mac_rta = { 620 .rta_type = IFLA_VF_MAC, 621 }, 622 }; 623 struct ifla_vf_mac ivm = { 624 .vf = vf_index, 625 }; 626 uint32_t sn = MLX5_NL_SN_GENERATE; 627 628 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); 629 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); 630 631 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); 632 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 633 RTA_ALIGN(req.vf_list_rta.rta_len) + 634 RTA_ALIGN(req.vf_info_rta.rta_len) + 635 RTA_ALIGN(req.vf_mac_rta.rta_len); 636 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 637 &req.vf_list_rta); 638 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 639 &req.vf_info_rta); 640 641 if (nlsk_fd < 0) 642 return -1; 643 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 644 if (ret < 0) 645 goto error; 646 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 647 if (ret < 0) 648 goto error; 649 return 0; 650 error: 651 DRV_LOG(ERR, 652 "representor %u cannot set VF MAC address " 653 "%02X:%02X:%02X:%02X:%02X:%02X : %s", 654 vf_index, 655 mac->addr_bytes[0], mac->addr_bytes[1], 656 mac->addr_bytes[2], mac->addr_bytes[3], 657 mac->addr_bytes[4], mac->addr_bytes[5], 658 strerror(rte_errno)); 659 return -rte_errno; 660 } 661 662 /** 663 * Add a MAC address. 664 * 665 * @param[in] nlsk_fd 666 * Netlink socket file descriptor. 667 * @param[in] iface_idx 668 * Net device interface index. 669 * @param mac_own 670 * BITFIELD_DECLARE array to store the mac. 671 * @param mac 672 * MAC address to register. 673 * @param index 674 * MAC address index. 675 * 676 * @return 677 * 0 on success, a negative errno value otherwise and rte_errno is set. 678 */ 679 int 680 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, 681 uint64_t *mac_own, struct rte_ether_addr *mac, 682 uint32_t index) 683 { 684 int ret; 685 686 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); 687 if (!ret) { 688 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 689 if (index >= MLX5_MAX_MAC_ADDRESSES) 690 return -EINVAL; 691 692 BITFIELD_SET(mac_own, index); 693 } 694 if (ret == -EEXIST) 695 return 0; 696 return ret; 697 } 698 699 /** 700 * Remove a MAC address. 701 * 702 * @param[in] nlsk_fd 703 * Netlink socket file descriptor. 704 * @param[in] iface_idx 705 * Net device interface index. 706 * @param mac_own 707 * BITFIELD_DECLARE array to store the mac. 708 * @param mac 709 * MAC address to remove. 710 * @param index 711 * MAC address index. 712 * 713 * @return 714 * 0 on success, a negative errno value otherwise and rte_errno is set. 715 */ 716 int 717 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, 718 struct rte_ether_addr *mac, uint32_t index) 719 { 720 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 721 if (index >= MLX5_MAX_MAC_ADDRESSES) 722 return -EINVAL; 723 724 BITFIELD_RESET(mac_own, index); 725 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); 726 } 727 728 /** 729 * Synchronize Netlink bridge table to the internal table. 730 * 731 * @param[in] nlsk_fd 732 * Netlink socket file descriptor. 733 * @param[in] iface_idx 734 * Net device interface index. 735 * @param mac_addrs 736 * Mac addresses array to sync. 737 * @param n 738 * @p mac_addrs array size. 739 */ 740 void 741 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, 742 struct rte_ether_addr *mac_addrs, int n) 743 { 744 struct rte_ether_addr macs[n]; 745 int macs_n = 0; 746 int i; 747 int ret; 748 749 memset(macs, 0, n * sizeof(macs[0])); 750 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); 751 if (ret) 752 return; 753 for (i = 0; i != macs_n; ++i) { 754 int j; 755 756 /* Verify the address is not in the array yet. */ 757 for (j = 0; j != n; ++j) 758 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) 759 break; 760 if (j != n) 761 continue; 762 if (rte_is_multicast_ether_addr(&macs[i])) { 763 /* Find the first entry available. */ 764 for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) { 765 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 766 mac_addrs[j] = macs[i]; 767 break; 768 } 769 } 770 } else { 771 /* Find the first entry available. */ 772 for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) { 773 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 774 mac_addrs[j] = macs[i]; 775 break; 776 } 777 } 778 } 779 } 780 } 781 782 /** 783 * Flush all added MAC addresses. 784 * 785 * @param[in] nlsk_fd 786 * Netlink socket file descriptor. 787 * @param[in] iface_idx 788 * Net device interface index. 789 * @param[in] mac_addrs 790 * Mac addresses array to flush. 791 * @param n 792 * @p mac_addrs array size. 793 * @param mac_own 794 * BITFIELD_DECLARE array to store the mac. 795 */ 796 void 797 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, 798 struct rte_ether_addr *mac_addrs, int n, 799 uint64_t *mac_own) 800 { 801 int i; 802 803 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES) 804 return; 805 806 for (i = n - 1; i >= 0; --i) { 807 struct rte_ether_addr *m = &mac_addrs[i]; 808 809 if (BITFIELD_ISSET(mac_own, i)) 810 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, 811 i); 812 } 813 } 814 815 /** 816 * Enable promiscuous / all multicast mode through Netlink. 817 * 818 * @param[in] nlsk_fd 819 * Netlink socket file descriptor. 820 * @param[in] iface_idx 821 * Net device interface index. 822 * @param flags 823 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. 824 * @param enable 825 * Nonzero to enable, disable otherwise. 826 * 827 * @return 828 * 0 on success, a negative errno value otherwise and rte_errno is set. 829 */ 830 static int 831 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, 832 int enable) 833 { 834 struct { 835 struct nlmsghdr hdr; 836 struct ifinfomsg ifi; 837 } req = { 838 .hdr = { 839 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 840 .nlmsg_type = RTM_NEWLINK, 841 .nlmsg_flags = NLM_F_REQUEST, 842 }, 843 .ifi = { 844 .ifi_flags = enable ? flags : 0, 845 .ifi_change = flags, 846 .ifi_index = iface_idx, 847 }, 848 }; 849 uint32_t sn = MLX5_NL_SN_GENERATE; 850 int ret; 851 852 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); 853 if (nlsk_fd < 0) 854 return 0; 855 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 856 if (ret < 0) 857 return ret; 858 return 0; 859 } 860 861 /** 862 * Enable promiscuous mode through Netlink. 863 * 864 * @param[in] nlsk_fd 865 * Netlink socket file descriptor. 866 * @param[in] iface_idx 867 * Net device interface index. 868 * @param enable 869 * Nonzero to enable, disable otherwise. 870 * 871 * @return 872 * 0 on success, a negative errno value otherwise and rte_errno is set. 873 */ 874 int 875 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) 876 { 877 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); 878 879 if (ret) 880 DRV_LOG(DEBUG, 881 "Interface %u cannot %s promisc mode: Netlink error %s", 882 iface_idx, enable ? "enable" : "disable", 883 strerror(rte_errno)); 884 return ret; 885 } 886 887 /** 888 * Enable all multicast mode through Netlink. 889 * 890 * @param[in] nlsk_fd 891 * Netlink socket file descriptor. 892 * @param[in] iface_idx 893 * Net device interface index. 894 * @param enable 895 * Nonzero to enable, disable otherwise. 896 * 897 * @return 898 * 0 on success, a negative errno value otherwise and rte_errno is set. 899 */ 900 int 901 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) 902 { 903 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, 904 enable); 905 906 if (ret) 907 DRV_LOG(DEBUG, 908 "Interface %u cannot %s allmulti : Netlink error %s", 909 iface_idx, enable ? "enable" : "disable", 910 strerror(rte_errno)); 911 return ret; 912 } 913 914 /** 915 * Process network interface information from Netlink message. 916 * 917 * @param nh 918 * Pointer to Netlink message header. 919 * @param arg 920 * Opaque data pointer for this callback. 921 * 922 * @return 923 * 0 on success, a negative errno value otherwise and rte_errno is set. 924 */ 925 static int 926 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) 927 { 928 struct mlx5_nl_ifindex_data *data = arg; 929 struct mlx5_nl_ifindex_data local = { 930 .flags = 0, 931 }; 932 size_t off = NLMSG_HDRLEN; 933 934 if (nh->nlmsg_type != 935 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && 936 nh->nlmsg_type != 937 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) 938 goto error; 939 while (off < nh->nlmsg_len) { 940 struct nlattr *na = (void *)((uintptr_t)nh + off); 941 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); 942 943 if (na->nla_len > nh->nlmsg_len - off) 944 goto error; 945 switch (na->nla_type) { 946 case RDMA_NLDEV_ATTR_DEV_INDEX: 947 local.ibindex = *(uint32_t *)payload; 948 local.flags |= MLX5_NL_CMD_GET_IB_INDEX; 949 break; 950 case RDMA_NLDEV_ATTR_DEV_NAME: 951 if (!strcmp(payload, data->name)) 952 local.flags |= MLX5_NL_CMD_GET_IB_NAME; 953 break; 954 case RDMA_NLDEV_ATTR_NDEV_INDEX: 955 local.ifindex = *(uint32_t *)payload; 956 local.flags |= MLX5_NL_CMD_GET_NET_INDEX; 957 break; 958 case RDMA_NLDEV_ATTR_PORT_INDEX: 959 local.portnum = *(uint32_t *)payload; 960 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; 961 break; 962 default: 963 break; 964 } 965 off += NLA_ALIGN(na->nla_len); 966 } 967 /* 968 * It is possible to have multiple messages for all 969 * Infiniband devices in the system with appropriate name. 970 * So we should gather parameters locally and copy to 971 * query context only in case of coinciding device name. 972 */ 973 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { 974 data->flags = local.flags; 975 data->ibindex = local.ibindex; 976 data->ifindex = local.ifindex; 977 data->portnum = local.portnum; 978 } 979 return 0; 980 error: 981 rte_errno = EINVAL; 982 return -rte_errno; 983 } 984 985 /** 986 * Get index of network interface associated with some IB device. 987 * 988 * This is the only somewhat safe method to avoid resorting to heuristics 989 * when faced with port representors. Unfortunately it requires at least 990 * Linux 4.17. 991 * 992 * @param nl 993 * Netlink socket of the RDMA kind (NETLINK_RDMA). 994 * @param[in] name 995 * IB device name. 996 * @param[in] pindex 997 * IB device port index, starting from 1 998 * @return 999 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno 1000 * is set. 1001 */ 1002 unsigned int 1003 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) 1004 { 1005 struct mlx5_nl_ifindex_data data = { 1006 .name = name, 1007 .flags = 0, 1008 .ibindex = 0, /* Determined during first pass. */ 1009 .ifindex = 0, /* Determined during second pass. */ 1010 }; 1011 union { 1012 struct nlmsghdr nh; 1013 uint8_t buf[NLMSG_HDRLEN + 1014 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) + 1015 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; 1016 } req = { 1017 .nh = { 1018 .nlmsg_len = NLMSG_LENGTH(0), 1019 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1020 RDMA_NLDEV_CMD_GET), 1021 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1022 }, 1023 }; 1024 struct nlattr *na; 1025 uint32_t sn = MLX5_NL_SN_GENERATE; 1026 int ret; 1027 1028 ret = mlx5_nl_send(nl, &req.nh, sn); 1029 if (ret < 0) 1030 return 0; 1031 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1032 if (ret < 0) 1033 return 0; 1034 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1035 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) 1036 goto error; 1037 data.flags = 0; 1038 sn = MLX5_NL_SN_GENERATE; 1039 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1040 RDMA_NLDEV_CMD_PORT_GET); 1041 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1042 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); 1043 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); 1044 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex); 1045 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; 1046 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1047 &data.ibindex, sizeof(data.ibindex)); 1048 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); 1049 na->nla_len = NLA_HDRLEN + sizeof(pindex); 1050 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; 1051 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1052 &pindex, sizeof(pindex)); 1053 ret = mlx5_nl_send(nl, &req.nh, sn); 1054 if (ret < 0) 1055 return 0; 1056 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1057 if (ret < 0) 1058 return 0; 1059 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1060 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1061 !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || 1062 !data.ifindex) 1063 goto error; 1064 return data.ifindex; 1065 error: 1066 rte_errno = ENODEV; 1067 return 0; 1068 } 1069 1070 /** 1071 * Get the number of physical ports of given IB device. 1072 * 1073 * @param nl 1074 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1075 * @param[in] name 1076 * IB device name. 1077 * 1078 * @return 1079 * A valid (nonzero) number of ports on success, 0 otherwise 1080 * and rte_errno is set. 1081 */ 1082 unsigned int 1083 mlx5_nl_portnum(int nl, const char *name) 1084 { 1085 struct mlx5_nl_ifindex_data data = { 1086 .flags = 0, 1087 .name = name, 1088 .ifindex = 0, 1089 .portnum = 0, 1090 }; 1091 struct nlmsghdr req = { 1092 .nlmsg_len = NLMSG_LENGTH(0), 1093 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1094 RDMA_NLDEV_CMD_GET), 1095 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1096 }; 1097 uint32_t sn = MLX5_NL_SN_GENERATE; 1098 int ret; 1099 1100 ret = mlx5_nl_send(nl, &req, sn); 1101 if (ret < 0) 1102 return 0; 1103 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1104 if (ret < 0) 1105 return 0; 1106 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1107 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1108 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { 1109 rte_errno = ENODEV; 1110 return 0; 1111 } 1112 if (!data.portnum) 1113 rte_errno = EINVAL; 1114 return data.portnum; 1115 } 1116 1117 /** 1118 * Analyze gathered port parameters via Netlink to recognize master 1119 * and representor devices for E-Switch configuration. 1120 * 1121 * @param[in] num_vf_set 1122 * flag of presence of number of VFs port attribute. 1123 * @param[inout] switch_info 1124 * Port information, including port name as a number and port name 1125 * type if recognized 1126 * 1127 * @return 1128 * master and representor flags are set in switch_info according to 1129 * recognized parameters (if any). 1130 */ 1131 static void 1132 mlx5_nl_check_switch_info(bool num_vf_set, 1133 struct mlx5_switch_info *switch_info) 1134 { 1135 switch (switch_info->name_type) { 1136 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1137 /* 1138 * Name is not recognized, assume the master, 1139 * check the number of VFs key presence. 1140 */ 1141 switch_info->master = num_vf_set; 1142 break; 1143 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1144 /* 1145 * Name is not set, this assumes the legacy naming 1146 * schema for master, just check if there is a 1147 * number of VFs key. 1148 */ 1149 switch_info->master = num_vf_set; 1150 break; 1151 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1152 /* New uplink naming schema recognized. */ 1153 switch_info->master = 1; 1154 break; 1155 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1156 /* Legacy representors naming schema. */ 1157 switch_info->representor = !num_vf_set; 1158 break; 1159 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: 1160 /* Fallthrough */ 1161 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1162 /* Fallthrough */ 1163 case MLX5_PHYS_PORT_NAME_TYPE_PFSF: 1164 /* New representors naming schema. */ 1165 switch_info->representor = 1; 1166 break; 1167 } 1168 } 1169 1170 /** 1171 * Process switch information from Netlink message. 1172 * 1173 * @param nh 1174 * Pointer to Netlink message header. 1175 * @param arg 1176 * Opaque data pointer for this callback. 1177 * 1178 * @return 1179 * 0 on success, a negative errno value otherwise and rte_errno is set. 1180 */ 1181 static int 1182 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) 1183 { 1184 struct mlx5_switch_info info = { 1185 .master = 0, 1186 .representor = 0, 1187 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1188 .port_name = 0, 1189 .switch_id = 0, 1190 }; 1191 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 1192 bool switch_id_set = false; 1193 bool num_vf_set = false; 1194 1195 if (nh->nlmsg_type != RTM_NEWLINK) 1196 goto error; 1197 while (off < nh->nlmsg_len) { 1198 struct rtattr *ra = (void *)((uintptr_t)nh + off); 1199 void *payload = RTA_DATA(ra); 1200 unsigned int i; 1201 1202 if (ra->rta_len > nh->nlmsg_len - off) 1203 goto error; 1204 switch (ra->rta_type) { 1205 case IFLA_NUM_VF: 1206 num_vf_set = true; 1207 break; 1208 case IFLA_PHYS_PORT_NAME: 1209 mlx5_translate_port_name((char *)payload, &info); 1210 break; 1211 case IFLA_PHYS_SWITCH_ID: 1212 info.switch_id = 0; 1213 for (i = 0; i < RTA_PAYLOAD(ra); ++i) { 1214 info.switch_id <<= 8; 1215 info.switch_id |= ((uint8_t *)payload)[i]; 1216 } 1217 switch_id_set = true; 1218 break; 1219 } 1220 off += RTA_ALIGN(ra->rta_len); 1221 } 1222 if (switch_id_set) { 1223 /* We have some E-Switch configuration. */ 1224 mlx5_nl_check_switch_info(num_vf_set, &info); 1225 } 1226 MLX5_ASSERT(!(info.master && info.representor)); 1227 memcpy(arg, &info, sizeof(info)); 1228 return 0; 1229 error: 1230 rte_errno = EINVAL; 1231 return -rte_errno; 1232 } 1233 1234 /** 1235 * Get switch information associated with network interface. 1236 * 1237 * @param nl 1238 * Netlink socket of the ROUTE kind (NETLINK_ROUTE). 1239 * @param ifindex 1240 * Network interface index. 1241 * @param[out] info 1242 * Switch information object, populated in case of success. 1243 * 1244 * @return 1245 * 0 on success, a negative errno value otherwise and rte_errno is set. 1246 */ 1247 int 1248 mlx5_nl_switch_info(int nl, unsigned int ifindex, 1249 struct mlx5_switch_info *info) 1250 { 1251 struct { 1252 struct nlmsghdr nh; 1253 struct ifinfomsg info; 1254 struct rtattr rta; 1255 uint32_t extmask; 1256 } req = { 1257 .nh = { 1258 .nlmsg_len = NLMSG_LENGTH 1259 (sizeof(req.info) + 1260 RTA_LENGTH(sizeof(uint32_t))), 1261 .nlmsg_type = RTM_GETLINK, 1262 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1263 }, 1264 .info = { 1265 .ifi_family = AF_UNSPEC, 1266 .ifi_index = ifindex, 1267 }, 1268 .rta = { 1269 .rta_type = IFLA_EXT_MASK, 1270 .rta_len = RTA_LENGTH(sizeof(int32_t)), 1271 }, 1272 .extmask = RTE_LE32(1), 1273 }; 1274 uint32_t sn = MLX5_NL_SN_GENERATE; 1275 int ret; 1276 1277 ret = mlx5_nl_send(nl, &req.nh, sn); 1278 if (ret >= 0) 1279 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); 1280 if (info->master && info->representor) { 1281 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1282 " and as representor", ifindex); 1283 rte_errno = ENODEV; 1284 ret = -rte_errno; 1285 } 1286 return ret; 1287 } 1288 1289 /* 1290 * Delete VLAN network device by ifindex. 1291 * 1292 * @param[in] tcf 1293 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1294 * @param[in] ifindex 1295 * Interface index of network device to delete. 1296 */ 1297 void 1298 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, 1299 uint32_t ifindex) 1300 { 1301 uint32_t sn = MLX5_NL_SN_GENERATE; 1302 int ret; 1303 struct { 1304 struct nlmsghdr nh; 1305 struct ifinfomsg info; 1306 } req = { 1307 .nh = { 1308 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 1309 .nlmsg_type = RTM_DELLINK, 1310 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1311 }, 1312 .info = { 1313 .ifi_family = AF_UNSPEC, 1314 .ifi_index = ifindex, 1315 }, 1316 }; 1317 1318 if (ifindex) { 1319 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn); 1320 if (ret >= 0) 1321 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1322 if (ret < 0) 1323 DRV_LOG(WARNING, "netlink: error deleting VLAN WA" 1324 " ifindex %u, %d", ifindex, ret); 1325 } 1326 } 1327 1328 /* Set of subroutines to build Netlink message. */ 1329 static struct nlattr * 1330 nl_msg_tail(struct nlmsghdr *nlh) 1331 { 1332 return (struct nlattr *) 1333 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); 1334 } 1335 1336 static void 1337 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) 1338 { 1339 struct nlattr *nla = nl_msg_tail(nlh); 1340 1341 nla->nla_type = type; 1342 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen; 1343 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len); 1344 1345 if (alen) 1346 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); 1347 } 1348 1349 static struct nlattr * 1350 nl_attr_nest_start(struct nlmsghdr *nlh, int type) 1351 { 1352 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); 1353 1354 nl_attr_put(nlh, type, NULL, 0); 1355 return nest; 1356 } 1357 1358 static void 1359 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) 1360 { 1361 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; 1362 } 1363 1364 /* 1365 * Create network VLAN device with specified VLAN tag. 1366 * 1367 * @param[in] tcf 1368 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1369 * @param[in] ifindex 1370 * Base network interface index. 1371 * @param[in] tag 1372 * VLAN tag for VLAN network device to create. 1373 */ 1374 uint32_t 1375 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, 1376 uint32_t ifindex, uint16_t tag) 1377 { 1378 struct nlmsghdr *nlh; 1379 struct ifinfomsg *ifm; 1380 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; 1381 1382 __rte_cache_aligned 1383 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1384 NLMSG_ALIGN(sizeof(struct ifinfomsg)) + 1385 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + 1386 NLMSG_ALIGN(sizeof(uint32_t)) + 1387 NLMSG_ALIGN(sizeof(name)) + 1388 NLMSG_ALIGN(sizeof("vlan")) + 1389 NLMSG_ALIGN(sizeof(uint32_t)) + 1390 NLMSG_ALIGN(sizeof(uint16_t)) + 16]; 1391 struct nlattr *na_info; 1392 struct nlattr *na_vlan; 1393 uint32_t sn = MLX5_NL_SN_GENERATE; 1394 int ret; 1395 1396 memset(buf, 0, sizeof(buf)); 1397 nlh = (struct nlmsghdr *)buf; 1398 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1399 nlh->nlmsg_type = RTM_NEWLINK; 1400 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 1401 NLM_F_EXCL | NLM_F_ACK; 1402 ifm = (struct ifinfomsg *)nl_msg_tail(nlh); 1403 nlh->nlmsg_len += sizeof(struct ifinfomsg); 1404 ifm->ifi_family = AF_UNSPEC; 1405 ifm->ifi_type = 0; 1406 ifm->ifi_index = 0; 1407 ifm->ifi_flags = IFF_UP; 1408 ifm->ifi_change = 0xffffffff; 1409 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); 1410 ret = snprintf(name, sizeof(name), "%s.%u.%u", 1411 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); 1412 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); 1413 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); 1414 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); 1415 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); 1416 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); 1417 nl_attr_nest_end(nlh, na_vlan); 1418 nl_attr_nest_end(nlh, na_info); 1419 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len); 1420 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn); 1421 if (ret >= 0) 1422 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1423 if (ret < 0) { 1424 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name, 1425 ret); 1426 } 1427 /* Try to get ifindex of created or pre-existing device. */ 1428 ret = if_nametoindex(name); 1429 if (!ret) { 1430 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name, 1431 errno); 1432 return 0; 1433 } 1434 return ret; 1435 } 1436 1437 /** 1438 * Parse Netlink message to retrieve the general family ID. 1439 * 1440 * @param nh 1441 * Pointer to Netlink Message Header. 1442 * @param arg 1443 * PMD data register with this callback. 1444 * 1445 * @return 1446 * 0 on success, a negative errno value otherwise and rte_errno is set. 1447 */ 1448 static int 1449 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg) 1450 { 1451 1452 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1453 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1454 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1455 1456 for (; nla->nla_len && nla < tail; 1457 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) { 1458 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) { 1459 *(uint16_t *)arg = *(uint16_t *)(nla + 1); 1460 return 0; 1461 } 1462 } 1463 return -EINVAL; 1464 } 1465 1466 #define MLX5_NL_MAX_ATTR_SIZE 100 1467 /** 1468 * Get generic netlink family ID. 1469 * 1470 * @param[in] nlsk_fd 1471 * Netlink socket file descriptor. 1472 * @param[in] name 1473 * The family name. 1474 * 1475 * @return 1476 * ID >= 0 on success and @p enable is updated, a negative errno value 1477 * otherwise and rte_errno is set. 1478 */ 1479 static int 1480 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name) 1481 { 1482 struct nlmsghdr *nlh; 1483 struct genlmsghdr *genl; 1484 uint32_t sn = MLX5_NL_SN_GENERATE; 1485 int name_size = strlen(name) + 1; 1486 int ret; 1487 uint16_t id = -1; 1488 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1489 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1490 NLMSG_ALIGN(sizeof(struct nlattr)) + 1491 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)]; 1492 1493 memset(buf, 0, sizeof(buf)); 1494 nlh = (struct nlmsghdr *)buf; 1495 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1496 nlh->nlmsg_type = GENL_ID_CTRL; 1497 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1498 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1499 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1500 genl->cmd = CTRL_CMD_GETFAMILY; 1501 genl->version = 1; 1502 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size); 1503 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1504 if (ret >= 0) 1505 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id); 1506 if (ret < 0) { 1507 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name, 1508 ret); 1509 return ret; 1510 } 1511 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id); 1512 return (int)id; 1513 } 1514 1515 /** 1516 * Get Devlink family ID. 1517 * 1518 * @param[in] nlsk_fd 1519 * Netlink socket file descriptor. 1520 * 1521 * @return 1522 * ID >= 0 on success and @p enable is updated, a negative errno value 1523 * otherwise and rte_errno is set. 1524 */ 1525 1526 int 1527 mlx5_nl_devlink_family_id_get(int nlsk_fd) 1528 { 1529 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME); 1530 } 1531 1532 /** 1533 * Parse Netlink message to retrieve the ROCE enable status. 1534 * 1535 * @param nh 1536 * Pointer to Netlink Message Header. 1537 * @param arg 1538 * PMD data register with this callback. 1539 * 1540 * @return 1541 * 0 on success, a negative errno value otherwise and rte_errno is set. 1542 */ 1543 static int 1544 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) 1545 { 1546 1547 int ret = -EINVAL; 1548 int *enable = arg; 1549 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1550 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1551 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1552 1553 while (nla->nla_len && nla < tail) { 1554 switch (nla->nla_type) { 1555 /* Expected nested attributes case. */ 1556 case DEVLINK_ATTR_PARAM: 1557 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1558 case DEVLINK_ATTR_PARAM_VALUE: 1559 ret = 0; 1560 nla += 1; 1561 break; 1562 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1563 *enable = 1; 1564 return 0; 1565 default: 1566 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1567 } 1568 } 1569 *enable = 0; 1570 return ret; 1571 } 1572 1573 /** 1574 * Get ROCE enable status through Netlink. 1575 * 1576 * @param[in] nlsk_fd 1577 * Netlink socket file descriptor. 1578 * @param[in] family_id 1579 * the Devlink family ID. 1580 * @param pci_addr 1581 * The device PCI address. 1582 * @param[out] enable 1583 * Where to store the enable status. 1584 * 1585 * @return 1586 * 0 on success and @p enable is updated, a negative errno value otherwise 1587 * and rte_errno is set. 1588 */ 1589 int 1590 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, 1591 int *enable) 1592 { 1593 struct nlmsghdr *nlh; 1594 struct genlmsghdr *genl; 1595 uint32_t sn = MLX5_NL_SN_GENERATE; 1596 int ret; 1597 int cur_en = 0; 1598 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1599 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1600 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 1601 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 1602 1603 memset(buf, 0, sizeof(buf)); 1604 nlh = (struct nlmsghdr *)buf; 1605 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1606 nlh->nlmsg_type = family_id; 1607 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1608 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1609 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1610 genl->cmd = DEVLINK_CMD_PARAM_GET; 1611 genl->version = DEVLINK_GENL_VERSION; 1612 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1613 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1614 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1615 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1616 if (ret >= 0) 1617 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en); 1618 if (ret < 0) { 1619 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.", 1620 pci_addr, ret); 1621 return ret; 1622 } 1623 *enable = cur_en; 1624 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".", 1625 cur_en ? "en" : "dis", pci_addr); 1626 return ret; 1627 } 1628 1629 /** 1630 * Reload mlx5 device kernel driver through Netlink. 1631 * 1632 * @param[in] nlsk_fd 1633 * Netlink socket file descriptor. 1634 * @param[in] family_id 1635 * the Devlink family ID. 1636 * @param pci_addr 1637 * The device PCI address. 1638 * @param[out] enable 1639 * The enable status to set. 1640 * 1641 * @return 1642 * 0 on success, a negative errno value otherwise and rte_errno is set. 1643 */ 1644 int 1645 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) 1646 { 1647 struct nlmsghdr *nlh; 1648 struct genlmsghdr *genl; 1649 uint32_t sn = MLX5_NL_SN_GENERATE; 1650 int ret; 1651 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1652 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1653 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 + 1654 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2]; 1655 1656 memset(buf, 0, sizeof(buf)); 1657 nlh = (struct nlmsghdr *)buf; 1658 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1659 nlh->nlmsg_type = family_id; 1660 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1661 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1662 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1663 genl->cmd = DEVLINK_CMD_RELOAD; 1664 genl->version = DEVLINK_GENL_VERSION; 1665 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1666 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1667 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1668 if (ret >= 0) 1669 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1670 if (ret < 0) { 1671 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d", 1672 pci_addr, ret); 1673 return ret; 1674 } 1675 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.", 1676 pci_addr); 1677 return 0; 1678 } 1679 1680 /** 1681 * Set ROCE enable status through Netlink. 1682 * 1683 * @param[in] nlsk_fd 1684 * Netlink socket file descriptor. 1685 * @param[in] family_id 1686 * the Devlink family ID. 1687 * @param pci_addr 1688 * The device PCI address. 1689 * @param[out] enable 1690 * The enable status to set. 1691 * 1692 * @return 1693 * 0 on success, a negative errno value otherwise and rte_errno is set. 1694 */ 1695 int 1696 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, 1697 int enable) 1698 { 1699 struct nlmsghdr *nlh; 1700 struct genlmsghdr *genl; 1701 uint32_t sn = MLX5_NL_SN_GENERATE; 1702 int ret; 1703 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1704 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1705 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 + 1706 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6]; 1707 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; 1708 uint8_t ptype = NLA_FLAG; 1709 ; 1710 1711 memset(buf, 0, sizeof(buf)); 1712 nlh = (struct nlmsghdr *)buf; 1713 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1714 nlh->nlmsg_type = family_id; 1715 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1716 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1717 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1718 genl->cmd = DEVLINK_CMD_PARAM_SET; 1719 genl->version = DEVLINK_GENL_VERSION; 1720 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1721 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1722 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1723 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode)); 1724 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype)); 1725 if (enable) 1726 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0); 1727 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1728 if (ret >= 0) 1729 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1730 if (ret < 0) { 1731 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:" 1732 " %d.", enable ? "en" : "dis", pci_addr, ret); 1733 return ret; 1734 } 1735 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.", 1736 pci_addr, enable ? "en" : "dis"); 1737 /* Now, need to reload the driver. */ 1738 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); 1739 } 1740