1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018 6WIND S.A. 3 * Copyright 2018 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <linux/if_link.h> 8 #include <linux/rtnetlink.h> 9 #include <linux/genetlink.h> 10 #include <net/if.h> 11 #include <rdma/rdma_netlink.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <stdlib.h> 15 #include <stdalign.h> 16 #include <string.h> 17 #include <sys/socket.h> 18 #include <unistd.h> 19 20 #include <rte_errno.h> 21 22 #include "mlx5_nl.h" 23 #include "mlx5_common_utils.h" 24 #include "mlx5_malloc.h" 25 #ifdef HAVE_DEVLINK 26 #include <linux/devlink.h> 27 #endif 28 29 30 /* Size of the buffer to receive kernel messages */ 31 #define MLX5_NL_BUF_SIZE (32 * 1024) 32 /* Send buffer size for the Netlink socket */ 33 #define MLX5_SEND_BUF_SIZE 32768 34 /* Receive buffer size for the Netlink socket */ 35 #define MLX5_RECV_BUF_SIZE 32768 36 37 /** Parameters of VLAN devices created by driver. */ 38 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" 39 /* 40 * Define NDA_RTA as defined in iproute2 sources. 41 * 42 * see in iproute2 sources file include/libnetlink.h 43 */ 44 #ifndef MLX5_NDA_RTA 45 #define MLX5_NDA_RTA(r) \ 46 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 47 #endif 48 /* 49 * Define NLMSG_TAIL as defined in iproute2 sources. 50 * 51 * see in iproute2 sources file include/libnetlink.h 52 */ 53 #ifndef NLMSG_TAIL 54 #define NLMSG_TAIL(nmsg) \ 55 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 56 #endif 57 /* 58 * The following definitions are normally found in rdma/rdma_netlink.h, 59 * however they are so recent that most systems do not expose them yet. 60 */ 61 #ifndef HAVE_RDMA_NL_NLDEV 62 #define RDMA_NL_NLDEV 5 63 #endif 64 #ifndef HAVE_RDMA_NLDEV_CMD_GET 65 #define RDMA_NLDEV_CMD_GET 1 66 #endif 67 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET 68 #define RDMA_NLDEV_CMD_PORT_GET 5 69 #endif 70 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX 71 #define RDMA_NLDEV_ATTR_DEV_INDEX 1 72 #endif 73 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME 74 #define RDMA_NLDEV_ATTR_DEV_NAME 2 75 #endif 76 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX 77 #define RDMA_NLDEV_ATTR_PORT_INDEX 3 78 #endif 79 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX 80 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50 81 #endif 82 83 /* These are normally found in linux/if_link.h. */ 84 #ifndef HAVE_IFLA_NUM_VF 85 #define IFLA_NUM_VF 21 86 #endif 87 #ifndef HAVE_IFLA_EXT_MASK 88 #define IFLA_EXT_MASK 29 89 #endif 90 #ifndef HAVE_IFLA_PHYS_SWITCH_ID 91 #define IFLA_PHYS_SWITCH_ID 36 92 #endif 93 #ifndef HAVE_IFLA_PHYS_PORT_NAME 94 #define IFLA_PHYS_PORT_NAME 38 95 #endif 96 97 /* 98 * Some Devlink defines may be missed in old kernel versions, 99 * adjust used defines. 100 */ 101 #ifndef DEVLINK_GENL_NAME 102 #define DEVLINK_GENL_NAME "devlink" 103 #endif 104 #ifndef DEVLINK_GENL_VERSION 105 #define DEVLINK_GENL_VERSION 1 106 #endif 107 #ifndef DEVLINK_ATTR_BUS_NAME 108 #define DEVLINK_ATTR_BUS_NAME 1 109 #endif 110 #ifndef DEVLINK_ATTR_DEV_NAME 111 #define DEVLINK_ATTR_DEV_NAME 2 112 #endif 113 #ifndef DEVLINK_ATTR_PARAM 114 #define DEVLINK_ATTR_PARAM 80 115 #endif 116 #ifndef DEVLINK_ATTR_PARAM_NAME 117 #define DEVLINK_ATTR_PARAM_NAME 81 118 #endif 119 #ifndef DEVLINK_ATTR_PARAM_TYPE 120 #define DEVLINK_ATTR_PARAM_TYPE 83 121 #endif 122 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST 123 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84 124 #endif 125 #ifndef DEVLINK_ATTR_PARAM_VALUE 126 #define DEVLINK_ATTR_PARAM_VALUE 85 127 #endif 128 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA 129 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86 130 #endif 131 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE 132 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87 133 #endif 134 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT 135 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1 136 #endif 137 #ifndef DEVLINK_CMD_RELOAD 138 #define DEVLINK_CMD_RELOAD 37 139 #endif 140 #ifndef DEVLINK_CMD_PARAM_GET 141 #define DEVLINK_CMD_PARAM_GET 38 142 #endif 143 #ifndef DEVLINK_CMD_PARAM_SET 144 #define DEVLINK_CMD_PARAM_SET 39 145 #endif 146 #ifndef NLA_FLAG 147 #define NLA_FLAG 6 148 #endif 149 150 /* Add/remove MAC address through Netlink */ 151 struct mlx5_nl_mac_addr { 152 struct rte_ether_addr (*mac)[]; 153 /**< MAC address handled by the device. */ 154 int mac_n; /**< Number of addresses in the array. */ 155 }; 156 157 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0) 158 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) 159 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) 160 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) 161 162 /** Data structure used by mlx5_nl_cmdget_cb(). */ 163 struct mlx5_nl_ifindex_data { 164 const char *name; /**< IB device name (in). */ 165 uint32_t flags; /**< found attribute flags (out). */ 166 uint32_t ibindex; /**< IB device index (out). */ 167 uint32_t ifindex; /**< Network interface index (out). */ 168 uint32_t portnum; /**< IB device max port number (out). */ 169 }; 170 171 uint32_t atomic_sn; 172 173 /* Generate Netlink sequence number. */ 174 #define MLX5_NL_SN_GENERATE __atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED) 175 176 /** 177 * Opens a Netlink socket. 178 * 179 * @param protocol 180 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). 181 * 182 * @return 183 * A file descriptor on success, a negative errno value otherwise and 184 * rte_errno is set. 185 */ 186 int 187 mlx5_nl_init(int protocol) 188 { 189 int fd; 190 int sndbuf_size = MLX5_SEND_BUF_SIZE; 191 int rcvbuf_size = MLX5_RECV_BUF_SIZE; 192 struct sockaddr_nl local = { 193 .nl_family = AF_NETLINK, 194 }; 195 int ret; 196 197 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); 198 if (fd == -1) { 199 rte_errno = errno; 200 return -rte_errno; 201 } 202 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int)); 203 if (ret == -1) { 204 rte_errno = errno; 205 goto error; 206 } 207 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int)); 208 if (ret == -1) { 209 rte_errno = errno; 210 goto error; 211 } 212 ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); 213 if (ret == -1) { 214 rte_errno = errno; 215 goto error; 216 } 217 return fd; 218 error: 219 close(fd); 220 return -rte_errno; 221 } 222 223 /** 224 * Send a request message to the kernel on the Netlink socket. 225 * 226 * @param[in] nlsk_fd 227 * Netlink socket file descriptor. 228 * @param[in] nh 229 * The Netlink message send to the kernel. 230 * @param[in] ssn 231 * Sequence number. 232 * @param[in] req 233 * Pointer to the request structure. 234 * @param[in] len 235 * Length of the request in bytes. 236 * 237 * @return 238 * The number of sent bytes on success, a negative errno value otherwise and 239 * rte_errno is set. 240 */ 241 static int 242 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, 243 int len) 244 { 245 struct sockaddr_nl sa = { 246 .nl_family = AF_NETLINK, 247 }; 248 struct iovec iov[2] = { 249 { .iov_base = nh, .iov_len = sizeof(*nh), }, 250 { .iov_base = req, .iov_len = len, }, 251 }; 252 struct msghdr msg = { 253 .msg_name = &sa, 254 .msg_namelen = sizeof(sa), 255 .msg_iov = iov, 256 .msg_iovlen = 2, 257 }; 258 int send_bytes; 259 260 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 261 nh->nlmsg_seq = sn; 262 send_bytes = sendmsg(nlsk_fd, &msg, 0); 263 if (send_bytes < 0) { 264 rte_errno = errno; 265 return -rte_errno; 266 } 267 return send_bytes; 268 } 269 270 /** 271 * Send a message to the kernel on the Netlink socket. 272 * 273 * @param[in] nlsk_fd 274 * The Netlink socket file descriptor used for communication. 275 * @param[in] nh 276 * The Netlink message send to the kernel. 277 * @param[in] sn 278 * Sequence number. 279 * 280 * @return 281 * The number of sent bytes on success, a negative errno value otherwise and 282 * rte_errno is set. 283 */ 284 static int 285 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) 286 { 287 struct sockaddr_nl sa = { 288 .nl_family = AF_NETLINK, 289 }; 290 struct iovec iov = { 291 .iov_base = nh, 292 .iov_len = nh->nlmsg_len, 293 }; 294 struct msghdr msg = { 295 .msg_name = &sa, 296 .msg_namelen = sizeof(sa), 297 .msg_iov = &iov, 298 .msg_iovlen = 1, 299 }; 300 int send_bytes; 301 302 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 303 nh->nlmsg_seq = sn; 304 send_bytes = sendmsg(nlsk_fd, &msg, 0); 305 if (send_bytes < 0) { 306 rte_errno = errno; 307 return -rte_errno; 308 } 309 return send_bytes; 310 } 311 312 /** 313 * Receive a message from the kernel on the Netlink socket, following 314 * mlx5_nl_send(). 315 * 316 * @param[in] nlsk_fd 317 * The Netlink socket file descriptor used for communication. 318 * @param[in] sn 319 * Sequence number. 320 * @param[in] cb 321 * The callback function to call for each Netlink message received. 322 * @param[in, out] arg 323 * Custom arguments for the callback. 324 * 325 * @return 326 * 0 on success, a negative errno value otherwise and rte_errno is set. 327 */ 328 static int 329 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), 330 void *arg) 331 { 332 struct sockaddr_nl sa; 333 void *buf = mlx5_malloc(0, MLX5_RECV_BUF_SIZE, 0, SOCKET_ID_ANY); 334 struct iovec iov = { 335 .iov_base = buf, 336 .iov_len = MLX5_RECV_BUF_SIZE, 337 }; 338 struct msghdr msg = { 339 .msg_name = &sa, 340 .msg_namelen = sizeof(sa), 341 .msg_iov = &iov, 342 /* One message at a time */ 343 .msg_iovlen = 1, 344 }; 345 int multipart = 0; 346 int ret = 0; 347 348 if (!buf) { 349 rte_errno = ENOMEM; 350 return -rte_errno; 351 } 352 do { 353 struct nlmsghdr *nh; 354 int recv_bytes = 0; 355 356 do { 357 recv_bytes = recvmsg(nlsk_fd, &msg, 0); 358 if (recv_bytes == -1) { 359 rte_errno = errno; 360 ret = -rte_errno; 361 goto exit; 362 } 363 nh = (struct nlmsghdr *)buf; 364 } while (nh->nlmsg_seq != sn); 365 for (; 366 NLMSG_OK(nh, (unsigned int)recv_bytes); 367 nh = NLMSG_NEXT(nh, recv_bytes)) { 368 if (nh->nlmsg_type == NLMSG_ERROR) { 369 struct nlmsgerr *err_data = NLMSG_DATA(nh); 370 371 if (err_data->error < 0) { 372 rte_errno = -err_data->error; 373 ret = -rte_errno; 374 goto exit; 375 } 376 /* Ack message. */ 377 ret = 0; 378 goto exit; 379 } 380 /* Multi-part msgs and their trailing DONE message. */ 381 if (nh->nlmsg_flags & NLM_F_MULTI) { 382 if (nh->nlmsg_type == NLMSG_DONE) { 383 ret = 0; 384 goto exit; 385 } 386 multipart = 1; 387 } 388 if (cb) { 389 ret = cb(nh, arg); 390 if (ret < 0) 391 goto exit; 392 } 393 } 394 } while (multipart); 395 exit: 396 mlx5_free(buf); 397 return ret; 398 } 399 400 /** 401 * Parse Netlink message to retrieve the bridge MAC address. 402 * 403 * @param nh 404 * Pointer to Netlink Message Header. 405 * @param arg 406 * PMD data register with this callback. 407 * 408 * @return 409 * 0 on success, a negative errno value otherwise and rte_errno is set. 410 */ 411 static int 412 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) 413 { 414 struct mlx5_nl_mac_addr *data = arg; 415 struct ndmsg *r = NLMSG_DATA(nh); 416 struct rtattr *attribute; 417 int len; 418 419 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); 420 for (attribute = MLX5_NDA_RTA(r); 421 RTA_OK(attribute, len); 422 attribute = RTA_NEXT(attribute, len)) { 423 if (attribute->rta_type == NDA_LLADDR) { 424 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { 425 DRV_LOG(WARNING, 426 "not enough room to finalize the" 427 " request"); 428 rte_errno = ENOMEM; 429 return -rte_errno; 430 } 431 #ifdef RTE_LIBRTE_MLX5_DEBUG 432 char m[RTE_ETHER_ADDR_FMT_SIZE]; 433 434 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, 435 RTA_DATA(attribute)); 436 DRV_LOG(DEBUG, "bridge MAC address %s", m); 437 #endif 438 memcpy(&(*data->mac)[data->mac_n++], 439 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); 440 } 441 } 442 return 0; 443 } 444 445 /** 446 * Get bridge MAC addresses. 447 * 448 * @param[in] nlsk_fd 449 * Netlink socket file descriptor. 450 * @param[in] iface_idx 451 * Net device interface index. 452 * @param mac[out] 453 * Pointer to the array table of MAC addresses to fill. 454 * Its size should be of MLX5_MAX_MAC_ADDRESSES. 455 * @param mac_n[out] 456 * Number of entries filled in MAC array. 457 * 458 * @return 459 * 0 on success, a negative errno value otherwise and rte_errno is set. 460 */ 461 static int 462 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, 463 struct rte_ether_addr (*mac)[], int *mac_n) 464 { 465 struct { 466 struct nlmsghdr hdr; 467 struct ifinfomsg ifm; 468 } req = { 469 .hdr = { 470 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 471 .nlmsg_type = RTM_GETNEIGH, 472 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, 473 }, 474 .ifm = { 475 .ifi_family = PF_BRIDGE, 476 .ifi_index = iface_idx, 477 }, 478 }; 479 struct mlx5_nl_mac_addr data = { 480 .mac = mac, 481 .mac_n = 0, 482 }; 483 uint32_t sn = MLX5_NL_SN_GENERATE; 484 int ret; 485 486 if (nlsk_fd == -1) 487 return 0; 488 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, 489 sizeof(struct ifinfomsg)); 490 if (ret < 0) 491 goto error; 492 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); 493 if (ret < 0) 494 goto error; 495 *mac_n = data.mac_n; 496 return 0; 497 error: 498 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", 499 iface_idx, strerror(rte_errno)); 500 return -rte_errno; 501 } 502 503 /** 504 * Modify the MAC address neighbour table with Netlink. 505 * 506 * @param[in] nlsk_fd 507 * Netlink socket file descriptor. 508 * @param[in] iface_idx 509 * Net device interface index. 510 * @param mac 511 * MAC address to consider. 512 * @param add 513 * 1 to add the MAC address, 0 to remove the MAC address. 514 * 515 * @return 516 * 0 on success, a negative errno value otherwise and rte_errno is set. 517 */ 518 static int 519 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 520 struct rte_ether_addr *mac, int add) 521 { 522 struct { 523 struct nlmsghdr hdr; 524 struct ndmsg ndm; 525 struct rtattr rta; 526 uint8_t buffer[RTE_ETHER_ADDR_LEN]; 527 } req = { 528 .hdr = { 529 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), 530 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 531 NLM_F_EXCL | NLM_F_ACK, 532 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH, 533 }, 534 .ndm = { 535 .ndm_family = PF_BRIDGE, 536 .ndm_state = NUD_NOARP | NUD_PERMANENT, 537 .ndm_ifindex = iface_idx, 538 .ndm_flags = NTF_SELF, 539 }, 540 .rta = { 541 .rta_type = NDA_LLADDR, 542 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), 543 }, 544 }; 545 uint32_t sn = MLX5_NL_SN_GENERATE; 546 int ret; 547 548 if (nlsk_fd == -1) 549 return 0; 550 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); 551 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 552 RTA_ALIGN(req.rta.rta_len); 553 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 554 if (ret < 0) 555 goto error; 556 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 557 if (ret < 0) 558 goto error; 559 return 0; 560 error: 561 #ifdef RTE_LIBRTE_MLX5_DEBUG 562 { 563 char m[RTE_ETHER_ADDR_FMT_SIZE]; 564 565 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac); 566 DRV_LOG(DEBUG, 567 "Interface %u cannot %s MAC address %s %s", 568 iface_idx, 569 add ? "add" : "remove", m, strerror(rte_errno)); 570 } 571 #endif 572 return -rte_errno; 573 } 574 575 /** 576 * Modify the VF MAC address neighbour table with Netlink. 577 * 578 * @param[in] nlsk_fd 579 * Netlink socket file descriptor. 580 * @param[in] iface_idx 581 * Net device interface index. 582 * @param mac 583 * MAC address to consider. 584 * @param vf_index 585 * VF index. 586 * 587 * @return 588 * 0 on success, a negative errno value otherwise and rte_errno is set. 589 */ 590 int 591 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 592 struct rte_ether_addr *mac, int vf_index) 593 { 594 int ret; 595 struct { 596 struct nlmsghdr hdr; 597 struct ifinfomsg ifm; 598 struct rtattr vf_list_rta; 599 struct rtattr vf_info_rta; 600 struct rtattr vf_mac_rta; 601 struct ifla_vf_mac ivm; 602 } req = { 603 .hdr = { 604 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 605 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 606 .nlmsg_type = RTM_BASE, 607 }, 608 .ifm = { 609 .ifi_index = iface_idx, 610 }, 611 .vf_list_rta = { 612 .rta_type = IFLA_VFINFO_LIST, 613 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 614 }, 615 .vf_info_rta = { 616 .rta_type = IFLA_VF_INFO, 617 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 618 }, 619 .vf_mac_rta = { 620 .rta_type = IFLA_VF_MAC, 621 }, 622 }; 623 struct ifla_vf_mac ivm = { 624 .vf = vf_index, 625 }; 626 uint32_t sn = MLX5_NL_SN_GENERATE; 627 628 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); 629 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); 630 631 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); 632 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 633 RTA_ALIGN(req.vf_list_rta.rta_len) + 634 RTA_ALIGN(req.vf_info_rta.rta_len) + 635 RTA_ALIGN(req.vf_mac_rta.rta_len); 636 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 637 &req.vf_list_rta); 638 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 639 &req.vf_info_rta); 640 641 if (nlsk_fd < 0) 642 return -1; 643 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 644 if (ret < 0) 645 goto error; 646 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 647 if (ret < 0) 648 goto error; 649 return 0; 650 error: 651 DRV_LOG(ERR, 652 "representor %u cannot set VF MAC address " 653 "%02X:%02X:%02X:%02X:%02X:%02X : %s", 654 vf_index, 655 mac->addr_bytes[0], mac->addr_bytes[1], 656 mac->addr_bytes[2], mac->addr_bytes[3], 657 mac->addr_bytes[4], mac->addr_bytes[5], 658 strerror(rte_errno)); 659 return -rte_errno; 660 } 661 662 /** 663 * Add a MAC address. 664 * 665 * @param[in] nlsk_fd 666 * Netlink socket file descriptor. 667 * @param[in] iface_idx 668 * Net device interface index. 669 * @param mac_own 670 * BITFIELD_DECLARE array to store the mac. 671 * @param mac 672 * MAC address to register. 673 * @param index 674 * MAC address index. 675 * 676 * @return 677 * 0 on success, a negative errno value otherwise and rte_errno is set. 678 */ 679 int 680 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, 681 uint64_t *mac_own, struct rte_ether_addr *mac, 682 uint32_t index) 683 { 684 int ret; 685 686 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); 687 if (!ret) { 688 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 689 if (index >= MLX5_MAX_MAC_ADDRESSES) 690 return -EINVAL; 691 692 BITFIELD_SET(mac_own, index); 693 } 694 if (ret == -EEXIST) 695 return 0; 696 return ret; 697 } 698 699 /** 700 * Remove a MAC address. 701 * 702 * @param[in] nlsk_fd 703 * Netlink socket file descriptor. 704 * @param[in] iface_idx 705 * Net device interface index. 706 * @param mac_own 707 * BITFIELD_DECLARE array to store the mac. 708 * @param mac 709 * MAC address to remove. 710 * @param index 711 * MAC address index. 712 * 713 * @return 714 * 0 on success, a negative errno value otherwise and rte_errno is set. 715 */ 716 int 717 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, 718 struct rte_ether_addr *mac, uint32_t index) 719 { 720 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 721 if (index >= MLX5_MAX_MAC_ADDRESSES) 722 return -EINVAL; 723 724 BITFIELD_RESET(mac_own, index); 725 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); 726 } 727 728 /** 729 * Synchronize Netlink bridge table to the internal table. 730 * 731 * @param[in] nlsk_fd 732 * Netlink socket file descriptor. 733 * @param[in] iface_idx 734 * Net device interface index. 735 * @param mac_addrs 736 * Mac addresses array to sync. 737 * @param n 738 * @p mac_addrs array size. 739 */ 740 void 741 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, 742 struct rte_ether_addr *mac_addrs, int n) 743 { 744 struct rte_ether_addr macs[n]; 745 int macs_n = 0; 746 int i; 747 int ret; 748 749 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); 750 if (ret) 751 return; 752 for (i = 0; i != macs_n; ++i) { 753 int j; 754 755 /* Verify the address is not in the array yet. */ 756 for (j = 0; j != n; ++j) 757 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) 758 break; 759 if (j != n) 760 continue; 761 if (rte_is_multicast_ether_addr(&macs[i])) { 762 /* Find the first entry available. */ 763 for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) { 764 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 765 mac_addrs[j] = macs[i]; 766 break; 767 } 768 } 769 } else { 770 /* Find the first entry available. */ 771 for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) { 772 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 773 mac_addrs[j] = macs[i]; 774 break; 775 } 776 } 777 } 778 } 779 } 780 781 /** 782 * Flush all added MAC addresses. 783 * 784 * @param[in] nlsk_fd 785 * Netlink socket file descriptor. 786 * @param[in] iface_idx 787 * Net device interface index. 788 * @param[in] mac_addrs 789 * Mac addresses array to flush. 790 * @param n 791 * @p mac_addrs array size. 792 * @param mac_own 793 * BITFIELD_DECLARE array to store the mac. 794 */ 795 void 796 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, 797 struct rte_ether_addr *mac_addrs, int n, 798 uint64_t *mac_own) 799 { 800 int i; 801 802 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES) 803 return; 804 805 for (i = n - 1; i >= 0; --i) { 806 struct rte_ether_addr *m = &mac_addrs[i]; 807 808 if (BITFIELD_ISSET(mac_own, i)) 809 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, 810 i); 811 } 812 } 813 814 /** 815 * Enable promiscuous / all multicast mode through Netlink. 816 * 817 * @param[in] nlsk_fd 818 * Netlink socket file descriptor. 819 * @param[in] iface_idx 820 * Net device interface index. 821 * @param flags 822 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. 823 * @param enable 824 * Nonzero to enable, disable otherwise. 825 * 826 * @return 827 * 0 on success, a negative errno value otherwise and rte_errno is set. 828 */ 829 static int 830 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, 831 int enable) 832 { 833 struct { 834 struct nlmsghdr hdr; 835 struct ifinfomsg ifi; 836 } req = { 837 .hdr = { 838 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 839 .nlmsg_type = RTM_NEWLINK, 840 .nlmsg_flags = NLM_F_REQUEST, 841 }, 842 .ifi = { 843 .ifi_flags = enable ? flags : 0, 844 .ifi_change = flags, 845 .ifi_index = iface_idx, 846 }, 847 }; 848 uint32_t sn = MLX5_NL_SN_GENERATE; 849 int ret; 850 851 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); 852 if (nlsk_fd < 0) 853 return 0; 854 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 855 if (ret < 0) 856 return ret; 857 return 0; 858 } 859 860 /** 861 * Enable promiscuous mode through Netlink. 862 * 863 * @param[in] nlsk_fd 864 * Netlink socket file descriptor. 865 * @param[in] iface_idx 866 * Net device interface index. 867 * @param enable 868 * Nonzero to enable, disable otherwise. 869 * 870 * @return 871 * 0 on success, a negative errno value otherwise and rte_errno is set. 872 */ 873 int 874 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) 875 { 876 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); 877 878 if (ret) 879 DRV_LOG(DEBUG, 880 "Interface %u cannot %s promisc mode: Netlink error %s", 881 iface_idx, enable ? "enable" : "disable", 882 strerror(rte_errno)); 883 return ret; 884 } 885 886 /** 887 * Enable all multicast mode through Netlink. 888 * 889 * @param[in] nlsk_fd 890 * Netlink socket file descriptor. 891 * @param[in] iface_idx 892 * Net device interface index. 893 * @param enable 894 * Nonzero to enable, disable otherwise. 895 * 896 * @return 897 * 0 on success, a negative errno value otherwise and rte_errno is set. 898 */ 899 int 900 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) 901 { 902 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, 903 enable); 904 905 if (ret) 906 DRV_LOG(DEBUG, 907 "Interface %u cannot %s allmulti : Netlink error %s", 908 iface_idx, enable ? "enable" : "disable", 909 strerror(rte_errno)); 910 return ret; 911 } 912 913 /** 914 * Process network interface information from Netlink message. 915 * 916 * @param nh 917 * Pointer to Netlink message header. 918 * @param arg 919 * Opaque data pointer for this callback. 920 * 921 * @return 922 * 0 on success, a negative errno value otherwise and rte_errno is set. 923 */ 924 static int 925 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) 926 { 927 struct mlx5_nl_ifindex_data *data = arg; 928 struct mlx5_nl_ifindex_data local = { 929 .flags = 0, 930 }; 931 size_t off = NLMSG_HDRLEN; 932 933 if (nh->nlmsg_type != 934 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && 935 nh->nlmsg_type != 936 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) 937 goto error; 938 while (off < nh->nlmsg_len) { 939 struct nlattr *na = (void *)((uintptr_t)nh + off); 940 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); 941 942 if (na->nla_len > nh->nlmsg_len - off) 943 goto error; 944 switch (na->nla_type) { 945 case RDMA_NLDEV_ATTR_DEV_INDEX: 946 local.ibindex = *(uint32_t *)payload; 947 local.flags |= MLX5_NL_CMD_GET_IB_INDEX; 948 break; 949 case RDMA_NLDEV_ATTR_DEV_NAME: 950 if (!strcmp(payload, data->name)) 951 local.flags |= MLX5_NL_CMD_GET_IB_NAME; 952 break; 953 case RDMA_NLDEV_ATTR_NDEV_INDEX: 954 local.ifindex = *(uint32_t *)payload; 955 local.flags |= MLX5_NL_CMD_GET_NET_INDEX; 956 break; 957 case RDMA_NLDEV_ATTR_PORT_INDEX: 958 local.portnum = *(uint32_t *)payload; 959 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; 960 break; 961 default: 962 break; 963 } 964 off += NLA_ALIGN(na->nla_len); 965 } 966 /* 967 * It is possible to have multiple messages for all 968 * Infiniband devices in the system with appropriate name. 969 * So we should gather parameters locally and copy to 970 * query context only in case of coinciding device name. 971 */ 972 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { 973 data->flags = local.flags; 974 data->ibindex = local.ibindex; 975 data->ifindex = local.ifindex; 976 data->portnum = local.portnum; 977 } 978 return 0; 979 error: 980 rte_errno = EINVAL; 981 return -rte_errno; 982 } 983 984 /** 985 * Get index of network interface associated with some IB device. 986 * 987 * This is the only somewhat safe method to avoid resorting to heuristics 988 * when faced with port representors. Unfortunately it requires at least 989 * Linux 4.17. 990 * 991 * @param nl 992 * Netlink socket of the RDMA kind (NETLINK_RDMA). 993 * @param[in] name 994 * IB device name. 995 * @param[in] pindex 996 * IB device port index, starting from 1 997 * @return 998 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno 999 * is set. 1000 */ 1001 unsigned int 1002 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) 1003 { 1004 struct mlx5_nl_ifindex_data data = { 1005 .name = name, 1006 .flags = 0, 1007 .ibindex = 0, /* Determined during first pass. */ 1008 .ifindex = 0, /* Determined during second pass. */ 1009 }; 1010 union { 1011 struct nlmsghdr nh; 1012 uint8_t buf[NLMSG_HDRLEN + 1013 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) + 1014 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; 1015 } req = { 1016 .nh = { 1017 .nlmsg_len = NLMSG_LENGTH(0), 1018 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1019 RDMA_NLDEV_CMD_GET), 1020 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1021 }, 1022 }; 1023 struct nlattr *na; 1024 uint32_t sn = MLX5_NL_SN_GENERATE; 1025 int ret; 1026 1027 ret = mlx5_nl_send(nl, &req.nh, sn); 1028 if (ret < 0) 1029 return 0; 1030 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1031 if (ret < 0) 1032 return 0; 1033 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1034 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) 1035 goto error; 1036 data.flags = 0; 1037 sn = MLX5_NL_SN_GENERATE; 1038 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1039 RDMA_NLDEV_CMD_PORT_GET); 1040 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1041 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); 1042 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); 1043 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex); 1044 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; 1045 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1046 &data.ibindex, sizeof(data.ibindex)); 1047 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); 1048 na->nla_len = NLA_HDRLEN + sizeof(pindex); 1049 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; 1050 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1051 &pindex, sizeof(pindex)); 1052 ret = mlx5_nl_send(nl, &req.nh, sn); 1053 if (ret < 0) 1054 return 0; 1055 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1056 if (ret < 0) 1057 return 0; 1058 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1059 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1060 !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || 1061 !data.ifindex) 1062 goto error; 1063 return data.ifindex; 1064 error: 1065 rte_errno = ENODEV; 1066 return 0; 1067 } 1068 1069 /** 1070 * Get the number of physical ports of given IB device. 1071 * 1072 * @param nl 1073 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1074 * @param[in] name 1075 * IB device name. 1076 * 1077 * @return 1078 * A valid (nonzero) number of ports on success, 0 otherwise 1079 * and rte_errno is set. 1080 */ 1081 unsigned int 1082 mlx5_nl_portnum(int nl, const char *name) 1083 { 1084 struct mlx5_nl_ifindex_data data = { 1085 .flags = 0, 1086 .name = name, 1087 .ifindex = 0, 1088 .portnum = 0, 1089 }; 1090 struct nlmsghdr req = { 1091 .nlmsg_len = NLMSG_LENGTH(0), 1092 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1093 RDMA_NLDEV_CMD_GET), 1094 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1095 }; 1096 uint32_t sn = MLX5_NL_SN_GENERATE; 1097 int ret; 1098 1099 ret = mlx5_nl_send(nl, &req, sn); 1100 if (ret < 0) 1101 return 0; 1102 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1103 if (ret < 0) 1104 return 0; 1105 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1106 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1107 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { 1108 rte_errno = ENODEV; 1109 return 0; 1110 } 1111 if (!data.portnum) 1112 rte_errno = EINVAL; 1113 return data.portnum; 1114 } 1115 1116 /** 1117 * Analyze gathered port parameters via Netlink to recognize master 1118 * and representor devices for E-Switch configuration. 1119 * 1120 * @param[in] num_vf_set 1121 * flag of presence of number of VFs port attribute. 1122 * @param[inout] switch_info 1123 * Port information, including port name as a number and port name 1124 * type if recognized 1125 * 1126 * @return 1127 * master and representor flags are set in switch_info according to 1128 * recognized parameters (if any). 1129 */ 1130 static void 1131 mlx5_nl_check_switch_info(bool num_vf_set, 1132 struct mlx5_switch_info *switch_info) 1133 { 1134 switch (switch_info->name_type) { 1135 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1136 /* 1137 * Name is not recognized, assume the master, 1138 * check the number of VFs key presence. 1139 */ 1140 switch_info->master = num_vf_set; 1141 break; 1142 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1143 /* 1144 * Name is not set, this assumes the legacy naming 1145 * schema for master, just check if there is a 1146 * number of VFs key. 1147 */ 1148 switch_info->master = num_vf_set; 1149 break; 1150 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1151 /* New uplink naming schema recognized. */ 1152 switch_info->master = 1; 1153 break; 1154 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1155 /* Legacy representors naming schema. */ 1156 switch_info->representor = !num_vf_set; 1157 break; 1158 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: 1159 /* Fallthrough */ 1160 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1161 /* New representors naming schema. */ 1162 switch_info->representor = 1; 1163 break; 1164 } 1165 } 1166 1167 /** 1168 * Process switch information from Netlink message. 1169 * 1170 * @param nh 1171 * Pointer to Netlink message header. 1172 * @param arg 1173 * Opaque data pointer for this callback. 1174 * 1175 * @return 1176 * 0 on success, a negative errno value otherwise and rte_errno is set. 1177 */ 1178 static int 1179 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) 1180 { 1181 struct mlx5_switch_info info = { 1182 .master = 0, 1183 .representor = 0, 1184 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1185 .port_name = 0, 1186 .switch_id = 0, 1187 }; 1188 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 1189 bool switch_id_set = false; 1190 bool num_vf_set = false; 1191 1192 if (nh->nlmsg_type != RTM_NEWLINK) 1193 goto error; 1194 while (off < nh->nlmsg_len) { 1195 struct rtattr *ra = (void *)((uintptr_t)nh + off); 1196 void *payload = RTA_DATA(ra); 1197 unsigned int i; 1198 1199 if (ra->rta_len > nh->nlmsg_len - off) 1200 goto error; 1201 switch (ra->rta_type) { 1202 case IFLA_NUM_VF: 1203 num_vf_set = true; 1204 break; 1205 case IFLA_PHYS_PORT_NAME: 1206 mlx5_translate_port_name((char *)payload, &info); 1207 break; 1208 case IFLA_PHYS_SWITCH_ID: 1209 info.switch_id = 0; 1210 for (i = 0; i < RTA_PAYLOAD(ra); ++i) { 1211 info.switch_id <<= 8; 1212 info.switch_id |= ((uint8_t *)payload)[i]; 1213 } 1214 switch_id_set = true; 1215 break; 1216 } 1217 off += RTA_ALIGN(ra->rta_len); 1218 } 1219 if (switch_id_set) { 1220 /* We have some E-Switch configuration. */ 1221 mlx5_nl_check_switch_info(num_vf_set, &info); 1222 } 1223 MLX5_ASSERT(!(info.master && info.representor)); 1224 memcpy(arg, &info, sizeof(info)); 1225 return 0; 1226 error: 1227 rte_errno = EINVAL; 1228 return -rte_errno; 1229 } 1230 1231 /** 1232 * Get switch information associated with network interface. 1233 * 1234 * @param nl 1235 * Netlink socket of the ROUTE kind (NETLINK_ROUTE). 1236 * @param ifindex 1237 * Network interface index. 1238 * @param[out] info 1239 * Switch information object, populated in case of success. 1240 * 1241 * @return 1242 * 0 on success, a negative errno value otherwise and rte_errno is set. 1243 */ 1244 int 1245 mlx5_nl_switch_info(int nl, unsigned int ifindex, 1246 struct mlx5_switch_info *info) 1247 { 1248 struct { 1249 struct nlmsghdr nh; 1250 struct ifinfomsg info; 1251 struct rtattr rta; 1252 uint32_t extmask; 1253 } req = { 1254 .nh = { 1255 .nlmsg_len = NLMSG_LENGTH 1256 (sizeof(req.info) + 1257 RTA_LENGTH(sizeof(uint32_t))), 1258 .nlmsg_type = RTM_GETLINK, 1259 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1260 }, 1261 .info = { 1262 .ifi_family = AF_UNSPEC, 1263 .ifi_index = ifindex, 1264 }, 1265 .rta = { 1266 .rta_type = IFLA_EXT_MASK, 1267 .rta_len = RTA_LENGTH(sizeof(int32_t)), 1268 }, 1269 .extmask = RTE_LE32(1), 1270 }; 1271 uint32_t sn = MLX5_NL_SN_GENERATE; 1272 int ret; 1273 1274 ret = mlx5_nl_send(nl, &req.nh, sn); 1275 if (ret >= 0) 1276 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); 1277 if (info->master && info->representor) { 1278 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1279 " and as representor", ifindex); 1280 rte_errno = ENODEV; 1281 ret = -rte_errno; 1282 } 1283 return ret; 1284 } 1285 1286 /* 1287 * Delete VLAN network device by ifindex. 1288 * 1289 * @param[in] tcf 1290 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1291 * @param[in] ifindex 1292 * Interface index of network device to delete. 1293 */ 1294 void 1295 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, 1296 uint32_t ifindex) 1297 { 1298 uint32_t sn = MLX5_NL_SN_GENERATE; 1299 int ret; 1300 struct { 1301 struct nlmsghdr nh; 1302 struct ifinfomsg info; 1303 } req = { 1304 .nh = { 1305 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 1306 .nlmsg_type = RTM_DELLINK, 1307 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1308 }, 1309 .info = { 1310 .ifi_family = AF_UNSPEC, 1311 .ifi_index = ifindex, 1312 }, 1313 }; 1314 1315 if (ifindex) { 1316 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn); 1317 if (ret >= 0) 1318 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1319 if (ret < 0) 1320 DRV_LOG(WARNING, "netlink: error deleting VLAN WA" 1321 " ifindex %u, %d", ifindex, ret); 1322 } 1323 } 1324 1325 /* Set of subroutines to build Netlink message. */ 1326 static struct nlattr * 1327 nl_msg_tail(struct nlmsghdr *nlh) 1328 { 1329 return (struct nlattr *) 1330 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); 1331 } 1332 1333 static void 1334 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) 1335 { 1336 struct nlattr *nla = nl_msg_tail(nlh); 1337 1338 nla->nla_type = type; 1339 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen; 1340 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len); 1341 1342 if (alen) 1343 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); 1344 } 1345 1346 static struct nlattr * 1347 nl_attr_nest_start(struct nlmsghdr *nlh, int type) 1348 { 1349 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); 1350 1351 nl_attr_put(nlh, type, NULL, 0); 1352 return nest; 1353 } 1354 1355 static void 1356 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) 1357 { 1358 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; 1359 } 1360 1361 /* 1362 * Create network VLAN device with specified VLAN tag. 1363 * 1364 * @param[in] tcf 1365 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1366 * @param[in] ifindex 1367 * Base network interface index. 1368 * @param[in] tag 1369 * VLAN tag for VLAN network device to create. 1370 */ 1371 uint32_t 1372 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, 1373 uint32_t ifindex, uint16_t tag) 1374 { 1375 struct nlmsghdr *nlh; 1376 struct ifinfomsg *ifm; 1377 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; 1378 1379 __rte_cache_aligned 1380 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1381 NLMSG_ALIGN(sizeof(struct ifinfomsg)) + 1382 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + 1383 NLMSG_ALIGN(sizeof(uint32_t)) + 1384 NLMSG_ALIGN(sizeof(name)) + 1385 NLMSG_ALIGN(sizeof("vlan")) + 1386 NLMSG_ALIGN(sizeof(uint32_t)) + 1387 NLMSG_ALIGN(sizeof(uint16_t)) + 16]; 1388 struct nlattr *na_info; 1389 struct nlattr *na_vlan; 1390 uint32_t sn = MLX5_NL_SN_GENERATE; 1391 int ret; 1392 1393 memset(buf, 0, sizeof(buf)); 1394 nlh = (struct nlmsghdr *)buf; 1395 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1396 nlh->nlmsg_type = RTM_NEWLINK; 1397 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 1398 NLM_F_EXCL | NLM_F_ACK; 1399 ifm = (struct ifinfomsg *)nl_msg_tail(nlh); 1400 nlh->nlmsg_len += sizeof(struct ifinfomsg); 1401 ifm->ifi_family = AF_UNSPEC; 1402 ifm->ifi_type = 0; 1403 ifm->ifi_index = 0; 1404 ifm->ifi_flags = IFF_UP; 1405 ifm->ifi_change = 0xffffffff; 1406 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); 1407 ret = snprintf(name, sizeof(name), "%s.%u.%u", 1408 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); 1409 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); 1410 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); 1411 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); 1412 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); 1413 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); 1414 nl_attr_nest_end(nlh, na_vlan); 1415 nl_attr_nest_end(nlh, na_info); 1416 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len); 1417 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn); 1418 if (ret >= 0) 1419 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1420 if (ret < 0) { 1421 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name, 1422 ret); 1423 } 1424 /* Try to get ifindex of created or pre-existing device. */ 1425 ret = if_nametoindex(name); 1426 if (!ret) { 1427 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name, 1428 errno); 1429 return 0; 1430 } 1431 return ret; 1432 } 1433 1434 /** 1435 * Parse Netlink message to retrieve the general family ID. 1436 * 1437 * @param nh 1438 * Pointer to Netlink Message Header. 1439 * @param arg 1440 * PMD data register with this callback. 1441 * 1442 * @return 1443 * 0 on success, a negative errno value otherwise and rte_errno is set. 1444 */ 1445 static int 1446 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg) 1447 { 1448 1449 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1450 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1451 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1452 1453 for (; nla->nla_len && nla < tail; 1454 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) { 1455 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) { 1456 *(uint16_t *)arg = *(uint16_t *)(nla + 1); 1457 return 0; 1458 } 1459 } 1460 return -EINVAL; 1461 } 1462 1463 #define MLX5_NL_MAX_ATTR_SIZE 100 1464 /** 1465 * Get generic netlink family ID. 1466 * 1467 * @param[in] nlsk_fd 1468 * Netlink socket file descriptor. 1469 * @param[in] name 1470 * The family name. 1471 * 1472 * @return 1473 * ID >= 0 on success and @p enable is updated, a negative errno value 1474 * otherwise and rte_errno is set. 1475 */ 1476 static int 1477 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name) 1478 { 1479 struct nlmsghdr *nlh; 1480 struct genlmsghdr *genl; 1481 uint32_t sn = MLX5_NL_SN_GENERATE; 1482 int name_size = strlen(name) + 1; 1483 int ret; 1484 uint16_t id = -1; 1485 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1486 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1487 NLMSG_ALIGN(sizeof(struct nlattr)) + 1488 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)]; 1489 1490 memset(buf, 0, sizeof(buf)); 1491 nlh = (struct nlmsghdr *)buf; 1492 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1493 nlh->nlmsg_type = GENL_ID_CTRL; 1494 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1495 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1496 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1497 genl->cmd = CTRL_CMD_GETFAMILY; 1498 genl->version = 1; 1499 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size); 1500 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1501 if (ret >= 0) 1502 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id); 1503 if (ret < 0) { 1504 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name, 1505 ret); 1506 return ret; 1507 } 1508 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id); 1509 return (int)id; 1510 } 1511 1512 /** 1513 * Get Devlink family ID. 1514 * 1515 * @param[in] nlsk_fd 1516 * Netlink socket file descriptor. 1517 * 1518 * @return 1519 * ID >= 0 on success and @p enable is updated, a negative errno value 1520 * otherwise and rte_errno is set. 1521 */ 1522 1523 int 1524 mlx5_nl_devlink_family_id_get(int nlsk_fd) 1525 { 1526 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME); 1527 } 1528 1529 /** 1530 * Parse Netlink message to retrieve the ROCE enable status. 1531 * 1532 * @param nh 1533 * Pointer to Netlink Message Header. 1534 * @param arg 1535 * PMD data register with this callback. 1536 * 1537 * @return 1538 * 0 on success, a negative errno value otherwise and rte_errno is set. 1539 */ 1540 static int 1541 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) 1542 { 1543 1544 int ret = -EINVAL; 1545 int *enable = arg; 1546 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1547 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1548 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1549 1550 while (nla->nla_len && nla < tail) { 1551 switch (nla->nla_type) { 1552 /* Expected nested attributes case. */ 1553 case DEVLINK_ATTR_PARAM: 1554 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1555 case DEVLINK_ATTR_PARAM_VALUE: 1556 ret = 0; 1557 nla += 1; 1558 break; 1559 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1560 *enable = 1; 1561 return 0; 1562 default: 1563 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1564 } 1565 } 1566 *enable = 0; 1567 return ret; 1568 } 1569 1570 /** 1571 * Get ROCE enable status through Netlink. 1572 * 1573 * @param[in] nlsk_fd 1574 * Netlink socket file descriptor. 1575 * @param[in] family_id 1576 * the Devlink family ID. 1577 * @param pci_addr 1578 * The device PCI address. 1579 * @param[out] enable 1580 * Where to store the enable status. 1581 * 1582 * @return 1583 * 0 on success and @p enable is updated, a negative errno value otherwise 1584 * and rte_errno is set. 1585 */ 1586 int 1587 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, 1588 int *enable) 1589 { 1590 struct nlmsghdr *nlh; 1591 struct genlmsghdr *genl; 1592 uint32_t sn = MLX5_NL_SN_GENERATE; 1593 int ret; 1594 int cur_en = 0; 1595 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1596 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1597 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 1598 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 1599 1600 memset(buf, 0, sizeof(buf)); 1601 nlh = (struct nlmsghdr *)buf; 1602 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1603 nlh->nlmsg_type = family_id; 1604 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1605 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1606 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1607 genl->cmd = DEVLINK_CMD_PARAM_GET; 1608 genl->version = DEVLINK_GENL_VERSION; 1609 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1610 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1611 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1612 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1613 if (ret >= 0) 1614 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en); 1615 if (ret < 0) { 1616 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.", 1617 pci_addr, ret); 1618 return ret; 1619 } 1620 *enable = cur_en; 1621 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".", 1622 cur_en ? "en" : "dis", pci_addr); 1623 return ret; 1624 } 1625 1626 /** 1627 * Reload mlx5 device kernel driver through Netlink. 1628 * 1629 * @param[in] nlsk_fd 1630 * Netlink socket file descriptor. 1631 * @param[in] family_id 1632 * the Devlink family ID. 1633 * @param pci_addr 1634 * The device PCI address. 1635 * @param[out] enable 1636 * The enable status to set. 1637 * 1638 * @return 1639 * 0 on success, a negative errno value otherwise and rte_errno is set. 1640 */ 1641 int 1642 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) 1643 { 1644 struct nlmsghdr *nlh; 1645 struct genlmsghdr *genl; 1646 uint32_t sn = MLX5_NL_SN_GENERATE; 1647 int ret; 1648 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1649 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1650 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 + 1651 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2]; 1652 1653 memset(buf, 0, sizeof(buf)); 1654 nlh = (struct nlmsghdr *)buf; 1655 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1656 nlh->nlmsg_type = family_id; 1657 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1658 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1659 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1660 genl->cmd = DEVLINK_CMD_RELOAD; 1661 genl->version = DEVLINK_GENL_VERSION; 1662 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1663 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1664 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1665 if (ret >= 0) 1666 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1667 if (ret < 0) { 1668 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d", 1669 pci_addr, ret); 1670 return ret; 1671 } 1672 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.", 1673 pci_addr); 1674 return 0; 1675 } 1676 1677 /** 1678 * Set ROCE enable status through Netlink. 1679 * 1680 * @param[in] nlsk_fd 1681 * Netlink socket file descriptor. 1682 * @param[in] family_id 1683 * the Devlink family ID. 1684 * @param pci_addr 1685 * The device PCI address. 1686 * @param[out] enable 1687 * The enable status to set. 1688 * 1689 * @return 1690 * 0 on success, a negative errno value otherwise and rte_errno is set. 1691 */ 1692 int 1693 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, 1694 int enable) 1695 { 1696 struct nlmsghdr *nlh; 1697 struct genlmsghdr *genl; 1698 uint32_t sn = MLX5_NL_SN_GENERATE; 1699 int ret; 1700 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1701 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1702 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 + 1703 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6]; 1704 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; 1705 uint8_t ptype = NLA_FLAG; 1706 ; 1707 1708 memset(buf, 0, sizeof(buf)); 1709 nlh = (struct nlmsghdr *)buf; 1710 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1711 nlh->nlmsg_type = family_id; 1712 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1713 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1714 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1715 genl->cmd = DEVLINK_CMD_PARAM_SET; 1716 genl->version = DEVLINK_GENL_VERSION; 1717 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1718 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1719 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1720 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode)); 1721 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype)); 1722 if (enable) 1723 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0); 1724 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1725 if (ret >= 0) 1726 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1727 if (ret < 0) { 1728 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:" 1729 " %d.", enable ? "en" : "dis", pci_addr, ret); 1730 return ret; 1731 } 1732 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.", 1733 pci_addr, enable ? "en" : "dis"); 1734 /* Now, need to reload the driver. */ 1735 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); 1736 } 1737