1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018 6WIND S.A. 3 * Copyright 2018 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <linux/if_link.h> 8 #include <linux/rtnetlink.h> 9 #include <linux/genetlink.h> 10 #include <net/if.h> 11 #include <rdma/rdma_netlink.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <stdlib.h> 15 #include <stdalign.h> 16 #include <string.h> 17 #include <sys/socket.h> 18 #include <unistd.h> 19 20 #include <rte_errno.h> 21 22 #include "mlx5_nl.h" 23 #include "mlx5_common_utils.h" 24 #include "mlx5_malloc.h" 25 #ifdef HAVE_DEVLINK 26 #include <linux/devlink.h> 27 #endif 28 29 30 /* Size of the buffer to receive kernel messages */ 31 #define MLX5_NL_BUF_SIZE (32 * 1024) 32 /* Send buffer size for the Netlink socket */ 33 #define MLX5_SEND_BUF_SIZE 32768 34 /* Receive buffer size for the Netlink socket */ 35 #define MLX5_RECV_BUF_SIZE 32768 36 37 /** Parameters of VLAN devices created by driver. */ 38 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" 39 /* 40 * Define NDA_RTA as defined in iproute2 sources. 41 * 42 * see in iproute2 sources file include/libnetlink.h 43 */ 44 #ifndef MLX5_NDA_RTA 45 #define MLX5_NDA_RTA(r) \ 46 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 47 #endif 48 /* 49 * Define NLMSG_TAIL as defined in iproute2 sources. 50 * 51 * see in iproute2 sources file include/libnetlink.h 52 */ 53 #ifndef NLMSG_TAIL 54 #define NLMSG_TAIL(nmsg) \ 55 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 56 #endif 57 /* 58 * The following definitions are normally found in rdma/rdma_netlink.h, 59 * however they are so recent that most systems do not expose them yet. 60 */ 61 #ifndef HAVE_RDMA_NL_NLDEV 62 #define RDMA_NL_NLDEV 5 63 #endif 64 #ifndef HAVE_RDMA_NLDEV_CMD_GET 65 #define RDMA_NLDEV_CMD_GET 1 66 #endif 67 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET 68 #define RDMA_NLDEV_CMD_PORT_GET 5 69 #endif 70 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX 71 #define RDMA_NLDEV_ATTR_DEV_INDEX 1 72 #endif 73 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME 74 #define RDMA_NLDEV_ATTR_DEV_NAME 2 75 #endif 76 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX 77 #define RDMA_NLDEV_ATTR_PORT_INDEX 3 78 #endif 79 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX 80 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50 81 #endif 82 83 /* These are normally found in linux/if_link.h. */ 84 #ifndef HAVE_IFLA_NUM_VF 85 #define IFLA_NUM_VF 21 86 #endif 87 #ifndef HAVE_IFLA_EXT_MASK 88 #define IFLA_EXT_MASK 29 89 #endif 90 #ifndef HAVE_IFLA_PHYS_SWITCH_ID 91 #define IFLA_PHYS_SWITCH_ID 36 92 #endif 93 #ifndef HAVE_IFLA_PHYS_PORT_NAME 94 #define IFLA_PHYS_PORT_NAME 38 95 #endif 96 97 /* 98 * Some Devlink defines may be missed in old kernel versions, 99 * adjust used defines. 100 */ 101 #ifndef DEVLINK_GENL_NAME 102 #define DEVLINK_GENL_NAME "devlink" 103 #endif 104 #ifndef DEVLINK_GENL_VERSION 105 #define DEVLINK_GENL_VERSION 1 106 #endif 107 #ifndef DEVLINK_ATTR_BUS_NAME 108 #define DEVLINK_ATTR_BUS_NAME 1 109 #endif 110 #ifndef DEVLINK_ATTR_DEV_NAME 111 #define DEVLINK_ATTR_DEV_NAME 2 112 #endif 113 #ifndef DEVLINK_ATTR_PARAM 114 #define DEVLINK_ATTR_PARAM 80 115 #endif 116 #ifndef DEVLINK_ATTR_PARAM_NAME 117 #define DEVLINK_ATTR_PARAM_NAME 81 118 #endif 119 #ifndef DEVLINK_ATTR_PARAM_TYPE 120 #define DEVLINK_ATTR_PARAM_TYPE 83 121 #endif 122 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST 123 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84 124 #endif 125 #ifndef DEVLINK_ATTR_PARAM_VALUE 126 #define DEVLINK_ATTR_PARAM_VALUE 85 127 #endif 128 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA 129 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86 130 #endif 131 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE 132 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87 133 #endif 134 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT 135 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1 136 #endif 137 #ifndef DEVLINK_CMD_RELOAD 138 #define DEVLINK_CMD_RELOAD 37 139 #endif 140 #ifndef DEVLINK_CMD_PARAM_GET 141 #define DEVLINK_CMD_PARAM_GET 38 142 #endif 143 #ifndef DEVLINK_CMD_PARAM_SET 144 #define DEVLINK_CMD_PARAM_SET 39 145 #endif 146 #ifndef NLA_FLAG 147 #define NLA_FLAG 6 148 #endif 149 150 /* Add/remove MAC address through Netlink */ 151 struct mlx5_nl_mac_addr { 152 struct rte_ether_addr (*mac)[]; 153 /**< MAC address handled by the device. */ 154 int mac_n; /**< Number of addresses in the array. */ 155 }; 156 157 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0) 158 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) 159 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) 160 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) 161 162 /** Data structure used by mlx5_nl_cmdget_cb(). */ 163 struct mlx5_nl_ifindex_data { 164 const char *name; /**< IB device name (in). */ 165 uint32_t flags; /**< found attribute flags (out). */ 166 uint32_t ibindex; /**< IB device index (out). */ 167 uint32_t ifindex; /**< Network interface index (out). */ 168 uint32_t portnum; /**< IB device max port number (out). */ 169 }; 170 171 uint32_t atomic_sn; 172 173 /* Generate Netlink sequence number. */ 174 #define MLX5_NL_SN_GENERATE __atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED) 175 176 /** 177 * Opens a Netlink socket. 178 * 179 * @param protocol 180 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). 181 * 182 * @return 183 * A file descriptor on success, a negative errno value otherwise and 184 * rte_errno is set. 185 */ 186 int 187 mlx5_nl_init(int protocol) 188 { 189 int fd; 190 int sndbuf_size = MLX5_SEND_BUF_SIZE; 191 int rcvbuf_size = MLX5_RECV_BUF_SIZE; 192 struct sockaddr_nl local = { 193 .nl_family = AF_NETLINK, 194 }; 195 int ret; 196 197 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); 198 if (fd == -1) { 199 rte_errno = errno; 200 return -rte_errno; 201 } 202 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int)); 203 if (ret == -1) { 204 rte_errno = errno; 205 goto error; 206 } 207 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int)); 208 if (ret == -1) { 209 rte_errno = errno; 210 goto error; 211 } 212 ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); 213 if (ret == -1) { 214 rte_errno = errno; 215 goto error; 216 } 217 return fd; 218 error: 219 close(fd); 220 return -rte_errno; 221 } 222 223 /** 224 * Send a request message to the kernel on the Netlink socket. 225 * 226 * @param[in] nlsk_fd 227 * Netlink socket file descriptor. 228 * @param[in] nh 229 * The Netlink message send to the kernel. 230 * @param[in] ssn 231 * Sequence number. 232 * @param[in] req 233 * Pointer to the request structure. 234 * @param[in] len 235 * Length of the request in bytes. 236 * 237 * @return 238 * The number of sent bytes on success, a negative errno value otherwise and 239 * rte_errno is set. 240 */ 241 static int 242 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, 243 int len) 244 { 245 struct sockaddr_nl sa = { 246 .nl_family = AF_NETLINK, 247 }; 248 struct iovec iov[2] = { 249 { .iov_base = nh, .iov_len = sizeof(*nh), }, 250 { .iov_base = req, .iov_len = len, }, 251 }; 252 struct msghdr msg = { 253 .msg_name = &sa, 254 .msg_namelen = sizeof(sa), 255 .msg_iov = iov, 256 .msg_iovlen = 2, 257 }; 258 int send_bytes; 259 260 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 261 nh->nlmsg_seq = sn; 262 send_bytes = sendmsg(nlsk_fd, &msg, 0); 263 if (send_bytes < 0) { 264 rte_errno = errno; 265 return -rte_errno; 266 } 267 return send_bytes; 268 } 269 270 /** 271 * Send a message to the kernel on the Netlink socket. 272 * 273 * @param[in] nlsk_fd 274 * The Netlink socket file descriptor used for communication. 275 * @param[in] nh 276 * The Netlink message send to the kernel. 277 * @param[in] sn 278 * Sequence number. 279 * 280 * @return 281 * The number of sent bytes on success, a negative errno value otherwise and 282 * rte_errno is set. 283 */ 284 static int 285 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) 286 { 287 struct sockaddr_nl sa = { 288 .nl_family = AF_NETLINK, 289 }; 290 struct iovec iov = { 291 .iov_base = nh, 292 .iov_len = nh->nlmsg_len, 293 }; 294 struct msghdr msg = { 295 .msg_name = &sa, 296 .msg_namelen = sizeof(sa), 297 .msg_iov = &iov, 298 .msg_iovlen = 1, 299 }; 300 int send_bytes; 301 302 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 303 nh->nlmsg_seq = sn; 304 send_bytes = sendmsg(nlsk_fd, &msg, 0); 305 if (send_bytes < 0) { 306 rte_errno = errno; 307 return -rte_errno; 308 } 309 return send_bytes; 310 } 311 312 /** 313 * Receive a message from the kernel on the Netlink socket, following 314 * mlx5_nl_send(). 315 * 316 * @param[in] nlsk_fd 317 * The Netlink socket file descriptor used for communication. 318 * @param[in] sn 319 * Sequence number. 320 * @param[in] cb 321 * The callback function to call for each Netlink message received. 322 * @param[in, out] arg 323 * Custom arguments for the callback. 324 * 325 * @return 326 * 0 on success, a negative errno value otherwise and rte_errno is set. 327 */ 328 static int 329 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), 330 void *arg) 331 { 332 struct sockaddr_nl sa; 333 void *buf = mlx5_malloc(0, MLX5_RECV_BUF_SIZE, 0, SOCKET_ID_ANY); 334 struct iovec iov = { 335 .iov_base = buf, 336 .iov_len = MLX5_RECV_BUF_SIZE, 337 }; 338 struct msghdr msg = { 339 .msg_name = &sa, 340 .msg_namelen = sizeof(sa), 341 .msg_iov = &iov, 342 /* One message at a time */ 343 .msg_iovlen = 1, 344 }; 345 int multipart = 0; 346 int ret = 0; 347 348 if (!buf) { 349 rte_errno = ENOMEM; 350 return -rte_errno; 351 } 352 do { 353 struct nlmsghdr *nh; 354 int recv_bytes = 0; 355 356 do { 357 recv_bytes = recvmsg(nlsk_fd, &msg, 0); 358 if (recv_bytes == -1) { 359 rte_errno = errno; 360 ret = -rte_errno; 361 goto exit; 362 } 363 nh = (struct nlmsghdr *)buf; 364 } while (nh->nlmsg_seq != sn); 365 for (; 366 NLMSG_OK(nh, (unsigned int)recv_bytes); 367 nh = NLMSG_NEXT(nh, recv_bytes)) { 368 if (nh->nlmsg_type == NLMSG_ERROR) { 369 struct nlmsgerr *err_data = NLMSG_DATA(nh); 370 371 if (err_data->error < 0) { 372 rte_errno = -err_data->error; 373 ret = -rte_errno; 374 goto exit; 375 } 376 /* Ack message. */ 377 ret = 0; 378 goto exit; 379 } 380 /* Multi-part msgs and their trailing DONE message. */ 381 if (nh->nlmsg_flags & NLM_F_MULTI) { 382 if (nh->nlmsg_type == NLMSG_DONE) { 383 ret = 0; 384 goto exit; 385 } 386 multipart = 1; 387 } 388 if (cb) { 389 ret = cb(nh, arg); 390 if (ret < 0) 391 goto exit; 392 } 393 } 394 } while (multipart); 395 exit: 396 mlx5_free(buf); 397 return ret; 398 } 399 400 /** 401 * Parse Netlink message to retrieve the bridge MAC address. 402 * 403 * @param nh 404 * Pointer to Netlink Message Header. 405 * @param arg 406 * PMD data register with this callback. 407 * 408 * @return 409 * 0 on success, a negative errno value otherwise and rte_errno is set. 410 */ 411 static int 412 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) 413 { 414 struct mlx5_nl_mac_addr *data = arg; 415 struct ndmsg *r = NLMSG_DATA(nh); 416 struct rtattr *attribute; 417 int len; 418 419 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); 420 for (attribute = MLX5_NDA_RTA(r); 421 RTA_OK(attribute, len); 422 attribute = RTA_NEXT(attribute, len)) { 423 if (attribute->rta_type == NDA_LLADDR) { 424 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { 425 DRV_LOG(WARNING, 426 "not enough room to finalize the" 427 " request"); 428 rte_errno = ENOMEM; 429 return -rte_errno; 430 } 431 #ifdef RTE_LIBRTE_MLX5_DEBUG 432 char m[RTE_ETHER_ADDR_FMT_SIZE]; 433 434 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, 435 RTA_DATA(attribute)); 436 DRV_LOG(DEBUG, "bridge MAC address %s", m); 437 #endif 438 memcpy(&(*data->mac)[data->mac_n++], 439 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); 440 } 441 } 442 return 0; 443 } 444 445 /** 446 * Get bridge MAC addresses. 447 * 448 * @param[in] nlsk_fd 449 * Netlink socket file descriptor. 450 * @param[in] iface_idx 451 * Net device interface index. 452 * @param mac[out] 453 * Pointer to the array table of MAC addresses to fill. 454 * Its size should be of MLX5_MAX_MAC_ADDRESSES. 455 * @param mac_n[out] 456 * Number of entries filled in MAC array. 457 * 458 * @return 459 * 0 on success, a negative errno value otherwise and rte_errno is set. 460 */ 461 static int 462 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, 463 struct rte_ether_addr (*mac)[], int *mac_n) 464 { 465 struct { 466 struct nlmsghdr hdr; 467 struct ifinfomsg ifm; 468 } req = { 469 .hdr = { 470 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 471 .nlmsg_type = RTM_GETNEIGH, 472 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, 473 }, 474 .ifm = { 475 .ifi_family = PF_BRIDGE, 476 .ifi_index = iface_idx, 477 }, 478 }; 479 struct mlx5_nl_mac_addr data = { 480 .mac = mac, 481 .mac_n = 0, 482 }; 483 uint32_t sn = MLX5_NL_SN_GENERATE; 484 int ret; 485 486 if (nlsk_fd == -1) 487 return 0; 488 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, 489 sizeof(struct ifinfomsg)); 490 if (ret < 0) 491 goto error; 492 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); 493 if (ret < 0) 494 goto error; 495 *mac_n = data.mac_n; 496 return 0; 497 error: 498 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", 499 iface_idx, strerror(rte_errno)); 500 return -rte_errno; 501 } 502 503 /** 504 * Modify the MAC address neighbour table with Netlink. 505 * 506 * @param[in] nlsk_fd 507 * Netlink socket file descriptor. 508 * @param[in] iface_idx 509 * Net device interface index. 510 * @param mac 511 * MAC address to consider. 512 * @param add 513 * 1 to add the MAC address, 0 to remove the MAC address. 514 * 515 * @return 516 * 0 on success, a negative errno value otherwise and rte_errno is set. 517 */ 518 static int 519 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 520 struct rte_ether_addr *mac, int add) 521 { 522 struct { 523 struct nlmsghdr hdr; 524 struct ndmsg ndm; 525 struct rtattr rta; 526 uint8_t buffer[RTE_ETHER_ADDR_LEN]; 527 } req = { 528 .hdr = { 529 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), 530 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 531 NLM_F_EXCL | NLM_F_ACK, 532 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH, 533 }, 534 .ndm = { 535 .ndm_family = PF_BRIDGE, 536 .ndm_state = NUD_NOARP | NUD_PERMANENT, 537 .ndm_ifindex = iface_idx, 538 .ndm_flags = NTF_SELF, 539 }, 540 .rta = { 541 .rta_type = NDA_LLADDR, 542 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), 543 }, 544 }; 545 uint32_t sn = MLX5_NL_SN_GENERATE; 546 int ret; 547 548 if (nlsk_fd == -1) 549 return 0; 550 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); 551 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 552 RTA_ALIGN(req.rta.rta_len); 553 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 554 if (ret < 0) 555 goto error; 556 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 557 if (ret < 0) 558 goto error; 559 return 0; 560 error: 561 #ifdef RTE_LIBRTE_MLX5_DEBUG 562 { 563 char m[RTE_ETHER_ADDR_FMT_SIZE]; 564 565 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac); 566 DRV_LOG(DEBUG, 567 "Interface %u cannot %s MAC address %s %s", 568 iface_idx, 569 add ? "add" : "remove", m, strerror(rte_errno)); 570 } 571 #endif 572 return -rte_errno; 573 } 574 575 /** 576 * Modify the VF MAC address neighbour table with Netlink. 577 * 578 * @param[in] nlsk_fd 579 * Netlink socket file descriptor. 580 * @param[in] iface_idx 581 * Net device interface index. 582 * @param mac 583 * MAC address to consider. 584 * @param vf_index 585 * VF index. 586 * 587 * @return 588 * 0 on success, a negative errno value otherwise and rte_errno is set. 589 */ 590 int 591 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 592 struct rte_ether_addr *mac, int vf_index) 593 { 594 int ret; 595 struct { 596 struct nlmsghdr hdr; 597 struct ifinfomsg ifm; 598 struct rtattr vf_list_rta; 599 struct rtattr vf_info_rta; 600 struct rtattr vf_mac_rta; 601 struct ifla_vf_mac ivm; 602 } req = { 603 .hdr = { 604 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 605 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 606 .nlmsg_type = RTM_BASE, 607 }, 608 .ifm = { 609 .ifi_index = iface_idx, 610 }, 611 .vf_list_rta = { 612 .rta_type = IFLA_VFINFO_LIST, 613 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 614 }, 615 .vf_info_rta = { 616 .rta_type = IFLA_VF_INFO, 617 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 618 }, 619 .vf_mac_rta = { 620 .rta_type = IFLA_VF_MAC, 621 }, 622 }; 623 struct ifla_vf_mac ivm = { 624 .vf = vf_index, 625 }; 626 uint32_t sn = MLX5_NL_SN_GENERATE; 627 628 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); 629 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); 630 631 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); 632 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 633 RTA_ALIGN(req.vf_list_rta.rta_len) + 634 RTA_ALIGN(req.vf_info_rta.rta_len) + 635 RTA_ALIGN(req.vf_mac_rta.rta_len); 636 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 637 &req.vf_list_rta); 638 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 639 &req.vf_info_rta); 640 641 if (nlsk_fd < 0) 642 return -1; 643 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 644 if (ret < 0) 645 goto error; 646 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 647 if (ret < 0) 648 goto error; 649 return 0; 650 error: 651 DRV_LOG(ERR, 652 "representor %u cannot set VF MAC address " 653 "%02X:%02X:%02X:%02X:%02X:%02X : %s", 654 vf_index, 655 mac->addr_bytes[0], mac->addr_bytes[1], 656 mac->addr_bytes[2], mac->addr_bytes[3], 657 mac->addr_bytes[4], mac->addr_bytes[5], 658 strerror(rte_errno)); 659 return -rte_errno; 660 } 661 662 /** 663 * Add a MAC address. 664 * 665 * @param[in] nlsk_fd 666 * Netlink socket file descriptor. 667 * @param[in] iface_idx 668 * Net device interface index. 669 * @param mac_own 670 * BITFIELD_DECLARE array to store the mac. 671 * @param mac 672 * MAC address to register. 673 * @param index 674 * MAC address index. 675 * 676 * @return 677 * 0 on success, a negative errno value otherwise and rte_errno is set. 678 */ 679 int 680 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, 681 uint64_t *mac_own, struct rte_ether_addr *mac, 682 uint32_t index) 683 { 684 int ret; 685 686 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); 687 if (!ret) { 688 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 689 if (index >= MLX5_MAX_MAC_ADDRESSES) 690 return -EINVAL; 691 692 BITFIELD_SET(mac_own, index); 693 } 694 if (ret == -EEXIST) 695 return 0; 696 return ret; 697 } 698 699 /** 700 * Remove a MAC address. 701 * 702 * @param[in] nlsk_fd 703 * Netlink socket file descriptor. 704 * @param[in] iface_idx 705 * Net device interface index. 706 * @param mac_own 707 * BITFIELD_DECLARE array to store the mac. 708 * @param mac 709 * MAC address to remove. 710 * @param index 711 * MAC address index. 712 * 713 * @return 714 * 0 on success, a negative errno value otherwise and rte_errno is set. 715 */ 716 int 717 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, 718 struct rte_ether_addr *mac, uint32_t index) 719 { 720 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 721 if (index >= MLX5_MAX_MAC_ADDRESSES) 722 return -EINVAL; 723 724 BITFIELD_RESET(mac_own, index); 725 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); 726 } 727 728 /** 729 * Synchronize Netlink bridge table to the internal table. 730 * 731 * @param[in] nlsk_fd 732 * Netlink socket file descriptor. 733 * @param[in] iface_idx 734 * Net device interface index. 735 * @param mac_addrs 736 * Mac addresses array to sync. 737 * @param n 738 * @p mac_addrs array size. 739 */ 740 void 741 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, 742 struct rte_ether_addr *mac_addrs, int n) 743 { 744 struct rte_ether_addr macs[n]; 745 int macs_n = 0; 746 int i; 747 int ret; 748 749 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); 750 if (ret) 751 return; 752 for (i = 0; i != macs_n; ++i) { 753 int j; 754 755 /* Verify the address is not in the array yet. */ 756 for (j = 0; j != n; ++j) 757 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) 758 break; 759 if (j != n) 760 continue; 761 /* Find the first entry available. */ 762 for (j = 0; j != n; ++j) { 763 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 764 mac_addrs[j] = macs[i]; 765 break; 766 } 767 } 768 } 769 } 770 771 /** 772 * Flush all added MAC addresses. 773 * 774 * @param[in] nlsk_fd 775 * Netlink socket file descriptor. 776 * @param[in] iface_idx 777 * Net device interface index. 778 * @param[in] mac_addrs 779 * Mac addresses array to flush. 780 * @param n 781 * @p mac_addrs array size. 782 * @param mac_own 783 * BITFIELD_DECLARE array to store the mac. 784 */ 785 void 786 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, 787 struct rte_ether_addr *mac_addrs, int n, 788 uint64_t *mac_own) 789 { 790 int i; 791 792 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES) 793 return; 794 795 for (i = n - 1; i >= 0; --i) { 796 struct rte_ether_addr *m = &mac_addrs[i]; 797 798 if (BITFIELD_ISSET(mac_own, i)) 799 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, 800 i); 801 } 802 } 803 804 /** 805 * Enable promiscuous / all multicast mode through Netlink. 806 * 807 * @param[in] nlsk_fd 808 * Netlink socket file descriptor. 809 * @param[in] iface_idx 810 * Net device interface index. 811 * @param flags 812 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. 813 * @param enable 814 * Nonzero to enable, disable otherwise. 815 * 816 * @return 817 * 0 on success, a negative errno value otherwise and rte_errno is set. 818 */ 819 static int 820 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, 821 int enable) 822 { 823 struct { 824 struct nlmsghdr hdr; 825 struct ifinfomsg ifi; 826 } req = { 827 .hdr = { 828 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 829 .nlmsg_type = RTM_NEWLINK, 830 .nlmsg_flags = NLM_F_REQUEST, 831 }, 832 .ifi = { 833 .ifi_flags = enable ? flags : 0, 834 .ifi_change = flags, 835 .ifi_index = iface_idx, 836 }, 837 }; 838 uint32_t sn = MLX5_NL_SN_GENERATE; 839 int ret; 840 841 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); 842 if (nlsk_fd < 0) 843 return 0; 844 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 845 if (ret < 0) 846 return ret; 847 return 0; 848 } 849 850 /** 851 * Enable promiscuous mode through Netlink. 852 * 853 * @param[in] nlsk_fd 854 * Netlink socket file descriptor. 855 * @param[in] iface_idx 856 * Net device interface index. 857 * @param enable 858 * Nonzero to enable, disable otherwise. 859 * 860 * @return 861 * 0 on success, a negative errno value otherwise and rte_errno is set. 862 */ 863 int 864 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) 865 { 866 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); 867 868 if (ret) 869 DRV_LOG(DEBUG, 870 "Interface %u cannot %s promisc mode: Netlink error %s", 871 iface_idx, enable ? "enable" : "disable", 872 strerror(rte_errno)); 873 return ret; 874 } 875 876 /** 877 * Enable all multicast mode through Netlink. 878 * 879 * @param[in] nlsk_fd 880 * Netlink socket file descriptor. 881 * @param[in] iface_idx 882 * Net device interface index. 883 * @param enable 884 * Nonzero to enable, disable otherwise. 885 * 886 * @return 887 * 0 on success, a negative errno value otherwise and rte_errno is set. 888 */ 889 int 890 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) 891 { 892 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, 893 enable); 894 895 if (ret) 896 DRV_LOG(DEBUG, 897 "Interface %u cannot %s allmulti : Netlink error %s", 898 iface_idx, enable ? "enable" : "disable", 899 strerror(rte_errno)); 900 return ret; 901 } 902 903 /** 904 * Process network interface information from Netlink message. 905 * 906 * @param nh 907 * Pointer to Netlink message header. 908 * @param arg 909 * Opaque data pointer for this callback. 910 * 911 * @return 912 * 0 on success, a negative errno value otherwise and rte_errno is set. 913 */ 914 static int 915 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) 916 { 917 struct mlx5_nl_ifindex_data *data = arg; 918 struct mlx5_nl_ifindex_data local = { 919 .flags = 0, 920 }; 921 size_t off = NLMSG_HDRLEN; 922 923 if (nh->nlmsg_type != 924 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && 925 nh->nlmsg_type != 926 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) 927 goto error; 928 while (off < nh->nlmsg_len) { 929 struct nlattr *na = (void *)((uintptr_t)nh + off); 930 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); 931 932 if (na->nla_len > nh->nlmsg_len - off) 933 goto error; 934 switch (na->nla_type) { 935 case RDMA_NLDEV_ATTR_DEV_INDEX: 936 local.ibindex = *(uint32_t *)payload; 937 local.flags |= MLX5_NL_CMD_GET_IB_INDEX; 938 break; 939 case RDMA_NLDEV_ATTR_DEV_NAME: 940 if (!strcmp(payload, data->name)) 941 local.flags |= MLX5_NL_CMD_GET_IB_NAME; 942 break; 943 case RDMA_NLDEV_ATTR_NDEV_INDEX: 944 local.ifindex = *(uint32_t *)payload; 945 local.flags |= MLX5_NL_CMD_GET_NET_INDEX; 946 break; 947 case RDMA_NLDEV_ATTR_PORT_INDEX: 948 local.portnum = *(uint32_t *)payload; 949 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; 950 break; 951 default: 952 break; 953 } 954 off += NLA_ALIGN(na->nla_len); 955 } 956 /* 957 * It is possible to have multiple messages for all 958 * Infiniband devices in the system with appropriate name. 959 * So we should gather parameters locally and copy to 960 * query context only in case of coinciding device name. 961 */ 962 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { 963 data->flags = local.flags; 964 data->ibindex = local.ibindex; 965 data->ifindex = local.ifindex; 966 data->portnum = local.portnum; 967 } 968 return 0; 969 error: 970 rte_errno = EINVAL; 971 return -rte_errno; 972 } 973 974 /** 975 * Get index of network interface associated with some IB device. 976 * 977 * This is the only somewhat safe method to avoid resorting to heuristics 978 * when faced with port representors. Unfortunately it requires at least 979 * Linux 4.17. 980 * 981 * @param nl 982 * Netlink socket of the RDMA kind (NETLINK_RDMA). 983 * @param[in] name 984 * IB device name. 985 * @param[in] pindex 986 * IB device port index, starting from 1 987 * @return 988 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno 989 * is set. 990 */ 991 unsigned int 992 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) 993 { 994 struct mlx5_nl_ifindex_data data = { 995 .name = name, 996 .flags = 0, 997 .ibindex = 0, /* Determined during first pass. */ 998 .ifindex = 0, /* Determined during second pass. */ 999 }; 1000 union { 1001 struct nlmsghdr nh; 1002 uint8_t buf[NLMSG_HDRLEN + 1003 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) + 1004 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; 1005 } req = { 1006 .nh = { 1007 .nlmsg_len = NLMSG_LENGTH(0), 1008 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1009 RDMA_NLDEV_CMD_GET), 1010 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1011 }, 1012 }; 1013 struct nlattr *na; 1014 uint32_t sn = MLX5_NL_SN_GENERATE; 1015 int ret; 1016 1017 ret = mlx5_nl_send(nl, &req.nh, sn); 1018 if (ret < 0) 1019 return 0; 1020 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1021 if (ret < 0) 1022 return 0; 1023 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1024 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) 1025 goto error; 1026 data.flags = 0; 1027 sn = MLX5_NL_SN_GENERATE; 1028 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1029 RDMA_NLDEV_CMD_PORT_GET); 1030 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1031 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); 1032 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); 1033 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex); 1034 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; 1035 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1036 &data.ibindex, sizeof(data.ibindex)); 1037 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); 1038 na->nla_len = NLA_HDRLEN + sizeof(pindex); 1039 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; 1040 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1041 &pindex, sizeof(pindex)); 1042 ret = mlx5_nl_send(nl, &req.nh, sn); 1043 if (ret < 0) 1044 return 0; 1045 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1046 if (ret < 0) 1047 return 0; 1048 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1049 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1050 !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || 1051 !data.ifindex) 1052 goto error; 1053 return data.ifindex; 1054 error: 1055 rte_errno = ENODEV; 1056 return 0; 1057 } 1058 1059 /** 1060 * Get the number of physical ports of given IB device. 1061 * 1062 * @param nl 1063 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1064 * @param[in] name 1065 * IB device name. 1066 * 1067 * @return 1068 * A valid (nonzero) number of ports on success, 0 otherwise 1069 * and rte_errno is set. 1070 */ 1071 unsigned int 1072 mlx5_nl_portnum(int nl, const char *name) 1073 { 1074 struct mlx5_nl_ifindex_data data = { 1075 .flags = 0, 1076 .name = name, 1077 .ifindex = 0, 1078 .portnum = 0, 1079 }; 1080 struct nlmsghdr req = { 1081 .nlmsg_len = NLMSG_LENGTH(0), 1082 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1083 RDMA_NLDEV_CMD_GET), 1084 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1085 }; 1086 uint32_t sn = MLX5_NL_SN_GENERATE; 1087 int ret; 1088 1089 ret = mlx5_nl_send(nl, &req, sn); 1090 if (ret < 0) 1091 return 0; 1092 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1093 if (ret < 0) 1094 return 0; 1095 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1096 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1097 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { 1098 rte_errno = ENODEV; 1099 return 0; 1100 } 1101 if (!data.portnum) 1102 rte_errno = EINVAL; 1103 return data.portnum; 1104 } 1105 1106 /** 1107 * Analyze gathered port parameters via Netlink to recognize master 1108 * and representor devices for E-Switch configuration. 1109 * 1110 * @param[in] num_vf_set 1111 * flag of presence of number of VFs port attribute. 1112 * @param[inout] switch_info 1113 * Port information, including port name as a number and port name 1114 * type if recognized 1115 * 1116 * @return 1117 * master and representor flags are set in switch_info according to 1118 * recognized parameters (if any). 1119 */ 1120 static void 1121 mlx5_nl_check_switch_info(bool num_vf_set, 1122 struct mlx5_switch_info *switch_info) 1123 { 1124 switch (switch_info->name_type) { 1125 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1126 /* 1127 * Name is not recognized, assume the master, 1128 * check the number of VFs key presence. 1129 */ 1130 switch_info->master = num_vf_set; 1131 break; 1132 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1133 /* 1134 * Name is not set, this assumes the legacy naming 1135 * schema for master, just check if there is a 1136 * number of VFs key. 1137 */ 1138 switch_info->master = num_vf_set; 1139 break; 1140 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1141 /* New uplink naming schema recognized. */ 1142 switch_info->master = 1; 1143 break; 1144 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1145 /* Legacy representors naming schema. */ 1146 switch_info->representor = !num_vf_set; 1147 break; 1148 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: 1149 /* Fallthrough */ 1150 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1151 /* New representors naming schema. */ 1152 switch_info->representor = 1; 1153 break; 1154 } 1155 } 1156 1157 /** 1158 * Process switch information from Netlink message. 1159 * 1160 * @param nh 1161 * Pointer to Netlink message header. 1162 * @param arg 1163 * Opaque data pointer for this callback. 1164 * 1165 * @return 1166 * 0 on success, a negative errno value otherwise and rte_errno is set. 1167 */ 1168 static int 1169 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) 1170 { 1171 struct mlx5_switch_info info = { 1172 .master = 0, 1173 .representor = 0, 1174 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1175 .port_name = 0, 1176 .switch_id = 0, 1177 }; 1178 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 1179 bool switch_id_set = false; 1180 bool num_vf_set = false; 1181 1182 if (nh->nlmsg_type != RTM_NEWLINK) 1183 goto error; 1184 while (off < nh->nlmsg_len) { 1185 struct rtattr *ra = (void *)((uintptr_t)nh + off); 1186 void *payload = RTA_DATA(ra); 1187 unsigned int i; 1188 1189 if (ra->rta_len > nh->nlmsg_len - off) 1190 goto error; 1191 switch (ra->rta_type) { 1192 case IFLA_NUM_VF: 1193 num_vf_set = true; 1194 break; 1195 case IFLA_PHYS_PORT_NAME: 1196 mlx5_translate_port_name((char *)payload, &info); 1197 break; 1198 case IFLA_PHYS_SWITCH_ID: 1199 info.switch_id = 0; 1200 for (i = 0; i < RTA_PAYLOAD(ra); ++i) { 1201 info.switch_id <<= 8; 1202 info.switch_id |= ((uint8_t *)payload)[i]; 1203 } 1204 switch_id_set = true; 1205 break; 1206 } 1207 off += RTA_ALIGN(ra->rta_len); 1208 } 1209 if (switch_id_set) { 1210 /* We have some E-Switch configuration. */ 1211 mlx5_nl_check_switch_info(num_vf_set, &info); 1212 } 1213 MLX5_ASSERT(!(info.master && info.representor)); 1214 memcpy(arg, &info, sizeof(info)); 1215 return 0; 1216 error: 1217 rte_errno = EINVAL; 1218 return -rte_errno; 1219 } 1220 1221 /** 1222 * Get switch information associated with network interface. 1223 * 1224 * @param nl 1225 * Netlink socket of the ROUTE kind (NETLINK_ROUTE). 1226 * @param ifindex 1227 * Network interface index. 1228 * @param[out] info 1229 * Switch information object, populated in case of success. 1230 * 1231 * @return 1232 * 0 on success, a negative errno value otherwise and rte_errno is set. 1233 */ 1234 int 1235 mlx5_nl_switch_info(int nl, unsigned int ifindex, 1236 struct mlx5_switch_info *info) 1237 { 1238 struct { 1239 struct nlmsghdr nh; 1240 struct ifinfomsg info; 1241 struct rtattr rta; 1242 uint32_t extmask; 1243 } req = { 1244 .nh = { 1245 .nlmsg_len = NLMSG_LENGTH 1246 (sizeof(req.info) + 1247 RTA_LENGTH(sizeof(uint32_t))), 1248 .nlmsg_type = RTM_GETLINK, 1249 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1250 }, 1251 .info = { 1252 .ifi_family = AF_UNSPEC, 1253 .ifi_index = ifindex, 1254 }, 1255 .rta = { 1256 .rta_type = IFLA_EXT_MASK, 1257 .rta_len = RTA_LENGTH(sizeof(int32_t)), 1258 }, 1259 .extmask = RTE_LE32(1), 1260 }; 1261 uint32_t sn = MLX5_NL_SN_GENERATE; 1262 int ret; 1263 1264 ret = mlx5_nl_send(nl, &req.nh, sn); 1265 if (ret >= 0) 1266 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); 1267 if (info->master && info->representor) { 1268 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1269 " and as representor", ifindex); 1270 rte_errno = ENODEV; 1271 ret = -rte_errno; 1272 } 1273 return ret; 1274 } 1275 1276 /* 1277 * Delete VLAN network device by ifindex. 1278 * 1279 * @param[in] tcf 1280 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1281 * @param[in] ifindex 1282 * Interface index of network device to delete. 1283 */ 1284 void 1285 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, 1286 uint32_t ifindex) 1287 { 1288 uint32_t sn = MLX5_NL_SN_GENERATE; 1289 int ret; 1290 struct { 1291 struct nlmsghdr nh; 1292 struct ifinfomsg info; 1293 } req = { 1294 .nh = { 1295 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 1296 .nlmsg_type = RTM_DELLINK, 1297 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1298 }, 1299 .info = { 1300 .ifi_family = AF_UNSPEC, 1301 .ifi_index = ifindex, 1302 }, 1303 }; 1304 1305 if (ifindex) { 1306 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn); 1307 if (ret >= 0) 1308 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1309 if (ret < 0) 1310 DRV_LOG(WARNING, "netlink: error deleting VLAN WA" 1311 " ifindex %u, %d", ifindex, ret); 1312 } 1313 } 1314 1315 /* Set of subroutines to build Netlink message. */ 1316 static struct nlattr * 1317 nl_msg_tail(struct nlmsghdr *nlh) 1318 { 1319 return (struct nlattr *) 1320 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); 1321 } 1322 1323 static void 1324 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) 1325 { 1326 struct nlattr *nla = nl_msg_tail(nlh); 1327 1328 nla->nla_type = type; 1329 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen; 1330 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len); 1331 1332 if (alen) 1333 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); 1334 } 1335 1336 static struct nlattr * 1337 nl_attr_nest_start(struct nlmsghdr *nlh, int type) 1338 { 1339 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); 1340 1341 nl_attr_put(nlh, type, NULL, 0); 1342 return nest; 1343 } 1344 1345 static void 1346 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) 1347 { 1348 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; 1349 } 1350 1351 /* 1352 * Create network VLAN device with specified VLAN tag. 1353 * 1354 * @param[in] tcf 1355 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1356 * @param[in] ifindex 1357 * Base network interface index. 1358 * @param[in] tag 1359 * VLAN tag for VLAN network device to create. 1360 */ 1361 uint32_t 1362 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, 1363 uint32_t ifindex, uint16_t tag) 1364 { 1365 struct nlmsghdr *nlh; 1366 struct ifinfomsg *ifm; 1367 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; 1368 1369 __rte_cache_aligned 1370 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1371 NLMSG_ALIGN(sizeof(struct ifinfomsg)) + 1372 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + 1373 NLMSG_ALIGN(sizeof(uint32_t)) + 1374 NLMSG_ALIGN(sizeof(name)) + 1375 NLMSG_ALIGN(sizeof("vlan")) + 1376 NLMSG_ALIGN(sizeof(uint32_t)) + 1377 NLMSG_ALIGN(sizeof(uint16_t)) + 16]; 1378 struct nlattr *na_info; 1379 struct nlattr *na_vlan; 1380 uint32_t sn = MLX5_NL_SN_GENERATE; 1381 int ret; 1382 1383 memset(buf, 0, sizeof(buf)); 1384 nlh = (struct nlmsghdr *)buf; 1385 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1386 nlh->nlmsg_type = RTM_NEWLINK; 1387 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 1388 NLM_F_EXCL | NLM_F_ACK; 1389 ifm = (struct ifinfomsg *)nl_msg_tail(nlh); 1390 nlh->nlmsg_len += sizeof(struct ifinfomsg); 1391 ifm->ifi_family = AF_UNSPEC; 1392 ifm->ifi_type = 0; 1393 ifm->ifi_index = 0; 1394 ifm->ifi_flags = IFF_UP; 1395 ifm->ifi_change = 0xffffffff; 1396 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); 1397 ret = snprintf(name, sizeof(name), "%s.%u.%u", 1398 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); 1399 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); 1400 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); 1401 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); 1402 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); 1403 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); 1404 nl_attr_nest_end(nlh, na_vlan); 1405 nl_attr_nest_end(nlh, na_info); 1406 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len); 1407 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn); 1408 if (ret >= 0) 1409 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1410 if (ret < 0) { 1411 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name, 1412 ret); 1413 } 1414 /* Try to get ifindex of created or pre-existing device. */ 1415 ret = if_nametoindex(name); 1416 if (!ret) { 1417 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name, 1418 errno); 1419 return 0; 1420 } 1421 return ret; 1422 } 1423 1424 /** 1425 * Parse Netlink message to retrieve the general family ID. 1426 * 1427 * @param nh 1428 * Pointer to Netlink Message Header. 1429 * @param arg 1430 * PMD data register with this callback. 1431 * 1432 * @return 1433 * 0 on success, a negative errno value otherwise and rte_errno is set. 1434 */ 1435 static int 1436 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg) 1437 { 1438 1439 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1440 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1441 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1442 1443 for (; nla->nla_len && nla < tail; 1444 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) { 1445 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) { 1446 *(uint16_t *)arg = *(uint16_t *)(nla + 1); 1447 return 0; 1448 } 1449 } 1450 return -EINVAL; 1451 } 1452 1453 #define MLX5_NL_MAX_ATTR_SIZE 100 1454 /** 1455 * Get generic netlink family ID. 1456 * 1457 * @param[in] nlsk_fd 1458 * Netlink socket file descriptor. 1459 * @param[in] name 1460 * The family name. 1461 * 1462 * @return 1463 * ID >= 0 on success and @p enable is updated, a negative errno value 1464 * otherwise and rte_errno is set. 1465 */ 1466 static int 1467 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name) 1468 { 1469 struct nlmsghdr *nlh; 1470 struct genlmsghdr *genl; 1471 uint32_t sn = MLX5_NL_SN_GENERATE; 1472 int name_size = strlen(name) + 1; 1473 int ret; 1474 uint16_t id = -1; 1475 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1476 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1477 NLMSG_ALIGN(sizeof(struct nlattr)) + 1478 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)]; 1479 1480 memset(buf, 0, sizeof(buf)); 1481 nlh = (struct nlmsghdr *)buf; 1482 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1483 nlh->nlmsg_type = GENL_ID_CTRL; 1484 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1485 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1486 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1487 genl->cmd = CTRL_CMD_GETFAMILY; 1488 genl->version = 1; 1489 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size); 1490 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1491 if (ret >= 0) 1492 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id); 1493 if (ret < 0) { 1494 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name, 1495 ret); 1496 return ret; 1497 } 1498 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id); 1499 return (int)id; 1500 } 1501 1502 /** 1503 * Get Devlink family ID. 1504 * 1505 * @param[in] nlsk_fd 1506 * Netlink socket file descriptor. 1507 * 1508 * @return 1509 * ID >= 0 on success and @p enable is updated, a negative errno value 1510 * otherwise and rte_errno is set. 1511 */ 1512 1513 int 1514 mlx5_nl_devlink_family_id_get(int nlsk_fd) 1515 { 1516 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME); 1517 } 1518 1519 /** 1520 * Parse Netlink message to retrieve the ROCE enable status. 1521 * 1522 * @param nh 1523 * Pointer to Netlink Message Header. 1524 * @param arg 1525 * PMD data register with this callback. 1526 * 1527 * @return 1528 * 0 on success, a negative errno value otherwise and rte_errno is set. 1529 */ 1530 static int 1531 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) 1532 { 1533 1534 int ret = -EINVAL; 1535 int *enable = arg; 1536 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1537 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1538 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1539 1540 while (nla->nla_len && nla < tail) { 1541 switch (nla->nla_type) { 1542 /* Expected nested attributes case. */ 1543 case DEVLINK_ATTR_PARAM: 1544 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1545 case DEVLINK_ATTR_PARAM_VALUE: 1546 ret = 0; 1547 nla += 1; 1548 break; 1549 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1550 *enable = 1; 1551 return 0; 1552 default: 1553 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1554 } 1555 } 1556 *enable = 0; 1557 return ret; 1558 } 1559 1560 /** 1561 * Get ROCE enable status through Netlink. 1562 * 1563 * @param[in] nlsk_fd 1564 * Netlink socket file descriptor. 1565 * @param[in] family_id 1566 * the Devlink family ID. 1567 * @param pci_addr 1568 * The device PCI address. 1569 * @param[out] enable 1570 * Where to store the enable status. 1571 * 1572 * @return 1573 * 0 on success and @p enable is updated, a negative errno value otherwise 1574 * and rte_errno is set. 1575 */ 1576 int 1577 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, 1578 int *enable) 1579 { 1580 struct nlmsghdr *nlh; 1581 struct genlmsghdr *genl; 1582 uint32_t sn = MLX5_NL_SN_GENERATE; 1583 int ret; 1584 int cur_en = 0; 1585 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1586 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1587 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 1588 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 1589 1590 memset(buf, 0, sizeof(buf)); 1591 nlh = (struct nlmsghdr *)buf; 1592 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1593 nlh->nlmsg_type = family_id; 1594 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1595 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1596 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1597 genl->cmd = DEVLINK_CMD_PARAM_GET; 1598 genl->version = DEVLINK_GENL_VERSION; 1599 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1600 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1601 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1602 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1603 if (ret >= 0) 1604 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en); 1605 if (ret < 0) { 1606 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.", 1607 pci_addr, ret); 1608 return ret; 1609 } 1610 *enable = cur_en; 1611 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".", 1612 cur_en ? "en" : "dis", pci_addr); 1613 return ret; 1614 } 1615 1616 /** 1617 * Reload mlx5 device kernel driver through Netlink. 1618 * 1619 * @param[in] nlsk_fd 1620 * Netlink socket file descriptor. 1621 * @param[in] family_id 1622 * the Devlink family ID. 1623 * @param pci_addr 1624 * The device PCI address. 1625 * @param[out] enable 1626 * The enable status to set. 1627 * 1628 * @return 1629 * 0 on success, a negative errno value otherwise and rte_errno is set. 1630 */ 1631 int 1632 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) 1633 { 1634 struct nlmsghdr *nlh; 1635 struct genlmsghdr *genl; 1636 uint32_t sn = MLX5_NL_SN_GENERATE; 1637 int ret; 1638 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1639 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1640 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 + 1641 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2]; 1642 1643 memset(buf, 0, sizeof(buf)); 1644 nlh = (struct nlmsghdr *)buf; 1645 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1646 nlh->nlmsg_type = family_id; 1647 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1648 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1649 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1650 genl->cmd = DEVLINK_CMD_RELOAD; 1651 genl->version = DEVLINK_GENL_VERSION; 1652 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1653 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1654 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1655 if (ret >= 0) 1656 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1657 if (ret < 0) { 1658 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d", 1659 pci_addr, ret); 1660 return ret; 1661 } 1662 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.", 1663 pci_addr); 1664 return 0; 1665 } 1666 1667 /** 1668 * Set ROCE enable status through Netlink. 1669 * 1670 * @param[in] nlsk_fd 1671 * Netlink socket file descriptor. 1672 * @param[in] family_id 1673 * the Devlink family ID. 1674 * @param pci_addr 1675 * The device PCI address. 1676 * @param[out] enable 1677 * The enable status to set. 1678 * 1679 * @return 1680 * 0 on success, a negative errno value otherwise and rte_errno is set. 1681 */ 1682 int 1683 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, 1684 int enable) 1685 { 1686 struct nlmsghdr *nlh; 1687 struct genlmsghdr *genl; 1688 uint32_t sn = MLX5_NL_SN_GENERATE; 1689 int ret; 1690 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1691 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1692 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 + 1693 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6]; 1694 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; 1695 uint8_t ptype = NLA_FLAG; 1696 ; 1697 1698 memset(buf, 0, sizeof(buf)); 1699 nlh = (struct nlmsghdr *)buf; 1700 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1701 nlh->nlmsg_type = family_id; 1702 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1703 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1704 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1705 genl->cmd = DEVLINK_CMD_PARAM_SET; 1706 genl->version = DEVLINK_GENL_VERSION; 1707 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1708 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1709 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1710 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode)); 1711 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype)); 1712 if (enable) 1713 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0); 1714 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1715 if (ret >= 0) 1716 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1717 if (ret < 0) { 1718 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:" 1719 " %d.", enable ? "en" : "dis", pci_addr, ret); 1720 return ret; 1721 } 1722 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.", 1723 pci_addr, enable ? "en" : "dis"); 1724 /* Now, need to reload the driver. */ 1725 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); 1726 } 1727