1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018 6WIND S.A. 3 * Copyright 2018 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <linux/if_link.h> 8 #include <linux/rtnetlink.h> 9 #include <linux/genetlink.h> 10 #include <net/if.h> 11 #include <rdma/rdma_netlink.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <stdlib.h> 15 #include <stdalign.h> 16 #include <string.h> 17 #include <sys/socket.h> 18 #include <unistd.h> 19 20 #include <rte_errno.h> 21 #include <rte_atomic.h> 22 23 #include "mlx5_nl.h" 24 #include "mlx5_common_utils.h" 25 #ifdef HAVE_DEVLINK 26 #include <linux/devlink.h> 27 #endif 28 29 30 /* Size of the buffer to receive kernel messages */ 31 #define MLX5_NL_BUF_SIZE (32 * 1024) 32 /* Send buffer size for the Netlink socket */ 33 #define MLX5_SEND_BUF_SIZE 32768 34 /* Receive buffer size for the Netlink socket */ 35 #define MLX5_RECV_BUF_SIZE 32768 36 37 /** Parameters of VLAN devices created by driver. */ 38 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" 39 /* 40 * Define NDA_RTA as defined in iproute2 sources. 41 * 42 * see in iproute2 sources file include/libnetlink.h 43 */ 44 #ifndef MLX5_NDA_RTA 45 #define MLX5_NDA_RTA(r) \ 46 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 47 #endif 48 /* 49 * Define NLMSG_TAIL as defined in iproute2 sources. 50 * 51 * see in iproute2 sources file include/libnetlink.h 52 */ 53 #ifndef NLMSG_TAIL 54 #define NLMSG_TAIL(nmsg) \ 55 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 56 #endif 57 /* 58 * The following definitions are normally found in rdma/rdma_netlink.h, 59 * however they are so recent that most systems do not expose them yet. 60 */ 61 #ifndef HAVE_RDMA_NL_NLDEV 62 #define RDMA_NL_NLDEV 5 63 #endif 64 #ifndef HAVE_RDMA_NLDEV_CMD_GET 65 #define RDMA_NLDEV_CMD_GET 1 66 #endif 67 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET 68 #define RDMA_NLDEV_CMD_PORT_GET 5 69 #endif 70 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX 71 #define RDMA_NLDEV_ATTR_DEV_INDEX 1 72 #endif 73 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME 74 #define RDMA_NLDEV_ATTR_DEV_NAME 2 75 #endif 76 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX 77 #define RDMA_NLDEV_ATTR_PORT_INDEX 3 78 #endif 79 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX 80 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50 81 #endif 82 83 /* These are normally found in linux/if_link.h. */ 84 #ifndef HAVE_IFLA_NUM_VF 85 #define IFLA_NUM_VF 21 86 #endif 87 #ifndef HAVE_IFLA_EXT_MASK 88 #define IFLA_EXT_MASK 29 89 #endif 90 #ifndef HAVE_IFLA_PHYS_SWITCH_ID 91 #define IFLA_PHYS_SWITCH_ID 36 92 #endif 93 #ifndef HAVE_IFLA_PHYS_PORT_NAME 94 #define IFLA_PHYS_PORT_NAME 38 95 #endif 96 97 /* 98 * Some Devlink defines may be missed in old kernel versions, 99 * adjust used defines. 100 */ 101 #ifndef DEVLINK_GENL_NAME 102 #define DEVLINK_GENL_NAME "devlink" 103 #endif 104 #ifndef DEVLINK_GENL_VERSION 105 #define DEVLINK_GENL_VERSION 1 106 #endif 107 #ifndef DEVLINK_ATTR_BUS_NAME 108 #define DEVLINK_ATTR_BUS_NAME 1 109 #endif 110 #ifndef DEVLINK_ATTR_DEV_NAME 111 #define DEVLINK_ATTR_DEV_NAME 2 112 #endif 113 #ifndef DEVLINK_ATTR_PARAM 114 #define DEVLINK_ATTR_PARAM 80 115 #endif 116 #ifndef DEVLINK_ATTR_PARAM_NAME 117 #define DEVLINK_ATTR_PARAM_NAME 81 118 #endif 119 #ifndef DEVLINK_ATTR_PARAM_TYPE 120 #define DEVLINK_ATTR_PARAM_TYPE 83 121 #endif 122 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST 123 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84 124 #endif 125 #ifndef DEVLINK_ATTR_PARAM_VALUE 126 #define DEVLINK_ATTR_PARAM_VALUE 85 127 #endif 128 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA 129 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86 130 #endif 131 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE 132 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87 133 #endif 134 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT 135 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1 136 #endif 137 #ifndef DEVLINK_CMD_RELOAD 138 #define DEVLINK_CMD_RELOAD 37 139 #endif 140 #ifndef DEVLINK_CMD_PARAM_GET 141 #define DEVLINK_CMD_PARAM_GET 38 142 #endif 143 #ifndef DEVLINK_CMD_PARAM_SET 144 #define DEVLINK_CMD_PARAM_SET 39 145 #endif 146 #ifndef NLA_FLAG 147 #define NLA_FLAG 6 148 #endif 149 150 /* Add/remove MAC address through Netlink */ 151 struct mlx5_nl_mac_addr { 152 struct rte_ether_addr (*mac)[]; 153 /**< MAC address handled by the device. */ 154 int mac_n; /**< Number of addresses in the array. */ 155 }; 156 157 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0) 158 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) 159 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) 160 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) 161 162 /** Data structure used by mlx5_nl_cmdget_cb(). */ 163 struct mlx5_nl_ifindex_data { 164 const char *name; /**< IB device name (in). */ 165 uint32_t flags; /**< found attribute flags (out). */ 166 uint32_t ibindex; /**< IB device index (out). */ 167 uint32_t ifindex; /**< Network interface index (out). */ 168 uint32_t portnum; /**< IB device max port number (out). */ 169 }; 170 171 rte_atomic32_t atomic_sn = RTE_ATOMIC32_INIT(0); 172 173 /* Generate Netlink sequence number. */ 174 #define MLX5_NL_SN_GENERATE ((uint32_t)rte_atomic32_add_return(&atomic_sn, 1)) 175 176 /** 177 * Opens a Netlink socket. 178 * 179 * @param protocol 180 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). 181 * 182 * @return 183 * A file descriptor on success, a negative errno value otherwise and 184 * rte_errno is set. 185 */ 186 int 187 mlx5_nl_init(int protocol) 188 { 189 int fd; 190 int sndbuf_size = MLX5_SEND_BUF_SIZE; 191 int rcvbuf_size = MLX5_RECV_BUF_SIZE; 192 struct sockaddr_nl local = { 193 .nl_family = AF_NETLINK, 194 }; 195 int ret; 196 197 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); 198 if (fd == -1) { 199 rte_errno = errno; 200 return -rte_errno; 201 } 202 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int)); 203 if (ret == -1) { 204 rte_errno = errno; 205 goto error; 206 } 207 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int)); 208 if (ret == -1) { 209 rte_errno = errno; 210 goto error; 211 } 212 ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); 213 if (ret == -1) { 214 rte_errno = errno; 215 goto error; 216 } 217 return fd; 218 error: 219 close(fd); 220 return -rte_errno; 221 } 222 223 /** 224 * Send a request message to the kernel on the Netlink socket. 225 * 226 * @param[in] nlsk_fd 227 * Netlink socket file descriptor. 228 * @param[in] nh 229 * The Netlink message send to the kernel. 230 * @param[in] ssn 231 * Sequence number. 232 * @param[in] req 233 * Pointer to the request structure. 234 * @param[in] len 235 * Length of the request in bytes. 236 * 237 * @return 238 * The number of sent bytes on success, a negative errno value otherwise and 239 * rte_errno is set. 240 */ 241 static int 242 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, 243 int len) 244 { 245 struct sockaddr_nl sa = { 246 .nl_family = AF_NETLINK, 247 }; 248 struct iovec iov[2] = { 249 { .iov_base = nh, .iov_len = sizeof(*nh), }, 250 { .iov_base = req, .iov_len = len, }, 251 }; 252 struct msghdr msg = { 253 .msg_name = &sa, 254 .msg_namelen = sizeof(sa), 255 .msg_iov = iov, 256 .msg_iovlen = 2, 257 }; 258 int send_bytes; 259 260 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 261 nh->nlmsg_seq = sn; 262 send_bytes = sendmsg(nlsk_fd, &msg, 0); 263 if (send_bytes < 0) { 264 rte_errno = errno; 265 return -rte_errno; 266 } 267 return send_bytes; 268 } 269 270 /** 271 * Send a message to the kernel on the Netlink socket. 272 * 273 * @param[in] nlsk_fd 274 * The Netlink socket file descriptor used for communication. 275 * @param[in] nh 276 * The Netlink message send to the kernel. 277 * @param[in] sn 278 * Sequence number. 279 * 280 * @return 281 * The number of sent bytes on success, a negative errno value otherwise and 282 * rte_errno is set. 283 */ 284 static int 285 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) 286 { 287 struct sockaddr_nl sa = { 288 .nl_family = AF_NETLINK, 289 }; 290 struct iovec iov = { 291 .iov_base = nh, 292 .iov_len = nh->nlmsg_len, 293 }; 294 struct msghdr msg = { 295 .msg_name = &sa, 296 .msg_namelen = sizeof(sa), 297 .msg_iov = &iov, 298 .msg_iovlen = 1, 299 }; 300 int send_bytes; 301 302 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 303 nh->nlmsg_seq = sn; 304 send_bytes = sendmsg(nlsk_fd, &msg, 0); 305 if (send_bytes < 0) { 306 rte_errno = errno; 307 return -rte_errno; 308 } 309 return send_bytes; 310 } 311 312 /** 313 * Receive a message from the kernel on the Netlink socket, following 314 * mlx5_nl_send(). 315 * 316 * @param[in] nlsk_fd 317 * The Netlink socket file descriptor used for communication. 318 * @param[in] sn 319 * Sequence number. 320 * @param[in] cb 321 * The callback function to call for each Netlink message received. 322 * @param[in, out] arg 323 * Custom arguments for the callback. 324 * 325 * @return 326 * 0 on success, a negative errno value otherwise and rte_errno is set. 327 */ 328 static int 329 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), 330 void *arg) 331 { 332 struct sockaddr_nl sa; 333 void *buf = malloc(MLX5_RECV_BUF_SIZE); 334 struct iovec iov = { 335 .iov_base = buf, 336 .iov_len = MLX5_RECV_BUF_SIZE, 337 }; 338 struct msghdr msg = { 339 .msg_name = &sa, 340 .msg_namelen = sizeof(sa), 341 .msg_iov = &iov, 342 /* One message at a time */ 343 .msg_iovlen = 1, 344 }; 345 int multipart = 0; 346 int ret = 0; 347 348 if (!buf) { 349 rte_errno = ENOMEM; 350 return -rte_errno; 351 } 352 do { 353 struct nlmsghdr *nh; 354 int recv_bytes = 0; 355 356 do { 357 recv_bytes = recvmsg(nlsk_fd, &msg, 0); 358 if (recv_bytes == -1) { 359 rte_errno = errno; 360 ret = -rte_errno; 361 goto exit; 362 } 363 nh = (struct nlmsghdr *)buf; 364 } while (nh->nlmsg_seq != sn); 365 for (; 366 NLMSG_OK(nh, (unsigned int)recv_bytes); 367 nh = NLMSG_NEXT(nh, recv_bytes)) { 368 if (nh->nlmsg_type == NLMSG_ERROR) { 369 struct nlmsgerr *err_data = NLMSG_DATA(nh); 370 371 if (err_data->error < 0) { 372 rte_errno = -err_data->error; 373 ret = -rte_errno; 374 goto exit; 375 } 376 /* Ack message. */ 377 ret = 0; 378 goto exit; 379 } 380 /* Multi-part msgs and their trailing DONE message. */ 381 if (nh->nlmsg_flags & NLM_F_MULTI) { 382 if (nh->nlmsg_type == NLMSG_DONE) { 383 ret = 0; 384 goto exit; 385 } 386 multipart = 1; 387 } 388 if (cb) { 389 ret = cb(nh, arg); 390 if (ret < 0) 391 goto exit; 392 } 393 } 394 } while (multipart); 395 exit: 396 free(buf); 397 return ret; 398 } 399 400 /** 401 * Parse Netlink message to retrieve the bridge MAC address. 402 * 403 * @param nh 404 * Pointer to Netlink Message Header. 405 * @param arg 406 * PMD data register with this callback. 407 * 408 * @return 409 * 0 on success, a negative errno value otherwise and rte_errno is set. 410 */ 411 static int 412 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) 413 { 414 struct mlx5_nl_mac_addr *data = arg; 415 struct ndmsg *r = NLMSG_DATA(nh); 416 struct rtattr *attribute; 417 int len; 418 419 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); 420 for (attribute = MLX5_NDA_RTA(r); 421 RTA_OK(attribute, len); 422 attribute = RTA_NEXT(attribute, len)) { 423 if (attribute->rta_type == NDA_LLADDR) { 424 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { 425 DRV_LOG(WARNING, 426 "not enough room to finalize the" 427 " request"); 428 rte_errno = ENOMEM; 429 return -rte_errno; 430 } 431 #ifdef RTE_LIBRTE_MLX5_DEBUG 432 char m[RTE_ETHER_ADDR_FMT_SIZE]; 433 434 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, 435 RTA_DATA(attribute)); 436 DRV_LOG(DEBUG, "bridge MAC address %s", m); 437 #endif 438 memcpy(&(*data->mac)[data->mac_n++], 439 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); 440 } 441 } 442 return 0; 443 } 444 445 /** 446 * Get bridge MAC addresses. 447 * 448 * @param[in] nlsk_fd 449 * Netlink socket file descriptor. 450 * @param[in] iface_idx 451 * Net device interface index. 452 * @param mac[out] 453 * Pointer to the array table of MAC addresses to fill. 454 * Its size should be of MLX5_MAX_MAC_ADDRESSES. 455 * @param mac_n[out] 456 * Number of entries filled in MAC array. 457 * 458 * @return 459 * 0 on success, a negative errno value otherwise and rte_errno is set. 460 */ 461 static int 462 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, 463 struct rte_ether_addr (*mac)[], int *mac_n) 464 { 465 struct { 466 struct nlmsghdr hdr; 467 struct ifinfomsg ifm; 468 } req = { 469 .hdr = { 470 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 471 .nlmsg_type = RTM_GETNEIGH, 472 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, 473 }, 474 .ifm = { 475 .ifi_family = PF_BRIDGE, 476 .ifi_index = iface_idx, 477 }, 478 }; 479 struct mlx5_nl_mac_addr data = { 480 .mac = mac, 481 .mac_n = 0, 482 }; 483 uint32_t sn = MLX5_NL_SN_GENERATE; 484 int ret; 485 486 if (nlsk_fd == -1) 487 return 0; 488 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, 489 sizeof(struct ifinfomsg)); 490 if (ret < 0) 491 goto error; 492 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); 493 if (ret < 0) 494 goto error; 495 *mac_n = data.mac_n; 496 return 0; 497 error: 498 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", 499 iface_idx, strerror(rte_errno)); 500 return -rte_errno; 501 } 502 503 /** 504 * Modify the MAC address neighbour table with Netlink. 505 * 506 * @param[in] nlsk_fd 507 * Netlink socket file descriptor. 508 * @param[in] iface_idx 509 * Net device interface index. 510 * @param mac 511 * MAC address to consider. 512 * @param add 513 * 1 to add the MAC address, 0 to remove the MAC address. 514 * 515 * @return 516 * 0 on success, a negative errno value otherwise and rte_errno is set. 517 */ 518 static int 519 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 520 struct rte_ether_addr *mac, int add) 521 { 522 struct { 523 struct nlmsghdr hdr; 524 struct ndmsg ndm; 525 struct rtattr rta; 526 uint8_t buffer[RTE_ETHER_ADDR_LEN]; 527 } req = { 528 .hdr = { 529 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), 530 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 531 NLM_F_EXCL | NLM_F_ACK, 532 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH, 533 }, 534 .ndm = { 535 .ndm_family = PF_BRIDGE, 536 .ndm_state = NUD_NOARP | NUD_PERMANENT, 537 .ndm_ifindex = iface_idx, 538 .ndm_flags = NTF_SELF, 539 }, 540 .rta = { 541 .rta_type = NDA_LLADDR, 542 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), 543 }, 544 }; 545 uint32_t sn = MLX5_NL_SN_GENERATE; 546 int ret; 547 548 if (nlsk_fd == -1) 549 return 0; 550 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); 551 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 552 RTA_ALIGN(req.rta.rta_len); 553 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 554 if (ret < 0) 555 goto error; 556 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 557 if (ret < 0) 558 goto error; 559 return 0; 560 error: 561 #ifdef RTE_LIBRTE_MLX5_DEBUG 562 { 563 char m[RTE_ETHER_ADDR_FMT_SIZE]; 564 565 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac); 566 DRV_LOG(DEBUG, 567 "Interface %u cannot %s MAC address %s %s", 568 iface_idx, 569 add ? "add" : "remove", m, strerror(rte_errno)); 570 } 571 #endif 572 return -rte_errno; 573 } 574 575 /** 576 * Modify the VF MAC address neighbour table with Netlink. 577 * 578 * @param[in] nlsk_fd 579 * Netlink socket file descriptor. 580 * @param[in] iface_idx 581 * Net device interface index. 582 * @param mac 583 * MAC address to consider. 584 * @param vf_index 585 * VF index. 586 * 587 * @return 588 * 0 on success, a negative errno value otherwise and rte_errno is set. 589 */ 590 int 591 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 592 struct rte_ether_addr *mac, int vf_index) 593 { 594 int ret; 595 struct { 596 struct nlmsghdr hdr; 597 struct ifinfomsg ifm; 598 struct rtattr vf_list_rta; 599 struct rtattr vf_info_rta; 600 struct rtattr vf_mac_rta; 601 struct ifla_vf_mac ivm; 602 } req = { 603 .hdr = { 604 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 605 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 606 .nlmsg_type = RTM_BASE, 607 }, 608 .ifm = { 609 .ifi_index = iface_idx, 610 }, 611 .vf_list_rta = { 612 .rta_type = IFLA_VFINFO_LIST, 613 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 614 }, 615 .vf_info_rta = { 616 .rta_type = IFLA_VF_INFO, 617 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 618 }, 619 .vf_mac_rta = { 620 .rta_type = IFLA_VF_MAC, 621 }, 622 }; 623 struct ifla_vf_mac ivm = { 624 .vf = vf_index, 625 }; 626 uint32_t sn = MLX5_NL_SN_GENERATE; 627 628 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); 629 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); 630 631 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); 632 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 633 RTA_ALIGN(req.vf_list_rta.rta_len) + 634 RTA_ALIGN(req.vf_info_rta.rta_len) + 635 RTA_ALIGN(req.vf_mac_rta.rta_len); 636 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 637 &req.vf_list_rta); 638 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 639 &req.vf_info_rta); 640 641 if (nlsk_fd < 0) 642 return -1; 643 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 644 if (ret < 0) 645 goto error; 646 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 647 if (ret < 0) 648 goto error; 649 return 0; 650 error: 651 DRV_LOG(ERR, 652 "representor %u cannot set VF MAC address " 653 "%02X:%02X:%02X:%02X:%02X:%02X : %s", 654 vf_index, 655 mac->addr_bytes[0], mac->addr_bytes[1], 656 mac->addr_bytes[2], mac->addr_bytes[3], 657 mac->addr_bytes[4], mac->addr_bytes[5], 658 strerror(rte_errno)); 659 return -rte_errno; 660 } 661 662 /** 663 * Add a MAC address. 664 * 665 * @param[in] nlsk_fd 666 * Netlink socket file descriptor. 667 * @param[in] iface_idx 668 * Net device interface index. 669 * @param mac_own 670 * BITFIELD_DECLARE array to store the mac. 671 * @param mac 672 * MAC address to register. 673 * @param index 674 * MAC address index. 675 * 676 * @return 677 * 0 on success, a negative errno value otherwise and rte_errno is set. 678 */ 679 int 680 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, 681 uint64_t *mac_own, struct rte_ether_addr *mac, 682 uint32_t index) 683 { 684 int ret; 685 686 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); 687 if (!ret) { 688 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 689 if (index >= MLX5_MAX_MAC_ADDRESSES) 690 return -EINVAL; 691 692 BITFIELD_SET(mac_own, index); 693 } 694 if (ret == -EEXIST) 695 return 0; 696 return ret; 697 } 698 699 /** 700 * Remove a MAC address. 701 * 702 * @param[in] nlsk_fd 703 * Netlink socket file descriptor. 704 * @param[in] iface_idx 705 * Net device interface index. 706 * @param mac_own 707 * BITFIELD_DECLARE array to store the mac. 708 * @param mac 709 * MAC address to remove. 710 * @param index 711 * MAC address index. 712 * 713 * @return 714 * 0 on success, a negative errno value otherwise and rte_errno is set. 715 */ 716 int 717 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, 718 struct rte_ether_addr *mac, uint32_t index) 719 { 720 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 721 if (index >= MLX5_MAX_MAC_ADDRESSES) 722 return -EINVAL; 723 724 BITFIELD_RESET(mac_own, index); 725 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); 726 } 727 728 /** 729 * Synchronize Netlink bridge table to the internal table. 730 * 731 * @param[in] nlsk_fd 732 * Netlink socket file descriptor. 733 * @param[in] iface_idx 734 * Net device interface index. 735 * @param mac_addrs 736 * Mac addresses array to sync. 737 * @param n 738 * @p mac_addrs array size. 739 */ 740 void 741 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, 742 struct rte_ether_addr *mac_addrs, int n) 743 { 744 struct rte_ether_addr macs[n]; 745 int macs_n = 0; 746 int i; 747 int ret; 748 749 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); 750 if (ret) 751 return; 752 for (i = 0; i != macs_n; ++i) { 753 int j; 754 755 /* Verify the address is not in the array yet. */ 756 for (j = 0; j != n; ++j) 757 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) 758 break; 759 if (j != n) 760 continue; 761 /* Find the first entry available. */ 762 for (j = 0; j != n; ++j) { 763 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 764 mac_addrs[j] = macs[i]; 765 break; 766 } 767 } 768 } 769 } 770 771 /** 772 * Flush all added MAC addresses. 773 * 774 * @param[in] nlsk_fd 775 * Netlink socket file descriptor. 776 * @param[in] iface_idx 777 * Net device interface index. 778 * @param[in] mac_addrs 779 * Mac addresses array to flush. 780 * @param n 781 * @p mac_addrs array size. 782 * @param mac_own 783 * BITFIELD_DECLARE array to store the mac. 784 */ 785 void 786 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, 787 struct rte_ether_addr *mac_addrs, int n, 788 uint64_t *mac_own) 789 { 790 int i; 791 792 if (n <= 0 || n >= MLX5_MAX_MAC_ADDRESSES) 793 return; 794 795 for (i = n - 1; i >= 0; --i) { 796 struct rte_ether_addr *m = &mac_addrs[i]; 797 798 if (BITFIELD_ISSET(mac_own, i)) 799 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, 800 i); 801 } 802 } 803 804 /** 805 * Enable promiscuous / all multicast mode through Netlink. 806 * 807 * @param[in] nlsk_fd 808 * Netlink socket file descriptor. 809 * @param[in] iface_idx 810 * Net device interface index. 811 * @param flags 812 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. 813 * @param enable 814 * Nonzero to enable, disable otherwise. 815 * 816 * @return 817 * 0 on success, a negative errno value otherwise and rte_errno is set. 818 */ 819 static int 820 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, 821 int enable) 822 { 823 struct { 824 struct nlmsghdr hdr; 825 struct ifinfomsg ifi; 826 } req = { 827 .hdr = { 828 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 829 .nlmsg_type = RTM_NEWLINK, 830 .nlmsg_flags = NLM_F_REQUEST, 831 }, 832 .ifi = { 833 .ifi_flags = enable ? flags : 0, 834 .ifi_change = flags, 835 .ifi_index = iface_idx, 836 }, 837 }; 838 uint32_t sn = MLX5_NL_SN_GENERATE; 839 int ret; 840 841 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); 842 if (nlsk_fd < 0) 843 return 0; 844 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 845 if (ret < 0) 846 return ret; 847 return 0; 848 } 849 850 /** 851 * Enable promiscuous mode through Netlink. 852 * 853 * @param[in] nlsk_fd 854 * Netlink socket file descriptor. 855 * @param[in] iface_idx 856 * Net device interface index. 857 * @param enable 858 * Nonzero to enable, disable otherwise. 859 * 860 * @return 861 * 0 on success, a negative errno value otherwise and rte_errno is set. 862 */ 863 int 864 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) 865 { 866 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); 867 868 if (ret) 869 DRV_LOG(DEBUG, 870 "Interface %u cannot %s promisc mode: Netlink error %s", 871 iface_idx, enable ? "enable" : "disable", 872 strerror(rte_errno)); 873 return ret; 874 } 875 876 /** 877 * Enable all multicast mode through Netlink. 878 * 879 * @param[in] nlsk_fd 880 * Netlink socket file descriptor. 881 * @param[in] iface_idx 882 * Net device interface index. 883 * @param enable 884 * Nonzero to enable, disable otherwise. 885 * 886 * @return 887 * 0 on success, a negative errno value otherwise and rte_errno is set. 888 */ 889 int 890 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) 891 { 892 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, 893 enable); 894 895 if (ret) 896 DRV_LOG(DEBUG, 897 "Interface %u cannot %s allmulti : Netlink error %s", 898 iface_idx, enable ? "enable" : "disable", 899 strerror(rte_errno)); 900 return ret; 901 } 902 903 /** 904 * Process network interface information from Netlink message. 905 * 906 * @param nh 907 * Pointer to Netlink message header. 908 * @param arg 909 * Opaque data pointer for this callback. 910 * 911 * @return 912 * 0 on success, a negative errno value otherwise and rte_errno is set. 913 */ 914 static int 915 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) 916 { 917 struct mlx5_nl_ifindex_data *data = arg; 918 struct mlx5_nl_ifindex_data local = { 919 .flags = 0, 920 }; 921 size_t off = NLMSG_HDRLEN; 922 923 if (nh->nlmsg_type != 924 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && 925 nh->nlmsg_type != 926 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) 927 goto error; 928 while (off < nh->nlmsg_len) { 929 struct nlattr *na = (void *)((uintptr_t)nh + off); 930 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); 931 932 if (na->nla_len > nh->nlmsg_len - off) 933 goto error; 934 switch (na->nla_type) { 935 case RDMA_NLDEV_ATTR_DEV_INDEX: 936 local.ibindex = *(uint32_t *)payload; 937 local.flags |= MLX5_NL_CMD_GET_IB_INDEX; 938 break; 939 case RDMA_NLDEV_ATTR_DEV_NAME: 940 if (!strcmp(payload, data->name)) 941 local.flags |= MLX5_NL_CMD_GET_IB_NAME; 942 break; 943 case RDMA_NLDEV_ATTR_NDEV_INDEX: 944 local.ifindex = *(uint32_t *)payload; 945 local.flags |= MLX5_NL_CMD_GET_NET_INDEX; 946 break; 947 case RDMA_NLDEV_ATTR_PORT_INDEX: 948 local.portnum = *(uint32_t *)payload; 949 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; 950 break; 951 default: 952 break; 953 } 954 off += NLA_ALIGN(na->nla_len); 955 } 956 /* 957 * It is possible to have multiple messages for all 958 * Infiniband devices in the system with appropriate name. 959 * So we should gather parameters locally and copy to 960 * query context only in case of coinciding device name. 961 */ 962 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { 963 data->flags = local.flags; 964 data->ibindex = local.ibindex; 965 data->ifindex = local.ifindex; 966 data->portnum = local.portnum; 967 } 968 return 0; 969 error: 970 rte_errno = EINVAL; 971 return -rte_errno; 972 } 973 974 /** 975 * Get index of network interface associated with some IB device. 976 * 977 * This is the only somewhat safe method to avoid resorting to heuristics 978 * when faced with port representors. Unfortunately it requires at least 979 * Linux 4.17. 980 * 981 * @param nl 982 * Netlink socket of the RDMA kind (NETLINK_RDMA). 983 * @param[in] name 984 * IB device name. 985 * @param[in] pindex 986 * IB device port index, starting from 1 987 * @return 988 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno 989 * is set. 990 */ 991 unsigned int 992 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) 993 { 994 struct mlx5_nl_ifindex_data data = { 995 .name = name, 996 .flags = 0, 997 .ibindex = 0, /* Determined during first pass. */ 998 .ifindex = 0, /* Determined during second pass. */ 999 }; 1000 union { 1001 struct nlmsghdr nh; 1002 uint8_t buf[NLMSG_HDRLEN + 1003 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) + 1004 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; 1005 } req = { 1006 .nh = { 1007 .nlmsg_len = NLMSG_LENGTH(0), 1008 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1009 RDMA_NLDEV_CMD_GET), 1010 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1011 }, 1012 }; 1013 struct nlattr *na; 1014 uint32_t sn = MLX5_NL_SN_GENERATE; 1015 int ret; 1016 1017 ret = mlx5_nl_send(nl, &req.nh, sn); 1018 if (ret < 0) 1019 return 0; 1020 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1021 if (ret < 0) 1022 return 0; 1023 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1024 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) 1025 goto error; 1026 data.flags = 0; 1027 sn = MLX5_NL_SN_GENERATE; 1028 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1029 RDMA_NLDEV_CMD_PORT_GET); 1030 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1031 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); 1032 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); 1033 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex); 1034 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; 1035 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1036 &data.ibindex, sizeof(data.ibindex)); 1037 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); 1038 na->nla_len = NLA_HDRLEN + sizeof(pindex); 1039 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; 1040 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1041 &pindex, sizeof(pindex)); 1042 ret = mlx5_nl_send(nl, &req.nh, sn); 1043 if (ret < 0) 1044 return 0; 1045 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1046 if (ret < 0) 1047 return 0; 1048 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1049 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1050 !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || 1051 !data.ifindex) 1052 goto error; 1053 return data.ifindex; 1054 error: 1055 rte_errno = ENODEV; 1056 return 0; 1057 } 1058 1059 /** 1060 * Get the number of physical ports of given IB device. 1061 * 1062 * @param nl 1063 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1064 * @param[in] name 1065 * IB device name. 1066 * 1067 * @return 1068 * A valid (nonzero) number of ports on success, 0 otherwise 1069 * and rte_errno is set. 1070 */ 1071 unsigned int 1072 mlx5_nl_portnum(int nl, const char *name) 1073 { 1074 struct mlx5_nl_ifindex_data data = { 1075 .flags = 0, 1076 .name = name, 1077 .ifindex = 0, 1078 .portnum = 0, 1079 }; 1080 struct nlmsghdr req = { 1081 .nlmsg_len = NLMSG_LENGTH(0), 1082 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1083 RDMA_NLDEV_CMD_GET), 1084 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1085 }; 1086 uint32_t sn = MLX5_NL_SN_GENERATE; 1087 int ret; 1088 1089 ret = mlx5_nl_send(nl, &req, sn); 1090 if (ret < 0) 1091 return 0; 1092 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1093 if (ret < 0) 1094 return 0; 1095 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1096 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1097 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { 1098 rte_errno = ENODEV; 1099 return 0; 1100 } 1101 if (!data.portnum) 1102 rte_errno = EINVAL; 1103 return data.portnum; 1104 } 1105 1106 /** 1107 * Analyze gathered port parameters via Netlink to recognize master 1108 * and representor devices for E-Switch configuration. 1109 * 1110 * @param[in] num_vf_set 1111 * flag of presence of number of VFs port attribute. 1112 * @param[inout] switch_info 1113 * Port information, including port name as a number and port name 1114 * type if recognized 1115 * 1116 * @return 1117 * master and representor flags are set in switch_info according to 1118 * recognized parameters (if any). 1119 */ 1120 static void 1121 mlx5_nl_check_switch_info(bool num_vf_set, 1122 struct mlx5_switch_info *switch_info) 1123 { 1124 switch (switch_info->name_type) { 1125 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1126 /* 1127 * Name is not recognized, assume the master, 1128 * check the number of VFs key presence. 1129 */ 1130 switch_info->master = num_vf_set; 1131 break; 1132 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1133 /* 1134 * Name is not set, this assumes the legacy naming 1135 * schema for master, just check if there is a 1136 * number of VFs key. 1137 */ 1138 switch_info->master = num_vf_set; 1139 break; 1140 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1141 /* New uplink naming schema recognized. */ 1142 switch_info->master = 1; 1143 break; 1144 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1145 /* Legacy representors naming schema. */ 1146 switch_info->representor = !num_vf_set; 1147 break; 1148 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1149 /* New representors naming schema. */ 1150 switch_info->representor = 1; 1151 break; 1152 } 1153 } 1154 1155 /** 1156 * Process switch information from Netlink message. 1157 * 1158 * @param nh 1159 * Pointer to Netlink message header. 1160 * @param arg 1161 * Opaque data pointer for this callback. 1162 * 1163 * @return 1164 * 0 on success, a negative errno value otherwise and rte_errno is set. 1165 */ 1166 static int 1167 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) 1168 { 1169 struct mlx5_switch_info info = { 1170 .master = 0, 1171 .representor = 0, 1172 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1173 .port_name = 0, 1174 .switch_id = 0, 1175 }; 1176 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 1177 bool switch_id_set = false; 1178 bool num_vf_set = false; 1179 1180 if (nh->nlmsg_type != RTM_NEWLINK) 1181 goto error; 1182 while (off < nh->nlmsg_len) { 1183 struct rtattr *ra = (void *)((uintptr_t)nh + off); 1184 void *payload = RTA_DATA(ra); 1185 unsigned int i; 1186 1187 if (ra->rta_len > nh->nlmsg_len - off) 1188 goto error; 1189 switch (ra->rta_type) { 1190 case IFLA_NUM_VF: 1191 num_vf_set = true; 1192 break; 1193 case IFLA_PHYS_PORT_NAME: 1194 mlx5_translate_port_name((char *)payload, &info); 1195 break; 1196 case IFLA_PHYS_SWITCH_ID: 1197 info.switch_id = 0; 1198 for (i = 0; i < RTA_PAYLOAD(ra); ++i) { 1199 info.switch_id <<= 8; 1200 info.switch_id |= ((uint8_t *)payload)[i]; 1201 } 1202 switch_id_set = true; 1203 break; 1204 } 1205 off += RTA_ALIGN(ra->rta_len); 1206 } 1207 if (switch_id_set) { 1208 /* We have some E-Switch configuration. */ 1209 mlx5_nl_check_switch_info(num_vf_set, &info); 1210 } 1211 MLX5_ASSERT(!(info.master && info.representor)); 1212 memcpy(arg, &info, sizeof(info)); 1213 return 0; 1214 error: 1215 rte_errno = EINVAL; 1216 return -rte_errno; 1217 } 1218 1219 /** 1220 * Get switch information associated with network interface. 1221 * 1222 * @param nl 1223 * Netlink socket of the ROUTE kind (NETLINK_ROUTE). 1224 * @param ifindex 1225 * Network interface index. 1226 * @param[out] info 1227 * Switch information object, populated in case of success. 1228 * 1229 * @return 1230 * 0 on success, a negative errno value otherwise and rte_errno is set. 1231 */ 1232 int 1233 mlx5_nl_switch_info(int nl, unsigned int ifindex, 1234 struct mlx5_switch_info *info) 1235 { 1236 struct { 1237 struct nlmsghdr nh; 1238 struct ifinfomsg info; 1239 struct rtattr rta; 1240 uint32_t extmask; 1241 } req = { 1242 .nh = { 1243 .nlmsg_len = NLMSG_LENGTH 1244 (sizeof(req.info) + 1245 RTA_LENGTH(sizeof(uint32_t))), 1246 .nlmsg_type = RTM_GETLINK, 1247 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1248 }, 1249 .info = { 1250 .ifi_family = AF_UNSPEC, 1251 .ifi_index = ifindex, 1252 }, 1253 .rta = { 1254 .rta_type = IFLA_EXT_MASK, 1255 .rta_len = RTA_LENGTH(sizeof(int32_t)), 1256 }, 1257 .extmask = RTE_LE32(1), 1258 }; 1259 uint32_t sn = MLX5_NL_SN_GENERATE; 1260 int ret; 1261 1262 ret = mlx5_nl_send(nl, &req.nh, sn); 1263 if (ret >= 0) 1264 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); 1265 if (info->master && info->representor) { 1266 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1267 " and as representor", ifindex); 1268 rte_errno = ENODEV; 1269 ret = -rte_errno; 1270 } 1271 return ret; 1272 } 1273 1274 /* 1275 * Delete VLAN network device by ifindex. 1276 * 1277 * @param[in] tcf 1278 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1279 * @param[in] ifindex 1280 * Interface index of network device to delete. 1281 */ 1282 void 1283 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, 1284 uint32_t ifindex) 1285 { 1286 uint32_t sn = MLX5_NL_SN_GENERATE; 1287 int ret; 1288 struct { 1289 struct nlmsghdr nh; 1290 struct ifinfomsg info; 1291 } req = { 1292 .nh = { 1293 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 1294 .nlmsg_type = RTM_DELLINK, 1295 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1296 }, 1297 .info = { 1298 .ifi_family = AF_UNSPEC, 1299 .ifi_index = ifindex, 1300 }, 1301 }; 1302 1303 if (ifindex) { 1304 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn); 1305 if (ret >= 0) 1306 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1307 if (ret < 0) 1308 DRV_LOG(WARNING, "netlink: error deleting VLAN WA" 1309 " ifindex %u, %d", ifindex, ret); 1310 } 1311 } 1312 1313 /* Set of subroutines to build Netlink message. */ 1314 static struct nlattr * 1315 nl_msg_tail(struct nlmsghdr *nlh) 1316 { 1317 return (struct nlattr *) 1318 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); 1319 } 1320 1321 static void 1322 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) 1323 { 1324 struct nlattr *nla = nl_msg_tail(nlh); 1325 1326 nla->nla_type = type; 1327 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen; 1328 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len); 1329 1330 if (alen) 1331 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); 1332 } 1333 1334 static struct nlattr * 1335 nl_attr_nest_start(struct nlmsghdr *nlh, int type) 1336 { 1337 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); 1338 1339 nl_attr_put(nlh, type, NULL, 0); 1340 return nest; 1341 } 1342 1343 static void 1344 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) 1345 { 1346 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; 1347 } 1348 1349 /* 1350 * Create network VLAN device with specified VLAN tag. 1351 * 1352 * @param[in] tcf 1353 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1354 * @param[in] ifindex 1355 * Base network interface index. 1356 * @param[in] tag 1357 * VLAN tag for VLAN network device to create. 1358 */ 1359 uint32_t 1360 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, 1361 uint32_t ifindex, uint16_t tag) 1362 { 1363 struct nlmsghdr *nlh; 1364 struct ifinfomsg *ifm; 1365 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; 1366 1367 __rte_cache_aligned 1368 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1369 NLMSG_ALIGN(sizeof(struct ifinfomsg)) + 1370 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + 1371 NLMSG_ALIGN(sizeof(uint32_t)) + 1372 NLMSG_ALIGN(sizeof(name)) + 1373 NLMSG_ALIGN(sizeof("vlan")) + 1374 NLMSG_ALIGN(sizeof(uint32_t)) + 1375 NLMSG_ALIGN(sizeof(uint16_t)) + 16]; 1376 struct nlattr *na_info; 1377 struct nlattr *na_vlan; 1378 uint32_t sn = MLX5_NL_SN_GENERATE; 1379 int ret; 1380 1381 memset(buf, 0, sizeof(buf)); 1382 nlh = (struct nlmsghdr *)buf; 1383 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1384 nlh->nlmsg_type = RTM_NEWLINK; 1385 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 1386 NLM_F_EXCL | NLM_F_ACK; 1387 ifm = (struct ifinfomsg *)nl_msg_tail(nlh); 1388 nlh->nlmsg_len += sizeof(struct ifinfomsg); 1389 ifm->ifi_family = AF_UNSPEC; 1390 ifm->ifi_type = 0; 1391 ifm->ifi_index = 0; 1392 ifm->ifi_flags = IFF_UP; 1393 ifm->ifi_change = 0xffffffff; 1394 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); 1395 ret = snprintf(name, sizeof(name), "%s.%u.%u", 1396 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); 1397 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); 1398 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); 1399 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); 1400 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); 1401 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); 1402 nl_attr_nest_end(nlh, na_vlan); 1403 nl_attr_nest_end(nlh, na_info); 1404 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len); 1405 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn); 1406 if (ret >= 0) 1407 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1408 if (ret < 0) { 1409 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name, 1410 ret); 1411 } 1412 /* Try to get ifindex of created or pre-existing device. */ 1413 ret = if_nametoindex(name); 1414 if (!ret) { 1415 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name, 1416 errno); 1417 return 0; 1418 } 1419 return ret; 1420 } 1421 1422 /** 1423 * Parse Netlink message to retrieve the general family ID. 1424 * 1425 * @param nh 1426 * Pointer to Netlink Message Header. 1427 * @param arg 1428 * PMD data register with this callback. 1429 * 1430 * @return 1431 * 0 on success, a negative errno value otherwise and rte_errno is set. 1432 */ 1433 static int 1434 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg) 1435 { 1436 1437 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1438 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1439 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1440 1441 for (; nla->nla_len && nla < tail; 1442 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) { 1443 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) { 1444 *(uint16_t *)arg = *(uint16_t *)(nla + 1); 1445 return 0; 1446 } 1447 } 1448 return -EINVAL; 1449 } 1450 1451 #define MLX5_NL_MAX_ATTR_SIZE 100 1452 /** 1453 * Get generic netlink family ID. 1454 * 1455 * @param[in] nlsk_fd 1456 * Netlink socket file descriptor. 1457 * @param[in] name 1458 * The family name. 1459 * 1460 * @return 1461 * ID >= 0 on success and @p enable is updated, a negative errno value 1462 * otherwise and rte_errno is set. 1463 */ 1464 static int 1465 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name) 1466 { 1467 struct nlmsghdr *nlh; 1468 struct genlmsghdr *genl; 1469 uint32_t sn = MLX5_NL_SN_GENERATE; 1470 int name_size = strlen(name) + 1; 1471 int ret; 1472 uint16_t id = -1; 1473 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1474 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1475 NLMSG_ALIGN(sizeof(struct nlattr)) + 1476 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)]; 1477 1478 memset(buf, 0, sizeof(buf)); 1479 nlh = (struct nlmsghdr *)buf; 1480 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1481 nlh->nlmsg_type = GENL_ID_CTRL; 1482 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1483 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1484 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1485 genl->cmd = CTRL_CMD_GETFAMILY; 1486 genl->version = 1; 1487 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size); 1488 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1489 if (ret >= 0) 1490 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id); 1491 if (ret < 0) { 1492 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name, 1493 ret); 1494 return ret; 1495 } 1496 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id); 1497 return (int)id; 1498 } 1499 1500 /** 1501 * Get Devlink family ID. 1502 * 1503 * @param[in] nlsk_fd 1504 * Netlink socket file descriptor. 1505 * 1506 * @return 1507 * ID >= 0 on success and @p enable is updated, a negative errno value 1508 * otherwise and rte_errno is set. 1509 */ 1510 1511 int 1512 mlx5_nl_devlink_family_id_get(int nlsk_fd) 1513 { 1514 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME); 1515 } 1516 1517 /** 1518 * Parse Netlink message to retrieve the ROCE enable status. 1519 * 1520 * @param nh 1521 * Pointer to Netlink Message Header. 1522 * @param arg 1523 * PMD data register with this callback. 1524 * 1525 * @return 1526 * 0 on success, a negative errno value otherwise and rte_errno is set. 1527 */ 1528 static int 1529 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) 1530 { 1531 1532 int ret = -EINVAL; 1533 int *enable = arg; 1534 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1535 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1536 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1537 1538 while (nla->nla_len && nla < tail) { 1539 switch (nla->nla_type) { 1540 /* Expected nested attributes case. */ 1541 case DEVLINK_ATTR_PARAM: 1542 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1543 case DEVLINK_ATTR_PARAM_VALUE: 1544 ret = 0; 1545 nla += 1; 1546 break; 1547 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1548 *enable = 1; 1549 return 0; 1550 default: 1551 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1552 } 1553 } 1554 *enable = 0; 1555 return ret; 1556 } 1557 1558 /** 1559 * Get ROCE enable status through Netlink. 1560 * 1561 * @param[in] nlsk_fd 1562 * Netlink socket file descriptor. 1563 * @param[in] family_id 1564 * the Devlink family ID. 1565 * @param pci_addr 1566 * The device PCI address. 1567 * @param[out] enable 1568 * Where to store the enable status. 1569 * 1570 * @return 1571 * 0 on success and @p enable is updated, a negative errno value otherwise 1572 * and rte_errno is set. 1573 */ 1574 int 1575 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, 1576 int *enable) 1577 { 1578 struct nlmsghdr *nlh; 1579 struct genlmsghdr *genl; 1580 uint32_t sn = MLX5_NL_SN_GENERATE; 1581 int ret; 1582 int cur_en = 0; 1583 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1584 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1585 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 1586 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 1587 1588 memset(buf, 0, sizeof(buf)); 1589 nlh = (struct nlmsghdr *)buf; 1590 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1591 nlh->nlmsg_type = family_id; 1592 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1593 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1594 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1595 genl->cmd = DEVLINK_CMD_PARAM_GET; 1596 genl->version = DEVLINK_GENL_VERSION; 1597 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1598 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1599 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1600 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1601 if (ret >= 0) 1602 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en); 1603 if (ret < 0) { 1604 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.", 1605 pci_addr, ret); 1606 return ret; 1607 } 1608 *enable = cur_en; 1609 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".", 1610 cur_en ? "en" : "dis", pci_addr); 1611 return ret; 1612 } 1613 1614 /** 1615 * Reload mlx5 device kernel driver through Netlink. 1616 * 1617 * @param[in] nlsk_fd 1618 * Netlink socket file descriptor. 1619 * @param[in] family_id 1620 * the Devlink family ID. 1621 * @param pci_addr 1622 * The device PCI address. 1623 * @param[out] enable 1624 * The enable status to set. 1625 * 1626 * @return 1627 * 0 on success, a negative errno value otherwise and rte_errno is set. 1628 */ 1629 int 1630 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) 1631 { 1632 struct nlmsghdr *nlh; 1633 struct genlmsghdr *genl; 1634 uint32_t sn = MLX5_NL_SN_GENERATE; 1635 int ret; 1636 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1637 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1638 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 + 1639 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2]; 1640 1641 memset(buf, 0, sizeof(buf)); 1642 nlh = (struct nlmsghdr *)buf; 1643 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1644 nlh->nlmsg_type = family_id; 1645 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1646 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1647 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1648 genl->cmd = DEVLINK_CMD_RELOAD; 1649 genl->version = DEVLINK_GENL_VERSION; 1650 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1651 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1652 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1653 if (ret >= 0) 1654 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1655 if (ret < 0) { 1656 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d", 1657 pci_addr, ret); 1658 return ret; 1659 } 1660 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.", 1661 pci_addr); 1662 return 0; 1663 } 1664 1665 /** 1666 * Set ROCE enable status through Netlink. 1667 * 1668 * @param[in] nlsk_fd 1669 * Netlink socket file descriptor. 1670 * @param[in] family_id 1671 * the Devlink family ID. 1672 * @param pci_addr 1673 * The device PCI address. 1674 * @param[out] enable 1675 * The enable status to set. 1676 * 1677 * @return 1678 * 0 on success, a negative errno value otherwise and rte_errno is set. 1679 */ 1680 int 1681 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, 1682 int enable) 1683 { 1684 struct nlmsghdr *nlh; 1685 struct genlmsghdr *genl; 1686 uint32_t sn = MLX5_NL_SN_GENERATE; 1687 int ret; 1688 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1689 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1690 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 + 1691 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6]; 1692 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; 1693 uint8_t ptype = NLA_FLAG; 1694 ; 1695 1696 memset(buf, 0, sizeof(buf)); 1697 nlh = (struct nlmsghdr *)buf; 1698 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1699 nlh->nlmsg_type = family_id; 1700 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1701 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1702 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1703 genl->cmd = DEVLINK_CMD_PARAM_SET; 1704 genl->version = DEVLINK_GENL_VERSION; 1705 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1706 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1707 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1708 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode)); 1709 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype)); 1710 if (enable) 1711 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0); 1712 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1713 if (ret >= 0) 1714 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1715 if (ret < 0) { 1716 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:" 1717 " %d.", enable ? "en" : "dis", pci_addr, ret); 1718 return ret; 1719 } 1720 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.", 1721 pci_addr, enable ? "en" : "dis"); 1722 /* Now, need to reload the driver. */ 1723 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); 1724 } 1725