1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2018 6WIND S.A. 3 * Copyright 2018 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <linux/if_link.h> 8 #include <linux/rtnetlink.h> 9 #include <linux/genetlink.h> 10 #include <net/if.h> 11 #include <rdma/rdma_netlink.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <stdlib.h> 15 #include <stdalign.h> 16 #include <string.h> 17 #include <sys/socket.h> 18 #include <unistd.h> 19 20 #include <rte_errno.h> 21 #include <rte_atomic.h> 22 23 #include "mlx5_nl.h" 24 #include "mlx5_common_utils.h" 25 #include "mlx5_malloc.h" 26 #ifdef HAVE_DEVLINK 27 #include <linux/devlink.h> 28 #endif 29 30 31 /* Size of the buffer to receive kernel messages */ 32 #define MLX5_NL_BUF_SIZE (32 * 1024) 33 /* Send buffer size for the Netlink socket */ 34 #define MLX5_SEND_BUF_SIZE 32768 35 /* Receive buffer size for the Netlink socket */ 36 #define MLX5_RECV_BUF_SIZE 32768 37 38 /** Parameters of VLAN devices created by driver. */ 39 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" 40 /* 41 * Define NDA_RTA as defined in iproute2 sources. 42 * 43 * see in iproute2 sources file include/libnetlink.h 44 */ 45 #ifndef MLX5_NDA_RTA 46 #define MLX5_NDA_RTA(r) \ 47 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 48 #endif 49 /* 50 * Define NLMSG_TAIL as defined in iproute2 sources. 51 * 52 * see in iproute2 sources file include/libnetlink.h 53 */ 54 #ifndef NLMSG_TAIL 55 #define NLMSG_TAIL(nmsg) \ 56 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 57 #endif 58 /* 59 * The following definitions are normally found in rdma/rdma_netlink.h, 60 * however they are so recent that most systems do not expose them yet. 61 */ 62 #ifndef HAVE_RDMA_NL_NLDEV 63 #define RDMA_NL_NLDEV 5 64 #endif 65 #ifndef HAVE_RDMA_NLDEV_CMD_GET 66 #define RDMA_NLDEV_CMD_GET 1 67 #endif 68 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET 69 #define RDMA_NLDEV_CMD_PORT_GET 5 70 #endif 71 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX 72 #define RDMA_NLDEV_ATTR_DEV_INDEX 1 73 #endif 74 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME 75 #define RDMA_NLDEV_ATTR_DEV_NAME 2 76 #endif 77 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX 78 #define RDMA_NLDEV_ATTR_PORT_INDEX 3 79 #endif 80 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX 81 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50 82 #endif 83 84 /* These are normally found in linux/if_link.h. */ 85 #ifndef HAVE_IFLA_NUM_VF 86 #define IFLA_NUM_VF 21 87 #endif 88 #ifndef HAVE_IFLA_EXT_MASK 89 #define IFLA_EXT_MASK 29 90 #endif 91 #ifndef HAVE_IFLA_PHYS_SWITCH_ID 92 #define IFLA_PHYS_SWITCH_ID 36 93 #endif 94 #ifndef HAVE_IFLA_PHYS_PORT_NAME 95 #define IFLA_PHYS_PORT_NAME 38 96 #endif 97 98 /* 99 * Some Devlink defines may be missed in old kernel versions, 100 * adjust used defines. 101 */ 102 #ifndef DEVLINK_GENL_NAME 103 #define DEVLINK_GENL_NAME "devlink" 104 #endif 105 #ifndef DEVLINK_GENL_VERSION 106 #define DEVLINK_GENL_VERSION 1 107 #endif 108 #ifndef DEVLINK_ATTR_BUS_NAME 109 #define DEVLINK_ATTR_BUS_NAME 1 110 #endif 111 #ifndef DEVLINK_ATTR_DEV_NAME 112 #define DEVLINK_ATTR_DEV_NAME 2 113 #endif 114 #ifndef DEVLINK_ATTR_PARAM 115 #define DEVLINK_ATTR_PARAM 80 116 #endif 117 #ifndef DEVLINK_ATTR_PARAM_NAME 118 #define DEVLINK_ATTR_PARAM_NAME 81 119 #endif 120 #ifndef DEVLINK_ATTR_PARAM_TYPE 121 #define DEVLINK_ATTR_PARAM_TYPE 83 122 #endif 123 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST 124 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84 125 #endif 126 #ifndef DEVLINK_ATTR_PARAM_VALUE 127 #define DEVLINK_ATTR_PARAM_VALUE 85 128 #endif 129 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA 130 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86 131 #endif 132 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE 133 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87 134 #endif 135 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT 136 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1 137 #endif 138 #ifndef DEVLINK_CMD_RELOAD 139 #define DEVLINK_CMD_RELOAD 37 140 #endif 141 #ifndef DEVLINK_CMD_PARAM_GET 142 #define DEVLINK_CMD_PARAM_GET 38 143 #endif 144 #ifndef DEVLINK_CMD_PARAM_SET 145 #define DEVLINK_CMD_PARAM_SET 39 146 #endif 147 #ifndef NLA_FLAG 148 #define NLA_FLAG 6 149 #endif 150 151 /* Add/remove MAC address through Netlink */ 152 struct mlx5_nl_mac_addr { 153 struct rte_ether_addr (*mac)[]; 154 /**< MAC address handled by the device. */ 155 int mac_n; /**< Number of addresses in the array. */ 156 }; 157 158 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0) 159 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) 160 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) 161 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) 162 163 /** Data structure used by mlx5_nl_cmdget_cb(). */ 164 struct mlx5_nl_ifindex_data { 165 const char *name; /**< IB device name (in). */ 166 uint32_t flags; /**< found attribute flags (out). */ 167 uint32_t ibindex; /**< IB device index (out). */ 168 uint32_t ifindex; /**< Network interface index (out). */ 169 uint32_t portnum; /**< IB device max port number (out). */ 170 }; 171 172 rte_atomic32_t atomic_sn = RTE_ATOMIC32_INIT(0); 173 174 /* Generate Netlink sequence number. */ 175 #define MLX5_NL_SN_GENERATE ((uint32_t)rte_atomic32_add_return(&atomic_sn, 1)) 176 177 /** 178 * Opens a Netlink socket. 179 * 180 * @param protocol 181 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). 182 * 183 * @return 184 * A file descriptor on success, a negative errno value otherwise and 185 * rte_errno is set. 186 */ 187 int 188 mlx5_nl_init(int protocol) 189 { 190 int fd; 191 int sndbuf_size = MLX5_SEND_BUF_SIZE; 192 int rcvbuf_size = MLX5_RECV_BUF_SIZE; 193 struct sockaddr_nl local = { 194 .nl_family = AF_NETLINK, 195 }; 196 int ret; 197 198 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); 199 if (fd == -1) { 200 rte_errno = errno; 201 return -rte_errno; 202 } 203 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int)); 204 if (ret == -1) { 205 rte_errno = errno; 206 goto error; 207 } 208 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int)); 209 if (ret == -1) { 210 rte_errno = errno; 211 goto error; 212 } 213 ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); 214 if (ret == -1) { 215 rte_errno = errno; 216 goto error; 217 } 218 return fd; 219 error: 220 close(fd); 221 return -rte_errno; 222 } 223 224 /** 225 * Send a request message to the kernel on the Netlink socket. 226 * 227 * @param[in] nlsk_fd 228 * Netlink socket file descriptor. 229 * @param[in] nh 230 * The Netlink message send to the kernel. 231 * @param[in] ssn 232 * Sequence number. 233 * @param[in] req 234 * Pointer to the request structure. 235 * @param[in] len 236 * Length of the request in bytes. 237 * 238 * @return 239 * The number of sent bytes on success, a negative errno value otherwise and 240 * rte_errno is set. 241 */ 242 static int 243 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, 244 int len) 245 { 246 struct sockaddr_nl sa = { 247 .nl_family = AF_NETLINK, 248 }; 249 struct iovec iov[2] = { 250 { .iov_base = nh, .iov_len = sizeof(*nh), }, 251 { .iov_base = req, .iov_len = len, }, 252 }; 253 struct msghdr msg = { 254 .msg_name = &sa, 255 .msg_namelen = sizeof(sa), 256 .msg_iov = iov, 257 .msg_iovlen = 2, 258 }; 259 int send_bytes; 260 261 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 262 nh->nlmsg_seq = sn; 263 send_bytes = sendmsg(nlsk_fd, &msg, 0); 264 if (send_bytes < 0) { 265 rte_errno = errno; 266 return -rte_errno; 267 } 268 return send_bytes; 269 } 270 271 /** 272 * Send a message to the kernel on the Netlink socket. 273 * 274 * @param[in] nlsk_fd 275 * The Netlink socket file descriptor used for communication. 276 * @param[in] nh 277 * The Netlink message send to the kernel. 278 * @param[in] sn 279 * Sequence number. 280 * 281 * @return 282 * The number of sent bytes on success, a negative errno value otherwise and 283 * rte_errno is set. 284 */ 285 static int 286 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) 287 { 288 struct sockaddr_nl sa = { 289 .nl_family = AF_NETLINK, 290 }; 291 struct iovec iov = { 292 .iov_base = nh, 293 .iov_len = nh->nlmsg_len, 294 }; 295 struct msghdr msg = { 296 .msg_name = &sa, 297 .msg_namelen = sizeof(sa), 298 .msg_iov = &iov, 299 .msg_iovlen = 1, 300 }; 301 int send_bytes; 302 303 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 304 nh->nlmsg_seq = sn; 305 send_bytes = sendmsg(nlsk_fd, &msg, 0); 306 if (send_bytes < 0) { 307 rte_errno = errno; 308 return -rte_errno; 309 } 310 return send_bytes; 311 } 312 313 /** 314 * Receive a message from the kernel on the Netlink socket, following 315 * mlx5_nl_send(). 316 * 317 * @param[in] nlsk_fd 318 * The Netlink socket file descriptor used for communication. 319 * @param[in] sn 320 * Sequence number. 321 * @param[in] cb 322 * The callback function to call for each Netlink message received. 323 * @param[in, out] arg 324 * Custom arguments for the callback. 325 * 326 * @return 327 * 0 on success, a negative errno value otherwise and rte_errno is set. 328 */ 329 static int 330 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), 331 void *arg) 332 { 333 struct sockaddr_nl sa; 334 void *buf = mlx5_malloc(0, MLX5_RECV_BUF_SIZE, 0, SOCKET_ID_ANY); 335 struct iovec iov = { 336 .iov_base = buf, 337 .iov_len = MLX5_RECV_BUF_SIZE, 338 }; 339 struct msghdr msg = { 340 .msg_name = &sa, 341 .msg_namelen = sizeof(sa), 342 .msg_iov = &iov, 343 /* One message at a time */ 344 .msg_iovlen = 1, 345 }; 346 int multipart = 0; 347 int ret = 0; 348 349 if (!buf) { 350 rte_errno = ENOMEM; 351 return -rte_errno; 352 } 353 do { 354 struct nlmsghdr *nh; 355 int recv_bytes = 0; 356 357 do { 358 recv_bytes = recvmsg(nlsk_fd, &msg, 0); 359 if (recv_bytes == -1) { 360 rte_errno = errno; 361 ret = -rte_errno; 362 goto exit; 363 } 364 nh = (struct nlmsghdr *)buf; 365 } while (nh->nlmsg_seq != sn); 366 for (; 367 NLMSG_OK(nh, (unsigned int)recv_bytes); 368 nh = NLMSG_NEXT(nh, recv_bytes)) { 369 if (nh->nlmsg_type == NLMSG_ERROR) { 370 struct nlmsgerr *err_data = NLMSG_DATA(nh); 371 372 if (err_data->error < 0) { 373 rte_errno = -err_data->error; 374 ret = -rte_errno; 375 goto exit; 376 } 377 /* Ack message. */ 378 ret = 0; 379 goto exit; 380 } 381 /* Multi-part msgs and their trailing DONE message. */ 382 if (nh->nlmsg_flags & NLM_F_MULTI) { 383 if (nh->nlmsg_type == NLMSG_DONE) { 384 ret = 0; 385 goto exit; 386 } 387 multipart = 1; 388 } 389 if (cb) { 390 ret = cb(nh, arg); 391 if (ret < 0) 392 goto exit; 393 } 394 } 395 } while (multipart); 396 exit: 397 mlx5_free(buf); 398 return ret; 399 } 400 401 /** 402 * Parse Netlink message to retrieve the bridge MAC address. 403 * 404 * @param nh 405 * Pointer to Netlink Message Header. 406 * @param arg 407 * PMD data register with this callback. 408 * 409 * @return 410 * 0 on success, a negative errno value otherwise and rte_errno is set. 411 */ 412 static int 413 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) 414 { 415 struct mlx5_nl_mac_addr *data = arg; 416 struct ndmsg *r = NLMSG_DATA(nh); 417 struct rtattr *attribute; 418 int len; 419 420 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); 421 for (attribute = MLX5_NDA_RTA(r); 422 RTA_OK(attribute, len); 423 attribute = RTA_NEXT(attribute, len)) { 424 if (attribute->rta_type == NDA_LLADDR) { 425 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { 426 DRV_LOG(WARNING, 427 "not enough room to finalize the" 428 " request"); 429 rte_errno = ENOMEM; 430 return -rte_errno; 431 } 432 #ifdef RTE_LIBRTE_MLX5_DEBUG 433 char m[RTE_ETHER_ADDR_FMT_SIZE]; 434 435 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, 436 RTA_DATA(attribute)); 437 DRV_LOG(DEBUG, "bridge MAC address %s", m); 438 #endif 439 memcpy(&(*data->mac)[data->mac_n++], 440 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); 441 } 442 } 443 return 0; 444 } 445 446 /** 447 * Get bridge MAC addresses. 448 * 449 * @param[in] nlsk_fd 450 * Netlink socket file descriptor. 451 * @param[in] iface_idx 452 * Net device interface index. 453 * @param mac[out] 454 * Pointer to the array table of MAC addresses to fill. 455 * Its size should be of MLX5_MAX_MAC_ADDRESSES. 456 * @param mac_n[out] 457 * Number of entries filled in MAC array. 458 * 459 * @return 460 * 0 on success, a negative errno value otherwise and rte_errno is set. 461 */ 462 static int 463 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, 464 struct rte_ether_addr (*mac)[], int *mac_n) 465 { 466 struct { 467 struct nlmsghdr hdr; 468 struct ifinfomsg ifm; 469 } req = { 470 .hdr = { 471 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 472 .nlmsg_type = RTM_GETNEIGH, 473 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, 474 }, 475 .ifm = { 476 .ifi_family = PF_BRIDGE, 477 .ifi_index = iface_idx, 478 }, 479 }; 480 struct mlx5_nl_mac_addr data = { 481 .mac = mac, 482 .mac_n = 0, 483 }; 484 uint32_t sn = MLX5_NL_SN_GENERATE; 485 int ret; 486 487 if (nlsk_fd == -1) 488 return 0; 489 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, 490 sizeof(struct ifinfomsg)); 491 if (ret < 0) 492 goto error; 493 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); 494 if (ret < 0) 495 goto error; 496 *mac_n = data.mac_n; 497 return 0; 498 error: 499 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", 500 iface_idx, strerror(rte_errno)); 501 return -rte_errno; 502 } 503 504 /** 505 * Modify the MAC address neighbour table with Netlink. 506 * 507 * @param[in] nlsk_fd 508 * Netlink socket file descriptor. 509 * @param[in] iface_idx 510 * Net device interface index. 511 * @param mac 512 * MAC address to consider. 513 * @param add 514 * 1 to add the MAC address, 0 to remove the MAC address. 515 * 516 * @return 517 * 0 on success, a negative errno value otherwise and rte_errno is set. 518 */ 519 static int 520 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 521 struct rte_ether_addr *mac, int add) 522 { 523 struct { 524 struct nlmsghdr hdr; 525 struct ndmsg ndm; 526 struct rtattr rta; 527 uint8_t buffer[RTE_ETHER_ADDR_LEN]; 528 } req = { 529 .hdr = { 530 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), 531 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 532 NLM_F_EXCL | NLM_F_ACK, 533 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH, 534 }, 535 .ndm = { 536 .ndm_family = PF_BRIDGE, 537 .ndm_state = NUD_NOARP | NUD_PERMANENT, 538 .ndm_ifindex = iface_idx, 539 .ndm_flags = NTF_SELF, 540 }, 541 .rta = { 542 .rta_type = NDA_LLADDR, 543 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), 544 }, 545 }; 546 uint32_t sn = MLX5_NL_SN_GENERATE; 547 int ret; 548 549 if (nlsk_fd == -1) 550 return 0; 551 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); 552 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 553 RTA_ALIGN(req.rta.rta_len); 554 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 555 if (ret < 0) 556 goto error; 557 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 558 if (ret < 0) 559 goto error; 560 return 0; 561 error: 562 #ifdef RTE_LIBRTE_MLX5_DEBUG 563 { 564 char m[RTE_ETHER_ADDR_FMT_SIZE]; 565 566 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac); 567 DRV_LOG(DEBUG, 568 "Interface %u cannot %s MAC address %s %s", 569 iface_idx, 570 add ? "add" : "remove", m, strerror(rte_errno)); 571 } 572 #endif 573 return -rte_errno; 574 } 575 576 /** 577 * Modify the VF MAC address neighbour table with Netlink. 578 * 579 * @param[in] nlsk_fd 580 * Netlink socket file descriptor. 581 * @param[in] iface_idx 582 * Net device interface index. 583 * @param mac 584 * MAC address to consider. 585 * @param vf_index 586 * VF index. 587 * 588 * @return 589 * 0 on success, a negative errno value otherwise and rte_errno is set. 590 */ 591 int 592 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, 593 struct rte_ether_addr *mac, int vf_index) 594 { 595 int ret; 596 struct { 597 struct nlmsghdr hdr; 598 struct ifinfomsg ifm; 599 struct rtattr vf_list_rta; 600 struct rtattr vf_info_rta; 601 struct rtattr vf_mac_rta; 602 struct ifla_vf_mac ivm; 603 } req = { 604 .hdr = { 605 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 606 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 607 .nlmsg_type = RTM_BASE, 608 }, 609 .ifm = { 610 .ifi_index = iface_idx, 611 }, 612 .vf_list_rta = { 613 .rta_type = IFLA_VFINFO_LIST, 614 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 615 }, 616 .vf_info_rta = { 617 .rta_type = IFLA_VF_INFO, 618 .rta_len = RTA_ALIGN(RTA_LENGTH(0)), 619 }, 620 .vf_mac_rta = { 621 .rta_type = IFLA_VF_MAC, 622 }, 623 }; 624 struct ifla_vf_mac ivm = { 625 .vf = vf_index, 626 }; 627 uint32_t sn = MLX5_NL_SN_GENERATE; 628 629 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); 630 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); 631 632 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); 633 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + 634 RTA_ALIGN(req.vf_list_rta.rta_len) + 635 RTA_ALIGN(req.vf_info_rta.rta_len) + 636 RTA_ALIGN(req.vf_mac_rta.rta_len); 637 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 638 &req.vf_list_rta); 639 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), 640 &req.vf_info_rta); 641 642 if (nlsk_fd < 0) 643 return -1; 644 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 645 if (ret < 0) 646 goto error; 647 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 648 if (ret < 0) 649 goto error; 650 return 0; 651 error: 652 DRV_LOG(ERR, 653 "representor %u cannot set VF MAC address " 654 "%02X:%02X:%02X:%02X:%02X:%02X : %s", 655 vf_index, 656 mac->addr_bytes[0], mac->addr_bytes[1], 657 mac->addr_bytes[2], mac->addr_bytes[3], 658 mac->addr_bytes[4], mac->addr_bytes[5], 659 strerror(rte_errno)); 660 return -rte_errno; 661 } 662 663 /** 664 * Add a MAC address. 665 * 666 * @param[in] nlsk_fd 667 * Netlink socket file descriptor. 668 * @param[in] iface_idx 669 * Net device interface index. 670 * @param mac_own 671 * BITFIELD_DECLARE array to store the mac. 672 * @param mac 673 * MAC address to register. 674 * @param index 675 * MAC address index. 676 * 677 * @return 678 * 0 on success, a negative errno value otherwise and rte_errno is set. 679 */ 680 int 681 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, 682 uint64_t *mac_own, struct rte_ether_addr *mac, 683 uint32_t index) 684 { 685 int ret; 686 687 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); 688 if (!ret) { 689 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 690 if (index >= MLX5_MAX_MAC_ADDRESSES) 691 return -EINVAL; 692 693 BITFIELD_SET(mac_own, index); 694 } 695 if (ret == -EEXIST) 696 return 0; 697 return ret; 698 } 699 700 /** 701 * Remove a MAC address. 702 * 703 * @param[in] nlsk_fd 704 * Netlink socket file descriptor. 705 * @param[in] iface_idx 706 * Net device interface index. 707 * @param mac_own 708 * BITFIELD_DECLARE array to store the mac. 709 * @param mac 710 * MAC address to remove. 711 * @param index 712 * MAC address index. 713 * 714 * @return 715 * 0 on success, a negative errno value otherwise and rte_errno is set. 716 */ 717 int 718 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, 719 struct rte_ether_addr *mac, uint32_t index) 720 { 721 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES); 722 if (index >= MLX5_MAX_MAC_ADDRESSES) 723 return -EINVAL; 724 725 BITFIELD_RESET(mac_own, index); 726 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); 727 } 728 729 /** 730 * Synchronize Netlink bridge table to the internal table. 731 * 732 * @param[in] nlsk_fd 733 * Netlink socket file descriptor. 734 * @param[in] iface_idx 735 * Net device interface index. 736 * @param mac_addrs 737 * Mac addresses array to sync. 738 * @param n 739 * @p mac_addrs array size. 740 */ 741 void 742 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, 743 struct rte_ether_addr *mac_addrs, int n) 744 { 745 struct rte_ether_addr macs[n]; 746 int macs_n = 0; 747 int i; 748 int ret; 749 750 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); 751 if (ret) 752 return; 753 for (i = 0; i != macs_n; ++i) { 754 int j; 755 756 /* Verify the address is not in the array yet. */ 757 for (j = 0; j != n; ++j) 758 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) 759 break; 760 if (j != n) 761 continue; 762 /* Find the first entry available. */ 763 for (j = 0; j != n; ++j) { 764 if (rte_is_zero_ether_addr(&mac_addrs[j])) { 765 mac_addrs[j] = macs[i]; 766 break; 767 } 768 } 769 } 770 } 771 772 /** 773 * Flush all added MAC addresses. 774 * 775 * @param[in] nlsk_fd 776 * Netlink socket file descriptor. 777 * @param[in] iface_idx 778 * Net device interface index. 779 * @param[in] mac_addrs 780 * Mac addresses array to flush. 781 * @param n 782 * @p mac_addrs array size. 783 * @param mac_own 784 * BITFIELD_DECLARE array to store the mac. 785 */ 786 void 787 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, 788 struct rte_ether_addr *mac_addrs, int n, 789 uint64_t *mac_own) 790 { 791 int i; 792 793 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES) 794 return; 795 796 for (i = n - 1; i >= 0; --i) { 797 struct rte_ether_addr *m = &mac_addrs[i]; 798 799 if (BITFIELD_ISSET(mac_own, i)) 800 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, 801 i); 802 } 803 } 804 805 /** 806 * Enable promiscuous / all multicast mode through Netlink. 807 * 808 * @param[in] nlsk_fd 809 * Netlink socket file descriptor. 810 * @param[in] iface_idx 811 * Net device interface index. 812 * @param flags 813 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. 814 * @param enable 815 * Nonzero to enable, disable otherwise. 816 * 817 * @return 818 * 0 on success, a negative errno value otherwise and rte_errno is set. 819 */ 820 static int 821 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, 822 int enable) 823 { 824 struct { 825 struct nlmsghdr hdr; 826 struct ifinfomsg ifi; 827 } req = { 828 .hdr = { 829 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 830 .nlmsg_type = RTM_NEWLINK, 831 .nlmsg_flags = NLM_F_REQUEST, 832 }, 833 .ifi = { 834 .ifi_flags = enable ? flags : 0, 835 .ifi_change = flags, 836 .ifi_index = iface_idx, 837 }, 838 }; 839 uint32_t sn = MLX5_NL_SN_GENERATE; 840 int ret; 841 842 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); 843 if (nlsk_fd < 0) 844 return 0; 845 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); 846 if (ret < 0) 847 return ret; 848 return 0; 849 } 850 851 /** 852 * Enable promiscuous mode through Netlink. 853 * 854 * @param[in] nlsk_fd 855 * Netlink socket file descriptor. 856 * @param[in] iface_idx 857 * Net device interface index. 858 * @param enable 859 * Nonzero to enable, disable otherwise. 860 * 861 * @return 862 * 0 on success, a negative errno value otherwise and rte_errno is set. 863 */ 864 int 865 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) 866 { 867 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); 868 869 if (ret) 870 DRV_LOG(DEBUG, 871 "Interface %u cannot %s promisc mode: Netlink error %s", 872 iface_idx, enable ? "enable" : "disable", 873 strerror(rte_errno)); 874 return ret; 875 } 876 877 /** 878 * Enable all multicast mode through Netlink. 879 * 880 * @param[in] nlsk_fd 881 * Netlink socket file descriptor. 882 * @param[in] iface_idx 883 * Net device interface index. 884 * @param enable 885 * Nonzero to enable, disable otherwise. 886 * 887 * @return 888 * 0 on success, a negative errno value otherwise and rte_errno is set. 889 */ 890 int 891 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) 892 { 893 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, 894 enable); 895 896 if (ret) 897 DRV_LOG(DEBUG, 898 "Interface %u cannot %s allmulti : Netlink error %s", 899 iface_idx, enable ? "enable" : "disable", 900 strerror(rte_errno)); 901 return ret; 902 } 903 904 /** 905 * Process network interface information from Netlink message. 906 * 907 * @param nh 908 * Pointer to Netlink message header. 909 * @param arg 910 * Opaque data pointer for this callback. 911 * 912 * @return 913 * 0 on success, a negative errno value otherwise and rte_errno is set. 914 */ 915 static int 916 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) 917 { 918 struct mlx5_nl_ifindex_data *data = arg; 919 struct mlx5_nl_ifindex_data local = { 920 .flags = 0, 921 }; 922 size_t off = NLMSG_HDRLEN; 923 924 if (nh->nlmsg_type != 925 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && 926 nh->nlmsg_type != 927 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) 928 goto error; 929 while (off < nh->nlmsg_len) { 930 struct nlattr *na = (void *)((uintptr_t)nh + off); 931 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); 932 933 if (na->nla_len > nh->nlmsg_len - off) 934 goto error; 935 switch (na->nla_type) { 936 case RDMA_NLDEV_ATTR_DEV_INDEX: 937 local.ibindex = *(uint32_t *)payload; 938 local.flags |= MLX5_NL_CMD_GET_IB_INDEX; 939 break; 940 case RDMA_NLDEV_ATTR_DEV_NAME: 941 if (!strcmp(payload, data->name)) 942 local.flags |= MLX5_NL_CMD_GET_IB_NAME; 943 break; 944 case RDMA_NLDEV_ATTR_NDEV_INDEX: 945 local.ifindex = *(uint32_t *)payload; 946 local.flags |= MLX5_NL_CMD_GET_NET_INDEX; 947 break; 948 case RDMA_NLDEV_ATTR_PORT_INDEX: 949 local.portnum = *(uint32_t *)payload; 950 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; 951 break; 952 default: 953 break; 954 } 955 off += NLA_ALIGN(na->nla_len); 956 } 957 /* 958 * It is possible to have multiple messages for all 959 * Infiniband devices in the system with appropriate name. 960 * So we should gather parameters locally and copy to 961 * query context only in case of coinciding device name. 962 */ 963 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { 964 data->flags = local.flags; 965 data->ibindex = local.ibindex; 966 data->ifindex = local.ifindex; 967 data->portnum = local.portnum; 968 } 969 return 0; 970 error: 971 rte_errno = EINVAL; 972 return -rte_errno; 973 } 974 975 /** 976 * Get index of network interface associated with some IB device. 977 * 978 * This is the only somewhat safe method to avoid resorting to heuristics 979 * when faced with port representors. Unfortunately it requires at least 980 * Linux 4.17. 981 * 982 * @param nl 983 * Netlink socket of the RDMA kind (NETLINK_RDMA). 984 * @param[in] name 985 * IB device name. 986 * @param[in] pindex 987 * IB device port index, starting from 1 988 * @return 989 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno 990 * is set. 991 */ 992 unsigned int 993 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) 994 { 995 struct mlx5_nl_ifindex_data data = { 996 .name = name, 997 .flags = 0, 998 .ibindex = 0, /* Determined during first pass. */ 999 .ifindex = 0, /* Determined during second pass. */ 1000 }; 1001 union { 1002 struct nlmsghdr nh; 1003 uint8_t buf[NLMSG_HDRLEN + 1004 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) + 1005 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; 1006 } req = { 1007 .nh = { 1008 .nlmsg_len = NLMSG_LENGTH(0), 1009 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1010 RDMA_NLDEV_CMD_GET), 1011 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1012 }, 1013 }; 1014 struct nlattr *na; 1015 uint32_t sn = MLX5_NL_SN_GENERATE; 1016 int ret; 1017 1018 ret = mlx5_nl_send(nl, &req.nh, sn); 1019 if (ret < 0) 1020 return 0; 1021 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1022 if (ret < 0) 1023 return 0; 1024 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1025 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) 1026 goto error; 1027 data.flags = 0; 1028 sn = MLX5_NL_SN_GENERATE; 1029 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1030 RDMA_NLDEV_CMD_PORT_GET); 1031 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1032 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); 1033 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); 1034 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex); 1035 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; 1036 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1037 &data.ibindex, sizeof(data.ibindex)); 1038 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); 1039 na->nla_len = NLA_HDRLEN + sizeof(pindex); 1040 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; 1041 memcpy((void *)((uintptr_t)na + NLA_HDRLEN), 1042 &pindex, sizeof(pindex)); 1043 ret = mlx5_nl_send(nl, &req.nh, sn); 1044 if (ret < 0) 1045 return 0; 1046 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1047 if (ret < 0) 1048 return 0; 1049 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1050 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1051 !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || 1052 !data.ifindex) 1053 goto error; 1054 return data.ifindex; 1055 error: 1056 rte_errno = ENODEV; 1057 return 0; 1058 } 1059 1060 /** 1061 * Get the number of physical ports of given IB device. 1062 * 1063 * @param nl 1064 * Netlink socket of the RDMA kind (NETLINK_RDMA). 1065 * @param[in] name 1066 * IB device name. 1067 * 1068 * @return 1069 * A valid (nonzero) number of ports on success, 0 otherwise 1070 * and rte_errno is set. 1071 */ 1072 unsigned int 1073 mlx5_nl_portnum(int nl, const char *name) 1074 { 1075 struct mlx5_nl_ifindex_data data = { 1076 .flags = 0, 1077 .name = name, 1078 .ifindex = 0, 1079 .portnum = 0, 1080 }; 1081 struct nlmsghdr req = { 1082 .nlmsg_len = NLMSG_LENGTH(0), 1083 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 1084 RDMA_NLDEV_CMD_GET), 1085 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, 1086 }; 1087 uint32_t sn = MLX5_NL_SN_GENERATE; 1088 int ret; 1089 1090 ret = mlx5_nl_send(nl, &req, sn); 1091 if (ret < 0) 1092 return 0; 1093 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); 1094 if (ret < 0) 1095 return 0; 1096 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || 1097 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || 1098 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { 1099 rte_errno = ENODEV; 1100 return 0; 1101 } 1102 if (!data.portnum) 1103 rte_errno = EINVAL; 1104 return data.portnum; 1105 } 1106 1107 /** 1108 * Analyze gathered port parameters via Netlink to recognize master 1109 * and representor devices for E-Switch configuration. 1110 * 1111 * @param[in] num_vf_set 1112 * flag of presence of number of VFs port attribute. 1113 * @param[inout] switch_info 1114 * Port information, including port name as a number and port name 1115 * type if recognized 1116 * 1117 * @return 1118 * master and representor flags are set in switch_info according to 1119 * recognized parameters (if any). 1120 */ 1121 static void 1122 mlx5_nl_check_switch_info(bool num_vf_set, 1123 struct mlx5_switch_info *switch_info) 1124 { 1125 switch (switch_info->name_type) { 1126 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1127 /* 1128 * Name is not recognized, assume the master, 1129 * check the number of VFs key presence. 1130 */ 1131 switch_info->master = num_vf_set; 1132 break; 1133 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1134 /* 1135 * Name is not set, this assumes the legacy naming 1136 * schema for master, just check if there is a 1137 * number of VFs key. 1138 */ 1139 switch_info->master = num_vf_set; 1140 break; 1141 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1142 /* New uplink naming schema recognized. */ 1143 switch_info->master = 1; 1144 break; 1145 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1146 /* Legacy representors naming schema. */ 1147 switch_info->representor = !num_vf_set; 1148 break; 1149 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: 1150 /* Fallthrough */ 1151 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1152 /* New representors naming schema. */ 1153 switch_info->representor = 1; 1154 break; 1155 } 1156 } 1157 1158 /** 1159 * Process switch information from Netlink message. 1160 * 1161 * @param nh 1162 * Pointer to Netlink message header. 1163 * @param arg 1164 * Opaque data pointer for this callback. 1165 * 1166 * @return 1167 * 0 on success, a negative errno value otherwise and rte_errno is set. 1168 */ 1169 static int 1170 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) 1171 { 1172 struct mlx5_switch_info info = { 1173 .master = 0, 1174 .representor = 0, 1175 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1176 .port_name = 0, 1177 .switch_id = 0, 1178 }; 1179 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 1180 bool switch_id_set = false; 1181 bool num_vf_set = false; 1182 1183 if (nh->nlmsg_type != RTM_NEWLINK) 1184 goto error; 1185 while (off < nh->nlmsg_len) { 1186 struct rtattr *ra = (void *)((uintptr_t)nh + off); 1187 void *payload = RTA_DATA(ra); 1188 unsigned int i; 1189 1190 if (ra->rta_len > nh->nlmsg_len - off) 1191 goto error; 1192 switch (ra->rta_type) { 1193 case IFLA_NUM_VF: 1194 num_vf_set = true; 1195 break; 1196 case IFLA_PHYS_PORT_NAME: 1197 mlx5_translate_port_name((char *)payload, &info); 1198 break; 1199 case IFLA_PHYS_SWITCH_ID: 1200 info.switch_id = 0; 1201 for (i = 0; i < RTA_PAYLOAD(ra); ++i) { 1202 info.switch_id <<= 8; 1203 info.switch_id |= ((uint8_t *)payload)[i]; 1204 } 1205 switch_id_set = true; 1206 break; 1207 } 1208 off += RTA_ALIGN(ra->rta_len); 1209 } 1210 if (switch_id_set) { 1211 /* We have some E-Switch configuration. */ 1212 mlx5_nl_check_switch_info(num_vf_set, &info); 1213 } 1214 MLX5_ASSERT(!(info.master && info.representor)); 1215 memcpy(arg, &info, sizeof(info)); 1216 return 0; 1217 error: 1218 rte_errno = EINVAL; 1219 return -rte_errno; 1220 } 1221 1222 /** 1223 * Get switch information associated with network interface. 1224 * 1225 * @param nl 1226 * Netlink socket of the ROUTE kind (NETLINK_ROUTE). 1227 * @param ifindex 1228 * Network interface index. 1229 * @param[out] info 1230 * Switch information object, populated in case of success. 1231 * 1232 * @return 1233 * 0 on success, a negative errno value otherwise and rte_errno is set. 1234 */ 1235 int 1236 mlx5_nl_switch_info(int nl, unsigned int ifindex, 1237 struct mlx5_switch_info *info) 1238 { 1239 struct { 1240 struct nlmsghdr nh; 1241 struct ifinfomsg info; 1242 struct rtattr rta; 1243 uint32_t extmask; 1244 } req = { 1245 .nh = { 1246 .nlmsg_len = NLMSG_LENGTH 1247 (sizeof(req.info) + 1248 RTA_LENGTH(sizeof(uint32_t))), 1249 .nlmsg_type = RTM_GETLINK, 1250 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1251 }, 1252 .info = { 1253 .ifi_family = AF_UNSPEC, 1254 .ifi_index = ifindex, 1255 }, 1256 .rta = { 1257 .rta_type = IFLA_EXT_MASK, 1258 .rta_len = RTA_LENGTH(sizeof(int32_t)), 1259 }, 1260 .extmask = RTE_LE32(1), 1261 }; 1262 uint32_t sn = MLX5_NL_SN_GENERATE; 1263 int ret; 1264 1265 ret = mlx5_nl_send(nl, &req.nh, sn); 1266 if (ret >= 0) 1267 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); 1268 if (info->master && info->representor) { 1269 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1270 " and as representor", ifindex); 1271 rte_errno = ENODEV; 1272 ret = -rte_errno; 1273 } 1274 return ret; 1275 } 1276 1277 /* 1278 * Delete VLAN network device by ifindex. 1279 * 1280 * @param[in] tcf 1281 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1282 * @param[in] ifindex 1283 * Interface index of network device to delete. 1284 */ 1285 void 1286 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, 1287 uint32_t ifindex) 1288 { 1289 uint32_t sn = MLX5_NL_SN_GENERATE; 1290 int ret; 1291 struct { 1292 struct nlmsghdr nh; 1293 struct ifinfomsg info; 1294 } req = { 1295 .nh = { 1296 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 1297 .nlmsg_type = RTM_DELLINK, 1298 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, 1299 }, 1300 .info = { 1301 .ifi_family = AF_UNSPEC, 1302 .ifi_index = ifindex, 1303 }, 1304 }; 1305 1306 if (ifindex) { 1307 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn); 1308 if (ret >= 0) 1309 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1310 if (ret < 0) 1311 DRV_LOG(WARNING, "netlink: error deleting VLAN WA" 1312 " ifindex %u, %d", ifindex, ret); 1313 } 1314 } 1315 1316 /* Set of subroutines to build Netlink message. */ 1317 static struct nlattr * 1318 nl_msg_tail(struct nlmsghdr *nlh) 1319 { 1320 return (struct nlattr *) 1321 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); 1322 } 1323 1324 static void 1325 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) 1326 { 1327 struct nlattr *nla = nl_msg_tail(nlh); 1328 1329 nla->nla_type = type; 1330 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen; 1331 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len); 1332 1333 if (alen) 1334 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); 1335 } 1336 1337 static struct nlattr * 1338 nl_attr_nest_start(struct nlmsghdr *nlh, int type) 1339 { 1340 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); 1341 1342 nl_attr_put(nlh, type, NULL, 0); 1343 return nest; 1344 } 1345 1346 static void 1347 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) 1348 { 1349 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; 1350 } 1351 1352 /* 1353 * Create network VLAN device with specified VLAN tag. 1354 * 1355 * @param[in] tcf 1356 * Context object initialized by mlx5_nl_vlan_vmwa_init(). 1357 * @param[in] ifindex 1358 * Base network interface index. 1359 * @param[in] tag 1360 * VLAN tag for VLAN network device to create. 1361 */ 1362 uint32_t 1363 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, 1364 uint32_t ifindex, uint16_t tag) 1365 { 1366 struct nlmsghdr *nlh; 1367 struct ifinfomsg *ifm; 1368 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; 1369 1370 __rte_cache_aligned 1371 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1372 NLMSG_ALIGN(sizeof(struct ifinfomsg)) + 1373 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + 1374 NLMSG_ALIGN(sizeof(uint32_t)) + 1375 NLMSG_ALIGN(sizeof(name)) + 1376 NLMSG_ALIGN(sizeof("vlan")) + 1377 NLMSG_ALIGN(sizeof(uint32_t)) + 1378 NLMSG_ALIGN(sizeof(uint16_t)) + 16]; 1379 struct nlattr *na_info; 1380 struct nlattr *na_vlan; 1381 uint32_t sn = MLX5_NL_SN_GENERATE; 1382 int ret; 1383 1384 memset(buf, 0, sizeof(buf)); 1385 nlh = (struct nlmsghdr *)buf; 1386 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1387 nlh->nlmsg_type = RTM_NEWLINK; 1388 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | 1389 NLM_F_EXCL | NLM_F_ACK; 1390 ifm = (struct ifinfomsg *)nl_msg_tail(nlh); 1391 nlh->nlmsg_len += sizeof(struct ifinfomsg); 1392 ifm->ifi_family = AF_UNSPEC; 1393 ifm->ifi_type = 0; 1394 ifm->ifi_index = 0; 1395 ifm->ifi_flags = IFF_UP; 1396 ifm->ifi_change = 0xffffffff; 1397 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); 1398 ret = snprintf(name, sizeof(name), "%s.%u.%u", 1399 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); 1400 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); 1401 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); 1402 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); 1403 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); 1404 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); 1405 nl_attr_nest_end(nlh, na_vlan); 1406 nl_attr_nest_end(nlh, na_info); 1407 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len); 1408 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn); 1409 if (ret >= 0) 1410 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); 1411 if (ret < 0) { 1412 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name, 1413 ret); 1414 } 1415 /* Try to get ifindex of created or pre-existing device. */ 1416 ret = if_nametoindex(name); 1417 if (!ret) { 1418 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name, 1419 errno); 1420 return 0; 1421 } 1422 return ret; 1423 } 1424 1425 /** 1426 * Parse Netlink message to retrieve the general family ID. 1427 * 1428 * @param nh 1429 * Pointer to Netlink Message Header. 1430 * @param arg 1431 * PMD data register with this callback. 1432 * 1433 * @return 1434 * 0 on success, a negative errno value otherwise and rte_errno is set. 1435 */ 1436 static int 1437 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg) 1438 { 1439 1440 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1441 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1442 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1443 1444 for (; nla->nla_len && nla < tail; 1445 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) { 1446 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) { 1447 *(uint16_t *)arg = *(uint16_t *)(nla + 1); 1448 return 0; 1449 } 1450 } 1451 return -EINVAL; 1452 } 1453 1454 #define MLX5_NL_MAX_ATTR_SIZE 100 1455 /** 1456 * Get generic netlink family ID. 1457 * 1458 * @param[in] nlsk_fd 1459 * Netlink socket file descriptor. 1460 * @param[in] name 1461 * The family name. 1462 * 1463 * @return 1464 * ID >= 0 on success and @p enable is updated, a negative errno value 1465 * otherwise and rte_errno is set. 1466 */ 1467 static int 1468 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name) 1469 { 1470 struct nlmsghdr *nlh; 1471 struct genlmsghdr *genl; 1472 uint32_t sn = MLX5_NL_SN_GENERATE; 1473 int name_size = strlen(name) + 1; 1474 int ret; 1475 uint16_t id = -1; 1476 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1477 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1478 NLMSG_ALIGN(sizeof(struct nlattr)) + 1479 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)]; 1480 1481 memset(buf, 0, sizeof(buf)); 1482 nlh = (struct nlmsghdr *)buf; 1483 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1484 nlh->nlmsg_type = GENL_ID_CTRL; 1485 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1486 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1487 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1488 genl->cmd = CTRL_CMD_GETFAMILY; 1489 genl->version = 1; 1490 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size); 1491 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1492 if (ret >= 0) 1493 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id); 1494 if (ret < 0) { 1495 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name, 1496 ret); 1497 return ret; 1498 } 1499 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id); 1500 return (int)id; 1501 } 1502 1503 /** 1504 * Get Devlink family ID. 1505 * 1506 * @param[in] nlsk_fd 1507 * Netlink socket file descriptor. 1508 * 1509 * @return 1510 * ID >= 0 on success and @p enable is updated, a negative errno value 1511 * otherwise and rte_errno is set. 1512 */ 1513 1514 int 1515 mlx5_nl_devlink_family_id_get(int nlsk_fd) 1516 { 1517 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME); 1518 } 1519 1520 /** 1521 * Parse Netlink message to retrieve the ROCE enable status. 1522 * 1523 * @param nh 1524 * Pointer to Netlink Message Header. 1525 * @param arg 1526 * PMD data register with this callback. 1527 * 1528 * @return 1529 * 0 on success, a negative errno value otherwise and rte_errno is set. 1530 */ 1531 static int 1532 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg) 1533 { 1534 1535 int ret = -EINVAL; 1536 int *enable = arg; 1537 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len); 1538 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) + 1539 NLMSG_ALIGN(sizeof(struct genlmsghdr))); 1540 1541 while (nla->nla_len && nla < tail) { 1542 switch (nla->nla_type) { 1543 /* Expected nested attributes case. */ 1544 case DEVLINK_ATTR_PARAM: 1545 case DEVLINK_ATTR_PARAM_VALUES_LIST: 1546 case DEVLINK_ATTR_PARAM_VALUE: 1547 ret = 0; 1548 nla += 1; 1549 break; 1550 case DEVLINK_ATTR_PARAM_VALUE_DATA: 1551 *enable = 1; 1552 return 0; 1553 default: 1554 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len)); 1555 } 1556 } 1557 *enable = 0; 1558 return ret; 1559 } 1560 1561 /** 1562 * Get ROCE enable status through Netlink. 1563 * 1564 * @param[in] nlsk_fd 1565 * Netlink socket file descriptor. 1566 * @param[in] family_id 1567 * the Devlink family ID. 1568 * @param pci_addr 1569 * The device PCI address. 1570 * @param[out] enable 1571 * Where to store the enable status. 1572 * 1573 * @return 1574 * 0 on success and @p enable is updated, a negative errno value otherwise 1575 * and rte_errno is set. 1576 */ 1577 int 1578 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, 1579 int *enable) 1580 { 1581 struct nlmsghdr *nlh; 1582 struct genlmsghdr *genl; 1583 uint32_t sn = MLX5_NL_SN_GENERATE; 1584 int ret; 1585 int cur_en = 0; 1586 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1587 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1588 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 + 1589 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4]; 1590 1591 memset(buf, 0, sizeof(buf)); 1592 nlh = (struct nlmsghdr *)buf; 1593 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1594 nlh->nlmsg_type = family_id; 1595 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1596 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1597 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1598 genl->cmd = DEVLINK_CMD_PARAM_GET; 1599 genl->version = DEVLINK_GENL_VERSION; 1600 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1601 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1602 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1603 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1604 if (ret >= 0) 1605 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en); 1606 if (ret < 0) { 1607 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.", 1608 pci_addr, ret); 1609 return ret; 1610 } 1611 *enable = cur_en; 1612 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".", 1613 cur_en ? "en" : "dis", pci_addr); 1614 return ret; 1615 } 1616 1617 /** 1618 * Reload mlx5 device kernel driver through Netlink. 1619 * 1620 * @param[in] nlsk_fd 1621 * Netlink socket file descriptor. 1622 * @param[in] family_id 1623 * the Devlink family ID. 1624 * @param pci_addr 1625 * The device PCI address. 1626 * @param[out] enable 1627 * The enable status to set. 1628 * 1629 * @return 1630 * 0 on success, a negative errno value otherwise and rte_errno is set. 1631 */ 1632 int 1633 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr) 1634 { 1635 struct nlmsghdr *nlh; 1636 struct genlmsghdr *genl; 1637 uint32_t sn = MLX5_NL_SN_GENERATE; 1638 int ret; 1639 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1640 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1641 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 + 1642 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2]; 1643 1644 memset(buf, 0, sizeof(buf)); 1645 nlh = (struct nlmsghdr *)buf; 1646 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1647 nlh->nlmsg_type = family_id; 1648 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1649 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1650 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1651 genl->cmd = DEVLINK_CMD_RELOAD; 1652 genl->version = DEVLINK_GENL_VERSION; 1653 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1654 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1655 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1656 if (ret >= 0) 1657 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1658 if (ret < 0) { 1659 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d", 1660 pci_addr, ret); 1661 return ret; 1662 } 1663 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.", 1664 pci_addr); 1665 return 0; 1666 } 1667 1668 /** 1669 * Set ROCE enable status through Netlink. 1670 * 1671 * @param[in] nlsk_fd 1672 * Netlink socket file descriptor. 1673 * @param[in] family_id 1674 * the Devlink family ID. 1675 * @param pci_addr 1676 * The device PCI address. 1677 * @param[out] enable 1678 * The enable status to set. 1679 * 1680 * @return 1681 * 0 on success, a negative errno value otherwise and rte_errno is set. 1682 */ 1683 int 1684 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, 1685 int enable) 1686 { 1687 struct nlmsghdr *nlh; 1688 struct genlmsghdr *genl; 1689 uint32_t sn = MLX5_NL_SN_GENERATE; 1690 int ret; 1691 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + 1692 NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1693 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 + 1694 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6]; 1695 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT; 1696 uint8_t ptype = NLA_FLAG; 1697 ; 1698 1699 memset(buf, 0, sizeof(buf)); 1700 nlh = (struct nlmsghdr *)buf; 1701 nlh->nlmsg_len = sizeof(struct nlmsghdr); 1702 nlh->nlmsg_type = family_id; 1703 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 1704 genl = (struct genlmsghdr *)nl_msg_tail(nlh); 1705 nlh->nlmsg_len += sizeof(struct genlmsghdr); 1706 genl->cmd = DEVLINK_CMD_PARAM_SET; 1707 genl->version = DEVLINK_GENL_VERSION; 1708 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4); 1709 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1); 1710 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12); 1711 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode)); 1712 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype)); 1713 if (enable) 1714 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0); 1715 ret = mlx5_nl_send(nlsk_fd, nlh, sn); 1716 if (ret >= 0) 1717 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); 1718 if (ret < 0) { 1719 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:" 1720 " %d.", enable ? "en" : "dis", pci_addr, ret); 1721 return ret; 1722 } 1723 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.", 1724 pci_addr, enable ? "en" : "dis"); 1725 /* Now, need to reload the driver. */ 1726 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); 1727 } 1728