xref: /dpdk/drivers/common/mlx5/linux/mlx5_nl.c (revision 25245d5dc9ecfa8bc9964c69a756beca6ee1ca72)
159513c3eSOphir Munk /* SPDX-License-Identifier: BSD-3-Clause
259513c3eSOphir Munk  * Copyright 2018 6WIND S.A.
359513c3eSOphir Munk  * Copyright 2018 Mellanox Technologies, Ltd
459513c3eSOphir Munk  */
559513c3eSOphir Munk 
659513c3eSOphir Munk #include <errno.h>
759513c3eSOphir Munk #include <linux/if_link.h>
859513c3eSOphir Munk #include <linux/rtnetlink.h>
959513c3eSOphir Munk #include <linux/genetlink.h>
1059513c3eSOphir Munk #include <net/if.h>
1159513c3eSOphir Munk #include <rdma/rdma_netlink.h>
1259513c3eSOphir Munk #include <stdbool.h>
1359513c3eSOphir Munk #include <stdint.h>
1459513c3eSOphir Munk #include <stdlib.h>
1559513c3eSOphir Munk #include <stdalign.h>
1659513c3eSOphir Munk #include <string.h>
1759513c3eSOphir Munk #include <sys/socket.h>
1859513c3eSOphir Munk #include <unistd.h>
1959513c3eSOphir Munk 
2059513c3eSOphir Munk #include <rte_errno.h>
2159513c3eSOphir Munk 
2259513c3eSOphir Munk #include "mlx5_nl.h"
23*25245d5dSShiri Kuzin #include "../mlx5_common_log.h"
2466914d19SSuanming Mou #include "mlx5_malloc.h"
2559513c3eSOphir Munk #ifdef HAVE_DEVLINK
2659513c3eSOphir Munk #include <linux/devlink.h>
2759513c3eSOphir Munk #endif
2859513c3eSOphir Munk 
2959513c3eSOphir Munk 
/* Capacity, in bytes, of the buffer used to receive kernel messages. */
#define MLX5_NL_BUF_SIZE (32 * 1024)
/* Size, in bytes, requested for the Netlink socket send buffer. */
#define MLX5_SEND_BUF_SIZE (32 * 1024)
/* Size, in bytes, requested for the Netlink socket receive buffer. */
#define MLX5_RECV_BUF_SIZE (32 * 1024)
3659513c3eSOphir Munk 
3759513c3eSOphir Munk /** Parameters of VLAN devices created by driver. */
3859513c3eSOphir Munk #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"
/*
 * MLX5_NDA_RTA mirrors the NDA_RTA macro from the iproute2 sources
 * (see include/libnetlink.h there): it yields the first attribute that
 * follows the struct ndmsg header.
 */
4459513c3eSOphir Munk #ifndef MLX5_NDA_RTA
4559513c3eSOphir Munk #define MLX5_NDA_RTA(r) \
4659513c3eSOphir Munk 	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
4759513c3eSOphir Munk #endif
/*
 * NLMSG_TAIL mirrors the macro of the same name from the iproute2 sources
 * (see include/libnetlink.h there): it points just past the aligned
 * current end of the message.
 */
5359513c3eSOphir Munk #ifndef NLMSG_TAIL
5459513c3eSOphir Munk #define NLMSG_TAIL(nmsg) \
5559513c3eSOphir Munk 	((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
5659513c3eSOphir Munk #endif
5759513c3eSOphir Munk /*
5859513c3eSOphir Munk  * The following definitions are normally found in rdma/rdma_netlink.h,
5959513c3eSOphir Munk  * however they are so recent that most systems do not expose them yet.
6059513c3eSOphir Munk  */
6159513c3eSOphir Munk #ifndef HAVE_RDMA_NL_NLDEV
6259513c3eSOphir Munk #define RDMA_NL_NLDEV 5
6359513c3eSOphir Munk #endif
6459513c3eSOphir Munk #ifndef HAVE_RDMA_NLDEV_CMD_GET
6559513c3eSOphir Munk #define RDMA_NLDEV_CMD_GET 1
6659513c3eSOphir Munk #endif
6759513c3eSOphir Munk #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET
6859513c3eSOphir Munk #define RDMA_NLDEV_CMD_PORT_GET 5
6959513c3eSOphir Munk #endif
7059513c3eSOphir Munk #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX
7159513c3eSOphir Munk #define RDMA_NLDEV_ATTR_DEV_INDEX 1
7259513c3eSOphir Munk #endif
7359513c3eSOphir Munk #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME
7459513c3eSOphir Munk #define RDMA_NLDEV_ATTR_DEV_NAME 2
7559513c3eSOphir Munk #endif
7659513c3eSOphir Munk #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX
7759513c3eSOphir Munk #define RDMA_NLDEV_ATTR_PORT_INDEX 3
7859513c3eSOphir Munk #endif
7959513c3eSOphir Munk #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX
8059513c3eSOphir Munk #define RDMA_NLDEV_ATTR_NDEV_INDEX 50
8159513c3eSOphir Munk #endif
8259513c3eSOphir Munk 
8359513c3eSOphir Munk /* These are normally found in linux/if_link.h. */
8459513c3eSOphir Munk #ifndef HAVE_IFLA_NUM_VF
8559513c3eSOphir Munk #define IFLA_NUM_VF 21
8659513c3eSOphir Munk #endif
8759513c3eSOphir Munk #ifndef HAVE_IFLA_EXT_MASK
8859513c3eSOphir Munk #define IFLA_EXT_MASK 29
8959513c3eSOphir Munk #endif
9059513c3eSOphir Munk #ifndef HAVE_IFLA_PHYS_SWITCH_ID
9159513c3eSOphir Munk #define IFLA_PHYS_SWITCH_ID 36
9259513c3eSOphir Munk #endif
9359513c3eSOphir Munk #ifndef HAVE_IFLA_PHYS_PORT_NAME
9459513c3eSOphir Munk #define IFLA_PHYS_PORT_NAME 38
9559513c3eSOphir Munk #endif
9659513c3eSOphir Munk 
/*
 * Some Devlink definitions may be missing from old kernel headers;
 * provide fallback values for the ones used here.
 */
10159513c3eSOphir Munk #ifndef DEVLINK_GENL_NAME
10259513c3eSOphir Munk #define DEVLINK_GENL_NAME "devlink"
10359513c3eSOphir Munk #endif
10459513c3eSOphir Munk #ifndef DEVLINK_GENL_VERSION
10559513c3eSOphir Munk #define DEVLINK_GENL_VERSION 1
10659513c3eSOphir Munk #endif
10759513c3eSOphir Munk #ifndef DEVLINK_ATTR_BUS_NAME
10859513c3eSOphir Munk #define DEVLINK_ATTR_BUS_NAME 1
10959513c3eSOphir Munk #endif
11059513c3eSOphir Munk #ifndef DEVLINK_ATTR_DEV_NAME
11159513c3eSOphir Munk #define DEVLINK_ATTR_DEV_NAME 2
11259513c3eSOphir Munk #endif
11359513c3eSOphir Munk #ifndef DEVLINK_ATTR_PARAM
11459513c3eSOphir Munk #define DEVLINK_ATTR_PARAM 80
11559513c3eSOphir Munk #endif
11659513c3eSOphir Munk #ifndef DEVLINK_ATTR_PARAM_NAME
11759513c3eSOphir Munk #define DEVLINK_ATTR_PARAM_NAME 81
11859513c3eSOphir Munk #endif
11959513c3eSOphir Munk #ifndef DEVLINK_ATTR_PARAM_TYPE
12059513c3eSOphir Munk #define DEVLINK_ATTR_PARAM_TYPE 83
12159513c3eSOphir Munk #endif
12259513c3eSOphir Munk #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST
12359513c3eSOphir Munk #define DEVLINK_ATTR_PARAM_VALUES_LIST 84
12459513c3eSOphir Munk #endif
12559513c3eSOphir Munk #ifndef DEVLINK_ATTR_PARAM_VALUE
12659513c3eSOphir Munk #define DEVLINK_ATTR_PARAM_VALUE 85
12759513c3eSOphir Munk #endif
12859513c3eSOphir Munk #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA
12959513c3eSOphir Munk #define DEVLINK_ATTR_PARAM_VALUE_DATA 86
13059513c3eSOphir Munk #endif
13159513c3eSOphir Munk #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE
13259513c3eSOphir Munk #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87
13359513c3eSOphir Munk #endif
13459513c3eSOphir Munk #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT
13559513c3eSOphir Munk #define DEVLINK_PARAM_CMODE_DRIVERINIT 1
13659513c3eSOphir Munk #endif
13759513c3eSOphir Munk #ifndef DEVLINK_CMD_RELOAD
13859513c3eSOphir Munk #define DEVLINK_CMD_RELOAD 37
13959513c3eSOphir Munk #endif
14059513c3eSOphir Munk #ifndef DEVLINK_CMD_PARAM_GET
14159513c3eSOphir Munk #define DEVLINK_CMD_PARAM_GET 38
14259513c3eSOphir Munk #endif
14359513c3eSOphir Munk #ifndef DEVLINK_CMD_PARAM_SET
14459513c3eSOphir Munk #define DEVLINK_CMD_PARAM_SET 39
14559513c3eSOphir Munk #endif
14659513c3eSOphir Munk #ifndef NLA_FLAG
14759513c3eSOphir Munk #define NLA_FLAG 6
14859513c3eSOphir Munk #endif
14959513c3eSOphir Munk 
15059513c3eSOphir Munk /* Add/remove MAC address through Netlink */
15159513c3eSOphir Munk struct mlx5_nl_mac_addr {
15259513c3eSOphir Munk 	struct rte_ether_addr (*mac)[];
15359513c3eSOphir Munk 	/**< MAC address handled by the device. */
15459513c3eSOphir Munk 	int mac_n; /**< Number of addresses in the array. */
15559513c3eSOphir Munk };
15659513c3eSOphir Munk 
/*
 * One bit per attribute kind.
 * NOTE(review): presumably these are the bits stored in
 * mlx5_nl_ifindex_data::flags - confirm against mlx5_nl_cmdget_cb().
 */
#define MLX5_NL_CMD_GET_IB_NAME    (1 << 0)
#define MLX5_NL_CMD_GET_IB_INDEX   (1 << 1)
#define MLX5_NL_CMD_GET_NET_INDEX  (1 << 2)
#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3)

/** Data structure used by mlx5_nl_cmdget_cb(). */
struct mlx5_nl_ifindex_data {
	/** IB device name (input). */
	const char *name;
	/** Flags recording which attributes were found (output). */
	uint32_t flags;
	/** IB device index (output). */
	uint32_t ibindex;
	/** Network interface index (output). */
	uint32_t ifindex;
	/** IB device max port number (output). */
	uint32_t portnum;
};
17059513c3eSOphir Munk 
/* Counter from which Netlink sequence numbers are drawn. */
uint32_t atomic_sn;

/*
 * Generate a Netlink sequence number: atomically increment the counter
 * (relaxed memory ordering) and yield the incremented value.
 */
#define MLX5_NL_SN_GENERATE \
	(__atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED))
17559513c3eSOphir Munk 
17659513c3eSOphir Munk /**
17759513c3eSOphir Munk  * Opens a Netlink socket.
17859513c3eSOphir Munk  *
17959513c3eSOphir Munk  * @param protocol
18059513c3eSOphir Munk  *   Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
18159513c3eSOphir Munk  *
18259513c3eSOphir Munk  * @return
18359513c3eSOphir Munk  *   A file descriptor on success, a negative errno value otherwise and
18459513c3eSOphir Munk  *   rte_errno is set.
18559513c3eSOphir Munk  */
18659513c3eSOphir Munk int
18759513c3eSOphir Munk mlx5_nl_init(int protocol)
18859513c3eSOphir Munk {
18959513c3eSOphir Munk 	int fd;
19059513c3eSOphir Munk 	int sndbuf_size = MLX5_SEND_BUF_SIZE;
19159513c3eSOphir Munk 	int rcvbuf_size = MLX5_RECV_BUF_SIZE;
19259513c3eSOphir Munk 	struct sockaddr_nl local = {
19359513c3eSOphir Munk 		.nl_family = AF_NETLINK,
19459513c3eSOphir Munk 	};
19559513c3eSOphir Munk 	int ret;
19659513c3eSOphir Munk 
19759513c3eSOphir Munk 	fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
19859513c3eSOphir Munk 	if (fd == -1) {
19959513c3eSOphir Munk 		rte_errno = errno;
20059513c3eSOphir Munk 		return -rte_errno;
20159513c3eSOphir Munk 	}
20259513c3eSOphir Munk 	ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int));
20359513c3eSOphir Munk 	if (ret == -1) {
20459513c3eSOphir Munk 		rte_errno = errno;
20559513c3eSOphir Munk 		goto error;
20659513c3eSOphir Munk 	}
20759513c3eSOphir Munk 	ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int));
20859513c3eSOphir Munk 	if (ret == -1) {
20959513c3eSOphir Munk 		rte_errno = errno;
21059513c3eSOphir Munk 		goto error;
21159513c3eSOphir Munk 	}
21259513c3eSOphir Munk 	ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
21359513c3eSOphir Munk 	if (ret == -1) {
21459513c3eSOphir Munk 		rte_errno = errno;
21559513c3eSOphir Munk 		goto error;
21659513c3eSOphir Munk 	}
21759513c3eSOphir Munk 	return fd;
21859513c3eSOphir Munk error:
21959513c3eSOphir Munk 	close(fd);
22059513c3eSOphir Munk 	return -rte_errno;
22159513c3eSOphir Munk }
22259513c3eSOphir Munk 
22359513c3eSOphir Munk /**
22459513c3eSOphir Munk  * Send a request message to the kernel on the Netlink socket.
22559513c3eSOphir Munk  *
22659513c3eSOphir Munk  * @param[in] nlsk_fd
22759513c3eSOphir Munk  *   Netlink socket file descriptor.
22859513c3eSOphir Munk  * @param[in] nh
22959513c3eSOphir Munk  *   The Netlink message send to the kernel.
23059513c3eSOphir Munk  * @param[in] ssn
23159513c3eSOphir Munk  *   Sequence number.
23259513c3eSOphir Munk  * @param[in] req
23359513c3eSOphir Munk  *   Pointer to the request structure.
23459513c3eSOphir Munk  * @param[in] len
23559513c3eSOphir Munk  *   Length of the request in bytes.
23659513c3eSOphir Munk  *
23759513c3eSOphir Munk  * @return
23859513c3eSOphir Munk  *   The number of sent bytes on success, a negative errno value otherwise and
23959513c3eSOphir Munk  *   rte_errno is set.
24059513c3eSOphir Munk  */
24159513c3eSOphir Munk static int
24259513c3eSOphir Munk mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
24359513c3eSOphir Munk 		int len)
24459513c3eSOphir Munk {
24559513c3eSOphir Munk 	struct sockaddr_nl sa = {
24659513c3eSOphir Munk 		.nl_family = AF_NETLINK,
24759513c3eSOphir Munk 	};
24859513c3eSOphir Munk 	struct iovec iov[2] = {
24959513c3eSOphir Munk 		{ .iov_base = nh, .iov_len = sizeof(*nh), },
25059513c3eSOphir Munk 		{ .iov_base = req, .iov_len = len, },
25159513c3eSOphir Munk 	};
25259513c3eSOphir Munk 	struct msghdr msg = {
25359513c3eSOphir Munk 		.msg_name = &sa,
25459513c3eSOphir Munk 		.msg_namelen = sizeof(sa),
25559513c3eSOphir Munk 		.msg_iov = iov,
25659513c3eSOphir Munk 		.msg_iovlen = 2,
25759513c3eSOphir Munk 	};
25859513c3eSOphir Munk 	int send_bytes;
25959513c3eSOphir Munk 
26059513c3eSOphir Munk 	nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
26159513c3eSOphir Munk 	nh->nlmsg_seq = sn;
26259513c3eSOphir Munk 	send_bytes = sendmsg(nlsk_fd, &msg, 0);
26359513c3eSOphir Munk 	if (send_bytes < 0) {
26459513c3eSOphir Munk 		rte_errno = errno;
26559513c3eSOphir Munk 		return -rte_errno;
26659513c3eSOphir Munk 	}
26759513c3eSOphir Munk 	return send_bytes;
26859513c3eSOphir Munk }
26959513c3eSOphir Munk 
27059513c3eSOphir Munk /**
27159513c3eSOphir Munk  * Send a message to the kernel on the Netlink socket.
27259513c3eSOphir Munk  *
27359513c3eSOphir Munk  * @param[in] nlsk_fd
27459513c3eSOphir Munk  *   The Netlink socket file descriptor used for communication.
27559513c3eSOphir Munk  * @param[in] nh
27659513c3eSOphir Munk  *   The Netlink message send to the kernel.
27759513c3eSOphir Munk  * @param[in] sn
27859513c3eSOphir Munk  *   Sequence number.
27959513c3eSOphir Munk  *
28059513c3eSOphir Munk  * @return
28159513c3eSOphir Munk  *   The number of sent bytes on success, a negative errno value otherwise and
28259513c3eSOphir Munk  *   rte_errno is set.
28359513c3eSOphir Munk  */
28459513c3eSOphir Munk static int
28559513c3eSOphir Munk mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
28659513c3eSOphir Munk {
28759513c3eSOphir Munk 	struct sockaddr_nl sa = {
28859513c3eSOphir Munk 		.nl_family = AF_NETLINK,
28959513c3eSOphir Munk 	};
29059513c3eSOphir Munk 	struct iovec iov = {
29159513c3eSOphir Munk 		.iov_base = nh,
29259513c3eSOphir Munk 		.iov_len = nh->nlmsg_len,
29359513c3eSOphir Munk 	};
29459513c3eSOphir Munk 	struct msghdr msg = {
29559513c3eSOphir Munk 		.msg_name = &sa,
29659513c3eSOphir Munk 		.msg_namelen = sizeof(sa),
29759513c3eSOphir Munk 		.msg_iov = &iov,
29859513c3eSOphir Munk 		.msg_iovlen = 1,
29959513c3eSOphir Munk 	};
30059513c3eSOphir Munk 	int send_bytes;
30159513c3eSOphir Munk 
30259513c3eSOphir Munk 	nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
30359513c3eSOphir Munk 	nh->nlmsg_seq = sn;
30459513c3eSOphir Munk 	send_bytes = sendmsg(nlsk_fd, &msg, 0);
30559513c3eSOphir Munk 	if (send_bytes < 0) {
30659513c3eSOphir Munk 		rte_errno = errno;
30759513c3eSOphir Munk 		return -rte_errno;
30859513c3eSOphir Munk 	}
30959513c3eSOphir Munk 	return send_bytes;
31059513c3eSOphir Munk }
31159513c3eSOphir Munk 
31259513c3eSOphir Munk /**
31359513c3eSOphir Munk  * Receive a message from the kernel on the Netlink socket, following
31459513c3eSOphir Munk  * mlx5_nl_send().
31559513c3eSOphir Munk  *
31659513c3eSOphir Munk  * @param[in] nlsk_fd
31759513c3eSOphir Munk  *   The Netlink socket file descriptor used for communication.
31859513c3eSOphir Munk  * @param[in] sn
31959513c3eSOphir Munk  *   Sequence number.
32059513c3eSOphir Munk  * @param[in] cb
32159513c3eSOphir Munk  *   The callback function to call for each Netlink message received.
32259513c3eSOphir Munk  * @param[in, out] arg
32359513c3eSOphir Munk  *   Custom arguments for the callback.
32459513c3eSOphir Munk  *
32559513c3eSOphir Munk  * @return
32659513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
32759513c3eSOphir Munk  */
32859513c3eSOphir Munk static int
32959513c3eSOphir Munk mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
33059513c3eSOphir Munk 	     void *arg)
33159513c3eSOphir Munk {
33259513c3eSOphir Munk 	struct sockaddr_nl sa;
33366914d19SSuanming Mou 	void *buf = mlx5_malloc(0, MLX5_RECV_BUF_SIZE, 0, SOCKET_ID_ANY);
33459513c3eSOphir Munk 	struct iovec iov = {
33559513c3eSOphir Munk 		.iov_base = buf,
33659513c3eSOphir Munk 		.iov_len = MLX5_RECV_BUF_SIZE,
33759513c3eSOphir Munk 	};
33859513c3eSOphir Munk 	struct msghdr msg = {
33959513c3eSOphir Munk 		.msg_name = &sa,
34059513c3eSOphir Munk 		.msg_namelen = sizeof(sa),
34159513c3eSOphir Munk 		.msg_iov = &iov,
34259513c3eSOphir Munk 		/* One message at a time */
34359513c3eSOphir Munk 		.msg_iovlen = 1,
34459513c3eSOphir Munk 	};
34559513c3eSOphir Munk 	int multipart = 0;
34659513c3eSOphir Munk 	int ret = 0;
34759513c3eSOphir Munk 
34859513c3eSOphir Munk 	if (!buf) {
34959513c3eSOphir Munk 		rte_errno = ENOMEM;
35059513c3eSOphir Munk 		return -rte_errno;
35159513c3eSOphir Munk 	}
35259513c3eSOphir Munk 	do {
35359513c3eSOphir Munk 		struct nlmsghdr *nh;
35459513c3eSOphir Munk 		int recv_bytes = 0;
35559513c3eSOphir Munk 
35659513c3eSOphir Munk 		do {
35759513c3eSOphir Munk 			recv_bytes = recvmsg(nlsk_fd, &msg, 0);
35859513c3eSOphir Munk 			if (recv_bytes == -1) {
35959513c3eSOphir Munk 				rte_errno = errno;
36059513c3eSOphir Munk 				ret = -rte_errno;
36159513c3eSOphir Munk 				goto exit;
36259513c3eSOphir Munk 			}
36359513c3eSOphir Munk 			nh = (struct nlmsghdr *)buf;
36459513c3eSOphir Munk 		} while (nh->nlmsg_seq != sn);
36559513c3eSOphir Munk 		for (;
36659513c3eSOphir Munk 		     NLMSG_OK(nh, (unsigned int)recv_bytes);
36759513c3eSOphir Munk 		     nh = NLMSG_NEXT(nh, recv_bytes)) {
36859513c3eSOphir Munk 			if (nh->nlmsg_type == NLMSG_ERROR) {
36959513c3eSOphir Munk 				struct nlmsgerr *err_data = NLMSG_DATA(nh);
37059513c3eSOphir Munk 
37159513c3eSOphir Munk 				if (err_data->error < 0) {
37259513c3eSOphir Munk 					rte_errno = -err_data->error;
37359513c3eSOphir Munk 					ret = -rte_errno;
37459513c3eSOphir Munk 					goto exit;
37559513c3eSOphir Munk 				}
37659513c3eSOphir Munk 				/* Ack message. */
37759513c3eSOphir Munk 				ret = 0;
37859513c3eSOphir Munk 				goto exit;
37959513c3eSOphir Munk 			}
38059513c3eSOphir Munk 			/* Multi-part msgs and their trailing DONE message. */
38159513c3eSOphir Munk 			if (nh->nlmsg_flags & NLM_F_MULTI) {
38259513c3eSOphir Munk 				if (nh->nlmsg_type == NLMSG_DONE) {
38359513c3eSOphir Munk 					ret =  0;
38459513c3eSOphir Munk 					goto exit;
38559513c3eSOphir Munk 				}
38659513c3eSOphir Munk 				multipart = 1;
38759513c3eSOphir Munk 			}
38859513c3eSOphir Munk 			if (cb) {
38959513c3eSOphir Munk 				ret = cb(nh, arg);
39059513c3eSOphir Munk 				if (ret < 0)
39159513c3eSOphir Munk 					goto exit;
39259513c3eSOphir Munk 			}
39359513c3eSOphir Munk 		}
39459513c3eSOphir Munk 	} while (multipart);
39559513c3eSOphir Munk exit:
39666914d19SSuanming Mou 	mlx5_free(buf);
39759513c3eSOphir Munk 	return ret;
39859513c3eSOphir Munk }
39959513c3eSOphir Munk 
40059513c3eSOphir Munk /**
40159513c3eSOphir Munk  * Parse Netlink message to retrieve the bridge MAC address.
40259513c3eSOphir Munk  *
40359513c3eSOphir Munk  * @param nh
40459513c3eSOphir Munk  *   Pointer to Netlink Message Header.
40559513c3eSOphir Munk  * @param arg
40659513c3eSOphir Munk  *   PMD data register with this callback.
40759513c3eSOphir Munk  *
40859513c3eSOphir Munk  * @return
40959513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
41059513c3eSOphir Munk  */
41159513c3eSOphir Munk static int
41259513c3eSOphir Munk mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
41359513c3eSOphir Munk {
41459513c3eSOphir Munk 	struct mlx5_nl_mac_addr *data = arg;
41559513c3eSOphir Munk 	struct ndmsg *r = NLMSG_DATA(nh);
41659513c3eSOphir Munk 	struct rtattr *attribute;
41759513c3eSOphir Munk 	int len;
41859513c3eSOphir Munk 
41959513c3eSOphir Munk 	len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
42059513c3eSOphir Munk 	for (attribute = MLX5_NDA_RTA(r);
42159513c3eSOphir Munk 	     RTA_OK(attribute, len);
42259513c3eSOphir Munk 	     attribute = RTA_NEXT(attribute, len)) {
42359513c3eSOphir Munk 		if (attribute->rta_type == NDA_LLADDR) {
42459513c3eSOphir Munk 			if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
42559513c3eSOphir Munk 				DRV_LOG(WARNING,
42659513c3eSOphir Munk 					"not enough room to finalize the"
42759513c3eSOphir Munk 					" request");
42859513c3eSOphir Munk 				rte_errno = ENOMEM;
42959513c3eSOphir Munk 				return -rte_errno;
43059513c3eSOphir Munk 			}
43159513c3eSOphir Munk #ifdef RTE_LIBRTE_MLX5_DEBUG
43259513c3eSOphir Munk 			char m[RTE_ETHER_ADDR_FMT_SIZE];
43359513c3eSOphir Munk 
43459513c3eSOphir Munk 			rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE,
43559513c3eSOphir Munk 					      RTA_DATA(attribute));
43659513c3eSOphir Munk 			DRV_LOG(DEBUG, "bridge MAC address %s", m);
43759513c3eSOphir Munk #endif
43859513c3eSOphir Munk 			memcpy(&(*data->mac)[data->mac_n++],
43959513c3eSOphir Munk 			       RTA_DATA(attribute), RTE_ETHER_ADDR_LEN);
44059513c3eSOphir Munk 		}
44159513c3eSOphir Munk 	}
44259513c3eSOphir Munk 	return 0;
44359513c3eSOphir Munk }
44459513c3eSOphir Munk 
44559513c3eSOphir Munk /**
44659513c3eSOphir Munk  * Get bridge MAC addresses.
44759513c3eSOphir Munk  *
44859513c3eSOphir Munk  * @param[in] nlsk_fd
44959513c3eSOphir Munk  *   Netlink socket file descriptor.
45059513c3eSOphir Munk  * @param[in] iface_idx
45159513c3eSOphir Munk  *   Net device interface index.
45259513c3eSOphir Munk  * @param mac[out]
45359513c3eSOphir Munk  *   Pointer to the array table of MAC addresses to fill.
45459513c3eSOphir Munk  *   Its size should be of MLX5_MAX_MAC_ADDRESSES.
45559513c3eSOphir Munk  * @param mac_n[out]
45659513c3eSOphir Munk  *   Number of entries filled in MAC array.
45759513c3eSOphir Munk  *
45859513c3eSOphir Munk  * @return
45959513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
46059513c3eSOphir Munk  */
46159513c3eSOphir Munk static int
46259513c3eSOphir Munk mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx,
46359513c3eSOphir Munk 		      struct rte_ether_addr (*mac)[], int *mac_n)
46459513c3eSOphir Munk {
46559513c3eSOphir Munk 	struct {
46659513c3eSOphir Munk 		struct nlmsghdr	hdr;
46759513c3eSOphir Munk 		struct ifinfomsg ifm;
46859513c3eSOphir Munk 	} req = {
46959513c3eSOphir Munk 		.hdr = {
47059513c3eSOphir Munk 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
47159513c3eSOphir Munk 			.nlmsg_type = RTM_GETNEIGH,
47259513c3eSOphir Munk 			.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
47359513c3eSOphir Munk 		},
47459513c3eSOphir Munk 		.ifm = {
47559513c3eSOphir Munk 			.ifi_family = PF_BRIDGE,
47659513c3eSOphir Munk 			.ifi_index = iface_idx,
47759513c3eSOphir Munk 		},
47859513c3eSOphir Munk 	};
47959513c3eSOphir Munk 	struct mlx5_nl_mac_addr data = {
48059513c3eSOphir Munk 		.mac = mac,
48159513c3eSOphir Munk 		.mac_n = 0,
48259513c3eSOphir Munk 	};
48359513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
48459513c3eSOphir Munk 	int ret;
48559513c3eSOphir Munk 
48659513c3eSOphir Munk 	if (nlsk_fd == -1)
48759513c3eSOphir Munk 		return 0;
48859513c3eSOphir Munk 	ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm,
48959513c3eSOphir Munk 			      sizeof(struct ifinfomsg));
49059513c3eSOphir Munk 	if (ret < 0)
49159513c3eSOphir Munk 		goto error;
49259513c3eSOphir Munk 	ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data);
49359513c3eSOphir Munk 	if (ret < 0)
49459513c3eSOphir Munk 		goto error;
49559513c3eSOphir Munk 	*mac_n = data.mac_n;
49659513c3eSOphir Munk 	return 0;
49759513c3eSOphir Munk error:
49859513c3eSOphir Munk 	DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s",
49959513c3eSOphir Munk 		iface_idx, strerror(rte_errno));
50059513c3eSOphir Munk 	return -rte_errno;
50159513c3eSOphir Munk }
50259513c3eSOphir Munk 
50359513c3eSOphir Munk /**
50459513c3eSOphir Munk  * Modify the MAC address neighbour table with Netlink.
50559513c3eSOphir Munk  *
50659513c3eSOphir Munk  * @param[in] nlsk_fd
50759513c3eSOphir Munk  *   Netlink socket file descriptor.
50859513c3eSOphir Munk  * @param[in] iface_idx
50959513c3eSOphir Munk  *   Net device interface index.
51059513c3eSOphir Munk  * @param mac
51159513c3eSOphir Munk  *   MAC address to consider.
51259513c3eSOphir Munk  * @param add
51359513c3eSOphir Munk  *   1 to add the MAC address, 0 to remove the MAC address.
51459513c3eSOphir Munk  *
51559513c3eSOphir Munk  * @return
51659513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
51759513c3eSOphir Munk  */
51859513c3eSOphir Munk static int
51959513c3eSOphir Munk mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
52059513c3eSOphir Munk 			struct rte_ether_addr *mac, int add)
52159513c3eSOphir Munk {
52259513c3eSOphir Munk 	struct {
52359513c3eSOphir Munk 		struct nlmsghdr hdr;
52459513c3eSOphir Munk 		struct ndmsg ndm;
52559513c3eSOphir Munk 		struct rtattr rta;
52659513c3eSOphir Munk 		uint8_t buffer[RTE_ETHER_ADDR_LEN];
52759513c3eSOphir Munk 	} req = {
52859513c3eSOphir Munk 		.hdr = {
52959513c3eSOphir Munk 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
53059513c3eSOphir Munk 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
53159513c3eSOphir Munk 				NLM_F_EXCL | NLM_F_ACK,
53259513c3eSOphir Munk 			.nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
53359513c3eSOphir Munk 		},
53459513c3eSOphir Munk 		.ndm = {
53559513c3eSOphir Munk 			.ndm_family = PF_BRIDGE,
53659513c3eSOphir Munk 			.ndm_state = NUD_NOARP | NUD_PERMANENT,
53759513c3eSOphir Munk 			.ndm_ifindex = iface_idx,
53859513c3eSOphir Munk 			.ndm_flags = NTF_SELF,
53959513c3eSOphir Munk 		},
54059513c3eSOphir Munk 		.rta = {
54159513c3eSOphir Munk 			.rta_type = NDA_LLADDR,
54259513c3eSOphir Munk 			.rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN),
54359513c3eSOphir Munk 		},
54459513c3eSOphir Munk 	};
54559513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
54659513c3eSOphir Munk 	int ret;
54759513c3eSOphir Munk 
54859513c3eSOphir Munk 	if (nlsk_fd == -1)
54959513c3eSOphir Munk 		return 0;
55059513c3eSOphir Munk 	memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN);
55159513c3eSOphir Munk 	req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
55259513c3eSOphir Munk 		RTA_ALIGN(req.rta.rta_len);
55359513c3eSOphir Munk 	ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
55459513c3eSOphir Munk 	if (ret < 0)
55559513c3eSOphir Munk 		goto error;
55659513c3eSOphir Munk 	ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
55759513c3eSOphir Munk 	if (ret < 0)
55859513c3eSOphir Munk 		goto error;
55959513c3eSOphir Munk 	return 0;
56059513c3eSOphir Munk error:
56159513c3eSOphir Munk #ifdef RTE_LIBRTE_MLX5_DEBUG
56259513c3eSOphir Munk 	{
56359513c3eSOphir Munk 		char m[RTE_ETHER_ADDR_FMT_SIZE];
56459513c3eSOphir Munk 
56559513c3eSOphir Munk 		rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac);
56659513c3eSOphir Munk 		DRV_LOG(DEBUG,
56759513c3eSOphir Munk 			"Interface %u cannot %s MAC address %s %s",
56859513c3eSOphir Munk 			iface_idx,
56959513c3eSOphir Munk 			add ? "add" : "remove", m, strerror(rte_errno));
57059513c3eSOphir Munk 	}
57159513c3eSOphir Munk #endif
57259513c3eSOphir Munk 	return -rte_errno;
57359513c3eSOphir Munk }
57459513c3eSOphir Munk 
57559513c3eSOphir Munk /**
57659513c3eSOphir Munk  * Modify the VF MAC address neighbour table with Netlink.
57759513c3eSOphir Munk  *
57859513c3eSOphir Munk  * @param[in] nlsk_fd
57959513c3eSOphir Munk  *   Netlink socket file descriptor.
58059513c3eSOphir Munk  * @param[in] iface_idx
58159513c3eSOphir Munk  *   Net device interface index.
58259513c3eSOphir Munk  * @param mac
58359513c3eSOphir Munk  *    MAC address to consider.
58459513c3eSOphir Munk  * @param vf_index
58559513c3eSOphir Munk  *    VF index.
58659513c3eSOphir Munk  *
58759513c3eSOphir Munk  * @return
58859513c3eSOphir Munk  *    0 on success, a negative errno value otherwise and rte_errno is set.
58959513c3eSOphir Munk  */
59059513c3eSOphir Munk int
59159513c3eSOphir Munk mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
59259513c3eSOphir Munk 			   struct rte_ether_addr *mac, int vf_index)
59359513c3eSOphir Munk {
59459513c3eSOphir Munk 	int ret;
59559513c3eSOphir Munk 	struct {
59659513c3eSOphir Munk 		struct nlmsghdr hdr;
59759513c3eSOphir Munk 		struct ifinfomsg ifm;
59859513c3eSOphir Munk 		struct rtattr vf_list_rta;
59959513c3eSOphir Munk 		struct rtattr vf_info_rta;
60059513c3eSOphir Munk 		struct rtattr vf_mac_rta;
60159513c3eSOphir Munk 		struct ifla_vf_mac ivm;
60259513c3eSOphir Munk 	} req = {
60359513c3eSOphir Munk 		.hdr = {
60459513c3eSOphir Munk 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
60559513c3eSOphir Munk 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
60659513c3eSOphir Munk 			.nlmsg_type = RTM_BASE,
60759513c3eSOphir Munk 		},
60859513c3eSOphir Munk 		.ifm = {
60959513c3eSOphir Munk 			.ifi_index = iface_idx,
61059513c3eSOphir Munk 		},
61159513c3eSOphir Munk 		.vf_list_rta = {
61259513c3eSOphir Munk 			.rta_type = IFLA_VFINFO_LIST,
61359513c3eSOphir Munk 			.rta_len = RTA_ALIGN(RTA_LENGTH(0)),
61459513c3eSOphir Munk 		},
61559513c3eSOphir Munk 		.vf_info_rta = {
61659513c3eSOphir Munk 			.rta_type = IFLA_VF_INFO,
61759513c3eSOphir Munk 			.rta_len = RTA_ALIGN(RTA_LENGTH(0)),
61859513c3eSOphir Munk 		},
61959513c3eSOphir Munk 		.vf_mac_rta = {
62059513c3eSOphir Munk 			.rta_type = IFLA_VF_MAC,
62159513c3eSOphir Munk 		},
62259513c3eSOphir Munk 	};
62359513c3eSOphir Munk 	struct ifla_vf_mac ivm = {
62459513c3eSOphir Munk 		.vf = vf_index,
62559513c3eSOphir Munk 	};
62659513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
62759513c3eSOphir Munk 
62859513c3eSOphir Munk 	memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN);
62959513c3eSOphir Munk 	memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm));
63059513c3eSOphir Munk 
63159513c3eSOphir Munk 	req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm));
63259513c3eSOphir Munk 	req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
63359513c3eSOphir Munk 		RTA_ALIGN(req.vf_list_rta.rta_len) +
63459513c3eSOphir Munk 		RTA_ALIGN(req.vf_info_rta.rta_len) +
63559513c3eSOphir Munk 		RTA_ALIGN(req.vf_mac_rta.rta_len);
63659513c3eSOphir Munk 	req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
63759513c3eSOphir Munk 					       &req.vf_list_rta);
63859513c3eSOphir Munk 	req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
63959513c3eSOphir Munk 					       &req.vf_info_rta);
64059513c3eSOphir Munk 
64159513c3eSOphir Munk 	if (nlsk_fd < 0)
64259513c3eSOphir Munk 		return -1;
64359513c3eSOphir Munk 	ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
64459513c3eSOphir Munk 	if (ret < 0)
64559513c3eSOphir Munk 		goto error;
64659513c3eSOphir Munk 	ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
64759513c3eSOphir Munk 	if (ret < 0)
64859513c3eSOphir Munk 		goto error;
64959513c3eSOphir Munk 	return 0;
65059513c3eSOphir Munk error:
65159513c3eSOphir Munk 	DRV_LOG(ERR,
65259513c3eSOphir Munk 		"representor %u cannot set VF MAC address "
65359513c3eSOphir Munk 		"%02X:%02X:%02X:%02X:%02X:%02X : %s",
65459513c3eSOphir Munk 		vf_index,
65559513c3eSOphir Munk 		mac->addr_bytes[0], mac->addr_bytes[1],
65659513c3eSOphir Munk 		mac->addr_bytes[2], mac->addr_bytes[3],
65759513c3eSOphir Munk 		mac->addr_bytes[4], mac->addr_bytes[5],
65859513c3eSOphir Munk 		strerror(rte_errno));
65959513c3eSOphir Munk 	return -rte_errno;
66059513c3eSOphir Munk }
66159513c3eSOphir Munk 
66259513c3eSOphir Munk /**
66359513c3eSOphir Munk  * Add a MAC address.
66459513c3eSOphir Munk  *
66559513c3eSOphir Munk  * @param[in] nlsk_fd
66659513c3eSOphir Munk  *   Netlink socket file descriptor.
66759513c3eSOphir Munk  * @param[in] iface_idx
66859513c3eSOphir Munk  *   Net device interface index.
66959513c3eSOphir Munk  * @param mac_own
67059513c3eSOphir Munk  *   BITFIELD_DECLARE array to store the mac.
67159513c3eSOphir Munk  * @param mac
67259513c3eSOphir Munk  *   MAC address to register.
67359513c3eSOphir Munk  * @param index
67459513c3eSOphir Munk  *   MAC address index.
67559513c3eSOphir Munk  *
67659513c3eSOphir Munk  * @return
67759513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
67859513c3eSOphir Munk  */
67959513c3eSOphir Munk int
68059513c3eSOphir Munk mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx,
68159513c3eSOphir Munk 		     uint64_t *mac_own, struct rte_ether_addr *mac,
68259513c3eSOphir Munk 		     uint32_t index)
68359513c3eSOphir Munk {
68459513c3eSOphir Munk 	int ret;
68559513c3eSOphir Munk 
68659513c3eSOphir Munk 	ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1);
68759513c3eSOphir Munk 	if (!ret) {
68859513c3eSOphir Munk 		MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
68959513c3eSOphir Munk 		if (index >= MLX5_MAX_MAC_ADDRESSES)
69059513c3eSOphir Munk 			return -EINVAL;
69159513c3eSOphir Munk 
69259513c3eSOphir Munk 		BITFIELD_SET(mac_own, index);
69359513c3eSOphir Munk 	}
69459513c3eSOphir Munk 	if (ret == -EEXIST)
69559513c3eSOphir Munk 		return 0;
69659513c3eSOphir Munk 	return ret;
69759513c3eSOphir Munk }
69859513c3eSOphir Munk 
69959513c3eSOphir Munk /**
70059513c3eSOphir Munk  * Remove a MAC address.
70159513c3eSOphir Munk  *
70259513c3eSOphir Munk  * @param[in] nlsk_fd
70359513c3eSOphir Munk  *   Netlink socket file descriptor.
70459513c3eSOphir Munk  * @param[in] iface_idx
70559513c3eSOphir Munk  *   Net device interface index.
70659513c3eSOphir Munk  * @param mac_own
70759513c3eSOphir Munk  *   BITFIELD_DECLARE array to store the mac.
70859513c3eSOphir Munk  * @param mac
70959513c3eSOphir Munk  *   MAC address to remove.
71059513c3eSOphir Munk  * @param index
71159513c3eSOphir Munk  *   MAC address index.
71259513c3eSOphir Munk  *
71359513c3eSOphir Munk  * @return
71459513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
71559513c3eSOphir Munk  */
71659513c3eSOphir Munk int
71759513c3eSOphir Munk mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
71859513c3eSOphir Munk 			struct rte_ether_addr *mac, uint32_t index)
71959513c3eSOphir Munk {
72059513c3eSOphir Munk 	MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
72159513c3eSOphir Munk 	if (index >= MLX5_MAX_MAC_ADDRESSES)
72259513c3eSOphir Munk 		return -EINVAL;
72359513c3eSOphir Munk 
72459513c3eSOphir Munk 	BITFIELD_RESET(mac_own, index);
72559513c3eSOphir Munk 	return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0);
72659513c3eSOphir Munk }
72759513c3eSOphir Munk 
72859513c3eSOphir Munk /**
72959513c3eSOphir Munk  * Synchronize Netlink bridge table to the internal table.
73059513c3eSOphir Munk  *
73159513c3eSOphir Munk  * @param[in] nlsk_fd
73259513c3eSOphir Munk  *   Netlink socket file descriptor.
73359513c3eSOphir Munk  * @param[in] iface_idx
73459513c3eSOphir Munk  *   Net device interface index.
73559513c3eSOphir Munk  * @param mac_addrs
73659513c3eSOphir Munk  *   Mac addresses array to sync.
73759513c3eSOphir Munk  * @param n
73859513c3eSOphir Munk  *   @p mac_addrs array size.
73959513c3eSOphir Munk  */
74059513c3eSOphir Munk void
74159513c3eSOphir Munk mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx,
74259513c3eSOphir Munk 		      struct rte_ether_addr *mac_addrs, int n)
74359513c3eSOphir Munk {
74459513c3eSOphir Munk 	struct rte_ether_addr macs[n];
74559513c3eSOphir Munk 	int macs_n = 0;
74659513c3eSOphir Munk 	int i;
74759513c3eSOphir Munk 	int ret;
74859513c3eSOphir Munk 
74959df97f1SXueming Li 	memset(macs, 0, n * sizeof(macs[0]));
75059513c3eSOphir Munk 	ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n);
75159513c3eSOphir Munk 	if (ret)
75259513c3eSOphir Munk 		return;
75359513c3eSOphir Munk 	for (i = 0; i != macs_n; ++i) {
75459513c3eSOphir Munk 		int j;
75559513c3eSOphir Munk 
75659513c3eSOphir Munk 		/* Verify the address is not in the array yet. */
75759513c3eSOphir Munk 		for (j = 0; j != n; ++j)
75859513c3eSOphir Munk 			if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j]))
75959513c3eSOphir Munk 				break;
76059513c3eSOphir Munk 		if (j != n)
76159513c3eSOphir Munk 			continue;
762493f0bb5SSouvik Dey 		if (rte_is_multicast_ether_addr(&macs[i])) {
76359513c3eSOphir Munk 			/* Find the first entry available. */
764493f0bb5SSouvik Dey 			for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) {
76559513c3eSOphir Munk 				if (rte_is_zero_ether_addr(&mac_addrs[j])) {
76659513c3eSOphir Munk 					mac_addrs[j] = macs[i];
76759513c3eSOphir Munk 					break;
76859513c3eSOphir Munk 				}
76959513c3eSOphir Munk 			}
770493f0bb5SSouvik Dey 		} else {
771493f0bb5SSouvik Dey 			/* Find the first entry available. */
772493f0bb5SSouvik Dey 			for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) {
773493f0bb5SSouvik Dey 				if (rte_is_zero_ether_addr(&mac_addrs[j])) {
774493f0bb5SSouvik Dey 					mac_addrs[j] = macs[i];
775493f0bb5SSouvik Dey 					break;
776493f0bb5SSouvik Dey 				}
777493f0bb5SSouvik Dey 			}
778493f0bb5SSouvik Dey 		}
77959513c3eSOphir Munk 	}
78059513c3eSOphir Munk }
78159513c3eSOphir Munk 
78259513c3eSOphir Munk /**
78359513c3eSOphir Munk  * Flush all added MAC addresses.
78459513c3eSOphir Munk  *
78559513c3eSOphir Munk  * @param[in] nlsk_fd
78659513c3eSOphir Munk  *   Netlink socket file descriptor.
78759513c3eSOphir Munk  * @param[in] iface_idx
78859513c3eSOphir Munk  *   Net device interface index.
78959513c3eSOphir Munk  * @param[in] mac_addrs
79059513c3eSOphir Munk  *   Mac addresses array to flush.
79159513c3eSOphir Munk  * @param n
79259513c3eSOphir Munk  *   @p mac_addrs array size.
79359513c3eSOphir Munk  * @param mac_own
79459513c3eSOphir Munk  *   BITFIELD_DECLARE array to store the mac.
79559513c3eSOphir Munk  */
79659513c3eSOphir Munk void
79759513c3eSOphir Munk mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx,
79859513c3eSOphir Munk 		       struct rte_ether_addr *mac_addrs, int n,
79959513c3eSOphir Munk 		       uint64_t *mac_own)
80059513c3eSOphir Munk {
80159513c3eSOphir Munk 	int i;
80259513c3eSOphir Munk 
8034a01fa04SShiri Kuzin 	if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES)
80459513c3eSOphir Munk 		return;
80559513c3eSOphir Munk 
80659513c3eSOphir Munk 	for (i = n - 1; i >= 0; --i) {
80759513c3eSOphir Munk 		struct rte_ether_addr *m = &mac_addrs[i];
80859513c3eSOphir Munk 
80959513c3eSOphir Munk 		if (BITFIELD_ISSET(mac_own, i))
81059513c3eSOphir Munk 			mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m,
81159513c3eSOphir Munk 						i);
81259513c3eSOphir Munk 	}
81359513c3eSOphir Munk }
81459513c3eSOphir Munk 
81559513c3eSOphir Munk /**
81659513c3eSOphir Munk  * Enable promiscuous / all multicast mode through Netlink.
81759513c3eSOphir Munk  *
81859513c3eSOphir Munk  * @param[in] nlsk_fd
81959513c3eSOphir Munk  *   Netlink socket file descriptor.
82059513c3eSOphir Munk  * @param[in] iface_idx
82159513c3eSOphir Munk  *   Net device interface index.
82259513c3eSOphir Munk  * @param flags
82359513c3eSOphir Munk  *   IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti.
82459513c3eSOphir Munk  * @param enable
82559513c3eSOphir Munk  *   Nonzero to enable, disable otherwise.
82659513c3eSOphir Munk  *
82759513c3eSOphir Munk  * @return
82859513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
82959513c3eSOphir Munk  */
83059513c3eSOphir Munk static int
83159513c3eSOphir Munk mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags,
83259513c3eSOphir Munk 		     int enable)
83359513c3eSOphir Munk {
83459513c3eSOphir Munk 	struct {
83559513c3eSOphir Munk 		struct nlmsghdr hdr;
83659513c3eSOphir Munk 		struct ifinfomsg ifi;
83759513c3eSOphir Munk 	} req = {
83859513c3eSOphir Munk 		.hdr = {
83959513c3eSOphir Munk 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
84059513c3eSOphir Munk 			.nlmsg_type = RTM_NEWLINK,
84159513c3eSOphir Munk 			.nlmsg_flags = NLM_F_REQUEST,
84259513c3eSOphir Munk 		},
84359513c3eSOphir Munk 		.ifi = {
84459513c3eSOphir Munk 			.ifi_flags = enable ? flags : 0,
84559513c3eSOphir Munk 			.ifi_change = flags,
84659513c3eSOphir Munk 			.ifi_index = iface_idx,
84759513c3eSOphir Munk 		},
84859513c3eSOphir Munk 	};
84959513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
85059513c3eSOphir Munk 	int ret;
85159513c3eSOphir Munk 
85259513c3eSOphir Munk 	MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
85359513c3eSOphir Munk 	if (nlsk_fd < 0)
85459513c3eSOphir Munk 		return 0;
85559513c3eSOphir Munk 	ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
85659513c3eSOphir Munk 	if (ret < 0)
85759513c3eSOphir Munk 		return ret;
85859513c3eSOphir Munk 	return 0;
85959513c3eSOphir Munk }
86059513c3eSOphir Munk 
86159513c3eSOphir Munk /**
86259513c3eSOphir Munk  * Enable promiscuous mode through Netlink.
86359513c3eSOphir Munk  *
86459513c3eSOphir Munk  * @param[in] nlsk_fd
86559513c3eSOphir Munk  *   Netlink socket file descriptor.
86659513c3eSOphir Munk  * @param[in] iface_idx
86759513c3eSOphir Munk  *   Net device interface index.
86859513c3eSOphir Munk  * @param enable
86959513c3eSOphir Munk  *   Nonzero to enable, disable otherwise.
87059513c3eSOphir Munk  *
87159513c3eSOphir Munk  * @return
87259513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
87359513c3eSOphir Munk  */
87459513c3eSOphir Munk int
87559513c3eSOphir Munk mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable)
87659513c3eSOphir Munk {
87759513c3eSOphir Munk 	int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable);
87859513c3eSOphir Munk 
87959513c3eSOphir Munk 	if (ret)
88059513c3eSOphir Munk 		DRV_LOG(DEBUG,
88159513c3eSOphir Munk 			"Interface %u cannot %s promisc mode: Netlink error %s",
88259513c3eSOphir Munk 			iface_idx, enable ? "enable" : "disable",
88359513c3eSOphir Munk 			strerror(rte_errno));
88459513c3eSOphir Munk 	return ret;
88559513c3eSOphir Munk }
88659513c3eSOphir Munk 
88759513c3eSOphir Munk /**
88859513c3eSOphir Munk  * Enable all multicast mode through Netlink.
88959513c3eSOphir Munk  *
89059513c3eSOphir Munk  * @param[in] nlsk_fd
89159513c3eSOphir Munk  *   Netlink socket file descriptor.
89259513c3eSOphir Munk  * @param[in] iface_idx
89359513c3eSOphir Munk  *   Net device interface index.
89459513c3eSOphir Munk  * @param enable
89559513c3eSOphir Munk  *   Nonzero to enable, disable otherwise.
89659513c3eSOphir Munk  *
89759513c3eSOphir Munk  * @return
89859513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
89959513c3eSOphir Munk  */
90059513c3eSOphir Munk int
90159513c3eSOphir Munk mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
90259513c3eSOphir Munk {
90359513c3eSOphir Munk 	int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI,
90459513c3eSOphir Munk 				       enable);
90559513c3eSOphir Munk 
90659513c3eSOphir Munk 	if (ret)
90759513c3eSOphir Munk 		DRV_LOG(DEBUG,
90859513c3eSOphir Munk 			"Interface %u cannot %s allmulti : Netlink error %s",
90959513c3eSOphir Munk 			iface_idx, enable ? "enable" : "disable",
91059513c3eSOphir Munk 			strerror(rte_errno));
91159513c3eSOphir Munk 	return ret;
91259513c3eSOphir Munk }
91359513c3eSOphir Munk 
91459513c3eSOphir Munk /**
91559513c3eSOphir Munk  * Process network interface information from Netlink message.
91659513c3eSOphir Munk  *
91759513c3eSOphir Munk  * @param nh
91859513c3eSOphir Munk  *   Pointer to Netlink message header.
91959513c3eSOphir Munk  * @param arg
92059513c3eSOphir Munk  *   Opaque data pointer for this callback.
92159513c3eSOphir Munk  *
92259513c3eSOphir Munk  * @return
92359513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
92459513c3eSOphir Munk  */
92559513c3eSOphir Munk static int
92659513c3eSOphir Munk mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
92759513c3eSOphir Munk {
92859513c3eSOphir Munk 	struct mlx5_nl_ifindex_data *data = arg;
92959513c3eSOphir Munk 	struct mlx5_nl_ifindex_data local = {
93059513c3eSOphir Munk 		.flags = 0,
93159513c3eSOphir Munk 	};
93259513c3eSOphir Munk 	size_t off = NLMSG_HDRLEN;
93359513c3eSOphir Munk 
93459513c3eSOphir Munk 	if (nh->nlmsg_type !=
93559513c3eSOphir Munk 	    RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
93659513c3eSOphir Munk 	    nh->nlmsg_type !=
93759513c3eSOphir Munk 	    RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET))
93859513c3eSOphir Munk 		goto error;
93959513c3eSOphir Munk 	while (off < nh->nlmsg_len) {
94059513c3eSOphir Munk 		struct nlattr *na = (void *)((uintptr_t)nh + off);
94159513c3eSOphir Munk 		void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
94259513c3eSOphir Munk 
94359513c3eSOphir Munk 		if (na->nla_len > nh->nlmsg_len - off)
94459513c3eSOphir Munk 			goto error;
94559513c3eSOphir Munk 		switch (na->nla_type) {
94659513c3eSOphir Munk 		case RDMA_NLDEV_ATTR_DEV_INDEX:
94759513c3eSOphir Munk 			local.ibindex = *(uint32_t *)payload;
94859513c3eSOphir Munk 			local.flags |= MLX5_NL_CMD_GET_IB_INDEX;
94959513c3eSOphir Munk 			break;
95059513c3eSOphir Munk 		case RDMA_NLDEV_ATTR_DEV_NAME:
95159513c3eSOphir Munk 			if (!strcmp(payload, data->name))
95259513c3eSOphir Munk 				local.flags |= MLX5_NL_CMD_GET_IB_NAME;
95359513c3eSOphir Munk 			break;
95459513c3eSOphir Munk 		case RDMA_NLDEV_ATTR_NDEV_INDEX:
95559513c3eSOphir Munk 			local.ifindex = *(uint32_t *)payload;
95659513c3eSOphir Munk 			local.flags |= MLX5_NL_CMD_GET_NET_INDEX;
95759513c3eSOphir Munk 			break;
95859513c3eSOphir Munk 		case RDMA_NLDEV_ATTR_PORT_INDEX:
95959513c3eSOphir Munk 			local.portnum = *(uint32_t *)payload;
96059513c3eSOphir Munk 			local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
96159513c3eSOphir Munk 			break;
96259513c3eSOphir Munk 		default:
96359513c3eSOphir Munk 			break;
96459513c3eSOphir Munk 		}
96559513c3eSOphir Munk 		off += NLA_ALIGN(na->nla_len);
96659513c3eSOphir Munk 	}
96759513c3eSOphir Munk 	/*
96859513c3eSOphir Munk 	 * It is possible to have multiple messages for all
96959513c3eSOphir Munk 	 * Infiniband devices in the system with appropriate name.
97059513c3eSOphir Munk 	 * So we should gather parameters locally and copy to
97159513c3eSOphir Munk 	 * query context only in case of coinciding device name.
97259513c3eSOphir Munk 	 */
97359513c3eSOphir Munk 	if (local.flags & MLX5_NL_CMD_GET_IB_NAME) {
97459513c3eSOphir Munk 		data->flags = local.flags;
97559513c3eSOphir Munk 		data->ibindex = local.ibindex;
97659513c3eSOphir Munk 		data->ifindex = local.ifindex;
97759513c3eSOphir Munk 		data->portnum = local.portnum;
97859513c3eSOphir Munk 	}
97959513c3eSOphir Munk 	return 0;
98059513c3eSOphir Munk error:
98159513c3eSOphir Munk 	rte_errno = EINVAL;
98259513c3eSOphir Munk 	return -rte_errno;
98359513c3eSOphir Munk }
98459513c3eSOphir Munk 
98559513c3eSOphir Munk /**
98659513c3eSOphir Munk  * Get index of network interface associated with some IB device.
98759513c3eSOphir Munk  *
98859513c3eSOphir Munk  * This is the only somewhat safe method to avoid resorting to heuristics
98959513c3eSOphir Munk  * when faced with port representors. Unfortunately it requires at least
99059513c3eSOphir Munk  * Linux 4.17.
99159513c3eSOphir Munk  *
99259513c3eSOphir Munk  * @param nl
99359513c3eSOphir Munk  *   Netlink socket of the RDMA kind (NETLINK_RDMA).
99459513c3eSOphir Munk  * @param[in] name
99559513c3eSOphir Munk  *   IB device name.
99659513c3eSOphir Munk  * @param[in] pindex
99759513c3eSOphir Munk  *   IB device port index, starting from 1
99859513c3eSOphir Munk  * @return
99959513c3eSOphir Munk  *   A valid (nonzero) interface index on success, 0 otherwise and rte_errno
100059513c3eSOphir Munk  *   is set.
100159513c3eSOphir Munk  */
100259513c3eSOphir Munk unsigned int
100359513c3eSOphir Munk mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
100459513c3eSOphir Munk {
100559513c3eSOphir Munk 	struct mlx5_nl_ifindex_data data = {
100659513c3eSOphir Munk 		.name = name,
100759513c3eSOphir Munk 		.flags = 0,
100859513c3eSOphir Munk 		.ibindex = 0, /* Determined during first pass. */
100959513c3eSOphir Munk 		.ifindex = 0, /* Determined during second pass. */
101059513c3eSOphir Munk 	};
101159513c3eSOphir Munk 	union {
101259513c3eSOphir Munk 		struct nlmsghdr nh;
101359513c3eSOphir Munk 		uint8_t buf[NLMSG_HDRLEN +
101459513c3eSOphir Munk 			    NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) +
101559513c3eSOphir Munk 			    NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
101659513c3eSOphir Munk 	} req = {
101759513c3eSOphir Munk 		.nh = {
101859513c3eSOphir Munk 			.nlmsg_len = NLMSG_LENGTH(0),
101959513c3eSOphir Munk 			.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
102059513c3eSOphir Munk 						       RDMA_NLDEV_CMD_GET),
102159513c3eSOphir Munk 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
102259513c3eSOphir Munk 		},
102359513c3eSOphir Munk 	};
102459513c3eSOphir Munk 	struct nlattr *na;
102559513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
102659513c3eSOphir Munk 	int ret;
102759513c3eSOphir Munk 
102859513c3eSOphir Munk 	ret = mlx5_nl_send(nl, &req.nh, sn);
102959513c3eSOphir Munk 	if (ret < 0)
103059513c3eSOphir Munk 		return 0;
103159513c3eSOphir Munk 	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
103259513c3eSOphir Munk 	if (ret < 0)
103359513c3eSOphir Munk 		return 0;
103459513c3eSOphir Munk 	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
103559513c3eSOphir Munk 	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX))
103659513c3eSOphir Munk 		goto error;
103759513c3eSOphir Munk 	data.flags = 0;
103859513c3eSOphir Munk 	sn = MLX5_NL_SN_GENERATE;
103959513c3eSOphir Munk 	req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
104059513c3eSOphir Munk 					     RDMA_NLDEV_CMD_PORT_GET);
104159513c3eSOphir Munk 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
104259513c3eSOphir Munk 	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
104359513c3eSOphir Munk 	na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
104459513c3eSOphir Munk 	na->nla_len = NLA_HDRLEN + sizeof(data.ibindex);
104559513c3eSOphir Munk 	na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
104659513c3eSOphir Munk 	memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
104759513c3eSOphir Munk 	       &data.ibindex, sizeof(data.ibindex));
104859513c3eSOphir Munk 	na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
104959513c3eSOphir Munk 	na->nla_len = NLA_HDRLEN + sizeof(pindex);
105059513c3eSOphir Munk 	na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
105159513c3eSOphir Munk 	memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
105259513c3eSOphir Munk 	       &pindex, sizeof(pindex));
105359513c3eSOphir Munk 	ret = mlx5_nl_send(nl, &req.nh, sn);
105459513c3eSOphir Munk 	if (ret < 0)
105559513c3eSOphir Munk 		return 0;
105659513c3eSOphir Munk 	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
105759513c3eSOphir Munk 	if (ret < 0)
105859513c3eSOphir Munk 		return 0;
105959513c3eSOphir Munk 	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
106059513c3eSOphir Munk 	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
106159513c3eSOphir Munk 	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) ||
106259513c3eSOphir Munk 	    !data.ifindex)
106359513c3eSOphir Munk 		goto error;
106459513c3eSOphir Munk 	return data.ifindex;
106559513c3eSOphir Munk error:
106659513c3eSOphir Munk 	rte_errno = ENODEV;
106759513c3eSOphir Munk 	return 0;
106859513c3eSOphir Munk }
106959513c3eSOphir Munk 
107059513c3eSOphir Munk /**
107159513c3eSOphir Munk  * Get the number of physical ports of given IB device.
107259513c3eSOphir Munk  *
107359513c3eSOphir Munk  * @param nl
107459513c3eSOphir Munk  *   Netlink socket of the RDMA kind (NETLINK_RDMA).
107559513c3eSOphir Munk  * @param[in] name
107659513c3eSOphir Munk  *   IB device name.
107759513c3eSOphir Munk  *
107859513c3eSOphir Munk  * @return
107959513c3eSOphir Munk  *   A valid (nonzero) number of ports on success, 0 otherwise
108059513c3eSOphir Munk  *   and rte_errno is set.
108159513c3eSOphir Munk  */
108259513c3eSOphir Munk unsigned int
108359513c3eSOphir Munk mlx5_nl_portnum(int nl, const char *name)
108459513c3eSOphir Munk {
108559513c3eSOphir Munk 	struct mlx5_nl_ifindex_data data = {
108659513c3eSOphir Munk 		.flags = 0,
108759513c3eSOphir Munk 		.name = name,
108859513c3eSOphir Munk 		.ifindex = 0,
108959513c3eSOphir Munk 		.portnum = 0,
109059513c3eSOphir Munk 	};
109159513c3eSOphir Munk 	struct nlmsghdr req = {
109259513c3eSOphir Munk 		.nlmsg_len = NLMSG_LENGTH(0),
109359513c3eSOphir Munk 		.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
109459513c3eSOphir Munk 					       RDMA_NLDEV_CMD_GET),
109559513c3eSOphir Munk 		.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
109659513c3eSOphir Munk 	};
109759513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
109859513c3eSOphir Munk 	int ret;
109959513c3eSOphir Munk 
110059513c3eSOphir Munk 	ret = mlx5_nl_send(nl, &req, sn);
110159513c3eSOphir Munk 	if (ret < 0)
110259513c3eSOphir Munk 		return 0;
110359513c3eSOphir Munk 	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
110459513c3eSOphir Munk 	if (ret < 0)
110559513c3eSOphir Munk 		return 0;
110659513c3eSOphir Munk 	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
110759513c3eSOphir Munk 	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
110859513c3eSOphir Munk 	    !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) {
110959513c3eSOphir Munk 		rte_errno = ENODEV;
111059513c3eSOphir Munk 		return 0;
111159513c3eSOphir Munk 	}
111259513c3eSOphir Munk 	if (!data.portnum)
111359513c3eSOphir Munk 		rte_errno = EINVAL;
111459513c3eSOphir Munk 	return data.portnum;
111559513c3eSOphir Munk }
111659513c3eSOphir Munk 
111759513c3eSOphir Munk /**
111859513c3eSOphir Munk  * Analyze gathered port parameters via Netlink to recognize master
111959513c3eSOphir Munk  * and representor devices for E-Switch configuration.
112059513c3eSOphir Munk  *
112159513c3eSOphir Munk  * @param[in] num_vf_set
112259513c3eSOphir Munk  *   flag of presence of number of VFs port attribute.
112359513c3eSOphir Munk  * @param[inout] switch_info
112459513c3eSOphir Munk  *   Port information, including port name as a number and port name
112559513c3eSOphir Munk  *   type if recognized
112659513c3eSOphir Munk  *
112759513c3eSOphir Munk  * @return
112859513c3eSOphir Munk  *   master and representor flags are set in switch_info according to
112959513c3eSOphir Munk  *   recognized parameters (if any).
113059513c3eSOphir Munk  */
113159513c3eSOphir Munk static void
113259513c3eSOphir Munk mlx5_nl_check_switch_info(bool num_vf_set,
113359513c3eSOphir Munk 			  struct mlx5_switch_info *switch_info)
113459513c3eSOphir Munk {
113559513c3eSOphir Munk 	switch (switch_info->name_type) {
113659513c3eSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
113759513c3eSOphir Munk 		/*
113859513c3eSOphir Munk 		 * Name is not recognized, assume the master,
113959513c3eSOphir Munk 		 * check the number of VFs key presence.
114059513c3eSOphir Munk 		 */
114159513c3eSOphir Munk 		switch_info->master = num_vf_set;
114259513c3eSOphir Munk 		break;
114359513c3eSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
114459513c3eSOphir Munk 		/*
114559513c3eSOphir Munk 		 * Name is not set, this assumes the legacy naming
114659513c3eSOphir Munk 		 * schema for master, just check if there is a
114759513c3eSOphir Munk 		 * number of VFs key.
114859513c3eSOphir Munk 		 */
114959513c3eSOphir Munk 		switch_info->master = num_vf_set;
115059513c3eSOphir Munk 		break;
115159513c3eSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
115259513c3eSOphir Munk 		/* New uplink naming schema recognized. */
115359513c3eSOphir Munk 		switch_info->master = 1;
115459513c3eSOphir Munk 		break;
115559513c3eSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
115659513c3eSOphir Munk 		/* Legacy representors naming schema. */
115759513c3eSOphir Munk 		switch_info->representor = !num_vf_set;
115859513c3eSOphir Munk 		break;
1159420bbdaeSViacheslav Ovsiienko 	case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
1160420bbdaeSViacheslav Ovsiienko 		/* Fallthrough */
116159513c3eSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
116259df97f1SXueming Li 		/* Fallthrough */
116359df97f1SXueming Li 	case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
116459513c3eSOphir Munk 		/* New representors naming schema. */
116559513c3eSOphir Munk 		switch_info->representor = 1;
116659513c3eSOphir Munk 		break;
116759513c3eSOphir Munk 	}
116859513c3eSOphir Munk }
116959513c3eSOphir Munk 
117059513c3eSOphir Munk /**
117159513c3eSOphir Munk  * Process switch information from Netlink message.
117259513c3eSOphir Munk  *
117359513c3eSOphir Munk  * @param nh
117459513c3eSOphir Munk  *   Pointer to Netlink message header.
117559513c3eSOphir Munk  * @param arg
117659513c3eSOphir Munk  *   Opaque data pointer for this callback.
117759513c3eSOphir Munk  *
117859513c3eSOphir Munk  * @return
117959513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
118059513c3eSOphir Munk  */
118159513c3eSOphir Munk static int
118259513c3eSOphir Munk mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
118359513c3eSOphir Munk {
118459513c3eSOphir Munk 	struct mlx5_switch_info info = {
118559513c3eSOphir Munk 		.master = 0,
118659513c3eSOphir Munk 		.representor = 0,
118759513c3eSOphir Munk 		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
118859513c3eSOphir Munk 		.port_name = 0,
118959513c3eSOphir Munk 		.switch_id = 0,
119059513c3eSOphir Munk 	};
119159513c3eSOphir Munk 	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
119259513c3eSOphir Munk 	bool switch_id_set = false;
119359513c3eSOphir Munk 	bool num_vf_set = false;
119459513c3eSOphir Munk 
119559513c3eSOphir Munk 	if (nh->nlmsg_type != RTM_NEWLINK)
119659513c3eSOphir Munk 		goto error;
119759513c3eSOphir Munk 	while (off < nh->nlmsg_len) {
119859513c3eSOphir Munk 		struct rtattr *ra = (void *)((uintptr_t)nh + off);
119959513c3eSOphir Munk 		void *payload = RTA_DATA(ra);
120059513c3eSOphir Munk 		unsigned int i;
120159513c3eSOphir Munk 
120259513c3eSOphir Munk 		if (ra->rta_len > nh->nlmsg_len - off)
120359513c3eSOphir Munk 			goto error;
120459513c3eSOphir Munk 		switch (ra->rta_type) {
120559513c3eSOphir Munk 		case IFLA_NUM_VF:
120659513c3eSOphir Munk 			num_vf_set = true;
120759513c3eSOphir Munk 			break;
120859513c3eSOphir Munk 		case IFLA_PHYS_PORT_NAME:
120959513c3eSOphir Munk 			mlx5_translate_port_name((char *)payload, &info);
121059513c3eSOphir Munk 			break;
121159513c3eSOphir Munk 		case IFLA_PHYS_SWITCH_ID:
121259513c3eSOphir Munk 			info.switch_id = 0;
121359513c3eSOphir Munk 			for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
121459513c3eSOphir Munk 				info.switch_id <<= 8;
121559513c3eSOphir Munk 				info.switch_id |= ((uint8_t *)payload)[i];
121659513c3eSOphir Munk 			}
121759513c3eSOphir Munk 			switch_id_set = true;
121859513c3eSOphir Munk 			break;
121959513c3eSOphir Munk 		}
122059513c3eSOphir Munk 		off += RTA_ALIGN(ra->rta_len);
122159513c3eSOphir Munk 	}
122259513c3eSOphir Munk 	if (switch_id_set) {
122359513c3eSOphir Munk 		/* We have some E-Switch configuration. */
122459513c3eSOphir Munk 		mlx5_nl_check_switch_info(num_vf_set, &info);
122559513c3eSOphir Munk 	}
122659513c3eSOphir Munk 	MLX5_ASSERT(!(info.master && info.representor));
122759513c3eSOphir Munk 	memcpy(arg, &info, sizeof(info));
122859513c3eSOphir Munk 	return 0;
122959513c3eSOphir Munk error:
123059513c3eSOphir Munk 	rte_errno = EINVAL;
123159513c3eSOphir Munk 	return -rte_errno;
123259513c3eSOphir Munk }
123359513c3eSOphir Munk 
123459513c3eSOphir Munk /**
123559513c3eSOphir Munk  * Get switch information associated with network interface.
123659513c3eSOphir Munk  *
123759513c3eSOphir Munk  * @param nl
123859513c3eSOphir Munk  *   Netlink socket of the ROUTE kind (NETLINK_ROUTE).
123959513c3eSOphir Munk  * @param ifindex
124059513c3eSOphir Munk  *   Network interface index.
124159513c3eSOphir Munk  * @param[out] info
124259513c3eSOphir Munk  *   Switch information object, populated in case of success.
124359513c3eSOphir Munk  *
124459513c3eSOphir Munk  * @return
124559513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
124659513c3eSOphir Munk  */
124759513c3eSOphir Munk int
124859513c3eSOphir Munk mlx5_nl_switch_info(int nl, unsigned int ifindex,
124959513c3eSOphir Munk 		    struct mlx5_switch_info *info)
125059513c3eSOphir Munk {
125159513c3eSOphir Munk 	struct {
125259513c3eSOphir Munk 		struct nlmsghdr nh;
125359513c3eSOphir Munk 		struct ifinfomsg info;
125459513c3eSOphir Munk 		struct rtattr rta;
125559513c3eSOphir Munk 		uint32_t extmask;
125659513c3eSOphir Munk 	} req = {
125759513c3eSOphir Munk 		.nh = {
125859513c3eSOphir Munk 			.nlmsg_len = NLMSG_LENGTH
125959513c3eSOphir Munk 					(sizeof(req.info) +
126059513c3eSOphir Munk 					 RTA_LENGTH(sizeof(uint32_t))),
126159513c3eSOphir Munk 			.nlmsg_type = RTM_GETLINK,
126259513c3eSOphir Munk 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
126359513c3eSOphir Munk 		},
126459513c3eSOphir Munk 		.info = {
126559513c3eSOphir Munk 			.ifi_family = AF_UNSPEC,
126659513c3eSOphir Munk 			.ifi_index = ifindex,
126759513c3eSOphir Munk 		},
126859513c3eSOphir Munk 		.rta = {
126959513c3eSOphir Munk 			.rta_type = IFLA_EXT_MASK,
127059513c3eSOphir Munk 			.rta_len = RTA_LENGTH(sizeof(int32_t)),
127159513c3eSOphir Munk 		},
127259513c3eSOphir Munk 		.extmask = RTE_LE32(1),
127359513c3eSOphir Munk 	};
127459513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
127559513c3eSOphir Munk 	int ret;
127659513c3eSOphir Munk 
127759513c3eSOphir Munk 	ret = mlx5_nl_send(nl, &req.nh, sn);
127859513c3eSOphir Munk 	if (ret >= 0)
127959513c3eSOphir Munk 		ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
128059513c3eSOphir Munk 	if (info->master && info->representor) {
128159513c3eSOphir Munk 		DRV_LOG(ERR, "ifindex %u device is recognized as master"
128259513c3eSOphir Munk 			     " and as representor", ifindex);
128359513c3eSOphir Munk 		rte_errno = ENODEV;
128459513c3eSOphir Munk 		ret = -rte_errno;
128559513c3eSOphir Munk 	}
128659513c3eSOphir Munk 	return ret;
128759513c3eSOphir Munk }
128859513c3eSOphir Munk 
128959513c3eSOphir Munk /*
129059513c3eSOphir Munk  * Delete VLAN network device by ifindex.
129159513c3eSOphir Munk  *
129259513c3eSOphir Munk  * @param[in] tcf
129359513c3eSOphir Munk  *   Context object initialized by mlx5_nl_vlan_vmwa_init().
129459513c3eSOphir Munk  * @param[in] ifindex
129559513c3eSOphir Munk  *   Interface index of network device to delete.
129659513c3eSOphir Munk  */
129759513c3eSOphir Munk void
129859513c3eSOphir Munk mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa,
129959513c3eSOphir Munk 		      uint32_t ifindex)
130059513c3eSOphir Munk {
130159513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
130259513c3eSOphir Munk 	int ret;
130359513c3eSOphir Munk 	struct {
130459513c3eSOphir Munk 		struct nlmsghdr nh;
130559513c3eSOphir Munk 		struct ifinfomsg info;
130659513c3eSOphir Munk 	} req = {
130759513c3eSOphir Munk 		.nh = {
130859513c3eSOphir Munk 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
130959513c3eSOphir Munk 			.nlmsg_type = RTM_DELLINK,
131059513c3eSOphir Munk 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
131159513c3eSOphir Munk 		},
131259513c3eSOphir Munk 		.info = {
131359513c3eSOphir Munk 			.ifi_family = AF_UNSPEC,
131459513c3eSOphir Munk 			.ifi_index = ifindex,
131559513c3eSOphir Munk 		},
131659513c3eSOphir Munk 	};
131759513c3eSOphir Munk 
131859513c3eSOphir Munk 	if (ifindex) {
131959513c3eSOphir Munk 		ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn);
132059513c3eSOphir Munk 		if (ret >= 0)
132159513c3eSOphir Munk 			ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
132259513c3eSOphir Munk 		if (ret < 0)
132359513c3eSOphir Munk 			DRV_LOG(WARNING, "netlink: error deleting VLAN WA"
132459513c3eSOphir Munk 				" ifindex %u, %d", ifindex, ret);
132559513c3eSOphir Munk 	}
132659513c3eSOphir Munk }
132759513c3eSOphir Munk 
/* Set of subroutines to build Netlink message. */

/*
 * Return the address right after the current message content, i.e. the
 * message start plus its length rounded up to the Netlink alignment. This is
 * where the next attribute is to be written.
 */
static struct nlattr *
nl_msg_tail(struct nlmsghdr *nlh)
{
	uint8_t *start = (uint8_t *)nlh;

	return (struct nlattr *)(start + NLMSG_ALIGN(nlh->nlmsg_len));
}
133559513c3eSOphir Munk 
/*
 * Append one attribute of the given type at the message tail and grow
 * nlmsg_len by the aligned attribute size. When alen is nonzero, alen bytes
 * are copied from data right after the attribute header. No capacity check
 * is done here: the buffer behind nlh must already be large enough.
 */
static void
nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
{
	struct nlattr *attr = nl_msg_tail(nlh);
	uint8_t *value = (uint8_t *)attr + sizeof(struct nlattr);

	attr->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
	attr->nla_type = type;
	nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
	if (alen != 0)
		memcpy(value, data, alen);
}
134859513c3eSOphir Munk 
/*
 * Open a nested attribute at the tail of a Netlink message.
 *
 * @param[in, out] nlh
 *   Message being built.
 * @param[in] type
 *   Attribute type of the nest.
 *
 * @return
 *   Pointer to the nest header, to be handed to nl_attr_nest_end() once the
 *   inner attributes have been appended.
 */
static struct nlattr *
nl_attr_nest_start(struct nlmsghdr *nlh, int type)
{
	struct nlattr *container = nl_msg_tail(nlh);

	/* Emit the header only; there is no payload of its own. */
	nl_attr_put(nlh, type, NULL, 0);
	return container;
}
135759513c3eSOphir Munk 
/*
 * Close a nested attribute opened by nl_attr_nest_start().
 *
 * @param[in] nlh
 *   Message being built.
 * @param[in, out] nest
 *   Nest header; its nla_len is set to the distance from the header to the
 *   current message tail, i.e. it covers everything appended since.
 */
static void
nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
{
	const uint8_t *first = (const uint8_t *)nest;
	const uint8_t *last = (const uint8_t *)nl_msg_tail(nlh);

	nest->nla_len = last - first;
}
136359513c3eSOphir Munk 
136459513c3eSOphir Munk /*
136559513c3eSOphir Munk  * Create network VLAN device with specified VLAN tag.
136659513c3eSOphir Munk  *
136759513c3eSOphir Munk  * @param[in] tcf
136859513c3eSOphir Munk  *   Context object initialized by mlx5_nl_vlan_vmwa_init().
136959513c3eSOphir Munk  * @param[in] ifindex
137059513c3eSOphir Munk  *   Base network interface index.
137159513c3eSOphir Munk  * @param[in] tag
137259513c3eSOphir Munk  *   VLAN tag for VLAN network device to create.
137359513c3eSOphir Munk  */
137459513c3eSOphir Munk uint32_t
137559513c3eSOphir Munk mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa,
137659513c3eSOphir Munk 			 uint32_t ifindex, uint16_t tag)
137759513c3eSOphir Munk {
137859513c3eSOphir Munk 	struct nlmsghdr *nlh;
137959513c3eSOphir Munk 	struct ifinfomsg *ifm;
138059513c3eSOphir Munk 	char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
138159513c3eSOphir Munk 
138259513c3eSOphir Munk 	__rte_cache_aligned
138359513c3eSOphir Munk 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
138459513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
138559513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
138659513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(uint32_t)) +
138759513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(name)) +
138859513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof("vlan")) +
138959513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(uint32_t)) +
139059513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(uint16_t)) + 16];
139159513c3eSOphir Munk 	struct nlattr *na_info;
139259513c3eSOphir Munk 	struct nlattr *na_vlan;
139359513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
139459513c3eSOphir Munk 	int ret;
139559513c3eSOphir Munk 
139659513c3eSOphir Munk 	memset(buf, 0, sizeof(buf));
139759513c3eSOphir Munk 	nlh = (struct nlmsghdr *)buf;
139859513c3eSOphir Munk 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
139959513c3eSOphir Munk 	nlh->nlmsg_type = RTM_NEWLINK;
140059513c3eSOphir Munk 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
140159513c3eSOphir Munk 			   NLM_F_EXCL | NLM_F_ACK;
140259513c3eSOphir Munk 	ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
140359513c3eSOphir Munk 	nlh->nlmsg_len += sizeof(struct ifinfomsg);
140459513c3eSOphir Munk 	ifm->ifi_family = AF_UNSPEC;
140559513c3eSOphir Munk 	ifm->ifi_type = 0;
140659513c3eSOphir Munk 	ifm->ifi_index = 0;
140759513c3eSOphir Munk 	ifm->ifi_flags = IFF_UP;
140859513c3eSOphir Munk 	ifm->ifi_change = 0xffffffff;
140959513c3eSOphir Munk 	nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
141059513c3eSOphir Munk 	ret = snprintf(name, sizeof(name), "%s.%u.%u",
141159513c3eSOphir Munk 		       MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
141259513c3eSOphir Munk 	nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
141359513c3eSOphir Munk 	na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
141459513c3eSOphir Munk 	nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
141559513c3eSOphir Munk 	na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
141659513c3eSOphir Munk 	nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
141759513c3eSOphir Munk 	nl_attr_nest_end(nlh, na_vlan);
141859513c3eSOphir Munk 	nl_attr_nest_end(nlh, na_info);
141959513c3eSOphir Munk 	MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len);
142059513c3eSOphir Munk 	ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn);
142159513c3eSOphir Munk 	if (ret >= 0)
142259513c3eSOphir Munk 		ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
142359513c3eSOphir Munk 	if (ret < 0) {
142459513c3eSOphir Munk 		DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name,
142559513c3eSOphir Munk 			ret);
142659513c3eSOphir Munk 	}
142759513c3eSOphir Munk 	/* Try to get ifindex of created or pre-existing device. */
142859513c3eSOphir Munk 	ret = if_nametoindex(name);
142959513c3eSOphir Munk 	if (!ret) {
143059513c3eSOphir Munk 		DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name,
143159513c3eSOphir Munk 			errno);
143259513c3eSOphir Munk 		return 0;
143359513c3eSOphir Munk 	}
143459513c3eSOphir Munk 	return ret;
143559513c3eSOphir Munk }
143659513c3eSOphir Munk 
/**
 * Parse Netlink message to retrieve the general family ID.
 *
 * Walks the attributes that follow the generic Netlink header and stops at
 * the first CTRL_ATTR_FAMILY_ID attribute.
 *
 * @param nh
 *   Pointer to Netlink Message Header.
 * @param arg
 *   Callback argument; must point to the uint16_t receiving the family ID.
 *
 * @return
 *   0 when the family ID was found and stored, -EINVAL otherwise.
 */
static int
mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg)
{
	uint8_t *msg = (uint8_t *)nh;
	struct nlattr *tail = (struct nlattr *)(msg + nh->nlmsg_len);
	struct nlattr *nla = (struct nlattr *)
		(msg + NLMSG_ALIGN(sizeof(*nh)) +
		 NLMSG_ALIGN(sizeof(struct genlmsghdr)));

	/*
	 * Check the bound before touching the attribute: once the cursor
	 * reaches the tail there is no attribute header left to read.
	 */
	while (nla < tail && nla->nla_len) {
		if (nla->nla_type == CTRL_ATTR_FAMILY_ID) {
			/* The payload starts right after the header. */
			*(uint16_t *)arg = *(uint16_t *)(nla + 1);
			return 0;
		}
		nla = (struct nlattr *)
			((uint8_t *)nla + NLMSG_ALIGN(nla->nla_len));
	}
	return -EINVAL;
}
146559513c3eSOphir Munk 
146659513c3eSOphir Munk #define MLX5_NL_MAX_ATTR_SIZE 100
146759513c3eSOphir Munk /**
146859513c3eSOphir Munk  * Get generic netlink family ID.
146959513c3eSOphir Munk  *
147059513c3eSOphir Munk  * @param[in] nlsk_fd
147159513c3eSOphir Munk  *   Netlink socket file descriptor.
147259513c3eSOphir Munk  * @param[in] name
147359513c3eSOphir Munk  *   The family name.
147459513c3eSOphir Munk  *
147559513c3eSOphir Munk  * @return
147659513c3eSOphir Munk  *   ID >= 0 on success and @p enable is updated, a negative errno value
147759513c3eSOphir Munk  *   otherwise and rte_errno is set.
147859513c3eSOphir Munk  */
147959513c3eSOphir Munk static int
148059513c3eSOphir Munk mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name)
148159513c3eSOphir Munk {
148259513c3eSOphir Munk 	struct nlmsghdr *nlh;
148359513c3eSOphir Munk 	struct genlmsghdr *genl;
148459513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
148559513c3eSOphir Munk 	int name_size = strlen(name) + 1;
148659513c3eSOphir Munk 	int ret;
148759513c3eSOphir Munk 	uint16_t id = -1;
148859513c3eSOphir Munk 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
148959513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
149059513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct nlattr)) +
149159513c3eSOphir Munk 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)];
149259513c3eSOphir Munk 
149359513c3eSOphir Munk 	memset(buf, 0, sizeof(buf));
149459513c3eSOphir Munk 	nlh = (struct nlmsghdr *)buf;
149559513c3eSOphir Munk 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
149659513c3eSOphir Munk 	nlh->nlmsg_type = GENL_ID_CTRL;
149759513c3eSOphir Munk 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
149859513c3eSOphir Munk 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
149959513c3eSOphir Munk 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
150059513c3eSOphir Munk 	genl->cmd = CTRL_CMD_GETFAMILY;
150159513c3eSOphir Munk 	genl->version = 1;
150259513c3eSOphir Munk 	nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size);
150359513c3eSOphir Munk 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
150459513c3eSOphir Munk 	if (ret >= 0)
150559513c3eSOphir Munk 		ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id);
150659513c3eSOphir Munk 	if (ret < 0) {
150759513c3eSOphir Munk 		DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name,
150859513c3eSOphir Munk 			ret);
150959513c3eSOphir Munk 		return ret;
151059513c3eSOphir Munk 	}
151159513c3eSOphir Munk 	DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id);
151259513c3eSOphir Munk 	return (int)id;
151359513c3eSOphir Munk }
151459513c3eSOphir Munk 
/**
 * Get Devlink family ID.
 *
 * Resolves the generic Netlink family named DEVLINK_GENL_NAME.
 *
 * @param[in] nlsk_fd
 *   Netlink socket file descriptor.
 *
 * @return
 *   ID >= 0 on success, a negative errno value otherwise, as returned by
 *   mlx5_nl_generic_family_id_get().
 */

int
mlx5_nl_devlink_family_id_get(int nlsk_fd)
{
	const char *family = DEVLINK_GENL_NAME;

	return mlx5_nl_generic_family_id_get(nlsk_fd, family);
}
153159513c3eSOphir Munk 
/**
 * Parse Netlink message to retrieve the ROCE enable status.
 *
 * Scans the attributes after the generic Netlink header. The nests
 * DEVLINK_ATTR_PARAM, DEVLINK_ATTR_PARAM_VALUES_LIST and
 * DEVLINK_ATTR_PARAM_VALUE are entered; the presence of a
 * DEVLINK_ATTR_PARAM_VALUE_DATA attribute is reported as "enabled".
 *
 * @param nh
 *   Pointer to Netlink Message Header.
 * @param arg
 *   Callback argument; must point to the int receiving the status
 *   (1 when the value attribute is present, 0 otherwise).
 *
 * @return
 *   0 when the value attribute or at least one of the expected nests was
 *   seen, -EINVAL when none of them is present.
 */
static int
mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg)
{

	int ret = -EINVAL;
	int *enable = arg;
	uint8_t *msg = (uint8_t *)nh;
	struct nlattr *tail = (struct nlattr *)(msg + nh->nlmsg_len);
	struct nlattr *nla = (struct nlattr *)
		(msg + NLMSG_ALIGN(sizeof(*nh)) +
		 NLMSG_ALIGN(sizeof(struct genlmsghdr)));

	/*
	 * Check the bound before touching the attribute: once the cursor
	 * reaches the tail there is no attribute header left to read.
	 */
	while (nla < tail && nla->nla_len) {
		switch (nla->nla_type) {
		/* Expected nested attributes case. */
		case DEVLINK_ATTR_PARAM:
		case DEVLINK_ATTR_PARAM_VALUES_LIST:
		case DEVLINK_ATTR_PARAM_VALUE:
			ret = 0;
			/* Step over the header only to descend into the nest. */
			nla += 1;
			break;
		case DEVLINK_ATTR_PARAM_VALUE_DATA:
			*enable = 1;
			return 0;
		default:
			/* Skip the whole attribute, payload included. */
			nla = (struct nlattr *)
				((uint8_t *)nla + NLMSG_ALIGN(nla->nla_len));
			break;
		}
	}
	*enable = 0;
	return ret;
}
157259513c3eSOphir Munk 
157359513c3eSOphir Munk /**
157459513c3eSOphir Munk  * Get ROCE enable status through Netlink.
157559513c3eSOphir Munk  *
157659513c3eSOphir Munk  * @param[in] nlsk_fd
157759513c3eSOphir Munk  *   Netlink socket file descriptor.
157859513c3eSOphir Munk  * @param[in] family_id
157959513c3eSOphir Munk  *   the Devlink family ID.
158059513c3eSOphir Munk  * @param pci_addr
158159513c3eSOphir Munk  *   The device PCI address.
158259513c3eSOphir Munk  * @param[out] enable
158359513c3eSOphir Munk  *   Where to store the enable status.
158459513c3eSOphir Munk  *
158559513c3eSOphir Munk  * @return
158659513c3eSOphir Munk  *   0 on success and @p enable is updated, a negative errno value otherwise
158759513c3eSOphir Munk  *   and rte_errno is set.
158859513c3eSOphir Munk  */
158959513c3eSOphir Munk int
159059513c3eSOphir Munk mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
159159513c3eSOphir Munk 			int *enable)
159259513c3eSOphir Munk {
159359513c3eSOphir Munk 	struct nlmsghdr *nlh;
159459513c3eSOphir Munk 	struct genlmsghdr *genl;
159559513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
159659513c3eSOphir Munk 	int ret;
159759513c3eSOphir Munk 	int cur_en = 0;
159859513c3eSOphir Munk 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
159959513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
160059513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 4 +
160159513c3eSOphir Munk 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4];
160259513c3eSOphir Munk 
160359513c3eSOphir Munk 	memset(buf, 0, sizeof(buf));
160459513c3eSOphir Munk 	nlh = (struct nlmsghdr *)buf;
160559513c3eSOphir Munk 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
160659513c3eSOphir Munk 	nlh->nlmsg_type = family_id;
160759513c3eSOphir Munk 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
160859513c3eSOphir Munk 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
160959513c3eSOphir Munk 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
161059513c3eSOphir Munk 	genl->cmd = DEVLINK_CMD_PARAM_GET;
161159513c3eSOphir Munk 	genl->version = DEVLINK_GENL_VERSION;
161259513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
161359513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
161459513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
161559513c3eSOphir Munk 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
161659513c3eSOphir Munk 	if (ret >= 0)
161759513c3eSOphir Munk 		ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en);
161859513c3eSOphir Munk 	if (ret < 0) {
161959513c3eSOphir Munk 		DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.",
162059513c3eSOphir Munk 			pci_addr, ret);
162159513c3eSOphir Munk 		return ret;
162259513c3eSOphir Munk 	}
162359513c3eSOphir Munk 	*enable = cur_en;
162459513c3eSOphir Munk 	DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".",
162559513c3eSOphir Munk 		cur_en ? "en" : "dis", pci_addr);
162659513c3eSOphir Munk 	return ret;
162759513c3eSOphir Munk }
162859513c3eSOphir Munk 
162959513c3eSOphir Munk /**
163059513c3eSOphir Munk  * Reload mlx5 device kernel driver through Netlink.
163159513c3eSOphir Munk  *
163259513c3eSOphir Munk  * @param[in] nlsk_fd
163359513c3eSOphir Munk  *   Netlink socket file descriptor.
163459513c3eSOphir Munk  * @param[in] family_id
163559513c3eSOphir Munk  *   the Devlink family ID.
163659513c3eSOphir Munk  * @param pci_addr
163759513c3eSOphir Munk  *   The device PCI address.
163859513c3eSOphir Munk  * @param[out] enable
163959513c3eSOphir Munk  *   The enable status to set.
164059513c3eSOphir Munk  *
164159513c3eSOphir Munk  * @return
164259513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
164359513c3eSOphir Munk  */
164459513c3eSOphir Munk int
164559513c3eSOphir Munk mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr)
164659513c3eSOphir Munk {
164759513c3eSOphir Munk 	struct nlmsghdr *nlh;
164859513c3eSOphir Munk 	struct genlmsghdr *genl;
164959513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
165059513c3eSOphir Munk 	int ret;
165159513c3eSOphir Munk 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
165259513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
165359513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 2 +
165459513c3eSOphir Munk 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2];
165559513c3eSOphir Munk 
165659513c3eSOphir Munk 	memset(buf, 0, sizeof(buf));
165759513c3eSOphir Munk 	nlh = (struct nlmsghdr *)buf;
165859513c3eSOphir Munk 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
165959513c3eSOphir Munk 	nlh->nlmsg_type = family_id;
166059513c3eSOphir Munk 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
166159513c3eSOphir Munk 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
166259513c3eSOphir Munk 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
166359513c3eSOphir Munk 	genl->cmd = DEVLINK_CMD_RELOAD;
166459513c3eSOphir Munk 	genl->version = DEVLINK_GENL_VERSION;
166559513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
166659513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
166759513c3eSOphir Munk 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
166859513c3eSOphir Munk 	if (ret >= 0)
166959513c3eSOphir Munk 		ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
167059513c3eSOphir Munk 	if (ret < 0) {
167159513c3eSOphir Munk 		DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d",
167259513c3eSOphir Munk 			pci_addr, ret);
167359513c3eSOphir Munk 		return ret;
167459513c3eSOphir Munk 	}
167559513c3eSOphir Munk 	DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.",
167659513c3eSOphir Munk 		pci_addr);
167759513c3eSOphir Munk 	return 0;
167859513c3eSOphir Munk }
167959513c3eSOphir Munk 
168059513c3eSOphir Munk /**
168159513c3eSOphir Munk  * Set ROCE enable status through Netlink.
168259513c3eSOphir Munk  *
168359513c3eSOphir Munk  * @param[in] nlsk_fd
168459513c3eSOphir Munk  *   Netlink socket file descriptor.
168559513c3eSOphir Munk  * @param[in] family_id
168659513c3eSOphir Munk  *   the Devlink family ID.
168759513c3eSOphir Munk  * @param pci_addr
168859513c3eSOphir Munk  *   The device PCI address.
168959513c3eSOphir Munk  * @param[out] enable
169059513c3eSOphir Munk  *   The enable status to set.
169159513c3eSOphir Munk  *
169259513c3eSOphir Munk  * @return
169359513c3eSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
169459513c3eSOphir Munk  */
169559513c3eSOphir Munk int
169659513c3eSOphir Munk mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
169759513c3eSOphir Munk 			int enable)
169859513c3eSOphir Munk {
169959513c3eSOphir Munk 	struct nlmsghdr *nlh;
170059513c3eSOphir Munk 	struct genlmsghdr *genl;
170159513c3eSOphir Munk 	uint32_t sn = MLX5_NL_SN_GENERATE;
170259513c3eSOphir Munk 	int ret;
170359513c3eSOphir Munk 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
170459513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
170559513c3eSOphir Munk 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 6 +
170659513c3eSOphir Munk 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6];
170759513c3eSOphir Munk 	uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT;
170859513c3eSOphir Munk 	uint8_t ptype = NLA_FLAG;
170959513c3eSOphir Munk ;
171059513c3eSOphir Munk 
171159513c3eSOphir Munk 	memset(buf, 0, sizeof(buf));
171259513c3eSOphir Munk 	nlh = (struct nlmsghdr *)buf;
171359513c3eSOphir Munk 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
171459513c3eSOphir Munk 	nlh->nlmsg_type = family_id;
171559513c3eSOphir Munk 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
171659513c3eSOphir Munk 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
171759513c3eSOphir Munk 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
171859513c3eSOphir Munk 	genl->cmd = DEVLINK_CMD_PARAM_SET;
171959513c3eSOphir Munk 	genl->version = DEVLINK_GENL_VERSION;
172059513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
172159513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
172259513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
172359513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode));
172459513c3eSOphir Munk 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype));
172559513c3eSOphir Munk 	if (enable)
172659513c3eSOphir Munk 		nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0);
172759513c3eSOphir Munk 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
172859513c3eSOphir Munk 	if (ret >= 0)
172959513c3eSOphir Munk 		ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
173059513c3eSOphir Munk 	if (ret < 0) {
173159513c3eSOphir Munk 		DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:"
173259513c3eSOphir Munk 			" %d.", enable ? "en" : "dis", pci_addr, ret);
173359513c3eSOphir Munk 		return ret;
173459513c3eSOphir Munk 	}
173559513c3eSOphir Munk 	DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.",
173659513c3eSOphir Munk 		pci_addr, enable ? "en" : "dis");
173759513c3eSOphir Munk 	/* Now, need to reload the driver. */
173859513c3eSOphir Munk 	return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);
173959513c3eSOphir Munk }
1740