xref: /dpdk/drivers/common/mlx5/linux/mlx5_nl.c (revision 8f393c4ffdc1ff9b46702708781723ca0f17f5ac)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5 
6 #include <errno.h>
7 #include <linux/if_link.h>
8 #include <linux/rtnetlink.h>
9 #include <linux/genetlink.h>
10 #include <net/if.h>
11 #include <rdma/rdma_netlink.h>
12 #include <stdbool.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <stdalign.h>
16 #include <string.h>
17 #include <sys/socket.h>
18 #include <unistd.h>
19 
20 #include <rte_errno.h>
21 
22 #include "mlx5_nl.h"
23 #include "../mlx5_common_log.h"
24 #include "mlx5_malloc.h"
25 #ifdef HAVE_DEVLINK
26 #include <linux/devlink.h>
27 #endif
28 
29 
/* Byte size of the buffer used to receive messages from the kernel. */
#define MLX5_NL_BUF_SIZE (32 * 1024)
/* Minimal send buffer size wanted on the Netlink socket, in bytes. */
#define MLX5_SEND_BUF_SIZE (32 * 1024)
/* Minimal receive buffer size wanted on the Netlink socket, in bytes. */
#define MLX5_RECV_BUF_SIZE (32 * 1024)
/* Upper bound on the length of a physical port name. */
#define MLX5_PHYS_PORT_NAME_MAX 128

/** Name prefix of the VLAN devices created by the driver. */
#define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"
/*
 * MLX5_NDA_RTA mirrors the NDA_RTA helper of the iproute2 sources
 * (include/libnetlink.h): it yields the first attribute located right
 * after the aligned struct ndmsg that starts at address r.
 */
#if !defined(MLX5_NDA_RTA)
#define MLX5_NDA_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
/*
 * NLMSG_TAIL mirrors the helper of the same name of the iproute2 sources
 * (include/libnetlink.h): it yields the address located right after the
 * aligned current length of message nmsg.
 */
#if !defined(NLMSG_TAIL)
#define NLMSG_TAIL(nmsg) \
	((struct rtattr *)((char *)(nmsg) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
#endif
/*
 * RDMA Netlink identifiers. They normally come from rdma/rdma_netlink.h,
 * but they are recent enough that many systems do not expose them yet.
 * Each value below is used only when the matching HAVE_* macro is not
 * defined.
 */
#if !defined(HAVE_RDMA_NL_NLDEV)
#define RDMA_NL_NLDEV 5
#endif
#if !defined(HAVE_RDMA_NLDEV_CMD_GET)
#define RDMA_NLDEV_CMD_GET 1
#endif
#if !defined(HAVE_RDMA_NLDEV_CMD_PORT_GET)
#define RDMA_NLDEV_CMD_PORT_GET 5
#endif
#if !defined(HAVE_RDMA_NLDEV_ATTR_DEV_INDEX)
#define RDMA_NLDEV_ATTR_DEV_INDEX 1
#endif
#if !defined(HAVE_RDMA_NLDEV_ATTR_DEV_NAME)
#define RDMA_NLDEV_ATTR_DEV_NAME 2
#endif
#if !defined(HAVE_RDMA_NLDEV_ATTR_PORT_INDEX)
#define RDMA_NLDEV_ATTR_PORT_INDEX 3
#endif
#if !defined(HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX)
#define RDMA_NLDEV_ATTR_NDEV_INDEX 50
#endif
84 
/*
 * Link attribute identifiers normally provided by linux/if_link.h.
 * Each value below is used only when the matching HAVE_* macro is not
 * defined.
 */
#if !defined(HAVE_IFLA_NUM_VF)
#define IFLA_NUM_VF 21
#endif
#if !defined(HAVE_IFLA_EXT_MASK)
#define IFLA_EXT_MASK 29
#endif
#if !defined(HAVE_IFLA_PHYS_SWITCH_ID)
#define IFLA_PHYS_SWITCH_ID 36
#endif
#if !defined(HAVE_IFLA_PHYS_PORT_NAME)
#define IFLA_PHYS_PORT_NAME 38
#endif
98 
/*
 * Devlink identifiers may be missing from the headers of old kernel
 * versions; provide a value for every macro that is not defined yet.
 */
#if !defined(DEVLINK_GENL_NAME)
#define DEVLINK_GENL_NAME "devlink"
#endif
#if !defined(DEVLINK_GENL_VERSION)
#define DEVLINK_GENL_VERSION 1
#endif
#if !defined(DEVLINK_ATTR_BUS_NAME)
#define DEVLINK_ATTR_BUS_NAME 1
#endif
#if !defined(DEVLINK_ATTR_DEV_NAME)
#define DEVLINK_ATTR_DEV_NAME 2
#endif
#if !defined(DEVLINK_ATTR_PARAM)
#define DEVLINK_ATTR_PARAM 80
#endif
#if !defined(DEVLINK_ATTR_PARAM_NAME)
#define DEVLINK_ATTR_PARAM_NAME 81
#endif
#if !defined(DEVLINK_ATTR_PARAM_TYPE)
#define DEVLINK_ATTR_PARAM_TYPE 83
#endif
#if !defined(DEVLINK_ATTR_PARAM_VALUES_LIST)
#define DEVLINK_ATTR_PARAM_VALUES_LIST 84
#endif
#if !defined(DEVLINK_ATTR_PARAM_VALUE)
#define DEVLINK_ATTR_PARAM_VALUE 85
#endif
#if !defined(DEVLINK_ATTR_PARAM_VALUE_DATA)
#define DEVLINK_ATTR_PARAM_VALUE_DATA 86
#endif
#if !defined(DEVLINK_ATTR_PARAM_VALUE_CMODE)
#define DEVLINK_ATTR_PARAM_VALUE_CMODE 87
#endif
#if !defined(DEVLINK_PARAM_CMODE_DRIVERINIT)
#define DEVLINK_PARAM_CMODE_DRIVERINIT 1
#endif
#if !defined(DEVLINK_CMD_RELOAD)
#define DEVLINK_CMD_RELOAD 37
#endif
#if !defined(DEVLINK_CMD_PARAM_GET)
#define DEVLINK_CMD_PARAM_GET 38
#endif
#if !defined(DEVLINK_CMD_PARAM_SET)
#define DEVLINK_CMD_PARAM_SET 39
#endif
#if !defined(NLA_FLAG)
#define NLA_FLAG 6
#endif
151 
152 /* Add/remove MAC address through Netlink */
153 struct mlx5_nl_mac_addr {
154 	struct rte_ether_addr (*mac)[];
155 	/**< MAC address handled by the device. */
156 	int mac_n; /**< Number of addresses in the array. */
157 };
158 
/* Bits reported in mlx5_nl_ifindex_data::flags, one per attribute found. */
#define MLX5_NL_CMD_GET_IB_NAME 0x01
#define MLX5_NL_CMD_GET_IB_INDEX 0x02
#define MLX5_NL_CMD_GET_NET_INDEX 0x04
#define MLX5_NL_CMD_GET_PORT_INDEX 0x08

/** Query context filled by mlx5_nl_cmdget_cb(). */
struct mlx5_nl_ifindex_data {
	/** IB device name to look for (in). */
	const char *name;
	/** MLX5_NL_CMD_GET_* bits of the attributes found (out). */
	uint32_t flags;
	/** IB device index (out). */
	uint32_t ibindex;
	/** Network interface index (out). */
	uint32_t ifindex;
	/** IB device max port number (out). */
	uint32_t portnum;
};
172 
/* Counter the Netlink sequence numbers are drawn from. */
uint32_t atomic_sn;

/*
 * Produce the next Netlink sequence number: atomically increment the
 * counter (relaxed ordering) and yield the incremented value.
 */
#define MLX5_NL_SN_GENERATE \
	(__atomic_fetch_add(&atomic_sn, 1, __ATOMIC_RELAXED) + 1)
177 
178 /**
179  * Opens a Netlink socket.
180  *
181  * @param protocol
182  *   Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
183  *
184  * @return
185  *   A file descriptor on success, a negative errno value otherwise and
186  *   rte_errno is set.
187  */
188 int
189 mlx5_nl_init(int protocol)
190 {
191 	int fd;
192 	int buf_size;
193 	socklen_t opt_size;
194 	struct sockaddr_nl local = {
195 		.nl_family = AF_NETLINK,
196 	};
197 	int ret;
198 
199 	fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
200 	if (fd == -1) {
201 		rte_errno = errno;
202 		return -rte_errno;
203 	}
204 	opt_size = sizeof(buf_size);
205 	ret = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf_size, &opt_size);
206 	if (ret == -1) {
207 		rte_errno = errno;
208 		goto error;
209 	}
210 	DRV_LOG(DEBUG, "Netlink socket send buffer: %d", buf_size);
211 	if (buf_size < MLX5_SEND_BUF_SIZE) {
212 		ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
213 				 &buf_size, sizeof(buf_size));
214 		if (ret == -1) {
215 			rte_errno = errno;
216 			goto error;
217 		}
218 	}
219 	opt_size = sizeof(buf_size);
220 	ret = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf_size, &opt_size);
221 	if (ret == -1) {
222 		rte_errno = errno;
223 		goto error;
224 	}
225 	DRV_LOG(DEBUG, "Netlink socket recv buffer: %d", buf_size);
226 	if (buf_size < MLX5_RECV_BUF_SIZE) {
227 		ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
228 				 &buf_size, sizeof(buf_size));
229 		if (ret == -1) {
230 			rte_errno = errno;
231 			goto error;
232 		}
233 	}
234 	ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
235 	if (ret == -1) {
236 		rte_errno = errno;
237 		goto error;
238 	}
239 	return fd;
240 error:
241 	close(fd);
242 	return -rte_errno;
243 }
244 
245 /**
246  * Send a request message to the kernel on the Netlink socket.
247  *
248  * @param[in] nlsk_fd
249  *   Netlink socket file descriptor.
250  * @param[in] nh
251  *   The Netlink message send to the kernel.
252  * @param[in] ssn
253  *   Sequence number.
254  * @param[in] req
255  *   Pointer to the request structure.
256  * @param[in] len
257  *   Length of the request in bytes.
258  *
259  * @return
260  *   The number of sent bytes on success, a negative errno value otherwise and
261  *   rte_errno is set.
262  */
263 static int
264 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
265 		int len)
266 {
267 	struct sockaddr_nl sa = {
268 		.nl_family = AF_NETLINK,
269 	};
270 	struct iovec iov[2] = {
271 		{ .iov_base = nh, .iov_len = sizeof(*nh), },
272 		{ .iov_base = req, .iov_len = len, },
273 	};
274 	struct msghdr msg = {
275 		.msg_name = &sa,
276 		.msg_namelen = sizeof(sa),
277 		.msg_iov = iov,
278 		.msg_iovlen = 2,
279 	};
280 	int send_bytes;
281 
282 	nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
283 	nh->nlmsg_seq = sn;
284 	send_bytes = sendmsg(nlsk_fd, &msg, 0);
285 	if (send_bytes < 0) {
286 		rte_errno = errno;
287 		return -rte_errno;
288 	}
289 	return send_bytes;
290 }
291 
292 /**
293  * Send a message to the kernel on the Netlink socket.
294  *
295  * @param[in] nlsk_fd
296  *   The Netlink socket file descriptor used for communication.
297  * @param[in] nh
298  *   The Netlink message send to the kernel.
299  * @param[in] sn
300  *   Sequence number.
301  *
302  * @return
303  *   The number of sent bytes on success, a negative errno value otherwise and
304  *   rte_errno is set.
305  */
306 static int
307 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
308 {
309 	struct sockaddr_nl sa = {
310 		.nl_family = AF_NETLINK,
311 	};
312 	struct iovec iov = {
313 		.iov_base = nh,
314 		.iov_len = nh->nlmsg_len,
315 	};
316 	struct msghdr msg = {
317 		.msg_name = &sa,
318 		.msg_namelen = sizeof(sa),
319 		.msg_iov = &iov,
320 		.msg_iovlen = 1,
321 	};
322 	int send_bytes;
323 
324 	nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
325 	nh->nlmsg_seq = sn;
326 	send_bytes = sendmsg(nlsk_fd, &msg, 0);
327 	if (send_bytes < 0) {
328 		rte_errno = errno;
329 		return -rte_errno;
330 	}
331 	return send_bytes;
332 }
333 
334 /**
335  * Receive a message from the kernel on the Netlink socket, following
336  * mlx5_nl_send().
337  *
338  * @param[in] nlsk_fd
339  *   The Netlink socket file descriptor used for communication.
340  * @param[in] sn
341  *   Sequence number.
342  * @param[in] cb
343  *   The callback function to call for each Netlink message received.
344  * @param[in, out] arg
345  *   Custom arguments for the callback.
346  *
347  * @return
348  *   0 on success, a negative errno value otherwise and rte_errno is set.
349  */
350 static int
351 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
352 	     void *arg)
353 {
354 	struct sockaddr_nl sa;
355 	struct iovec iov;
356 	struct msghdr msg = {
357 		.msg_name = &sa,
358 		.msg_namelen = sizeof(sa),
359 		.msg_iov = &iov,
360 		/* One message at a time */
361 		.msg_iovlen = 1,
362 	};
363 	void *buf = NULL;
364 	int multipart = 0;
365 	int ret = 0;
366 
367 	do {
368 		struct nlmsghdr *nh;
369 		int recv_bytes;
370 
371 		do {
372 			/* Query length of incoming message. */
373 			iov.iov_base = NULL;
374 			iov.iov_len = 0;
375 			recv_bytes = recvmsg(nlsk_fd, &msg,
376 					     MSG_PEEK | MSG_TRUNC);
377 			if (recv_bytes < 0) {
378 				rte_errno = errno;
379 				ret = -rte_errno;
380 				goto exit;
381 			}
382 			if (recv_bytes == 0) {
383 				rte_errno = ENODATA;
384 				ret = -rte_errno;
385 				goto exit;
386 			}
387 			/* Allocate buffer to fetch the message. */
388 			if (recv_bytes < MLX5_RECV_BUF_SIZE)
389 				recv_bytes = MLX5_RECV_BUF_SIZE;
390 			mlx5_free(buf);
391 			buf = mlx5_malloc(0, recv_bytes, 0, SOCKET_ID_ANY);
392 			if (!buf) {
393 				rte_errno = ENOMEM;
394 				ret = -rte_errno;
395 				goto exit;
396 			}
397 			/* Fetch the message. */
398 			iov.iov_base = buf;
399 			iov.iov_len = recv_bytes;
400 			recv_bytes = recvmsg(nlsk_fd, &msg, 0);
401 			if (recv_bytes == -1) {
402 				rte_errno = errno;
403 				ret = -rte_errno;
404 				goto exit;
405 			}
406 			nh = (struct nlmsghdr *)buf;
407 		} while (nh->nlmsg_seq != sn);
408 		for (;
409 		     NLMSG_OK(nh, (unsigned int)recv_bytes);
410 		     nh = NLMSG_NEXT(nh, recv_bytes)) {
411 			if (nh->nlmsg_type == NLMSG_ERROR) {
412 				struct nlmsgerr *err_data = NLMSG_DATA(nh);
413 
414 				if (err_data->error < 0) {
415 					rte_errno = -err_data->error;
416 					ret = -rte_errno;
417 					goto exit;
418 				}
419 				/* Ack message. */
420 				ret = 0;
421 				goto exit;
422 			}
423 			/* Multi-part msgs and their trailing DONE message. */
424 			if (nh->nlmsg_flags & NLM_F_MULTI) {
425 				if (nh->nlmsg_type == NLMSG_DONE) {
426 					ret =  0;
427 					goto exit;
428 				}
429 				multipart = 1;
430 			}
431 			if (cb) {
432 				ret = cb(nh, arg);
433 				if (ret < 0)
434 					goto exit;
435 			}
436 		}
437 	} while (multipart);
438 exit:
439 	mlx5_free(buf);
440 	return ret;
441 }
442 
443 /**
444  * Parse Netlink message to retrieve the bridge MAC address.
445  *
446  * @param nh
447  *   Pointer to Netlink Message Header.
448  * @param arg
449  *   PMD data register with this callback.
450  *
451  * @return
452  *   0 on success, a negative errno value otherwise and rte_errno is set.
453  */
454 static int
455 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
456 {
457 	struct mlx5_nl_mac_addr *data = arg;
458 	struct ndmsg *r = NLMSG_DATA(nh);
459 	struct rtattr *attribute;
460 	int len;
461 
462 	len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
463 	for (attribute = MLX5_NDA_RTA(r);
464 	     RTA_OK(attribute, len);
465 	     attribute = RTA_NEXT(attribute, len)) {
466 		if (attribute->rta_type == NDA_LLADDR) {
467 			if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
468 				DRV_LOG(WARNING,
469 					"not enough room to finalize the"
470 					" request");
471 				rte_errno = ENOMEM;
472 				return -rte_errno;
473 			}
474 #ifdef RTE_LIBRTE_MLX5_DEBUG
475 			char m[RTE_ETHER_ADDR_FMT_SIZE];
476 
477 			rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE,
478 					      RTA_DATA(attribute));
479 			DRV_LOG(DEBUG, "bridge MAC address %s", m);
480 #endif
481 			memcpy(&(*data->mac)[data->mac_n++],
482 			       RTA_DATA(attribute), RTE_ETHER_ADDR_LEN);
483 		}
484 	}
485 	return 0;
486 }
487 
488 /**
489  * Get bridge MAC addresses.
490  *
491  * @param[in] nlsk_fd
492  *   Netlink socket file descriptor.
493  * @param[in] iface_idx
494  *   Net device interface index.
495  * @param mac[out]
496  *   Pointer to the array table of MAC addresses to fill.
497  *   Its size should be of MLX5_MAX_MAC_ADDRESSES.
498  * @param mac_n[out]
499  *   Number of entries filled in MAC array.
500  *
501  * @return
502  *   0 on success, a negative errno value otherwise and rte_errno is set.
503  */
504 static int
505 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx,
506 		      struct rte_ether_addr (*mac)[], int *mac_n)
507 {
508 	struct {
509 		struct nlmsghdr	hdr;
510 		struct ifinfomsg ifm;
511 	} req = {
512 		.hdr = {
513 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
514 			.nlmsg_type = RTM_GETNEIGH,
515 			.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
516 		},
517 		.ifm = {
518 			.ifi_family = PF_BRIDGE,
519 			.ifi_index = iface_idx,
520 		},
521 	};
522 	struct mlx5_nl_mac_addr data = {
523 		.mac = mac,
524 		.mac_n = 0,
525 	};
526 	uint32_t sn = MLX5_NL_SN_GENERATE;
527 	int ret;
528 
529 	if (nlsk_fd == -1)
530 		return 0;
531 	ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm,
532 			      sizeof(struct ifinfomsg));
533 	if (ret < 0)
534 		goto error;
535 	ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data);
536 	if (ret < 0)
537 		goto error;
538 	*mac_n = data.mac_n;
539 	return 0;
540 error:
541 	DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s",
542 		iface_idx, strerror(rte_errno));
543 	return -rte_errno;
544 }
545 
546 /**
547  * Modify the MAC address neighbour table with Netlink.
548  *
549  * @param[in] nlsk_fd
550  *   Netlink socket file descriptor.
551  * @param[in] iface_idx
552  *   Net device interface index.
553  * @param mac
554  *   MAC address to consider.
555  * @param add
556  *   1 to add the MAC address, 0 to remove the MAC address.
557  *
558  * @return
559  *   0 on success, a negative errno value otherwise and rte_errno is set.
560  */
561 static int
562 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
563 			struct rte_ether_addr *mac, int add)
564 {
565 	struct {
566 		struct nlmsghdr hdr;
567 		struct ndmsg ndm;
568 		struct rtattr rta;
569 		uint8_t buffer[RTE_ETHER_ADDR_LEN];
570 	} req = {
571 		.hdr = {
572 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
573 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
574 				NLM_F_EXCL | NLM_F_ACK,
575 			.nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
576 		},
577 		.ndm = {
578 			.ndm_family = PF_BRIDGE,
579 			.ndm_state = NUD_NOARP | NUD_PERMANENT,
580 			.ndm_ifindex = iface_idx,
581 			.ndm_flags = NTF_SELF,
582 		},
583 		.rta = {
584 			.rta_type = NDA_LLADDR,
585 			.rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN),
586 		},
587 	};
588 	uint32_t sn = MLX5_NL_SN_GENERATE;
589 	int ret;
590 
591 	if (nlsk_fd == -1)
592 		return 0;
593 	memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN);
594 	req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
595 		RTA_ALIGN(req.rta.rta_len);
596 	ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
597 	if (ret < 0)
598 		goto error;
599 	ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
600 	if (ret < 0)
601 		goto error;
602 	return 0;
603 error:
604 #ifdef RTE_LIBRTE_MLX5_DEBUG
605 	{
606 		char m[RTE_ETHER_ADDR_FMT_SIZE];
607 
608 		rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac);
609 		DRV_LOG(DEBUG,
610 			"Interface %u cannot %s MAC address %s %s",
611 			iface_idx,
612 			add ? "add" : "remove", m, strerror(rte_errno));
613 	}
614 #endif
615 	return -rte_errno;
616 }
617 
618 /**
619  * Modify the VF MAC address neighbour table with Netlink.
620  *
621  * @param[in] nlsk_fd
622  *   Netlink socket file descriptor.
623  * @param[in] iface_idx
624  *   Net device interface index.
625  * @param mac
626  *    MAC address to consider.
627  * @param vf_index
628  *    VF index.
629  *
630  * @return
631  *    0 on success, a negative errno value otherwise and rte_errno is set.
632  */
633 int
634 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
635 			   struct rte_ether_addr *mac, int vf_index)
636 {
637 	int ret;
638 	struct {
639 		struct nlmsghdr hdr;
640 		struct ifinfomsg ifm;
641 		struct rtattr vf_list_rta;
642 		struct rtattr vf_info_rta;
643 		struct rtattr vf_mac_rta;
644 		struct ifla_vf_mac ivm;
645 	} req = {
646 		.hdr = {
647 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
648 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
649 			.nlmsg_type = RTM_BASE,
650 		},
651 		.ifm = {
652 			.ifi_index = iface_idx,
653 		},
654 		.vf_list_rta = {
655 			.rta_type = IFLA_VFINFO_LIST,
656 			.rta_len = RTA_ALIGN(RTA_LENGTH(0)),
657 		},
658 		.vf_info_rta = {
659 			.rta_type = IFLA_VF_INFO,
660 			.rta_len = RTA_ALIGN(RTA_LENGTH(0)),
661 		},
662 		.vf_mac_rta = {
663 			.rta_type = IFLA_VF_MAC,
664 		},
665 	};
666 	struct ifla_vf_mac ivm = {
667 		.vf = vf_index,
668 	};
669 	uint32_t sn = MLX5_NL_SN_GENERATE;
670 
671 	memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN);
672 	memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm));
673 
674 	req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm));
675 	req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
676 		RTA_ALIGN(req.vf_list_rta.rta_len) +
677 		RTA_ALIGN(req.vf_info_rta.rta_len) +
678 		RTA_ALIGN(req.vf_mac_rta.rta_len);
679 	req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
680 					       &req.vf_list_rta);
681 	req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
682 					       &req.vf_info_rta);
683 
684 	if (nlsk_fd < 0)
685 		return -1;
686 	ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
687 	if (ret < 0)
688 		goto error;
689 	ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
690 	if (ret < 0)
691 		goto error;
692 	return 0;
693 error:
694 	DRV_LOG(ERR,
695 		"representor %u cannot set VF MAC address "
696 		"%02X:%02X:%02X:%02X:%02X:%02X : %s",
697 		vf_index,
698 		mac->addr_bytes[0], mac->addr_bytes[1],
699 		mac->addr_bytes[2], mac->addr_bytes[3],
700 		mac->addr_bytes[4], mac->addr_bytes[5],
701 		strerror(rte_errno));
702 	return -rte_errno;
703 }
704 
705 /**
706  * Add a MAC address.
707  *
708  * @param[in] nlsk_fd
709  *   Netlink socket file descriptor.
710  * @param[in] iface_idx
711  *   Net device interface index.
712  * @param mac_own
713  *   BITFIELD_DECLARE array to store the mac.
714  * @param mac
715  *   MAC address to register.
716  * @param index
717  *   MAC address index.
718  *
719  * @return
720  *   0 on success, a negative errno value otherwise and rte_errno is set.
721  */
722 int
723 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx,
724 		     uint64_t *mac_own, struct rte_ether_addr *mac,
725 		     uint32_t index)
726 {
727 	int ret;
728 
729 	ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1);
730 	if (!ret) {
731 		MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
732 		if (index >= MLX5_MAX_MAC_ADDRESSES)
733 			return -EINVAL;
734 
735 		BITFIELD_SET(mac_own, index);
736 	}
737 	if (ret == -EEXIST)
738 		return 0;
739 	return ret;
740 }
741 
742 /**
743  * Remove a MAC address.
744  *
745  * @param[in] nlsk_fd
746  *   Netlink socket file descriptor.
747  * @param[in] iface_idx
748  *   Net device interface index.
749  * @param mac_own
750  *   BITFIELD_DECLARE array to store the mac.
751  * @param mac
752  *   MAC address to remove.
753  * @param index
754  *   MAC address index.
755  *
756  * @return
757  *   0 on success, a negative errno value otherwise and rte_errno is set.
758  */
759 int
760 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
761 			struct rte_ether_addr *mac, uint32_t index)
762 {
763 	MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
764 	if (index >= MLX5_MAX_MAC_ADDRESSES)
765 		return -EINVAL;
766 
767 	BITFIELD_RESET(mac_own, index);
768 	return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0);
769 }
770 
771 /**
772  * Synchronize Netlink bridge table to the internal table.
773  *
774  * @param[in] nlsk_fd
775  *   Netlink socket file descriptor.
776  * @param[in] iface_idx
777  *   Net device interface index.
778  * @param mac_addrs
779  *   Mac addresses array to sync.
780  * @param n
781  *   @p mac_addrs array size.
782  */
783 void
784 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx,
785 		      struct rte_ether_addr *mac_addrs, int n)
786 {
787 	struct rte_ether_addr macs[n];
788 	int macs_n = 0;
789 	int i;
790 	int ret;
791 
792 	memset(macs, 0, n * sizeof(macs[0]));
793 	ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n);
794 	if (ret)
795 		return;
796 	for (i = 0; i != macs_n; ++i) {
797 		int j;
798 
799 		/* Verify the address is not in the array yet. */
800 		for (j = 0; j != n; ++j)
801 			if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j]))
802 				break;
803 		if (j != n)
804 			continue;
805 		if (rte_is_multicast_ether_addr(&macs[i])) {
806 			/* Find the first entry available. */
807 			for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) {
808 				if (rte_is_zero_ether_addr(&mac_addrs[j])) {
809 					mac_addrs[j] = macs[i];
810 					break;
811 				}
812 			}
813 		} else {
814 			/* Find the first entry available. */
815 			for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) {
816 				if (rte_is_zero_ether_addr(&mac_addrs[j])) {
817 					mac_addrs[j] = macs[i];
818 					break;
819 				}
820 			}
821 		}
822 	}
823 }
824 
825 /**
826  * Flush all added MAC addresses.
827  *
828  * @param[in] nlsk_fd
829  *   Netlink socket file descriptor.
830  * @param[in] iface_idx
831  *   Net device interface index.
832  * @param[in] mac_addrs
833  *   Mac addresses array to flush.
834  * @param n
835  *   @p mac_addrs array size.
836  * @param mac_own
837  *   BITFIELD_DECLARE array to store the mac.
838  */
839 void
840 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx,
841 		       struct rte_ether_addr *mac_addrs, int n,
842 		       uint64_t *mac_own)
843 {
844 	int i;
845 
846 	if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES)
847 		return;
848 
849 	for (i = n - 1; i >= 0; --i) {
850 		struct rte_ether_addr *m = &mac_addrs[i];
851 
852 		if (BITFIELD_ISSET(mac_own, i))
853 			mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m,
854 						i);
855 	}
856 }
857 
858 /**
859  * Enable promiscuous / all multicast mode through Netlink.
860  *
861  * @param[in] nlsk_fd
862  *   Netlink socket file descriptor.
863  * @param[in] iface_idx
864  *   Net device interface index.
865  * @param flags
866  *   IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti.
867  * @param enable
868  *   Nonzero to enable, disable otherwise.
869  *
870  * @return
871  *   0 on success, a negative errno value otherwise and rte_errno is set.
872  */
873 static int
874 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags,
875 		     int enable)
876 {
877 	struct {
878 		struct nlmsghdr hdr;
879 		struct ifinfomsg ifi;
880 	} req = {
881 		.hdr = {
882 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
883 			.nlmsg_type = RTM_NEWLINK,
884 			.nlmsg_flags = NLM_F_REQUEST,
885 		},
886 		.ifi = {
887 			.ifi_flags = enable ? flags : 0,
888 			.ifi_change = flags,
889 			.ifi_index = iface_idx,
890 		},
891 	};
892 	uint32_t sn = MLX5_NL_SN_GENERATE;
893 	int ret;
894 
895 	MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
896 	if (nlsk_fd < 0)
897 		return 0;
898 	ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
899 	if (ret < 0)
900 		return ret;
901 	return 0;
902 }
903 
904 /**
905  * Enable promiscuous mode through Netlink.
906  *
907  * @param[in] nlsk_fd
908  *   Netlink socket file descriptor.
909  * @param[in] iface_idx
910  *   Net device interface index.
911  * @param enable
912  *   Nonzero to enable, disable otherwise.
913  *
914  * @return
915  *   0 on success, a negative errno value otherwise and rte_errno is set.
916  */
917 int
918 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable)
919 {
920 	int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable);
921 
922 	if (ret)
923 		DRV_LOG(DEBUG,
924 			"Interface %u cannot %s promisc mode: Netlink error %s",
925 			iface_idx, enable ? "enable" : "disable",
926 			strerror(rte_errno));
927 	return ret;
928 }
929 
930 /**
931  * Enable all multicast mode through Netlink.
932  *
933  * @param[in] nlsk_fd
934  *   Netlink socket file descriptor.
935  * @param[in] iface_idx
936  *   Net device interface index.
937  * @param enable
938  *   Nonzero to enable, disable otherwise.
939  *
940  * @return
941  *   0 on success, a negative errno value otherwise and rte_errno is set.
942  */
943 int
944 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
945 {
946 	int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI,
947 				       enable);
948 
949 	if (ret)
950 		DRV_LOG(DEBUG,
951 			"Interface %u cannot %s allmulti : Netlink error %s",
952 			iface_idx, enable ? "enable" : "disable",
953 			strerror(rte_errno));
954 	return ret;
955 }
956 
957 /**
958  * Process network interface information from Netlink message.
959  *
960  * @param nh
961  *   Pointer to Netlink message header.
962  * @param arg
963  *   Opaque data pointer for this callback.
964  *
965  * @return
966  *   0 on success, a negative errno value otherwise and rte_errno is set.
967  */
968 static int
969 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
970 {
971 	struct mlx5_nl_ifindex_data *data = arg;
972 	struct mlx5_nl_ifindex_data local = {
973 		.flags = 0,
974 	};
975 	size_t off = NLMSG_HDRLEN;
976 
977 	if (nh->nlmsg_type !=
978 	    RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
979 	    nh->nlmsg_type !=
980 	    RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET))
981 		goto error;
982 	while (off < nh->nlmsg_len) {
983 		struct nlattr *na = (void *)((uintptr_t)nh + off);
984 		void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
985 
986 		if (na->nla_len > nh->nlmsg_len - off)
987 			goto error;
988 		switch (na->nla_type) {
989 		case RDMA_NLDEV_ATTR_DEV_INDEX:
990 			local.ibindex = *(uint32_t *)payload;
991 			local.flags |= MLX5_NL_CMD_GET_IB_INDEX;
992 			break;
993 		case RDMA_NLDEV_ATTR_DEV_NAME:
994 			if (!strcmp(payload, data->name))
995 				local.flags |= MLX5_NL_CMD_GET_IB_NAME;
996 			break;
997 		case RDMA_NLDEV_ATTR_NDEV_INDEX:
998 			local.ifindex = *(uint32_t *)payload;
999 			local.flags |= MLX5_NL_CMD_GET_NET_INDEX;
1000 			break;
1001 		case RDMA_NLDEV_ATTR_PORT_INDEX:
1002 			local.portnum = *(uint32_t *)payload;
1003 			local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
1004 			break;
1005 		default:
1006 			break;
1007 		}
1008 		off += NLA_ALIGN(na->nla_len);
1009 	}
1010 	/*
1011 	 * It is possible to have multiple messages for all
1012 	 * Infiniband devices in the system with appropriate name.
1013 	 * So we should gather parameters locally and copy to
1014 	 * query context only in case of coinciding device name.
1015 	 */
1016 	if (local.flags & MLX5_NL_CMD_GET_IB_NAME) {
1017 		data->flags = local.flags;
1018 		data->ibindex = local.ibindex;
1019 		data->ifindex = local.ifindex;
1020 		data->portnum = local.portnum;
1021 	}
1022 	return 0;
1023 error:
1024 	rte_errno = EINVAL;
1025 	return -rte_errno;
1026 }
1027 
1028 /**
1029  * Get index of network interface associated with some IB device.
1030  *
1031  * This is the only somewhat safe method to avoid resorting to heuristics
1032  * when faced with port representors. Unfortunately it requires at least
1033  * Linux 4.17.
1034  *
1035  * @param nl
1036  *   Netlink socket of the RDMA kind (NETLINK_RDMA).
1037  * @param[in] name
1038  *   IB device name.
1039  * @param[in] pindex
1040  *   IB device port index, starting from 1
1041  * @return
1042  *   A valid (nonzero) interface index on success, 0 otherwise and rte_errno
1043  *   is set.
1044  */
1045 unsigned int
1046 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
1047 {
1048 	struct mlx5_nl_ifindex_data data = {
1049 		.name = name,
1050 		.flags = 0,
1051 		.ibindex = 0, /* Determined during first pass. */
1052 		.ifindex = 0, /* Determined during second pass. */
1053 	};
1054 	union {
1055 		struct nlmsghdr nh;
1056 		uint8_t buf[NLMSG_HDRLEN +
1057 			    NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) +
1058 			    NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
1059 	} req = {
1060 		.nh = {
1061 			.nlmsg_len = NLMSG_LENGTH(0),
1062 			.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1063 						       RDMA_NLDEV_CMD_GET),
1064 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1065 		},
1066 	};
1067 	struct nlattr *na;
1068 	uint32_t sn = MLX5_NL_SN_GENERATE;
1069 	int ret;
1070 
1071 	ret = mlx5_nl_send(nl, &req.nh, sn);
1072 	if (ret < 0)
1073 		return 0;
1074 	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1075 	if (ret < 0)
1076 		return 0;
1077 	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1078 	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX))
1079 		goto error;
1080 	data.flags = 0;
1081 	sn = MLX5_NL_SN_GENERATE;
1082 	req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1083 					     RDMA_NLDEV_CMD_PORT_GET);
1084 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1085 	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
1086 	na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
1087 	na->nla_len = NLA_HDRLEN + sizeof(data.ibindex);
1088 	na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
1089 	memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1090 	       &data.ibindex, sizeof(data.ibindex));
1091 	na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
1092 	na->nla_len = NLA_HDRLEN + sizeof(pindex);
1093 	na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
1094 	memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1095 	       &pindex, sizeof(pindex));
1096 	ret = mlx5_nl_send(nl, &req.nh, sn);
1097 	if (ret < 0)
1098 		return 0;
1099 	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1100 	if (ret < 0)
1101 		return 0;
1102 	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1103 	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1104 	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) ||
1105 	    !data.ifindex)
1106 		goto error;
1107 	return data.ifindex;
1108 error:
1109 	rte_errno = ENODEV;
1110 	return 0;
1111 }
1112 
1113 /**
1114  * Get the number of physical ports of given IB device.
1115  *
1116  * @param nl
1117  *   Netlink socket of the RDMA kind (NETLINK_RDMA).
1118  * @param[in] name
1119  *   IB device name.
1120  *
1121  * @return
1122  *   A valid (nonzero) number of ports on success, 0 otherwise
1123  *   and rte_errno is set.
1124  */
1125 unsigned int
1126 mlx5_nl_portnum(int nl, const char *name)
1127 {
1128 	struct mlx5_nl_ifindex_data data = {
1129 		.flags = 0,
1130 		.name = name,
1131 		.ifindex = 0,
1132 		.portnum = 0,
1133 	};
1134 	struct nlmsghdr req = {
1135 		.nlmsg_len = NLMSG_LENGTH(0),
1136 		.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1137 					       RDMA_NLDEV_CMD_GET),
1138 		.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1139 	};
1140 	uint32_t sn = MLX5_NL_SN_GENERATE;
1141 	int ret;
1142 
1143 	ret = mlx5_nl_send(nl, &req, sn);
1144 	if (ret < 0)
1145 		return 0;
1146 	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1147 	if (ret < 0)
1148 		return 0;
1149 	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1150 	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1151 	    !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) {
1152 		rte_errno = ENODEV;
1153 		return 0;
1154 	}
1155 	if (!data.portnum)
1156 		rte_errno = EINVAL;
1157 	return data.portnum;
1158 }
1159 
1160 /**
1161  * Analyze gathered port parameters via Netlink to recognize master
1162  * and representor devices for E-Switch configuration.
1163  *
1164  * @param[in] num_vf_set
1165  *   flag of presence of number of VFs port attribute.
1166  * @param[inout] switch_info
1167  *   Port information, including port name as a number and port name
1168  *   type if recognized
1169  *
1170  * @return
1171  *   master and representor flags are set in switch_info according to
1172  *   recognized parameters (if any).
1173  */
1174 static void
1175 mlx5_nl_check_switch_info(bool num_vf_set,
1176 			  struct mlx5_switch_info *switch_info)
1177 {
1178 	switch (switch_info->name_type) {
1179 	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1180 		/*
1181 		 * Name is not recognized, assume the master,
1182 		 * check the number of VFs key presence.
1183 		 */
1184 		switch_info->master = num_vf_set;
1185 		break;
1186 	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1187 		/*
1188 		 * Name is not set, this assumes the legacy naming
1189 		 * schema for master, just check if there is a
1190 		 * number of VFs key.
1191 		 */
1192 		switch_info->master = num_vf_set;
1193 		break;
1194 	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1195 		/* New uplink naming schema recognized. */
1196 		switch_info->master = 1;
1197 		break;
1198 	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1199 		/* Legacy representors naming schema. */
1200 		switch_info->representor = !num_vf_set;
1201 		break;
1202 	case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
1203 		/* Fallthrough */
1204 	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1205 		/* Fallthrough */
1206 	case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
1207 		/* New representors naming schema. */
1208 		switch_info->representor = 1;
1209 		break;
1210 	}
1211 }
1212 
1213 /**
1214  * Process switch information from Netlink message.
1215  *
1216  * @param nh
1217  *   Pointer to Netlink message header.
1218  * @param arg
1219  *   Opaque data pointer for this callback.
1220  *
1221  * @return
1222  *   0 on success, a negative errno value otherwise and rte_errno is set.
1223  */
1224 static int
1225 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
1226 {
1227 	struct mlx5_switch_info info = {
1228 		.master = 0,
1229 		.representor = 0,
1230 		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
1231 		.port_name = 0,
1232 		.switch_id = 0,
1233 	};
1234 	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1235 	bool switch_id_set = false;
1236 	bool num_vf_set = false;
1237 	int len;
1238 
1239 	if (nh->nlmsg_type != RTM_NEWLINK)
1240 		goto error;
1241 	while (off < nh->nlmsg_len) {
1242 		struct rtattr *ra = (void *)((uintptr_t)nh + off);
1243 		void *payload = RTA_DATA(ra);
1244 		unsigned int i;
1245 
1246 		if (ra->rta_len > nh->nlmsg_len - off)
1247 			goto error;
1248 		switch (ra->rta_type) {
1249 		case IFLA_NUM_VF:
1250 			num_vf_set = true;
1251 			break;
1252 		case IFLA_PHYS_PORT_NAME:
1253 			len = RTA_PAYLOAD(ra);
1254 			/* Some kernels do not pad attributes with zero. */
1255 			if (len > 0 && len < MLX5_PHYS_PORT_NAME_MAX) {
1256 				char name[MLX5_PHYS_PORT_NAME_MAX];
1257 
1258 				/*
1259 				 * We can't just patch the message with padding
1260 				 * zero - it might corrupt the following items
1261 				 * in the message, we have to copy the string
1262 				 * by attribute length and pad the copied one.
1263 				 */
1264 				memcpy(name, payload, len);
1265 				name[len] = 0;
1266 				mlx5_translate_port_name(name, &info);
1267 			} else {
1268 				info.name_type =
1269 					MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
1270 			}
1271 			break;
1272 		case IFLA_PHYS_SWITCH_ID:
1273 			info.switch_id = 0;
1274 			for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
1275 				info.switch_id <<= 8;
1276 				info.switch_id |= ((uint8_t *)payload)[i];
1277 			}
1278 			switch_id_set = true;
1279 			break;
1280 		}
1281 		off += RTA_ALIGN(ra->rta_len);
1282 	}
1283 	if (switch_id_set) {
1284 		/* We have some E-Switch configuration. */
1285 		mlx5_nl_check_switch_info(num_vf_set, &info);
1286 	}
1287 	MLX5_ASSERT(!(info.master && info.representor));
1288 	memcpy(arg, &info, sizeof(info));
1289 	return 0;
1290 error:
1291 	rte_errno = EINVAL;
1292 	return -rte_errno;
1293 }
1294 
1295 /**
1296  * Get switch information associated with network interface.
1297  *
1298  * @param nl
1299  *   Netlink socket of the ROUTE kind (NETLINK_ROUTE).
1300  * @param ifindex
1301  *   Network interface index.
1302  * @param[out] info
1303  *   Switch information object, populated in case of success.
1304  *
1305  * @return
1306  *   0 on success, a negative errno value otherwise and rte_errno is set.
1307  */
1308 int
1309 mlx5_nl_switch_info(int nl, unsigned int ifindex,
1310 		    struct mlx5_switch_info *info)
1311 {
1312 	struct {
1313 		struct nlmsghdr nh;
1314 		struct ifinfomsg info;
1315 		struct rtattr rta;
1316 		uint32_t extmask;
1317 	} req = {
1318 		.nh = {
1319 			.nlmsg_len = NLMSG_LENGTH
1320 					(sizeof(req.info) +
1321 					 RTA_LENGTH(sizeof(uint32_t))),
1322 			.nlmsg_type = RTM_GETLINK,
1323 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1324 		},
1325 		.info = {
1326 			.ifi_family = AF_UNSPEC,
1327 			.ifi_index = ifindex,
1328 		},
1329 		.rta = {
1330 			.rta_type = IFLA_EXT_MASK,
1331 			.rta_len = RTA_LENGTH(sizeof(int32_t)),
1332 		},
1333 		.extmask = RTE_LE32(1),
1334 	};
1335 	uint32_t sn = MLX5_NL_SN_GENERATE;
1336 	int ret;
1337 
1338 	ret = mlx5_nl_send(nl, &req.nh, sn);
1339 	if (ret >= 0)
1340 		ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
1341 	if (info->master && info->representor) {
1342 		DRV_LOG(ERR, "ifindex %u device is recognized as master"
1343 			     " and as representor", ifindex);
1344 		rte_errno = ENODEV;
1345 		ret = -rte_errno;
1346 	}
1347 	return ret;
1348 }
1349 
1350 /*
1351  * Delete VLAN network device by ifindex.
1352  *
1353  * @param[in] tcf
1354  *   Context object initialized by mlx5_nl_vlan_vmwa_init().
1355  * @param[in] ifindex
1356  *   Interface index of network device to delete.
1357  */
1358 void
1359 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa,
1360 		      uint32_t ifindex)
1361 {
1362 	uint32_t sn = MLX5_NL_SN_GENERATE;
1363 	int ret;
1364 	struct {
1365 		struct nlmsghdr nh;
1366 		struct ifinfomsg info;
1367 	} req = {
1368 		.nh = {
1369 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
1370 			.nlmsg_type = RTM_DELLINK,
1371 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1372 		},
1373 		.info = {
1374 			.ifi_family = AF_UNSPEC,
1375 			.ifi_index = ifindex,
1376 		},
1377 	};
1378 
1379 	if (ifindex) {
1380 		ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn);
1381 		if (ret >= 0)
1382 			ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1383 		if (ret < 0)
1384 			DRV_LOG(WARNING, "netlink: error deleting VLAN WA"
1385 				" ifindex %u, %d", ifindex, ret);
1386 	}
1387 }
1388 
/* Set of subroutines to build Netlink message. */

/*
 * Return the address right after the message contents, i.e. the message
 * start plus its current length rounded up to the Netlink alignment.
 */
static struct nlattr *
nl_msg_tail(struct nlmsghdr *nlh)
{
	uint8_t *base = (uint8_t *)nlh;
	size_t used = NLMSG_ALIGN(nlh->nlmsg_len);

	return (struct nlattr *)(base + used);
}
1396 
/*
 * Append an attribute of the given type at the message tail, copy alen
 * bytes of data as its payload (none when alen is zero) and grow the
 * message length by the aligned attribute size. The caller provides the
 * room; no buffer bound is checked here.
 */
static void
nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
{
	struct nlattr *attr = nl_msg_tail(nlh);
	uint8_t *payload = (uint8_t *)attr + sizeof(struct nlattr);

	attr->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
	attr->nla_type = type;
	if (alen != 0)
		memcpy(payload, data, alen);
	nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
}
1409 
/*
 * Open a nested attribute: append an empty attribute of the given type and
 * return its address so nl_attr_nest_end() can fix up the length later.
 */
static struct nlattr *
nl_attr_nest_start(struct nlmsghdr *nlh, int type)
{
	/* Remember where the header lands before the tail moves. */
	struct nlattr *start = nl_msg_tail(nlh);

	nl_attr_put(nlh, type, NULL, 0);
	return start;
}
1418 
/*
 * Close a nested attribute: set its length to the distance from the nest
 * header to the current message tail.
 */
static void
nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
{
	uint8_t *begin = (uint8_t *)nest;
	uint8_t *end = (uint8_t *)nl_msg_tail(nlh);

	nest->nla_len = end - begin;
}
1424 
1425 /*
1426  * Create network VLAN device with specified VLAN tag.
1427  *
1428  * @param[in] tcf
1429  *   Context object initialized by mlx5_nl_vlan_vmwa_init().
1430  * @param[in] ifindex
1431  *   Base network interface index.
1432  * @param[in] tag
1433  *   VLAN tag for VLAN network device to create.
1434  */
1435 uint32_t
1436 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa,
1437 			 uint32_t ifindex, uint16_t tag)
1438 {
1439 	struct nlmsghdr *nlh;
1440 	struct ifinfomsg *ifm;
1441 	char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
1442 
1443 	__rte_cache_aligned
1444 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1445 		    NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
1446 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
1447 		    NLMSG_ALIGN(sizeof(uint32_t)) +
1448 		    NLMSG_ALIGN(sizeof(name)) +
1449 		    NLMSG_ALIGN(sizeof("vlan")) +
1450 		    NLMSG_ALIGN(sizeof(uint32_t)) +
1451 		    NLMSG_ALIGN(sizeof(uint16_t)) + 16];
1452 	struct nlattr *na_info;
1453 	struct nlattr *na_vlan;
1454 	uint32_t sn = MLX5_NL_SN_GENERATE;
1455 	int ret;
1456 
1457 	memset(buf, 0, sizeof(buf));
1458 	nlh = (struct nlmsghdr *)buf;
1459 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1460 	nlh->nlmsg_type = RTM_NEWLINK;
1461 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
1462 			   NLM_F_EXCL | NLM_F_ACK;
1463 	ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
1464 	nlh->nlmsg_len += sizeof(struct ifinfomsg);
1465 	ifm->ifi_family = AF_UNSPEC;
1466 	ifm->ifi_type = 0;
1467 	ifm->ifi_index = 0;
1468 	ifm->ifi_flags = IFF_UP;
1469 	ifm->ifi_change = 0xffffffff;
1470 	nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
1471 	ret = snprintf(name, sizeof(name), "%s.%u.%u",
1472 		       MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
1473 	nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
1474 	na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
1475 	nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
1476 	na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
1477 	nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
1478 	nl_attr_nest_end(nlh, na_vlan);
1479 	nl_attr_nest_end(nlh, na_info);
1480 	MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len);
1481 	ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn);
1482 	if (ret >= 0)
1483 		ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1484 	if (ret < 0) {
1485 		DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name,
1486 			ret);
1487 	}
1488 	/* Try to get ifindex of created or pre-existing device. */
1489 	ret = if_nametoindex(name);
1490 	if (!ret) {
1491 		DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name,
1492 			errno);
1493 		return 0;
1494 	}
1495 	return ret;
1496 }
1497 
1498 /**
1499  * Parse Netlink message to retrieve the general family ID.
1500  *
1501  * @param nh
1502  *   Pointer to Netlink Message Header.
1503  * @param arg
1504  *   PMD data register with this callback.
1505  *
1506  * @return
1507  *   0 on success, a negative errno value otherwise and rte_errno is set.
1508  */
1509 static int
1510 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg)
1511 {
1512 
1513 	struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
1514 	struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1515 					NLMSG_ALIGN(sizeof(struct genlmsghdr)));
1516 
1517 	for (; nla->nla_len && nla < tail;
1518 	     nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) {
1519 		if (nla->nla_type == CTRL_ATTR_FAMILY_ID) {
1520 			*(uint16_t *)arg = *(uint16_t *)(nla + 1);
1521 			return 0;
1522 		}
1523 	}
1524 	return -EINVAL;
1525 }
1526 
1527 #define MLX5_NL_MAX_ATTR_SIZE 100
1528 /**
1529  * Get generic netlink family ID.
1530  *
1531  * @param[in] nlsk_fd
1532  *   Netlink socket file descriptor.
1533  * @param[in] name
1534  *   The family name.
1535  *
1536  * @return
1537  *   ID >= 0 on success and @p enable is updated, a negative errno value
1538  *   otherwise and rte_errno is set.
1539  */
1540 static int
1541 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name)
1542 {
1543 	struct nlmsghdr *nlh;
1544 	struct genlmsghdr *genl;
1545 	uint32_t sn = MLX5_NL_SN_GENERATE;
1546 	int name_size = strlen(name) + 1;
1547 	int ret;
1548 	uint16_t id = -1;
1549 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1550 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1551 		    NLMSG_ALIGN(sizeof(struct nlattr)) +
1552 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)];
1553 
1554 	memset(buf, 0, sizeof(buf));
1555 	nlh = (struct nlmsghdr *)buf;
1556 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1557 	nlh->nlmsg_type = GENL_ID_CTRL;
1558 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1559 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1560 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
1561 	genl->cmd = CTRL_CMD_GETFAMILY;
1562 	genl->version = 1;
1563 	nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size);
1564 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1565 	if (ret >= 0)
1566 		ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id);
1567 	if (ret < 0) {
1568 		DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name,
1569 			ret);
1570 		return ret;
1571 	}
1572 	DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id);
1573 	return (int)id;
1574 }
1575 
/**
 * Get Devlink family ID.
 *
 * Resolves the generic Netlink family registered under DEVLINK_GENL_NAME.
 *
 * @param[in] nlsk_fd
 *   Netlink socket file descriptor.
 *
 * @return
 *   ID >= 0 on success, a negative errno value otherwise and rte_errno
 *   is set.
 */

int
mlx5_nl_devlink_family_id_get(int nlsk_fd)
{
	const char *family = DEVLINK_GENL_NAME;

	return mlx5_nl_generic_family_id_get(nlsk_fd, family);
}
1592 
1593 /**
1594  * Parse Netlink message to retrieve the ROCE enable status.
1595  *
1596  * @param nh
1597  *   Pointer to Netlink Message Header.
1598  * @param arg
1599  *   PMD data register with this callback.
1600  *
1601  * @return
1602  *   0 on success, a negative errno value otherwise and rte_errno is set.
1603  */
1604 static int
1605 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg)
1606 {
1607 
1608 	int ret = -EINVAL;
1609 	int *enable = arg;
1610 	struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
1611 	struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1612 					NLMSG_ALIGN(sizeof(struct genlmsghdr)));
1613 
1614 	while (nla->nla_len && nla < tail) {
1615 		switch (nla->nla_type) {
1616 		/* Expected nested attributes case. */
1617 		case DEVLINK_ATTR_PARAM:
1618 		case DEVLINK_ATTR_PARAM_VALUES_LIST:
1619 		case DEVLINK_ATTR_PARAM_VALUE:
1620 			ret = 0;
1621 			nla += 1;
1622 			break;
1623 		case DEVLINK_ATTR_PARAM_VALUE_DATA:
1624 			*enable = 1;
1625 			return 0;
1626 		default:
1627 			nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len));
1628 		}
1629 	}
1630 	*enable = 0;
1631 	return ret;
1632 }
1633 
1634 /**
1635  * Get ROCE enable status through Netlink.
1636  *
1637  * @param[in] nlsk_fd
1638  *   Netlink socket file descriptor.
1639  * @param[in] family_id
1640  *   the Devlink family ID.
1641  * @param pci_addr
1642  *   The device PCI address.
1643  * @param[out] enable
1644  *   Where to store the enable status.
1645  *
1646  * @return
1647  *   0 on success and @p enable is updated, a negative errno value otherwise
1648  *   and rte_errno is set.
1649  */
1650 int
1651 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
1652 			int *enable)
1653 {
1654 	struct nlmsghdr *nlh;
1655 	struct genlmsghdr *genl;
1656 	uint32_t sn = MLX5_NL_SN_GENERATE;
1657 	int ret;
1658 	int cur_en = 0;
1659 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1660 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1661 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 4 +
1662 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4];
1663 
1664 	memset(buf, 0, sizeof(buf));
1665 	nlh = (struct nlmsghdr *)buf;
1666 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1667 	nlh->nlmsg_type = family_id;
1668 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1669 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1670 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
1671 	genl->cmd = DEVLINK_CMD_PARAM_GET;
1672 	genl->version = DEVLINK_GENL_VERSION;
1673 	nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1674 	nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1675 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1676 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1677 	if (ret >= 0)
1678 		ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en);
1679 	if (ret < 0) {
1680 		DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.",
1681 			pci_addr, ret);
1682 		return ret;
1683 	}
1684 	*enable = cur_en;
1685 	DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".",
1686 		cur_en ? "en" : "dis", pci_addr);
1687 	return ret;
1688 }
1689 
1690 /**
1691  * Reload mlx5 device kernel driver through Netlink.
1692  *
1693  * @param[in] nlsk_fd
1694  *   Netlink socket file descriptor.
1695  * @param[in] family_id
1696  *   the Devlink family ID.
1697  * @param pci_addr
1698  *   The device PCI address.
1699  * @param[out] enable
1700  *   The enable status to set.
1701  *
1702  * @return
1703  *   0 on success, a negative errno value otherwise and rte_errno is set.
1704  */
1705 int
1706 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr)
1707 {
1708 	struct nlmsghdr *nlh;
1709 	struct genlmsghdr *genl;
1710 	uint32_t sn = MLX5_NL_SN_GENERATE;
1711 	int ret;
1712 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1713 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1714 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 2 +
1715 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2];
1716 
1717 	memset(buf, 0, sizeof(buf));
1718 	nlh = (struct nlmsghdr *)buf;
1719 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1720 	nlh->nlmsg_type = family_id;
1721 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1722 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1723 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
1724 	genl->cmd = DEVLINK_CMD_RELOAD;
1725 	genl->version = DEVLINK_GENL_VERSION;
1726 	nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1727 	nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1728 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1729 	if (ret >= 0)
1730 		ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1731 	if (ret < 0) {
1732 		DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d",
1733 			pci_addr, ret);
1734 		return ret;
1735 	}
1736 	DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.",
1737 		pci_addr);
1738 	return 0;
1739 }
1740 
1741 /**
1742  * Set ROCE enable status through Netlink.
1743  *
1744  * @param[in] nlsk_fd
1745  *   Netlink socket file descriptor.
1746  * @param[in] family_id
1747  *   the Devlink family ID.
1748  * @param pci_addr
1749  *   The device PCI address.
1750  * @param[out] enable
1751  *   The enable status to set.
1752  *
1753  * @return
1754  *   0 on success, a negative errno value otherwise and rte_errno is set.
1755  */
1756 int
1757 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
1758 			int enable)
1759 {
1760 	struct nlmsghdr *nlh;
1761 	struct genlmsghdr *genl;
1762 	uint32_t sn = MLX5_NL_SN_GENERATE;
1763 	int ret;
1764 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1765 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1766 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 6 +
1767 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6];
1768 	uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT;
1769 	uint8_t ptype = NLA_FLAG;
1770 ;
1771 
1772 	memset(buf, 0, sizeof(buf));
1773 	nlh = (struct nlmsghdr *)buf;
1774 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1775 	nlh->nlmsg_type = family_id;
1776 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1777 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1778 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
1779 	genl->cmd = DEVLINK_CMD_PARAM_SET;
1780 	genl->version = DEVLINK_GENL_VERSION;
1781 	nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1782 	nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1783 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1784 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode));
1785 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype));
1786 	if (enable)
1787 		nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0);
1788 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1789 	if (ret >= 0)
1790 		ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1791 	if (ret < 0) {
1792 		DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:"
1793 			" %d.", enable ? "en" : "dis", pci_addr, ret);
1794 		return ret;
1795 	}
1796 	DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.",
1797 		pci_addr, enable ? "en" : "dis");
1798 	/* Now, need to reload the driver. */
1799 	return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);
1800 }
1801