xref: /dpdk/drivers/common/mlx5/linux/mlx5_nl.c (revision c9902a15bd005b6d4fe072cf7b60fe4ee679155f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5 
6 #include <errno.h>
7 #include <linux/if_link.h>
8 #include <linux/rtnetlink.h>
9 #include <linux/genetlink.h>
10 #include <net/if.h>
11 #include <rdma/rdma_netlink.h>
12 #include <stdbool.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <stdalign.h>
16 #include <string.h>
17 #include <sys/socket.h>
18 #include <unistd.h>
19 
20 #include <rte_errno.h>
21 
22 #include "mlx5_nl.h"
23 #include "../mlx5_common_log.h"
24 #include "mlx5_malloc.h"
25 #ifdef HAVE_DEVLINK
26 #include <linux/devlink.h>
27 #endif
28 
29 
30 /* Size of the buffer to receive kernel messages */
31 #define MLX5_NL_BUF_SIZE (32 * 1024)
32 /* Send buffer size for the Netlink socket */
33 #define MLX5_SEND_BUF_SIZE 32768
34 /* Receive buffer size for the Netlink socket */
35 #define MLX5_RECV_BUF_SIZE 32768
36 /* Maximal physical port name length. */
37 #define MLX5_PHYS_PORT_NAME_MAX 128
38 
39 /** Parameters of VLAN devices created by driver. */
40 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"
41 /*
42  * Define NDA_RTA as defined in iproute2 sources.
43  *
44  * see in iproute2 sources file include/libnetlink.h
45  */
46 #ifndef MLX5_NDA_RTA
47 #define MLX5_NDA_RTA(r) \
48 	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
49 #endif
50 /*
51  * Define NLMSG_TAIL as defined in iproute2 sources.
52  *
53  * see in iproute2 sources file include/libnetlink.h
54  */
55 #ifndef NLMSG_TAIL
56 #define NLMSG_TAIL(nmsg) \
57 	((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
58 #endif
59 /*
60  * The following definitions are normally found in rdma/rdma_netlink.h,
61  * however they are so recent that most systems do not expose them yet.
62  */
63 #ifndef HAVE_RDMA_NL_NLDEV
64 #define RDMA_NL_NLDEV 5
65 #endif
66 #ifndef HAVE_RDMA_NLDEV_CMD_GET
67 #define RDMA_NLDEV_CMD_GET 1
68 #endif
69 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET
70 #define RDMA_NLDEV_CMD_PORT_GET 5
71 #endif
72 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX
73 #define RDMA_NLDEV_ATTR_DEV_INDEX 1
74 #endif
75 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME
76 #define RDMA_NLDEV_ATTR_DEV_NAME 2
77 #endif
78 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX
79 #define RDMA_NLDEV_ATTR_PORT_INDEX 3
80 #endif
81 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX
82 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50
83 #endif
84 
85 /* These are normally found in linux/if_link.h. */
86 #ifndef HAVE_IFLA_NUM_VF
87 #define IFLA_NUM_VF 21
88 #endif
89 #ifndef HAVE_IFLA_EXT_MASK
90 #define IFLA_EXT_MASK 29
91 #endif
92 #ifndef HAVE_IFLA_PHYS_SWITCH_ID
93 #define IFLA_PHYS_SWITCH_ID 36
94 #endif
95 #ifndef HAVE_IFLA_PHYS_PORT_NAME
96 #define IFLA_PHYS_PORT_NAME 38
97 #endif
98 
/*
 * Some Devlink definitions may be missing from the headers of old kernel
 * versions; provide fallback values for the ones used here.
 */
103 #ifndef DEVLINK_GENL_NAME
104 #define DEVLINK_GENL_NAME "devlink"
105 #endif
106 #ifndef DEVLINK_GENL_VERSION
107 #define DEVLINK_GENL_VERSION 1
108 #endif
109 #ifndef DEVLINK_ATTR_BUS_NAME
110 #define DEVLINK_ATTR_BUS_NAME 1
111 #endif
112 #ifndef DEVLINK_ATTR_DEV_NAME
113 #define DEVLINK_ATTR_DEV_NAME 2
114 #endif
115 #ifndef DEVLINK_ATTR_PARAM
116 #define DEVLINK_ATTR_PARAM 80
117 #endif
118 #ifndef DEVLINK_ATTR_PARAM_NAME
119 #define DEVLINK_ATTR_PARAM_NAME 81
120 #endif
121 #ifndef DEVLINK_ATTR_PARAM_TYPE
122 #define DEVLINK_ATTR_PARAM_TYPE 83
123 #endif
124 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST
125 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84
126 #endif
127 #ifndef DEVLINK_ATTR_PARAM_VALUE
128 #define DEVLINK_ATTR_PARAM_VALUE 85
129 #endif
130 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA
131 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86
132 #endif
133 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE
134 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87
135 #endif
136 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT
137 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1
138 #endif
139 #ifndef DEVLINK_CMD_RELOAD
140 #define DEVLINK_CMD_RELOAD 37
141 #endif
142 #ifndef DEVLINK_CMD_PARAM_GET
143 #define DEVLINK_CMD_PARAM_GET 38
144 #endif
145 #ifndef DEVLINK_CMD_PARAM_SET
146 #define DEVLINK_CMD_PARAM_SET 39
147 #endif
148 #ifndef NLA_FLAG
149 #define NLA_FLAG 6
150 #endif
151 
152 /* Add/remove MAC address through Netlink */
153 struct mlx5_nl_mac_addr {
154 	struct rte_ether_addr (*mac)[];
155 	/**< MAC address handled by the device. */
156 	int mac_n; /**< Number of addresses in the array. */
157 };
158 
/* Bits reported in mlx5_nl_ifindex_data::flags by mlx5_nl_cmdget_cb(). */
#define MLX5_NL_CMD_GET_IB_NAME (1 << 0) /* Device name matched. */
#define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) /* IB device index seen. */
#define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) /* Netdev index seen. */
#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) /* Port index seen. */
163 
/** Query context shared between the callers and mlx5_nl_cmdget_cb(). */
struct mlx5_nl_ifindex_data {
	/** IB device name to look for (in). */
	const char *name;
	/** MLX5_NL_CMD_GET_* bits of the attributes that were found (out). */
	uint32_t flags;
	/** IB device index (out). */
	uint32_t ibindex;
	/** Network interface index (out). */
	uint32_t ifindex;
	/** Value of the port index attribute (out). */
	uint32_t portnum;
};
172 
/* Counter backing the Netlink sequence numbers generated below. */
uint32_t atomic_sn;

/*
 * Generate a Netlink sequence number: atomically increment the counter
 * (relaxed ordering) and yield the incremented value.
 */
#define MLX5_NL_SN_GENERATE \
	(__atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED))
177 
178 /**
179  * Opens a Netlink socket.
180  *
181  * @param protocol
182  *   Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
183  *
184  * @return
185  *   A file descriptor on success, a negative errno value otherwise and
186  *   rte_errno is set.
187  */
188 int
189 mlx5_nl_init(int protocol)
190 {
191 	int fd;
192 	int buf_size;
193 	socklen_t opt_size;
194 	struct sockaddr_nl local = {
195 		.nl_family = AF_NETLINK,
196 	};
197 	int ret;
198 
199 	fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
200 	if (fd == -1) {
201 		rte_errno = errno;
202 		return -rte_errno;
203 	}
204 	opt_size = sizeof(buf_size);
205 	ret = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf_size, &opt_size);
206 	if (ret == -1) {
207 		rte_errno = errno;
208 		goto error;
209 	}
210 	DRV_LOG(DEBUG, "Netlink socket send buffer: %d", buf_size);
211 	if (buf_size < MLX5_SEND_BUF_SIZE) {
212 		ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
213 				 &buf_size, sizeof(buf_size));
214 		if (ret == -1) {
215 			rte_errno = errno;
216 			goto error;
217 		}
218 	}
219 	opt_size = sizeof(buf_size);
220 	ret = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf_size, &opt_size);
221 	if (ret == -1) {
222 		rte_errno = errno;
223 		goto error;
224 	}
225 	DRV_LOG(DEBUG, "Netlink socket recv buffer: %d", buf_size);
226 	if (buf_size < MLX5_RECV_BUF_SIZE) {
227 		ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
228 				 &buf_size, sizeof(buf_size));
229 		if (ret == -1) {
230 			rte_errno = errno;
231 			goto error;
232 		}
233 	}
234 	ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
235 	if (ret == -1) {
236 		rte_errno = errno;
237 		goto error;
238 	}
239 	return fd;
240 error:
241 	close(fd);
242 	return -rte_errno;
243 }
244 
245 /**
246  * Send a request message to the kernel on the Netlink socket.
247  *
248  * @param[in] nlsk_fd
249  *   Netlink socket file descriptor.
250  * @param[in] nh
251  *   The Netlink message send to the kernel.
252  * @param[in] ssn
253  *   Sequence number.
254  * @param[in] req
255  *   Pointer to the request structure.
256  * @param[in] len
257  *   Length of the request in bytes.
258  *
259  * @return
260  *   The number of sent bytes on success, a negative errno value otherwise and
261  *   rte_errno is set.
262  */
263 static int
264 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
265 		int len)
266 {
267 	struct sockaddr_nl sa = {
268 		.nl_family = AF_NETLINK,
269 	};
270 	struct iovec iov[2] = {
271 		{ .iov_base = nh, .iov_len = sizeof(*nh), },
272 		{ .iov_base = req, .iov_len = len, },
273 	};
274 	struct msghdr msg = {
275 		.msg_name = &sa,
276 		.msg_namelen = sizeof(sa),
277 		.msg_iov = iov,
278 		.msg_iovlen = 2,
279 	};
280 	int send_bytes;
281 
282 	nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
283 	nh->nlmsg_seq = sn;
284 	send_bytes = sendmsg(nlsk_fd, &msg, 0);
285 	if (send_bytes < 0) {
286 		rte_errno = errno;
287 		return -rte_errno;
288 	}
289 	return send_bytes;
290 }
291 
292 /**
293  * Send a message to the kernel on the Netlink socket.
294  *
295  * @param[in] nlsk_fd
296  *   The Netlink socket file descriptor used for communication.
297  * @param[in] nh
298  *   The Netlink message send to the kernel.
299  * @param[in] sn
300  *   Sequence number.
301  *
302  * @return
303  *   The number of sent bytes on success, a negative errno value otherwise and
304  *   rte_errno is set.
305  */
306 static int
307 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
308 {
309 	struct sockaddr_nl sa = {
310 		.nl_family = AF_NETLINK,
311 	};
312 	struct iovec iov = {
313 		.iov_base = nh,
314 		.iov_len = nh->nlmsg_len,
315 	};
316 	struct msghdr msg = {
317 		.msg_name = &sa,
318 		.msg_namelen = sizeof(sa),
319 		.msg_iov = &iov,
320 		.msg_iovlen = 1,
321 	};
322 	int send_bytes;
323 
324 	nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
325 	nh->nlmsg_seq = sn;
326 	send_bytes = sendmsg(nlsk_fd, &msg, 0);
327 	if (send_bytes < 0) {
328 		rte_errno = errno;
329 		return -rte_errno;
330 	}
331 	return send_bytes;
332 }
333 
334 /**
335  * Receive a message from the kernel on the Netlink socket, following
336  * mlx5_nl_send().
337  *
338  * @param[in] nlsk_fd
339  *   The Netlink socket file descriptor used for communication.
340  * @param[in] sn
341  *   Sequence number.
342  * @param[in] cb
343  *   The callback function to call for each Netlink message received.
344  * @param[in, out] arg
345  *   Custom arguments for the callback.
346  *
347  * @return
348  *   0 on success, a negative errno value otherwise and rte_errno is set.
349  */
350 static int
351 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
352 	     void *arg)
353 {
354 	struct sockaddr_nl sa;
355 	struct iovec iov;
356 	struct msghdr msg = {
357 		.msg_name = &sa,
358 		.msg_namelen = sizeof(sa),
359 		.msg_iov = &iov,
360 		/* One message at a time */
361 		.msg_iovlen = 1,
362 	};
363 	void *buf = NULL;
364 	int multipart = 0;
365 	int ret = 0;
366 
367 	do {
368 		struct nlmsghdr *nh;
369 		int recv_bytes;
370 
371 		do {
372 			/* Query length of incoming message. */
373 			iov.iov_base = NULL;
374 			iov.iov_len = 0;
375 			recv_bytes = recvmsg(nlsk_fd, &msg,
376 					     MSG_PEEK | MSG_TRUNC);
377 			if (recv_bytes < 0) {
378 				rte_errno = errno;
379 				ret = -rte_errno;
380 				goto exit;
381 			}
382 			if (recv_bytes == 0) {
383 				rte_errno = ENODATA;
384 				ret = -rte_errno;
385 				goto exit;
386 			}
387 			/* Allocate buffer to fetch the message. */
388 			if (recv_bytes < MLX5_RECV_BUF_SIZE)
389 				recv_bytes = MLX5_RECV_BUF_SIZE;
390 			mlx5_free(buf);
391 			buf = mlx5_malloc(0, recv_bytes, 0, SOCKET_ID_ANY);
392 			if (!buf) {
393 				rte_errno = ENOMEM;
394 				ret = -rte_errno;
395 				goto exit;
396 			}
397 			/* Fetch the message. */
398 			iov.iov_base = buf;
399 			iov.iov_len = recv_bytes;
400 			recv_bytes = recvmsg(nlsk_fd, &msg, 0);
401 			if (recv_bytes == -1) {
402 				rte_errno = errno;
403 				ret = -rte_errno;
404 				goto exit;
405 			}
406 			nh = (struct nlmsghdr *)buf;
407 		} while (nh->nlmsg_seq != sn);
408 		for (;
409 		     NLMSG_OK(nh, (unsigned int)recv_bytes);
410 		     nh = NLMSG_NEXT(nh, recv_bytes)) {
411 			if (nh->nlmsg_type == NLMSG_ERROR) {
412 				struct nlmsgerr *err_data = NLMSG_DATA(nh);
413 
414 				if (err_data->error < 0) {
415 					rte_errno = -err_data->error;
416 					ret = -rte_errno;
417 					goto exit;
418 				}
419 				/* Ack message. */
420 				ret = 0;
421 				goto exit;
422 			}
423 			/* Multi-part msgs and their trailing DONE message. */
424 			if (nh->nlmsg_flags & NLM_F_MULTI) {
425 				if (nh->nlmsg_type == NLMSG_DONE) {
426 					ret =  0;
427 					goto exit;
428 				}
429 				multipart = 1;
430 			}
431 			if (cb) {
432 				ret = cb(nh, arg);
433 				if (ret < 0)
434 					goto exit;
435 			}
436 		}
437 	} while (multipart);
438 exit:
439 	mlx5_free(buf);
440 	return ret;
441 }
442 
443 /**
444  * Parse Netlink message to retrieve the bridge MAC address.
445  *
446  * @param nh
447  *   Pointer to Netlink Message Header.
448  * @param arg
449  *   PMD data register with this callback.
450  *
451  * @return
452  *   0 on success, a negative errno value otherwise and rte_errno is set.
453  */
454 static int
455 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
456 {
457 	struct mlx5_nl_mac_addr *data = arg;
458 	struct ndmsg *r = NLMSG_DATA(nh);
459 	struct rtattr *attribute;
460 	int len;
461 
462 	len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
463 	for (attribute = MLX5_NDA_RTA(r);
464 	     RTA_OK(attribute, len);
465 	     attribute = RTA_NEXT(attribute, len)) {
466 		if (attribute->rta_type == NDA_LLADDR) {
467 			if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
468 				DRV_LOG(WARNING,
469 					"not enough room to finalize the"
470 					" request");
471 				rte_errno = ENOMEM;
472 				return -rte_errno;
473 			}
474 #ifdef RTE_LIBRTE_MLX5_DEBUG
475 			char m[RTE_ETHER_ADDR_FMT_SIZE];
476 
477 			rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE,
478 					      RTA_DATA(attribute));
479 			DRV_LOG(DEBUG, "bridge MAC address %s", m);
480 #endif
481 			memcpy(&(*data->mac)[data->mac_n++],
482 			       RTA_DATA(attribute), RTE_ETHER_ADDR_LEN);
483 		}
484 	}
485 	return 0;
486 }
487 
488 /**
489  * Get bridge MAC addresses.
490  *
491  * @param[in] nlsk_fd
492  *   Netlink socket file descriptor.
493  * @param[in] iface_idx
494  *   Net device interface index.
495  * @param mac[out]
496  *   Pointer to the array table of MAC addresses to fill.
497  *   Its size should be of MLX5_MAX_MAC_ADDRESSES.
498  * @param mac_n[out]
499  *   Number of entries filled in MAC array.
500  *
501  * @return
502  *   0 on success, a negative errno value otherwise and rte_errno is set.
503  */
504 static int
505 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx,
506 		      struct rte_ether_addr (*mac)[], int *mac_n)
507 {
508 	struct {
509 		struct nlmsghdr	hdr;
510 		struct ifinfomsg ifm;
511 	} req = {
512 		.hdr = {
513 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
514 			.nlmsg_type = RTM_GETNEIGH,
515 			.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
516 		},
517 		.ifm = {
518 			.ifi_family = PF_BRIDGE,
519 			.ifi_index = iface_idx,
520 		},
521 	};
522 	struct mlx5_nl_mac_addr data = {
523 		.mac = mac,
524 		.mac_n = 0,
525 	};
526 	uint32_t sn = MLX5_NL_SN_GENERATE;
527 	int ret;
528 
529 	if (nlsk_fd == -1)
530 		return 0;
531 	ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm,
532 			      sizeof(struct ifinfomsg));
533 	if (ret < 0)
534 		goto error;
535 	ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data);
536 	if (ret < 0)
537 		goto error;
538 	*mac_n = data.mac_n;
539 	return 0;
540 error:
541 	DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s",
542 		iface_idx, strerror(rte_errno));
543 	return -rte_errno;
544 }
545 
546 /**
547  * Modify the MAC address neighbour table with Netlink.
548  *
549  * @param[in] nlsk_fd
550  *   Netlink socket file descriptor.
551  * @param[in] iface_idx
552  *   Net device interface index.
553  * @param mac
554  *   MAC address to consider.
555  * @param add
556  *   1 to add the MAC address, 0 to remove the MAC address.
557  *
558  * @return
559  *   0 on success, a negative errno value otherwise and rte_errno is set.
560  */
561 static int
562 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
563 			struct rte_ether_addr *mac, int add)
564 {
565 	struct {
566 		struct nlmsghdr hdr;
567 		struct ndmsg ndm;
568 		struct rtattr rta;
569 		uint8_t buffer[RTE_ETHER_ADDR_LEN];
570 	} req = {
571 		.hdr = {
572 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
573 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
574 				NLM_F_EXCL | NLM_F_ACK,
575 			.nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
576 		},
577 		.ndm = {
578 			.ndm_family = PF_BRIDGE,
579 			.ndm_state = NUD_NOARP | NUD_PERMANENT,
580 			.ndm_ifindex = iface_idx,
581 			.ndm_flags = NTF_SELF,
582 		},
583 		.rta = {
584 			.rta_type = NDA_LLADDR,
585 			.rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN),
586 		},
587 	};
588 	uint32_t sn = MLX5_NL_SN_GENERATE;
589 	int ret;
590 
591 	if (nlsk_fd == -1)
592 		return 0;
593 	memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN);
594 	req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
595 		RTA_ALIGN(req.rta.rta_len);
596 	ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
597 	if (ret < 0)
598 		goto error;
599 	ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
600 	if (ret < 0)
601 		goto error;
602 	return 0;
603 error:
604 #ifdef RTE_LIBRTE_MLX5_DEBUG
605 	{
606 		char m[RTE_ETHER_ADDR_FMT_SIZE];
607 
608 		rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac);
609 		DRV_LOG(DEBUG,
610 			"Interface %u cannot %s MAC address %s %s",
611 			iface_idx,
612 			add ? "add" : "remove", m, strerror(rte_errno));
613 	}
614 #endif
615 	return -rte_errno;
616 }
617 
618 /**
619  * Modify the VF MAC address neighbour table with Netlink.
620  *
621  * @param[in] nlsk_fd
622  *   Netlink socket file descriptor.
623  * @param[in] iface_idx
624  *   Net device interface index.
625  * @param mac
626  *    MAC address to consider.
627  * @param vf_index
628  *    VF index.
629  *
630  * @return
631  *    0 on success, a negative errno value otherwise and rte_errno is set.
632  */
633 int
634 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
635 			   struct rte_ether_addr *mac, int vf_index)
636 {
637 	int ret;
638 	struct {
639 		struct nlmsghdr hdr;
640 		struct ifinfomsg ifm;
641 		struct rtattr vf_list_rta;
642 		struct rtattr vf_info_rta;
643 		struct rtattr vf_mac_rta;
644 		struct ifla_vf_mac ivm;
645 	} req = {
646 		.hdr = {
647 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
648 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
649 			.nlmsg_type = RTM_BASE,
650 		},
651 		.ifm = {
652 			.ifi_index = iface_idx,
653 		},
654 		.vf_list_rta = {
655 			.rta_type = IFLA_VFINFO_LIST,
656 			.rta_len = RTA_ALIGN(RTA_LENGTH(0)),
657 		},
658 		.vf_info_rta = {
659 			.rta_type = IFLA_VF_INFO,
660 			.rta_len = RTA_ALIGN(RTA_LENGTH(0)),
661 		},
662 		.vf_mac_rta = {
663 			.rta_type = IFLA_VF_MAC,
664 		},
665 	};
666 	struct ifla_vf_mac ivm = {
667 		.vf = vf_index,
668 	};
669 	uint32_t sn = MLX5_NL_SN_GENERATE;
670 
671 	memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN);
672 	memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm));
673 
674 	req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm));
675 	req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
676 		RTA_ALIGN(req.vf_list_rta.rta_len) +
677 		RTA_ALIGN(req.vf_info_rta.rta_len) +
678 		RTA_ALIGN(req.vf_mac_rta.rta_len);
679 	req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
680 					       &req.vf_list_rta);
681 	req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
682 					       &req.vf_info_rta);
683 
684 	if (nlsk_fd < 0)
685 		return -1;
686 	ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
687 	if (ret < 0)
688 		goto error;
689 	ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
690 	if (ret < 0)
691 		goto error;
692 	return 0;
693 error:
694 	DRV_LOG(ERR,
695 		"representor %u cannot set VF MAC address "
696 		RTE_ETHER_ADDR_PRT_FMT " : %s",
697 		vf_index,
698 		RTE_ETHER_ADDR_BYTES(mac),
699 		strerror(rte_errno));
700 	return -rte_errno;
701 }
702 
703 /**
704  * Add a MAC address.
705  *
706  * @param[in] nlsk_fd
707  *   Netlink socket file descriptor.
708  * @param[in] iface_idx
709  *   Net device interface index.
710  * @param mac_own
711  *   BITFIELD_DECLARE array to store the mac.
712  * @param mac
713  *   MAC address to register.
714  * @param index
715  *   MAC address index.
716  *
717  * @return
718  *   0 on success, a negative errno value otherwise and rte_errno is set.
719  */
720 int
721 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx,
722 		     uint64_t *mac_own, struct rte_ether_addr *mac,
723 		     uint32_t index)
724 {
725 	int ret;
726 
727 	ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1);
728 	if (!ret) {
729 		MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
730 		if (index >= MLX5_MAX_MAC_ADDRESSES)
731 			return -EINVAL;
732 
733 		BITFIELD_SET(mac_own, index);
734 	}
735 	if (ret == -EEXIST)
736 		return 0;
737 	return ret;
738 }
739 
740 /**
741  * Remove a MAC address.
742  *
743  * @param[in] nlsk_fd
744  *   Netlink socket file descriptor.
745  * @param[in] iface_idx
746  *   Net device interface index.
747  * @param mac_own
748  *   BITFIELD_DECLARE array to store the mac.
749  * @param mac
750  *   MAC address to remove.
751  * @param index
752  *   MAC address index.
753  *
754  * @return
755  *   0 on success, a negative errno value otherwise and rte_errno is set.
756  */
757 int
758 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
759 			struct rte_ether_addr *mac, uint32_t index)
760 {
761 	MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
762 	if (index >= MLX5_MAX_MAC_ADDRESSES)
763 		return -EINVAL;
764 
765 	BITFIELD_RESET(mac_own, index);
766 	return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0);
767 }
768 
769 /**
770  * Synchronize Netlink bridge table to the internal table.
771  *
772  * @param[in] nlsk_fd
773  *   Netlink socket file descriptor.
774  * @param[in] iface_idx
775  *   Net device interface index.
776  * @param mac_addrs
777  *   Mac addresses array to sync.
778  * @param n
779  *   @p mac_addrs array size.
780  */
781 void
782 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx,
783 		      struct rte_ether_addr *mac_addrs, int n)
784 {
785 	struct rte_ether_addr macs[n];
786 	int macs_n = 0;
787 	int i;
788 	int ret;
789 
790 	memset(macs, 0, n * sizeof(macs[0]));
791 	ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n);
792 	if (ret)
793 		return;
794 	for (i = 0; i != macs_n; ++i) {
795 		int j;
796 
797 		/* Verify the address is not in the array yet. */
798 		for (j = 0; j != n; ++j)
799 			if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j]))
800 				break;
801 		if (j != n)
802 			continue;
803 		if (rte_is_multicast_ether_addr(&macs[i])) {
804 			/* Find the first entry available. */
805 			for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) {
806 				if (rte_is_zero_ether_addr(&mac_addrs[j])) {
807 					mac_addrs[j] = macs[i];
808 					break;
809 				}
810 			}
811 		} else {
812 			/* Find the first entry available. */
813 			for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) {
814 				if (rte_is_zero_ether_addr(&mac_addrs[j])) {
815 					mac_addrs[j] = macs[i];
816 					break;
817 				}
818 			}
819 		}
820 	}
821 }
822 
823 /**
824  * Flush all added MAC addresses.
825  *
826  * @param[in] nlsk_fd
827  *   Netlink socket file descriptor.
828  * @param[in] iface_idx
829  *   Net device interface index.
830  * @param[in] mac_addrs
831  *   Mac addresses array to flush.
832  * @param n
833  *   @p mac_addrs array size.
834  * @param mac_own
835  *   BITFIELD_DECLARE array to store the mac.
836  */
837 void
838 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx,
839 		       struct rte_ether_addr *mac_addrs, int n,
840 		       uint64_t *mac_own)
841 {
842 	int i;
843 
844 	if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES)
845 		return;
846 
847 	for (i = n - 1; i >= 0; --i) {
848 		struct rte_ether_addr *m = &mac_addrs[i];
849 
850 		if (BITFIELD_ISSET(mac_own, i))
851 			mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m,
852 						i);
853 	}
854 }
855 
856 /**
857  * Enable promiscuous / all multicast mode through Netlink.
858  *
859  * @param[in] nlsk_fd
860  *   Netlink socket file descriptor.
861  * @param[in] iface_idx
862  *   Net device interface index.
863  * @param flags
864  *   IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti.
865  * @param enable
866  *   Nonzero to enable, disable otherwise.
867  *
868  * @return
869  *   0 on success, a negative errno value otherwise and rte_errno is set.
870  */
871 static int
872 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags,
873 		     int enable)
874 {
875 	struct {
876 		struct nlmsghdr hdr;
877 		struct ifinfomsg ifi;
878 	} req = {
879 		.hdr = {
880 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
881 			.nlmsg_type = RTM_NEWLINK,
882 			.nlmsg_flags = NLM_F_REQUEST,
883 		},
884 		.ifi = {
885 			.ifi_flags = enable ? flags : 0,
886 			.ifi_change = flags,
887 			.ifi_index = iface_idx,
888 		},
889 	};
890 	uint32_t sn = MLX5_NL_SN_GENERATE;
891 	int ret;
892 
893 	MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
894 	if (nlsk_fd < 0)
895 		return 0;
896 	ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
897 	if (ret < 0)
898 		return ret;
899 	return 0;
900 }
901 
902 /**
903  * Enable promiscuous mode through Netlink.
904  *
905  * @param[in] nlsk_fd
906  *   Netlink socket file descriptor.
907  * @param[in] iface_idx
908  *   Net device interface index.
909  * @param enable
910  *   Nonzero to enable, disable otherwise.
911  *
912  * @return
913  *   0 on success, a negative errno value otherwise and rte_errno is set.
914  */
915 int
916 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable)
917 {
918 	int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable);
919 
920 	if (ret)
921 		DRV_LOG(DEBUG,
922 			"Interface %u cannot %s promisc mode: Netlink error %s",
923 			iface_idx, enable ? "enable" : "disable",
924 			strerror(rte_errno));
925 	return ret;
926 }
927 
928 /**
929  * Enable all multicast mode through Netlink.
930  *
931  * @param[in] nlsk_fd
932  *   Netlink socket file descriptor.
933  * @param[in] iface_idx
934  *   Net device interface index.
935  * @param enable
936  *   Nonzero to enable, disable otherwise.
937  *
938  * @return
939  *   0 on success, a negative errno value otherwise and rte_errno is set.
940  */
941 int
942 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
943 {
944 	int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI,
945 				       enable);
946 
947 	if (ret)
948 		DRV_LOG(DEBUG,
949 			"Interface %u cannot %s allmulti : Netlink error %s",
950 			iface_idx, enable ? "enable" : "disable",
951 			strerror(rte_errno));
952 	return ret;
953 }
954 
955 /**
956  * Process network interface information from Netlink message.
957  *
958  * @param nh
959  *   Pointer to Netlink message header.
960  * @param arg
961  *   Opaque data pointer for this callback.
962  *
963  * @return
964  *   0 on success, a negative errno value otherwise and rte_errno is set.
965  */
966 static int
967 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
968 {
969 	struct mlx5_nl_ifindex_data *data = arg;
970 	struct mlx5_nl_ifindex_data local = {
971 		.flags = 0,
972 	};
973 	size_t off = NLMSG_HDRLEN;
974 
975 	if (nh->nlmsg_type !=
976 	    RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
977 	    nh->nlmsg_type !=
978 	    RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET))
979 		goto error;
980 	while (off < nh->nlmsg_len) {
981 		struct nlattr *na = (void *)((uintptr_t)nh + off);
982 		void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
983 
984 		if (na->nla_len > nh->nlmsg_len - off)
985 			goto error;
986 		switch (na->nla_type) {
987 		case RDMA_NLDEV_ATTR_DEV_INDEX:
988 			local.ibindex = *(uint32_t *)payload;
989 			local.flags |= MLX5_NL_CMD_GET_IB_INDEX;
990 			break;
991 		case RDMA_NLDEV_ATTR_DEV_NAME:
992 			if (!strcmp(payload, data->name))
993 				local.flags |= MLX5_NL_CMD_GET_IB_NAME;
994 			break;
995 		case RDMA_NLDEV_ATTR_NDEV_INDEX:
996 			local.ifindex = *(uint32_t *)payload;
997 			local.flags |= MLX5_NL_CMD_GET_NET_INDEX;
998 			break;
999 		case RDMA_NLDEV_ATTR_PORT_INDEX:
1000 			local.portnum = *(uint32_t *)payload;
1001 			local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
1002 			break;
1003 		default:
1004 			break;
1005 		}
1006 		off += NLA_ALIGN(na->nla_len);
1007 	}
1008 	/*
1009 	 * It is possible to have multiple messages for all
1010 	 * Infiniband devices in the system with appropriate name.
1011 	 * So we should gather parameters locally and copy to
1012 	 * query context only in case of coinciding device name.
1013 	 */
1014 	if (local.flags & MLX5_NL_CMD_GET_IB_NAME) {
1015 		data->flags = local.flags;
1016 		data->ibindex = local.ibindex;
1017 		data->ifindex = local.ifindex;
1018 		data->portnum = local.portnum;
1019 	}
1020 	return 0;
1021 error:
1022 	rte_errno = EINVAL;
1023 	return -rte_errno;
1024 }
1025 
1026 /**
1027  * Get index of network interface associated with some IB device.
1028  *
1029  * This is the only somewhat safe method to avoid resorting to heuristics
1030  * when faced with port representors. Unfortunately it requires at least
1031  * Linux 4.17.
1032  *
1033  * @param nl
1034  *   Netlink socket of the RDMA kind (NETLINK_RDMA).
1035  * @param[in] name
1036  *   IB device name.
1037  * @param[in] pindex
1038  *   IB device port index, starting from 1
1039  * @return
1040  *   A valid (nonzero) interface index on success, 0 otherwise and rte_errno
1041  *   is set.
1042  */
1043 unsigned int
1044 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
1045 {
1046 	struct mlx5_nl_ifindex_data data = {
1047 		.name = name,
1048 		.flags = 0,
1049 		.ibindex = 0, /* Determined during first pass. */
1050 		.ifindex = 0, /* Determined during second pass. */
1051 	};
1052 	union {
1053 		struct nlmsghdr nh;
1054 		uint8_t buf[NLMSG_HDRLEN +
1055 			    NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) +
1056 			    NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
1057 	} req = {
1058 		.nh = {
1059 			.nlmsg_len = NLMSG_LENGTH(0),
1060 			.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1061 						       RDMA_NLDEV_CMD_GET),
1062 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1063 		},
1064 	};
1065 	struct nlattr *na;
1066 	uint32_t sn = MLX5_NL_SN_GENERATE;
1067 	int ret;
1068 
1069 	ret = mlx5_nl_send(nl, &req.nh, sn);
1070 	if (ret < 0)
1071 		return 0;
1072 	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1073 	if (ret < 0)
1074 		return 0;
1075 	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1076 	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX))
1077 		goto error;
1078 	data.flags = 0;
1079 	sn = MLX5_NL_SN_GENERATE;
1080 	req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1081 					     RDMA_NLDEV_CMD_PORT_GET);
1082 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1083 	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
1084 	na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
1085 	na->nla_len = NLA_HDRLEN + sizeof(data.ibindex);
1086 	na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
1087 	memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1088 	       &data.ibindex, sizeof(data.ibindex));
1089 	na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
1090 	na->nla_len = NLA_HDRLEN + sizeof(pindex);
1091 	na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
1092 	memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1093 	       &pindex, sizeof(pindex));
1094 	ret = mlx5_nl_send(nl, &req.nh, sn);
1095 	if (ret < 0)
1096 		return 0;
1097 	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1098 	if (ret < 0)
1099 		return 0;
1100 	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1101 	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1102 	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) ||
1103 	    !data.ifindex)
1104 		goto error;
1105 	return data.ifindex;
1106 error:
1107 	rte_errno = ENODEV;
1108 	return 0;
1109 }
1110 
1111 /**
1112  * Get the number of physical ports of given IB device.
1113  *
1114  * @param nl
1115  *   Netlink socket of the RDMA kind (NETLINK_RDMA).
1116  * @param[in] name
1117  *   IB device name.
1118  *
1119  * @return
1120  *   A valid (nonzero) number of ports on success, 0 otherwise
1121  *   and rte_errno is set.
1122  */
1123 unsigned int
1124 mlx5_nl_portnum(int nl, const char *name)
1125 {
1126 	struct mlx5_nl_ifindex_data data = {
1127 		.flags = 0,
1128 		.name = name,
1129 		.ifindex = 0,
1130 		.portnum = 0,
1131 	};
1132 	struct nlmsghdr req = {
1133 		.nlmsg_len = NLMSG_LENGTH(0),
1134 		.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1135 					       RDMA_NLDEV_CMD_GET),
1136 		.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1137 	};
1138 	uint32_t sn = MLX5_NL_SN_GENERATE;
1139 	int ret;
1140 
1141 	ret = mlx5_nl_send(nl, &req, sn);
1142 	if (ret < 0)
1143 		return 0;
1144 	ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1145 	if (ret < 0)
1146 		return 0;
1147 	if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1148 	    !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1149 	    !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) {
1150 		rte_errno = ENODEV;
1151 		return 0;
1152 	}
1153 	if (!data.portnum)
1154 		rte_errno = EINVAL;
1155 	return data.portnum;
1156 }
1157 
1158 /**
1159  * Analyze gathered port parameters via Netlink to recognize master
1160  * and representor devices for E-Switch configuration.
1161  *
1162  * @param[in] num_vf_set
1163  *   flag of presence of number of VFs port attribute.
1164  * @param[inout] switch_info
1165  *   Port information, including port name as a number and port name
1166  *   type if recognized
1167  *
1168  * @return
1169  *   master and representor flags are set in switch_info according to
1170  *   recognized parameters (if any).
1171  */
1172 static void
1173 mlx5_nl_check_switch_info(bool num_vf_set,
1174 			  struct mlx5_switch_info *switch_info)
1175 {
1176 	switch (switch_info->name_type) {
1177 	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1178 		/*
1179 		 * Name is not recognized, assume the master,
1180 		 * check the number of VFs key presence.
1181 		 */
1182 		switch_info->master = num_vf_set;
1183 		break;
1184 	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1185 		/*
1186 		 * Name is not set, this assumes the legacy naming
1187 		 * schema for master, just check if there is a
1188 		 * number of VFs key.
1189 		 */
1190 		switch_info->master = num_vf_set;
1191 		break;
1192 	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1193 		/* New uplink naming schema recognized. */
1194 		switch_info->master = 1;
1195 		break;
1196 	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1197 		/* Legacy representors naming schema. */
1198 		switch_info->representor = !num_vf_set;
1199 		break;
1200 	case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
1201 		/* Fallthrough */
1202 	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1203 		/* Fallthrough */
1204 	case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
1205 		/* New representors naming schema. */
1206 		switch_info->representor = 1;
1207 		break;
1208 	}
1209 }
1210 
1211 /**
1212  * Process switch information from Netlink message.
1213  *
1214  * @param nh
1215  *   Pointer to Netlink message header.
1216  * @param arg
1217  *   Opaque data pointer for this callback.
1218  *
1219  * @return
1220  *   0 on success, a negative errno value otherwise and rte_errno is set.
1221  */
1222 static int
1223 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
1224 {
1225 	struct mlx5_switch_info info = {
1226 		.master = 0,
1227 		.representor = 0,
1228 		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
1229 		.port_name = 0,
1230 		.switch_id = 0,
1231 	};
1232 	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1233 	bool switch_id_set = false;
1234 	bool num_vf_set = false;
1235 	int len;
1236 
1237 	if (nh->nlmsg_type != RTM_NEWLINK)
1238 		goto error;
1239 	while (off < nh->nlmsg_len) {
1240 		struct rtattr *ra = (void *)((uintptr_t)nh + off);
1241 		void *payload = RTA_DATA(ra);
1242 		unsigned int i;
1243 
1244 		if (ra->rta_len > nh->nlmsg_len - off)
1245 			goto error;
1246 		switch (ra->rta_type) {
1247 		case IFLA_NUM_VF:
1248 			num_vf_set = true;
1249 			break;
1250 		case IFLA_PHYS_PORT_NAME:
1251 			len = RTA_PAYLOAD(ra);
1252 			/* Some kernels do not pad attributes with zero. */
1253 			if (len > 0 && len < MLX5_PHYS_PORT_NAME_MAX) {
1254 				char name[MLX5_PHYS_PORT_NAME_MAX];
1255 
1256 				/*
1257 				 * We can't just patch the message with padding
1258 				 * zero - it might corrupt the following items
1259 				 * in the message, we have to copy the string
1260 				 * by attribute length and pad the copied one.
1261 				 */
1262 				memcpy(name, payload, len);
1263 				name[len] = 0;
1264 				mlx5_translate_port_name(name, &info);
1265 			} else {
1266 				info.name_type =
1267 					MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
1268 			}
1269 			break;
1270 		case IFLA_PHYS_SWITCH_ID:
1271 			info.switch_id = 0;
1272 			for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
1273 				info.switch_id <<= 8;
1274 				info.switch_id |= ((uint8_t *)payload)[i];
1275 			}
1276 			switch_id_set = true;
1277 			break;
1278 		}
1279 		off += RTA_ALIGN(ra->rta_len);
1280 	}
1281 	if (switch_id_set) {
1282 		/* We have some E-Switch configuration. */
1283 		mlx5_nl_check_switch_info(num_vf_set, &info);
1284 	}
1285 	MLX5_ASSERT(!(info.master && info.representor));
1286 	memcpy(arg, &info, sizeof(info));
1287 	return 0;
1288 error:
1289 	rte_errno = EINVAL;
1290 	return -rte_errno;
1291 }
1292 
1293 /**
1294  * Get switch information associated with network interface.
1295  *
1296  * @param nl
1297  *   Netlink socket of the ROUTE kind (NETLINK_ROUTE).
1298  * @param ifindex
1299  *   Network interface index.
1300  * @param[out] info
1301  *   Switch information object, populated in case of success.
1302  *
1303  * @return
1304  *   0 on success, a negative errno value otherwise and rte_errno is set.
1305  */
1306 int
1307 mlx5_nl_switch_info(int nl, unsigned int ifindex,
1308 		    struct mlx5_switch_info *info)
1309 {
1310 	struct {
1311 		struct nlmsghdr nh;
1312 		struct ifinfomsg info;
1313 		struct rtattr rta;
1314 		uint32_t extmask;
1315 	} req = {
1316 		.nh = {
1317 			.nlmsg_len = NLMSG_LENGTH
1318 					(sizeof(req.info) +
1319 					 RTA_LENGTH(sizeof(uint32_t))),
1320 			.nlmsg_type = RTM_GETLINK,
1321 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1322 		},
1323 		.info = {
1324 			.ifi_family = AF_UNSPEC,
1325 			.ifi_index = ifindex,
1326 		},
1327 		.rta = {
1328 			.rta_type = IFLA_EXT_MASK,
1329 			.rta_len = RTA_LENGTH(sizeof(int32_t)),
1330 		},
1331 		.extmask = RTE_LE32(1),
1332 	};
1333 	uint32_t sn = MLX5_NL_SN_GENERATE;
1334 	int ret;
1335 
1336 	ret = mlx5_nl_send(nl, &req.nh, sn);
1337 	if (ret >= 0)
1338 		ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
1339 	if (info->master && info->representor) {
1340 		DRV_LOG(ERR, "ifindex %u device is recognized as master"
1341 			     " and as representor", ifindex);
1342 		rte_errno = ENODEV;
1343 		ret = -rte_errno;
1344 	}
1345 	return ret;
1346 }
1347 
1348 /*
1349  * Delete VLAN network device by ifindex.
1350  *
1351  * @param[in] tcf
1352  *   Context object initialized by mlx5_nl_vlan_vmwa_init().
1353  * @param[in] ifindex
1354  *   Interface index of network device to delete.
1355  */
1356 void
1357 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa,
1358 		      uint32_t ifindex)
1359 {
1360 	uint32_t sn = MLX5_NL_SN_GENERATE;
1361 	int ret;
1362 	struct {
1363 		struct nlmsghdr nh;
1364 		struct ifinfomsg info;
1365 	} req = {
1366 		.nh = {
1367 			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
1368 			.nlmsg_type = RTM_DELLINK,
1369 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1370 		},
1371 		.info = {
1372 			.ifi_family = AF_UNSPEC,
1373 			.ifi_index = ifindex,
1374 		},
1375 	};
1376 
1377 	if (ifindex) {
1378 		ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn);
1379 		if (ret >= 0)
1380 			ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1381 		if (ret < 0)
1382 			DRV_LOG(WARNING, "netlink: error deleting VLAN WA"
1383 				" ifindex %u, %d", ifindex, ret);
1384 	}
1385 }
1386 
/* Set of subroutines to build Netlink message. */

/*
 * Return the address just past the current (aligned) end of the message,
 * i.e. the place where the next header or attribute is to be written.
 */
static struct nlattr *
nl_msg_tail(struct nlmsghdr *nlh)
{
	uintptr_t end = (uintptr_t)nlh + NLMSG_ALIGN(nlh->nlmsg_len);

	return (struct nlattr *)end;
}
1394 
/*
 * Append an attribute of the given type with alen bytes of payload at the
 * tail of the message and grow nlmsg_len by the aligned attribute size.
 * The caller is responsible for providing enough room behind the message.
 */
static void
nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
{
	struct nlattr *attr = nl_msg_tail(nlh);
	uint8_t *payload = (uint8_t *)attr + sizeof(struct nlattr);

	attr->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
	attr->nla_type = type;
	nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
	/* A payload-less attribute may come with a NULL data pointer. */
	if (alen != 0)
		memcpy(payload, data, alen);
}
1407 
/*
 * Open a nested attribute: an empty attribute of the given type is
 * appended and its address returned, to be passed to nl_attr_nest_end()
 * once the nested content has been added.
 */
static struct nlattr *
nl_attr_nest_start(struct nlmsghdr *nlh, int type)
{
	/* Remember where the nest header lands before the tail moves. */
	struct nlattr *start = nl_msg_tail(nlh);

	nl_attr_put(nlh, type, NULL, 0);
	return start;
}
1416 
/*
 * Close a nested attribute: its length becomes the distance from the nest
 * header to the current tail of the message.
 */
static void
nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
{
	uint8_t *tail = (uint8_t *)nl_msg_tail(nlh);

	nest->nla_len = tail - (uint8_t *)nest;
}
1422 
1423 /*
1424  * Create network VLAN device with specified VLAN tag.
1425  *
1426  * @param[in] tcf
1427  *   Context object initialized by mlx5_nl_vlan_vmwa_init().
1428  * @param[in] ifindex
1429  *   Base network interface index.
1430  * @param[in] tag
1431  *   VLAN tag for VLAN network device to create.
1432  */
1433 uint32_t
1434 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa,
1435 			 uint32_t ifindex, uint16_t tag)
1436 {
1437 	struct nlmsghdr *nlh;
1438 	struct ifinfomsg *ifm;
1439 	char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
1440 
1441 	__rte_cache_aligned
1442 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1443 		    NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
1444 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
1445 		    NLMSG_ALIGN(sizeof(uint32_t)) +
1446 		    NLMSG_ALIGN(sizeof(name)) +
1447 		    NLMSG_ALIGN(sizeof("vlan")) +
1448 		    NLMSG_ALIGN(sizeof(uint32_t)) +
1449 		    NLMSG_ALIGN(sizeof(uint16_t)) + 16];
1450 	struct nlattr *na_info;
1451 	struct nlattr *na_vlan;
1452 	uint32_t sn = MLX5_NL_SN_GENERATE;
1453 	int ret;
1454 
1455 	memset(buf, 0, sizeof(buf));
1456 	nlh = (struct nlmsghdr *)buf;
1457 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1458 	nlh->nlmsg_type = RTM_NEWLINK;
1459 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
1460 			   NLM_F_EXCL | NLM_F_ACK;
1461 	ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
1462 	nlh->nlmsg_len += sizeof(struct ifinfomsg);
1463 	ifm->ifi_family = AF_UNSPEC;
1464 	ifm->ifi_type = 0;
1465 	ifm->ifi_index = 0;
1466 	ifm->ifi_flags = IFF_UP;
1467 	ifm->ifi_change = 0xffffffff;
1468 	nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
1469 	ret = snprintf(name, sizeof(name), "%s.%u.%u",
1470 		       MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
1471 	nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
1472 	na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
1473 	nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
1474 	na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
1475 	nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
1476 	nl_attr_nest_end(nlh, na_vlan);
1477 	nl_attr_nest_end(nlh, na_info);
1478 	MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len);
1479 	ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn);
1480 	if (ret >= 0)
1481 		ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1482 	if (ret < 0) {
1483 		DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name,
1484 			ret);
1485 	}
1486 	/* Try to get ifindex of created or pre-existing device. */
1487 	ret = if_nametoindex(name);
1488 	if (!ret) {
1489 		DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name,
1490 			errno);
1491 		return 0;
1492 	}
1493 	return ret;
1494 }
1495 
/**
 * Parse Netlink message to retrieve the general family ID.
 *
 * The attributes following the Netlink and generic Netlink headers are
 * scanned for CTRL_ATTR_FAMILY_ID. Every attribute is validated against
 * the message length before it is read, so neither the attribute header
 * nor the 16-bit payload is ever fetched from beyond the message.
 *
 * @param nh
 *   Pointer to Netlink Message Header.
 * @param arg
 *   PMD data register with this callback, a pointer to the uint16_t
 *   receiving the family ID.
 *
 * @return
 *   0 on success, -EINVAL if the family ID attribute is missing or the
 *   message is malformed (rte_errno is not updated by this callback).
 */
static int
mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg)
{
	size_t off = NLMSG_ALIGN(sizeof(*nh)) +
		     NLMSG_ALIGN(sizeof(struct genlmsghdr));

	/* Stop as soon as an attribute header does not fit anymore. */
	while (off + NLA_HDRLEN <= nh->nlmsg_len) {
		const struct nlattr *nla = (const void *)((uintptr_t)nh + off);
		size_t left = nh->nlmsg_len - off;

		/* Reject truncated or oversized attributes. */
		if (nla->nla_len < NLA_HDRLEN || nla->nla_len > left)
			break;
		if (nla->nla_type == CTRL_ATTR_FAMILY_ID) {
			/* The payload must hold the whole 16-bit ID. */
			if (nla->nla_len < NLA_HDRLEN + sizeof(uint16_t))
				break;
			memcpy(arg, (const uint8_t *)nla + NLA_HDRLEN,
			       sizeof(uint16_t));
			return 0;
		}
		off += NLA_ALIGN(nla->nla_len);
	}
	return -EINVAL;
}
1524 
1525 #define MLX5_NL_MAX_ATTR_SIZE 100
1526 /**
1527  * Get generic netlink family ID.
1528  *
1529  * @param[in] nlsk_fd
1530  *   Netlink socket file descriptor.
1531  * @param[in] name
1532  *   The family name.
1533  *
1534  * @return
1535  *   ID >= 0 on success and @p enable is updated, a negative errno value
1536  *   otherwise and rte_errno is set.
1537  */
1538 static int
1539 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name)
1540 {
1541 	struct nlmsghdr *nlh;
1542 	struct genlmsghdr *genl;
1543 	uint32_t sn = MLX5_NL_SN_GENERATE;
1544 	int name_size = strlen(name) + 1;
1545 	int ret;
1546 	uint16_t id = -1;
1547 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1548 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1549 		    NLMSG_ALIGN(sizeof(struct nlattr)) +
1550 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)];
1551 
1552 	memset(buf, 0, sizeof(buf));
1553 	nlh = (struct nlmsghdr *)buf;
1554 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1555 	nlh->nlmsg_type = GENL_ID_CTRL;
1556 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1557 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1558 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
1559 	genl->cmd = CTRL_CMD_GETFAMILY;
1560 	genl->version = 1;
1561 	nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size);
1562 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1563 	if (ret >= 0)
1564 		ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id);
1565 	if (ret < 0) {
1566 		DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name,
1567 			ret);
1568 		return ret;
1569 	}
1570 	DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id);
1571 	return (int)id;
1572 }
1573 
1574 /**
1575  * Get Devlink family ID.
1576  *
1577  * @param[in] nlsk_fd
1578  *   Netlink socket file descriptor.
1579  *
1580  * @return
1581  *   ID >= 0 on success and @p enable is updated, a negative errno value
1582  *   otherwise and rte_errno is set.
1583  */
1584 
1585 int
1586 mlx5_nl_devlink_family_id_get(int nlsk_fd)
1587 {
1588 	return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME);
1589 }
1590 
/**
 * Parse Netlink message to retrieve the ROCE enable status.
 *
 * The attributes behind the Netlink and generic Netlink headers are
 * walked; the nested DEVLINK_ATTR_PARAM, DEVLINK_ATTR_PARAM_VALUES_LIST
 * and DEVLINK_ATTR_PARAM_VALUE attributes are entered, any other
 * attribute is skipped. The presence of DEVLINK_ATTR_PARAM_VALUE_DATA
 * means the parameter is enabled. The attribute header is read only
 * after checking that it lies within the message.
 *
 * @param nh
 *   Pointer to Netlink Message Header.
 * @param arg
 *   PMD data register with this callback, a pointer to the int receiving
 *   the enable status (1 or 0).
 *
 * @return
 *   0 on success, -EINVAL if none of the expected nested attributes was
 *   found (rte_errno is not updated by this callback).
 */
static int
mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg)
{
	int ret = -EINVAL;
	int *enable = arg;
	size_t off = NLMSG_ALIGN(sizeof(*nh)) +
		     NLMSG_ALIGN(sizeof(struct genlmsghdr));

	/* Check that the header fits before dereferencing it. */
	while (off + NLA_HDRLEN <= nh->nlmsg_len) {
		const struct nlattr *nla = (const void *)((uintptr_t)nh + off);

		if (!nla->nla_len)
			break;
		switch (nla->nla_type) {
		/* Expected nested attributes case. */
		case DEVLINK_ATTR_PARAM:
		case DEVLINK_ATTR_PARAM_VALUES_LIST:
		case DEVLINK_ATTR_PARAM_VALUE:
			ret = 0;
			/* Step into the nest: skip its header only. */
			off += NLA_HDRLEN;
			break;
		case DEVLINK_ATTR_PARAM_VALUE_DATA:
			*enable = 1;
			return 0;
		default:
			off += NLA_ALIGN(nla->nla_len);
		}
	}
	*enable = 0;
	return ret;
}
1631 
1632 /**
1633  * Get ROCE enable status through Netlink.
1634  *
1635  * @param[in] nlsk_fd
1636  *   Netlink socket file descriptor.
1637  * @param[in] family_id
1638  *   the Devlink family ID.
1639  * @param pci_addr
1640  *   The device PCI address.
1641  * @param[out] enable
1642  *   Where to store the enable status.
1643  *
1644  * @return
1645  *   0 on success and @p enable is updated, a negative errno value otherwise
1646  *   and rte_errno is set.
1647  */
1648 int
1649 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
1650 			int *enable)
1651 {
1652 	struct nlmsghdr *nlh;
1653 	struct genlmsghdr *genl;
1654 	uint32_t sn = MLX5_NL_SN_GENERATE;
1655 	int ret;
1656 	int cur_en = 0;
1657 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1658 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1659 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 4 +
1660 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4];
1661 
1662 	memset(buf, 0, sizeof(buf));
1663 	nlh = (struct nlmsghdr *)buf;
1664 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1665 	nlh->nlmsg_type = family_id;
1666 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1667 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1668 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
1669 	genl->cmd = DEVLINK_CMD_PARAM_GET;
1670 	genl->version = DEVLINK_GENL_VERSION;
1671 	nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1672 	nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1673 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1674 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1675 	if (ret >= 0)
1676 		ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en);
1677 	if (ret < 0) {
1678 		DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.",
1679 			pci_addr, ret);
1680 		return ret;
1681 	}
1682 	*enable = cur_en;
1683 	DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".",
1684 		cur_en ? "en" : "dis", pci_addr);
1685 	return ret;
1686 }
1687 
1688 /**
1689  * Reload mlx5 device kernel driver through Netlink.
1690  *
1691  * @param[in] nlsk_fd
1692  *   Netlink socket file descriptor.
1693  * @param[in] family_id
1694  *   the Devlink family ID.
1695  * @param pci_addr
1696  *   The device PCI address.
1697  * @param[out] enable
1698  *   The enable status to set.
1699  *
1700  * @return
1701  *   0 on success, a negative errno value otherwise and rte_errno is set.
1702  */
1703 int
1704 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr)
1705 {
1706 	struct nlmsghdr *nlh;
1707 	struct genlmsghdr *genl;
1708 	uint32_t sn = MLX5_NL_SN_GENERATE;
1709 	int ret;
1710 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1711 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1712 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 2 +
1713 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2];
1714 
1715 	memset(buf, 0, sizeof(buf));
1716 	nlh = (struct nlmsghdr *)buf;
1717 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1718 	nlh->nlmsg_type = family_id;
1719 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1720 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1721 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
1722 	genl->cmd = DEVLINK_CMD_RELOAD;
1723 	genl->version = DEVLINK_GENL_VERSION;
1724 	nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1725 	nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1726 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1727 	if (ret >= 0)
1728 		ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1729 	if (ret < 0) {
1730 		DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d",
1731 			pci_addr, ret);
1732 		return ret;
1733 	}
1734 	DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.",
1735 		pci_addr);
1736 	return 0;
1737 }
1738 
1739 /**
1740  * Set ROCE enable status through Netlink.
1741  *
1742  * @param[in] nlsk_fd
1743  *   Netlink socket file descriptor.
1744  * @param[in] family_id
1745  *   the Devlink family ID.
1746  * @param pci_addr
1747  *   The device PCI address.
1748  * @param[out] enable
1749  *   The enable status to set.
1750  *
1751  * @return
1752  *   0 on success, a negative errno value otherwise and rte_errno is set.
1753  */
1754 int
1755 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
1756 			int enable)
1757 {
1758 	struct nlmsghdr *nlh;
1759 	struct genlmsghdr *genl;
1760 	uint32_t sn = MLX5_NL_SN_GENERATE;
1761 	int ret;
1762 	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1763 		    NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1764 		    NLMSG_ALIGN(sizeof(struct nlattr)) * 6 +
1765 		    NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6];
1766 	uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT;
1767 	uint8_t ptype = NLA_FLAG;
1768 ;
1769 
1770 	memset(buf, 0, sizeof(buf));
1771 	nlh = (struct nlmsghdr *)buf;
1772 	nlh->nlmsg_len = sizeof(struct nlmsghdr);
1773 	nlh->nlmsg_type = family_id;
1774 	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1775 	genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1776 	nlh->nlmsg_len += sizeof(struct genlmsghdr);
1777 	genl->cmd = DEVLINK_CMD_PARAM_SET;
1778 	genl->version = DEVLINK_GENL_VERSION;
1779 	nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1780 	nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1781 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1782 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode));
1783 	nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype));
1784 	if (enable)
1785 		nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0);
1786 	ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1787 	if (ret >= 0)
1788 		ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1789 	if (ret < 0) {
1790 		DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:"
1791 			" %d.", enable ? "en" : "dis", pci_addr, ret);
1792 		return ret;
1793 	}
1794 	DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.",
1795 		pci_addr, enable ? "en" : "dis");
1796 	/* Now, need to reload the driver. */
1797 	return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);
1798 }
1799