xref: /dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c (revision 582e9d7765f8d4823d2924572567f3e245b835b8)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5 
6 #include <stddef.h>
7 #include <inttypes.h>
8 #include <unistd.h>
9 #include <stdbool.h>
10 #include <stdint.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <stdlib.h>
14 #include <errno.h>
15 #include <dirent.h>
16 #include <net/if.h>
17 #include <sys/ioctl.h>
18 #include <sys/socket.h>
19 #include <netinet/in.h>
20 #include <linux/ethtool.h>
21 #include <linux/sockios.h>
22 #include <fcntl.h>
23 #include <stdalign.h>
24 #include <sys/un.h>
25 #include <time.h>
26 
27 #include <rte_atomic.h>
28 #include <rte_ethdev_driver.h>
29 #include <rte_bus_pci.h>
30 #include <rte_mbuf.h>
31 #include <rte_common.h>
32 #include <rte_interrupts.h>
33 #include <rte_malloc.h>
34 #include <rte_string_fns.h>
35 #include <rte_rwlock.h>
36 #include <rte_cycles.h>
37 
38 #include <mlx5_glue.h>
39 #include <mlx5_devx_cmds.h>
40 #include <mlx5_common.h>
41 
42 #include "mlx5.h"
43 #include "mlx5_rxtx.h"
44 #include "mlx5_utils.h"
45 
/*
 * Supported speed values found in /usr/include/linux/ethtool.h.
 *
 * Each mask below is only defined here when the matching HAVE_* macro is
 * not defined, i.e. as a fallback for build environments whose headers do
 * not provide it. These are bit masks tested against the "supported" word
 * of the legacy ETHTOOL_GSET reply.
 */
#ifndef HAVE_SUPPORTED_40000baseKR4_Full
#define SUPPORTED_40000baseKR4_Full (1 << 23)
#endif
#ifndef HAVE_SUPPORTED_40000baseCR4_Full
#define SUPPORTED_40000baseCR4_Full (1 << 24)
#endif
#ifndef HAVE_SUPPORTED_40000baseSR4_Full
#define SUPPORTED_40000baseSR4_Full (1 << 25)
#endif
#ifndef HAVE_SUPPORTED_40000baseLR4_Full
#define SUPPORTED_40000baseLR4_Full (1 << 26)
#endif
#ifndef HAVE_SUPPORTED_56000baseKR4_Full
#define SUPPORTED_56000baseKR4_Full (1 << 27)
#endif
#ifndef HAVE_SUPPORTED_56000baseCR4_Full
#define SUPPORTED_56000baseCR4_Full (1 << 28)
#endif
#ifndef HAVE_SUPPORTED_56000baseSR4_Full
#define SUPPORTED_56000baseSR4_Full (1 << 29)
#endif
#ifndef HAVE_SUPPORTED_56000baseLR4_Full
#define SUPPORTED_56000baseLR4_Full (1 << 30)
#endif

/*
 * Add defines in case the running kernel is not the same as user headers.
 *
 * When the user headers do not know ETHTOOL_GLINKSETTINGS, a local copy of
 * the request structure and of the link mode bit numbers is provided so the
 * newer ioctl can still be attempted at run time.
 */
#ifndef ETHTOOL_GLINKSETTINGS
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	/* Number of 32-bit words per link mode mask. */
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	/* Trailing variable-length storage for the link mode masks. */
	uint32_t link_mode_masks[];
};

/* The kernel values can be found in /include/uapi/linux/ethtool.h */
#define ETHTOOL_GLINKSETTINGS 0x0000004c
/* The *_BIT values are bit positions (not masks) inside a link mode mask. */
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
/* Fallback bit positions for the 25G link modes. */
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
/* Fallback bit positions for the 50G link modes. */
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
/* Fallback bit positions for the 100G link modes. */
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif
/*
 * Fallback bit positions for the 200G link modes.
 *
 * The last three values are stored minus 64: this file tests them against a
 * second 64-bit value assembled from link_mode_masks[2] and
 * link_mode_masks[3], whereas bits 62 and 63 are tested against the first
 * 64-bit value assembled from link_mode_masks[0] and link_mode_masks[1].
 */
#ifndef HAVE_ETHTOOL_LINK_MODE_200G
#define ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT 62
#define ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT 63
#define ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT 0 /* 64 - 64 */
#define ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT 1 /* 65 - 64 */
#define ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT 2 /* 66 - 64 */
#endif
130 
131 /**
132  * Get master interface name from private structure.
133  *
134  * @param[in] dev
135  *   Pointer to Ethernet device.
136  * @param[out] ifname
137  *   Interface name output buffer.
138  *
139  * @return
140  *   0 on success, a negative errno value otherwise and rte_errno is set.
141  */
142 int
143 mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE])
144 {
145 	DIR *dir;
146 	struct dirent *dent;
147 	unsigned int dev_type = 0;
148 	unsigned int dev_port_prev = ~0u;
149 	char match[IF_NAMESIZE] = "";
150 
151 	MLX5_ASSERT(ibdev_path);
152 	{
153 		MKSTR(path, "%s/device/net", ibdev_path);
154 
155 		dir = opendir(path);
156 		if (dir == NULL) {
157 			rte_errno = errno;
158 			return -rte_errno;
159 		}
160 	}
161 	while ((dent = readdir(dir)) != NULL) {
162 		char *name = dent->d_name;
163 		FILE *file;
164 		unsigned int dev_port;
165 		int r;
166 
167 		if ((name[0] == '.') &&
168 		    ((name[1] == '\0') ||
169 		     ((name[1] == '.') && (name[2] == '\0'))))
170 			continue;
171 
172 		MKSTR(path, "%s/device/net/%s/%s",
173 		      ibdev_path, name,
174 		      (dev_type ? "dev_id" : "dev_port"));
175 
176 		file = fopen(path, "rb");
177 		if (file == NULL) {
178 			if (errno != ENOENT)
179 				continue;
180 			/*
181 			 * Switch to dev_id when dev_port does not exist as
182 			 * is the case with Linux kernel versions < 3.15.
183 			 */
184 try_dev_id:
185 			match[0] = '\0';
186 			if (dev_type)
187 				break;
188 			dev_type = 1;
189 			dev_port_prev = ~0u;
190 			rewinddir(dir);
191 			continue;
192 		}
193 		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
194 		fclose(file);
195 		if (r != 1)
196 			continue;
197 		/*
198 		 * Switch to dev_id when dev_port returns the same value for
199 		 * all ports. May happen when using a MOFED release older than
200 		 * 3.0 with a Linux kernel >= 3.15.
201 		 */
202 		if (dev_port == dev_port_prev)
203 			goto try_dev_id;
204 		dev_port_prev = dev_port;
205 		if (dev_port == 0)
206 			strlcpy(match, name, sizeof(match));
207 	}
208 	closedir(dir);
209 	if (match[0] == '\0') {
210 		rte_errno = ENOENT;
211 		return -rte_errno;
212 	}
213 	strncpy(*ifname, match, sizeof(*ifname));
214 	return 0;
215 }
216 
217 /**
218  * Get interface name from private structure.
219  *
220  * This is a port representor-aware version of mlx5_get_master_ifname().
221  *
222  * @param[in] dev
223  *   Pointer to Ethernet device.
224  * @param[out] ifname
225  *   Interface name output buffer.
226  *
227  * @return
228  *   0 on success, a negative errno value otherwise and rte_errno is set.
229  */
230 int
231 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
232 {
233 	struct mlx5_priv *priv = dev->data->dev_private;
234 	unsigned int ifindex;
235 
236 	MLX5_ASSERT(priv);
237 	MLX5_ASSERT(priv->sh);
238 	ifindex = mlx5_ifindex(dev);
239 	if (!ifindex) {
240 		if (!priv->representor)
241 			return mlx5_get_master_ifname(priv->sh->ibdev_path,
242 						      ifname);
243 		rte_errno = ENXIO;
244 		return -rte_errno;
245 	}
246 	if (if_indextoname(ifindex, &(*ifname)[0]))
247 		return 0;
248 	rte_errno = errno;
249 	return -rte_errno;
250 }
251 
252 /**
253  * Perform ifreq ioctl() on associated Ethernet device.
254  *
255  * @param[in] dev
256  *   Pointer to Ethernet device.
257  * @param req
258  *   Request number to pass to ioctl().
259  * @param[out] ifr
260  *   Interface request structure output buffer.
261  *
262  * @return
263  *   0 on success, a negative errno value otherwise and rte_errno is set.
264  */
265 int
266 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
267 {
268 	int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
269 	int ret = 0;
270 
271 	if (sock == -1) {
272 		rte_errno = errno;
273 		return -rte_errno;
274 	}
275 	ret = mlx5_get_ifname(dev, &ifr->ifr_name);
276 	if (ret)
277 		goto error;
278 	ret = ioctl(sock, req, ifr);
279 	if (ret == -1) {
280 		rte_errno = errno;
281 		goto error;
282 	}
283 	close(sock);
284 	return 0;
285 error:
286 	close(sock);
287 	return -rte_errno;
288 }
289 
290 /**
291  * Get device MTU.
292  *
293  * @param dev
294  *   Pointer to Ethernet device.
295  * @param[out] mtu
296  *   MTU value output buffer.
297  *
298  * @return
299  *   0 on success, a negative errno value otherwise and rte_errno is set.
300  */
301 int
302 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
303 {
304 	struct ifreq request;
305 	int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);
306 
307 	if (ret)
308 		return ret;
309 	*mtu = request.ifr_mtu;
310 	return 0;
311 }
312 
313 /**
314  * Set device MTU.
315  *
316  * @param dev
317  *   Pointer to Ethernet device.
318  * @param mtu
319  *   MTU value to set.
320  *
321  * @return
322  *   0 on success, a negative errno value otherwise and rte_errno is set.
323  */
324 int
325 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
326 {
327 	struct ifreq request = { .ifr_mtu = mtu, };
328 
329 	return mlx5_ifreq(dev, SIOCSIFMTU, &request);
330 }
331 
332 /**
333  * Set device flags.
334  *
335  * @param dev
336  *   Pointer to Ethernet device.
337  * @param keep
338  *   Bitmask for flags that must remain untouched.
339  * @param flags
340  *   Bitmask for flags to modify.
341  *
342  * @return
343  *   0 on success, a negative errno value otherwise and rte_errno is set.
344  */
345 int
346 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
347 {
348 	struct ifreq request;
349 	int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);
350 
351 	if (ret)
352 		return ret;
353 	request.ifr_flags &= keep;
354 	request.ifr_flags |= flags & ~keep;
355 	return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
356 }
357 
358 /**
359  * Get device current raw clock counter
360  *
361  * @param dev
362  *   Pointer to Ethernet device structure.
363  * @param[out] time
364  *   Current raw clock counter of the device.
365  *
366  * @return
367  *   0 if the clock has correctly been read
368  *   The value of errno in case of error
369  */
370 int
371 mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock)
372 {
373 	struct mlx5_priv *priv = dev->data->dev_private;
374 	struct ibv_context *ctx = priv->sh->ctx;
375 	struct ibv_values_ex values;
376 	int err = 0;
377 
378 	values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK;
379 	err = mlx5_glue->query_rt_values_ex(ctx, &values);
380 	if (err != 0) {
381 		DRV_LOG(WARNING, "Could not query the clock !");
382 		return err;
383 	}
384 	*clock = values.raw_clock.tv_nsec;
385 	return 0;
386 }
387 
388 /**
389  * Retrieve the master device for representor in the same switch domain.
390  *
391  * @param dev
392  *   Pointer to representor Ethernet device structure.
393  *
394  * @return
395  *   Master device structure  on success, NULL otherwise.
396  */
397 static struct rte_eth_dev *
398 mlx5_find_master_dev(struct rte_eth_dev *dev)
399 {
400 	struct mlx5_priv *priv;
401 	uint16_t port_id;
402 	uint16_t domain_id;
403 
404 	priv = dev->data->dev_private;
405 	domain_id = priv->domain_id;
406 	MLX5_ASSERT(priv->representor);
407 	MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
408 		struct mlx5_priv *opriv =
409 			rte_eth_devices[port_id].data->dev_private;
410 		if (opriv &&
411 		    opriv->master &&
412 		    opriv->domain_id == domain_id &&
413 		    opriv->sh == priv->sh)
414 			return &rte_eth_devices[port_id];
415 	}
416 	return NULL;
417 }
418 
419 /**
420  * DPDK callback to retrieve physical link information.
421  *
422  * @param dev
423  *   Pointer to Ethernet device structure.
424  * @param[out] link
425  *   Storage for current link status.
426  *
427  * @return
428  *   0 on success, a negative errno value otherwise and rte_errno is set.
429  */
430 static int
431 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
432 			       struct rte_eth_link *link)
433 {
434 	struct mlx5_priv *priv = dev->data->dev_private;
435 	struct ethtool_cmd edata = {
436 		.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
437 	};
438 	struct ifreq ifr;
439 	struct rte_eth_link dev_link;
440 	int link_speed = 0;
441 	int ret;
442 
443 	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
444 	if (ret) {
445 		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
446 			dev->data->port_id, strerror(rte_errno));
447 		return ret;
448 	}
449 	dev_link = (struct rte_eth_link) {
450 		.link_status = ((ifr.ifr_flags & IFF_UP) &&
451 				(ifr.ifr_flags & IFF_RUNNING)),
452 	};
453 	ifr = (struct ifreq) {
454 		.ifr_data = (void *)&edata,
455 	};
456 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
457 	if (ret) {
458 		if (ret == -ENOTSUP && priv->representor) {
459 			struct rte_eth_dev *master;
460 
461 			/*
462 			 * For representors we can try to inherit link
463 			 * settings from the master device. Actually
464 			 * link settings do not make a lot of sense
465 			 * for representors due to missing physical
466 			 * link. The old kernel drivers supported
467 			 * emulated settings query for representors,
468 			 * the new ones do not, so we have to add
469 			 * this code for compatibility issues.
470 			 */
471 			master = mlx5_find_master_dev(dev);
472 			if (master) {
473 				ifr = (struct ifreq) {
474 					.ifr_data = (void *)&edata,
475 				};
476 				ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
477 			}
478 		}
479 		if (ret) {
480 			DRV_LOG(WARNING,
481 				"port %u ioctl(SIOCETHTOOL,"
482 				" ETHTOOL_GSET) failed: %s",
483 				dev->data->port_id, strerror(rte_errno));
484 			return ret;
485 		}
486 	}
487 	link_speed = ethtool_cmd_speed(&edata);
488 	if (link_speed == -1)
489 		dev_link.link_speed = ETH_SPEED_NUM_NONE;
490 	else
491 		dev_link.link_speed = link_speed;
492 	priv->link_speed_capa = 0;
493 	if (edata.supported & SUPPORTED_Autoneg)
494 		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
495 	if (edata.supported & (SUPPORTED_1000baseT_Full |
496 			       SUPPORTED_1000baseKX_Full))
497 		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
498 	if (edata.supported & SUPPORTED_10000baseKR_Full)
499 		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
500 	if (edata.supported & (SUPPORTED_40000baseKR4_Full |
501 			       SUPPORTED_40000baseCR4_Full |
502 			       SUPPORTED_40000baseSR4_Full |
503 			       SUPPORTED_40000baseLR4_Full))
504 		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
505 	dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
506 				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
507 	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
508 			ETH_LINK_SPEED_FIXED);
509 	if (((dev_link.link_speed && !dev_link.link_status) ||
510 	     (!dev_link.link_speed && dev_link.link_status))) {
511 		rte_errno = EAGAIN;
512 		return -rte_errno;
513 	}
514 	*link = dev_link;
515 	return 0;
516 }
517 
518 /**
519  * Retrieve physical link information (unlocked version using new ioctl).
520  *
521  * @param dev
522  *   Pointer to Ethernet device structure.
523  * @param[out] link
524  *   Storage for current link status.
525  *
526  * @return
527  *   0 on success, a negative errno value otherwise and rte_errno is set.
528  */
529 static int
530 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
531 			     struct rte_eth_link *link)
532 
533 {
534 	struct mlx5_priv *priv = dev->data->dev_private;
535 	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
536 	struct ifreq ifr;
537 	struct rte_eth_link dev_link;
538 	struct rte_eth_dev *master = NULL;
539 	uint64_t sc;
540 	int ret;
541 
542 	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
543 	if (ret) {
544 		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
545 			dev->data->port_id, strerror(rte_errno));
546 		return ret;
547 	}
548 	dev_link = (struct rte_eth_link) {
549 		.link_status = ((ifr.ifr_flags & IFF_UP) &&
550 				(ifr.ifr_flags & IFF_RUNNING)),
551 	};
552 	ifr = (struct ifreq) {
553 		.ifr_data = (void *)&gcmd,
554 	};
555 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
556 	if (ret) {
557 		if (ret == -ENOTSUP && priv->representor) {
558 			/*
559 			 * For representors we can try to inherit link
560 			 * settings from the master device. Actually
561 			 * link settings do not make a lot of sense
562 			 * for representors due to missing physical
563 			 * link. The old kernel drivers supported
564 			 * emulated settings query for representors,
565 			 * the new ones do not, so we have to add
566 			 * this code for compatibility issues.
567 			 */
568 			master = mlx5_find_master_dev(dev);
569 			if (master) {
570 				ifr = (struct ifreq) {
571 					.ifr_data = (void *)&gcmd,
572 				};
573 				ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
574 			}
575 		}
576 		if (ret) {
577 			DRV_LOG(DEBUG,
578 				"port %u ioctl(SIOCETHTOOL,"
579 				" ETHTOOL_GLINKSETTINGS) failed: %s",
580 				dev->data->port_id, strerror(rte_errno));
581 			return ret;
582 		}
583 	}
584 	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;
585 
586 	alignas(struct ethtool_link_settings)
587 	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
588 		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
589 	struct ethtool_link_settings *ecmd = (void *)data;
590 
591 	*ecmd = gcmd;
592 	ifr.ifr_data = (void *)ecmd;
593 	ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr);
594 	if (ret) {
595 		DRV_LOG(DEBUG,
596 			"port %u ioctl(SIOCETHTOOL,"
597 			"ETHTOOL_GLINKSETTINGS) failed: %s",
598 			dev->data->port_id, strerror(rte_errno));
599 		return ret;
600 	}
601 	dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE :
602 							    ecmd->speed;
603 	sc = ecmd->link_mode_masks[0] |
604 		((uint64_t)ecmd->link_mode_masks[1] << 32);
605 	priv->link_speed_capa = 0;
606 	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
607 		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
608 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
609 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
610 		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
611 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
612 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
613 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
614 		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
615 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
616 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
617 		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
618 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
619 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
620 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
621 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
622 		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
623 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
624 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
625 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
626 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
627 		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
628 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
629 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
630 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
631 		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
632 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
633 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
634 		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
635 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
636 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
637 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
638 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
639 		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
640 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT) |
641 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT)))
642 		priv->link_speed_capa |= ETH_LINK_SPEED_200G;
643 
644 	sc = ecmd->link_mode_masks[2] |
645 		((uint64_t)ecmd->link_mode_masks[3] << 32);
646 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT) |
647 		  MLX5_BITSHIFT
648 		       (ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT) |
649 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT)))
650 		priv->link_speed_capa |= ETH_LINK_SPEED_200G;
651 	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
652 				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
653 	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
654 				  ETH_LINK_SPEED_FIXED);
655 	if (((dev_link.link_speed && !dev_link.link_status) ||
656 	     (!dev_link.link_speed && dev_link.link_status))) {
657 		rte_errno = EAGAIN;
658 		return -rte_errno;
659 	}
660 	*link = dev_link;
661 	return 0;
662 }
663 
664 /**
665  * DPDK callback to retrieve physical link information.
666  *
667  * @param dev
668  *   Pointer to Ethernet device structure.
669  * @param wait_to_complete
670  *   Wait for request completion.
671  *
672  * @return
673  *   0 if link status was not updated, positive if it was, a negative errno
674  *   value otherwise and rte_errno is set.
675  */
676 int
677 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
678 {
679 	int ret;
680 	struct rte_eth_link dev_link;
681 	time_t start_time = time(NULL);
682 	int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT;
683 
684 	do {
685 		ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
686 		if (ret == -ENOTSUP)
687 			ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
688 		if (ret == 0)
689 			break;
690 		/* Handle wait to complete situation. */
691 		if ((wait_to_complete || retry) && ret == -EAGAIN) {
692 			if (abs((int)difftime(time(NULL), start_time)) <
693 			    MLX5_LINK_STATUS_TIMEOUT) {
694 				usleep(0);
695 				continue;
696 			} else {
697 				rte_errno = EBUSY;
698 				return -rte_errno;
699 			}
700 		} else if (ret < 0) {
701 			return ret;
702 		}
703 	} while (wait_to_complete || retry-- > 0);
704 	ret = !!memcmp(&dev->data->dev_link, &dev_link,
705 		       sizeof(struct rte_eth_link));
706 	dev->data->dev_link = dev_link;
707 	return ret;
708 }
709 
710 /**
711  * DPDK callback to get flow control status.
712  *
713  * @param dev
714  *   Pointer to Ethernet device structure.
715  * @param[out] fc_conf
716  *   Flow control output buffer.
717  *
718  * @return
719  *   0 on success, a negative errno value otherwise and rte_errno is set.
720  */
721 int
722 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
723 {
724 	struct ifreq ifr;
725 	struct ethtool_pauseparam ethpause = {
726 		.cmd = ETHTOOL_GPAUSEPARAM
727 	};
728 	int ret;
729 
730 	ifr.ifr_data = (void *)&ethpause;
731 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
732 	if (ret) {
733 		DRV_LOG(WARNING,
734 			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
735 			" %s",
736 			dev->data->port_id, strerror(rte_errno));
737 		return ret;
738 	}
739 	fc_conf->autoneg = ethpause.autoneg;
740 	if (ethpause.rx_pause && ethpause.tx_pause)
741 		fc_conf->mode = RTE_FC_FULL;
742 	else if (ethpause.rx_pause)
743 		fc_conf->mode = RTE_FC_RX_PAUSE;
744 	else if (ethpause.tx_pause)
745 		fc_conf->mode = RTE_FC_TX_PAUSE;
746 	else
747 		fc_conf->mode = RTE_FC_NONE;
748 	return 0;
749 }
750 
751 /**
752  * DPDK callback to modify flow control parameters.
753  *
754  * @param dev
755  *   Pointer to Ethernet device structure.
756  * @param[in] fc_conf
757  *   Flow control parameters.
758  *
759  * @return
760  *   0 on success, a negative errno value otherwise and rte_errno is set.
761  */
762 int
763 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
764 {
765 	struct ifreq ifr;
766 	struct ethtool_pauseparam ethpause = {
767 		.cmd = ETHTOOL_SPAUSEPARAM
768 	};
769 	int ret;
770 
771 	ifr.ifr_data = (void *)&ethpause;
772 	ethpause.autoneg = fc_conf->autoneg;
773 	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
774 	    (fc_conf->mode & RTE_FC_RX_PAUSE))
775 		ethpause.rx_pause = 1;
776 	else
777 		ethpause.rx_pause = 0;
778 
779 	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
780 	    (fc_conf->mode & RTE_FC_TX_PAUSE))
781 		ethpause.tx_pause = 1;
782 	else
783 		ethpause.tx_pause = 0;
784 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
785 	if (ret) {
786 		DRV_LOG(WARNING,
787 			"port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
788 			" failed: %s",
789 			dev->data->port_id, strerror(rte_errno));
790 		return ret;
791 	}
792 	return 0;
793 }
794 
795 /**
796  * Handle asynchronous removal event for entire multiport device.
797  *
798  * @param sh
799  *   Infiniband device shared context.
800  */
801 static void
802 mlx5_dev_interrupt_device_fatal(struct mlx5_dev_ctx_shared *sh)
803 {
804 	uint32_t i;
805 
806 	for (i = 0; i < sh->max_port; ++i) {
807 		struct rte_eth_dev *dev;
808 
809 		if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) {
810 			/*
811 			 * Or not existing port either no
812 			 * handler installed for this port.
813 			 */
814 			continue;
815 		}
816 		dev = &rte_eth_devices[sh->port[i].ih_port_id];
817 		MLX5_ASSERT(dev);
818 		if (dev->data->dev_conf.intr_conf.rmv)
819 			_rte_eth_dev_callback_process
820 				(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
821 	}
822 }
823 
824 /**
825  * Handle shared asynchronous events the NIC (removal event
826  * and link status change). Supports multiport IB device.
827  *
828  * @param cb_arg
829  *   Callback argument.
830  */
831 void
832 mlx5_dev_interrupt_handler(void *cb_arg)
833 {
834 	struct mlx5_dev_ctx_shared *sh = cb_arg;
835 	struct ibv_async_event event;
836 
837 	/* Read all message from the IB device and acknowledge them. */
838 	for (;;) {
839 		struct rte_eth_dev *dev;
840 		uint32_t tmp;
841 
842 		if (mlx5_glue->get_async_event(sh->ctx, &event))
843 			break;
844 		/* Retrieve and check IB port index. */
845 		tmp = (uint32_t)event.element.port_num;
846 		if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) {
847 			/*
848 			 * The DEVICE_FATAL event is called once for
849 			 * entire device without port specifying.
850 			 * We should notify all existing ports.
851 			 */
852 			mlx5_glue->ack_async_event(&event);
853 			mlx5_dev_interrupt_device_fatal(sh);
854 			continue;
855 		}
856 		MLX5_ASSERT(tmp && (tmp <= sh->max_port));
857 		if (!tmp) {
858 			/* Unsupported device level event. */
859 			mlx5_glue->ack_async_event(&event);
860 			DRV_LOG(DEBUG,
861 				"unsupported common event (type %d)",
862 				event.event_type);
863 			continue;
864 		}
865 		if (tmp > sh->max_port) {
866 			/* Invalid IB port index. */
867 			mlx5_glue->ack_async_event(&event);
868 			DRV_LOG(DEBUG,
869 				"cannot handle an event (type %d)"
870 				"due to invalid IB port index (%u)",
871 				event.event_type, tmp);
872 			continue;
873 		}
874 		if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {
875 			/* No handler installed. */
876 			mlx5_glue->ack_async_event(&event);
877 			DRV_LOG(DEBUG,
878 				"cannot handle an event (type %d)"
879 				"due to no handler installed for port %u",
880 				event.event_type, tmp);
881 			continue;
882 		}
883 		/* Retrieve ethernet device descriptor. */
884 		tmp = sh->port[tmp - 1].ih_port_id;
885 		dev = &rte_eth_devices[tmp];
886 		MLX5_ASSERT(dev);
887 		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
888 		     event.event_type == IBV_EVENT_PORT_ERR) &&
889 			dev->data->dev_conf.intr_conf.lsc) {
890 			mlx5_glue->ack_async_event(&event);
891 			if (mlx5_link_update(dev, 0) == -EAGAIN) {
892 				usleep(0);
893 				continue;
894 			}
895 			_rte_eth_dev_callback_process
896 				(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
897 			continue;
898 		}
899 		DRV_LOG(DEBUG,
900 			"port %u cannot handle an unknown event (type %d)",
901 			dev->data->port_id, event.event_type);
902 		mlx5_glue->ack_async_event(&event);
903 	}
904 }
905 
906 /*
907  * Unregister callback handler safely. The handler may be active
908  * while we are trying to unregister it, in this case code -EAGAIN
909  * is returned by rte_intr_callback_unregister(). This routine checks
910  * the return code and tries to unregister handler again.
911  *
912  * @param handle
913  *   interrupt handle
914  * @param cb_fn
915  *   pointer to callback routine
916  * @cb_arg
917  *   opaque callback parameter
918  */
919 void
920 mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,
921 			      rte_intr_callback_fn cb_fn, void *cb_arg)
922 {
923 	/*
924 	 * Try to reduce timeout management overhead by not calling
925 	 * the timer related routines on the first iteration. If the
926 	 * unregistering succeeds on first call there will be no
927 	 * timer calls at all.
928 	 */
929 	uint64_t twait = 0;
930 	uint64_t start = 0;
931 
932 	do {
933 		int ret;
934 
935 		ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);
936 		if (ret >= 0)
937 			return;
938 		if (ret != -EAGAIN) {
939 			DRV_LOG(INFO, "failed to unregister interrupt"
940 				      " handler (error: %d)", ret);
941 			MLX5_ASSERT(false);
942 			return;
943 		}
944 		if (twait) {
945 			struct timespec onems;
946 
947 			/* Wait one millisecond and try again. */
948 			onems.tv_sec = 0;
949 			onems.tv_nsec = NS_PER_S / MS_PER_S;
950 			nanosleep(&onems, 0);
951 			/* Check whether one second elapsed. */
952 			if ((rte_get_timer_cycles() - start) <= twait)
953 				continue;
954 		} else {
955 			/*
956 			 * We get the amount of timer ticks for one second.
957 			 * If this amount elapsed it means we spent one
958 			 * second in waiting. This branch is executed once
959 			 * on first iteration.
960 			 */
961 			twait = rte_get_timer_hz();
962 			MLX5_ASSERT(twait);
963 		}
964 		/*
965 		 * Timeout elapsed, show message (once a second) and retry.
966 		 * We have no other acceptable option here, if we ignore
967 		 * the unregistering return code the handler will not
968 		 * be unregistered, fd will be closed and we may get the
969 		 * crush. Hanging and messaging in the loop seems not to be
970 		 * the worst choice.
971 		 */
972 		DRV_LOG(INFO, "Retrying to unregister interrupt handler");
973 		start = rte_get_timer_cycles();
974 	} while (true);
975 }
976 
/**
 * Handle DEVX interrupts from the NIC.
 * This function is probably called from the DPDK host thread.
 *
 * Completed asynchronous DevX commands are drained one by one and each
 * completion is handed to the flow counter pool query handler. Without
 * HAVE_IBV_DEVX_ASYNC the function does nothing.
 *
 * @param cb_arg
 *   Callback argument.
 */
void
mlx5_dev_interrupt_handler_devx(void *cb_arg)
{
#ifdef HAVE_IBV_DEVX_ASYNC
	struct mlx5_dev_ctx_shared *sh = cb_arg;
	union {
		struct mlx5dv_devx_async_cmd_hdr cmd_resp;
		uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
			    MLX5_ST_SZ_BYTES(traffic_counter) +
			    sizeof(struct mlx5dv_devx_async_cmd_hdr)];
	} out;
	/* The command output follows the completion header in the buffer. */
	uint8_t *cmd_out = out.buf + sizeof(out.cmd_resp);

	for (;;) {
		if (mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,
						       &out.cmd_resp,
						       sizeof(out.buf)) != 0)
			break;
		mlx5_flow_async_pool_query_handle
			(sh, (uint64_t)out.cmd_resp.wr_id,
			 mlx5_devx_get_out_command_status(cmd_out));
	}
#else
	(void)cb_arg;
#endif /* HAVE_IBV_DEVX_ASYNC */
}
1008 
1009 /**
1010  * DPDK callback to bring the link DOWN.
1011  *
1012  * @param dev
1013  *   Pointer to Ethernet device structure.
1014  *
1015  * @return
1016  *   0 on success, a negative errno value otherwise and rte_errno is set.
1017  */
1018 int
1019 mlx5_set_link_down(struct rte_eth_dev *dev)
1020 {
1021 	return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
1022 }
1023 
1024 /**
1025  * DPDK callback to bring the link UP.
1026  *
1027  * @param dev
1028  *   Pointer to Ethernet device structure.
1029  *
1030  * @return
1031  *   0 on success, a negative errno value otherwise and rte_errno is set.
1032  */
1033 int
1034 mlx5_set_link_up(struct rte_eth_dev *dev)
1035 {
1036 	return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
1037 }
1038 
1039 /**
1040  * Check if mlx5 device was removed.
1041  *
1042  * @param dev
1043  *   Pointer to Ethernet device structure.
1044  *
1045  * @return
1046  *   1 when device is removed, otherwise 0.
1047  */
1048 int
1049 mlx5_is_removed(struct rte_eth_dev *dev)
1050 {
1051 	struct ibv_device_attr device_attr;
1052 	struct mlx5_priv *priv = dev->data->dev_private;
1053 
1054 	if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO)
1055 		return 1;
1056 	return 0;
1057 }
1058 
1059 /**
1060  * Get switch information associated with network interface.
1061  *
1062  * @param ifindex
1063  *   Network interface index.
1064  * @param[out] info
1065  *   Switch information object, populated in case of success.
1066  *
1067  * @return
1068  *   0 on success, a negative errno value otherwise and rte_errno is set.
1069  */
1070 int
1071 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
1072 {
1073 	char ifname[IF_NAMESIZE];
1074 	char port_name[IF_NAMESIZE];
1075 	FILE *file;
1076 	struct mlx5_switch_info data = {
1077 		.master = 0,
1078 		.representor = 0,
1079 		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
1080 		.port_name = 0,
1081 		.switch_id = 0,
1082 	};
1083 	DIR *dir;
1084 	bool port_switch_id_set = false;
1085 	bool device_dir = false;
1086 	char c;
1087 	int ret;
1088 
1089 	if (!if_indextoname(ifindex, ifname)) {
1090 		rte_errno = errno;
1091 		return -rte_errno;
1092 	}
1093 
1094 	MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name",
1095 	      ifname);
1096 	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
1097 	      ifname);
1098 	MKSTR(pci_device, "/sys/class/net/%s/device",
1099 	      ifname);
1100 
1101 	file = fopen(phys_port_name, "rb");
1102 	if (file != NULL) {
1103 		ret = fscanf(file, "%s", port_name);
1104 		fclose(file);
1105 		if (ret == 1)
1106 			mlx5_translate_port_name(port_name, &data);
1107 	}
1108 	file = fopen(phys_switch_id, "rb");
1109 	if (file == NULL) {
1110 		rte_errno = errno;
1111 		return -rte_errno;
1112 	}
1113 	port_switch_id_set =
1114 		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
1115 		c == '\n';
1116 	fclose(file);
1117 	dir = opendir(pci_device);
1118 	if (dir != NULL) {
1119 		closedir(dir);
1120 		device_dir = true;
1121 	}
1122 	if (port_switch_id_set) {
1123 		/* We have some E-Switch configuration. */
1124 		mlx5_sysfs_check_switch_info(device_dir, &data);
1125 	}
1126 	*info = data;
1127 	MLX5_ASSERT(!(data.master && data.representor));
1128 	if (data.master && data.representor) {
1129 		DRV_LOG(ERR, "ifindex %u device is recognized as master"
1130 			     " and as representor", ifindex);
1131 		rte_errno = ENODEV;
1132 		return -rte_errno;
1133 	}
1134 	return 0;
1135 }
1136 
1137 /**
1138  * Analyze gathered port parameters via sysfs to recognize master
1139  * and representor devices for E-Switch configuration.
1140  *
1141  * @param[in] device_dir
1142  *   flag of presence of "device" directory under port device key.
1143  * @param[inout] switch_info
1144  *   Port information, including port name as a number and port name
1145  *   type if recognized
1146  *
1147  * @return
1148  *   master and representor flags are set in switch_info according to
1149  *   recognized parameters (if any).
1150  */
1151 void
1152 mlx5_sysfs_check_switch_info(bool device_dir,
1153 			     struct mlx5_switch_info *switch_info)
1154 {
1155 	switch (switch_info->name_type) {
1156 	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1157 		/*
1158 		 * Name is not recognized, assume the master,
1159 		 * check the device directory presence.
1160 		 */
1161 		switch_info->master = device_dir;
1162 		break;
1163 	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1164 		/*
1165 		 * Name is not set, this assumes the legacy naming
1166 		 * schema for master, just check if there is
1167 		 * a device directory.
1168 		 */
1169 		switch_info->master = device_dir;
1170 		break;
1171 	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1172 		/* New uplink naming schema recognized. */
1173 		switch_info->master = 1;
1174 		break;
1175 	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1176 		/* Legacy representors naming schema. */
1177 		switch_info->representor = !device_dir;
1178 		break;
1179 	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1180 		/* New representors naming schema. */
1181 		switch_info->representor = 1;
1182 		break;
1183 	}
1184 }
1185 
1186 /**
1187  * DPDK callback to retrieve plug-in module EEPROM information (type and size).
1188  *
1189  * @param dev
1190  *   Pointer to Ethernet device structure.
1191  * @param[out] modinfo
1192  *   Storage for plug-in module EEPROM information.
1193  *
1194  * @return
1195  *   0 on success, a negative errno value otherwise and rte_errno is set.
1196  */
1197 int
1198 mlx5_get_module_info(struct rte_eth_dev *dev,
1199 		     struct rte_eth_dev_module_info *modinfo)
1200 {
1201 	struct ethtool_modinfo info = {
1202 		.cmd = ETHTOOL_GMODULEINFO,
1203 	};
1204 	struct ifreq ifr = (struct ifreq) {
1205 		.ifr_data = (void *)&info,
1206 	};
1207 	int ret = 0;
1208 
1209 	if (!dev || !modinfo) {
1210 		DRV_LOG(WARNING, "missing argument, cannot get module info");
1211 		rte_errno = EINVAL;
1212 		return -rte_errno;
1213 	}
1214 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
1215 	if (ret) {
1216 		DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
1217 			dev->data->port_id, strerror(rte_errno));
1218 		return ret;
1219 	}
1220 	modinfo->type = info.type;
1221 	modinfo->eeprom_len = info.eeprom_len;
1222 	return ret;
1223 }
1224 
1225 /**
1226  * DPDK callback to retrieve plug-in module EEPROM data.
1227  *
1228  * @param dev
1229  *   Pointer to Ethernet device structure.
1230  * @param[out] info
1231  *   Storage for plug-in module EEPROM data.
1232  *
1233  * @return
1234  *   0 on success, a negative errno value otherwise and rte_errno is set.
1235  */
1236 int mlx5_get_module_eeprom(struct rte_eth_dev *dev,
1237 			   struct rte_dev_eeprom_info *info)
1238 {
1239 	struct ethtool_eeprom *eeprom;
1240 	struct ifreq ifr;
1241 	int ret = 0;
1242 
1243 	if (!dev || !info) {
1244 		DRV_LOG(WARNING, "missing argument, cannot get module eeprom");
1245 		rte_errno = EINVAL;
1246 		return -rte_errno;
1247 	}
1248 	eeprom = rte_calloc(__func__, 1,
1249 			    (sizeof(struct ethtool_eeprom) + info->length), 0);
1250 	if (!eeprom) {
1251 		DRV_LOG(WARNING, "port %u cannot allocate memory for "
1252 			"eeprom data", dev->data->port_id);
1253 		rte_errno = ENOMEM;
1254 		return -rte_errno;
1255 	}
1256 	eeprom->cmd = ETHTOOL_GMODULEEEPROM;
1257 	eeprom->offset = info->offset;
1258 	eeprom->len = info->length;
1259 	ifr = (struct ifreq) {
1260 		.ifr_data = (void *)eeprom,
1261 	};
1262 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
1263 	if (ret)
1264 		DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
1265 			dev->data->port_id, strerror(rte_errno));
1266 	else
1267 		rte_memcpy(info->data, eeprom->data, info->length);
1268 	rte_free(eeprom);
1269 	return ret;
1270 }
1271