xref: /dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c (revision 1256805dd54d3e0debd95c0420ca7f211bb9407c)
1*1256805dSOphir Munk /* SPDX-License-Identifier: BSD-3-Clause
2*1256805dSOphir Munk  * Copyright 2015 6WIND S.A.
3*1256805dSOphir Munk  * Copyright 2015 Mellanox Technologies, Ltd
4*1256805dSOphir Munk  */
5*1256805dSOphir Munk 
6*1256805dSOphir Munk #include <stddef.h>
7*1256805dSOphir Munk #include <inttypes.h>
8*1256805dSOphir Munk #include <unistd.h>
9*1256805dSOphir Munk #include <stdbool.h>
10*1256805dSOphir Munk #include <stdint.h>
11*1256805dSOphir Munk #include <stdio.h>
12*1256805dSOphir Munk #include <string.h>
13*1256805dSOphir Munk #include <stdlib.h>
14*1256805dSOphir Munk #include <errno.h>
15*1256805dSOphir Munk #include <dirent.h>
16*1256805dSOphir Munk #include <net/if.h>
17*1256805dSOphir Munk #include <sys/ioctl.h>
18*1256805dSOphir Munk #include <sys/socket.h>
19*1256805dSOphir Munk #include <netinet/in.h>
20*1256805dSOphir Munk #include <linux/ethtool.h>
21*1256805dSOphir Munk #include <linux/sockios.h>
22*1256805dSOphir Munk #include <fcntl.h>
23*1256805dSOphir Munk #include <stdalign.h>
24*1256805dSOphir Munk #include <sys/un.h>
25*1256805dSOphir Munk #include <time.h>
26*1256805dSOphir Munk 
27*1256805dSOphir Munk #include <rte_atomic.h>
28*1256805dSOphir Munk #include <rte_ethdev_driver.h>
29*1256805dSOphir Munk #include <rte_bus_pci.h>
30*1256805dSOphir Munk #include <rte_mbuf.h>
31*1256805dSOphir Munk #include <rte_common.h>
32*1256805dSOphir Munk #include <rte_interrupts.h>
33*1256805dSOphir Munk #include <rte_malloc.h>
34*1256805dSOphir Munk #include <rte_string_fns.h>
35*1256805dSOphir Munk #include <rte_rwlock.h>
36*1256805dSOphir Munk #include <rte_cycles.h>
37*1256805dSOphir Munk 
38*1256805dSOphir Munk #include <mlx5_glue.h>
39*1256805dSOphir Munk #include <mlx5_devx_cmds.h>
40*1256805dSOphir Munk #include <mlx5_common.h>
41*1256805dSOphir Munk 
42*1256805dSOphir Munk #include "mlx5.h"
43*1256805dSOphir Munk #include "mlx5_rxtx.h"
44*1256805dSOphir Munk #include "mlx5_utils.h"
45*1256805dSOphir Munk 
/* Supported speed values found in /usr/include/linux/ethtool.h */
#ifndef HAVE_SUPPORTED_40000baseKR4_Full
#define SUPPORTED_40000baseKR4_Full (1 << 23)
#endif
#ifndef HAVE_SUPPORTED_40000baseCR4_Full
#define SUPPORTED_40000baseCR4_Full (1 << 24)
#endif
#ifndef HAVE_SUPPORTED_40000baseSR4_Full
#define SUPPORTED_40000baseSR4_Full (1 << 25)
#endif
#ifndef HAVE_SUPPORTED_40000baseLR4_Full
#define SUPPORTED_40000baseLR4_Full (1 << 26)
#endif
#ifndef HAVE_SUPPORTED_56000baseKR4_Full
#define SUPPORTED_56000baseKR4_Full (1 << 27)
#endif
#ifndef HAVE_SUPPORTED_56000baseCR4_Full
#define SUPPORTED_56000baseCR4_Full (1 << 28)
#endif
#ifndef HAVE_SUPPORTED_56000baseSR4_Full
#define SUPPORTED_56000baseSR4_Full (1 << 29)
#endif
#ifndef HAVE_SUPPORTED_56000baseLR4_Full
#define SUPPORTED_56000baseLR4_Full (1 << 30)
#endif

/* Add defines in case the running kernel is not the same as user headers. */
#ifndef ETHTOOL_GLINKSETTINGS
/*
 * Minimal local mirror of the kernel's struct; the variable-length
 * link_mode_masks[] tail holds supported/advertising/lp_advertising
 * bitmaps, each link_mode_masks_nwords 32-bit words long.
 */
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	uint32_t link_mode_masks[];
};

/* The kernel values can be found in /include/uapi/linux/ethtool.h */
#define ETHTOOL_GLINKSETTINGS 0x0000004c
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_200G
#define ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT 62
#define ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT 63
/*
 * Kernel bits 64..66 do not fit in the first 64-bit mask word; the
 * values below are the bit positions within the SECOND word, hence
 * the "- 64" notes. Callers must test them against mask word [2]/[3].
 */
#define ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT 0 /* 64 - 64 */
#define ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT 1 /* 65 - 64 */
#define ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT 2 /* 66 - 64 */
#endif
130*1256805dSOphir Munk 
131*1256805dSOphir Munk /**
132*1256805dSOphir Munk  * Get master interface name from private structure.
133*1256805dSOphir Munk  *
 * @param[in] ibdev_path
 *   Path to the IB device directory in sysfs.
 * @param[out] ifname
 *   Interface name output buffer.
138*1256805dSOphir Munk  *
139*1256805dSOphir Munk  * @return
140*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
141*1256805dSOphir Munk  */
int
mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE])
{
	DIR *dir;
	struct dirent *dent;
	/* 0: read per-port "dev_port" files; 1: fall back to "dev_id". */
	unsigned int dev_type = 0;
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	MLX5_ASSERT(ibdev_path);
	{
		/* Net devices bound to the IB device live in device/net. */
		MKSTR(path, "%s/device/net", ibdev_path);

		dir = opendir(path);
		if (dir == NULL) {
			rte_errno = errno;
			return -rte_errno;
		}
	}
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		/* Skip the "." and ".." directory entries. */
		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			/* Unreadable for another reason: skip this netdev. */
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			/* Restart the whole scan in dev_id mode (once). */
			match[0] = '\0';
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			rewinddir(dir);
			continue;
		}
		/* dev_id is hexadecimal, dev_port is decimal. */
		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
		fclose(file);
		if (r != 1)
			continue;
		/*
		 * Switch to dev_id when dev_port returns the same value for
		 * all ports. May happen when using a MOFED release older than
		 * 3.0 with a Linux kernel >= 3.15.
		 */
		if (dev_port == dev_port_prev)
			goto try_dev_id;
		dev_port_prev = dev_port;
		/* Port 0 is the master interface. */
		if (dev_port == 0)
			strlcpy(match, name, sizeof(match));
	}
	closedir(dir);
	if (match[0] == '\0') {
		rte_errno = ENOENT;
		return -rte_errno;
	}
	/* match is NUL-terminated (strlcpy) and fits *ifname by size. */
	strncpy(*ifname, match, sizeof(*ifname));
	return 0;
}
216*1256805dSOphir Munk 
217*1256805dSOphir Munk /**
218*1256805dSOphir Munk  * Get interface name from private structure.
219*1256805dSOphir Munk  *
220*1256805dSOphir Munk  * This is a port representor-aware version of mlx5_get_master_ifname().
221*1256805dSOphir Munk  *
222*1256805dSOphir Munk  * @param[in] dev
223*1256805dSOphir Munk  *   Pointer to Ethernet device.
224*1256805dSOphir Munk  * @param[out] ifname
225*1256805dSOphir Munk  *   Interface name output buffer.
226*1256805dSOphir Munk  *
227*1256805dSOphir Munk  * @return
228*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
229*1256805dSOphir Munk  */
230*1256805dSOphir Munk int
231*1256805dSOphir Munk mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
232*1256805dSOphir Munk {
233*1256805dSOphir Munk 	struct mlx5_priv *priv = dev->data->dev_private;
234*1256805dSOphir Munk 	unsigned int ifindex;
235*1256805dSOphir Munk 
236*1256805dSOphir Munk 	MLX5_ASSERT(priv);
237*1256805dSOphir Munk 	MLX5_ASSERT(priv->sh);
238*1256805dSOphir Munk 	ifindex = mlx5_ifindex(dev);
239*1256805dSOphir Munk 	if (!ifindex) {
240*1256805dSOphir Munk 		if (!priv->representor)
241*1256805dSOphir Munk 			return mlx5_get_master_ifname(priv->sh->ibdev_path,
242*1256805dSOphir Munk 						      ifname);
243*1256805dSOphir Munk 		rte_errno = ENXIO;
244*1256805dSOphir Munk 		return -rte_errno;
245*1256805dSOphir Munk 	}
246*1256805dSOphir Munk 	if (if_indextoname(ifindex, &(*ifname)[0]))
247*1256805dSOphir Munk 		return 0;
248*1256805dSOphir Munk 	rte_errno = errno;
249*1256805dSOphir Munk 	return -rte_errno;
250*1256805dSOphir Munk }
251*1256805dSOphir Munk 
252*1256805dSOphir Munk /**
253*1256805dSOphir Munk  * Perform ifreq ioctl() on associated Ethernet device.
254*1256805dSOphir Munk  *
255*1256805dSOphir Munk  * @param[in] dev
256*1256805dSOphir Munk  *   Pointer to Ethernet device.
257*1256805dSOphir Munk  * @param req
258*1256805dSOphir Munk  *   Request number to pass to ioctl().
259*1256805dSOphir Munk  * @param[out] ifr
260*1256805dSOphir Munk  *   Interface request structure output buffer.
261*1256805dSOphir Munk  *
262*1256805dSOphir Munk  * @return
263*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
264*1256805dSOphir Munk  */
265*1256805dSOphir Munk int
266*1256805dSOphir Munk mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
267*1256805dSOphir Munk {
268*1256805dSOphir Munk 	int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
269*1256805dSOphir Munk 	int ret = 0;
270*1256805dSOphir Munk 
271*1256805dSOphir Munk 	if (sock == -1) {
272*1256805dSOphir Munk 		rte_errno = errno;
273*1256805dSOphir Munk 		return -rte_errno;
274*1256805dSOphir Munk 	}
275*1256805dSOphir Munk 	ret = mlx5_get_ifname(dev, &ifr->ifr_name);
276*1256805dSOphir Munk 	if (ret)
277*1256805dSOphir Munk 		goto error;
278*1256805dSOphir Munk 	ret = ioctl(sock, req, ifr);
279*1256805dSOphir Munk 	if (ret == -1) {
280*1256805dSOphir Munk 		rte_errno = errno;
281*1256805dSOphir Munk 		goto error;
282*1256805dSOphir Munk 	}
283*1256805dSOphir Munk 	close(sock);
284*1256805dSOphir Munk 	return 0;
285*1256805dSOphir Munk error:
286*1256805dSOphir Munk 	close(sock);
287*1256805dSOphir Munk 	return -rte_errno;
288*1256805dSOphir Munk }
289*1256805dSOphir Munk 
290*1256805dSOphir Munk /**
291*1256805dSOphir Munk  * Get device MTU.
292*1256805dSOphir Munk  *
293*1256805dSOphir Munk  * @param dev
294*1256805dSOphir Munk  *   Pointer to Ethernet device.
295*1256805dSOphir Munk  * @param[out] mtu
296*1256805dSOphir Munk  *   MTU value output buffer.
297*1256805dSOphir Munk  *
298*1256805dSOphir Munk  * @return
299*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
300*1256805dSOphir Munk  */
301*1256805dSOphir Munk int
302*1256805dSOphir Munk mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
303*1256805dSOphir Munk {
304*1256805dSOphir Munk 	struct ifreq request;
305*1256805dSOphir Munk 	int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);
306*1256805dSOphir Munk 
307*1256805dSOphir Munk 	if (ret)
308*1256805dSOphir Munk 		return ret;
309*1256805dSOphir Munk 	*mtu = request.ifr_mtu;
310*1256805dSOphir Munk 	return 0;
311*1256805dSOphir Munk }
312*1256805dSOphir Munk 
313*1256805dSOphir Munk /**
314*1256805dSOphir Munk  * Set device MTU.
315*1256805dSOphir Munk  *
316*1256805dSOphir Munk  * @param dev
317*1256805dSOphir Munk  *   Pointer to Ethernet device.
318*1256805dSOphir Munk  * @param mtu
319*1256805dSOphir Munk  *   MTU value to set.
320*1256805dSOphir Munk  *
321*1256805dSOphir Munk  * @return
322*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
323*1256805dSOphir Munk  */
324*1256805dSOphir Munk int
325*1256805dSOphir Munk mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
326*1256805dSOphir Munk {
327*1256805dSOphir Munk 	struct ifreq request = { .ifr_mtu = mtu, };
328*1256805dSOphir Munk 
329*1256805dSOphir Munk 	return mlx5_ifreq(dev, SIOCSIFMTU, &request);
330*1256805dSOphir Munk }
331*1256805dSOphir Munk 
332*1256805dSOphir Munk /**
333*1256805dSOphir Munk  * Set device flags.
334*1256805dSOphir Munk  *
335*1256805dSOphir Munk  * @param dev
336*1256805dSOphir Munk  *   Pointer to Ethernet device.
337*1256805dSOphir Munk  * @param keep
338*1256805dSOphir Munk  *   Bitmask for flags that must remain untouched.
339*1256805dSOphir Munk  * @param flags
340*1256805dSOphir Munk  *   Bitmask for flags to modify.
341*1256805dSOphir Munk  *
342*1256805dSOphir Munk  * @return
343*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
344*1256805dSOphir Munk  */
345*1256805dSOphir Munk int
346*1256805dSOphir Munk mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
347*1256805dSOphir Munk {
348*1256805dSOphir Munk 	struct ifreq request;
349*1256805dSOphir Munk 	int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);
350*1256805dSOphir Munk 
351*1256805dSOphir Munk 	if (ret)
352*1256805dSOphir Munk 		return ret;
353*1256805dSOphir Munk 	request.ifr_flags &= keep;
354*1256805dSOphir Munk 	request.ifr_flags |= flags & ~keep;
355*1256805dSOphir Munk 	return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
356*1256805dSOphir Munk }
357*1256805dSOphir Munk 
358*1256805dSOphir Munk /**
359*1256805dSOphir Munk  * Get device current raw clock counter
360*1256805dSOphir Munk  *
361*1256805dSOphir Munk  * @param dev
362*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
363*1256805dSOphir Munk  * @param[out] time
364*1256805dSOphir Munk  *   Current raw clock counter of the device.
365*1256805dSOphir Munk  *
366*1256805dSOphir Munk  * @return
367*1256805dSOphir Munk  *   0 if the clock has correctly been read
368*1256805dSOphir Munk  *   The value of errno in case of error
369*1256805dSOphir Munk  */
370*1256805dSOphir Munk int
371*1256805dSOphir Munk mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock)
372*1256805dSOphir Munk {
373*1256805dSOphir Munk 	struct mlx5_priv *priv = dev->data->dev_private;
374*1256805dSOphir Munk 	struct ibv_context *ctx = priv->sh->ctx;
375*1256805dSOphir Munk 	struct ibv_values_ex values;
376*1256805dSOphir Munk 	int err = 0;
377*1256805dSOphir Munk 
378*1256805dSOphir Munk 	values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK;
379*1256805dSOphir Munk 	err = mlx5_glue->query_rt_values_ex(ctx, &values);
380*1256805dSOphir Munk 	if (err != 0) {
381*1256805dSOphir Munk 		DRV_LOG(WARNING, "Could not query the clock !");
382*1256805dSOphir Munk 		return err;
383*1256805dSOphir Munk 	}
384*1256805dSOphir Munk 	*clock = values.raw_clock.tv_nsec;
385*1256805dSOphir Munk 	return 0;
386*1256805dSOphir Munk }
387*1256805dSOphir Munk 
388*1256805dSOphir Munk /**
389*1256805dSOphir Munk  * Retrieve the master device for representor in the same switch domain.
390*1256805dSOphir Munk  *
391*1256805dSOphir Munk  * @param dev
392*1256805dSOphir Munk  *   Pointer to representor Ethernet device structure.
393*1256805dSOphir Munk  *
394*1256805dSOphir Munk  * @return
395*1256805dSOphir Munk  *   Master device structure  on success, NULL otherwise.
396*1256805dSOphir Munk  */
397*1256805dSOphir Munk static struct rte_eth_dev *
398*1256805dSOphir Munk mlx5_find_master_dev(struct rte_eth_dev *dev)
399*1256805dSOphir Munk {
400*1256805dSOphir Munk 	struct mlx5_priv *priv;
401*1256805dSOphir Munk 	uint16_t port_id;
402*1256805dSOphir Munk 	uint16_t domain_id;
403*1256805dSOphir Munk 
404*1256805dSOphir Munk 	priv = dev->data->dev_private;
405*1256805dSOphir Munk 	domain_id = priv->domain_id;
406*1256805dSOphir Munk 	MLX5_ASSERT(priv->representor);
407*1256805dSOphir Munk 	MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
408*1256805dSOphir Munk 		struct mlx5_priv *opriv =
409*1256805dSOphir Munk 			rte_eth_devices[port_id].data->dev_private;
410*1256805dSOphir Munk 		if (opriv &&
411*1256805dSOphir Munk 		    opriv->master &&
412*1256805dSOphir Munk 		    opriv->domain_id == domain_id &&
413*1256805dSOphir Munk 		    opriv->sh == priv->sh)
414*1256805dSOphir Munk 			return &rte_eth_devices[port_id];
415*1256805dSOphir Munk 	}
416*1256805dSOphir Munk 	return NULL;
417*1256805dSOphir Munk }
418*1256805dSOphir Munk 
419*1256805dSOphir Munk /**
420*1256805dSOphir Munk  * DPDK callback to retrieve physical link information.
421*1256805dSOphir Munk  *
422*1256805dSOphir Munk  * @param dev
423*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
424*1256805dSOphir Munk  * @param[out] link
425*1256805dSOphir Munk  *   Storage for current link status.
426*1256805dSOphir Munk  *
427*1256805dSOphir Munk  * @return
428*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
429*1256805dSOphir Munk  */
static int
mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
			       struct rte_eth_link *link)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ethtool_cmd edata = {
		.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
	};
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	int link_speed = 0;
	int ret;

	/* Link status comes from the interface flags, not from ethtool. */
	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&edata,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		if (ret == -ENOTSUP && priv->representor) {
			struct rte_eth_dev *master;

			/*
			 * For representors we can try to inherit link
			 * settings from the master device. Actually
			 * link settings do not make a lot of sense
			 * for representors due to missing physical
			 * link. The old kernel drivers supported
			 * emulated settings query for representors,
			 * the new ones do not, so we have to add
			 * this code for compatibility issues.
			 */
			master = mlx5_find_master_dev(dev);
			if (master) {
				/* Retry the same query on the master port. */
				ifr = (struct ifreq) {
					.ifr_data = (void *)&edata,
				};
				ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
			}
		}
		if (ret) {
			DRV_LOG(WARNING,
				"port %u ioctl(SIOCETHTOOL,"
				" ETHTOOL_GSET) failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return ret;
		}
	}
	/* ethtool_cmd_speed() combines speed_hi/speed into one value. */
	link_speed = ethtool_cmd_speed(&edata);
	if (link_speed == -1)
		dev_link.link_speed = ETH_SPEED_NUM_NONE;
	else
		dev_link.link_speed = link_speed;
	/* Rebuild the speed-capability mask from the supported bits. */
	priv->link_speed_capa = 0;
	if (edata.supported & SUPPORTED_Autoneg)
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (edata.supported & (SUPPORTED_1000baseT_Full |
			       SUPPORTED_1000baseKX_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (edata.supported & SUPPORTED_10000baseKR_Full)
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (edata.supported & (SUPPORTED_40000baseKR4_Full |
			       SUPPORTED_40000baseCR4_Full |
			       SUPPORTED_40000baseSR4_Full |
			       SUPPORTED_40000baseLR4_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
			ETH_LINK_SPEED_FIXED);
	/*
	 * A speed without status (or vice versa) means the link is still
	 * settling; report EAGAIN so the caller retries the query.
	 */
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}
517*1256805dSOphir Munk 
518*1256805dSOphir Munk /**
519*1256805dSOphir Munk  * Retrieve physical link information (unlocked version using new ioctl).
520*1256805dSOphir Munk  *
521*1256805dSOphir Munk  * @param dev
522*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
523*1256805dSOphir Munk  * @param[out] link
524*1256805dSOphir Munk  *   Storage for current link status.
525*1256805dSOphir Munk  *
526*1256805dSOphir Munk  * @return
527*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
528*1256805dSOphir Munk  */
529*1256805dSOphir Munk static int
530*1256805dSOphir Munk mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
531*1256805dSOphir Munk 			     struct rte_eth_link *link)
532*1256805dSOphir Munk 
533*1256805dSOphir Munk {
534*1256805dSOphir Munk 	struct mlx5_priv *priv = dev->data->dev_private;
535*1256805dSOphir Munk 	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
536*1256805dSOphir Munk 	struct ifreq ifr;
537*1256805dSOphir Munk 	struct rte_eth_link dev_link;
538*1256805dSOphir Munk 	struct rte_eth_dev *master = NULL;
539*1256805dSOphir Munk 	uint64_t sc;
540*1256805dSOphir Munk 	int ret;
541*1256805dSOphir Munk 
542*1256805dSOphir Munk 	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
543*1256805dSOphir Munk 	if (ret) {
544*1256805dSOphir Munk 		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
545*1256805dSOphir Munk 			dev->data->port_id, strerror(rte_errno));
546*1256805dSOphir Munk 		return ret;
547*1256805dSOphir Munk 	}
548*1256805dSOphir Munk 	dev_link = (struct rte_eth_link) {
549*1256805dSOphir Munk 		.link_status = ((ifr.ifr_flags & IFF_UP) &&
550*1256805dSOphir Munk 				(ifr.ifr_flags & IFF_RUNNING)),
551*1256805dSOphir Munk 	};
552*1256805dSOphir Munk 	ifr = (struct ifreq) {
553*1256805dSOphir Munk 		.ifr_data = (void *)&gcmd,
554*1256805dSOphir Munk 	};
555*1256805dSOphir Munk 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
556*1256805dSOphir Munk 	if (ret) {
557*1256805dSOphir Munk 		if (ret == -ENOTSUP && priv->representor) {
558*1256805dSOphir Munk 			/*
559*1256805dSOphir Munk 			 * For representors we can try to inherit link
560*1256805dSOphir Munk 			 * settings from the master device. Actually
561*1256805dSOphir Munk 			 * link settings do not make a lot of sense
562*1256805dSOphir Munk 			 * for representors due to missing physical
563*1256805dSOphir Munk 			 * link. The old kernel drivers supported
564*1256805dSOphir Munk 			 * emulated settings query for representors,
565*1256805dSOphir Munk 			 * the new ones do not, so we have to add
566*1256805dSOphir Munk 			 * this code for compatibility issues.
567*1256805dSOphir Munk 			 */
568*1256805dSOphir Munk 			master = mlx5_find_master_dev(dev);
569*1256805dSOphir Munk 			if (master) {
570*1256805dSOphir Munk 				ifr = (struct ifreq) {
571*1256805dSOphir Munk 					.ifr_data = (void *)&gcmd,
572*1256805dSOphir Munk 				};
573*1256805dSOphir Munk 				ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
574*1256805dSOphir Munk 			}
575*1256805dSOphir Munk 		}
576*1256805dSOphir Munk 		if (ret) {
577*1256805dSOphir Munk 			DRV_LOG(DEBUG,
578*1256805dSOphir Munk 				"port %u ioctl(SIOCETHTOOL,"
579*1256805dSOphir Munk 				" ETHTOOL_GLINKSETTINGS) failed: %s",
580*1256805dSOphir Munk 				dev->data->port_id, strerror(rte_errno));
581*1256805dSOphir Munk 			return ret;
582*1256805dSOphir Munk 		}
583*1256805dSOphir Munk 	}
584*1256805dSOphir Munk 	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;
585*1256805dSOphir Munk 
586*1256805dSOphir Munk 	alignas(struct ethtool_link_settings)
587*1256805dSOphir Munk 	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
588*1256805dSOphir Munk 		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
589*1256805dSOphir Munk 	struct ethtool_link_settings *ecmd = (void *)data;
590*1256805dSOphir Munk 
591*1256805dSOphir Munk 	*ecmd = gcmd;
592*1256805dSOphir Munk 	ifr.ifr_data = (void *)ecmd;
593*1256805dSOphir Munk 	ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr);
594*1256805dSOphir Munk 	if (ret) {
595*1256805dSOphir Munk 		DRV_LOG(DEBUG,
596*1256805dSOphir Munk 			"port %u ioctl(SIOCETHTOOL,"
597*1256805dSOphir Munk 			"ETHTOOL_GLINKSETTINGS) failed: %s",
598*1256805dSOphir Munk 			dev->data->port_id, strerror(rte_errno));
599*1256805dSOphir Munk 		return ret;
600*1256805dSOphir Munk 	}
601*1256805dSOphir Munk 	dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE :
602*1256805dSOphir Munk 							    ecmd->speed;
603*1256805dSOphir Munk 	sc = ecmd->link_mode_masks[0] |
604*1256805dSOphir Munk 		((uint64_t)ecmd->link_mode_masks[1] << 32);
605*1256805dSOphir Munk 	priv->link_speed_capa = 0;
606*1256805dSOphir Munk 	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
607*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
608*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
609*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
610*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
611*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
612*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
613*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
614*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
615*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
616*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
617*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
618*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
619*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
620*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
621*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
622*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
623*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
624*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
625*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
626*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
627*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
628*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
629*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
630*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
631*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
632*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
633*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
634*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
635*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
636*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
637*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
638*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
639*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
640*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT) |
641*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT)))
642*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_200G;
643*1256805dSOphir Munk 
644*1256805dSOphir Munk 	sc = ecmd->link_mode_masks[2] |
645*1256805dSOphir Munk 		((uint64_t)ecmd->link_mode_masks[3] << 32);
646*1256805dSOphir Munk 	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT) |
647*1256805dSOphir Munk 		  MLX5_BITSHIFT
648*1256805dSOphir Munk 		       (ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT) |
649*1256805dSOphir Munk 		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT)))
650*1256805dSOphir Munk 		priv->link_speed_capa |= ETH_LINK_SPEED_200G;
651*1256805dSOphir Munk 	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
652*1256805dSOphir Munk 				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
653*1256805dSOphir Munk 	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
654*1256805dSOphir Munk 				  ETH_LINK_SPEED_FIXED);
655*1256805dSOphir Munk 	if (((dev_link.link_speed && !dev_link.link_status) ||
656*1256805dSOphir Munk 	     (!dev_link.link_speed && dev_link.link_status))) {
657*1256805dSOphir Munk 		rte_errno = EAGAIN;
658*1256805dSOphir Munk 		return -rte_errno;
659*1256805dSOphir Munk 	}
660*1256805dSOphir Munk 	*link = dev_link;
661*1256805dSOphir Munk 	return 0;
662*1256805dSOphir Munk }
663*1256805dSOphir Munk 
664*1256805dSOphir Munk /**
665*1256805dSOphir Munk  * DPDK callback to retrieve physical link information.
666*1256805dSOphir Munk  *
667*1256805dSOphir Munk  * @param dev
668*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
669*1256805dSOphir Munk  * @param wait_to_complete
670*1256805dSOphir Munk  *   Wait for request completion.
671*1256805dSOphir Munk  *
672*1256805dSOphir Munk  * @return
673*1256805dSOphir Munk  *   0 if link status was not updated, positive if it was, a negative errno
674*1256805dSOphir Munk  *   value otherwise and rte_errno is set.
675*1256805dSOphir Munk  */
676*1256805dSOphir Munk int
677*1256805dSOphir Munk mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
678*1256805dSOphir Munk {
679*1256805dSOphir Munk 	int ret;
680*1256805dSOphir Munk 	struct rte_eth_link dev_link;
681*1256805dSOphir Munk 	time_t start_time = time(NULL);
682*1256805dSOphir Munk 	int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT;
683*1256805dSOphir Munk 
684*1256805dSOphir Munk 	do {
685*1256805dSOphir Munk 		ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
686*1256805dSOphir Munk 		if (ret == -ENOTSUP)
687*1256805dSOphir Munk 			ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
688*1256805dSOphir Munk 		if (ret == 0)
689*1256805dSOphir Munk 			break;
690*1256805dSOphir Munk 		/* Handle wait to complete situation. */
691*1256805dSOphir Munk 		if ((wait_to_complete || retry) && ret == -EAGAIN) {
692*1256805dSOphir Munk 			if (abs((int)difftime(time(NULL), start_time)) <
693*1256805dSOphir Munk 			    MLX5_LINK_STATUS_TIMEOUT) {
694*1256805dSOphir Munk 				usleep(0);
695*1256805dSOphir Munk 				continue;
696*1256805dSOphir Munk 			} else {
697*1256805dSOphir Munk 				rte_errno = EBUSY;
698*1256805dSOphir Munk 				return -rte_errno;
699*1256805dSOphir Munk 			}
700*1256805dSOphir Munk 		} else if (ret < 0) {
701*1256805dSOphir Munk 			return ret;
702*1256805dSOphir Munk 		}
703*1256805dSOphir Munk 	} while (wait_to_complete || retry-- > 0);
704*1256805dSOphir Munk 	ret = !!memcmp(&dev->data->dev_link, &dev_link,
705*1256805dSOphir Munk 		       sizeof(struct rte_eth_link));
706*1256805dSOphir Munk 	dev->data->dev_link = dev_link;
707*1256805dSOphir Munk 	return ret;
708*1256805dSOphir Munk }
709*1256805dSOphir Munk 
710*1256805dSOphir Munk /**
711*1256805dSOphir Munk  * DPDK callback to get flow control status.
712*1256805dSOphir Munk  *
713*1256805dSOphir Munk  * @param dev
714*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
715*1256805dSOphir Munk  * @param[out] fc_conf
716*1256805dSOphir Munk  *   Flow control output buffer.
717*1256805dSOphir Munk  *
718*1256805dSOphir Munk  * @return
719*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
720*1256805dSOphir Munk  */
721*1256805dSOphir Munk int
722*1256805dSOphir Munk mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
723*1256805dSOphir Munk {
724*1256805dSOphir Munk 	struct ifreq ifr;
725*1256805dSOphir Munk 	struct ethtool_pauseparam ethpause = {
726*1256805dSOphir Munk 		.cmd = ETHTOOL_GPAUSEPARAM
727*1256805dSOphir Munk 	};
728*1256805dSOphir Munk 	int ret;
729*1256805dSOphir Munk 
730*1256805dSOphir Munk 	ifr.ifr_data = (void *)&ethpause;
731*1256805dSOphir Munk 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
732*1256805dSOphir Munk 	if (ret) {
733*1256805dSOphir Munk 		DRV_LOG(WARNING,
734*1256805dSOphir Munk 			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
735*1256805dSOphir Munk 			" %s",
736*1256805dSOphir Munk 			dev->data->port_id, strerror(rte_errno));
737*1256805dSOphir Munk 		return ret;
738*1256805dSOphir Munk 	}
739*1256805dSOphir Munk 	fc_conf->autoneg = ethpause.autoneg;
740*1256805dSOphir Munk 	if (ethpause.rx_pause && ethpause.tx_pause)
741*1256805dSOphir Munk 		fc_conf->mode = RTE_FC_FULL;
742*1256805dSOphir Munk 	else if (ethpause.rx_pause)
743*1256805dSOphir Munk 		fc_conf->mode = RTE_FC_RX_PAUSE;
744*1256805dSOphir Munk 	else if (ethpause.tx_pause)
745*1256805dSOphir Munk 		fc_conf->mode = RTE_FC_TX_PAUSE;
746*1256805dSOphir Munk 	else
747*1256805dSOphir Munk 		fc_conf->mode = RTE_FC_NONE;
748*1256805dSOphir Munk 	return 0;
749*1256805dSOphir Munk }
750*1256805dSOphir Munk 
751*1256805dSOphir Munk /**
752*1256805dSOphir Munk  * DPDK callback to modify flow control parameters.
753*1256805dSOphir Munk  *
754*1256805dSOphir Munk  * @param dev
755*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
756*1256805dSOphir Munk  * @param[in] fc_conf
757*1256805dSOphir Munk  *   Flow control parameters.
758*1256805dSOphir Munk  *
759*1256805dSOphir Munk  * @return
760*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
761*1256805dSOphir Munk  */
762*1256805dSOphir Munk int
763*1256805dSOphir Munk mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
764*1256805dSOphir Munk {
765*1256805dSOphir Munk 	struct ifreq ifr;
766*1256805dSOphir Munk 	struct ethtool_pauseparam ethpause = {
767*1256805dSOphir Munk 		.cmd = ETHTOOL_SPAUSEPARAM
768*1256805dSOphir Munk 	};
769*1256805dSOphir Munk 	int ret;
770*1256805dSOphir Munk 
771*1256805dSOphir Munk 	ifr.ifr_data = (void *)&ethpause;
772*1256805dSOphir Munk 	ethpause.autoneg = fc_conf->autoneg;
773*1256805dSOphir Munk 	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
774*1256805dSOphir Munk 	    (fc_conf->mode & RTE_FC_RX_PAUSE))
775*1256805dSOphir Munk 		ethpause.rx_pause = 1;
776*1256805dSOphir Munk 	else
777*1256805dSOphir Munk 		ethpause.rx_pause = 0;
778*1256805dSOphir Munk 
779*1256805dSOphir Munk 	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
780*1256805dSOphir Munk 	    (fc_conf->mode & RTE_FC_TX_PAUSE))
781*1256805dSOphir Munk 		ethpause.tx_pause = 1;
782*1256805dSOphir Munk 	else
783*1256805dSOphir Munk 		ethpause.tx_pause = 0;
784*1256805dSOphir Munk 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
785*1256805dSOphir Munk 	if (ret) {
786*1256805dSOphir Munk 		DRV_LOG(WARNING,
787*1256805dSOphir Munk 			"port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
788*1256805dSOphir Munk 			" failed: %s",
789*1256805dSOphir Munk 			dev->data->port_id, strerror(rte_errno));
790*1256805dSOphir Munk 		return ret;
791*1256805dSOphir Munk 	}
792*1256805dSOphir Munk 	return 0;
793*1256805dSOphir Munk }
794*1256805dSOphir Munk 
795*1256805dSOphir Munk /**
796*1256805dSOphir Munk  * Handle asynchronous removal event for entire multiport device.
797*1256805dSOphir Munk  *
798*1256805dSOphir Munk  * @param sh
799*1256805dSOphir Munk  *   Infiniband device shared context.
800*1256805dSOphir Munk  */
801*1256805dSOphir Munk static void
802*1256805dSOphir Munk mlx5_dev_interrupt_device_fatal(struct mlx5_dev_ctx_shared *sh)
803*1256805dSOphir Munk {
804*1256805dSOphir Munk 	uint32_t i;
805*1256805dSOphir Munk 
806*1256805dSOphir Munk 	for (i = 0; i < sh->max_port; ++i) {
807*1256805dSOphir Munk 		struct rte_eth_dev *dev;
808*1256805dSOphir Munk 
809*1256805dSOphir Munk 		if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) {
810*1256805dSOphir Munk 			/*
811*1256805dSOphir Munk 			 * Or not existing port either no
812*1256805dSOphir Munk 			 * handler installed for this port.
813*1256805dSOphir Munk 			 */
814*1256805dSOphir Munk 			continue;
815*1256805dSOphir Munk 		}
816*1256805dSOphir Munk 		dev = &rte_eth_devices[sh->port[i].ih_port_id];
817*1256805dSOphir Munk 		MLX5_ASSERT(dev);
818*1256805dSOphir Munk 		if (dev->data->dev_conf.intr_conf.rmv)
819*1256805dSOphir Munk 			_rte_eth_dev_callback_process
820*1256805dSOphir Munk 				(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
821*1256805dSOphir Munk 	}
822*1256805dSOphir Munk }
823*1256805dSOphir Munk 
824*1256805dSOphir Munk /**
825*1256805dSOphir Munk  * Handle shared asynchronous events the NIC (removal event
826*1256805dSOphir Munk  * and link status change). Supports multiport IB device.
827*1256805dSOphir Munk  *
828*1256805dSOphir Munk  * @param cb_arg
829*1256805dSOphir Munk  *   Callback argument.
830*1256805dSOphir Munk  */
831*1256805dSOphir Munk void
832*1256805dSOphir Munk mlx5_dev_interrupt_handler(void *cb_arg)
833*1256805dSOphir Munk {
834*1256805dSOphir Munk 	struct mlx5_dev_ctx_shared *sh = cb_arg;
835*1256805dSOphir Munk 	struct ibv_async_event event;
836*1256805dSOphir Munk 
837*1256805dSOphir Munk 	/* Read all message from the IB device and acknowledge them. */
838*1256805dSOphir Munk 	for (;;) {
839*1256805dSOphir Munk 		struct rte_eth_dev *dev;
840*1256805dSOphir Munk 		uint32_t tmp;
841*1256805dSOphir Munk 
842*1256805dSOphir Munk 		if (mlx5_glue->get_async_event(sh->ctx, &event))
843*1256805dSOphir Munk 			break;
844*1256805dSOphir Munk 		/* Retrieve and check IB port index. */
845*1256805dSOphir Munk 		tmp = (uint32_t)event.element.port_num;
846*1256805dSOphir Munk 		if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) {
847*1256805dSOphir Munk 			/*
848*1256805dSOphir Munk 			 * The DEVICE_FATAL event is called once for
849*1256805dSOphir Munk 			 * entire device without port specifying.
850*1256805dSOphir Munk 			 * We should notify all existing ports.
851*1256805dSOphir Munk 			 */
852*1256805dSOphir Munk 			mlx5_glue->ack_async_event(&event);
853*1256805dSOphir Munk 			mlx5_dev_interrupt_device_fatal(sh);
854*1256805dSOphir Munk 			continue;
855*1256805dSOphir Munk 		}
856*1256805dSOphir Munk 		MLX5_ASSERT(tmp && (tmp <= sh->max_port));
857*1256805dSOphir Munk 		if (!tmp) {
858*1256805dSOphir Munk 			/* Unsupported device level event. */
859*1256805dSOphir Munk 			mlx5_glue->ack_async_event(&event);
860*1256805dSOphir Munk 			DRV_LOG(DEBUG,
861*1256805dSOphir Munk 				"unsupported common event (type %d)",
862*1256805dSOphir Munk 				event.event_type);
863*1256805dSOphir Munk 			continue;
864*1256805dSOphir Munk 		}
865*1256805dSOphir Munk 		if (tmp > sh->max_port) {
866*1256805dSOphir Munk 			/* Invalid IB port index. */
867*1256805dSOphir Munk 			mlx5_glue->ack_async_event(&event);
868*1256805dSOphir Munk 			DRV_LOG(DEBUG,
869*1256805dSOphir Munk 				"cannot handle an event (type %d)"
870*1256805dSOphir Munk 				"due to invalid IB port index (%u)",
871*1256805dSOphir Munk 				event.event_type, tmp);
872*1256805dSOphir Munk 			continue;
873*1256805dSOphir Munk 		}
874*1256805dSOphir Munk 		if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {
875*1256805dSOphir Munk 			/* No handler installed. */
876*1256805dSOphir Munk 			mlx5_glue->ack_async_event(&event);
877*1256805dSOphir Munk 			DRV_LOG(DEBUG,
878*1256805dSOphir Munk 				"cannot handle an event (type %d)"
879*1256805dSOphir Munk 				"due to no handler installed for port %u",
880*1256805dSOphir Munk 				event.event_type, tmp);
881*1256805dSOphir Munk 			continue;
882*1256805dSOphir Munk 		}
883*1256805dSOphir Munk 		/* Retrieve ethernet device descriptor. */
884*1256805dSOphir Munk 		tmp = sh->port[tmp - 1].ih_port_id;
885*1256805dSOphir Munk 		dev = &rte_eth_devices[tmp];
886*1256805dSOphir Munk 		MLX5_ASSERT(dev);
887*1256805dSOphir Munk 		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
888*1256805dSOphir Munk 		     event.event_type == IBV_EVENT_PORT_ERR) &&
889*1256805dSOphir Munk 			dev->data->dev_conf.intr_conf.lsc) {
890*1256805dSOphir Munk 			mlx5_glue->ack_async_event(&event);
891*1256805dSOphir Munk 			if (mlx5_link_update(dev, 0) == -EAGAIN) {
892*1256805dSOphir Munk 				usleep(0);
893*1256805dSOphir Munk 				continue;
894*1256805dSOphir Munk 			}
895*1256805dSOphir Munk 			_rte_eth_dev_callback_process
896*1256805dSOphir Munk 				(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
897*1256805dSOphir Munk 			continue;
898*1256805dSOphir Munk 		}
899*1256805dSOphir Munk 		DRV_LOG(DEBUG,
900*1256805dSOphir Munk 			"port %u cannot handle an unknown event (type %d)",
901*1256805dSOphir Munk 			dev->data->port_id, event.event_type);
902*1256805dSOphir Munk 		mlx5_glue->ack_async_event(&event);
903*1256805dSOphir Munk 	}
904*1256805dSOphir Munk }
905*1256805dSOphir Munk 
906*1256805dSOphir Munk /*
907*1256805dSOphir Munk  * Unregister callback handler safely. The handler may be active
908*1256805dSOphir Munk  * while we are trying to unregister it, in this case code -EAGAIN
909*1256805dSOphir Munk  * is returned by rte_intr_callback_unregister(). This routine checks
910*1256805dSOphir Munk  * the return code and tries to unregister handler again.
911*1256805dSOphir Munk  *
912*1256805dSOphir Munk  * @param handle
913*1256805dSOphir Munk  *   interrupt handle
914*1256805dSOphir Munk  * @param cb_fn
915*1256805dSOphir Munk  *   pointer to callback routine
916*1256805dSOphir Munk  * @cb_arg
917*1256805dSOphir Munk  *   opaque callback parameter
918*1256805dSOphir Munk  */
919*1256805dSOphir Munk void
920*1256805dSOphir Munk mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,
921*1256805dSOphir Munk 			      rte_intr_callback_fn cb_fn, void *cb_arg)
922*1256805dSOphir Munk {
923*1256805dSOphir Munk 	/*
924*1256805dSOphir Munk 	 * Try to reduce timeout management overhead by not calling
925*1256805dSOphir Munk 	 * the timer related routines on the first iteration. If the
926*1256805dSOphir Munk 	 * unregistering succeeds on first call there will be no
927*1256805dSOphir Munk 	 * timer calls at all.
928*1256805dSOphir Munk 	 */
929*1256805dSOphir Munk 	uint64_t twait = 0;
930*1256805dSOphir Munk 	uint64_t start = 0;
931*1256805dSOphir Munk 
932*1256805dSOphir Munk 	do {
933*1256805dSOphir Munk 		int ret;
934*1256805dSOphir Munk 
935*1256805dSOphir Munk 		ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);
936*1256805dSOphir Munk 		if (ret >= 0)
937*1256805dSOphir Munk 			return;
938*1256805dSOphir Munk 		if (ret != -EAGAIN) {
939*1256805dSOphir Munk 			DRV_LOG(INFO, "failed to unregister interrupt"
940*1256805dSOphir Munk 				      " handler (error: %d)", ret);
941*1256805dSOphir Munk 			MLX5_ASSERT(false);
942*1256805dSOphir Munk 			return;
943*1256805dSOphir Munk 		}
944*1256805dSOphir Munk 		if (twait) {
945*1256805dSOphir Munk 			struct timespec onems;
946*1256805dSOphir Munk 
947*1256805dSOphir Munk 			/* Wait one millisecond and try again. */
948*1256805dSOphir Munk 			onems.tv_sec = 0;
949*1256805dSOphir Munk 			onems.tv_nsec = NS_PER_S / MS_PER_S;
950*1256805dSOphir Munk 			nanosleep(&onems, 0);
951*1256805dSOphir Munk 			/* Check whether one second elapsed. */
952*1256805dSOphir Munk 			if ((rte_get_timer_cycles() - start) <= twait)
953*1256805dSOphir Munk 				continue;
954*1256805dSOphir Munk 		} else {
955*1256805dSOphir Munk 			/*
956*1256805dSOphir Munk 			 * We get the amount of timer ticks for one second.
957*1256805dSOphir Munk 			 * If this amount elapsed it means we spent one
958*1256805dSOphir Munk 			 * second in waiting. This branch is executed once
959*1256805dSOphir Munk 			 * on first iteration.
960*1256805dSOphir Munk 			 */
961*1256805dSOphir Munk 			twait = rte_get_timer_hz();
962*1256805dSOphir Munk 			MLX5_ASSERT(twait);
963*1256805dSOphir Munk 		}
964*1256805dSOphir Munk 		/*
965*1256805dSOphir Munk 		 * Timeout elapsed, show message (once a second) and retry.
966*1256805dSOphir Munk 		 * We have no other acceptable option here, if we ignore
967*1256805dSOphir Munk 		 * the unregistering return code the handler will not
968*1256805dSOphir Munk 		 * be unregistered, fd will be closed and we may get the
969*1256805dSOphir Munk 		 * crush. Hanging and messaging in the loop seems not to be
970*1256805dSOphir Munk 		 * the worst choice.
971*1256805dSOphir Munk 		 */
972*1256805dSOphir Munk 		DRV_LOG(INFO, "Retrying to unregister interrupt handler");
973*1256805dSOphir Munk 		start = rte_get_timer_cycles();
974*1256805dSOphir Munk 	} while (true);
975*1256805dSOphir Munk }
976*1256805dSOphir Munk 
977*1256805dSOphir Munk /**
978*1256805dSOphir Munk  * Handle DEVX interrupts from the NIC.
979*1256805dSOphir Munk  * This function is probably called from the DPDK host thread.
980*1256805dSOphir Munk  *
981*1256805dSOphir Munk  * @param cb_arg
982*1256805dSOphir Munk  *   Callback argument.
983*1256805dSOphir Munk  */
984*1256805dSOphir Munk void
985*1256805dSOphir Munk mlx5_dev_interrupt_handler_devx(void *cb_arg)
986*1256805dSOphir Munk {
987*1256805dSOphir Munk #ifndef HAVE_IBV_DEVX_ASYNC
988*1256805dSOphir Munk 	(void)cb_arg;
989*1256805dSOphir Munk 	return;
990*1256805dSOphir Munk #else
991*1256805dSOphir Munk 	struct mlx5_dev_ctx_shared *sh = cb_arg;
992*1256805dSOphir Munk 	union {
993*1256805dSOphir Munk 		struct mlx5dv_devx_async_cmd_hdr cmd_resp;
994*1256805dSOphir Munk 		uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
995*1256805dSOphir Munk 			    MLX5_ST_SZ_BYTES(traffic_counter) +
996*1256805dSOphir Munk 			    sizeof(struct mlx5dv_devx_async_cmd_hdr)];
997*1256805dSOphir Munk 	} out;
998*1256805dSOphir Munk 	uint8_t *buf = out.buf + sizeof(out.cmd_resp);
999*1256805dSOphir Munk 
1000*1256805dSOphir Munk 	while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,
1001*1256805dSOphir Munk 						   &out.cmd_resp,
1002*1256805dSOphir Munk 						   sizeof(out.buf)))
1003*1256805dSOphir Munk 		mlx5_flow_async_pool_query_handle
1004*1256805dSOphir Munk 			(sh, (uint64_t)out.cmd_resp.wr_id,
1005*1256805dSOphir Munk 			 mlx5_devx_get_out_command_status(buf));
1006*1256805dSOphir Munk #endif /* HAVE_IBV_DEVX_ASYNC */
1007*1256805dSOphir Munk }
1008*1256805dSOphir Munk 
1009*1256805dSOphir Munk /**
1010*1256805dSOphir Munk  * DPDK callback to bring the link DOWN.
1011*1256805dSOphir Munk  *
1012*1256805dSOphir Munk  * @param dev
1013*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
1014*1256805dSOphir Munk  *
1015*1256805dSOphir Munk  * @return
1016*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
1017*1256805dSOphir Munk  */
1018*1256805dSOphir Munk int
1019*1256805dSOphir Munk mlx5_set_link_down(struct rte_eth_dev *dev)
1020*1256805dSOphir Munk {
1021*1256805dSOphir Munk 	return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
1022*1256805dSOphir Munk }
1023*1256805dSOphir Munk 
1024*1256805dSOphir Munk /**
1025*1256805dSOphir Munk  * DPDK callback to bring the link UP.
1026*1256805dSOphir Munk  *
1027*1256805dSOphir Munk  * @param dev
1028*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
1029*1256805dSOphir Munk  *
1030*1256805dSOphir Munk  * @return
1031*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
1032*1256805dSOphir Munk  */
1033*1256805dSOphir Munk int
1034*1256805dSOphir Munk mlx5_set_link_up(struct rte_eth_dev *dev)
1035*1256805dSOphir Munk {
1036*1256805dSOphir Munk 	return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
1037*1256805dSOphir Munk }
1038*1256805dSOphir Munk 
1039*1256805dSOphir Munk /**
1040*1256805dSOphir Munk  * Check if mlx5 device was removed.
1041*1256805dSOphir Munk  *
1042*1256805dSOphir Munk  * @param dev
1043*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
1044*1256805dSOphir Munk  *
1045*1256805dSOphir Munk  * @return
1046*1256805dSOphir Munk  *   1 when device is removed, otherwise 0.
1047*1256805dSOphir Munk  */
1048*1256805dSOphir Munk int
1049*1256805dSOphir Munk mlx5_is_removed(struct rte_eth_dev *dev)
1050*1256805dSOphir Munk {
1051*1256805dSOphir Munk 	struct ibv_device_attr device_attr;
1052*1256805dSOphir Munk 	struct mlx5_priv *priv = dev->data->dev_private;
1053*1256805dSOphir Munk 
1054*1256805dSOphir Munk 	if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO)
1055*1256805dSOphir Munk 		return 1;
1056*1256805dSOphir Munk 	return 0;
1057*1256805dSOphir Munk }
1058*1256805dSOphir Munk 
1059*1256805dSOphir Munk /**
1060*1256805dSOphir Munk  * Get switch information associated with network interface.
1061*1256805dSOphir Munk  *
1062*1256805dSOphir Munk  * @param ifindex
1063*1256805dSOphir Munk  *   Network interface index.
1064*1256805dSOphir Munk  * @param[out] info
1065*1256805dSOphir Munk  *   Switch information object, populated in case of success.
1066*1256805dSOphir Munk  *
1067*1256805dSOphir Munk  * @return
1068*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
1069*1256805dSOphir Munk  */
1070*1256805dSOphir Munk int
1071*1256805dSOphir Munk mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
1072*1256805dSOphir Munk {
1073*1256805dSOphir Munk 	char ifname[IF_NAMESIZE];
1074*1256805dSOphir Munk 	char port_name[IF_NAMESIZE];
1075*1256805dSOphir Munk 	FILE *file;
1076*1256805dSOphir Munk 	struct mlx5_switch_info data = {
1077*1256805dSOphir Munk 		.master = 0,
1078*1256805dSOphir Munk 		.representor = 0,
1079*1256805dSOphir Munk 		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
1080*1256805dSOphir Munk 		.port_name = 0,
1081*1256805dSOphir Munk 		.switch_id = 0,
1082*1256805dSOphir Munk 	};
1083*1256805dSOphir Munk 	DIR *dir;
1084*1256805dSOphir Munk 	bool port_switch_id_set = false;
1085*1256805dSOphir Munk 	bool device_dir = false;
1086*1256805dSOphir Munk 	char c;
1087*1256805dSOphir Munk 	int ret;
1088*1256805dSOphir Munk 
1089*1256805dSOphir Munk 	if (!if_indextoname(ifindex, ifname)) {
1090*1256805dSOphir Munk 		rte_errno = errno;
1091*1256805dSOphir Munk 		return -rte_errno;
1092*1256805dSOphir Munk 	}
1093*1256805dSOphir Munk 
1094*1256805dSOphir Munk 	MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name",
1095*1256805dSOphir Munk 	      ifname);
1096*1256805dSOphir Munk 	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
1097*1256805dSOphir Munk 	      ifname);
1098*1256805dSOphir Munk 	MKSTR(pci_device, "/sys/class/net/%s/device",
1099*1256805dSOphir Munk 	      ifname);
1100*1256805dSOphir Munk 
1101*1256805dSOphir Munk 	file = fopen(phys_port_name, "rb");
1102*1256805dSOphir Munk 	if (file != NULL) {
1103*1256805dSOphir Munk 		ret = fscanf(file, "%s", port_name);
1104*1256805dSOphir Munk 		fclose(file);
1105*1256805dSOphir Munk 		if (ret == 1)
1106*1256805dSOphir Munk 			mlx5_translate_port_name(port_name, &data);
1107*1256805dSOphir Munk 	}
1108*1256805dSOphir Munk 	file = fopen(phys_switch_id, "rb");
1109*1256805dSOphir Munk 	if (file == NULL) {
1110*1256805dSOphir Munk 		rte_errno = errno;
1111*1256805dSOphir Munk 		return -rte_errno;
1112*1256805dSOphir Munk 	}
1113*1256805dSOphir Munk 	port_switch_id_set =
1114*1256805dSOphir Munk 		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
1115*1256805dSOphir Munk 		c == '\n';
1116*1256805dSOphir Munk 	fclose(file);
1117*1256805dSOphir Munk 	dir = opendir(pci_device);
1118*1256805dSOphir Munk 	if (dir != NULL) {
1119*1256805dSOphir Munk 		closedir(dir);
1120*1256805dSOphir Munk 		device_dir = true;
1121*1256805dSOphir Munk 	}
1122*1256805dSOphir Munk 	if (port_switch_id_set) {
1123*1256805dSOphir Munk 		/* We have some E-Switch configuration. */
1124*1256805dSOphir Munk 		mlx5_sysfs_check_switch_info(device_dir, &data);
1125*1256805dSOphir Munk 	}
1126*1256805dSOphir Munk 	*info = data;
1127*1256805dSOphir Munk 	MLX5_ASSERT(!(data.master && data.representor));
1128*1256805dSOphir Munk 	if (data.master && data.representor) {
1129*1256805dSOphir Munk 		DRV_LOG(ERR, "ifindex %u device is recognized as master"
1130*1256805dSOphir Munk 			     " and as representor", ifindex);
1131*1256805dSOphir Munk 		rte_errno = ENODEV;
1132*1256805dSOphir Munk 		return -rte_errno;
1133*1256805dSOphir Munk 	}
1134*1256805dSOphir Munk 	return 0;
1135*1256805dSOphir Munk }
1136*1256805dSOphir Munk 
1137*1256805dSOphir Munk /**
1138*1256805dSOphir Munk  * Analyze gathered port parameters via sysfs to recognize master
1139*1256805dSOphir Munk  * and representor devices for E-Switch configuration.
1140*1256805dSOphir Munk  *
1141*1256805dSOphir Munk  * @param[in] device_dir
1142*1256805dSOphir Munk  *   flag of presence of "device" directory under port device key.
1143*1256805dSOphir Munk  * @param[inout] switch_info
1144*1256805dSOphir Munk  *   Port information, including port name as a number and port name
1145*1256805dSOphir Munk  *   type if recognized
1146*1256805dSOphir Munk  *
1147*1256805dSOphir Munk  * @return
1148*1256805dSOphir Munk  *   master and representor flags are set in switch_info according to
1149*1256805dSOphir Munk  *   recognized parameters (if any).
1150*1256805dSOphir Munk  */
1151*1256805dSOphir Munk void
1152*1256805dSOphir Munk mlx5_sysfs_check_switch_info(bool device_dir,
1153*1256805dSOphir Munk 			     struct mlx5_switch_info *switch_info)
1154*1256805dSOphir Munk {
1155*1256805dSOphir Munk 	switch (switch_info->name_type) {
1156*1256805dSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1157*1256805dSOphir Munk 		/*
1158*1256805dSOphir Munk 		 * Name is not recognized, assume the master,
1159*1256805dSOphir Munk 		 * check the device directory presence.
1160*1256805dSOphir Munk 		 */
1161*1256805dSOphir Munk 		switch_info->master = device_dir;
1162*1256805dSOphir Munk 		break;
1163*1256805dSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1164*1256805dSOphir Munk 		/*
1165*1256805dSOphir Munk 		 * Name is not set, this assumes the legacy naming
1166*1256805dSOphir Munk 		 * schema for master, just check if there is
1167*1256805dSOphir Munk 		 * a device directory.
1168*1256805dSOphir Munk 		 */
1169*1256805dSOphir Munk 		switch_info->master = device_dir;
1170*1256805dSOphir Munk 		break;
1171*1256805dSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1172*1256805dSOphir Munk 		/* New uplink naming schema recognized. */
1173*1256805dSOphir Munk 		switch_info->master = 1;
1174*1256805dSOphir Munk 		break;
1175*1256805dSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1176*1256805dSOphir Munk 		/* Legacy representors naming schema. */
1177*1256805dSOphir Munk 		switch_info->representor = !device_dir;
1178*1256805dSOphir Munk 		break;
1179*1256805dSOphir Munk 	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1180*1256805dSOphir Munk 		/* New representors naming schema. */
1181*1256805dSOphir Munk 		switch_info->representor = 1;
1182*1256805dSOphir Munk 		break;
1183*1256805dSOphir Munk 	}
1184*1256805dSOphir Munk }
1185*1256805dSOphir Munk 
1186*1256805dSOphir Munk /**
1187*1256805dSOphir Munk  * DPDK callback to retrieve plug-in module EEPROM information (type and size).
1188*1256805dSOphir Munk  *
1189*1256805dSOphir Munk  * @param dev
1190*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
1191*1256805dSOphir Munk  * @param[out] modinfo
1192*1256805dSOphir Munk  *   Storage for plug-in module EEPROM information.
1193*1256805dSOphir Munk  *
1194*1256805dSOphir Munk  * @return
1195*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
1196*1256805dSOphir Munk  */
1197*1256805dSOphir Munk int
1198*1256805dSOphir Munk mlx5_get_module_info(struct rte_eth_dev *dev,
1199*1256805dSOphir Munk 		     struct rte_eth_dev_module_info *modinfo)
1200*1256805dSOphir Munk {
1201*1256805dSOphir Munk 	struct ethtool_modinfo info = {
1202*1256805dSOphir Munk 		.cmd = ETHTOOL_GMODULEINFO,
1203*1256805dSOphir Munk 	};
1204*1256805dSOphir Munk 	struct ifreq ifr = (struct ifreq) {
1205*1256805dSOphir Munk 		.ifr_data = (void *)&info,
1206*1256805dSOphir Munk 	};
1207*1256805dSOphir Munk 	int ret = 0;
1208*1256805dSOphir Munk 
1209*1256805dSOphir Munk 	if (!dev || !modinfo) {
1210*1256805dSOphir Munk 		DRV_LOG(WARNING, "missing argument, cannot get module info");
1211*1256805dSOphir Munk 		rte_errno = EINVAL;
1212*1256805dSOphir Munk 		return -rte_errno;
1213*1256805dSOphir Munk 	}
1214*1256805dSOphir Munk 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
1215*1256805dSOphir Munk 	if (ret) {
1216*1256805dSOphir Munk 		DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
1217*1256805dSOphir Munk 			dev->data->port_id, strerror(rte_errno));
1218*1256805dSOphir Munk 		return ret;
1219*1256805dSOphir Munk 	}
1220*1256805dSOphir Munk 	modinfo->type = info.type;
1221*1256805dSOphir Munk 	modinfo->eeprom_len = info.eeprom_len;
1222*1256805dSOphir Munk 	return ret;
1223*1256805dSOphir Munk }
1224*1256805dSOphir Munk 
1225*1256805dSOphir Munk /**
1226*1256805dSOphir Munk  * DPDK callback to retrieve plug-in module EEPROM data.
1227*1256805dSOphir Munk  *
1228*1256805dSOphir Munk  * @param dev
1229*1256805dSOphir Munk  *   Pointer to Ethernet device structure.
1230*1256805dSOphir Munk  * @param[out] info
1231*1256805dSOphir Munk  *   Storage for plug-in module EEPROM data.
1232*1256805dSOphir Munk  *
1233*1256805dSOphir Munk  * @return
1234*1256805dSOphir Munk  *   0 on success, a negative errno value otherwise and rte_errno is set.
1235*1256805dSOphir Munk  */
1236*1256805dSOphir Munk int mlx5_get_module_eeprom(struct rte_eth_dev *dev,
1237*1256805dSOphir Munk 			   struct rte_dev_eeprom_info *info)
1238*1256805dSOphir Munk {
1239*1256805dSOphir Munk 	struct ethtool_eeprom *eeprom;
1240*1256805dSOphir Munk 	struct ifreq ifr;
1241*1256805dSOphir Munk 	int ret = 0;
1242*1256805dSOphir Munk 
1243*1256805dSOphir Munk 	if (!dev || !info) {
1244*1256805dSOphir Munk 		DRV_LOG(WARNING, "missing argument, cannot get module eeprom");
1245*1256805dSOphir Munk 		rte_errno = EINVAL;
1246*1256805dSOphir Munk 		return -rte_errno;
1247*1256805dSOphir Munk 	}
1248*1256805dSOphir Munk 	eeprom = rte_calloc(__func__, 1,
1249*1256805dSOphir Munk 			    (sizeof(struct ethtool_eeprom) + info->length), 0);
1250*1256805dSOphir Munk 	if (!eeprom) {
1251*1256805dSOphir Munk 		DRV_LOG(WARNING, "port %u cannot allocate memory for "
1252*1256805dSOphir Munk 			"eeprom data", dev->data->port_id);
1253*1256805dSOphir Munk 		rte_errno = ENOMEM;
1254*1256805dSOphir Munk 		return -rte_errno;
1255*1256805dSOphir Munk 	}
1256*1256805dSOphir Munk 	eeprom->cmd = ETHTOOL_GMODULEEEPROM;
1257*1256805dSOphir Munk 	eeprom->offset = info->offset;
1258*1256805dSOphir Munk 	eeprom->len = info->length;
1259*1256805dSOphir Munk 	ifr = (struct ifreq) {
1260*1256805dSOphir Munk 		.ifr_data = (void *)eeprom,
1261*1256805dSOphir Munk 	};
1262*1256805dSOphir Munk 	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
1263*1256805dSOphir Munk 	if (ret)
1264*1256805dSOphir Munk 		DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
1265*1256805dSOphir Munk 			dev->data->port_id, strerror(rte_errno));
1266*1256805dSOphir Munk 	else
1267*1256805dSOphir Munk 		rte_memcpy(info->data, eeprom->data, info->length);
1268*1256805dSOphir Munk 	rte_free(eeprom);
1269*1256805dSOphir Munk 	return ret;
1270*1256805dSOphir Munk }
1271