xref: /dpdk/drivers/net/tap/tap_netlink.c (revision b4241019d426114fe7adb4da892053a1dbf51261)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2017 6WIND S.A.
3  * Copyright 2017 Mellanox Technologies, Ltd
4  */
5 
6 #include <errno.h>
7 #include <inttypes.h>
8 #include <linux/netlink.h>
9 #include <string.h>
10 #include <sys/socket.h>
11 #include <unistd.h>
12 #include <stdbool.h>
13 
14 #include <rte_malloc.h>
15 #include <tap_netlink.h>
16 #include <rte_random.h>
17 
18 #include "tap_log.h"
19 
/*
 * Compatibility with glibc < 2.24: provide SOL_NETLINK when the system
 * headers do not. It is the socket option level passed to setsockopt()
 * in tap_nl_init() when requesting NETLINK_EXT_ACK.
 */
#ifndef SOL_NETLINK
#define SOL_NETLINK     270
#endif

/*
 * Must be quite large to support dumping a huge list of QDISC or filters.
 * BUF_SIZE is the size of the on-stack receive buffer in tap_nl_recv();
 * SNDBUF_SIZE and RCVBUF_SIZE are applied to the socket through
 * SO_SNDBUF and SO_RCVBUF in tap_nl_init().
 */
#define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */
#define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */
#define RCVBUF_SIZE 32768 /* Receive buffer size for the netlink socket */
29 
/*
 * One entry of the stack of currently open nested attributes.
 * Entries are pushed by tap_nlattr_nested_start() and popped by
 * tap_nlattr_nested_finish().
 */
struct nested_tail {
	/* Header of the nested attribute, whose rta_len is fixed up on finish. */
	struct rtattr *tail;
	/* Entry that was on top of the stack when this one was pushed. */
	struct nested_tail *prev;
};
34 
35 /**
36  * Initialize a netlink socket for communicating with the kernel.
37  *
38  * @param nl_groups
39  *   Set it to a netlink group value (e.g. RTMGRP_LINK) to receive messages for
40  *   specific netlink multicast groups. Otherwise, no subscription will be made.
41  *
42  * @return
43  *   netlink socket file descriptor on success, -1 otherwise.
44  */
45 int
46 tap_nl_init(uint32_t nl_groups)
47 {
48 	int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE;
49 	struct sockaddr_nl local = {
50 		.nl_family = AF_NETLINK,
51 		.nl_groups = nl_groups,
52 	};
53 #ifdef NETLINK_EXT_ACK
54 	int one = 1;
55 #endif
56 
57 	fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
58 	if (fd < 0) {
59 		TAP_LOG(ERR, "Unable to create a netlink socket");
60 		return -1;
61 	}
62 	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) {
63 		TAP_LOG(ERR, "Unable to set socket buffer send size");
64 		close(fd);
65 		return -1;
66 	}
67 	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) {
68 		TAP_LOG(ERR, "Unable to set socket buffer receive size");
69 		close(fd);
70 		return -1;
71 	}
72 
73 #ifdef NETLINK_EXT_ACK
74 	/* Ask for extended ACK response. on older kernel will ignore request. */
75 	if (setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one)) < 0)
76 		TAP_LOG(NOTICE, "Unable to request netlink error information");
77 #endif
78 
79 	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
80 		TAP_LOG(ERR, "Unable to bind to the netlink socket");
81 		close(fd);
82 		return -1;
83 	}
84 	return fd;
85 }
86 
87 /**
88  * Clean up a netlink socket once all communicating with the kernel is finished.
89  *
90  * @param[in] nlsk_fd
91  *   The netlink socket file descriptor used for communication.
92  *
93  * @return
94  *   0 on success, -1 otherwise.
95  */
96 int
97 tap_nl_final(int nlsk_fd)
98 {
99 	if (close(nlsk_fd)) {
100 		TAP_LOG(ERR, "Failed to close netlink socket: %s (%d)",
101 			strerror(errno), errno);
102 		return -1;
103 	}
104 	return 0;
105 }
106 
107 /**
108  * Send a message to the kernel on the netlink socket.
109  *
110  * @param[in] nlsk_fd
111  *   The netlink socket file descriptor used for communication.
112  * @param[in] nh
113  *   The netlink message send to the kernel.
114  *
115  * @return
116  *   the number of sent bytes on success, -1 otherwise.
117  */
118 int
119 tap_nl_send(int nlsk_fd, struct nlmsghdr *nh)
120 {
121 	int send_bytes;
122 
123 	nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
124 	nh->nlmsg_seq = (uint32_t)rte_rand();
125 
126 retry:
127 	send_bytes = send(nlsk_fd, nh, nh->nlmsg_len, 0);
128 	if (send_bytes < 0) {
129 		if (errno == EINTR)
130 			goto retry;
131 
132 		TAP_LOG(ERR, "Failed to send netlink message: %s (%d)",
133 			strerror(errno), errno);
134 		return -1;
135 	}
136 	return send_bytes;
137 }
138 
139 #ifdef NETLINK_EXT_ACK
/*
 * Locate the first attribute of a message: it starts NLMSG_SPACE(offset)
 * bytes after the beginning of the netlink header.
 */
static const struct nlattr *
tap_nl_attr_first(const struct nlmsghdr *nh, size_t offset)
{
	const char *base = (const char *)nh;

	return (const struct nlattr *)(base + NLMSG_SPACE(offset));
}
145 
/*
 * Step to the attribute that follows @attr, i.e. advance by the length
 * recorded in its header, rounded up with NLMSG_ALIGN().
 */
static const struct nlattr *
tap_nl_attr_next(const struct nlattr *attr)
{
	const char *cur = (const char *)attr;

	return (const struct nlattr *)(cur + NLMSG_ALIGN(attr->nla_len));
}
152 
/*
 * Check that a complete attribute is available at @attr given that @len
 * bytes remain. The header is only read once @len is known to cover it.
 */
static bool
tap_nl_attr_ok(const struct nlattr *attr, int len)
{
	const int hdr_sz = (int)sizeof(struct nlattr);

	/* missing header */
	if (len < hdr_sz)
		return false;

	/*
	 * The attribute length must include its own header and must not
	 * run past the remaining bytes (truncated attribute).
	 */
	return attr->nla_len >= sizeof(struct nlattr) &&
	       (int)attr->nla_len <= len;
}
164 
165 
166 /* Decode extended errors from kernel */
167 static void
168 tap_nl_dump_ext_ack(const struct nlmsghdr *nh, const struct nlmsgerr *err)
169 {
170 	const struct nlattr *attr;
171 	const char *tail = (const char *)nh + NLMSG_ALIGN(nh->nlmsg_len);
172 	size_t hlen = sizeof(*err);
173 
174 	/* no TLVs, no extended response */
175 	if (!(nh->nlmsg_flags & NLM_F_ACK_TLVS))
176 		return;
177 
178 	if (!(nh->nlmsg_flags & NLM_F_CAPPED))
179 		hlen += err->msg.nlmsg_len - NLMSG_HDRLEN;
180 
181 	for (attr = tap_nl_attr_first(nh, hlen);
182 	     tap_nl_attr_ok(attr, tail - (const char *)attr);
183 	     attr = tap_nl_attr_next(attr)) {
184 		uint16_t type = attr->nla_type & NLA_TYPE_MASK;
185 
186 		if (type == NLMSGERR_ATTR_MSG) {
187 			const char *msg = (const char *)attr
188 				+ NLMSG_ALIGN(sizeof(*attr));
189 
190 			if (err->error)
191 				TAP_LOG(ERR, "%s", msg);
192 			else
193 
194 				TAP_LOG(WARNING, "%s", msg);
195 			break;
196 		}
197 	}
198 }
199 #else
/*
 * Extended ACK support was added in Linux kernel 4.17;
 * on older kernels, just ignore that part of the message.
 */
204 #define tap_nl_dump_ext_ack(nh, err) do { } while (0)
205 #endif
206 
/**
 * Wait for the kernel's answer to a previous tap_nl_send() without
 * processing any payload: this is tap_nl_recv() with no callback.
 *
 * @param[in] nlsk_fd
 *   The netlink socket file descriptor used for communication.
 *
 * @return
 *   0 on success, -1 otherwise with errno set.
 */
int
tap_nl_recv_ack(int nlsk_fd)
{
	int rc = tap_nl_recv(nlsk_fd, NULL, NULL);

	return rc;
}
222 
223 /**
224  * Receive a message from the kernel on the netlink socket, following an
225  * tap_nl_send().
226  *
227  * @param[in] nlsk_fd
228  *   The netlink socket file descriptor used for communication.
229  * @param[in] cb
230  *   The callback function to call for each netlink message received.
231  * @param[in, out] arg
232  *   Custom arguments for the callback.
233  *
234  * @return
235  *   0 on success, -1 otherwise with errno set.
236  */
237 int
238 tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
239 {
240 	char buf[BUF_SIZE];
241 	int multipart = 0;
242 	int ret = 0;
243 
244 	do {
245 		struct nlmsghdr *nh;
246 		int recv_bytes;
247 
248 retry:
249 		recv_bytes = recv(nlsk_fd, buf, sizeof(buf), 0);
250 		if (recv_bytes < 0) {
251 			if (errno == EINTR)
252 				goto retry;
253 			return -1;
254 		}
255 
256 		for (nh = (struct nlmsghdr *)buf;
257 		     NLMSG_OK(nh, (unsigned int)recv_bytes);
258 		     nh = NLMSG_NEXT(nh, recv_bytes)) {
259 			if (nh->nlmsg_type == NLMSG_ERROR) {
260 				struct nlmsgerr *err_data = NLMSG_DATA(nh);
261 
262 				tap_nl_dump_ext_ack(nh, err_data);
263 				if (err_data->error < 0) {
264 					errno = -err_data->error;
265 					return -1;
266 				}
267 				/* Ack message. */
268 				return 0;
269 			}
270 			/* Multi-part msgs and their trailing DONE message. */
271 			if (nh->nlmsg_flags & NLM_F_MULTI) {
272 				if (nh->nlmsg_type == NLMSG_DONE)
273 					return 0;
274 				multipart = 1;
275 			}
276 			if (cb)
277 				ret = cb(nh, arg);
278 		}
279 	} while (multipart);
280 	return ret;
281 }
282 
/**
 * Append a netlink attribute to a message.
 *
 * The attribute is written at the current tail of the message and
 * nlmsg_len is grown accordingly. No bounds check is done here: the
 * caller must make sure the buffer holding @p nh is large enough.
 *
 * @param[in, out] nh
 *   The netlink message being built, to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data_len
 *   The length of the data to append; may be 0, in which case @p data is
 *   not read.
 * @param[in] data
 *   The data to append.
 */
void
tap_nlattr_add(struct nlmsghdr *nh, unsigned short type,
	   unsigned int data_len, const void *data)
{
	/* see man 3 rtnetlink */
	struct rtattr *attr = (struct rtattr *)NLMSG_TAIL(nh);

	attr->rta_type = type;
	attr->rta_len = RTA_LENGTH(data_len);
	if (data_len != 0)
		memcpy(RTA_DATA(attr), data, data_len);

	/* Account for the attribute, padding included. */
	nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(attr->rta_len);
}
309 
/**
 * Append a uint8_t netlink attribute to a message.
 *
 * @param[in, out] nh
 *   The netlink message being built, to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data
 *   The 8-bit value to append.
 */
void
tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data)
{
	tap_nlattr_add(nh, type, sizeof(data), &data);
}
325 
/**
 * Append a uint16_t netlink attribute to a message.
 *
 * @param[in, out] nh
 *   The netlink message being built, to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data
 *   The 16-bit value to append; its bytes are copied as stored in memory.
 */
void
tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data)
{
	tap_nlattr_add(nh, type, sizeof(data), &data);
}
341 
/**
 * Append a uint32_t netlink attribute to a message.
 *
 * @param[in, out] nh
 *   The netlink message being built, to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data
 *   The 32-bit value to append; its bytes are copied as stored in memory.
 */
void
tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data)
{
	tap_nlattr_add(nh, type, sizeof(data), &data);
}
357 
358 /**
359  * Start a nested netlink attribute.
360  * It must be followed later by a call to tap_nlattr_nested_finish().
361  *
362  * @param[in, out] msg
363  *   The netlink message where to edit the nested_tails metadata.
364  * @param[in] type
365  *   The nested attribute type to append.
366  *
367  * @return
368  *   -1 if adding a nested netlink attribute failed, 0 otherwise.
369  */
370 int
371 tap_nlattr_nested_start(struct tap_nlmsg *msg, uint16_t type)
372 {
373 	struct nested_tail *tail;
374 
375 	tail = rte_zmalloc(NULL, sizeof(struct nested_tail), 0);
376 	if (!tail) {
377 		TAP_LOG(ERR,
378 			"Couldn't allocate memory for nested netlink attribute");
379 		return -1;
380 	}
381 
382 	tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh);
383 
384 	tap_nlattr_add(&msg->nh, type, 0, NULL);
385 
386 	tail->prev = msg->nested_tails;
387 
388 	msg->nested_tails = tail;
389 
390 	return 0;
391 }
392 
393 /**
394  * End a nested netlink attribute.
395  * It follows a call to tap_nlattr_nested_start().
396  * In effect, it will modify the nested attribute length to include every bytes
397  * from the nested attribute start, up to here.
398  *
399  * @param[in, out] msg
400  *   The netlink message where to edit the nested_tails metadata.
401  */
402 void
403 tap_nlattr_nested_finish(struct tap_nlmsg *msg)
404 {
405 	struct nested_tail *tail = msg->nested_tails;
406 
407 	tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail;
408 
409 	if (tail->prev)
410 		msg->nested_tails = tail->prev;
411 
412 	rte_free(tail);
413 }
414