1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2017 6WIND S.A. 3 * Copyright 2017 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <inttypes.h> 8 #include <linux/netlink.h> 9 #include <string.h> 10 #include <sys/socket.h> 11 #include <unistd.h> 12 #include <stdbool.h> 13 14 #include <rte_malloc.h> 15 #include <tap_netlink.h> 16 #include <rte_random.h> 17 18 #include "tap_log.h" 19 20 /* Compatibility with glibc < 2.24 */ 21 #ifndef SOL_NETLINK 22 #define SOL_NETLINK 270 23 #endif 24 25 /* Must be quite large to support dumping a huge list of QDISC or filters. */ 26 #define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */ 27 #define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */ 28 #define RCVBUF_SIZE 32768 /* Receive buffer size for the netlink socket */ 29 30 struct nested_tail { 31 struct rtattr *tail; 32 struct nested_tail *prev; 33 }; 34 35 /** 36 * Initialize a netlink socket for communicating with the kernel. 37 * 38 * @param nl_groups 39 * Set it to a netlink group value (e.g. RTMGRP_LINK) to receive messages for 40 * specific netlink multicast groups. Otherwise, no subscription will be made. 41 * 42 * @return 43 * netlink socket file descriptor on success, -1 otherwise. 44 */ 45 int 46 tap_nl_init(uint32_t nl_groups) 47 { 48 int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE; 49 struct sockaddr_nl local = { 50 .nl_family = AF_NETLINK, 51 .nl_groups = nl_groups, 52 }; 53 #ifdef NETLINK_EXT_ACK 54 int one = 1; 55 #endif 56 57 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); 58 if (fd < 0) { 59 TAP_LOG(ERR, "Unable to create a netlink socket"); 60 return -1; 61 } 62 if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) { 63 TAP_LOG(ERR, "Unable to set socket buffer send size"); 64 close(fd); 65 return -1; 66 } 67 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) { 68 TAP_LOG(ERR, "Unable to set socket buffer receive size"); 69 close(fd); 70 return -1; 71 } 72 73 #ifdef NETLINK_EXT_ACK 74 /* Ask for extended ACK response. on older kernel will ignore request. */ 75 if (setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one)) < 0) 76 TAP_LOG(NOTICE, "Unable to request netlink error information"); 77 #endif 78 79 if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) { 80 TAP_LOG(ERR, "Unable to bind to the netlink socket"); 81 close(fd); 82 return -1; 83 } 84 return fd; 85 } 86 87 /** 88 * Clean up a netlink socket once all communicating with the kernel is finished. 89 * 90 * @param[in] nlsk_fd 91 * The netlink socket file descriptor used for communication. 92 * 93 * @return 94 * 0 on success, -1 otherwise. 95 */ 96 int 97 tap_nl_final(int nlsk_fd) 98 { 99 if (close(nlsk_fd)) { 100 TAP_LOG(ERR, "Failed to close netlink socket: %s (%d)", 101 strerror(errno), errno); 102 return -1; 103 } 104 return 0; 105 } 106 107 /** 108 * Send a message to the kernel on the netlink socket. 109 * 110 * @param[in] nlsk_fd 111 * The netlink socket file descriptor used for communication. 112 * @param[in] nh 113 * The netlink message send to the kernel. 114 * 115 * @return 116 * the number of sent bytes on success, -1 otherwise. 117 */ 118 int 119 tap_nl_send(int nlsk_fd, struct nlmsghdr *nh) 120 { 121 int send_bytes; 122 123 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 124 nh->nlmsg_seq = (uint32_t)rte_rand(); 125 126 retry: 127 send_bytes = send(nlsk_fd, nh, nh->nlmsg_len, 0); 128 if (send_bytes < 0) { 129 if (errno == EINTR) 130 goto retry; 131 132 TAP_LOG(ERR, "Failed to send netlink message: %s (%d)", 133 strerror(errno), errno); 134 return -1; 135 } 136 return send_bytes; 137 } 138 139 #ifdef NETLINK_EXT_ACK 140 static const struct nlattr * 141 tap_nl_attr_first(const struct nlmsghdr *nh, size_t offset) 142 { 143 return (const struct nlattr *)((const char *)nh + NLMSG_SPACE(offset)); 144 } 145 146 static const struct nlattr * 147 tap_nl_attr_next(const struct nlattr *attr) 148 { 149 return (const struct nlattr *)((const char *)attr 150 + NLMSG_ALIGN(attr->nla_len)); 151 } 152 153 static bool 154 tap_nl_attr_ok(const struct nlattr *attr, int len) 155 { 156 if (len < (int)sizeof(struct nlattr)) 157 return false; /* missing header */ 158 if (attr->nla_len < sizeof(struct nlattr)) 159 return false; /* attribute length should include itself */ 160 if ((int)attr->nla_len > len) 161 return false; /* attribute is truncated */ 162 return true; 163 } 164 165 166 /* Decode extended errors from kernel */ 167 static void 168 tap_nl_dump_ext_ack(const struct nlmsghdr *nh, const struct nlmsgerr *err) 169 { 170 const struct nlattr *attr; 171 const char *tail = (const char *)nh + NLMSG_ALIGN(nh->nlmsg_len); 172 size_t hlen = sizeof(*err); 173 174 /* no TLVs, no extended response */ 175 if (!(nh->nlmsg_flags & NLM_F_ACK_TLVS)) 176 return; 177 178 if (!(nh->nlmsg_flags & NLM_F_CAPPED)) 179 hlen += err->msg.nlmsg_len - NLMSG_HDRLEN; 180 181 for (attr = tap_nl_attr_first(nh, hlen); 182 tap_nl_attr_ok(attr, tail - (const char *)attr); 183 attr = tap_nl_attr_next(attr)) { 184 uint16_t type = attr->nla_type & NLA_TYPE_MASK; 185 186 if (type == NLMSGERR_ATTR_MSG) { 187 const char *msg = (const char *)attr 188 + NLMSG_ALIGN(sizeof(*attr)); 189 190 if (err->error) 191 TAP_LOG(ERR, "%s", msg); 192 else 193 194 TAP_LOG(WARNING, "%s", msg); 195 break; 196 } 197 } 198 } 199 #else 200 /* 201 * External ACK support was added in Linux kernel 4.17 202 * on older kernels, just ignore that part of message 203 */ 204 #define tap_nl_dump_ext_ack(nh, err) do { } while (0) 205 #endif 206 207 /** 208 * Check that the kernel sends an appropriate ACK in response 209 * to an tap_nl_send(). 210 * 211 * @param[in] nlsk_fd 212 * The netlink socket file descriptor used for communication. 213 * 214 * @return 215 * 0 on success, -1 otherwise with errno set. 216 */ 217 int 218 tap_nl_recv_ack(int nlsk_fd) 219 { 220 return tap_nl_recv(nlsk_fd, NULL, NULL); 221 } 222 223 /** 224 * Receive a message from the kernel on the netlink socket, following an 225 * tap_nl_send(). 226 * 227 * @param[in] nlsk_fd 228 * The netlink socket file descriptor used for communication. 229 * @param[in] cb 230 * The callback function to call for each netlink message received. 231 * @param[in, out] arg 232 * Custom arguments for the callback. 233 * 234 * @return 235 * 0 on success, -1 otherwise with errno set. 236 */ 237 int 238 tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg) 239 { 240 char buf[BUF_SIZE]; 241 int multipart = 0; 242 int ret = 0; 243 244 do { 245 struct nlmsghdr *nh; 246 int recv_bytes; 247 248 retry: 249 recv_bytes = recv(nlsk_fd, buf, sizeof(buf), 0); 250 if (recv_bytes < 0) { 251 if (errno == EINTR) 252 goto retry; 253 return -1; 254 } 255 256 for (nh = (struct nlmsghdr *)buf; 257 NLMSG_OK(nh, (unsigned int)recv_bytes); 258 nh = NLMSG_NEXT(nh, recv_bytes)) { 259 if (nh->nlmsg_type == NLMSG_ERROR) { 260 struct nlmsgerr *err_data = NLMSG_DATA(nh); 261 262 tap_nl_dump_ext_ack(nh, err_data); 263 if (err_data->error < 0) { 264 errno = -err_data->error; 265 return -1; 266 } 267 /* Ack message. */ 268 return 0; 269 } 270 /* Multi-part msgs and their trailing DONE message. */ 271 if (nh->nlmsg_flags & NLM_F_MULTI) { 272 if (nh->nlmsg_type == NLMSG_DONE) 273 return 0; 274 multipart = 1; 275 } 276 if (cb) 277 ret = cb(nh, arg); 278 } 279 } while (multipart); 280 return ret; 281 } 282 283 /** 284 * Append a netlink attribute to a message. 285 * 286 * @param[in, out] nh 287 * The netlink message to parse, received from the kernel. 288 * @param[in] type 289 * The type of attribute to append. 290 * @param[in] data_len 291 * The length of the data to append. 292 * @param[in] data 293 * The data to append. 294 */ 295 void 296 tap_nlattr_add(struct nlmsghdr *nh, unsigned short type, 297 unsigned int data_len, const void *data) 298 { 299 /* see man 3 rtnetlink */ 300 struct rtattr *rta; 301 302 rta = (struct rtattr *)NLMSG_TAIL(nh); 303 rta->rta_len = RTA_LENGTH(data_len); 304 rta->rta_type = type; 305 if (data_len > 0) 306 memcpy(RTA_DATA(rta), data, data_len); 307 nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len); 308 } 309 310 /** 311 * Append a uint8_t netlink attribute to a message. 312 * 313 * @param[in, out] nh 314 * The netlink message to parse, received from the kernel. 315 * @param[in] type 316 * The type of attribute to append. 317 * @param[in] data 318 * The data to append. 319 */ 320 void 321 tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data) 322 { 323 tap_nlattr_add(nh, type, sizeof(uint8_t), &data); 324 } 325 326 /** 327 * Append a uint16_t netlink attribute to a message. 328 * 329 * @param[in, out] nh 330 * The netlink message to parse, received from the kernel. 331 * @param[in] type 332 * The type of attribute to append. 333 * @param[in] data 334 * The data to append. 335 */ 336 void 337 tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data) 338 { 339 tap_nlattr_add(nh, type, sizeof(uint16_t), &data); 340 } 341 342 /** 343 * Append a uint16_t netlink attribute to a message. 344 * 345 * @param[in, out] nh 346 * The netlink message to parse, received from the kernel. 347 * @param[in] type 348 * The type of attribute to append. 349 * @param[in] data 350 * The data to append. 351 */ 352 void 353 tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data) 354 { 355 tap_nlattr_add(nh, type, sizeof(uint32_t), &data); 356 } 357 358 /** 359 * Start a nested netlink attribute. 360 * It must be followed later by a call to tap_nlattr_nested_finish(). 361 * 362 * @param[in, out] msg 363 * The netlink message where to edit the nested_tails metadata. 364 * @param[in] type 365 * The nested attribute type to append. 366 * 367 * @return 368 * -1 if adding a nested netlink attribute failed, 0 otherwise. 369 */ 370 int 371 tap_nlattr_nested_start(struct tap_nlmsg *msg, uint16_t type) 372 { 373 struct nested_tail *tail; 374 375 tail = rte_zmalloc(NULL, sizeof(struct nested_tail), 0); 376 if (!tail) { 377 TAP_LOG(ERR, 378 "Couldn't allocate memory for nested netlink attribute"); 379 return -1; 380 } 381 382 tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh); 383 384 tap_nlattr_add(&msg->nh, type, 0, NULL); 385 386 tail->prev = msg->nested_tails; 387 388 msg->nested_tails = tail; 389 390 return 0; 391 } 392 393 /** 394 * End a nested netlink attribute. 395 * It follows a call to tap_nlattr_nested_start(). 396 * In effect, it will modify the nested attribute length to include every bytes 397 * from the nested attribute start, up to here. 398 * 399 * @param[in, out] msg 400 * The netlink message where to edit the nested_tails metadata. 401 */ 402 void 403 tap_nlattr_nested_finish(struct tap_nlmsg *msg) 404 { 405 struct nested_tail *tail = msg->nested_tails; 406 407 tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail; 408 409 if (tail->prev) 410 msg->nested_tails = tail->prev; 411 412 rte_free(tail); 413 } 414