1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2017 6WIND S.A. 3 * Copyright 2017 Mellanox Technologies, Ltd 4 */ 5 6 #include <errno.h> 7 #include <inttypes.h> 8 #include <linux/netlink.h> 9 #include <string.h> 10 #include <sys/socket.h> 11 #include <unistd.h> 12 #include <stdbool.h> 13 14 #include <rte_malloc.h> 15 #include <tap_netlink.h> 16 #include <rte_random.h> 17 18 #include "tap_log.h" 19 20 /* Compatibility with glibc < 2.24 */ 21 #ifndef SOL_NETLINK 22 #define SOL_NETLINK 270 23 #endif 24 25 /* Must be quite large to support dumping a huge list of QDISC or filters. */ 26 #define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */ 27 #define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */ 28 #define RCVBUF_SIZE 32768 /* Receive buffer size for the netlink socket */ 29 30 struct nested_tail { 31 struct rtattr *tail; 32 struct nested_tail *prev; 33 }; 34 35 /** 36 * Initialize a netlink socket for communicating with the kernel. 37 * 38 * @param nl_groups 39 * Set it to a netlink group value (e.g. RTMGRP_LINK) to receive messages for 40 * specific netlink multicast groups. Otherwise, no subscription will be made. 41 * 42 * @return 43 * netlink socket file descriptor on success, -1 otherwise. 44 */ 45 int 46 tap_nl_init(uint32_t nl_groups) 47 { 48 int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE; 49 struct sockaddr_nl local = { 50 .nl_family = AF_NETLINK, 51 .nl_groups = nl_groups, 52 }; 53 #ifdef NETLINK_EXT_ACK 54 int one = 1; 55 #endif 56 57 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); 58 if (fd < 0) { 59 TAP_LOG(ERR, "Unable to create a netlink socket"); 60 return -1; 61 } 62 if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) { 63 TAP_LOG(ERR, "Unable to set socket buffer send size"); 64 close(fd); 65 return -1; 66 } 67 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) { 68 TAP_LOG(ERR, "Unable to set socket buffer receive size"); 69 close(fd); 70 return -1; 71 } 72 73 #ifdef NETLINK_EXT_ACK 74 /* Ask for extended ACK response. on older kernel will ignore request. */ 75 setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one)); 76 #endif 77 78 if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) { 79 TAP_LOG(ERR, "Unable to bind to the netlink socket"); 80 close(fd); 81 return -1; 82 } 83 return fd; 84 } 85 86 /** 87 * Clean up a netlink socket once all communicating with the kernel is finished. 88 * 89 * @param[in] nlsk_fd 90 * The netlink socket file descriptor used for communication. 91 * 92 * @return 93 * 0 on success, -1 otherwise. 94 */ 95 int 96 tap_nl_final(int nlsk_fd) 97 { 98 if (close(nlsk_fd)) { 99 TAP_LOG(ERR, "Failed to close netlink socket: %s (%d)", 100 strerror(errno), errno); 101 return -1; 102 } 103 return 0; 104 } 105 106 /** 107 * Send a message to the kernel on the netlink socket. 108 * 109 * @param[in] nlsk_fd 110 * The netlink socket file descriptor used for communication. 111 * @param[in] nh 112 * The netlink message send to the kernel. 113 * 114 * @return 115 * the number of sent bytes on success, -1 otherwise. 116 */ 117 int 118 tap_nl_send(int nlsk_fd, struct nlmsghdr *nh) 119 { 120 int send_bytes; 121 122 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ 123 nh->nlmsg_seq = (uint32_t)rte_rand(); 124 125 retry: 126 send_bytes = send(nlsk_fd, nh, nh->nlmsg_len, 0); 127 if (send_bytes < 0) { 128 if (errno == EINTR) 129 goto retry; 130 131 TAP_LOG(ERR, "Failed to send netlink message: %s (%d)", 132 strerror(errno), errno); 133 return -1; 134 } 135 return send_bytes; 136 } 137 138 #ifdef NETLINK_EXT_ACK 139 static const struct nlattr * 140 tap_nl_attr_first(const struct nlmsghdr *nh, size_t offset) 141 { 142 return (const struct nlattr *)((const char *)nh + NLMSG_SPACE(offset)); 143 } 144 145 static const struct nlattr * 146 tap_nl_attr_next(const struct nlattr *attr) 147 { 148 return (const struct nlattr *)((const char *)attr 149 + NLMSG_ALIGN(attr->nla_len)); 150 } 151 152 static bool 153 tap_nl_attr_ok(const struct nlattr *attr, int len) 154 { 155 if (len < (int)sizeof(struct nlattr)) 156 return false; /* missing header */ 157 if (attr->nla_len < sizeof(struct nlattr)) 158 return false; /* attribute length should include itself */ 159 if ((int)attr->nla_len > len) 160 return false; /* attribute is truncated */ 161 return true; 162 } 163 164 165 /* Decode extended errors from kernel */ 166 static void 167 tap_nl_dump_ext_ack(const struct nlmsghdr *nh, const struct nlmsgerr *err) 168 { 169 const struct nlattr *attr; 170 const char *tail = (const char *)nh + NLMSG_ALIGN(nh->nlmsg_len); 171 size_t hlen = sizeof(*err); 172 173 /* no TLVs, no extended response */ 174 if (!(nh->nlmsg_flags & NLM_F_ACK_TLVS)) 175 return; 176 177 if (!(nh->nlmsg_flags & NLM_F_CAPPED)) 178 hlen += err->msg.nlmsg_len - NLMSG_HDRLEN; 179 180 for (attr = tap_nl_attr_first(nh, hlen); 181 tap_nl_attr_ok(attr, tail - (const char *)attr); 182 attr = tap_nl_attr_next(attr)) { 183 uint16_t type = attr->nla_type & NLA_TYPE_MASK; 184 185 if (type == NLMSGERR_ATTR_MSG) { 186 const char *msg = (const char *)attr 187 + NLMSG_ALIGN(sizeof(*attr)); 188 189 if (err->error) 190 TAP_LOG(ERR, "%s", msg); 191 else 192 193 TAP_LOG(WARNING, "%s", msg); 194 break; 195 } 196 } 197 } 198 #else 199 /* 200 * External ACK support was added in Linux kernel 4.17 201 * on older kernels, just ignore that part of message 202 */ 203 #define tap_nl_dump_ext_ack(nh, err) do { } while (0) 204 #endif 205 206 /** 207 * Check that the kernel sends an appropriate ACK in response 208 * to an tap_nl_send(). 209 * 210 * @param[in] nlsk_fd 211 * The netlink socket file descriptor used for communication. 212 * 213 * @return 214 * 0 on success, -1 otherwise with errno set. 215 */ 216 int 217 tap_nl_recv_ack(int nlsk_fd) 218 { 219 return tap_nl_recv(nlsk_fd, NULL, NULL); 220 } 221 222 /** 223 * Receive a message from the kernel on the netlink socket, following an 224 * tap_nl_send(). 225 * 226 * @param[in] nlsk_fd 227 * The netlink socket file descriptor used for communication. 228 * @param[in] cb 229 * The callback function to call for each netlink message received. 230 * @param[in, out] arg 231 * Custom arguments for the callback. 232 * 233 * @return 234 * 0 on success, -1 otherwise with errno set. 235 */ 236 int 237 tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg) 238 { 239 char buf[BUF_SIZE]; 240 int multipart = 0; 241 int ret = 0; 242 243 do { 244 struct nlmsghdr *nh; 245 int recv_bytes; 246 247 retry: 248 recv_bytes = recv(nlsk_fd, buf, sizeof(buf), 0); 249 if (recv_bytes < 0) { 250 if (errno == EINTR) 251 goto retry; 252 return -1; 253 } 254 255 for (nh = (struct nlmsghdr *)buf; 256 NLMSG_OK(nh, (unsigned int)recv_bytes); 257 nh = NLMSG_NEXT(nh, recv_bytes)) { 258 if (nh->nlmsg_type == NLMSG_ERROR) { 259 struct nlmsgerr *err_data = NLMSG_DATA(nh); 260 261 tap_nl_dump_ext_ack(nh, err_data); 262 if (err_data->error < 0) { 263 errno = -err_data->error; 264 return -1; 265 } 266 /* Ack message. */ 267 return 0; 268 } 269 /* Multi-part msgs and their trailing DONE message. */ 270 if (nh->nlmsg_flags & NLM_F_MULTI) { 271 if (nh->nlmsg_type == NLMSG_DONE) 272 return 0; 273 multipart = 1; 274 } 275 if (cb) 276 ret = cb(nh, arg); 277 } 278 } while (multipart); 279 return ret; 280 } 281 282 /** 283 * Append a netlink attribute to a message. 284 * 285 * @param[in, out] nh 286 * The netlink message to parse, received from the kernel. 287 * @param[in] type 288 * The type of attribute to append. 289 * @param[in] data_len 290 * The length of the data to append. 291 * @param[in] data 292 * The data to append. 293 */ 294 void 295 tap_nlattr_add(struct nlmsghdr *nh, unsigned short type, 296 unsigned int data_len, const void *data) 297 { 298 /* see man 3 rtnetlink */ 299 struct rtattr *rta; 300 301 rta = (struct rtattr *)NLMSG_TAIL(nh); 302 rta->rta_len = RTA_LENGTH(data_len); 303 rta->rta_type = type; 304 memcpy(RTA_DATA(rta), data, data_len); 305 nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len); 306 } 307 308 /** 309 * Append a uint8_t netlink attribute to a message. 310 * 311 * @param[in, out] nh 312 * The netlink message to parse, received from the kernel. 313 * @param[in] type 314 * The type of attribute to append. 315 * @param[in] data 316 * The data to append. 317 */ 318 void 319 tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data) 320 { 321 tap_nlattr_add(nh, type, sizeof(uint8_t), &data); 322 } 323 324 /** 325 * Append a uint16_t netlink attribute to a message. 326 * 327 * @param[in, out] nh 328 * The netlink message to parse, received from the kernel. 329 * @param[in] type 330 * The type of attribute to append. 331 * @param[in] data 332 * The data to append. 333 */ 334 void 335 tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data) 336 { 337 tap_nlattr_add(nh, type, sizeof(uint16_t), &data); 338 } 339 340 /** 341 * Append a uint16_t netlink attribute to a message. 342 * 343 * @param[in, out] nh 344 * The netlink message to parse, received from the kernel. 345 * @param[in] type 346 * The type of attribute to append. 347 * @param[in] data 348 * The data to append. 349 */ 350 void 351 tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data) 352 { 353 tap_nlattr_add(nh, type, sizeof(uint32_t), &data); 354 } 355 356 /** 357 * Start a nested netlink attribute. 358 * It must be followed later by a call to tap_nlattr_nested_finish(). 359 * 360 * @param[in, out] msg 361 * The netlink message where to edit the nested_tails metadata. 362 * @param[in] type 363 * The nested attribute type to append. 364 * 365 * @return 366 * -1 if adding a nested netlink attribute failed, 0 otherwise. 367 */ 368 int 369 tap_nlattr_nested_start(struct nlmsg *msg, uint16_t type) 370 { 371 struct nested_tail *tail; 372 373 tail = rte_zmalloc(NULL, sizeof(struct nested_tail), 0); 374 if (!tail) { 375 TAP_LOG(ERR, 376 "Couldn't allocate memory for nested netlink attribute"); 377 return -1; 378 } 379 380 tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh); 381 382 tap_nlattr_add(&msg->nh, type, 0, NULL); 383 384 tail->prev = msg->nested_tails; 385 386 msg->nested_tails = tail; 387 388 return 0; 389 } 390 391 /** 392 * End a nested netlink attribute. 393 * It follows a call to tap_nlattr_nested_start(). 394 * In effect, it will modify the nested attribute length to include every bytes 395 * from the nested attribute start, up to here. 396 * 397 * @param[in, out] msg 398 * The netlink message where to edit the nested_tails metadata. 399 */ 400 void 401 tap_nlattr_nested_finish(struct nlmsg *msg) 402 { 403 struct nested_tail *tail = msg->nested_tails; 404 405 tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail; 406 407 if (tail->prev) 408 msg->nested_tails = tail->prev; 409 410 rte_free(tail); 411 } 412