1 /* $NetBSD: pcap-linux.c,v 1.8 2024/09/02 15:33:37 christos Exp $ */ 2 3 /* 4 * pcap-linux.c: Packet capture interface to the Linux kernel 5 * 6 * Copyright (c) 2000 Torsten Landschoff <torsten@debian.org> 7 * Sebastian Krahmer <krahmer@cs.uni-potsdam.de> 8 * 9 * License: BSD 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in 19 * the documentation and/or other materials provided with the 20 * distribution. 21 * 3. The names of the authors may not be used to endorse or promote 22 * products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 27 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 28 * 29 * Modifications: Added PACKET_MMAP support 30 * Paolo Abeni <paolo.abeni@email.it> 31 * Added TPACKET_V3 support 32 * Gabor Tatarka <gabor.tatarka@ericsson.com> 33 * 34 * based on previous works of: 35 * Simon Patarin <patarin@cs.unibo.it> 36 * Phil Wood <cpw@lanl.gov> 37 * 38 * Monitor-mode support for mac80211 includes code taken from the iw 39 * command; the copyright notice for that code is 40 * 41 * Copyright (c) 2007, 2008 Johannes Berg 42 * Copyright (c) 2007 Andy Lutomirski 43 * Copyright (c) 2007 Mike Kershaw 44 * Copyright (c) 2008 Gábor Stefanik 45 * 46 * All rights reserved. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. The name of the author may not be used to endorse or promote products 57 * derived from this software without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 60 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 61 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 62 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 63 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 64 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 65 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 66 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 67 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 */ 71 72 73 #define _GNU_SOURCE 74 75 #include <sys/cdefs.h> 76 __RCSID("$NetBSD: pcap-linux.c,v 1.8 2024/09/02 15:33:37 christos Exp $"); 77 78 #include <config.h> 79 80 #include <errno.h> 81 #include <stdio.h> 82 #include <stdlib.h> 83 #include <unistd.h> 84 #include <fcntl.h> 85 #include <string.h> 86 #include <limits.h> 87 #include <endian.h> 88 #include <sys/stat.h> 89 #include <sys/socket.h> 90 #include <sys/ioctl.h> 91 #include <sys/utsname.h> 92 #include <sys/mman.h> 93 #include <linux/if.h> 94 #include <linux/if_packet.h> 95 #include <linux/sockios.h> 96 #include <linux/ethtool.h> 97 #include <netinet/in.h> 98 #include <linux/if_ether.h> 99 #include <linux/if_arp.h> 100 #include <poll.h> 101 #include <dirent.h> 102 #include <sys/eventfd.h> 103 104 #include "pcap-int.h" 105 #include "pcap-util.h" 106 #include "pcap/sll.h" 107 #include "pcap/vlan.h" 108 #include "pcap/can_socketcan.h" 109 110 #include "diag-control.h" 111 112 /* 113 * We require TPACKET_V2 support. 114 */ 115 #ifndef TPACKET2_HDRLEN 116 #error "Libpcap will only work if TPACKET_V2 is supported; you must build for a 2.6.27 or later kernel" 117 #endif 118 119 /* check for memory mapped access availability. We assume every needed 120 * struct is defined if the macro TPACKET_HDRLEN is defined, because it 121 * uses many ring related structs and macros */ 122 #ifdef TPACKET3_HDRLEN 123 # define HAVE_TPACKET3 124 #endif /* TPACKET3_HDRLEN */ 125 126 /* 127 * Not all compilers that are used to compile code to run on Linux have 128 * these builtins. For example, older versions of GCC don't, and at 129 * least some people are doing cross-builds for MIPS with older versions 130 * of GCC. 131 */ 132 #ifndef HAVE___ATOMIC_LOAD_N 133 #define __atomic_load_n(ptr, memory_model) (*(ptr)) 134 #endif 135 #ifndef HAVE___ATOMIC_STORE_N 136 #define __atomic_store_n(ptr, val, memory_model) *(ptr) = (val) 137 #endif 138 139 #define packet_mmap_acquire(pkt) \ 140 (__atomic_load_n(&pkt->tp_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 141 #define packet_mmap_release(pkt) \ 142 (__atomic_store_n(&pkt->tp_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 143 #define packet_mmap_v3_acquire(pkt) \ 144 (__atomic_load_n(&pkt->hdr.bh1.block_status, __ATOMIC_ACQUIRE) != TP_STATUS_KERNEL) 145 #define packet_mmap_v3_release(pkt) \ 146 (__atomic_store_n(&pkt->hdr.bh1.block_status, TP_STATUS_KERNEL, __ATOMIC_RELEASE)) 147 148 #include <linux/types.h> 149 #include <linux/filter.h> 150 151 #ifdef HAVE_LINUX_NET_TSTAMP_H 152 #include <linux/net_tstamp.h> 153 #endif 154 155 /* 156 * For checking whether a device is a bonding device. 157 */ 158 #include <linux/if_bonding.h> 159 160 /* 161 * Got libnl? 162 */ 163 #ifdef HAVE_LIBNL 164 #include <linux/nl80211.h> 165 166 #include <netlink/genl/genl.h> 167 #include <netlink/genl/family.h> 168 #include <netlink/genl/ctrl.h> 169 #include <netlink/msg.h> 170 #include <netlink/attr.h> 171 #endif /* HAVE_LIBNL */ 172 173 #ifndef HAVE_SOCKLEN_T 174 typedef int socklen_t; 175 #endif 176 177 #define MAX_LINKHEADER_SIZE 256 178 179 /* 180 * When capturing on all interfaces we use this as the buffer size. 181 * Should be bigger then all MTUs that occur in real life. 182 * 64kB should be enough for now. 183 */ 184 #define BIGGER_THAN_ALL_MTUS (64*1024) 185 186 /* 187 * Private data for capturing on Linux PF_PACKET sockets. 188 */ 189 struct pcap_linux { 190 long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */ 191 struct pcap_stat stat; 192 193 char *device; /* device name */ 194 int filter_in_userland; /* must filter in userland */ 195 int blocks_to_filter_in_userland; 196 int must_do_on_close; /* stuff we must do when we close */ 197 int timeout; /* timeout for buffering */ 198 int cooked; /* using SOCK_DGRAM rather than SOCK_RAW */ 199 int ifindex; /* interface index of device we're bound to */ 200 int lo_ifindex; /* interface index of the loopback device */ 201 int netdown; /* we got an ENETDOWN and haven't resolved it */ 202 bpf_u_int32 oldmode; /* mode to restore when turning monitor mode off */ 203 char *mondevice; /* mac80211 monitor device we created */ 204 u_char *mmapbuf; /* memory-mapped region pointer */ 205 size_t mmapbuflen; /* size of region */ 206 int vlan_offset; /* offset at which to insert vlan tags; if -1, don't insert */ 207 u_int tp_version; /* version of tpacket_hdr for mmaped ring */ 208 u_int tp_hdrlen; /* hdrlen of tpacket_hdr for mmaped ring */ 209 u_char *oneshot_buffer; /* buffer for copy of packet */ 210 int poll_timeout; /* timeout to use in poll() */ 211 #ifdef HAVE_TPACKET3 212 unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */ 213 int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */ 214 #endif 215 int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations */ 216 }; 217 218 /* 219 * Stuff to do when we close. 220 */ 221 #define MUST_CLEAR_RFMON 0x00000001 /* clear rfmon (monitor) mode */ 222 #define MUST_DELETE_MONIF 0x00000002 /* delete monitor-mode interface */ 223 224 /* 225 * Prototypes for internal functions and methods. 226 */ 227 static int get_if_flags(const char *, bpf_u_int32 *, char *); 228 static int is_wifi(const char *); 229 static int map_arphrd_to_dlt(pcap_t *, int, const char *, int); 230 static int pcap_activate_linux(pcap_t *); 231 static int setup_socket(pcap_t *, int); 232 static int setup_mmapped(pcap_t *); 233 static int pcap_can_set_rfmon_linux(pcap_t *); 234 static int pcap_inject_linux(pcap_t *, const void *, int); 235 static int pcap_stats_linux(pcap_t *, struct pcap_stat *); 236 static int pcap_setfilter_linux(pcap_t *, struct bpf_program *); 237 static int pcap_setdirection_linux(pcap_t *, pcap_direction_t); 238 static int pcap_set_datalink_linux(pcap_t *, int); 239 static void pcap_cleanup_linux(pcap_t *); 240 241 union thdr { 242 struct tpacket2_hdr *h2; 243 #ifdef HAVE_TPACKET3 244 struct tpacket_block_desc *h3; 245 #endif 246 u_char *raw; 247 }; 248 249 #define RING_GET_FRAME_AT(h, offset) (((u_char **)h->buffer)[(offset)]) 250 #define RING_GET_CURRENT_FRAME(h) RING_GET_FRAME_AT(h, h->offset) 251 252 static void destroy_ring(pcap_t *handle); 253 static int create_ring(pcap_t *handle); 254 static int prepare_tpacket_socket(pcap_t *handle); 255 static int pcap_read_linux_mmap_v2(pcap_t *, int, pcap_handler , u_char *); 256 #ifdef HAVE_TPACKET3 257 static int pcap_read_linux_mmap_v3(pcap_t *, int, pcap_handler , u_char *); 258 #endif 259 static int pcap_setnonblock_linux(pcap_t *p, int nonblock); 260 static int pcap_getnonblock_linux(pcap_t *p); 261 static void pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 262 const u_char *bytes); 263 264 /* 265 * In pre-3.0 kernels, the tp_vlan_tci field is set to whatever the 266 * vlan_tci field in the skbuff is. 0 can either mean "not on a VLAN" 267 * or "on VLAN 0". There is no flag set in the tp_status field to 268 * distinguish between them. 269 * 270 * In 3.0 and later kernels, if there's a VLAN tag present, the tp_vlan_tci 271 * field is set to the VLAN tag, and the TP_STATUS_VLAN_VALID flag is set 272 * in the tp_status field, otherwise the tp_vlan_tci field is set to 0 and 273 * the TP_STATUS_VLAN_VALID flag isn't set in the tp_status field. 274 * 275 * With a pre-3.0 kernel, we cannot distinguish between packets with no 276 * VLAN tag and packets on VLAN 0, so we will mishandle some packets, and 277 * there's nothing we can do about that. 278 * 279 * So, on those systems, which never set the TP_STATUS_VLAN_VALID flag, we 280 * continue the behavior of earlier libpcaps, wherein we treated packets 281 * with a VLAN tag of 0 as being packets without a VLAN tag rather than packets 282 * on VLAN 0. We do this by treating packets with a tp_vlan_tci of 0 and 283 * with the TP_STATUS_VLAN_VALID flag not set in tp_status as not having 284 * VLAN tags. This does the right thing on 3.0 and later kernels, and 285 * continues the old unfixably-imperfect behavior on pre-3.0 kernels. 286 * 287 * If TP_STATUS_VLAN_VALID isn't defined, we test it as the 0x10 bit; it 288 * has that value in 3.0 and later kernels. 289 */ 290 #ifdef TP_STATUS_VLAN_VALID 291 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & TP_STATUS_VLAN_VALID)) 292 #else 293 /* 294 * This is being compiled on a system that lacks TP_STATUS_VLAN_VALID, 295 * so we test with the value it has in the 3.0 and later kernels, so 296 * we can test it if we're running on a system that has it. (If we're 297 * running on a system that doesn't have it, it won't be set in the 298 * tp_status field, so the tests of it will always fail; that means 299 * we behave the way we did before we introduced this macro.) 300 */ 301 #define VLAN_VALID(hdr, hv) ((hv)->tp_vlan_tci != 0 || ((hdr)->tp_status & 0x10)) 302 #endif 303 304 #ifdef TP_STATUS_VLAN_TPID_VALID 305 # define VLAN_TPID(hdr, hv) (((hv)->tp_vlan_tpid || ((hdr)->tp_status & TP_STATUS_VLAN_TPID_VALID)) ? (hv)->tp_vlan_tpid : ETH_P_8021Q) 306 #else 307 # define VLAN_TPID(hdr, hv) ETH_P_8021Q 308 #endif 309 310 /* 311 * Required select timeout if we're polling for an "interface disappeared" 312 * indication - 1 millisecond. 313 */ 314 static const struct timeval netdown_timeout = { 315 0, 1000 /* 1000 microseconds = 1 millisecond */ 316 }; 317 318 /* 319 * Wrap some ioctl calls 320 */ 321 static int iface_get_id(int fd, const char *device, char *ebuf); 322 static int iface_get_mtu(int fd, const char *device, char *ebuf); 323 static int iface_get_arptype(int fd, const char *device, char *ebuf); 324 static int iface_bind(int fd, int ifindex, char *ebuf, int protocol); 325 static int enter_rfmon_mode(pcap_t *handle, int sock_fd, 326 const char *device); 327 static int iface_get_ts_types(const char *device, pcap_t *handle, 328 char *ebuf); 329 static int iface_get_offload(pcap_t *handle); 330 331 static int fix_program(pcap_t *handle, struct sock_fprog *fcode); 332 static int fix_offset(pcap_t *handle, struct bpf_insn *p); 333 static int set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode); 334 static int reset_kernel_filter(pcap_t *handle); 335 336 static struct sock_filter total_insn 337 = BPF_STMT(BPF_RET | BPF_K, 0); 338 static struct sock_fprog total_fcode 339 = { 1, &total_insn }; 340 341 static int iface_dsa_get_proto_info(const char *device, pcap_t *handle); 342 343 pcap_t * 344 pcapint_create_interface(const char *device, char *ebuf) 345 { 346 pcap_t *handle; 347 348 handle = PCAP_CREATE_COMMON(ebuf, struct pcap_linux); 349 if (handle == NULL) 350 return NULL; 351 352 handle->activate_op = pcap_activate_linux; 353 handle->can_set_rfmon_op = pcap_can_set_rfmon_linux; 354 355 /* 356 * See what time stamp types we support. 357 */ 358 if (iface_get_ts_types(device, handle, ebuf) == -1) { 359 pcap_close(handle); 360 return NULL; 361 } 362 363 /* 364 * We claim that we support microsecond and nanosecond time 365 * stamps. 366 * 367 * XXX - with adapter-supplied time stamps, can we choose 368 * microsecond or nanosecond time stamps on arbitrary 369 * adapters? 370 */ 371 handle->tstamp_precision_list = malloc(2 * sizeof(u_int)); 372 if (handle->tstamp_precision_list == NULL) { 373 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 374 errno, "malloc"); 375 pcap_close(handle); 376 return NULL; 377 } 378 handle->tstamp_precision_list[0] = PCAP_TSTAMP_PRECISION_MICRO; 379 handle->tstamp_precision_list[1] = PCAP_TSTAMP_PRECISION_NANO; 380 handle->tstamp_precision_count = 2; 381 382 /* 383 * Start out with the breakloop handle not open; we don't 384 * need it until we're activated and ready to capture. 385 */ 386 struct pcap_linux *handlep = handle->priv; 387 handlep->poll_breakloop_fd = -1; 388 389 return handle; 390 } 391 392 #ifdef HAVE_LIBNL 393 /* 394 * If interface {if_name} is a mac80211 driver, the file 395 * /sys/class/net/{if_name}/phy80211 is a symlink to 396 * /sys/class/ieee80211/{phydev_name}, for some {phydev_name}. 397 * 398 * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at 399 * least, has a "wmaster0" device and a "wlan0" device; the 400 * latter is the one with the IP address. Both show up in 401 * "tcpdump -D" output. Capturing on the wmaster0 device 402 * captures with 802.11 headers. 403 * 404 * airmon-ng searches through /sys/class/net for devices named 405 * monN, starting with mon0; as soon as one *doesn't* exist, 406 * it chooses that as the monitor device name. If the "iw" 407 * command exists, it does 408 * 409 * iw dev {if_name} interface add {monif_name} type monitor 410 * 411 * where {monif_name} is the monitor device. It then (sigh) sleeps 412 * .1 second, and then configures the device up. Otherwise, if 413 * /sys/class/ieee80211/{phydev_name}/add_iface is a file, it writes 414 * {mondev_name}, without a newline, to that file, and again (sigh) 415 * sleeps .1 second, and then iwconfig's that device into monitor 416 * mode and configures it up. Otherwise, you can't do monitor mode. 417 * 418 * All these devices are "glued" together by having the 419 * /sys/class/net/{if_name}/phy80211 links pointing to the same 420 * place, so, given a wmaster, wlan, or mon device, you can 421 * find the other devices by looking for devices with 422 * the same phy80211 link. 423 * 424 * To turn monitor mode off, delete the monitor interface, 425 * either with 426 * 427 * iw dev {monif_name} interface del 428 * 429 * or by sending {monif_name}, with no NL, down 430 * /sys/class/ieee80211/{phydev_name}/remove_iface 431 * 432 * Note: if you try to create a monitor device named "monN", and 433 * there's already a "monN" device, it fails, as least with 434 * the netlink interface (which is what iw uses), with a return 435 * value of -ENFILE. (Return values are negative errnos.) We 436 * could probably use that to find an unused device. 437 * 438 * Yes, you can have multiple monitor devices for a given 439 * physical device. 440 */ 441 442 /* 443 * Is this a mac80211 device? If so, fill in the physical device path and 444 * return 1; if not, return 0. On an error, fill in handle->errbuf and 445 * return PCAP_ERROR. 446 */ 447 static int 448 get_mac80211_phydev(pcap_t *handle, const char *device, char *phydev_path, 449 size_t phydev_max_pathlen) 450 { 451 char *pathstr; 452 ssize_t bytes_read; 453 454 /* 455 * Generate the path string for the symlink to the physical device. 456 */ 457 if (asprintf(&pathstr, "/sys/class/net/%s/phy80211", device) == -1) { 458 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 459 "%s: Can't generate path name string for /sys/class/net device", 460 device); 461 return PCAP_ERROR; 462 } 463 bytes_read = readlink(pathstr, phydev_path, phydev_max_pathlen); 464 if (bytes_read == -1) { 465 if (errno == ENOENT || errno == EINVAL) { 466 /* 467 * Doesn't exist, or not a symlink; assume that 468 * means it's not a mac80211 device. 469 */ 470 free(pathstr); 471 return 0; 472 } 473 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 474 errno, "%s: Can't readlink %s", device, pathstr); 475 free(pathstr); 476 return PCAP_ERROR; 477 } 478 free(pathstr); 479 phydev_path[bytes_read] = '\0'; 480 return 1; 481 } 482 483 struct nl80211_state { 484 struct nl_sock *nl_sock; 485 struct nl_cache *nl_cache; 486 struct genl_family *nl80211; 487 }; 488 489 static int 490 nl80211_init(pcap_t *handle, struct nl80211_state *state, const char *device) 491 { 492 int err; 493 494 state->nl_sock = nl_socket_alloc(); 495 if (!state->nl_sock) { 496 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 497 "%s: failed to allocate netlink handle", device); 498 return PCAP_ERROR; 499 } 500 501 if (genl_connect(state->nl_sock)) { 502 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 503 "%s: failed to connect to generic netlink", device); 504 goto out_handle_destroy; 505 } 506 507 err = genl_ctrl_alloc_cache(state->nl_sock, &state->nl_cache); 508 if (err < 0) { 509 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 510 "%s: failed to allocate generic netlink cache: %s", 511 device, nl_geterror(-err)); 512 goto out_handle_destroy; 513 } 514 515 state->nl80211 = genl_ctrl_search_by_name(state->nl_cache, "nl80211"); 516 if (!state->nl80211) { 517 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 518 "%s: nl80211 not found", device); 519 goto out_cache_free; 520 } 521 522 return 0; 523 524 out_cache_free: 525 nl_cache_free(state->nl_cache); 526 out_handle_destroy: 527 nl_socket_free(state->nl_sock); 528 return PCAP_ERROR; 529 } 530 531 static void 532 nl80211_cleanup(struct nl80211_state *state) 533 { 534 genl_family_put(state->nl80211); 535 nl_cache_free(state->nl_cache); 536 nl_socket_free(state->nl_sock); 537 } 538 539 static int 540 del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 541 const char *device, const char *mondevice); 542 543 static int 544 add_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 545 const char *device, const char *mondevice) 546 { 547 struct pcap_linux *handlep = handle->priv; 548 int ifindex; 549 struct nl_msg *msg; 550 int err; 551 552 ifindex = iface_get_id(sock_fd, device, handle->errbuf); 553 if (ifindex == -1) 554 return PCAP_ERROR; 555 556 msg = nlmsg_alloc(); 557 if (!msg) { 558 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 559 "%s: failed to allocate netlink msg", device); 560 return PCAP_ERROR; 561 } 562 563 genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0, 564 0, NL80211_CMD_NEW_INTERFACE, 0); 565 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 566 DIAG_OFF_NARROWING 567 NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, mondevice); 568 DIAG_ON_NARROWING 569 NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, NL80211_IFTYPE_MONITOR); 570 571 err = nl_send_auto_complete(state->nl_sock, msg); 572 if (err < 0) { 573 if (err == -NLE_FAILURE) { 574 /* 575 * Device not available; our caller should just 576 * keep trying. (libnl 2.x maps ENFILE to 577 * NLE_FAILURE; it can also map other errors 578 * to that, but there's not much we can do 579 * about that.) 580 */ 581 nlmsg_free(msg); 582 return 0; 583 } else { 584 /* 585 * Real failure, not just "that device is not 586 * available. 587 */ 588 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 589 "%s: nl_send_auto_complete failed adding %s interface: %s", 590 device, mondevice, nl_geterror(-err)); 591 nlmsg_free(msg); 592 return PCAP_ERROR; 593 } 594 } 595 err = nl_wait_for_ack(state->nl_sock); 596 if (err < 0) { 597 if (err == -NLE_FAILURE) { 598 /* 599 * Device not available; our caller should just 600 * keep trying. (libnl 2.x maps ENFILE to 601 * NLE_FAILURE; it can also map other errors 602 * to that, but there's not much we can do 603 * about that.) 604 */ 605 nlmsg_free(msg); 606 return 0; 607 } else { 608 /* 609 * Real failure, not just "that device is not 610 * available. 611 */ 612 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 613 "%s: nl_wait_for_ack failed adding %s interface: %s", 614 device, mondevice, nl_geterror(-err)); 615 nlmsg_free(msg); 616 return PCAP_ERROR; 617 } 618 } 619 620 /* 621 * Success. 622 */ 623 nlmsg_free(msg); 624 625 /* 626 * Try to remember the monitor device. 627 */ 628 handlep->mondevice = strdup(mondevice); 629 if (handlep->mondevice == NULL) { 630 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 631 errno, "strdup"); 632 /* 633 * Get rid of the monitor device. 634 */ 635 del_mon_if(handle, sock_fd, state, device, mondevice); 636 return PCAP_ERROR; 637 } 638 return 1; 639 640 nla_put_failure: 641 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 642 "%s: nl_put failed adding %s interface", 643 device, mondevice); 644 nlmsg_free(msg); 645 return PCAP_ERROR; 646 } 647 648 static int 649 del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state, 650 const char *device, const char *mondevice) 651 { 652 int ifindex; 653 struct nl_msg *msg; 654 int err; 655 656 ifindex = iface_get_id(sock_fd, mondevice, handle->errbuf); 657 if (ifindex == -1) 658 return PCAP_ERROR; 659 660 msg = nlmsg_alloc(); 661 if (!msg) { 662 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 663 "%s: failed to allocate netlink msg", device); 664 return PCAP_ERROR; 665 } 666 667 genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0, 668 0, NL80211_CMD_DEL_INTERFACE, 0); 669 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex); 670 671 err = nl_send_auto_complete(state->nl_sock, msg); 672 if (err < 0) { 673 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 674 "%s: nl_send_auto_complete failed deleting %s interface: %s", 675 device, mondevice, nl_geterror(-err)); 676 nlmsg_free(msg); 677 return PCAP_ERROR; 678 } 679 err = nl_wait_for_ack(state->nl_sock); 680 if (err < 0) { 681 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 682 "%s: nl_wait_for_ack failed adding %s interface: %s", 683 device, mondevice, nl_geterror(-err)); 684 nlmsg_free(msg); 685 return PCAP_ERROR; 686 } 687 688 /* 689 * Success. 690 */ 691 nlmsg_free(msg); 692 return 1; 693 694 nla_put_failure: 695 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 696 "%s: nl_put failed deleting %s interface", 697 device, mondevice); 698 nlmsg_free(msg); 699 return PCAP_ERROR; 700 } 701 #endif /* HAVE_LIBNL */ 702 703 static int pcap_protocol(pcap_t *handle) 704 { 705 int protocol; 706 707 protocol = handle->opt.protocol; 708 if (protocol == 0) 709 protocol = ETH_P_ALL; 710 711 return htons(protocol); 712 } 713 714 static int 715 pcap_can_set_rfmon_linux(pcap_t *handle) 716 { 717 #ifdef HAVE_LIBNL 718 char phydev_path[PATH_MAX+1]; 719 int ret; 720 #endif 721 722 if (strcmp(handle->opt.device, "any") == 0) { 723 /* 724 * Monitor mode makes no sense on the "any" device. 725 */ 726 return 0; 727 } 728 729 #ifdef HAVE_LIBNL 730 /* 731 * Bleah. There doesn't seem to be a way to ask a mac80211 732 * device, through libnl, whether it supports monitor mode; 733 * we'll just check whether the device appears to be a 734 * mac80211 device and, if so, assume the device supports 735 * monitor mode. 736 */ 737 ret = get_mac80211_phydev(handle, handle->opt.device, phydev_path, 738 PATH_MAX); 739 if (ret < 0) 740 return ret; /* error */ 741 if (ret == 1) 742 return 1; /* mac80211 device */ 743 #endif 744 745 return 0; 746 } 747 748 /* 749 * Grabs the number of missed packets by the interface from 750 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors. 751 * 752 * Compared to /proc/net/dev this avoids counting software drops, 753 * but may be unimplemented and just return 0. 754 * The author has found no straightforward way to check for support. 755 */ 756 static long long int 757 linux_get_stat(const char * if_name, const char * stat) { 758 ssize_t bytes_read; 759 int fd; 760 char buffer[PATH_MAX]; 761 762 snprintf(buffer, sizeof(buffer), "/sys/class/net/%s/statistics/%s", if_name, stat); 763 fd = open(buffer, O_RDONLY); 764 if (fd == -1) 765 return 0; 766 767 bytes_read = read(fd, buffer, sizeof(buffer) - 1); 768 close(fd); 769 if (bytes_read == -1) 770 return 0; 771 buffer[bytes_read] = '\0'; 772 773 return strtoll(buffer, NULL, 10); 774 } 775 776 static long long int 777 linux_if_drops(const char * if_name) 778 { 779 long long int missed = linux_get_stat(if_name, "rx_missed_errors"); 780 long long int fifo = linux_get_stat(if_name, "rx_fifo_errors"); 781 return missed + fifo; 782 } 783 784 785 /* 786 * Monitor mode is kind of interesting because we have to reset the 787 * interface before exiting. The problem can't really be solved without 788 * some daemon taking care of managing usage counts. If we put the 789 * interface into monitor mode, we set a flag indicating that we must 790 * take it out of that mode when the interface is closed, and, when 791 * closing the interface, if that flag is set we take it out of monitor 792 * mode. 793 */ 794 795 static void pcap_cleanup_linux( pcap_t *handle ) 796 { 797 struct pcap_linux *handlep = handle->priv; 798 #ifdef HAVE_LIBNL 799 struct nl80211_state nlstate; 800 int ret; 801 #endif /* HAVE_LIBNL */ 802 803 if (handlep->must_do_on_close != 0) { 804 /* 805 * There's something we have to do when closing this 806 * pcap_t. 807 */ 808 #ifdef HAVE_LIBNL 809 if (handlep->must_do_on_close & MUST_DELETE_MONIF) { 810 ret = nl80211_init(handle, &nlstate, handlep->device); 811 if (ret >= 0) { 812 ret = del_mon_if(handle, handle->fd, &nlstate, 813 handlep->device, handlep->mondevice); 814 nl80211_cleanup(&nlstate); 815 } 816 if (ret < 0) { 817 fprintf(stderr, 818 "Can't delete monitor interface %s (%s).\n" 819 "Please delete manually.\n", 820 handlep->mondevice, handle->errbuf); 821 } 822 } 823 #endif /* HAVE_LIBNL */ 824 825 /* 826 * Take this pcap out of the list of pcaps for which we 827 * have to take the interface out of some mode. 828 */ 829 pcapint_remove_from_pcaps_to_close(handle); 830 } 831 832 if (handle->fd != -1) { 833 /* 834 * Destroy the ring buffer (assuming we've set it up), 835 * and unmap it if it's mapped. 836 */ 837 destroy_ring(handle); 838 } 839 840 if (handlep->oneshot_buffer != NULL) { 841 free(handlep->oneshot_buffer); 842 handlep->oneshot_buffer = NULL; 843 } 844 845 if (handlep->mondevice != NULL) { 846 free(handlep->mondevice); 847 handlep->mondevice = NULL; 848 } 849 if (handlep->device != NULL) { 850 free(handlep->device); 851 handlep->device = NULL; 852 } 853 854 if (handlep->poll_breakloop_fd != -1) { 855 close(handlep->poll_breakloop_fd); 856 handlep->poll_breakloop_fd = -1; 857 } 858 pcapint_cleanup_live_common(handle); 859 } 860 861 #ifdef HAVE_TPACKET3 862 /* 863 * Some versions of TPACKET_V3 have annoying bugs/misfeatures 864 * around which we have to work. Determine if we have those 865 * problems or not. 866 * 3.19 is the first release with a fixed version of 867 * TPACKET_V3. We treat anything before that as 868 * not having a fixed version; that may really mean 869 * it has *no* version. 870 */ 871 static int has_broken_tpacket_v3(void) 872 { 873 struct utsname utsname; 874 const char *release; 875 long major, minor; 876 int matches, verlen; 877 878 /* No version information, assume broken. */ 879 if (uname(&utsname) == -1) 880 return 1; 881 release = utsname.release; 882 883 /* A malformed version, ditto. */ 884 matches = sscanf(release, "%ld.%ld%n", &major, &minor, &verlen); 885 if (matches != 2) 886 return 1; 887 if (release[verlen] != '.' && release[verlen] != '\0') 888 return 1; 889 890 /* OK, a fixed version. */ 891 if (major > 3 || (major == 3 && minor >= 19)) 892 return 0; 893 894 /* Too old :( */ 895 return 1; 896 } 897 #endif 898 899 /* 900 * Set the timeout to be used in poll() with memory-mapped packet capture. 901 */ 902 static void 903 set_poll_timeout(struct pcap_linux *handlep) 904 { 905 #ifdef HAVE_TPACKET3 906 int broken_tpacket_v3 = has_broken_tpacket_v3(); 907 #endif 908 if (handlep->timeout == 0) { 909 #ifdef HAVE_TPACKET3 910 /* 911 * XXX - due to a set of (mis)features in the TPACKET_V3 912 * kernel code prior to the 3.19 kernel, blocking forever 913 * with a TPACKET_V3 socket can, if few packets are 914 * arriving and passing the socket filter, cause most 915 * packets to be dropped. See libpcap issue #335 for the 916 * full painful story. 917 * 918 * The workaround is to have poll() time out very quickly, 919 * so we grab the frames handed to us, and return them to 920 * the kernel, ASAP. 921 */ 922 if (handlep->tp_version == TPACKET_V3 && broken_tpacket_v3) 923 handlep->poll_timeout = 1; /* don't block for very long */ 924 else 925 #endif 926 handlep->poll_timeout = -1; /* block forever */ 927 } else if (handlep->timeout > 0) { 928 #ifdef HAVE_TPACKET3 929 /* 930 * For TPACKET_V3, the timeout is handled by the kernel, 931 * so block forever; that way, we don't get extra timeouts. 932 * Don't do that if we have a broken TPACKET_V3, though. 933 */ 934 if (handlep->tp_version == TPACKET_V3 && !broken_tpacket_v3) 935 handlep->poll_timeout = -1; /* block forever, let TPACKET_V3 wake us up */ 936 else 937 #endif 938 handlep->poll_timeout = handlep->timeout; /* block for that amount of time */ 939 } else { 940 /* 941 * Non-blocking mode; we call poll() to pick up error 942 * indications, but we don't want it to wait for 943 * anything. 944 */ 945 handlep->poll_timeout = 0; 946 } 947 } 948 949 static void pcap_breakloop_linux(pcap_t *handle) 950 { 951 pcapint_breakloop_common(handle); 952 struct pcap_linux *handlep = handle->priv; 953 954 uint64_t value = 1; 955 956 if (handlep->poll_breakloop_fd != -1) { 957 /* 958 * XXX - pcap_breakloop() doesn't have a return value, 959 * so we can't indicate an error. 960 */ 961 DIAG_OFF_WARN_UNUSED_RESULT 962 (void)write(handlep->poll_breakloop_fd, &value, sizeof(value)); 963 DIAG_ON_WARN_UNUSED_RESULT 964 } 965 } 966 967 /* 968 * Set the offset at which to insert VLAN tags. 969 * That should be the offset of the type field. 970 */ 971 static void 972 set_vlan_offset(pcap_t *handle) 973 { 974 struct pcap_linux *handlep = handle->priv; 975 976 switch (handle->linktype) { 977 978 case DLT_EN10MB: 979 /* 980 * The type field is after the destination and source 981 * MAC address. 982 */ 983 handlep->vlan_offset = 2 * ETH_ALEN; 984 break; 985 986 case DLT_LINUX_SLL: 987 /* 988 * The type field is in the last 2 bytes of the 989 * DLT_LINUX_SLL header. 990 */ 991 handlep->vlan_offset = SLL_HDR_LEN - 2; 992 break; 993 994 default: 995 handlep->vlan_offset = -1; /* unknown */ 996 break; 997 } 998 } 999 1000 /* 1001 * Get a handle for a live capture from the given device. You can 1002 * pass NULL as device to get all packages (without link level 1003 * information of course). If you pass 1 as promisc the interface 1004 * will be set to promiscuous mode (XXX: I think this usage should 1005 * be deprecated and functions be added to select that later allow 1006 * modification of that values -- Torsten). 1007 */ 1008 static int 1009 pcap_activate_linux(pcap_t *handle) 1010 { 1011 struct pcap_linux *handlep = handle->priv; 1012 const char *device; 1013 int is_any_device; 1014 struct ifreq ifr; 1015 int status; 1016 int ret; 1017 1018 device = handle->opt.device; 1019 1020 /* 1021 * Start out assuming no warnings. 1022 */ 1023 status = 0; 1024 1025 /* 1026 * Make sure the name we were handed will fit into the ioctls we 1027 * might perform on the device; if not, return a "No such device" 1028 * indication, as the Linux kernel shouldn't support creating 1029 * a device whose name won't fit into those ioctls. 1030 * 1031 * "Will fit" means "will fit, complete with a null terminator", 1032 * so if the length, which does *not* include the null terminator, 1033 * is greater than *or equal to* the size of the field into which 1034 * we'll be copying it, that won't fit. 1035 */ 1036 if (strlen(device) >= sizeof(ifr.ifr_name)) { 1037 /* 1038 * There's nothing more to say, so clear the error 1039 * message. 1040 */ 1041 handle->errbuf[0] = '\0'; 1042 status = PCAP_ERROR_NO_SUCH_DEVICE; 1043 goto fail; 1044 } 1045 1046 /* 1047 * Turn a negative snapshot value (invalid), a snapshot value of 1048 * 0 (unspecified), or a value bigger than the normal maximum 1049 * value, into the maximum allowed value. 1050 * 1051 * If some application really *needs* a bigger snapshot 1052 * length, we should just increase MAXIMUM_SNAPLEN. 1053 */ 1054 if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN) 1055 handle->snapshot = MAXIMUM_SNAPLEN; 1056 1057 handlep->device = strdup(device); 1058 if (handlep->device == NULL) { 1059 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1060 errno, "strdup"); 1061 status = PCAP_ERROR; 1062 goto fail; 1063 } 1064 1065 /* 1066 * The "any" device is a special device which causes us not 1067 * to bind to a particular device and thus to look at all 1068 * devices. 1069 */ 1070 is_any_device = (strcmp(device, "any") == 0); 1071 if (is_any_device) { 1072 if (handle->opt.promisc) { 1073 handle->opt.promisc = 0; 1074 /* Just a warning. */ 1075 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 1076 "Promiscuous mode not supported on the \"any\" device"); 1077 status = PCAP_WARNING_PROMISC_NOTSUP; 1078 } 1079 } 1080 1081 /* copy timeout value */ 1082 handlep->timeout = handle->opt.timeout; 1083 1084 /* 1085 * If we're in promiscuous mode, then we probably want 1086 * to see when the interface drops packets too, so get an 1087 * initial count from 1088 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1089 */ 1090 if (handle->opt.promisc) 1091 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1092 1093 /* 1094 * If the "any" device is specified, try to open a SOCK_DGRAM. 1095 * Otherwise, open a SOCK_RAW. 1096 */ 1097 ret = setup_socket(handle, is_any_device); 1098 if (ret < 0) { 1099 /* 1100 * Fatal error; the return value is the error code, 1101 * and handle->errbuf has been set to an appropriate 1102 * error message. 1103 */ 1104 status = ret; 1105 goto fail; 1106 } 1107 if (ret > 0) { 1108 /* 1109 * We got a warning; return that, as handle->errbuf 1110 * might have been overwritten by this warning. 1111 */ 1112 status = ret; 1113 } 1114 1115 /* 1116 * Success (possibly with a warning). 1117 * 1118 * First, try to allocate an event FD for breakloop, if 1119 * we're not going to start in non-blocking mode. 1120 */ 1121 if (!handle->opt.nonblock) { 1122 handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK); 1123 if (handlep->poll_breakloop_fd == -1) { 1124 /* 1125 * Failed. 1126 */ 1127 pcapint_fmt_errmsg_for_errno(handle->errbuf, 1128 PCAP_ERRBUF_SIZE, errno, "could not open eventfd"); 1129 status = PCAP_ERROR; 1130 goto fail; 1131 } 1132 } 1133 1134 /* 1135 * Succeeded. 1136 * Try to set up memory-mapped access. 1137 */ 1138 ret = setup_mmapped(handle); 1139 if (ret < 0) { 1140 /* 1141 * We failed to set up to use it, or the 1142 * kernel supports it, but we failed to 1143 * enable it. The return value is the 1144 * error status to return and, if it's 1145 * PCAP_ERROR, handle->errbuf contains 1146 * the error message. 1147 */ 1148 status = ret; 1149 goto fail; 1150 } 1151 if (ret > 0) { 1152 /* 1153 * We got a warning; return that, as handle->errbuf 1154 * might have been overwritten by this warning. 1155 */ 1156 status = ret; 1157 } 1158 1159 /* 1160 * We succeeded. status has been set to the status to return, 1161 * which might be 0, or might be a PCAP_WARNING_ value. 1162 */ 1163 /* 1164 * Now that we have activated the mmap ring, we can 1165 * set the correct protocol. 1166 */ 1167 if ((ret = iface_bind(handle->fd, handlep->ifindex, 1168 handle->errbuf, pcap_protocol(handle))) != 0) { 1169 status = ret; 1170 goto fail; 1171 } 1172 1173 handle->inject_op = pcap_inject_linux; 1174 handle->setfilter_op = pcap_setfilter_linux; 1175 handle->setdirection_op = pcap_setdirection_linux; 1176 handle->set_datalink_op = pcap_set_datalink_linux; 1177 handle->setnonblock_op = pcap_setnonblock_linux; 1178 handle->getnonblock_op = pcap_getnonblock_linux; 1179 handle->cleanup_op = pcap_cleanup_linux; 1180 handle->stats_op = pcap_stats_linux; 1181 handle->breakloop_op = pcap_breakloop_linux; 1182 1183 switch (handlep->tp_version) { 1184 1185 case TPACKET_V2: 1186 handle->read_op = pcap_read_linux_mmap_v2; 1187 break; 1188 #ifdef HAVE_TPACKET3 1189 case TPACKET_V3: 1190 handle->read_op = pcap_read_linux_mmap_v3; 1191 break; 1192 #endif 1193 } 1194 handle->oneshot_callback = pcapint_oneshot_linux; 1195 handle->selectable_fd = handle->fd; 1196 1197 return status; 1198 1199 fail: 1200 pcap_cleanup_linux(handle); 1201 return status; 1202 } 1203 1204 static int 1205 pcap_set_datalink_linux(pcap_t *handle, int dlt) 1206 { 1207 handle->linktype = dlt; 1208 1209 /* 1210 * Update the offset at which to insert VLAN tags for the 1211 * new link-layer type. 1212 */ 1213 set_vlan_offset(handle); 1214 1215 return 0; 1216 } 1217 1218 /* 1219 * linux_check_direction() 1220 * 1221 * Do checks based on packet direction. 1222 */ 1223 static inline int 1224 linux_check_direction(const pcap_t *handle, const struct sockaddr_ll *sll) 1225 { 1226 struct pcap_linux *handlep = handle->priv; 1227 1228 if (sll->sll_pkttype == PACKET_OUTGOING) { 1229 /* 1230 * Outgoing packet. 1231 * If this is from the loopback device, reject it; 1232 * we'll see the packet as an incoming packet as well, 1233 * and we don't want to see it twice. 1234 */ 1235 if (sll->sll_ifindex == handlep->lo_ifindex) 1236 return 0; 1237 1238 /* 1239 * If this is an outgoing CAN or CAN FD frame, and 1240 * the user doesn't only want outgoing packets, 1241 * reject it; CAN devices and drivers, and the CAN 1242 * stack, always arrange to loop back transmitted 1243 * packets, so they also appear as incoming packets. 1244 * We don't want duplicate packets, and we can't 1245 * easily distinguish packets looped back by the CAN 1246 * layer than those received by the CAN layer, so we 1247 * eliminate this packet instead. 1248 * 1249 * We check whether this is a CAN or CAN FD frame 1250 * by checking whether the device's hardware type 1251 * is ARPHRD_CAN. 1252 */ 1253 if (sll->sll_hatype == ARPHRD_CAN && 1254 handle->direction != PCAP_D_OUT) 1255 return 0; 1256 1257 /* 1258 * If the user only wants incoming packets, reject it. 1259 */ 1260 if (handle->direction == PCAP_D_IN) 1261 return 0; 1262 } else { 1263 /* 1264 * Incoming packet. 1265 * If the user only wants outgoing packets, reject it. 1266 */ 1267 if (handle->direction == PCAP_D_OUT) 1268 return 0; 1269 } 1270 return 1; 1271 } 1272 1273 /* 1274 * Check whether the device to which the pcap_t is bound still exists. 1275 * We do so by asking what address the socket is bound to, and checking 1276 * whether the ifindex in the address is -1, meaning "that device is gone", 1277 * or some other value, meaning "that device still exists". 1278 */ 1279 static int 1280 device_still_exists(pcap_t *handle) 1281 { 1282 struct pcap_linux *handlep = handle->priv; 1283 struct sockaddr_ll addr; 1284 socklen_t addr_len; 1285 1286 /* 1287 * If handlep->ifindex is -1, the socket isn't bound, meaning 1288 * we're capturing on the "any" device; that device never 1289 * disappears. (It should also never be configured down, so 1290 * we shouldn't even get here, but let's make sure.) 1291 */ 1292 if (handlep->ifindex == -1) 1293 return (1); /* it's still here */ 1294 1295 /* 1296 * OK, now try to get the address for the socket. 1297 */ 1298 addr_len = sizeof (addr); 1299 if (getsockname(handle->fd, (struct sockaddr *) &addr, &addr_len) == -1) { 1300 /* 1301 * Error - report an error and return -1. 1302 */ 1303 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1304 errno, "getsockname failed"); 1305 return (-1); 1306 } 1307 if (addr.sll_ifindex == -1) { 1308 /* 1309 * This means the device went away. 1310 */ 1311 return (0); 1312 } 1313 1314 /* 1315 * The device presumably just went down. 1316 */ 1317 return (1); 1318 } 1319 1320 static int 1321 pcap_inject_linux(pcap_t *handle, const void *buf, int size) 1322 { 1323 struct pcap_linux *handlep = handle->priv; 1324 int ret; 1325 1326 if (handlep->ifindex == -1) { 1327 /* 1328 * We don't support sending on the "any" device. 1329 */ 1330 pcapint_strlcpy(handle->errbuf, 1331 "Sending packets isn't supported on the \"any\" device", 1332 PCAP_ERRBUF_SIZE); 1333 return (-1); 1334 } 1335 1336 if (handlep->cooked) { 1337 /* 1338 * We don't support sending on cooked-mode sockets. 1339 * 1340 * XXX - how do you send on a bound cooked-mode 1341 * socket? 1342 * Is a "sendto()" required there? 1343 */ 1344 pcapint_strlcpy(handle->errbuf, 1345 "Sending packets isn't supported in cooked mode", 1346 PCAP_ERRBUF_SIZE); 1347 return (-1); 1348 } 1349 1350 ret = (int)send(handle->fd, buf, size, 0); 1351 if (ret == -1) { 1352 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 1353 errno, "send"); 1354 return (-1); 1355 } 1356 return (ret); 1357 } 1358 1359 /* 1360 * Get the statistics for the given packet capture handle. 1361 */ 1362 static int 1363 pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats) 1364 { 1365 struct pcap_linux *handlep = handle->priv; 1366 #ifdef HAVE_TPACKET3 1367 /* 1368 * For sockets using TPACKET_V2, the extra stuff at the end 1369 * of a struct tpacket_stats_v3 will not be filled in, and 1370 * we don't look at it so this is OK even for those sockets. 1371 * In addition, the PF_PACKET socket code in the kernel only 1372 * uses the length parameter to compute how much data to 1373 * copy out and to indicate how much data was copied out, so 1374 * it's OK to base it on the size of a struct tpacket_stats. 1375 * 1376 * XXX - it's probably OK, in fact, to just use a 1377 * struct tpacket_stats for V3 sockets, as we don't 1378 * care about the tp_freeze_q_cnt stat. 1379 */ 1380 struct tpacket_stats_v3 kstats; 1381 #else /* HAVE_TPACKET3 */ 1382 struct tpacket_stats kstats; 1383 #endif /* HAVE_TPACKET3 */ 1384 socklen_t len = sizeof (struct tpacket_stats); 1385 1386 long long if_dropped = 0; 1387 1388 /* 1389 * To fill in ps_ifdrop, we parse 1390 * /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors 1391 * for the numbers 1392 */ 1393 if (handle->opt.promisc) 1394 { 1395 /* 1396 * XXX - is there any reason to do this by remembering 1397 * the last counts value, subtracting it from the 1398 * current counts value, and adding that to stat.ps_ifdrop, 1399 * maintaining stat.ps_ifdrop as a count, rather than just 1400 * saving the *initial* counts value and setting 1401 * stat.ps_ifdrop to the difference between the current 1402 * value and the initial value? 1403 * 1404 * One reason might be to handle the count wrapping 1405 * around, on platforms where the count is 32 bits 1406 * and where you might get more than 2^32 dropped 1407 * packets; is there any other reason? 1408 * 1409 * (We maintain the count as a long long int so that, 1410 * if the kernel maintains the counts as 64-bit even 1411 * on 32-bit platforms, we can handle the real count. 1412 * 1413 * Unfortunately, we can't report 64-bit counts; we 1414 * need a better API for reporting statistics, such as 1415 * one that reports them in a style similar to the 1416 * pcapng Interface Statistics Block, so that 1) the 1417 * counts are 64-bit, 2) it's easier to add new statistics 1418 * without breaking the ABI, and 3) it's easier to 1419 * indicate to a caller that wants one particular 1420 * statistic that it's not available by just not supplying 1421 * it.) 1422 */ 1423 if_dropped = handlep->sysfs_dropped; 1424 handlep->sysfs_dropped = linux_if_drops(handlep->device); 1425 handlep->stat.ps_ifdrop += (u_int)(handlep->sysfs_dropped - if_dropped); 1426 } 1427 1428 /* 1429 * Try to get the packet counts from the kernel. 1430 */ 1431 if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, 1432 &kstats, &len) > -1) { 1433 /* 1434 * "ps_recv" counts only packets that *passed* the 1435 * filter, not packets that didn't pass the filter. 1436 * This includes packets later dropped because we 1437 * ran out of buffer space. 1438 * 1439 * "ps_drop" counts packets dropped because we ran 1440 * out of buffer space. It doesn't count packets 1441 * dropped by the interface driver. It counts only 1442 * packets that passed the filter. 1443 * 1444 * See above for ps_ifdrop. 1445 * 1446 * Both statistics include packets not yet read from 1447 * the kernel by libpcap, and thus not yet seen by 1448 * the application. 1449 * 1450 * In "linux/net/packet/af_packet.c", at least in 2.6.27 1451 * through 5.6 kernels, "tp_packets" is incremented for 1452 * every packet that passes the packet filter *and* is 1453 * successfully copied to the ring buffer; "tp_drops" is 1454 * incremented for every packet dropped because there's 1455 * not enough free space in the ring buffer. 1456 * 1457 * When the statistics are returned for a PACKET_STATISTICS 1458 * "getsockopt()" call, "tp_drops" is added to "tp_packets", 1459 * so that "tp_packets" counts all packets handed to 1460 * the PF_PACKET socket, including packets dropped because 1461 * there wasn't room on the socket buffer - but not 1462 * including packets that didn't pass the filter. 1463 * 1464 * In the BSD BPF, the count of received packets is 1465 * incremented for every packet handed to BPF, regardless 1466 * of whether it passed the filter. 1467 * 1468 * We can't make "pcap_stats()" work the same on both 1469 * platforms, but the best approximation is to return 1470 * "tp_packets" as the count of packets and "tp_drops" 1471 * as the count of drops. 1472 * 1473 * Keep a running total because each call to 1474 * getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, .... 1475 * resets the counters to zero. 1476 */ 1477 handlep->stat.ps_recv += kstats.tp_packets; 1478 handlep->stat.ps_drop += kstats.tp_drops; 1479 *stats = handlep->stat; 1480 return 0; 1481 } 1482 1483 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, errno, 1484 "failed to get statistics from socket"); 1485 return -1; 1486 } 1487 1488 /* 1489 * A PF_PACKET socket can be bound to any network interface. 1490 */ 1491 static int 1492 can_be_bound(const char *name _U_) 1493 { 1494 return (1); 1495 } 1496 1497 /* 1498 * Get a socket to use with various interface ioctls. 1499 */ 1500 static int 1501 get_if_ioctl_socket(void) 1502 { 1503 int fd; 1504 1505 /* 1506 * This is a bit ugly. 1507 * 1508 * There isn't a socket type that's guaranteed to work. 1509 * 1510 * AF_NETLINK will work *if* you have Netlink configured into the 1511 * kernel (can it be configured out if you have any networking 1512 * support at all?) *and* if you're running a sufficiently recent 1513 * kernel, but not all the kernels we support are sufficiently 1514 * recent - that feature was introduced in Linux 4.6. 1515 * 1516 * AF_UNIX will work *if* you have UNIX-domain sockets configured 1517 * into the kernel and *if* you're not on a system that doesn't 1518 * allow them - some SELinux systems don't allow you create them. 1519 * Most systems probably have them configured in, but not all systems 1520 * have them configured in and allow them to be created. 1521 * 1522 * AF_INET will work *if* you have IPv4 configured into the kernel, 1523 * but, apparently, some systems have network adapters but have 1524 * kernels without IPv4 support. 1525 * 1526 * AF_INET6 will work *if* you have IPv6 configured into the 1527 * kernel, but if you don't have AF_INET, you might not have 1528 * AF_INET6, either (that is, independently on its own grounds). 1529 * 1530 * AF_PACKET would work, except that some of these calls should 1531 * work even if you *don't* have capture permission (you should be 1532 * able to enumerate interfaces and get information about them 1533 * without capture permission; you shouldn't get a failure until 1534 * you try pcap_activate()). (If you don't allow programs to 1535 * get as much information as possible about interfaces if you 1536 * don't have permission to capture, you run the risk of users 1537 * asking "why isn't it showing XXX" - or, worse, if you don't 1538 * show interfaces *at all* if you don't have permission to 1539 * capture on them, "why do no interfaces show up?" - when the 1540 * real problem is a permissions problem. Error reports of that 1541 * type require a lot more back-and-forth to debug, as evidenced 1542 * by many Wireshark bugs/mailing list questions/Q&A questions.) 1543 * 1544 * So: 1545 * 1546 * we first try an AF_NETLINK socket, where "try" includes 1547 * "try to do a device ioctl on it", as, in the future, once 1548 * pre-4.6 kernels are sufficiently rare, that will probably 1549 * be the mechanism most likely to work; 1550 * 1551 * if that fails, we try an AF_UNIX socket, as that's less 1552 * likely to be configured out on a networking-capable system 1553 * than is IP; 1554 * 1555 * if that fails, we try an AF_INET6 socket; 1556 * 1557 * if that fails, we try an AF_INET socket. 1558 */ 1559 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 1560 if (fd != -1) { 1561 /* 1562 * OK, let's make sure we can do an SIOCGIFNAME 1563 * ioctl. 1564 */ 1565 struct ifreq ifr; 1566 1567 memset(&ifr, 0, sizeof(ifr)); 1568 if (ioctl(fd, SIOCGIFNAME, &ifr) == 0 || 1569 errno != EOPNOTSUPP) { 1570 /* 1571 * It succeeded, or failed for some reason 1572 * other than "netlink sockets don't support 1573 * device ioctls". Go with the AF_NETLINK 1574 * socket. 1575 */ 1576 return (fd); 1577 } 1578 1579 /* 1580 * OK, that didn't work, so it's as bad as "netlink 1581 * sockets aren't available". Close the socket and 1582 * drive on. 1583 */ 1584 close(fd); 1585 } 1586 1587 /* 1588 * Now try an AF_UNIX socket. 1589 */ 1590 fd = socket(AF_UNIX, SOCK_RAW, 0); 1591 if (fd != -1) { 1592 /* 1593 * OK, we got it! 1594 */ 1595 return (fd); 1596 } 1597 1598 /* 1599 * Now try an AF_INET6 socket. 1600 */ 1601 fd = socket(AF_INET6, SOCK_DGRAM, 0); 1602 if (fd != -1) { 1603 return (fd); 1604 } 1605 1606 /* 1607 * Now try an AF_INET socket. 1608 * 1609 * XXX - if that fails, is there anything else we should try? 1610 * AF_CAN, for embedded systems in vehicles, in case they're 1611 * built without Internet protocol support? Any other socket 1612 * types popular in non-Internet embedded systems? 1613 */ 1614 return (socket(AF_INET, SOCK_DGRAM, 0)); 1615 } 1616 1617 /* 1618 * Get additional flags for a device, using SIOCGIFMEDIA. 1619 */ 1620 static int 1621 get_if_flags(const char *name, bpf_u_int32 *flags, char *errbuf) 1622 { 1623 int sock; 1624 FILE *fh; 1625 unsigned int arptype; 1626 struct ifreq ifr; 1627 struct ethtool_value info; 1628 1629 if (*flags & PCAP_IF_LOOPBACK) { 1630 /* 1631 * Loopback devices aren't wireless, and "connected"/ 1632 * "disconnected" doesn't apply to them. 1633 */ 1634 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1635 return 0; 1636 } 1637 1638 sock = get_if_ioctl_socket(); 1639 if (sock == -1) { 1640 pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, errno, 1641 "Can't create socket to get ethtool information for %s", 1642 name); 1643 return -1; 1644 } 1645 1646 /* 1647 * OK, what type of network is this? 1648 * In particular, is it wired or wireless? 1649 */ 1650 if (is_wifi(name)) { 1651 /* 1652 * Wi-Fi, hence wireless. 1653 */ 1654 *flags |= PCAP_IF_WIRELESS; 1655 } else { 1656 /* 1657 * OK, what does /sys/class/net/{if_name}/type contain? 1658 * (We don't use that for Wi-Fi, as it'll report 1659 * "Ethernet", i.e. ARPHRD_ETHER, for non-monitor- 1660 * mode devices.) 1661 */ 1662 char *pathstr; 1663 1664 if (asprintf(&pathstr, "/sys/class/net/%s/type", name) == -1) { 1665 snprintf(errbuf, PCAP_ERRBUF_SIZE, 1666 "%s: Can't generate path name string for /sys/class/net device", 1667 name); 1668 close(sock); 1669 return -1; 1670 } 1671 fh = fopen(pathstr, "r"); 1672 if (fh != NULL) { 1673 if (fscanf(fh, "%u", &arptype) == 1) { 1674 /* 1675 * OK, we got an ARPHRD_ type; what is it? 1676 */ 1677 switch (arptype) { 1678 1679 case ARPHRD_LOOPBACK: 1680 /* 1681 * These are types to which 1682 * "connected" and "disconnected" 1683 * don't apply, so don't bother 1684 * asking about it. 1685 * 1686 * XXX - add other types? 1687 */ 1688 close(sock); 1689 fclose(fh); 1690 free(pathstr); 1691 return 0; 1692 1693 case ARPHRD_IRDA: 1694 case ARPHRD_IEEE80211: 1695 case ARPHRD_IEEE80211_PRISM: 1696 case ARPHRD_IEEE80211_RADIOTAP: 1697 #ifdef ARPHRD_IEEE802154 1698 case ARPHRD_IEEE802154: 1699 #endif 1700 #ifdef ARPHRD_IEEE802154_MONITOR 1701 case ARPHRD_IEEE802154_MONITOR: 1702 #endif 1703 #ifdef ARPHRD_6LOWPAN 1704 case ARPHRD_6LOWPAN: 1705 #endif 1706 /* 1707 * Various wireless types. 1708 */ 1709 *flags |= PCAP_IF_WIRELESS; 1710 break; 1711 } 1712 } 1713 fclose(fh); 1714 } 1715 free(pathstr); 1716 } 1717 1718 #ifdef ETHTOOL_GLINK 1719 memset(&ifr, 0, sizeof(ifr)); 1720 pcapint_strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); 1721 info.cmd = ETHTOOL_GLINK; 1722 /* 1723 * XXX - while Valgrind handles SIOCETHTOOL and knows that 1724 * the ETHTOOL_GLINK command sets the .data member of the 1725 * structure, Memory Sanitizer doesn't yet do so: 1726 * 1727 * https://bugs.llvm.org/show_bug.cgi?id=45814 1728 * 1729 * For now, we zero it out to squelch warnings; if the bug 1730 * in question is fixed, we can remove this. 1731 */ 1732 info.data = 0; 1733 ifr.ifr_data = (caddr_t)&info; 1734 if (ioctl(sock, SIOCETHTOOL, &ifr) == -1) { 1735 int save_errno = errno; 1736 1737 switch (save_errno) { 1738 1739 case EOPNOTSUPP: 1740 case EINVAL: 1741 /* 1742 * OK, this OS version or driver doesn't support 1743 * asking for this information. 1744 * XXX - distinguish between "this doesn't 1745 * support ethtool at all because it's not 1746 * that type of device" vs. "this doesn't 1747 * support ethtool even though it's that 1748 * type of device", and return "unknown". 1749 */ 1750 *flags |= PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE; 1751 close(sock); 1752 return 0; 1753 1754 case ENODEV: 1755 /* 1756 * OK, no such device. 1757 * The user will find that out when they try to 1758 * activate the device; just say "OK" and 1759 * don't set anything. 1760 */ 1761 close(sock); 1762 return 0; 1763 1764 default: 1765 /* 1766 * Other error. 1767 */ 1768 pcapint_fmt_errmsg_for_errno(errbuf, PCAP_ERRBUF_SIZE, 1769 save_errno, 1770 "%s: SIOCETHTOOL(ETHTOOL_GLINK) ioctl failed", 1771 name); 1772 close(sock); 1773 return -1; 1774 } 1775 } 1776 1777 /* 1778 * Is it connected? 1779 */ 1780 if (info.data) { 1781 /* 1782 * It's connected. 1783 */ 1784 *flags |= PCAP_IF_CONNECTION_STATUS_CONNECTED; 1785 } else { 1786 /* 1787 * It's disconnected. 1788 */ 1789 *flags |= PCAP_IF_CONNECTION_STATUS_DISCONNECTED; 1790 } 1791 #endif 1792 1793 close(sock); 1794 return 0; 1795 } 1796 1797 int 1798 pcapint_platform_finddevs(pcap_if_list_t *devlistp, char *errbuf) 1799 { 1800 /* 1801 * Get the list of regular interfaces first. 1802 */ 1803 if (pcapint_findalldevs_interfaces(devlistp, errbuf, can_be_bound, 1804 get_if_flags) == -1) 1805 return (-1); /* failure */ 1806 1807 /* 1808 * Add the "any" device. 1809 */ 1810 if (pcap_add_any_dev(devlistp, errbuf) == NULL) 1811 return (-1); 1812 1813 return (0); 1814 } 1815 1816 /* 1817 * Set direction flag: Which packets do we accept on a forwarding 1818 * single device? IN, OUT or both? 1819 */ 1820 static int 1821 pcap_setdirection_linux(pcap_t *handle, pcap_direction_t d) 1822 { 1823 /* 1824 * It's guaranteed, at this point, that d is a valid 1825 * direction value. 1826 */ 1827 handle->direction = d; 1828 return 0; 1829 } 1830 1831 static int 1832 is_wifi(const char *device) 1833 { 1834 char *pathstr; 1835 struct stat statb; 1836 1837 /* 1838 * See if there's a sysfs wireless directory for it. 1839 * If so, it's a wireless interface. 1840 */ 1841 if (asprintf(&pathstr, "/sys/class/net/%s/wireless", device) == -1) { 1842 /* 1843 * Just give up here. 1844 */ 1845 return 0; 1846 } 1847 if (stat(pathstr, &statb) == 0) { 1848 free(pathstr); 1849 return 1; 1850 } 1851 free(pathstr); 1852 1853 return 0; 1854 } 1855 1856 /* 1857 * Linux uses the ARP hardware type to identify the type of an 1858 * interface. pcap uses the DLT_xxx constants for this. This 1859 * function takes a pointer to a "pcap_t", and an ARPHRD_xxx 1860 * constant, as arguments, and sets "handle->linktype" to the 1861 * appropriate DLT_XXX constant and sets "handle->offset" to 1862 * the appropriate value (to make "handle->offset" plus link-layer 1863 * header length be a multiple of 4, so that the link-layer payload 1864 * will be aligned on a 4-byte boundary when capturing packets). 1865 * (If the offset isn't set here, it'll be 0; add code as appropriate 1866 * for cases where it shouldn't be 0.) 1867 * 1868 * If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture 1869 * in cooked mode; otherwise, we can't use cooked mode, so we have 1870 * to pick some type that works in raw mode, or fail. 1871 * 1872 * Sets the link type to -1 if unable to map the type. 1873 * 1874 * Returns 0 on success or a PCAP_ERROR_ value on error. 1875 */ 1876 static int map_arphrd_to_dlt(pcap_t *handle, int arptype, 1877 const char *device, int cooked_ok) 1878 { 1879 static const char cdma_rmnet[] = "cdma_rmnet"; 1880 1881 switch (arptype) { 1882 1883 case ARPHRD_ETHER: 1884 /* 1885 * For various annoying reasons having to do with DHCP 1886 * software, some versions of Android give the mobile- 1887 * phone-network interface an ARPHRD_ value of 1888 * ARPHRD_ETHER, even though the packets supplied by 1889 * that interface have no link-layer header, and begin 1890 * with an IP header, so that the ARPHRD_ value should 1891 * be ARPHRD_NONE. 1892 * 1893 * Detect those devices by checking the device name, and 1894 * use DLT_RAW for them. 1895 */ 1896 if (strncmp(device, cdma_rmnet, sizeof cdma_rmnet - 1) == 0) { 1897 handle->linktype = DLT_RAW; 1898 return 0; 1899 } 1900 1901 /* 1902 * Is this a real Ethernet device? If so, give it a 1903 * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so 1904 * that an application can let you choose it, in case you're 1905 * capturing DOCSIS traffic that a Cisco Cable Modem 1906 * Termination System is putting out onto an Ethernet (it 1907 * doesn't put an Ethernet header onto the wire, it puts raw 1908 * DOCSIS frames out on the wire inside the low-level 1909 * Ethernet framing). 1910 * 1911 * XXX - are there any other sorts of "fake Ethernet" that 1912 * have ARPHRD_ETHER but that shouldn't offer DLT_DOCSIS as 1913 * a Cisco CMTS won't put traffic onto it or get traffic 1914 * bridged onto it? ISDN is handled in "setup_socket()", 1915 * as we fall back on cooked mode there, and we use 1916 * is_wifi() to check for 802.11 devices; are there any 1917 * others? 1918 */ 1919 if (!is_wifi(device)) { 1920 int ret; 1921 1922 /* 1923 * This is not a Wi-Fi device but it could be 1924 * a DSA master/management network device. 1925 */ 1926 ret = iface_dsa_get_proto_info(device, handle); 1927 if (ret < 0) 1928 return ret; 1929 1930 if (ret == 1) { 1931 /* 1932 * This is a DSA master/management network 1933 * device linktype is already set by 1934 * iface_dsa_get_proto_info() set an 1935 * appropriate offset here. 1936 */ 1937 handle->offset = 2; 1938 break; 1939 } 1940 1941 /* 1942 * It's not a Wi-Fi device; offer DOCSIS. 1943 */ 1944 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 1945 if (handle->dlt_list == NULL) { 1946 pcapint_fmt_errmsg_for_errno(handle->errbuf, 1947 PCAP_ERRBUF_SIZE, errno, "malloc"); 1948 return (PCAP_ERROR); 1949 } 1950 handle->dlt_list[0] = DLT_EN10MB; 1951 handle->dlt_list[1] = DLT_DOCSIS; 1952 handle->dlt_count = 2; 1953 } 1954 /* FALLTHROUGH */ 1955 1956 case ARPHRD_METRICOM: 1957 case ARPHRD_LOOPBACK: 1958 handle->linktype = DLT_EN10MB; 1959 handle->offset = 2; 1960 break; 1961 1962 case ARPHRD_EETHER: 1963 handle->linktype = DLT_EN3MB; 1964 break; 1965 1966 case ARPHRD_AX25: 1967 handle->linktype = DLT_AX25_KISS; 1968 break; 1969 1970 case ARPHRD_PRONET: 1971 handle->linktype = DLT_PRONET; 1972 break; 1973 1974 case ARPHRD_CHAOS: 1975 handle->linktype = DLT_CHAOS; 1976 break; 1977 #ifndef ARPHRD_CAN 1978 #define ARPHRD_CAN 280 1979 #endif 1980 case ARPHRD_CAN: 1981 handle->linktype = DLT_CAN_SOCKETCAN; 1982 break; 1983 1984 #ifndef ARPHRD_IEEE802_TR 1985 #define ARPHRD_IEEE802_TR 800 /* From Linux 2.4 */ 1986 #endif 1987 case ARPHRD_IEEE802_TR: 1988 case ARPHRD_IEEE802: 1989 handle->linktype = DLT_IEEE802; 1990 handle->offset = 2; 1991 break; 1992 1993 case ARPHRD_ARCNET: 1994 handle->linktype = DLT_ARCNET_LINUX; 1995 break; 1996 1997 #ifndef ARPHRD_FDDI /* From Linux 2.2.13 */ 1998 #define ARPHRD_FDDI 774 1999 #endif 2000 case ARPHRD_FDDI: 2001 handle->linktype = DLT_FDDI; 2002 handle->offset = 3; 2003 break; 2004 2005 #ifndef ARPHRD_ATM /* FIXME: How to #include this? */ 2006 #define ARPHRD_ATM 19 2007 #endif 2008 case ARPHRD_ATM: 2009 /* 2010 * The Classical IP implementation in ATM for Linux 2011 * supports both what RFC 1483 calls "LLC Encapsulation", 2012 * in which each packet has an LLC header, possibly 2013 * with a SNAP header as well, prepended to it, and 2014 * what RFC 1483 calls "VC Based Multiplexing", in which 2015 * different virtual circuits carry different network 2016 * layer protocols, and no header is prepended to packets. 2017 * 2018 * They both have an ARPHRD_ type of ARPHRD_ATM, so 2019 * you can't use the ARPHRD_ type to find out whether 2020 * captured packets will have an LLC header, and, 2021 * while there's a socket ioctl to *set* the encapsulation 2022 * type, there's no ioctl to *get* the encapsulation type. 2023 * 2024 * This means that 2025 * 2026 * programs that dissect Linux Classical IP frames 2027 * would have to check for an LLC header and, 2028 * depending on whether they see one or not, dissect 2029 * the frame as LLC-encapsulated or as raw IP (I 2030 * don't know whether there's any traffic other than 2031 * IP that would show up on the socket, or whether 2032 * there's any support for IPv6 in the Linux 2033 * Classical IP code); 2034 * 2035 * filter expressions would have to compile into 2036 * code that checks for an LLC header and does 2037 * the right thing. 2038 * 2039 * Both of those are a nuisance - and, at least on systems 2040 * that support PF_PACKET sockets, we don't have to put 2041 * up with those nuisances; instead, we can just capture 2042 * in cooked mode. That's what we'll do, if we can. 2043 * Otherwise, we'll just fail. 2044 */ 2045 if (cooked_ok) 2046 handle->linktype = DLT_LINUX_SLL; 2047 else 2048 handle->linktype = -1; 2049 break; 2050 2051 #ifndef ARPHRD_IEEE80211 /* From Linux 2.4.6 */ 2052 #define ARPHRD_IEEE80211 801 2053 #endif 2054 case ARPHRD_IEEE80211: 2055 handle->linktype = DLT_IEEE802_11; 2056 break; 2057 2058 #ifndef ARPHRD_IEEE80211_PRISM /* From Linux 2.4.18 */ 2059 #define ARPHRD_IEEE80211_PRISM 802 2060 #endif 2061 case ARPHRD_IEEE80211_PRISM: 2062 handle->linktype = DLT_PRISM_HEADER; 2063 break; 2064 2065 #ifndef ARPHRD_IEEE80211_RADIOTAP /* new */ 2066 #define ARPHRD_IEEE80211_RADIOTAP 803 2067 #endif 2068 case ARPHRD_IEEE80211_RADIOTAP: 2069 handle->linktype = DLT_IEEE802_11_RADIO; 2070 break; 2071 2072 case ARPHRD_PPP: 2073 /* 2074 * Some PPP code in the kernel supplies no link-layer 2075 * header whatsoever to PF_PACKET sockets; other PPP 2076 * code supplies PPP link-layer headers ("syncppp.c"); 2077 * some PPP code might supply random link-layer 2078 * headers (PPP over ISDN - there's code in Ethereal, 2079 * for example, to cope with PPP-over-ISDN captures 2080 * with which the Ethereal developers have had to cope, 2081 * heuristically trying to determine which of the 2082 * oddball link-layer headers particular packets have). 2083 * 2084 * As such, we just punt, and run all PPP interfaces 2085 * in cooked mode, if we can; otherwise, we just treat 2086 * it as DLT_RAW, for now - if somebody needs to capture, 2087 * on a 2.0[.x] kernel, on PPP devices that supply a 2088 * link-layer header, they'll have to add code here to 2089 * map to the appropriate DLT_ type (possibly adding a 2090 * new DLT_ type, if necessary). 2091 */ 2092 if (cooked_ok) 2093 handle->linktype = DLT_LINUX_SLL; 2094 else { 2095 /* 2096 * XXX - handle ISDN types here? We can't fall 2097 * back on cooked sockets, so we'd have to 2098 * figure out from the device name what type of 2099 * link-layer encapsulation it's using, and map 2100 * that to an appropriate DLT_ value, meaning 2101 * we'd map "isdnN" devices to DLT_RAW (they 2102 * supply raw IP packets with no link-layer 2103 * header) and "isdY" devices to a new DLT_I4L_IP 2104 * type that has only an Ethernet packet type as 2105 * a link-layer header. 2106 * 2107 * But sometimes we seem to get random crap 2108 * in the link-layer header when capturing on 2109 * ISDN devices.... 2110 */ 2111 handle->linktype = DLT_RAW; 2112 } 2113 break; 2114 2115 #ifndef ARPHRD_CISCO 2116 #define ARPHRD_CISCO 513 /* previously ARPHRD_HDLC */ 2117 #endif 2118 case ARPHRD_CISCO: 2119 handle->linktype = DLT_C_HDLC; 2120 break; 2121 2122 /* Not sure if this is correct for all tunnels, but it 2123 * works for CIPE */ 2124 case ARPHRD_TUNNEL: 2125 #ifndef ARPHRD_SIT 2126 #define ARPHRD_SIT 776 /* From Linux 2.2.13 */ 2127 #endif 2128 case ARPHRD_SIT: 2129 case ARPHRD_CSLIP: 2130 case ARPHRD_SLIP6: 2131 case ARPHRD_CSLIP6: 2132 case ARPHRD_ADAPT: 2133 case ARPHRD_SLIP: 2134 #ifndef ARPHRD_RAWHDLC 2135 #define ARPHRD_RAWHDLC 518 2136 #endif 2137 case ARPHRD_RAWHDLC: 2138 #ifndef ARPHRD_DLCI 2139 #define ARPHRD_DLCI 15 2140 #endif 2141 case ARPHRD_DLCI: 2142 /* 2143 * XXX - should some of those be mapped to DLT_LINUX_SLL 2144 * instead? Should we just map all of them to DLT_LINUX_SLL? 2145 */ 2146 handle->linktype = DLT_RAW; 2147 break; 2148 2149 #ifndef ARPHRD_FRAD 2150 #define ARPHRD_FRAD 770 2151 #endif 2152 case ARPHRD_FRAD: 2153 handle->linktype = DLT_FRELAY; 2154 break; 2155 2156 case ARPHRD_LOCALTLK: 2157 handle->linktype = DLT_LTALK; 2158 break; 2159 2160 case 18: 2161 /* 2162 * RFC 4338 defines an encapsulation for IP and ARP 2163 * packets that's compatible with the RFC 2625 2164 * encapsulation, but that uses a different ARP 2165 * hardware type and hardware addresses. That 2166 * ARP hardware type is 18; Linux doesn't define 2167 * any ARPHRD_ value as 18, but if it ever officially 2168 * supports RFC 4338-style IP-over-FC, it should define 2169 * one. 2170 * 2171 * For now, we map it to DLT_IP_OVER_FC, in the hopes 2172 * that this will encourage its use in the future, 2173 * should Linux ever officially support RFC 4338-style 2174 * IP-over-FC. 2175 */ 2176 handle->linktype = DLT_IP_OVER_FC; 2177 break; 2178 2179 #ifndef ARPHRD_FCPP 2180 #define ARPHRD_FCPP 784 2181 #endif 2182 case ARPHRD_FCPP: 2183 #ifndef ARPHRD_FCAL 2184 #define ARPHRD_FCAL 785 2185 #endif 2186 case ARPHRD_FCAL: 2187 #ifndef ARPHRD_FCPL 2188 #define ARPHRD_FCPL 786 2189 #endif 2190 case ARPHRD_FCPL: 2191 #ifndef ARPHRD_FCFABRIC 2192 #define ARPHRD_FCFABRIC 787 2193 #endif 2194 case ARPHRD_FCFABRIC: 2195 /* 2196 * Back in 2002, Donald Lee at Cray wanted a DLT_ for 2197 * IP-over-FC: 2198 * 2199 * https://www.mail-archive.com/tcpdump-workers@sandelman.ottawa.on.ca/msg01043.html 2200 * 2201 * and one was assigned. 2202 * 2203 * In a later private discussion (spun off from a message 2204 * on the ethereal-users list) on how to get that DLT_ 2205 * value in libpcap on Linux, I ended up deciding that 2206 * the best thing to do would be to have him tweak the 2207 * driver to set the ARPHRD_ value to some ARPHRD_FCxx 2208 * type, and map all those types to DLT_IP_OVER_FC: 2209 * 2210 * I've checked into the libpcap and tcpdump CVS tree 2211 * support for DLT_IP_OVER_FC. In order to use that, 2212 * you'd have to modify your modified driver to return 2213 * one of the ARPHRD_FCxxx types, in "fcLINUXfcp.c" - 2214 * change it to set "dev->type" to ARPHRD_FCFABRIC, for 2215 * example (the exact value doesn't matter, it can be 2216 * any of ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, or 2217 * ARPHRD_FCFABRIC). 2218 * 2219 * 11 years later, Christian Svensson wanted to map 2220 * various ARPHRD_ values to DLT_FC_2 and 2221 * DLT_FC_2_WITH_FRAME_DELIMS for raw Fibre Channel 2222 * frames: 2223 * 2224 * https://github.com/mcr/libpcap/pull/29 2225 * 2226 * There doesn't seem to be any network drivers that uses 2227 * any of the ARPHRD_FC* values for IP-over-FC, and 2228 * it's not exactly clear what the "Dummy types for non 2229 * ARP hardware" are supposed to mean (link-layer 2230 * header type? Physical network type?), so it's 2231 * not exactly clear why the ARPHRD_FC* types exist 2232 * in the first place. 2233 * 2234 * For now, we map them to DLT_FC_2, and provide an 2235 * option of DLT_FC_2_WITH_FRAME_DELIMS, as well as 2236 * DLT_IP_OVER_FC just in case there's some old 2237 * driver out there that uses one of those types for 2238 * IP-over-FC on which somebody wants to capture 2239 * packets. 2240 */ 2241 handle->linktype = DLT_FC_2; 2242 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 3); 2243 if (handle->dlt_list == NULL) { 2244 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2245 PCAP_ERRBUF_SIZE, errno, "malloc"); 2246 return (PCAP_ERROR); 2247 } 2248 handle->dlt_list[0] = DLT_FC_2; 2249 handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS; 2250 handle->dlt_list[2] = DLT_IP_OVER_FC; 2251 handle->dlt_count = 3; 2252 break; 2253 2254 #ifndef ARPHRD_IRDA 2255 #define ARPHRD_IRDA 783 2256 #endif 2257 case ARPHRD_IRDA: 2258 /* Don't expect IP packet out of this interfaces... */ 2259 handle->linktype = DLT_LINUX_IRDA; 2260 /* We need to save packet direction for IrDA decoding, 2261 * so let's use "Linux-cooked" mode. Jean II 2262 * 2263 * XXX - this is handled in setup_socket(). */ 2264 /* handlep->cooked = 1; */ 2265 break; 2266 2267 /* ARPHRD_LAPD is unofficial and randomly allocated, if reallocation 2268 * is needed, please report it to <daniele@orlandi.com> */ 2269 #ifndef ARPHRD_LAPD 2270 #define ARPHRD_LAPD 8445 2271 #endif 2272 case ARPHRD_LAPD: 2273 /* Don't expect IP packet out of this interfaces... */ 2274 handle->linktype = DLT_LINUX_LAPD; 2275 break; 2276 2277 #ifndef ARPHRD_NONE 2278 #define ARPHRD_NONE 0xFFFE 2279 #endif 2280 case ARPHRD_NONE: 2281 /* 2282 * No link-layer header; packets are just IP 2283 * packets, so use DLT_RAW. 2284 */ 2285 handle->linktype = DLT_RAW; 2286 break; 2287 2288 #ifndef ARPHRD_IEEE802154 2289 #define ARPHRD_IEEE802154 804 2290 #endif 2291 case ARPHRD_IEEE802154: 2292 handle->linktype = DLT_IEEE802_15_4_NOFCS; 2293 break; 2294 2295 #ifndef ARPHRD_NETLINK 2296 #define ARPHRD_NETLINK 824 2297 #endif 2298 case ARPHRD_NETLINK: 2299 handle->linktype = DLT_NETLINK; 2300 /* 2301 * We need to use cooked mode, so that in sll_protocol we 2302 * pick up the netlink protocol type such as NETLINK_ROUTE, 2303 * NETLINK_GENERIC, NETLINK_FIB_LOOKUP, etc. 2304 * 2305 * XXX - this is handled in setup_socket(). 2306 */ 2307 /* handlep->cooked = 1; */ 2308 break; 2309 2310 #ifndef ARPHRD_VSOCKMON 2311 #define ARPHRD_VSOCKMON 826 2312 #endif 2313 case ARPHRD_VSOCKMON: 2314 handle->linktype = DLT_VSOCK; 2315 break; 2316 2317 default: 2318 handle->linktype = -1; 2319 break; 2320 } 2321 return (0); 2322 } 2323 2324 /* 2325 * Try to set up a PF_PACKET socket. 2326 * Returns 0 or a PCAP_WARNING_ value on success and a PCAP_ERROR_ value 2327 * on failure. 2328 */ 2329 static int 2330 setup_socket(pcap_t *handle, int is_any_device) 2331 { 2332 struct pcap_linux *handlep = handle->priv; 2333 const char *device = handle->opt.device; 2334 int status = 0; 2335 int sock_fd, arptype; 2336 int val; 2337 int err = 0; 2338 struct packet_mreq mr; 2339 #if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2340 int bpf_extensions; 2341 socklen_t len = sizeof(bpf_extensions); 2342 #endif 2343 2344 /* 2345 * Open a socket with protocol family packet. If cooked is true, 2346 * we open a SOCK_DGRAM socket for the cooked interface, otherwise 2347 * we open a SOCK_RAW socket for the raw interface. 2348 * 2349 * The protocol is set to 0. This means we will receive no 2350 * packets until we "bind" the socket with a non-zero 2351 * protocol. This allows us to setup the ring buffers without 2352 * dropping any packets. 2353 */ 2354 sock_fd = is_any_device ? 2355 socket(PF_PACKET, SOCK_DGRAM, 0) : 2356 socket(PF_PACKET, SOCK_RAW, 0); 2357 2358 if (sock_fd == -1) { 2359 if (errno == EPERM || errno == EACCES) { 2360 /* 2361 * You don't have permission to open the 2362 * socket. 2363 */ 2364 status = PCAP_ERROR_PERM_DENIED; 2365 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2366 "Attempt to create packet socket failed - CAP_NET_RAW may be required"); 2367 } else if (errno == EAFNOSUPPORT) { 2368 /* 2369 * PF_PACKET sockets not supported. 2370 * Perhaps we're running on the WSL1 module 2371 * in the Windows NT kernel rather than on 2372 * a real Linux kernel. 2373 */ 2374 status = PCAP_ERROR_CAPTURE_NOTSUP; 2375 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2376 "PF_PACKET sockets not supported - is this WSL1?"); 2377 } else { 2378 /* 2379 * Other error. 2380 */ 2381 status = PCAP_ERROR; 2382 } 2383 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2384 errno, "socket"); 2385 return status; 2386 } 2387 2388 /* 2389 * Get the interface index of the loopback device. 2390 * If the attempt fails, don't fail, just set the 2391 * "handlep->lo_ifindex" to -1. 2392 * 2393 * XXX - can there be more than one device that loops 2394 * packets back, i.e. devices other than "lo"? If so, 2395 * we'd need to find them all, and have an array of 2396 * indices for them, and check all of them in 2397 * "pcap_read_packet()". 2398 */ 2399 handlep->lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf); 2400 2401 /* 2402 * Default value for offset to align link-layer payload 2403 * on a 4-byte boundary. 2404 */ 2405 handle->offset = 0; 2406 2407 /* 2408 * What kind of frames do we have to deal with? Fall back 2409 * to cooked mode if we have an unknown interface type 2410 * or a type we know doesn't work well in raw mode. 2411 */ 2412 if (!is_any_device) { 2413 /* Assume for now we don't need cooked mode. */ 2414 handlep->cooked = 0; 2415 2416 if (handle->opt.rfmon) { 2417 /* 2418 * We were asked to turn on monitor mode. 2419 * Do so before we get the link-layer type, 2420 * because entering monitor mode could change 2421 * the link-layer type. 2422 */ 2423 err = enter_rfmon_mode(handle, sock_fd, device); 2424 if (err < 0) { 2425 /* Hard failure */ 2426 close(sock_fd); 2427 return err; 2428 } 2429 if (err == 0) { 2430 /* 2431 * Nothing worked for turning monitor mode 2432 * on. 2433 */ 2434 close(sock_fd); 2435 2436 return PCAP_ERROR_RFMON_NOTSUP; 2437 } 2438 2439 /* 2440 * Either monitor mode has been turned on for 2441 * the device, or we've been given a different 2442 * device to open for monitor mode. If we've 2443 * been given a different device, use it. 2444 */ 2445 if (handlep->mondevice != NULL) 2446 device = handlep->mondevice; 2447 } 2448 arptype = iface_get_arptype(sock_fd, device, handle->errbuf); 2449 if (arptype < 0) { 2450 close(sock_fd); 2451 return arptype; 2452 } 2453 status = map_arphrd_to_dlt(handle, arptype, device, 1); 2454 if (status < 0) { 2455 close(sock_fd); 2456 return status; 2457 } 2458 if (handle->linktype == -1 || 2459 handle->linktype == DLT_LINUX_SLL || 2460 handle->linktype == DLT_LINUX_IRDA || 2461 handle->linktype == DLT_LINUX_LAPD || 2462 handle->linktype == DLT_NETLINK || 2463 (handle->linktype == DLT_EN10MB && 2464 (strncmp("isdn", device, 4) == 0 || 2465 strncmp("isdY", device, 4) == 0))) { 2466 /* 2467 * Unknown interface type (-1), or a 2468 * device we explicitly chose to run 2469 * in cooked mode (e.g., PPP devices), 2470 * or an ISDN device (whose link-layer 2471 * type we can only determine by using 2472 * APIs that may be different on different 2473 * kernels) - reopen in cooked mode. 2474 * 2475 * If the type is unknown, return a warning; 2476 * map_arphrd_to_dlt() has already set the 2477 * warning message. 2478 */ 2479 if (close(sock_fd) == -1) { 2480 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2481 PCAP_ERRBUF_SIZE, errno, "close"); 2482 return PCAP_ERROR; 2483 } 2484 sock_fd = socket(PF_PACKET, SOCK_DGRAM, 0); 2485 if (sock_fd < 0) { 2486 /* 2487 * Fatal error. We treat this as 2488 * a generic error; we already know 2489 * that we were able to open a 2490 * PF_PACKET/SOCK_RAW socket, so 2491 * any failure is a "this shouldn't 2492 * happen" case. 2493 */ 2494 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2495 PCAP_ERRBUF_SIZE, errno, "socket"); 2496 return PCAP_ERROR; 2497 } 2498 handlep->cooked = 1; 2499 2500 /* 2501 * Get rid of any link-layer type list 2502 * we allocated - this only supports cooked 2503 * capture. 2504 */ 2505 if (handle->dlt_list != NULL) { 2506 free(handle->dlt_list); 2507 handle->dlt_list = NULL; 2508 handle->dlt_count = 0; 2509 } 2510 2511 if (handle->linktype == -1) { 2512 /* 2513 * Warn that we're falling back on 2514 * cooked mode; we may want to 2515 * update "map_arphrd_to_dlt()" 2516 * to handle the new type. 2517 */ 2518 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2519 "arptype %d not " 2520 "supported by libpcap - " 2521 "falling back to cooked " 2522 "socket", 2523 arptype); 2524 status = PCAP_WARNING; 2525 } 2526 2527 /* 2528 * IrDA capture is not a real "cooked" capture, 2529 * it's IrLAP frames, not IP packets. The 2530 * same applies to LAPD capture. 2531 */ 2532 if (handle->linktype != DLT_LINUX_IRDA && 2533 handle->linktype != DLT_LINUX_LAPD && 2534 handle->linktype != DLT_NETLINK) 2535 handle->linktype = DLT_LINUX_SLL; 2536 } 2537 2538 handlep->ifindex = iface_get_id(sock_fd, device, 2539 handle->errbuf); 2540 if (handlep->ifindex == -1) { 2541 close(sock_fd); 2542 return PCAP_ERROR; 2543 } 2544 2545 if ((err = iface_bind(sock_fd, handlep->ifindex, 2546 handle->errbuf, 0)) != 0) { 2547 close(sock_fd); 2548 return err; 2549 } 2550 } else { 2551 /* 2552 * The "any" device. 2553 */ 2554 if (handle->opt.rfmon) { 2555 /* 2556 * It doesn't support monitor mode. 2557 */ 2558 close(sock_fd); 2559 return PCAP_ERROR_RFMON_NOTSUP; 2560 } 2561 2562 /* 2563 * It uses cooked mode. 2564 * Support both DLT_LINUX_SLL and DLT_LINUX_SLL2. 2565 */ 2566 handlep->cooked = 1; 2567 handle->linktype = DLT_LINUX_SLL; 2568 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2); 2569 if (handle->dlt_list == NULL) { 2570 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2571 PCAP_ERRBUF_SIZE, errno, "malloc"); 2572 return (PCAP_ERROR); 2573 } 2574 handle->dlt_list[0] = DLT_LINUX_SLL; 2575 handle->dlt_list[1] = DLT_LINUX_SLL2; 2576 handle->dlt_count = 2; 2577 2578 /* 2579 * We're not bound to a device. 2580 * For now, we're using this as an indication 2581 * that we can't transmit; stop doing that only 2582 * if we figure out how to transmit in cooked 2583 * mode. 2584 */ 2585 handlep->ifindex = -1; 2586 } 2587 2588 /* 2589 * Select promiscuous mode on if "promisc" is set. 2590 * 2591 * Do not turn allmulti mode on if we don't select 2592 * promiscuous mode - on some devices (e.g., Orinoco 2593 * wireless interfaces), allmulti mode isn't supported 2594 * and the driver implements it by turning promiscuous 2595 * mode on, and that screws up the operation of the 2596 * card as a normal networking interface, and on no 2597 * other platform I know of does starting a non- 2598 * promiscuous capture affect which multicast packets 2599 * are received by the interface. 2600 */ 2601 2602 /* 2603 * Hmm, how can we set promiscuous mode on all interfaces? 2604 * I am not sure if that is possible at all. For now, we 2605 * silently ignore attempts to turn promiscuous mode on 2606 * for the "any" device (so you don't have to explicitly 2607 * disable it in programs such as tcpdump). 2608 */ 2609 2610 if (!is_any_device && handle->opt.promisc) { 2611 memset(&mr, 0, sizeof(mr)); 2612 mr.mr_ifindex = handlep->ifindex; 2613 mr.mr_type = PACKET_MR_PROMISC; 2614 if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, 2615 &mr, sizeof(mr)) == -1) { 2616 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2617 PCAP_ERRBUF_SIZE, errno, "setsockopt (PACKET_ADD_MEMBERSHIP)"); 2618 close(sock_fd); 2619 return PCAP_ERROR; 2620 } 2621 } 2622 2623 /* 2624 * Enable auxiliary data and reserve room for reconstructing 2625 * VLAN headers. 2626 * 2627 * XXX - is enabling auxiliary data necessary, now that we 2628 * only support memory-mapped capture? The kernel's memory-mapped 2629 * capture code doesn't seem to check whether auxiliary data 2630 * is enabled, it seems to provide it whether it is or not. 2631 */ 2632 val = 1; 2633 if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val, 2634 sizeof(val)) == -1 && errno != ENOPROTOOPT) { 2635 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2636 errno, "setsockopt (PACKET_AUXDATA)"); 2637 close(sock_fd); 2638 return PCAP_ERROR; 2639 } 2640 handle->offset += VLAN_TAG_LEN; 2641 2642 /* 2643 * If we're in cooked mode, make the snapshot length 2644 * large enough to hold a "cooked mode" header plus 2645 * 1 byte of packet data (so we don't pass a byte 2646 * count of 0 to "recvfrom()"). 2647 * XXX - we don't know whether this will be DLT_LINUX_SLL 2648 * or DLT_LINUX_SLL2, so make sure it's big enough for 2649 * a DLT_LINUX_SLL2 "cooked mode" header; a snapshot length 2650 * that small is silly anyway. 2651 */ 2652 if (handlep->cooked) { 2653 if (handle->snapshot < SLL2_HDR_LEN + 1) 2654 handle->snapshot = SLL2_HDR_LEN + 1; 2655 } 2656 handle->bufsize = handle->snapshot; 2657 2658 /* 2659 * Set the offset at which to insert VLAN tags. 2660 */ 2661 set_vlan_offset(handle); 2662 2663 if (handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO) { 2664 int nsec_tstamps = 1; 2665 2666 if (setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPNS, &nsec_tstamps, sizeof(nsec_tstamps)) < 0) { 2667 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "setsockopt: unable to set SO_TIMESTAMPNS"); 2668 close(sock_fd); 2669 return PCAP_ERROR; 2670 } 2671 } 2672 2673 /* 2674 * We've succeeded. Save the socket FD in the pcap structure. 2675 */ 2676 handle->fd = sock_fd; 2677 2678 #if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) 2679 /* 2680 * Can we generate special code for VLAN checks? 2681 * (XXX - what if we need the special code but it's not supported 2682 * by the OS? Is that possible?) 2683 */ 2684 if (getsockopt(sock_fd, SOL_SOCKET, SO_BPF_EXTENSIONS, 2685 &bpf_extensions, &len) == 0) { 2686 if (bpf_extensions >= SKF_AD_VLAN_TAG_PRESENT) { 2687 /* 2688 * Yes, we can. Request that we do so. 2689 */ 2690 handle->bpf_codegen_flags |= BPF_SPECIAL_VLAN_HANDLING; 2691 } 2692 } 2693 #endif /* defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT) */ 2694 2695 return status; 2696 } 2697 2698 /* 2699 * Attempt to setup memory-mapped access. 2700 * 2701 * On success, returns 0 if there are no warnings or a PCAP_WARNING_ code 2702 * if there is a warning. 2703 * 2704 * On error, returns the appropriate error code; if that is PCAP_ERROR, 2705 * sets handle->errbuf to the appropriate message. 2706 */ 2707 static int 2708 setup_mmapped(pcap_t *handle) 2709 { 2710 struct pcap_linux *handlep = handle->priv; 2711 int status; 2712 2713 /* 2714 * Attempt to allocate a buffer to hold the contents of one 2715 * packet, for use by the oneshot callback. 2716 */ 2717 handlep->oneshot_buffer = malloc(handle->snapshot); 2718 if (handlep->oneshot_buffer == NULL) { 2719 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2720 errno, "can't allocate oneshot buffer"); 2721 return PCAP_ERROR; 2722 } 2723 2724 if (handle->opt.buffer_size == 0) { 2725 /* by default request 2M for the ring buffer */ 2726 handle->opt.buffer_size = 2*1024*1024; 2727 } 2728 status = prepare_tpacket_socket(handle); 2729 if (status == -1) { 2730 free(handlep->oneshot_buffer); 2731 handlep->oneshot_buffer = NULL; 2732 return PCAP_ERROR; 2733 } 2734 status = create_ring(handle); 2735 if (status < 0) { 2736 /* 2737 * Error attempting to enable memory-mapped capture; 2738 * fail. The return value is the status to return. 2739 */ 2740 free(handlep->oneshot_buffer); 2741 handlep->oneshot_buffer = NULL; 2742 return status; 2743 } 2744 2745 /* 2746 * Success. status has been set either to 0 if there are no 2747 * warnings or to a PCAP_WARNING_ value if there is a warning. 2748 * 2749 * handle->offset is used to get the current position into the rx ring. 2750 * handle->cc is used to store the ring size. 2751 */ 2752 2753 /* 2754 * Set the timeout to use in poll() before returning. 2755 */ 2756 set_poll_timeout(handlep); 2757 2758 return status; 2759 } 2760 2761 /* 2762 * Attempt to set the socket to the specified version of the memory-mapped 2763 * header. 2764 * 2765 * Return 0 if we succeed; return 1 if we fail because that version isn't 2766 * supported; return -1 on any other error, and set handle->errbuf. 2767 */ 2768 static int 2769 init_tpacket(pcap_t *handle, int version, const char *version_str) 2770 { 2771 struct pcap_linux *handlep = handle->priv; 2772 int val = version; 2773 socklen_t len = sizeof(val); 2774 2775 /* 2776 * Probe whether kernel supports the specified TPACKET version; 2777 * this also gets the length of the header for that version. 2778 * 2779 * This socket option was introduced in 2.6.27, which was 2780 * also the first release with TPACKET_V2 support. 2781 */ 2782 if (getsockopt(handle->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) { 2783 if (errno == EINVAL) { 2784 /* 2785 * EINVAL means this specific version of TPACKET 2786 * is not supported. Tell the caller they can try 2787 * with a different one; if they've run out of 2788 * others to try, let them set the error message 2789 * appropriately. 2790 */ 2791 return 1; 2792 } 2793 2794 /* 2795 * All other errors are fatal. 2796 */ 2797 if (errno == ENOPROTOOPT) { 2798 /* 2799 * PACKET_HDRLEN isn't supported, which means 2800 * that memory-mapped capture isn't supported. 2801 * Indicate that in the message. 2802 */ 2803 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2804 "Kernel doesn't support memory-mapped capture; a 2.6.27 or later 2.x kernel is required, with CONFIG_PACKET_MMAP specified for 2.x kernels"); 2805 } else { 2806 /* 2807 * Some unexpected error. 2808 */ 2809 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2810 errno, "can't get %s header len on packet socket", 2811 version_str); 2812 } 2813 return -1; 2814 } 2815 handlep->tp_hdrlen = val; 2816 2817 val = version; 2818 if (setsockopt(handle->fd, SOL_PACKET, PACKET_VERSION, &val, 2819 sizeof(val)) < 0) { 2820 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 2821 errno, "can't activate %s on packet socket", version_str); 2822 return -1; 2823 } 2824 handlep->tp_version = version; 2825 2826 return 0; 2827 } 2828 2829 /* 2830 * Attempt to set the socket to version 3 of the memory-mapped header and, 2831 * if that fails because version 3 isn't supported, attempt to fall 2832 * back to version 2. If version 2 isn't supported, just fail. 2833 * 2834 * Return 0 if we succeed and -1 on any other error, and set handle->errbuf. 2835 */ 2836 static int 2837 prepare_tpacket_socket(pcap_t *handle) 2838 { 2839 int ret; 2840 2841 #ifdef HAVE_TPACKET3 2842 /* 2843 * Try setting the version to TPACKET_V3. 2844 * 2845 * The only mode in which buffering is done on PF_PACKET 2846 * sockets, so that packets might not be delivered 2847 * immediately, is TPACKET_V3 mode. 2848 * 2849 * The buffering cannot be disabled in that mode, so 2850 * if the user has requested immediate mode, we don't 2851 * use TPACKET_V3. 2852 */ 2853 if (!handle->opt.immediate) { 2854 ret = init_tpacket(handle, TPACKET_V3, "TPACKET_V3"); 2855 if (ret == 0) { 2856 /* 2857 * Success. 2858 */ 2859 return 0; 2860 } 2861 if (ret == -1) { 2862 /* 2863 * We failed for some reason other than "the 2864 * kernel doesn't support TPACKET_V3". 2865 */ 2866 return -1; 2867 } 2868 2869 /* 2870 * This means it returned 1, which means "the kernel 2871 * doesn't support TPACKET_V3"; try TPACKET_V2. 2872 */ 2873 } 2874 #endif /* HAVE_TPACKET3 */ 2875 2876 /* 2877 * Try setting the version to TPACKET_V2. 2878 */ 2879 ret = init_tpacket(handle, TPACKET_V2, "TPACKET_V2"); 2880 if (ret == 0) { 2881 /* 2882 * Success. 2883 */ 2884 return 0; 2885 } 2886 2887 if (ret == 1) { 2888 /* 2889 * OK, the kernel supports memory-mapped capture, but 2890 * not TPACKET_V2. Set the error message appropriately. 2891 */ 2892 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 2893 "Kernel doesn't support TPACKET_V2; a 2.6.27 or later kernel is required"); 2894 } 2895 2896 /* 2897 * We failed. 2898 */ 2899 return -1; 2900 } 2901 2902 #define MAX(a,b) ((a)>(b)?(a):(b)) 2903 2904 /* 2905 * Attempt to set up memory-mapped access. 2906 * 2907 * On success, returns 0 if there are no warnings or to a PCAP_WARNING_ code 2908 * if there is a warning. 2909 * 2910 * On error, returns the appropriate error code; if that is PCAP_ERROR, 2911 * sets handle->errbuf to the appropriate message. 2912 */ 2913 static int 2914 create_ring(pcap_t *handle) 2915 { 2916 struct pcap_linux *handlep = handle->priv; 2917 unsigned i, j, frames_per_block; 2918 #ifdef HAVE_TPACKET3 2919 /* 2920 * For sockets using TPACKET_V2, the extra stuff at the end of a 2921 * struct tpacket_req3 will be ignored, so this is OK even for 2922 * those sockets. 2923 */ 2924 struct tpacket_req3 req; 2925 #else 2926 struct tpacket_req req; 2927 #endif 2928 socklen_t len; 2929 unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff; 2930 unsigned int frame_size; 2931 int status; 2932 2933 /* 2934 * Start out assuming no warnings. 2935 */ 2936 status = 0; 2937 2938 /* 2939 * Reserve space for VLAN tag reconstruction. 2940 */ 2941 tp_reserve = VLAN_TAG_LEN; 2942 2943 /* 2944 * If we're capturing in cooked mode, reserve space for 2945 * a DLT_LINUX_SLL2 header; we don't know yet whether 2946 * we'll be using DLT_LINUX_SLL or DLT_LINUX_SLL2, as 2947 * that can be changed on an open device, so we reserve 2948 * space for the larger of the two. 2949 * 2950 * XXX - we assume that the kernel is still adding 2951 * 16 bytes of extra space, so we subtract 16 from 2952 * SLL2_HDR_LEN to get the additional space needed. 2953 * (Are they doing that for DLT_LINUX_SLL, the link- 2954 * layer header for which is 16 bytes?) 2955 * 2956 * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - 16)? 2957 */ 2958 if (handlep->cooked) 2959 tp_reserve += SLL2_HDR_LEN - 16; 2960 2961 /* 2962 * Try to request that amount of reserve space. 2963 * This must be done before creating the ring buffer. 2964 */ 2965 len = sizeof(tp_reserve); 2966 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, 2967 &tp_reserve, len) < 0) { 2968 pcapint_fmt_errmsg_for_errno(handle->errbuf, 2969 PCAP_ERRBUF_SIZE, errno, 2970 "setsockopt (PACKET_RESERVE)"); 2971 return PCAP_ERROR; 2972 } 2973 2974 switch (handlep->tp_version) { 2975 2976 case TPACKET_V2: 2977 /* Note that with large snapshot length (say 256K, which is 2978 * the default for recent versions of tcpdump, Wireshark, 2979 * TShark, dumpcap or 64K, the value that "-s 0" has given for 2980 * a long time with tcpdump), if we use the snapshot 2981 * length to calculate the frame length, only a few frames 2982 * will be available in the ring even with pretty 2983 * large ring size (and a lot of memory will be unused). 2984 * 2985 * Ideally, we should choose a frame length based on the 2986 * minimum of the specified snapshot length and the maximum 2987 * packet size. That's not as easy as it sounds; consider, 2988 * for example, an 802.11 interface in monitor mode, where 2989 * the frame would include a radiotap header, where the 2990 * maximum radiotap header length is device-dependent. 2991 * 2992 * So, for now, we just do this for Ethernet devices, where 2993 * there's no metadata header, and the link-layer header is 2994 * fixed length. We can get the maximum packet size by 2995 * adding 18, the Ethernet header length plus the CRC length 2996 * (just in case we happen to get the CRC in the packet), to 2997 * the MTU of the interface; we fetch the MTU in the hopes 2998 * that it reflects support for jumbo frames. (Even if the 2999 * interface is just being used for passive snooping, the 3000 * driver might set the size of buffers in the receive ring 3001 * based on the MTU, so that the MTU limits the maximum size 3002 * of packets that we can receive.) 3003 * 3004 * If segmentation/fragmentation or receive offload are 3005 * enabled, we can get reassembled/aggregated packets larger 3006 * than MTU, but bounded to 65535 plus the Ethernet overhead, 3007 * due to kernel and protocol constraints */ 3008 frame_size = handle->snapshot; 3009 if (handle->linktype == DLT_EN10MB) { 3010 unsigned int max_frame_len; 3011 int mtu; 3012 int offload; 3013 3014 mtu = iface_get_mtu(handle->fd, handle->opt.device, 3015 handle->errbuf); 3016 if (mtu == -1) 3017 return PCAP_ERROR; 3018 offload = iface_get_offload(handle); 3019 if (offload == -1) 3020 return PCAP_ERROR; 3021 if (offload) 3022 max_frame_len = MAX(mtu, 65535); 3023 else 3024 max_frame_len = mtu; 3025 max_frame_len += 18; 3026 3027 if (frame_size > max_frame_len) 3028 frame_size = max_frame_len; 3029 } 3030 3031 /* NOTE: calculus matching those in tpacket_rcv() 3032 * in linux-2.6/net/packet/af_packet.c 3033 */ 3034 len = sizeof(sk_type); 3035 if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type, 3036 &len) < 0) { 3037 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3038 PCAP_ERRBUF_SIZE, errno, "getsockopt (SO_TYPE)"); 3039 return PCAP_ERROR; 3040 } 3041 maclen = (sk_type == SOCK_DGRAM) ? 0 : MAX_LINKHEADER_SIZE; 3042 /* XXX: in the kernel maclen is calculated from 3043 * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len 3044 * in: packet_snd() in linux-2.6/net/packet/af_packet.c 3045 * then packet_alloc_skb() in linux-2.6/net/packet/af_packet.c 3046 * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c 3047 * but I see no way to get those sizes in userspace, 3048 * like for instance with an ifreq ioctl(); 3049 * the best thing I've found so far is MAX_HEADER in 3050 * the kernel part of linux-2.6/include/linux/netdevice.h 3051 * which goes up to 128+48=176; since pcap-linux.c 3052 * defines a MAX_LINKHEADER_SIZE of 256 which is 3053 * greater than that, let's use it.. maybe is it even 3054 * large enough to directly replace macoff.. 3055 */ 3056 tp_hdrlen = TPACKET_ALIGN(handlep->tp_hdrlen) + sizeof(struct sockaddr_ll) ; 3057 netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve; 3058 /* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN 3059 * of netoff, which contradicts 3060 * linux-2.6/Documentation/networking/packet_mmap.txt 3061 * documenting that: 3062 * "- Gap, chosen so that packet data (Start+tp_net) 3063 * aligns to TPACKET_ALIGNMENT=16" 3064 */ 3065 /* NOTE: in linux-2.6/include/linux/skbuff.h: 3066 * "CPUs often take a performance hit 3067 * when accessing unaligned memory locations" 3068 */ 3069 macoff = netoff - maclen; 3070 req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size); 3071 /* 3072 * Round the buffer size up to a multiple of the 3073 * frame size (rather than rounding down, which 3074 * would give a buffer smaller than our caller asked 3075 * for, and possibly give zero frames if the requested 3076 * buffer size is too small for one frame). 3077 */ 3078 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3079 break; 3080 3081 #ifdef HAVE_TPACKET3 3082 case TPACKET_V3: 3083 /* The "frames" for this are actually buffers that 3084 * contain multiple variable-sized frames. 3085 * 3086 * We pick a "frame" size of MAXIMUM_SNAPLEN to leave 3087 * enough room for at least one reasonably-sized packet 3088 * in the "frame". */ 3089 req.tp_frame_size = MAXIMUM_SNAPLEN; 3090 /* 3091 * Round the buffer size up to a multiple of the 3092 * "frame" size (rather than rounding down, which 3093 * would give a buffer smaller than our caller asked 3094 * for, and possibly give zero "frames" if the requested 3095 * buffer size is too small for one "frame"). 3096 */ 3097 req.tp_frame_nr = (handle->opt.buffer_size + req.tp_frame_size - 1)/req.tp_frame_size; 3098 break; 3099 #endif 3100 default: 3101 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3102 "Internal error: unknown TPACKET_ value %u", 3103 handlep->tp_version); 3104 return PCAP_ERROR; 3105 } 3106 3107 /* compute the minimum block size that will handle this frame. 3108 * The block has to be page size aligned. 3109 * The max block size allowed by the kernel is arch-dependent and 3110 * it's not explicitly checked here. */ 3111 req.tp_block_size = getpagesize(); 3112 while (req.tp_block_size < req.tp_frame_size) 3113 req.tp_block_size <<= 1; 3114 3115 frames_per_block = req.tp_block_size/req.tp_frame_size; 3116 3117 /* 3118 * PACKET_TIMESTAMP was added after linux/net_tstamp.h was, 3119 * so we check for PACKET_TIMESTAMP. We check for 3120 * linux/net_tstamp.h just in case a system somehow has 3121 * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might 3122 * be unnecessary. 3123 * 3124 * SIOCSHWTSTAMP was introduced in the patch that introduced 3125 * linux/net_tstamp.h, so we don't bother checking whether 3126 * SIOCSHWTSTAMP is defined (if your Linux system has 3127 * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your 3128 * Linux system is badly broken). 3129 */ 3130 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 3131 /* 3132 * If we were told to do so, ask the kernel and the driver 3133 * to use hardware timestamps. 3134 * 3135 * Hardware timestamps are only supported with mmapped 3136 * captures. 3137 */ 3138 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER || 3139 handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) { 3140 struct hwtstamp_config hwconfig; 3141 struct ifreq ifr; 3142 int timesource; 3143 3144 /* 3145 * Ask for hardware time stamps on all packets, 3146 * including transmitted packets. 3147 */ 3148 memset(&hwconfig, 0, sizeof(hwconfig)); 3149 hwconfig.tx_type = HWTSTAMP_TX_ON; 3150 hwconfig.rx_filter = HWTSTAMP_FILTER_ALL; 3151 3152 memset(&ifr, 0, sizeof(ifr)); 3153 pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 3154 ifr.ifr_data = (void *)&hwconfig; 3155 3156 /* 3157 * This may require CAP_NET_ADMIN. 3158 */ 3159 if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) { 3160 switch (errno) { 3161 3162 case EPERM: 3163 /* 3164 * Treat this as an error, as the 3165 * user should try to run this 3166 * with the appropriate privileges - 3167 * and, if they can't, shouldn't 3168 * try requesting hardware time stamps. 3169 */ 3170 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3171 "Attempt to set hardware timestamp failed - CAP_NET_ADMIN may be required"); 3172 return PCAP_ERROR_PERM_DENIED; 3173 3174 case EOPNOTSUPP: 3175 case ERANGE: 3176 /* 3177 * Treat this as a warning, as the 3178 * only way to fix the warning is to 3179 * get an adapter that supports hardware 3180 * time stamps for *all* packets. 3181 * (ERANGE means "we support hardware 3182 * time stamps, but for packets matching 3183 * that particular filter", so it means 3184 * "we don't support hardware time stamps 3185 * for all incoming packets" here.) 3186 * 3187 * We'll just fall back on the standard 3188 * host time stamps. 3189 */ 3190 status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP; 3191 break; 3192 3193 default: 3194 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3195 PCAP_ERRBUF_SIZE, errno, 3196 "SIOCSHWTSTAMP failed"); 3197 return PCAP_ERROR; 3198 } 3199 } else { 3200 /* 3201 * Well, that worked. Now specify the type of 3202 * hardware time stamp we want for this 3203 * socket. 3204 */ 3205 if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) { 3206 /* 3207 * Hardware timestamp, synchronized 3208 * with the system clock. 3209 */ 3210 timesource = SOF_TIMESTAMPING_SYS_HARDWARE; 3211 } else { 3212 /* 3213 * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware 3214 * timestamp, not synchronized with the 3215 * system clock. 3216 */ 3217 timesource = SOF_TIMESTAMPING_RAW_HARDWARE; 3218 } 3219 if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP, 3220 (void *)×ource, sizeof(timesource))) { 3221 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3222 PCAP_ERRBUF_SIZE, errno, 3223 "can't set PACKET_TIMESTAMP"); 3224 return PCAP_ERROR; 3225 } 3226 } 3227 } 3228 #endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */ 3229 3230 /* ask the kernel to create the ring */ 3231 retry: 3232 req.tp_block_nr = req.tp_frame_nr / frames_per_block; 3233 3234 /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */ 3235 req.tp_frame_nr = req.tp_block_nr * frames_per_block; 3236 3237 #ifdef HAVE_TPACKET3 3238 /* timeout value to retire block - use the configured buffering timeout, or default if <0. */ 3239 if (handlep->timeout > 0) { 3240 /* Use the user specified timeout as the block timeout */ 3241 req.tp_retire_blk_tov = handlep->timeout; 3242 } else if (handlep->timeout == 0) { 3243 /* 3244 * In pcap, this means "infinite timeout"; TPACKET_V3 3245 * doesn't support that, so just set it to UINT_MAX 3246 * milliseconds. In the TPACKET_V3 loop, if the 3247 * timeout is 0, and we haven't yet seen any packets, 3248 * and we block and still don't have any packets, we 3249 * keep blocking until we do. 3250 */ 3251 req.tp_retire_blk_tov = UINT_MAX; 3252 } else { 3253 /* 3254 * XXX - this is not valid; use 0, meaning "have the 3255 * kernel pick a default", for now. 3256 */ 3257 req.tp_retire_blk_tov = 0; 3258 } 3259 /* private data not used */ 3260 req.tp_sizeof_priv = 0; 3261 /* Rx ring - feature request bits - none (rxhash will not be filled) */ 3262 req.tp_feature_req_word = 0; 3263 #endif 3264 3265 if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3266 (void *) &req, sizeof(req))) { 3267 if ((errno == ENOMEM) && (req.tp_block_nr > 1)) { 3268 /* 3269 * Memory failure; try to reduce the requested ring 3270 * size. 3271 * 3272 * We used to reduce this by half -- do 5% instead. 3273 * That may result in more iterations and a longer 3274 * startup, but the user will be much happier with 3275 * the resulting buffer size. 3276 */ 3277 if (req.tp_frame_nr < 20) 3278 req.tp_frame_nr -= 1; 3279 else 3280 req.tp_frame_nr -= req.tp_frame_nr/20; 3281 goto retry; 3282 } 3283 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3284 errno, "can't create rx ring on packet socket"); 3285 return PCAP_ERROR; 3286 } 3287 3288 /* memory map the rx ring */ 3289 handlep->mmapbuflen = req.tp_block_nr * req.tp_block_size; 3290 handlep->mmapbuf = mmap(0, handlep->mmapbuflen, 3291 PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0); 3292 if (handlep->mmapbuf == MAP_FAILED) { 3293 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3294 errno, "can't mmap rx ring"); 3295 3296 /* clear the allocated ring on error*/ 3297 destroy_ring(handle); 3298 return PCAP_ERROR; 3299 } 3300 3301 /* allocate a ring for each frame header pointer*/ 3302 handle->cc = req.tp_frame_nr; 3303 handle->buffer = malloc(handle->cc * sizeof(union thdr *)); 3304 if (!handle->buffer) { 3305 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 3306 errno, "can't allocate ring of frame headers"); 3307 3308 destroy_ring(handle); 3309 return PCAP_ERROR; 3310 } 3311 3312 /* fill the header ring with proper frame ptr*/ 3313 handle->offset = 0; 3314 for (i=0; i<req.tp_block_nr; ++i) { 3315 u_char *base = &handlep->mmapbuf[i*req.tp_block_size]; 3316 for (j=0; j<frames_per_block; ++j, ++handle->offset) { 3317 RING_GET_CURRENT_FRAME(handle) = base; 3318 base += req.tp_frame_size; 3319 } 3320 } 3321 3322 handle->bufsize = req.tp_frame_size; 3323 handle->offset = 0; 3324 return status; 3325 } 3326 3327 /* free all ring related resources*/ 3328 static void 3329 destroy_ring(pcap_t *handle) 3330 { 3331 struct pcap_linux *handlep = handle->priv; 3332 3333 /* 3334 * Tell the kernel to destroy the ring. 3335 * We don't check for setsockopt failure, as 1) we can't recover 3336 * from an error and 2) we might not yet have set it up in the 3337 * first place. 3338 */ 3339 struct tpacket_req req; 3340 memset(&req, 0, sizeof(req)); 3341 (void)setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, 3342 (void *) &req, sizeof(req)); 3343 3344 /* if ring is mapped, unmap it*/ 3345 if (handlep->mmapbuf) { 3346 /* do not test for mmap failure, as we can't recover from any error */ 3347 (void)munmap(handlep->mmapbuf, handlep->mmapbuflen); 3348 handlep->mmapbuf = NULL; 3349 } 3350 } 3351 3352 /* 3353 * Special one-shot callback, used for pcap_next() and pcap_next_ex(), 3354 * for Linux mmapped capture. 3355 * 3356 * The problem is that pcap_next() and pcap_next_ex() expect the packet 3357 * data handed to the callback to be valid after the callback returns, 3358 * but pcap_read_linux_mmap() has to release that packet as soon as 3359 * the callback returns (otherwise, the kernel thinks there's still 3360 * at least one unprocessed packet available in the ring, so a select() 3361 * will immediately return indicating that there's data to process), so, 3362 * in the callback, we have to make a copy of the packet. 3363 * 3364 * Yes, this means that, if the capture is using the ring buffer, using 3365 * pcap_next() or pcap_next_ex() requires more copies than using 3366 * pcap_loop() or pcap_dispatch(). If that bothers you, don't use 3367 * pcap_next() or pcap_next_ex(). 3368 */ 3369 static void 3370 pcapint_oneshot_linux(u_char *user, const struct pcap_pkthdr *h, 3371 const u_char *bytes) 3372 { 3373 struct oneshot_userdata *sp = (struct oneshot_userdata *)user; 3374 pcap_t *handle = sp->pd; 3375 struct pcap_linux *handlep = handle->priv; 3376 3377 *sp->hdr = *h; 3378 memcpy(handlep->oneshot_buffer, bytes, h->caplen); 3379 *sp->pkt = handlep->oneshot_buffer; 3380 } 3381 3382 static int 3383 pcap_getnonblock_linux(pcap_t *handle) 3384 { 3385 struct pcap_linux *handlep = handle->priv; 3386 3387 /* use negative value of timeout to indicate non blocking ops */ 3388 return (handlep->timeout<0); 3389 } 3390 3391 static int 3392 pcap_setnonblock_linux(pcap_t *handle, int nonblock) 3393 { 3394 struct pcap_linux *handlep = handle->priv; 3395 3396 /* 3397 * Set the file descriptor to the requested mode, as we use 3398 * it for sending packets. 3399 */ 3400 if (pcapint_setnonblock_fd(handle, nonblock) == -1) 3401 return -1; 3402 3403 /* 3404 * Map each value to their corresponding negation to 3405 * preserve the timeout value provided with pcap_set_timeout. 3406 */ 3407 if (nonblock) { 3408 /* 3409 * We're setting the mode to non-blocking mode. 3410 */ 3411 if (handlep->timeout >= 0) { 3412 /* 3413 * Indicate that we're switching to 3414 * non-blocking mode. 3415 */ 3416 handlep->timeout = ~handlep->timeout; 3417 } 3418 if (handlep->poll_breakloop_fd != -1) { 3419 /* Close the eventfd; we do not need it in nonblock mode. */ 3420 close(handlep->poll_breakloop_fd); 3421 handlep->poll_breakloop_fd = -1; 3422 } 3423 } else { 3424 /* 3425 * We're setting the mode to blocking mode. 3426 */ 3427 if (handlep->poll_breakloop_fd == -1) { 3428 /* If we did not have an eventfd, open one now that we are blocking. */ 3429 if ( ( handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK) ) == -1 ) { 3430 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3431 PCAP_ERRBUF_SIZE, errno, 3432 "could not open eventfd"); 3433 return -1; 3434 } 3435 } 3436 if (handlep->timeout < 0) { 3437 handlep->timeout = ~handlep->timeout; 3438 } 3439 } 3440 /* Update the timeout to use in poll(). */ 3441 set_poll_timeout(handlep); 3442 return 0; 3443 } 3444 3445 /* 3446 * Get the status field of the ring buffer frame at a specified offset. 3447 */ 3448 static inline u_int 3449 pcap_get_ring_frame_status(pcap_t *handle, int offset) 3450 { 3451 struct pcap_linux *handlep = handle->priv; 3452 union thdr h; 3453 3454 h.raw = RING_GET_FRAME_AT(handle, offset); 3455 switch (handlep->tp_version) { 3456 case TPACKET_V2: 3457 return __atomic_load_n(&h.h2->tp_status, __ATOMIC_ACQUIRE); 3458 break; 3459 #ifdef HAVE_TPACKET3 3460 case TPACKET_V3: 3461 return __atomic_load_n(&h.h3->hdr.bh1.block_status, __ATOMIC_ACQUIRE); 3462 break; 3463 #endif 3464 } 3465 /* This should not happen. */ 3466 return 0; 3467 } 3468 3469 /* 3470 * Block waiting for frames to be available. 3471 */ 3472 static int pcap_wait_for_frames_mmap(pcap_t *handle) 3473 { 3474 struct pcap_linux *handlep = handle->priv; 3475 int timeout; 3476 struct ifreq ifr; 3477 int ret; 3478 struct pollfd pollinfo[2]; 3479 int numpollinfo; 3480 pollinfo[0].fd = handle->fd; 3481 pollinfo[0].events = POLLIN; 3482 if ( handlep->poll_breakloop_fd == -1 ) { 3483 numpollinfo = 1; 3484 pollinfo[1].revents = 0; 3485 /* 3486 * We set pollinfo[1].revents to zero, even though 3487 * numpollinfo = 1 meaning that poll() doesn't see 3488 * pollinfo[1], so that we do not have to add a 3489 * conditional of numpollinfo > 1 below when we 3490 * test pollinfo[1].revents. 3491 */ 3492 } else { 3493 pollinfo[1].fd = handlep->poll_breakloop_fd; 3494 pollinfo[1].events = POLLIN; 3495 numpollinfo = 2; 3496 } 3497 3498 /* 3499 * Keep polling until we either get some packets to read, see 3500 * that we got told to break out of the loop, get a fatal error, 3501 * or discover that the device went away. 3502 * 3503 * In non-blocking mode, we must still do one poll() to catch 3504 * any pending error indications, but the poll() has a timeout 3505 * of 0, so that it doesn't block, and we quit after that one 3506 * poll(). 3507 * 3508 * If we've seen an ENETDOWN, it might be the first indication 3509 * that the device went away, or it might just be that it was 3510 * configured down. Unfortunately, there's no guarantee that 3511 * the device has actually been removed as an interface, because: 3512 * 3513 * 1) if, as appears to be the case at least some of the time, 3514 * the PF_PACKET socket code first gets a NETDEV_DOWN indication 3515 * for the device and then gets a NETDEV_UNREGISTER indication 3516 * for it, the first indication will cause a wakeup with ENETDOWN 3517 * but won't set the packet socket's field for the interface index 3518 * to -1, and the second indication won't cause a wakeup (because 3519 * the first indication also caused the protocol hook to be 3520 * unregistered) but will set the packet socket's field for the 3521 * interface index to -1; 3522 * 3523 * 2) even if just a NETDEV_UNREGISTER indication is registered, 3524 * the packet socket's field for the interface index only gets 3525 * set to -1 after the wakeup, so there's a small but non-zero 3526 * risk that a thread blocked waiting for the wakeup will get 3527 * to the "fetch the socket name" code before the interface index 3528 * gets set to -1, so it'll get the old interface index. 3529 * 3530 * Therefore, if we got an ENETDOWN and haven't seen a packet 3531 * since then, we assume that we might be waiting for the interface 3532 * to disappear, and poll with a timeout to try again in a short 3533 * period of time. If we *do* see a packet, the interface has 3534 * come back up again, and is *definitely* still there, so we 3535 * don't need to poll. 3536 */ 3537 for (;;) { 3538 /* 3539 * Yes, we do this even in non-blocking mode, as it's 3540 * the only way to get error indications from a 3541 * tpacket socket. 3542 * 3543 * The timeout is 0 in non-blocking mode, so poll() 3544 * returns immediately. 3545 */ 3546 timeout = handlep->poll_timeout; 3547 3548 /* 3549 * If we got an ENETDOWN and haven't gotten an indication 3550 * that the device has gone away or that the device is up, 3551 * we don't yet know for certain whether the device has 3552 * gone away or not, do a poll() with a 1-millisecond timeout, 3553 * as we have to poll indefinitely for "device went away" 3554 * indications until we either get one or see that the 3555 * device is up. 3556 */ 3557 if (handlep->netdown) { 3558 if (timeout != 0) 3559 timeout = 1; 3560 } 3561 ret = poll(pollinfo, numpollinfo, timeout); 3562 if (ret < 0) { 3563 /* 3564 * Error. If it's not EINTR, report it. 3565 */ 3566 if (errno != EINTR) { 3567 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3568 PCAP_ERRBUF_SIZE, errno, 3569 "can't poll on packet socket"); 3570 return PCAP_ERROR; 3571 } 3572 3573 /* 3574 * It's EINTR; if we were told to break out of 3575 * the loop, do so. 3576 */ 3577 if (handle->break_loop) { 3578 handle->break_loop = 0; 3579 return PCAP_ERROR_BREAK; 3580 } 3581 } else if (ret > 0) { 3582 /* 3583 * OK, some descriptor is ready. 3584 * Check the socket descriptor first. 3585 * 3586 * As I read the Linux man page, pollinfo[0].revents 3587 * will either be POLLIN, POLLERR, POLLHUP, or POLLNVAL. 3588 */ 3589 if (pollinfo[0].revents == POLLIN) { 3590 /* 3591 * OK, we may have packets to 3592 * read. 3593 */ 3594 break; 3595 } 3596 if (pollinfo[0].revents != 0) { 3597 /* 3598 * There's some indication other than 3599 * "you can read on this descriptor" on 3600 * the descriptor. 3601 */ 3602 if (pollinfo[0].revents & POLLNVAL) { 3603 snprintf(handle->errbuf, 3604 PCAP_ERRBUF_SIZE, 3605 "Invalid polling request on packet socket"); 3606 return PCAP_ERROR; 3607 } 3608 if (pollinfo[0].revents & (POLLHUP | POLLRDHUP)) { 3609 snprintf(handle->errbuf, 3610 PCAP_ERRBUF_SIZE, 3611 "Hangup on packet socket"); 3612 return PCAP_ERROR; 3613 } 3614 if (pollinfo[0].revents & POLLERR) { 3615 /* 3616 * Get the error. 3617 */ 3618 int err; 3619 socklen_t errlen; 3620 3621 errlen = sizeof(err); 3622 if (getsockopt(handle->fd, SOL_SOCKET, 3623 SO_ERROR, &err, &errlen) == -1) { 3624 /* 3625 * The call *itself* returned 3626 * an error; make *that* 3627 * the error. 3628 */ 3629 err = errno; 3630 } 3631 3632 /* 3633 * OK, we have the error. 3634 */ 3635 if (err == ENETDOWN) { 3636 /* 3637 * The device on which we're 3638 * capturing went away or the 3639 * interface was taken down. 3640 * 3641 * We don't know for certain 3642 * which happened, and the 3643 * next poll() may indicate 3644 * that there are packets 3645 * to be read, so just set 3646 * a flag to get us to do 3647 * checks later, and set 3648 * the required select 3649 * timeout to 1 millisecond 3650 * so that event loops that 3651 * check our socket descriptor 3652 * also time out so that 3653 * they can call us and we 3654 * can do the checks. 3655 */ 3656 handlep->netdown = 1; 3657 handle->required_select_timeout = &netdown_timeout; 3658 } else if (err == 0) { 3659 /* 3660 * This shouldn't happen, so 3661 * report a special indication 3662 * that it did. 3663 */ 3664 snprintf(handle->errbuf, 3665 PCAP_ERRBUF_SIZE, 3666 "Error condition on packet socket: Reported error was 0"); 3667 return PCAP_ERROR; 3668 } else { 3669 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3670 PCAP_ERRBUF_SIZE, 3671 err, 3672 "Error condition on packet socket"); 3673 return PCAP_ERROR; 3674 } 3675 } 3676 } 3677 /* 3678 * Now check the event device. 3679 */ 3680 if (pollinfo[1].revents & POLLIN) { 3681 ssize_t nread; 3682 uint64_t value; 3683 3684 /* 3685 * This should never fail, but, just 3686 * in case.... 3687 */ 3688 nread = read(handlep->poll_breakloop_fd, &value, 3689 sizeof(value)); 3690 if (nread == -1) { 3691 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3692 PCAP_ERRBUF_SIZE, 3693 errno, 3694 "Error reading from event FD"); 3695 return PCAP_ERROR; 3696 } 3697 3698 /* 3699 * According to the Linux read(2) man 3700 * page, read() will transfer at most 3701 * 2^31-1 bytes, so the return value is 3702 * either -1 or a value between 0 3703 * and 2^31-1, so it's non-negative. 3704 * 3705 * Cast it to size_t to squelch 3706 * warnings from the compiler; add this 3707 * comment to squelch warnings from 3708 * humans reading the code. :-) 3709 * 3710 * Don't treat an EOF as an error, but 3711 * *do* treat a short read as an error; 3712 * that "shouldn't happen", but.... 3713 */ 3714 if (nread != 0 && 3715 (size_t)nread < sizeof(value)) { 3716 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3717 "Short read from event FD: expected %zu, got %zd", 3718 sizeof(value), nread); 3719 return PCAP_ERROR; 3720 } 3721 3722 /* 3723 * This event gets signaled by a 3724 * pcap_breakloop() call; if we were told 3725 * to break out of the loop, do so. 3726 */ 3727 if (handle->break_loop) { 3728 handle->break_loop = 0; 3729 return PCAP_ERROR_BREAK; 3730 } 3731 } 3732 } 3733 3734 /* 3735 * Either: 3736 * 3737 * 1) we got neither an error from poll() nor any 3738 * readable descriptors, in which case there 3739 * are no packets waiting to read 3740 * 3741 * or 3742 * 3743 * 2) We got readable descriptors but the PF_PACKET 3744 * socket wasn't one of them, in which case there 3745 * are no packets waiting to read 3746 * 3747 * so, if we got an ENETDOWN, we've drained whatever 3748 * packets were available to read at the point of the 3749 * ENETDOWN. 3750 * 3751 * So, if we got an ENETDOWN and haven't gotten an indication 3752 * that the device has gone away or that the device is up, 3753 * we don't yet know for certain whether the device has 3754 * gone away or not, check whether the device exists and is 3755 * up. 3756 */ 3757 if (handlep->netdown) { 3758 if (!device_still_exists(handle)) { 3759 /* 3760 * The device doesn't exist any more; 3761 * report that. 3762 * 3763 * XXX - we should really return an 3764 * appropriate error for that, but 3765 * pcap_dispatch() etc. aren't documented 3766 * as having error returns other than 3767 * PCAP_ERROR or PCAP_ERROR_BREAK. 3768 */ 3769 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3770 "The interface disappeared"); 3771 return PCAP_ERROR; 3772 } 3773 3774 /* 3775 * The device still exists; try to see if it's up. 3776 */ 3777 memset(&ifr, 0, sizeof(ifr)); 3778 pcapint_strlcpy(ifr.ifr_name, handlep->device, 3779 sizeof(ifr.ifr_name)); 3780 if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) { 3781 if (errno == ENXIO || errno == ENODEV) { 3782 /* 3783 * OK, *now* it's gone. 3784 * 3785 * XXX - see above comment. 3786 */ 3787 snprintf(handle->errbuf, 3788 PCAP_ERRBUF_SIZE, 3789 "The interface disappeared"); 3790 return PCAP_ERROR; 3791 } else { 3792 pcapint_fmt_errmsg_for_errno(handle->errbuf, 3793 PCAP_ERRBUF_SIZE, errno, 3794 "%s: Can't get flags", 3795 handlep->device); 3796 return PCAP_ERROR; 3797 } 3798 } 3799 if (ifr.ifr_flags & IFF_UP) { 3800 /* 3801 * It's up, so it definitely still exists. 3802 * Cancel the ENETDOWN indication - we 3803 * presumably got it due to the interface 3804 * going down rather than the device going 3805 * away - and revert to "no required select 3806 * timeout. 3807 */ 3808 handlep->netdown = 0; 3809 handle->required_select_timeout = NULL; 3810 } 3811 } 3812 3813 /* 3814 * If we're in non-blocking mode, just quit now, rather 3815 * than spinning in a loop doing poll()s that immediately 3816 * time out if there's no indication on any descriptor. 3817 */ 3818 if (handlep->poll_timeout == 0) 3819 break; 3820 } 3821 return 0; 3822 } 3823 3824 /* handle a single memory mapped packet */ 3825 static int pcap_handle_packet_mmap( 3826 pcap_t *handle, 3827 pcap_handler callback, 3828 u_char *user, 3829 unsigned char *frame, 3830 unsigned int tp_len, 3831 unsigned int tp_mac, 3832 unsigned int tp_snaplen, 3833 unsigned int tp_sec, 3834 unsigned int tp_usec, 3835 int tp_vlan_tci_valid, 3836 __u16 tp_vlan_tci, 3837 __u16 tp_vlan_tpid) 3838 { 3839 struct pcap_linux *handlep = handle->priv; 3840 unsigned char *bp; 3841 struct sockaddr_ll *sll; 3842 struct pcap_pkthdr pcaphdr; 3843 unsigned int snaplen = tp_snaplen; 3844 struct utsname utsname; 3845 3846 /* perform sanity check on internal offset. */ 3847 if (tp_mac + tp_snaplen > handle->bufsize) { 3848 /* 3849 * Report some system information as a debugging aid. 3850 */ 3851 if (uname(&utsname) != -1) { 3852 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3853 "corrupted frame on kernel ring mac " 3854 "offset %u + caplen %u > frame len %d " 3855 "(kernel %.32s version %s, machine %.16s)", 3856 tp_mac, tp_snaplen, handle->bufsize, 3857 utsname.release, utsname.version, 3858 utsname.machine); 3859 } else { 3860 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3861 "corrupted frame on kernel ring mac " 3862 "offset %u + caplen %u > frame len %d", 3863 tp_mac, tp_snaplen, handle->bufsize); 3864 } 3865 return -1; 3866 } 3867 3868 /* run filter on received packet 3869 * If the kernel filtering is enabled we need to run the 3870 * filter until all the frames present into the ring 3871 * at filter creation time are processed. 3872 * In this case, blocks_to_filter_in_userland is used 3873 * as a counter for the packet we need to filter. 3874 * Note: alternatively it could be possible to stop applying 3875 * the filter when the ring became empty, but it can possibly 3876 * happen a lot later... */ 3877 bp = frame + tp_mac; 3878 3879 /* if required build in place the sll header*/ 3880 sll = (void *)(frame + TPACKET_ALIGN(handlep->tp_hdrlen)); 3881 if (handlep->cooked) { 3882 if (handle->linktype == DLT_LINUX_SLL2) { 3883 struct sll2_header *hdrp; 3884 3885 /* 3886 * The kernel should have left us with enough 3887 * space for an sll header; back up the packet 3888 * data pointer into that space, as that'll be 3889 * the beginning of the packet we pass to the 3890 * callback. 3891 */ 3892 bp -= SLL2_HDR_LEN; 3893 3894 /* 3895 * Let's make sure that's past the end of 3896 * the tpacket header, i.e. >= 3897 * ((u_char *)thdr + TPACKET_HDRLEN), so we 3898 * don't step on the header when we construct 3899 * the sll header. 3900 */ 3901 if (bp < (u_char *)frame + 3902 TPACKET_ALIGN(handlep->tp_hdrlen) + 3903 sizeof(struct sockaddr_ll)) { 3904 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3905 "cooked-mode frame doesn't have room for sll header"); 3906 return -1; 3907 } 3908 3909 /* 3910 * OK, that worked; construct the sll header. 3911 */ 3912 hdrp = (struct sll2_header *)bp; 3913 hdrp->sll2_protocol = sll->sll_protocol; 3914 hdrp->sll2_reserved_mbz = 0; 3915 hdrp->sll2_if_index = htonl(sll->sll_ifindex); 3916 hdrp->sll2_hatype = htons(sll->sll_hatype); 3917 hdrp->sll2_pkttype = sll->sll_pkttype; 3918 hdrp->sll2_halen = sll->sll_halen; 3919 memcpy(hdrp->sll2_addr, sll->sll_addr, SLL_ADDRLEN); 3920 3921 snaplen += sizeof(struct sll2_header); 3922 } else { 3923 struct sll_header *hdrp; 3924 3925 /* 3926 * The kernel should have left us with enough 3927 * space for an sll header; back up the packet 3928 * data pointer into that space, as that'll be 3929 * the beginning of the packet we pass to the 3930 * callback. 3931 */ 3932 bp -= SLL_HDR_LEN; 3933 3934 /* 3935 * Let's make sure that's past the end of 3936 * the tpacket header, i.e. >= 3937 * ((u_char *)thdr + TPACKET_HDRLEN), so we 3938 * don't step on the header when we construct 3939 * the sll header. 3940 */ 3941 if (bp < (u_char *)frame + 3942 TPACKET_ALIGN(handlep->tp_hdrlen) + 3943 sizeof(struct sockaddr_ll)) { 3944 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 3945 "cooked-mode frame doesn't have room for sll header"); 3946 return -1; 3947 } 3948 3949 /* 3950 * OK, that worked; construct the sll header. 3951 */ 3952 hdrp = (struct sll_header *)bp; 3953 hdrp->sll_pkttype = htons(sll->sll_pkttype); 3954 hdrp->sll_hatype = htons(sll->sll_hatype); 3955 hdrp->sll_halen = htons(sll->sll_halen); 3956 memcpy(hdrp->sll_addr, sll->sll_addr, SLL_ADDRLEN); 3957 hdrp->sll_protocol = sll->sll_protocol; 3958 3959 snaplen += sizeof(struct sll_header); 3960 } 3961 } else { 3962 /* 3963 * If this is a packet from a CAN device, so that 3964 * sll->sll_hatype is ARPHRD_CAN, then, as we're 3965 * not capturing in cooked mode, its link-layer 3966 * type is DLT_CAN_SOCKETCAN. Fix up the header 3967 * provided by the code below us to match what 3968 * DLT_CAN_SOCKETCAN is expected to provide. 3969 */ 3970 if (sll->sll_hatype == ARPHRD_CAN) { 3971 pcap_can_socketcan_hdr *canhdr = (pcap_can_socketcan_hdr *)bp; 3972 uint16_t protocol = ntohs(sll->sll_protocol); 3973 3974 /* 3975 * Check the protocol field from the sll header. 3976 * If it's one of the known CAN protocol types, 3977 * make sure the appropriate flags are set, so 3978 * that a program can tell what type of frame 3979 * it is. 3980 * 3981 * The two flags are: 3982 * 3983 * CANFD_FDF, which is in the fd_flags field 3984 * of the CAN classic/CAN FD header; 3985 * 3986 * CANXL_XLF, which is in the flags field 3987 * of the CAN XL header, which overlaps 3988 * the payload_length field of the CAN 3989 * classic/CAN FD header. 3990 */ 3991 switch (protocol) { 3992 3993 case LINUX_SLL_P_CAN: 3994 /* 3995 * CAN classic. 3996 * 3997 * Zero out the fd_flags and reserved 3998 * fields, in case they're uninitialized 3999 * crap, and clear the CANXL_XLF bit in 4000 * the payload_length field. 4001 * 4002 * This means that the CANFD_FDF flag isn't 4003 * set in the fd_flags field, and that 4004 * the CANXL_XLF bit isn't set in the 4005 * payload_length field, so this frame 4006 * will appear to be a CAN classic frame. 4007 */ 4008 canhdr->payload_length &= ~CANXL_XLF; 4009 canhdr->fd_flags = 0; 4010 canhdr->reserved1 = 0; 4011 canhdr->reserved2 = 0; 4012 break; 4013 4014 case LINUX_SLL_P_CANFD: 4015 /* 4016 * Set CANFD_FDF in the fd_flags field, 4017 * and clear the CANXL_XLF bit in the 4018 * payload_length field, so this frame 4019 * will appear to be a CAN FD frame. 4020 */ 4021 canhdr->payload_length &= ~CANXL_XLF; 4022 canhdr->fd_flags |= CANFD_FDF; 4023 4024 /* 4025 * Zero out all the unknown bits in fd_flags 4026 * and clear the reserved fields, so that 4027 * a program reading this can assume that 4028 * CANFD_FDF is set because we set it, not 4029 * because some uninitialized crap was 4030 * provided in the fd_flags field. 4031 * 4032 * (At least some LINKTYPE_CAN_SOCKETCAN 4033 * files attached to Wireshark bugs had 4034 * uninitialized junk there, so it does 4035 * happen.) 4036 * 4037 * Update this if Linux adds more flag bits 4038 * to the fd_flags field or uses either of 4039 * the reserved fields for FD frames. 4040 */ 4041 canhdr->fd_flags &= (CANFD_FDF|CANFD_ESI|CANFD_BRS); 4042 canhdr->reserved1 = 0; 4043 canhdr->reserved2 = 0; 4044 break; 4045 4046 case LINUX_SLL_P_CANXL: 4047 /* 4048 * CAN XL frame. 4049 * 4050 * Make sure the CANXL_XLF bit is set in 4051 * the payload_length field, so that 4052 * this frame will appear to be a 4053 * CAN XL frame. 4054 */ 4055 canhdr->payload_length |= CANXL_XLF; 4056 break; 4057 } 4058 4059 /* 4060 * Put multi-byte header fields in a byte-order 4061 *-independent format. 4062 */ 4063 if (canhdr->payload_length & CANXL_XLF) { 4064 /* 4065 * This is a CAN XL frame. 4066 * 4067 * DLT_CAN_SOCKETCAN is specified as having 4068 * the Priority ID/VCID field in big-- 4069 * endian byte order, and the payload length 4070 * and Acceptance Field in little-endian byte 4071 * order. but capturing on a CAN device 4072 * provides them in host byte order. 4073 * Convert them to the appropriate byte 4074 * orders. 4075 * 4076 * The reason we put the first field 4077 * into big-endian byte order is that 4078 * older libpcap code, ignorant of 4079 * CAN XL, treated it as the CAN ID 4080 * field and put it into big-endian 4081 * byte order, and we don't want to 4082 * break code that understands CAN XL 4083 * headers, and treats that field as 4084 * being big-endian. 4085 * 4086 * The other fields are put in little- 4087 * endian byte order is that older 4088 * libpcap code, ignorant of CAN XL, 4089 * left those fields alone, and the 4090 * processors on which the CAN XL 4091 * frames were captured are likely 4092 * to be little-endian processors. 4093 */ 4094 pcap_can_socketcan_xl_hdr *canxl_hdr = (pcap_can_socketcan_xl_hdr *)bp; 4095 4096 #if __BYTE_ORDER == __LITTLE_ENDIAN 4097 /* 4098 * We're capturing on a little-endian 4099 * machine, so we put the priority/VCID 4100 * field into big-endian byte order, and 4101 * leave the payload length and acceptance 4102 * field in little-endian byte order. 4103 */ 4104 /* Byte-swap priority/VCID. */ 4105 canxl_hdr->priority_vcid = SWAPLONG(canxl_hdr->priority_vcid); 4106 #elif __BYTE_ORDER == __BIG_ENDIAN 4107 /* 4108 * We're capturing on a big-endian 4109 * machine, so we want to leave the 4110 * priority/VCID field alone, and byte-swap 4111 * the payload length and acceptance 4112 * fields to little-endian. 4113 */ 4114 /* Byte-swap the payload length */ 4115 canxl_hdr->payload_length = SWAPSHORT(canxl_hdr->payload_length); 4116 4117 /* 4118 * Byte-swap the acceptance field. 4119 * 4120 * XXX - is it just a 4-octet string, 4121 * not in any byte order? 4122 */ 4123 canxl_hdr->acceptance_field = SWAPLONG(canxl_hdr->acceptance_field); 4124 #else 4125 #error "Unknown byte order" 4126 #endif 4127 } else { 4128 /* 4129 * CAN or CAN FD frame. 4130 * 4131 * DLT_CAN_SOCKETCAN is specified as having 4132 * the CAN ID and flags in network byte 4133 * order, but capturing on a CAN device 4134 * provides it in host byte order. Convert 4135 * it to network byte order. 4136 */ 4137 canhdr->can_id = htonl(canhdr->can_id); 4138 } 4139 } 4140 } 4141 4142 if (handlep->filter_in_userland && handle->fcode.bf_insns) { 4143 struct pcap_bpf_aux_data aux_data; 4144 4145 aux_data.vlan_tag_present = tp_vlan_tci_valid; 4146 aux_data.vlan_tag = tp_vlan_tci & 0x0fff; 4147 4148 if (pcapint_filter_with_aux_data(handle->fcode.bf_insns, 4149 bp, 4150 tp_len, 4151 snaplen, 4152 &aux_data) == 0) 4153 return 0; 4154 } 4155 4156 if (!linux_check_direction(handle, sll)) 4157 return 0; 4158 4159 /* get required packet info from ring header */ 4160 pcaphdr.ts.tv_sec = tp_sec; 4161 pcaphdr.ts.tv_usec = tp_usec; 4162 pcaphdr.caplen = tp_snaplen; 4163 pcaphdr.len = tp_len; 4164 4165 /* if required build in place the sll header*/ 4166 if (handlep->cooked) { 4167 /* update packet len */ 4168 if (handle->linktype == DLT_LINUX_SLL2) { 4169 pcaphdr.caplen += SLL2_HDR_LEN; 4170 pcaphdr.len += SLL2_HDR_LEN; 4171 } else { 4172 pcaphdr.caplen += SLL_HDR_LEN; 4173 pcaphdr.len += SLL_HDR_LEN; 4174 } 4175 } 4176 4177 if (tp_vlan_tci_valid && 4178 handlep->vlan_offset != -1 && 4179 tp_snaplen >= (unsigned int) handlep->vlan_offset) 4180 { 4181 struct vlan_tag *tag; 4182 4183 /* 4184 * Move everything in the header, except the type field, 4185 * down VLAN_TAG_LEN bytes, to allow us to insert the 4186 * VLAN tag between that stuff and the type field. 4187 */ 4188 bp -= VLAN_TAG_LEN; 4189 memmove(bp, bp + VLAN_TAG_LEN, handlep->vlan_offset); 4190 4191 /* 4192 * Now insert the tag. 4193 */ 4194 tag = (struct vlan_tag *)(bp + handlep->vlan_offset); 4195 tag->vlan_tpid = htons(tp_vlan_tpid); 4196 tag->vlan_tci = htons(tp_vlan_tci); 4197 4198 /* 4199 * Add the tag to the packet lengths. 4200 */ 4201 pcaphdr.caplen += VLAN_TAG_LEN; 4202 pcaphdr.len += VLAN_TAG_LEN; 4203 } 4204 4205 /* 4206 * The only way to tell the kernel to cut off the 4207 * packet at a snapshot length is with a filter program; 4208 * if there's no filter program, the kernel won't cut 4209 * the packet off. 4210 * 4211 * Trim the snapshot length to be no longer than the 4212 * specified snapshot length. 4213 * 4214 * XXX - an alternative is to put a filter, consisting 4215 * of a "ret <snaplen>" instruction, on the socket 4216 * in the activate routine, so that the truncation is 4217 * done in the kernel even if nobody specified a filter; 4218 * that means that less buffer space is consumed in 4219 * the memory-mapped buffer. 4220 */ 4221 if (pcaphdr.caplen > (bpf_u_int32)handle->snapshot) 4222 pcaphdr.caplen = handle->snapshot; 4223 4224 /* pass the packet to the user */ 4225 callback(user, &pcaphdr, bp); 4226 4227 return 1; 4228 } 4229 4230 static int 4231 pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback, 4232 u_char *user) 4233 { 4234 struct pcap_linux *handlep = handle->priv; 4235 union thdr h; 4236 int pkts = 0; 4237 int ret; 4238 4239 /* wait for frames availability.*/ 4240 h.raw = RING_GET_CURRENT_FRAME(handle); 4241 if (!packet_mmap_acquire(h.h2)) { 4242 /* 4243 * The current frame is owned by the kernel; wait for 4244 * a frame to be handed to us. 4245 */ 4246 ret = pcap_wait_for_frames_mmap(handle); 4247 if (ret) { 4248 return ret; 4249 } 4250 } 4251 4252 /* 4253 * This can conceivably process more than INT_MAX packets, 4254 * which would overflow the packet count, causing it either 4255 * to look like a negative number, and thus cause us to 4256 * return a value that looks like an error, or overflow 4257 * back into positive territory, and thus cause us to 4258 * return a too-low count. 4259 * 4260 * Therefore, if the packet count is unlimited, we clip 4261 * it at INT_MAX; this routine is not expected to 4262 * process packets indefinitely, so that's not an issue. 4263 */ 4264 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4265 max_packets = INT_MAX; 4266 4267 while (pkts < max_packets) { 4268 /* 4269 * Get the current ring buffer frame, and break if 4270 * it's still owned by the kernel. 4271 */ 4272 h.raw = RING_GET_CURRENT_FRAME(handle); 4273 if (!packet_mmap_acquire(h.h2)) 4274 break; 4275 4276 ret = pcap_handle_packet_mmap( 4277 handle, 4278 callback, 4279 user, 4280 h.raw, 4281 h.h2->tp_len, 4282 h.h2->tp_mac, 4283 h.h2->tp_snaplen, 4284 h.h2->tp_sec, 4285 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? h.h2->tp_nsec : h.h2->tp_nsec / 1000, 4286 VLAN_VALID(h.h2, h.h2), 4287 h.h2->tp_vlan_tci, 4288 VLAN_TPID(h.h2, h.h2)); 4289 if (ret == 1) { 4290 pkts++; 4291 } else if (ret < 0) { 4292 return ret; 4293 } 4294 4295 /* 4296 * Hand this block back to the kernel, and, if we're 4297 * counting blocks that need to be filtered in userland 4298 * after having been filtered by the kernel, count 4299 * the one we've just processed. 4300 */ 4301 packet_mmap_release(h.h2); 4302 if (handlep->blocks_to_filter_in_userland > 0) { 4303 handlep->blocks_to_filter_in_userland--; 4304 if (handlep->blocks_to_filter_in_userland == 0) { 4305 /* 4306 * No more blocks need to be filtered 4307 * in userland. 4308 */ 4309 handlep->filter_in_userland = 0; 4310 } 4311 } 4312 4313 /* next block */ 4314 if (++handle->offset >= handle->cc) 4315 handle->offset = 0; 4316 4317 /* check for break loop condition*/ 4318 if (handle->break_loop) { 4319 handle->break_loop = 0; 4320 return PCAP_ERROR_BREAK; 4321 } 4322 } 4323 return pkts; 4324 } 4325 4326 #ifdef HAVE_TPACKET3 4327 static int 4328 pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback, 4329 u_char *user) 4330 { 4331 struct pcap_linux *handlep = handle->priv; 4332 union thdr h; 4333 int pkts = 0; 4334 int ret; 4335 4336 again: 4337 if (handlep->current_packet == NULL) { 4338 /* wait for frames availability.*/ 4339 h.raw = RING_GET_CURRENT_FRAME(handle); 4340 if (!packet_mmap_v3_acquire(h.h3)) { 4341 /* 4342 * The current frame is owned by the kernel; wait 4343 * for a frame to be handed to us. 4344 */ 4345 ret = pcap_wait_for_frames_mmap(handle); 4346 if (ret) { 4347 return ret; 4348 } 4349 } 4350 } 4351 h.raw = RING_GET_CURRENT_FRAME(handle); 4352 if (!packet_mmap_v3_acquire(h.h3)) { 4353 if (pkts == 0 && handlep->timeout == 0) { 4354 /* Block until we see a packet. */ 4355 goto again; 4356 } 4357 return pkts; 4358 } 4359 4360 /* 4361 * This can conceivably process more than INT_MAX packets, 4362 * which would overflow the packet count, causing it either 4363 * to look like a negative number, and thus cause us to 4364 * return a value that looks like an error, or overflow 4365 * back into positive territory, and thus cause us to 4366 * return a too-low count. 4367 * 4368 * Therefore, if the packet count is unlimited, we clip 4369 * it at INT_MAX; this routine is not expected to 4370 * process packets indefinitely, so that's not an issue. 4371 */ 4372 if (PACKET_COUNT_IS_UNLIMITED(max_packets)) 4373 max_packets = INT_MAX; 4374 4375 while (pkts < max_packets) { 4376 int packets_to_read; 4377 4378 if (handlep->current_packet == NULL) { 4379 h.raw = RING_GET_CURRENT_FRAME(handle); 4380 if (!packet_mmap_v3_acquire(h.h3)) 4381 break; 4382 4383 handlep->current_packet = h.raw + h.h3->hdr.bh1.offset_to_first_pkt; 4384 handlep->packets_left = h.h3->hdr.bh1.num_pkts; 4385 } 4386 packets_to_read = handlep->packets_left; 4387 4388 if (packets_to_read > (max_packets - pkts)) { 4389 /* 4390 * There are more packets in the buffer than 4391 * the number of packets we have left to 4392 * process to get up to the maximum number 4393 * of packets to process. Only process enough 4394 * of them to get us up to that maximum. 4395 */ 4396 packets_to_read = max_packets - pkts; 4397 } 4398 4399 while (packets_to_read-- && !handle->break_loop) { 4400 struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet; 4401 ret = pcap_handle_packet_mmap( 4402 handle, 4403 callback, 4404 user, 4405 handlep->current_packet, 4406 tp3_hdr->tp_len, 4407 tp3_hdr->tp_mac, 4408 tp3_hdr->tp_snaplen, 4409 tp3_hdr->tp_sec, 4410 handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000, 4411 VLAN_VALID(tp3_hdr, &tp3_hdr->hv1), 4412 tp3_hdr->hv1.tp_vlan_tci, 4413 VLAN_TPID(tp3_hdr, &tp3_hdr->hv1)); 4414 if (ret == 1) { 4415 pkts++; 4416 } else if (ret < 0) { 4417 handlep->current_packet = NULL; 4418 return ret; 4419 } 4420 handlep->current_packet += tp3_hdr->tp_next_offset; 4421 handlep->packets_left--; 4422 } 4423 4424 if (handlep->packets_left <= 0) { 4425 /* 4426 * Hand this block back to the kernel, and, if 4427 * we're counting blocks that need to be 4428 * filtered in userland after having been 4429 * filtered by the kernel, count the one we've 4430 * just processed. 4431 */ 4432 packet_mmap_v3_release(h.h3); 4433 if (handlep->blocks_to_filter_in_userland > 0) { 4434 handlep->blocks_to_filter_in_userland--; 4435 if (handlep->blocks_to_filter_in_userland == 0) { 4436 /* 4437 * No more blocks need to be filtered 4438 * in userland. 4439 */ 4440 handlep->filter_in_userland = 0; 4441 } 4442 } 4443 4444 /* next block */ 4445 if (++handle->offset >= handle->cc) 4446 handle->offset = 0; 4447 4448 handlep->current_packet = NULL; 4449 } 4450 4451 /* check for break loop condition*/ 4452 if (handle->break_loop) { 4453 handle->break_loop = 0; 4454 return PCAP_ERROR_BREAK; 4455 } 4456 } 4457 if (pkts == 0 && handlep->timeout == 0) { 4458 /* Block until we see a packet. */ 4459 goto again; 4460 } 4461 return pkts; 4462 } 4463 #endif /* HAVE_TPACKET3 */ 4464 4465 /* 4466 * Attach the given BPF code to the packet capture device. 4467 */ 4468 static int 4469 pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter) 4470 { 4471 struct pcap_linux *handlep; 4472 struct sock_fprog fcode; 4473 int can_filter_in_kernel; 4474 int err = 0; 4475 int n, offset; 4476 4477 if (!handle) 4478 return -1; 4479 if (!filter) { 4480 pcapint_strlcpy(handle->errbuf, "setfilter: No filter specified", 4481 PCAP_ERRBUF_SIZE); 4482 return -1; 4483 } 4484 4485 handlep = handle->priv; 4486 4487 /* Make our private copy of the filter */ 4488 4489 if (pcapint_install_bpf_program(handle, filter) < 0) 4490 /* pcapint_install_bpf_program() filled in errbuf */ 4491 return -1; 4492 4493 /* 4494 * Run user level packet filter by default. Will be overridden if 4495 * installing a kernel filter succeeds. 4496 */ 4497 handlep->filter_in_userland = 1; 4498 4499 /* Install kernel level filter if possible */ 4500 4501 #ifdef USHRT_MAX 4502 if (handle->fcode.bf_len > USHRT_MAX) { 4503 /* 4504 * fcode.len is an unsigned short for current kernel. 4505 * I have yet to see BPF-Code with that much 4506 * instructions but still it is possible. So for the 4507 * sake of correctness I added this check. 4508 */ 4509 fprintf(stderr, "Warning: Filter too complex for kernel\n"); 4510 fcode.len = 0; 4511 fcode.filter = NULL; 4512 can_filter_in_kernel = 0; 4513 } else 4514 #endif /* USHRT_MAX */ 4515 { 4516 /* 4517 * Oh joy, the Linux kernel uses struct sock_fprog instead 4518 * of struct bpf_program and of course the length field is 4519 * of different size. Pointed out by Sebastian 4520 * 4521 * Oh, and we also need to fix it up so that all "ret" 4522 * instructions with non-zero operands have MAXIMUM_SNAPLEN 4523 * as the operand if we're not capturing in memory-mapped 4524 * mode, and so that, if we're in cooked mode, all memory- 4525 * reference instructions use special magic offsets in 4526 * references to the link-layer header and assume that the 4527 * link-layer payload begins at 0; "fix_program()" will do 4528 * that. 4529 */ 4530 switch (fix_program(handle, &fcode)) { 4531 4532 case -1: 4533 default: 4534 /* 4535 * Fatal error; just quit. 4536 * (The "default" case shouldn't happen; we 4537 * return -1 for that reason.) 4538 */ 4539 return -1; 4540 4541 case 0: 4542 /* 4543 * The program performed checks that we can't make 4544 * work in the kernel. 4545 */ 4546 can_filter_in_kernel = 0; 4547 break; 4548 4549 case 1: 4550 /* 4551 * We have a filter that'll work in the kernel. 4552 */ 4553 can_filter_in_kernel = 1; 4554 break; 4555 } 4556 } 4557 4558 /* 4559 * NOTE: at this point, we've set both the "len" and "filter" 4560 * fields of "fcode". As of the 2.6.32.4 kernel, at least, 4561 * those are the only members of the "sock_fprog" structure, 4562 * so we initialize every member of that structure. 4563 * 4564 * If there is anything in "fcode" that is not initialized, 4565 * it is either a field added in a later kernel, or it's 4566 * padding. 4567 * 4568 * If a new field is added, this code needs to be updated 4569 * to set it correctly. 4570 * 4571 * If there are no other fields, then: 4572 * 4573 * if the Linux kernel looks at the padding, it's 4574 * buggy; 4575 * 4576 * if the Linux kernel doesn't look at the padding, 4577 * then if some tool complains that we're passing 4578 * uninitialized data to the kernel, then the tool 4579 * is buggy and needs to understand that it's just 4580 * padding. 4581 */ 4582 if (can_filter_in_kernel) { 4583 if ((err = set_kernel_filter(handle, &fcode)) == 0) 4584 { 4585 /* 4586 * Installation succeeded - using kernel filter, 4587 * so userland filtering not needed. 4588 */ 4589 handlep->filter_in_userland = 0; 4590 } 4591 else if (err == -1) /* Non-fatal error */ 4592 { 4593 /* 4594 * Print a warning if we weren't able to install 4595 * the filter for a reason other than "this kernel 4596 * isn't configured to support socket filters. 4597 */ 4598 if (errno == ENOMEM) { 4599 /* 4600 * Either a kernel memory allocation 4601 * failure occurred, or there's too 4602 * much "other/option memory" allocated 4603 * for this socket. Suggest that they 4604 * increase the "other/option memory" 4605 * limit. 4606 */ 4607 fprintf(stderr, 4608 "Warning: Couldn't allocate kernel memory for filter: try increasing net.core.optmem_max with sysctl\n"); 4609 } else if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) { 4610 fprintf(stderr, 4611 "Warning: Kernel filter failed: %s\n", 4612 pcap_strerror(errno)); 4613 } 4614 } 4615 } 4616 4617 /* 4618 * If we're not using the kernel filter, get rid of any kernel 4619 * filter that might've been there before, e.g. because the 4620 * previous filter could work in the kernel, or because some other 4621 * code attached a filter to the socket by some means other than 4622 * calling "pcap_setfilter()". Otherwise, the kernel filter may 4623 * filter out packets that would pass the new userland filter. 4624 */ 4625 if (handlep->filter_in_userland) { 4626 if (reset_kernel_filter(handle) == -1) { 4627 pcapint_fmt_errmsg_for_errno(handle->errbuf, 4628 PCAP_ERRBUF_SIZE, errno, 4629 "can't remove kernel filter"); 4630 err = -2; /* fatal error */ 4631 } 4632 } 4633 4634 /* 4635 * Free up the copy of the filter that was made by "fix_program()". 4636 */ 4637 if (fcode.filter != NULL) 4638 free(fcode.filter); 4639 4640 if (err == -2) 4641 /* Fatal error */ 4642 return -1; 4643 4644 /* 4645 * If we're filtering in userland, there's nothing to do; 4646 * the new filter will be used for the next packet. 4647 */ 4648 if (handlep->filter_in_userland) 4649 return 0; 4650 4651 /* 4652 * We're filtering in the kernel; the packets present in 4653 * all blocks currently in the ring were already filtered 4654 * by the old filter, and so will need to be filtered in 4655 * userland by the new filter. 4656 * 4657 * Get an upper bound for the number of such blocks; first, 4658 * walk the ring backward and count the free blocks. 4659 */ 4660 offset = handle->offset; 4661 if (--offset < 0) 4662 offset = handle->cc - 1; 4663 for (n=0; n < handle->cc; ++n) { 4664 if (--offset < 0) 4665 offset = handle->cc - 1; 4666 if (pcap_get_ring_frame_status(handle, offset) != TP_STATUS_KERNEL) 4667 break; 4668 } 4669 4670 /* 4671 * If we found free blocks, decrement the count of free 4672 * blocks by 1, just in case we lost a race with another 4673 * thread of control that was adding a packet while 4674 * we were counting and that had run the filter before 4675 * we changed it. 4676 * 4677 * XXX - could there be more than one block added in 4678 * this fashion? 4679 * 4680 * XXX - is there a way to avoid that race, e.g. somehow 4681 * wait for all packets that passed the old filter to 4682 * be added to the ring? 4683 */ 4684 if (n != 0) 4685 n--; 4686 4687 /* 4688 * Set the count of blocks worth of packets to filter 4689 * in userland to the total number of blocks in the 4690 * ring minus the number of free blocks we found, and 4691 * turn on userland filtering. (The count of blocks 4692 * worth of packets to filter in userland is guaranteed 4693 * not to be zero - n, above, couldn't be set to a 4694 * value > handle->cc, and if it were equal to 4695 * handle->cc, it wouldn't be zero, and thus would 4696 * be decremented to handle->cc - 1.) 4697 */ 4698 handlep->blocks_to_filter_in_userland = handle->cc - n; 4699 handlep->filter_in_userland = 1; 4700 4701 return 0; 4702 } 4703 4704 /* 4705 * Return the index of the given device name. Fill ebuf and return 4706 * -1 on failure. 4707 */ 4708 static int 4709 iface_get_id(int fd, const char *device, char *ebuf) 4710 { 4711 struct ifreq ifr; 4712 4713 memset(&ifr, 0, sizeof(ifr)); 4714 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 4715 4716 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) { 4717 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4718 errno, "SIOCGIFINDEX"); 4719 return -1; 4720 } 4721 4722 return ifr.ifr_ifindex; 4723 } 4724 4725 /* 4726 * Bind the socket associated with FD to the given device. 4727 * Return 0 on success or a PCAP_ERROR_ value on a hard error. 4728 */ 4729 static int 4730 iface_bind(int fd, int ifindex, char *ebuf, int protocol) 4731 { 4732 struct sockaddr_ll sll; 4733 int ret, err; 4734 socklen_t errlen = sizeof(err); 4735 4736 memset(&sll, 0, sizeof(sll)); 4737 sll.sll_family = AF_PACKET; 4738 sll.sll_ifindex = ifindex < 0 ? 0 : ifindex; 4739 sll.sll_protocol = protocol; 4740 4741 if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) { 4742 if (errno == ENETDOWN) { 4743 /* 4744 * Return a "network down" indication, so that 4745 * the application can report that rather than 4746 * saying we had a mysterious failure and 4747 * suggest that they report a problem to the 4748 * libpcap developers. 4749 */ 4750 return PCAP_ERROR_IFACE_NOT_UP; 4751 } 4752 if (errno == ENODEV) { 4753 /* 4754 * There's nothing more to say, so clear the 4755 * error message. 4756 */ 4757 ebuf[0] = '\0'; 4758 ret = PCAP_ERROR_NO_SUCH_DEVICE; 4759 } else { 4760 ret = PCAP_ERROR; 4761 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4762 errno, "bind"); 4763 } 4764 return ret; 4765 } 4766 4767 /* Any pending errors, e.g., network is down? */ 4768 4769 if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) { 4770 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4771 errno, "getsockopt (SO_ERROR)"); 4772 return PCAP_ERROR; 4773 } 4774 4775 if (err == ENETDOWN) { 4776 /* 4777 * Return a "network down" indication, so that 4778 * the application can report that rather than 4779 * saying we had a mysterious failure and 4780 * suggest that they report a problem to the 4781 * libpcap developers. 4782 */ 4783 return PCAP_ERROR_IFACE_NOT_UP; 4784 } else if (err > 0) { 4785 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4786 err, "bind"); 4787 return PCAP_ERROR; 4788 } 4789 4790 return 0; 4791 } 4792 4793 /* 4794 * Try to enter monitor mode. 4795 * If we have libnl, try to create a new monitor-mode device and 4796 * capture on that; otherwise, just say "not supported". 4797 */ 4798 #ifdef HAVE_LIBNL 4799 static int 4800 enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device) 4801 { 4802 struct pcap_linux *handlep = handle->priv; 4803 int ret; 4804 char phydev_path[PATH_MAX+1]; 4805 struct nl80211_state nlstate; 4806 struct ifreq ifr; 4807 u_int n; 4808 4809 /* 4810 * Is this a mac80211 device? 4811 */ 4812 ret = get_mac80211_phydev(handle, device, phydev_path, PATH_MAX); 4813 if (ret < 0) 4814 return ret; /* error */ 4815 if (ret == 0) 4816 return 0; /* no error, but not mac80211 device */ 4817 4818 /* 4819 * XXX - is this already a monN device? 4820 * If so, we're done. 4821 */ 4822 4823 /* 4824 * OK, it's apparently a mac80211 device. 4825 * Try to find an unused monN device for it. 4826 */ 4827 ret = nl80211_init(handle, &nlstate, device); 4828 if (ret != 0) 4829 return ret; 4830 for (n = 0; n < UINT_MAX; n++) { 4831 /* 4832 * Try mon{n}. 4833 */ 4834 char mondevice[3+10+1]; /* mon{UINT_MAX}\0 */ 4835 4836 snprintf(mondevice, sizeof mondevice, "mon%u", n); 4837 ret = add_mon_if(handle, sock_fd, &nlstate, device, mondevice); 4838 if (ret == 1) { 4839 /* 4840 * Success. We don't clean up the libnl state 4841 * yet, as we'll be using it later. 4842 */ 4843 goto added; 4844 } 4845 if (ret < 0) { 4846 /* 4847 * Hard failure. Just return ret; handle->errbuf 4848 * has already been set. 4849 */ 4850 nl80211_cleanup(&nlstate); 4851 return ret; 4852 } 4853 } 4854 4855 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 4856 "%s: No free monN interfaces", device); 4857 nl80211_cleanup(&nlstate); 4858 return PCAP_ERROR; 4859 4860 added: 4861 4862 #if 0 4863 /* 4864 * Sleep for .1 seconds. 4865 */ 4866 delay.tv_sec = 0; 4867 delay.tv_nsec = 500000000; 4868 nanosleep(&delay, NULL); 4869 #endif 4870 4871 /* 4872 * If we haven't already done so, arrange to have 4873 * "pcap_close_all()" called when we exit. 4874 */ 4875 if (!pcapint_do_addexit(handle)) { 4876 /* 4877 * "atexit()" failed; don't put the interface 4878 * in rfmon mode, just give up. 4879 */ 4880 del_mon_if(handle, sock_fd, &nlstate, device, 4881 handlep->mondevice); 4882 nl80211_cleanup(&nlstate); 4883 return PCAP_ERROR; 4884 } 4885 4886 /* 4887 * Now configure the monitor interface up. 4888 */ 4889 memset(&ifr, 0, sizeof(ifr)); 4890 pcapint_strlcpy(ifr.ifr_name, handlep->mondevice, sizeof(ifr.ifr_name)); 4891 if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) { 4892 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 4893 errno, "%s: Can't get flags for %s", device, 4894 handlep->mondevice); 4895 del_mon_if(handle, sock_fd, &nlstate, device, 4896 handlep->mondevice); 4897 nl80211_cleanup(&nlstate); 4898 return PCAP_ERROR; 4899 } 4900 ifr.ifr_flags |= IFF_UP|IFF_RUNNING; 4901 if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) { 4902 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 4903 errno, "%s: Can't set flags for %s", device, 4904 handlep->mondevice); 4905 del_mon_if(handle, sock_fd, &nlstate, device, 4906 handlep->mondevice); 4907 nl80211_cleanup(&nlstate); 4908 return PCAP_ERROR; 4909 } 4910 4911 /* 4912 * Success. Clean up the libnl state. 4913 */ 4914 nl80211_cleanup(&nlstate); 4915 4916 /* 4917 * Note that we have to delete the monitor device when we close 4918 * the handle. 4919 */ 4920 handlep->must_do_on_close |= MUST_DELETE_MONIF; 4921 4922 /* 4923 * Add this to the list of pcaps to close when we exit. 4924 */ 4925 pcapint_add_to_pcaps_to_close(handle); 4926 4927 return 1; 4928 } 4929 #else /* HAVE_LIBNL */ 4930 static int 4931 enter_rfmon_mode(pcap_t *handle _U_, int sock_fd _U_, const char *device _U_) 4932 { 4933 /* 4934 * We don't have libnl, so we can't do monitor mode. 4935 */ 4936 return 0; 4937 } 4938 #endif /* HAVE_LIBNL */ 4939 4940 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) 4941 /* 4942 * Map SOF_TIMESTAMPING_ values to PCAP_TSTAMP_ values. 4943 */ 4944 static const struct { 4945 int soft_timestamping_val; 4946 int pcap_tstamp_val; 4947 } sof_ts_type_map[3] = { 4948 { SOF_TIMESTAMPING_SOFTWARE, PCAP_TSTAMP_HOST }, 4949 { SOF_TIMESTAMPING_SYS_HARDWARE, PCAP_TSTAMP_ADAPTER }, 4950 { SOF_TIMESTAMPING_RAW_HARDWARE, PCAP_TSTAMP_ADAPTER_UNSYNCED } 4951 }; 4952 #define NUM_SOF_TIMESTAMPING_TYPES (sizeof sof_ts_type_map / sizeof sof_ts_type_map[0]) 4953 4954 /* 4955 * Set the list of time stamping types to include all types. 4956 */ 4957 static int 4958 iface_set_all_ts_types(pcap_t *handle, char *ebuf) 4959 { 4960 u_int i; 4961 4962 handle->tstamp_type_list = malloc(NUM_SOF_TIMESTAMPING_TYPES * sizeof(u_int)); 4963 if (handle->tstamp_type_list == NULL) { 4964 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 4965 errno, "malloc"); 4966 return -1; 4967 } 4968 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) 4969 handle->tstamp_type_list[i] = sof_ts_type_map[i].pcap_tstamp_val; 4970 handle->tstamp_type_count = NUM_SOF_TIMESTAMPING_TYPES; 4971 return 0; 4972 } 4973 4974 /* 4975 * Get a list of time stamp types. 4976 */ 4977 #ifdef ETHTOOL_GET_TS_INFO 4978 static int 4979 iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 4980 { 4981 int fd; 4982 struct ifreq ifr; 4983 struct ethtool_ts_info info; 4984 int num_ts_types; 4985 u_int i, j; 4986 4987 /* 4988 * This doesn't apply to the "any" device; you can't say "turn on 4989 * hardware time stamping for all devices that exist now and arrange 4990 * that it be turned on for any device that appears in the future", 4991 * and not all devices even necessarily *support* hardware time 4992 * stamping, so don't report any time stamp types. 4993 */ 4994 if (strcmp(device, "any") == 0) { 4995 handle->tstamp_type_list = NULL; 4996 return 0; 4997 } 4998 4999 /* 5000 * Create a socket from which to fetch time stamping capabilities. 5001 */ 5002 fd = get_if_ioctl_socket(); 5003 if (fd < 0) { 5004 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5005 errno, "socket for SIOCETHTOOL(ETHTOOL_GET_TS_INFO)"); 5006 return -1; 5007 } 5008 5009 memset(&ifr, 0, sizeof(ifr)); 5010 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5011 memset(&info, 0, sizeof(info)); 5012 info.cmd = ETHTOOL_GET_TS_INFO; 5013 ifr.ifr_data = (caddr_t)&info; 5014 if (ioctl(fd, SIOCETHTOOL, &ifr) == -1) { 5015 int save_errno = errno; 5016 5017 close(fd); 5018 switch (save_errno) { 5019 5020 case EOPNOTSUPP: 5021 case EINVAL: 5022 /* 5023 * OK, this OS version or driver doesn't support 5024 * asking for the time stamping types, so let's 5025 * just return all the possible types. 5026 */ 5027 if (iface_set_all_ts_types(handle, ebuf) == -1) 5028 return -1; 5029 return 0; 5030 5031 case ENODEV: 5032 /* 5033 * OK, no such device. 5034 * The user will find that out when they try to 5035 * activate the device; just return an empty 5036 * list of time stamp types. 5037 */ 5038 handle->tstamp_type_list = NULL; 5039 return 0; 5040 5041 default: 5042 /* 5043 * Other error. 5044 */ 5045 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5046 save_errno, 5047 "%s: SIOCETHTOOL(ETHTOOL_GET_TS_INFO) ioctl failed", 5048 device); 5049 return -1; 5050 } 5051 } 5052 close(fd); 5053 5054 /* 5055 * Do we support hardware time stamping of *all* packets? 5056 */ 5057 if (!(info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))) { 5058 /* 5059 * No, so don't report any time stamp types. 5060 * 5061 * XXX - some devices either don't report 5062 * HWTSTAMP_FILTER_ALL when they do support it, or 5063 * report HWTSTAMP_FILTER_ALL but map it to only 5064 * time stamping a few PTP packets. See 5065 * http://marc.info/?l=linux-netdev&m=146318183529571&w=2 5066 * 5067 * Maybe that got fixed later. 5068 */ 5069 handle->tstamp_type_list = NULL; 5070 return 0; 5071 } 5072 5073 num_ts_types = 0; 5074 for (i = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 5075 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) 5076 num_ts_types++; 5077 } 5078 if (num_ts_types != 0) { 5079 handle->tstamp_type_list = malloc(num_ts_types * sizeof(u_int)); 5080 if (handle->tstamp_type_list == NULL) { 5081 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5082 errno, "malloc"); 5083 return -1; 5084 } 5085 for (i = 0, j = 0; i < NUM_SOF_TIMESTAMPING_TYPES; i++) { 5086 if (info.so_timestamping & sof_ts_type_map[i].soft_timestamping_val) { 5087 handle->tstamp_type_list[j] = sof_ts_type_map[i].pcap_tstamp_val; 5088 j++; 5089 } 5090 } 5091 handle->tstamp_type_count = num_ts_types; 5092 } else 5093 handle->tstamp_type_list = NULL; 5094 5095 return 0; 5096 } 5097 #else /* ETHTOOL_GET_TS_INFO */ 5098 static int 5099 iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf) 5100 { 5101 /* 5102 * This doesn't apply to the "any" device; you can't say "turn on 5103 * hardware time stamping for all devices that exist now and arrange 5104 * that it be turned on for any device that appears in the future", 5105 * and not all devices even necessarily *support* hardware time 5106 * stamping, so don't report any time stamp types. 5107 */ 5108 if (strcmp(device, "any") == 0) { 5109 handle->tstamp_type_list = NULL; 5110 return 0; 5111 } 5112 5113 /* 5114 * We don't have an ioctl to use to ask what's supported, 5115 * so say we support everything. 5116 */ 5117 if (iface_set_all_ts_types(handle, ebuf) == -1) 5118 return -1; 5119 return 0; 5120 } 5121 #endif /* ETHTOOL_GET_TS_INFO */ 5122 #else /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 5123 static int 5124 iface_get_ts_types(const char *device _U_, pcap_t *p _U_, char *ebuf _U_) 5125 { 5126 /* 5127 * Nothing to fetch, so it always "succeeds". 5128 */ 5129 return 0; 5130 } 5131 #endif /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */ 5132 5133 /* 5134 * Find out if we have any form of fragmentation/reassembly offloading. 5135 * 5136 * We do so using SIOCETHTOOL checking for various types of offloading; 5137 * if SIOCETHTOOL isn't defined, or we don't have any #defines for any 5138 * of the types of offloading, there's nothing we can do to check, so 5139 * we just say "no, we don't". 5140 * 5141 * We treat EOPNOTSUPP, EINVAL and, if eperm_ok is true, EPERM as 5142 * indications that the operation isn't supported. We do EPERM 5143 * weirdly because the SIOCETHTOOL code in later kernels 1) doesn't 5144 * support ETHTOOL_GUFO, 2) also doesn't include it in the list 5145 * of ethtool operations that don't require CAP_NET_ADMIN privileges, 5146 * and 3) does the "is this permitted" check before doing the "is 5147 * this even supported" check, so it fails with "this is not permitted" 5148 * rather than "this is not even supported". To work around this 5149 * annoyance, we only treat EPERM as an error for the first feature, 5150 * and assume that they all do the same permission checks, so if the 5151 * first one is allowed all the others are allowed if supported. 5152 */ 5153 #if defined(SIOCETHTOOL) && (defined(ETHTOOL_GTSO) || defined(ETHTOOL_GUFO) || defined(ETHTOOL_GGSO) || defined(ETHTOOL_GFLAGS) || defined(ETHTOOL_GGRO)) 5154 static int 5155 iface_ethtool_flag_ioctl(pcap_t *handle, int cmd, const char *cmdname, 5156 int eperm_ok) 5157 { 5158 struct ifreq ifr; 5159 struct ethtool_value eval; 5160 5161 memset(&ifr, 0, sizeof(ifr)); 5162 pcapint_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name)); 5163 eval.cmd = cmd; 5164 eval.data = 0; 5165 ifr.ifr_data = (caddr_t)&eval; 5166 if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) { 5167 if (errno == EOPNOTSUPP || errno == EINVAL || 5168 (errno == EPERM && eperm_ok)) { 5169 /* 5170 * OK, let's just return 0, which, in our 5171 * case, either means "no, what we're asking 5172 * about is not enabled" or "all the flags 5173 * are clear (i.e., nothing is enabled)". 5174 */ 5175 return 0; 5176 } 5177 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5178 errno, "%s: SIOCETHTOOL(%s) ioctl failed", 5179 handle->opt.device, cmdname); 5180 return -1; 5181 } 5182 return eval.data; 5183 } 5184 5185 /* 5186 * XXX - it's annoying that we have to check for offloading at all, but, 5187 * given that we have to, it's still annoying that we have to check for 5188 * particular types of offloading, especially that shiny new types of 5189 * offloading may be added - and, worse, may not be checkable with 5190 * a particular ETHTOOL_ operation; ETHTOOL_GFEATURES would, in 5191 * theory, give those to you, but the actual flags being used are 5192 * opaque (defined in a non-uapi header), and there doesn't seem to 5193 * be any obvious way to ask the kernel what all the offloading flags 5194 * are - at best, you can ask for a set of strings(!) to get *names* 5195 * for various flags. (That whole mechanism appears to have been 5196 * designed for the sole purpose of letting ethtool report flags 5197 * by name and set flags by name, with the names having no semantics 5198 * ethtool understands.) 5199 */ 5200 static int 5201 iface_get_offload(pcap_t *handle) 5202 { 5203 int ret; 5204 5205 #ifdef ETHTOOL_GTSO 5206 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO", 0); 5207 if (ret == -1) 5208 return -1; 5209 if (ret) 5210 return 1; /* TCP segmentation offloading on */ 5211 #endif 5212 5213 #ifdef ETHTOOL_GGSO 5214 /* 5215 * XXX - will this cause large unsegmented packets to be 5216 * handed to PF_PACKET sockets on transmission? If not, 5217 * this need not be checked. 5218 */ 5219 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO", 0); 5220 if (ret == -1) 5221 return -1; 5222 if (ret) 5223 return 1; /* generic segmentation offloading on */ 5224 #endif 5225 5226 #ifdef ETHTOOL_GFLAGS 5227 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS", 0); 5228 if (ret == -1) 5229 return -1; 5230 if (ret & ETH_FLAG_LRO) 5231 return 1; /* large receive offloading on */ 5232 #endif 5233 5234 #ifdef ETHTOOL_GGRO 5235 /* 5236 * XXX - will this cause large reassembled packets to be 5237 * handed to PF_PACKET sockets on receipt? If not, 5238 * this need not be checked. 5239 */ 5240 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO", 0); 5241 if (ret == -1) 5242 return -1; 5243 if (ret) 5244 return 1; /* generic (large) receive offloading on */ 5245 #endif 5246 5247 #ifdef ETHTOOL_GUFO 5248 /* 5249 * Do this one last, as support for it was removed in later 5250 * kernels, and it fails with EPERM on those kernels rather 5251 * than with EOPNOTSUPP (see explanation in comment for 5252 * iface_ethtool_flag_ioctl()). 5253 */ 5254 ret = iface_ethtool_flag_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO", 1); 5255 if (ret == -1) 5256 return -1; 5257 if (ret) 5258 return 1; /* UDP fragmentation offloading on */ 5259 #endif 5260 5261 return 0; 5262 } 5263 #else /* SIOCETHTOOL */ 5264 static int 5265 iface_get_offload(pcap_t *handle _U_) 5266 { 5267 /* 5268 * XXX - do we need to get this information if we don't 5269 * have the ethtool ioctls? If so, how do we do that? 5270 */ 5271 return 0; 5272 } 5273 #endif /* SIOCETHTOOL */ 5274 5275 static struct dsa_proto { 5276 const char *name; 5277 bpf_u_int32 linktype; 5278 } dsa_protos[] = { 5279 /* 5280 * None is special and indicates that the interface does not have 5281 * any tagging protocol configured, and is therefore a standard 5282 * Ethernet interface. 5283 */ 5284 { "none", DLT_EN10MB }, 5285 { "brcm", DLT_DSA_TAG_BRCM }, 5286 { "brcm-prepend", DLT_DSA_TAG_BRCM_PREPEND }, 5287 { "dsa", DLT_DSA_TAG_DSA }, 5288 { "edsa", DLT_DSA_TAG_EDSA }, 5289 }; 5290 5291 static int 5292 iface_dsa_get_proto_info(const char *device, pcap_t *handle) 5293 { 5294 char *pathstr; 5295 unsigned int i; 5296 /* 5297 * Make this significantly smaller than PCAP_ERRBUF_SIZE; 5298 * the tag *shouldn't* have some huge long name, and making 5299 * it smaller keeps newer versions of GCC from whining that 5300 * the error message if we don't support the tag could 5301 * overflow the error message buffer. 5302 */ 5303 char buf[128]; 5304 ssize_t r; 5305 int fd; 5306 5307 fd = asprintf(&pathstr, "/sys/class/net/%s/dsa/tagging", device); 5308 if (fd < 0) { 5309 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5310 fd, "asprintf"); 5311 return PCAP_ERROR; 5312 } 5313 5314 fd = open(pathstr, O_RDONLY); 5315 free(pathstr); 5316 /* 5317 * This is not fatal, kernel >= 4.20 *might* expose this attribute 5318 */ 5319 if (fd < 0) 5320 return 0; 5321 5322 r = read(fd, buf, sizeof(buf) - 1); 5323 if (r <= 0) { 5324 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5325 errno, "read"); 5326 close(fd); 5327 return PCAP_ERROR; 5328 } 5329 close(fd); 5330 5331 /* 5332 * Buffer should be LF terminated. 5333 */ 5334 if (buf[r - 1] == '\n') 5335 r--; 5336 buf[r] = '\0'; 5337 5338 for (i = 0; i < sizeof(dsa_protos) / sizeof(dsa_protos[0]); i++) { 5339 if (strlen(dsa_protos[i].name) == (size_t)r && 5340 strcmp(buf, dsa_protos[i].name) == 0) { 5341 handle->linktype = dsa_protos[i].linktype; 5342 switch (dsa_protos[i].linktype) { 5343 case DLT_EN10MB: 5344 return 0; 5345 default: 5346 return 1; 5347 } 5348 } 5349 } 5350 5351 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 5352 "unsupported DSA tag: %s", buf); 5353 5354 return PCAP_ERROR; 5355 } 5356 5357 /* 5358 * Query the kernel for the MTU of the given interface. 5359 */ 5360 static int 5361 iface_get_mtu(int fd, const char *device, char *ebuf) 5362 { 5363 struct ifreq ifr; 5364 5365 if (!device) 5366 return BIGGER_THAN_ALL_MTUS; 5367 5368 memset(&ifr, 0, sizeof(ifr)); 5369 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5370 5371 if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) { 5372 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5373 errno, "SIOCGIFMTU"); 5374 return -1; 5375 } 5376 5377 return ifr.ifr_mtu; 5378 } 5379 5380 /* 5381 * Get the hardware type of the given interface as ARPHRD_xxx constant. 5382 */ 5383 static int 5384 iface_get_arptype(int fd, const char *device, char *ebuf) 5385 { 5386 struct ifreq ifr; 5387 int ret; 5388 5389 memset(&ifr, 0, sizeof(ifr)); 5390 pcapint_strlcpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 5391 5392 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) { 5393 if (errno == ENODEV) { 5394 /* 5395 * No such device. 5396 * 5397 * There's nothing more to say, so clear 5398 * the error message. 5399 */ 5400 ret = PCAP_ERROR_NO_SUCH_DEVICE; 5401 ebuf[0] = '\0'; 5402 } else { 5403 ret = PCAP_ERROR; 5404 pcapint_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE, 5405 errno, "SIOCGIFHWADDR"); 5406 } 5407 return ret; 5408 } 5409 5410 return ifr.ifr_hwaddr.sa_family; 5411 } 5412 5413 static int 5414 fix_program(pcap_t *handle, struct sock_fprog *fcode) 5415 { 5416 struct pcap_linux *handlep = handle->priv; 5417 size_t prog_size; 5418 register int i; 5419 register struct bpf_insn *p; 5420 struct bpf_insn *f; 5421 int len; 5422 5423 /* 5424 * Make a copy of the filter, and modify that copy if 5425 * necessary. 5426 */ 5427 prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len; 5428 len = handle->fcode.bf_len; 5429 f = (struct bpf_insn *)malloc(prog_size); 5430 if (f == NULL) { 5431 pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE, 5432 errno, "malloc"); 5433 return -1; 5434 } 5435 memcpy(f, handle->fcode.bf_insns, prog_size); 5436 fcode->len = len; 5437 fcode->filter = (struct sock_filter *) f; 5438 5439 for (i = 0; i < len; ++i) { 5440 p = &f[i]; 5441 /* 5442 * What type of instruction is this? 5443 */ 5444 switch (BPF_CLASS(p->code)) { 5445 5446 case BPF_LD: 5447 case BPF_LDX: 5448 /* 5449 * It's a load instruction; is it loading 5450 * from the packet? 5451 */ 5452 switch (BPF_MODE(p->code)) { 5453 5454 case BPF_ABS: 5455 case BPF_IND: 5456 case BPF_MSH: 5457 /* 5458 * Yes; are we in cooked mode? 5459 */ 5460 if (handlep->cooked) { 5461 /* 5462 * Yes, so we need to fix this 5463 * instruction. 5464 */ 5465 if (fix_offset(handle, p) < 0) { 5466 /* 5467 * We failed to do so. 5468 * Return 0, so our caller 5469 * knows to punt to userland. 5470 */ 5471 return 0; 5472 } 5473 } 5474 break; 5475 } 5476 break; 5477 } 5478 } 5479 return 1; /* we succeeded */ 5480 } 5481 5482 static int 5483 fix_offset(pcap_t *handle, struct bpf_insn *p) 5484 { 5485 /* 5486 * Existing references to auxiliary data shouldn't be adjusted. 5487 * 5488 * Note that SKF_AD_OFF is negative, but p->k is unsigned, so 5489 * we use >= and cast SKF_AD_OFF to unsigned. 5490 */ 5491 if (p->k >= (bpf_u_int32)SKF_AD_OFF) 5492 return 0; 5493 if (handle->linktype == DLT_LINUX_SLL2) { 5494 /* 5495 * What's the offset? 5496 */ 5497 if (p->k >= SLL2_HDR_LEN) { 5498 /* 5499 * It's within the link-layer payload; that starts 5500 * at an offset of 0, as far as the kernel packet 5501 * filter is concerned, so subtract the length of 5502 * the link-layer header. 5503 */ 5504 p->k -= SLL2_HDR_LEN; 5505 } else if (p->k == 0) { 5506 /* 5507 * It's the protocol field; map it to the 5508 * special magic kernel offset for that field. 5509 */ 5510 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 5511 } else if (p->k == 4) { 5512 /* 5513 * It's the ifindex field; map it to the 5514 * special magic kernel offset for that field. 5515 */ 5516 p->k = SKF_AD_OFF + SKF_AD_IFINDEX; 5517 } else if (p->k == 10) { 5518 /* 5519 * It's the packet type field; map it to the 5520 * special magic kernel offset for that field. 5521 */ 5522 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 5523 } else if ((bpf_int32)(p->k) > 0) { 5524 /* 5525 * It's within the header, but it's not one of 5526 * those fields; we can't do that in the kernel, 5527 * so punt to userland. 5528 */ 5529 return -1; 5530 } 5531 } else { 5532 /* 5533 * What's the offset? 5534 */ 5535 if (p->k >= SLL_HDR_LEN) { 5536 /* 5537 * It's within the link-layer payload; that starts 5538 * at an offset of 0, as far as the kernel packet 5539 * filter is concerned, so subtract the length of 5540 * the link-layer header. 5541 */ 5542 p->k -= SLL_HDR_LEN; 5543 } else if (p->k == 0) { 5544 /* 5545 * It's the packet type field; map it to the 5546 * special magic kernel offset for that field. 5547 */ 5548 p->k = SKF_AD_OFF + SKF_AD_PKTTYPE; 5549 } else if (p->k == 14) { 5550 /* 5551 * It's the protocol field; map it to the 5552 * special magic kernel offset for that field. 5553 */ 5554 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 5555 } else if ((bpf_int32)(p->k) > 0) { 5556 /* 5557 * It's within the header, but it's not one of 5558 * those fields; we can't do that in the kernel, 5559 * so punt to userland. 5560 */ 5561 return -1; 5562 } 5563 } 5564 return 0; 5565 } 5566 5567 static int 5568 set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode) 5569 { 5570 int total_filter_on = 0; 5571 int save_mode; 5572 int ret; 5573 int save_errno; 5574 5575 /* 5576 * The socket filter code doesn't discard all packets queued 5577 * up on the socket when the filter is changed; this means 5578 * that packets that don't match the new filter may show up 5579 * after the new filter is put onto the socket, if those 5580 * packets haven't yet been read. 5581 * 5582 * This means, for example, that if you do a tcpdump capture 5583 * with a filter, the first few packets in the capture might 5584 * be packets that wouldn't have passed the filter. 5585 * 5586 * We therefore discard all packets queued up on the socket 5587 * when setting a kernel filter. (This isn't an issue for 5588 * userland filters, as the userland filtering is done after 5589 * packets are queued up.) 5590 * 5591 * To flush those packets, we put the socket in read-only mode, 5592 * and read packets from the socket until there are no more to 5593 * read. 5594 * 5595 * In order to keep that from being an infinite loop - i.e., 5596 * to keep more packets from arriving while we're draining 5597 * the queue - we put the "total filter", which is a filter 5598 * that rejects all packets, onto the socket before draining 5599 * the queue. 5600 * 5601 * This code deliberately ignores any errors, so that you may 5602 * get bogus packets if an error occurs, rather than having 5603 * the filtering done in userland even if it could have been 5604 * done in the kernel. 5605 */ 5606 if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 5607 &total_fcode, sizeof(total_fcode)) == 0) { 5608 char drain[1]; 5609 5610 /* 5611 * Note that we've put the total filter onto the socket. 5612 */ 5613 total_filter_on = 1; 5614 5615 /* 5616 * Save the socket's current mode, and put it in 5617 * non-blocking mode; we drain it by reading packets 5618 * until we get an error (which is normally a 5619 * "nothing more to be read" error). 5620 */ 5621 save_mode = fcntl(handle->fd, F_GETFL, 0); 5622 if (save_mode == -1) { 5623 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5624 PCAP_ERRBUF_SIZE, errno, 5625 "can't get FD flags when changing filter"); 5626 return -2; 5627 } 5628 if (fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) < 0) { 5629 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5630 PCAP_ERRBUF_SIZE, errno, 5631 "can't set nonblocking mode when changing filter"); 5632 return -2; 5633 } 5634 while (recv(handle->fd, &drain, sizeof drain, MSG_TRUNC) >= 0) 5635 ; 5636 save_errno = errno; 5637 if (save_errno != EAGAIN) { 5638 /* 5639 * Fatal error. 5640 * 5641 * If we can't restore the mode or reset the 5642 * kernel filter, there's nothing we can do. 5643 */ 5644 (void)fcntl(handle->fd, F_SETFL, save_mode); 5645 (void)reset_kernel_filter(handle); 5646 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5647 PCAP_ERRBUF_SIZE, save_errno, 5648 "recv failed when changing filter"); 5649 return -2; 5650 } 5651 if (fcntl(handle->fd, F_SETFL, save_mode) == -1) { 5652 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5653 PCAP_ERRBUF_SIZE, errno, 5654 "can't restore FD flags when changing filter"); 5655 return -2; 5656 } 5657 } 5658 5659 /* 5660 * Now attach the new filter. 5661 */ 5662 ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 5663 fcode, sizeof(*fcode)); 5664 if (ret == -1 && total_filter_on) { 5665 /* 5666 * Well, we couldn't set that filter on the socket, 5667 * but we could set the total filter on the socket. 5668 * 5669 * This could, for example, mean that the filter was 5670 * too big to put into the kernel, so we'll have to 5671 * filter in userland; in any case, we'll be doing 5672 * filtering in userland, so we need to remove the 5673 * total filter so we see packets. 5674 */ 5675 save_errno = errno; 5676 5677 /* 5678 * If this fails, we're really screwed; we have the 5679 * total filter on the socket, and it won't come off. 5680 * Report it as a fatal error. 5681 */ 5682 if (reset_kernel_filter(handle) == -1) { 5683 pcapint_fmt_errmsg_for_errno(handle->errbuf, 5684 PCAP_ERRBUF_SIZE, errno, 5685 "can't remove kernel total filter"); 5686 return -2; /* fatal error */ 5687 } 5688 5689 errno = save_errno; 5690 } 5691 return ret; 5692 } 5693 5694 static int 5695 reset_kernel_filter(pcap_t *handle) 5696 { 5697 int ret; 5698 /* 5699 * setsockopt() barfs unless it get a dummy parameter. 5700 * valgrind whines unless the value is initialized, 5701 * as it has no idea that setsockopt() ignores its 5702 * parameter. 5703 */ 5704 int dummy = 0; 5705 5706 ret = setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER, 5707 &dummy, sizeof(dummy)); 5708 /* 5709 * Ignore ENOENT - it means "we don't have a filter", so there 5710 * was no filter to remove, and there's still no filter. 5711 * 5712 * Also ignore ENONET, as a lot of kernel versions had a 5713 * typo where ENONET, rather than ENOENT, was returned. 5714 */ 5715 if (ret == -1 && errno != ENOENT && errno != ENONET) 5716 return -1; 5717 return 0; 5718 } 5719 5720 int 5721 pcap_set_protocol_linux(pcap_t *p, int protocol) 5722 { 5723 if (pcapint_check_activated(p)) 5724 return (PCAP_ERROR_ACTIVATED); 5725 p->opt.protocol = protocol; 5726 return (0); 5727 } 5728 5729 /* 5730 * Libpcap version string. 5731 */ 5732 const char * 5733 pcap_lib_version(void) 5734 { 5735 #if defined(HAVE_TPACKET3) 5736 return (PCAP_VERSION_STRING " (with TPACKET_V3)"); 5737 #else 5738 return (PCAP_VERSION_STRING " (with TPACKET_V2)"); 5739 #endif 5740 } 5741