1 /* 2 * Copyright (c) 2008 Damien Miller <djm@mindrot.org> 3 * Copyright (c) 2011 Christiano F. Haesbaert <haesbaert@haesbaert.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/types.h> 19 #include <sys/time.h> 20 #include <sys/socket.h> 21 #include <sys/socketvar.h> 22 #include <sys/resource.h> 23 #include <sys/queue.h> 24 25 #include <net/route.h> 26 27 #include <netinet/in.h> 28 #include <netinet/in_systm.h> 29 #include <netinet/ip.h> 30 #include <netinet/tcp.h> 31 #include <netinet/tcp_timer.h> 32 #include <netinet/tcp_fsm.h> 33 #include <netinet/in_pcb.h> 34 #include <netinet/tcp_var.h> 35 36 #include <arpa/inet.h> 37 38 #include <unistd.h> 39 #include <limits.h> 40 #include <stdlib.h> 41 #include <stdio.h> 42 #include <string.h> 43 #include <errno.h> 44 #include <event.h> 45 #include <netdb.h> 46 #include <signal.h> 47 #include <err.h> 48 #include <fcntl.h> 49 #include <poll.h> 50 51 #include <kvm.h> 52 #include <nlist.h> 53 54 #define DEFAULT_PORT "12345" 55 #define DEFAULT_STATS_INTERVAL 1000 /* ms */ 56 #define DEFAULT_BUF (256 * 1024) 57 #define DEFAULT_UDP_PKT (1500 - 28) /* TODO don't hardcode this */ 58 #define TCP_MODE !ptb->uflag 59 #define UDP_MODE ptb->uflag 60 #define MAX_FD 1024 61 62 /* Our tcpbench globals */ 63 struct { 64 u_int Vflag; /* rtableid */ 65 int Sflag; /* Socket buffer size (tcp mode) */ 66 u_int rflag; /* Report rate (ms) */ 67 int sflag; /* True if server */ 68 int Tflag; /* ToS if != -1 */ 69 int vflag; /* Verbose */ 70 int uflag; /* UDP mode */ 71 kvm_t *kvmh; /* Kvm handler */ 72 char **kvars; /* Kvm enabled vars */ 73 u_long ktcbtab; /* Ktcb */ 74 char *dummybuf; /* IO buffer */ 75 size_t dummybuf_len; /* IO buffer len */ 76 } tcpbench, *ptb; 77 78 /* stats for a single tcp connection, udp uses only one */ 79 struct statctx { 80 TAILQ_ENTRY(statctx) entry; 81 struct timeval t_start, t_last; 82 unsigned long long bytes; 83 int fd; 84 char *buf; 85 size_t buflen; 86 struct event ev; 87 /* TCP only */ 88 u_long tcp_tcbaddr; 89 /* UDP only */ 90 u_long udp_slice_pkts; 91 }; 92 93 static void signal_handler(int, short, void *); 94 static void saddr_ntop(const struct sockaddr *, socklen_t, char *, size_t); 95 static void drop_gid(void); 96 static void set_slice_timer(int); 97 static void print_tcp_header(void); 98 static void kget(u_long, void *, size_t); 99 static u_long kfind_tcb(int); 100 static void kupdate_stats(u_long, struct inpcb *, struct tcpcb *, 101 struct socket *); 102 static void list_kvars(void); 103 static void check_kvar(const char *); 104 static char ** check_prepare_kvars(char *); 105 static void stats_prepare(struct statctx *); 106 static void tcp_stats_display(unsigned long long, long double, float, 107 struct statctx *, struct inpcb *, struct tcpcb *, struct socket *); 108 static void tcp_process_slice(int, short, void *); 109 static void tcp_server_handle_sc(int, short, void *); 110 static void tcp_server_accept(int, short, void *); 111 static void server_init(struct addrinfo *, struct statctx *); 112 static void client_handle_sc(int, short, void *); 113 static void client_init(struct addrinfo *, int, struct statctx *, 114 struct addrinfo *); 115 static int clock_gettime_tv(clockid_t, struct timeval *); 116 static void udp_server_handle_sc(int, short, void *); 117 static void udp_process_slice(int, short, void *); 118 static int map_tos(char *, int *); 119 /* 120 * We account the mainstats here, that is the stats 121 * for all connections, all variables starting with slice 122 * are used to account information for the timeslice 123 * between each output. Peak variables record the highest 124 * between all slices so far. 125 */ 126 static struct { 127 unsigned long long slice_bytes; /* bytes for last slice */ 128 long double peak_mbps; /* peak mbps so far */ 129 int nconns; /* connected clients */ 130 struct event timer; /* process timer */ 131 } mainstats; 132 133 /* When adding variables, also add to tcp_stats_display() */ 134 static const char *allowed_kvars[] = { 135 "inpcb.inp_flags", 136 "sockb.so_rcv.sb_cc", 137 "sockb.so_rcv.sb_wat", 138 "sockb.so_rcv.sb_hiwat", 139 "sockb.so_snd.sb_cc", 140 "sockb.so_snd.sb_wat", 141 "sockb.so_snd.sb_hiwat", 142 "tcpcb.snd_una", 143 "tcpcb.snd_nxt", 144 "tcpcb.snd_wl1", 145 "tcpcb.snd_wl2", 146 "tcpcb.snd_wnd", 147 "tcpcb.rcv_wnd", 148 "tcpcb.rcv_nxt", 149 "tcpcb.rcv_adv", 150 "tcpcb.snd_max", 151 "tcpcb.snd_cwnd", 152 "tcpcb.snd_ssthresh", 153 "tcpcb.t_rcvtime", 154 "tcpcb.t_rtttime", 155 "tcpcb.t_rtseq", 156 "tcpcb.t_srtt", 157 "tcpcb.t_rttvar", 158 "tcpcb.t_rttmin", 159 "tcpcb.max_sndwnd", 160 "tcpcb.snd_scale", 161 "tcpcb.rcv_scale", 162 "tcpcb.last_ack_sent", 163 "tcpcb.rfbuf_cnt", 164 "tcpcb.rfbuf_ts", 165 "tcpcb.ts_recent_age", 166 "tcpcb.ts_recent", 167 NULL 168 }; 169 170 TAILQ_HEAD(, statctx) sc_queue; 171 172 static void __dead 173 usage(void) 174 { 175 fprintf(stderr, 176 "usage: tcpbench -l\n" 177 " tcpbench [-uv] [-B buf] [-b addr] [-k kvars] [-n connections]\n" 178 " [-p port] [-r interval] [-S space] [-T toskeyword]\n" 179 " [-V rtable] hostname\n" 180 " tcpbench -s [-uv] [-B buf] [-k kvars] [-p port]\n" 181 " [-r interval] [-S space] [-T toskeyword] [-V rtable]\n"); 182 exit(1); 183 } 184 185 static void 186 signal_handler(int sig, short event, void *bula) 187 { 188 /* 189 * signal handler rules don't apply, libevent decouples for us 190 */ 191 switch (sig) { 192 case SIGINT: 193 case SIGTERM: 194 case SIGHUP: 195 warnx("Terminated by signal %d", sig); 196 exit(0); 197 break; /* NOTREACHED */ 198 default: 199 errx(1, "unexpected signal %d", sig); 200 break; /* NOTREACHED */ 201 } 202 } 203 204 static void 205 saddr_ntop(const struct sockaddr *addr, socklen_t alen, char *buf, size_t len) 206 { 207 char hbuf[NI_MAXHOST], pbuf[NI_MAXSERV]; 208 int herr; 209 210 if ((herr = getnameinfo(addr, alen, hbuf, sizeof(hbuf), 211 pbuf, sizeof(pbuf), NI_NUMERICHOST|NI_NUMERICSERV)) != 0) { 212 if (herr == EAI_SYSTEM) 213 err(1, "getnameinfo"); 214 else 215 errx(1, "getnameinfo: %s", gai_strerror(herr)); 216 } 217 snprintf(buf, len, "[%s]:%s", hbuf, pbuf); 218 } 219 220 static void 221 drop_gid(void) 222 { 223 gid_t gid; 224 225 gid = getgid(); 226 if (setresgid(gid, gid, gid) == -1) 227 err(1, "setresgid"); 228 } 229 230 static void 231 set_slice_timer(int on) 232 { 233 struct timeval tv; 234 235 if (ptb->rflag == 0) 236 return; 237 238 if (on) { 239 if (evtimer_pending(&mainstats.timer, NULL)) 240 return; 241 timerclear(&tv); 242 /* XXX Is there a better way to do this ? */ 243 tv.tv_sec = ptb->rflag / 1000; 244 tv.tv_usec = (ptb->rflag % 1000) * 1000; 245 246 evtimer_add(&mainstats.timer, &tv); 247 } else if (evtimer_pending(&mainstats.timer, NULL)) 248 evtimer_del(&mainstats.timer); 249 } 250 251 static int 252 clock_gettime_tv(clockid_t clock_id, struct timeval *tv) 253 { 254 struct timespec ts; 255 256 if (clock_gettime(clock_id, &ts) == -1) 257 return (-1); 258 259 TIMESPEC_TO_TIMEVAL(tv, &ts); 260 261 return (0); 262 } 263 264 static void 265 print_tcp_header(void) 266 { 267 char **kv; 268 269 printf("%12s %14s %12s %8s ", "elapsed_ms", "bytes", "mbps", 270 "bwidth"); 271 for (kv = ptb->kvars; ptb->kvars != NULL && *kv != NULL; kv++) 272 printf("%s%s", kv != ptb->kvars ? "," : "", *kv); 273 printf("\n"); 274 } 275 276 static void 277 kget(u_long addr, void *buf, size_t size) 278 { 279 if (kvm_read(ptb->kvmh, addr, buf, size) != (ssize_t)size) 280 errx(1, "kvm_read: %s", kvm_geterr(ptb->kvmh)); 281 } 282 283 static u_long 284 kfind_tcb(int sock) 285 { 286 struct inpcbtable tcbtab; 287 struct inpcb *head, *next, *prev; 288 struct inpcb inpcb; 289 struct tcpcb tcpcb; 290 291 struct sockaddr_storage me, them; 292 socklen_t melen, themlen; 293 struct sockaddr_in *in4; 294 struct sockaddr_in6 *in6; 295 char tmp1[64], tmp2[64]; 296 int nretry; 297 298 nretry = 10; 299 melen = themlen = sizeof(struct sockaddr_storage); 300 if (getsockname(sock, (struct sockaddr *)&me, &melen) == -1) 301 err(1, "getsockname"); 302 if (getpeername(sock, (struct sockaddr *)&them, &themlen) == -1) 303 err(1, "getpeername"); 304 if (me.ss_family != them.ss_family) 305 errx(1, "%s: me.ss_family != them.ss_family", __func__); 306 if (me.ss_family != AF_INET && me.ss_family != AF_INET6) 307 errx(1, "%s: unknown socket family", __func__); 308 if (ptb->vflag >= 2) { 309 saddr_ntop((struct sockaddr *)&me, me.ss_len, 310 tmp1, sizeof(tmp1)); 311 saddr_ntop((struct sockaddr *)&them, them.ss_len, 312 tmp2, sizeof(tmp2)); 313 fprintf(stderr, "Our socket local %s remote %s\n", tmp1, tmp2); 314 } 315 if (ptb->vflag >= 2) 316 fprintf(stderr, "Using PCB table at %lu\n", ptb->ktcbtab); 317 retry: 318 kget(ptb->ktcbtab, &tcbtab, sizeof(tcbtab)); 319 prev = head = (struct inpcb *)&CIRCLEQ_FIRST( 320 &((struct inpcbtable *)ptb->ktcbtab)->inpt_queue); 321 next = CIRCLEQ_FIRST(&tcbtab.inpt_queue); 322 323 if (ptb->vflag >= 2) 324 fprintf(stderr, "PCB head at %p\n", head); 325 while (next != head) { 326 if (ptb->vflag >= 2) 327 fprintf(stderr, "Checking PCB %p\n", next); 328 kget((u_long)next, &inpcb, sizeof(inpcb)); 329 if (CIRCLEQ_PREV(&inpcb, inp_queue) != prev) { 330 if (nretry--) { 331 warnx("pcb prev pointer insane"); 332 goto retry; 333 } 334 else 335 errx(1, "pcb prev pointer insane," 336 " all attempts exausted"); 337 } 338 prev = next; 339 next = CIRCLEQ_NEXT(&inpcb, inp_queue); 340 341 if (me.ss_family == AF_INET) { 342 if ((inpcb.inp_flags & INP_IPV6) != 0) { 343 if (ptb->vflag >= 2) 344 fprintf(stderr, "Skip: INP_IPV6"); 345 continue; 346 } 347 if (ptb->vflag >= 2) { 348 inet_ntop(AF_INET, &inpcb.inp_laddr, 349 tmp1, sizeof(tmp1)); 350 inet_ntop(AF_INET, &inpcb.inp_faddr, 351 tmp2, sizeof(tmp2)); 352 fprintf(stderr, "PCB %p local: [%s]:%d " 353 "remote: [%s]:%d\n", prev, 354 tmp1, inpcb.inp_lport, 355 tmp2, inpcb.inp_fport); 356 } 357 in4 = (struct sockaddr_in *)&me; 358 if (memcmp(&in4->sin_addr, &inpcb.inp_laddr, 359 sizeof(struct in_addr)) != 0 || 360 in4->sin_port != inpcb.inp_lport) 361 continue; 362 in4 = (struct sockaddr_in *)&them; 363 if (memcmp(&in4->sin_addr, &inpcb.inp_faddr, 364 sizeof(struct in_addr)) != 0 || 365 in4->sin_port != inpcb.inp_fport) 366 continue; 367 } else { 368 if ((inpcb.inp_flags & INP_IPV6) == 0) 369 continue; 370 if (ptb->vflag >= 2) { 371 inet_ntop(AF_INET6, &inpcb.inp_laddr6, 372 tmp1, sizeof(tmp1)); 373 inet_ntop(AF_INET6, &inpcb.inp_faddr6, 374 tmp2, sizeof(tmp2)); 375 fprintf(stderr, "PCB %p local: [%s]:%d " 376 "remote: [%s]:%d\n", prev, 377 tmp1, inpcb.inp_lport, 378 tmp2, inpcb.inp_fport); 379 } 380 in6 = (struct sockaddr_in6 *)&me; 381 if (memcmp(&in6->sin6_addr, &inpcb.inp_laddr6, 382 sizeof(struct in6_addr)) != 0 || 383 in6->sin6_port != inpcb.inp_lport) 384 continue; 385 in6 = (struct sockaddr_in6 *)&them; 386 if (memcmp(&in6->sin6_addr, &inpcb.inp_faddr6, 387 sizeof(struct in6_addr)) != 0 || 388 in6->sin6_port != inpcb.inp_fport) 389 continue; 390 } 391 kget((u_long)inpcb.inp_ppcb, &tcpcb, sizeof(tcpcb)); 392 if (tcpcb.t_state != TCPS_ESTABLISHED) { 393 if (ptb->vflag >= 2) 394 fprintf(stderr, "Not established\n"); 395 continue; 396 } 397 if (ptb->vflag >= 2) 398 fprintf(stderr, "Found PCB at %p\n", prev); 399 return ((u_long)prev); 400 } 401 402 errx(1, "No matching PCB found"); 403 } 404 405 static void 406 kupdate_stats(u_long tcbaddr, struct inpcb *inpcb, 407 struct tcpcb *tcpcb, struct socket *sockb) 408 { 409 kget(tcbaddr, inpcb, sizeof(*inpcb)); 410 kget((u_long)inpcb->inp_ppcb, tcpcb, sizeof(*tcpcb)); 411 kget((u_long)inpcb->inp_socket, sockb, sizeof(*sockb)); 412 } 413 414 static void 415 check_kvar(const char *var) 416 { 417 u_int i; 418 419 for (i = 0; allowed_kvars[i] != NULL; i++) 420 if (strcmp(allowed_kvars[i], var) == 0) 421 return; 422 errx(1, "Unrecognised kvar: %s", var); 423 } 424 425 static void 426 list_kvars(void) 427 { 428 u_int i; 429 430 fprintf(stderr, "Supported kernel variables:\n"); 431 for (i = 0; allowed_kvars[i] != NULL; i++) 432 fprintf(stderr, "\t%s\n", allowed_kvars[i]); 433 } 434 435 static char ** 436 check_prepare_kvars(char *list) 437 { 438 char *item, **ret = NULL; 439 u_int n = 0; 440 441 while ((item = strsep(&list, ", \t\n")) != NULL) { 442 check_kvar(item); 443 if ((ret = realloc(ret, sizeof(*ret) * (++n + 1))) == NULL) 444 errx(1, "realloc(kvars)"); 445 if ((ret[n - 1] = strdup(item)) == NULL) 446 errx(1, "strdup"); 447 ret[n] = NULL; 448 } 449 return (ret); 450 } 451 452 static void 453 stats_prepare(struct statctx *sc) 454 { 455 sc->buf = ptb->dummybuf; 456 sc->buflen = ptb->dummybuf_len; 457 458 if (ptb->kvars) 459 sc->tcp_tcbaddr = kfind_tcb(sc->fd); 460 if (clock_gettime_tv(CLOCK_MONOTONIC, &sc->t_start) == -1) 461 err(1, "clock_gettime_tv"); 462 sc->t_last = sc->t_start; 463 464 } 465 466 static void 467 tcp_stats_display(unsigned long long total_elapsed, long double mbps, 468 float bwperc, struct statctx *sc, struct inpcb *inpcb, 469 struct tcpcb *tcpcb, struct socket *sockb) 470 { 471 int j; 472 473 printf("%12llu %14llu %12.3Lf %7.2f%% ", total_elapsed, sc->bytes, 474 mbps, bwperc); 475 476 if (ptb->kvars != NULL) { 477 kupdate_stats(sc->tcp_tcbaddr, inpcb, tcpcb, 478 sockb); 479 480 for (j = 0; ptb->kvars[j] != NULL; j++) { 481 #define S(a) #a 482 #define P(b, v, f) \ 483 if (strcmp(ptb->kvars[j], S(b.v)) == 0) { \ 484 printf("%s"f, j > 0 ? "," : "", b->v); \ 485 continue; \ 486 } 487 P(inpcb, inp_flags, "0x%08x") 488 P(sockb, so_rcv.sb_cc, "%lu") 489 P(sockb, so_rcv.sb_wat, "%lu") 490 P(sockb, so_rcv.sb_hiwat, "%lu") 491 P(sockb, so_snd.sb_cc, "%lu") 492 P(sockb, so_snd.sb_wat, "%lu") 493 P(sockb, so_snd.sb_hiwat, "%lu") 494 P(tcpcb, snd_una, "%u") 495 P(tcpcb, snd_nxt, "%u") 496 P(tcpcb, snd_wl1, "%u") 497 P(tcpcb, snd_wl2, "%u") 498 P(tcpcb, snd_wnd, "%lu") 499 P(tcpcb, rcv_wnd, "%lu") 500 P(tcpcb, rcv_nxt, "%u") 501 P(tcpcb, rcv_adv, "%u") 502 P(tcpcb, snd_max, "%u") 503 P(tcpcb, snd_cwnd, "%lu") 504 P(tcpcb, snd_ssthresh, "%lu") 505 P(tcpcb, t_rcvtime, "%u") 506 P(tcpcb, t_rtttime, "%u") 507 P(tcpcb, t_rtseq, "%u") 508 P(tcpcb, t_srtt, "%hu") 509 P(tcpcb, t_rttvar, "%hu") 510 P(tcpcb, t_rttmin, "%hu") 511 P(tcpcb, max_sndwnd, "%lu") 512 P(tcpcb, snd_scale, "%u") 513 P(tcpcb, rcv_scale, "%u") 514 P(tcpcb, last_ack_sent, "%u") 515 P(tcpcb, rfbuf_cnt, "%u") 516 P(tcpcb, rfbuf_ts, "%u") 517 P(tcpcb, ts_recent_age, "%u") 518 P(tcpcb, ts_recent, "%u") 519 #undef S 520 #undef P 521 } 522 } 523 printf("\n"); 524 } 525 526 static void 527 tcp_process_slice(int fd, short event, void *bula) 528 { 529 unsigned long long total_elapsed, since_last; 530 long double mbps, slice_mbps = 0; 531 float bwperc; 532 struct statctx *sc; 533 struct timeval t_cur, t_diff; 534 struct inpcb inpcb; 535 struct tcpcb tcpcb; 536 struct socket sockb; 537 538 TAILQ_FOREACH(sc, &sc_queue, entry) { 539 if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1) 540 err(1, "clock_gettime_tv"); 541 if (ptb->kvars != NULL) /* process kernel stats */ 542 kupdate_stats(sc->tcp_tcbaddr, &inpcb, &tcpcb, 543 &sockb); 544 545 timersub(&t_cur, &sc->t_start, &t_diff); 546 total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 547 timersub(&t_cur, &sc->t_last, &t_diff); 548 since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 549 bwperc = (sc->bytes * 100.0) / mainstats.slice_bytes; 550 mbps = (sc->bytes * 8) / (since_last * 1000.0); 551 slice_mbps += mbps; 552 553 tcp_stats_display(total_elapsed, mbps, bwperc, sc, 554 &inpcb, &tcpcb, &sockb); 555 556 sc->t_last = t_cur; 557 sc->bytes = 0; 558 } 559 560 /* process stats for this slice */ 561 if (slice_mbps > mainstats.peak_mbps) 562 mainstats.peak_mbps = slice_mbps; 563 printf("Conn: %3d Mbps: %12.3Lf Peak Mbps: %12.3Lf Avg Mbps: %12.3Lf\n", 564 mainstats.nconns, slice_mbps, mainstats.peak_mbps, 565 slice_mbps / mainstats.nconns); 566 mainstats.slice_bytes = 0; 567 568 set_slice_timer(mainstats.nconns > 0); 569 } 570 571 static void 572 udp_process_slice(int fd, short event, void *v_sc) 573 { 574 struct statctx *sc = v_sc; 575 unsigned long long total_elapsed, since_last; 576 long double slice_mbps, pps; 577 struct timeval t_cur, t_diff; 578 579 if (clock_gettime_tv(CLOCK_MONOTONIC, &t_cur) == -1) 580 err(1, "clock_gettime_tv"); 581 /* Calculate pps */ 582 timersub(&t_cur, &sc->t_start, &t_diff); 583 total_elapsed = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 584 timersub(&t_cur, &sc->t_last, &t_diff); 585 since_last = t_diff.tv_sec * 1000 + t_diff.tv_usec / 1000; 586 slice_mbps = (sc->bytes * 8) / (since_last * 1000.0); 587 pps = (sc->udp_slice_pkts * 1000) / since_last; 588 if (slice_mbps > mainstats.peak_mbps) 589 mainstats.peak_mbps = slice_mbps; 590 printf("Elapsed: %11llu Mbps: %11.3Lf Peak Mbps: %11.3Lf %s PPS: %10.3Lf\n", 591 total_elapsed, slice_mbps, mainstats.peak_mbps, 592 ptb->sflag ? "Rx" : "Tx", pps); 593 594 /* Clean up this slice time */ 595 sc->t_last = t_cur; 596 sc->bytes = 0; 597 sc->udp_slice_pkts = 0; 598 set_slice_timer(1); 599 } 600 601 static void 602 udp_server_handle_sc(int fd, short event, void *v_sc) 603 { 604 ssize_t n; 605 struct statctx *sc = v_sc; 606 607 again: 608 n = read(fd, ptb->dummybuf, ptb->dummybuf_len); 609 if (n == 0) 610 return; 611 else if (n == -1) { 612 if (errno == EINTR) 613 goto again; 614 else if (errno == EWOULDBLOCK) 615 return; 616 warn("fd %d read error", fd); 617 return; 618 } 619 620 if (ptb->vflag >= 3) 621 fprintf(stderr, "read: %zd bytes\n", n); 622 /* If this was our first packet, start slice timer */ 623 if (mainstats.peak_mbps == 0) 624 set_slice_timer(1); 625 /* Account packet */ 626 sc->udp_slice_pkts++; 627 sc->bytes += n; 628 } 629 630 static void 631 tcp_server_handle_sc(int fd, short event, void *v_sc) 632 { 633 struct statctx *sc = v_sc; 634 ssize_t n; 635 636 again: 637 n = read(sc->fd, sc->buf, sc->buflen); 638 if (n == -1) { 639 if (errno == EINTR) 640 goto again; 641 else if (errno == EWOULDBLOCK) 642 return; 643 warn("fd %d read error", sc->fd); 644 return; 645 } else if (n == 0) { 646 if (ptb->vflag) 647 fprintf(stderr, "%8d closed by remote end\n", sc->fd); 648 event_del(&sc->ev); 649 close(sc->fd); 650 TAILQ_REMOVE(&sc_queue, sc, entry); 651 free(sc); 652 mainstats.nconns--; 653 set_slice_timer(mainstats.nconns > 0); 654 return; 655 } 656 if (ptb->vflag >= 3) 657 fprintf(stderr, "read: %zd bytes\n", n); 658 sc->bytes += n; 659 mainstats.slice_bytes += n; 660 } 661 662 static void 663 tcp_server_accept(int fd, short event, void *bula) 664 { 665 int sock, r; 666 struct statctx *sc; 667 struct sockaddr_storage ss; 668 socklen_t sslen; 669 char tmp[128]; 670 671 sslen = sizeof(ss); 672 again: 673 if ((sock = accept(fd, (struct sockaddr *)&ss, &sslen)) == -1) { 674 if (errno == EINTR) 675 goto again; 676 warn("accept"); 677 return; 678 } 679 saddr_ntop((struct sockaddr *)&ss, sslen, 680 tmp, sizeof(tmp)); 681 if ((r = fcntl(sock, F_GETFL, 0)) == -1) 682 err(1, "fcntl(F_GETFL)"); 683 r |= O_NONBLOCK; 684 if (fcntl(sock, F_SETFL, r) == -1) 685 err(1, "fcntl(F_SETFL, O_NONBLOCK)"); 686 if (ptb->Tflag != -1 && ss.ss_family == AF_INET) { 687 if (setsockopt(sock, IPPROTO_IP, IP_TOS, 688 &ptb->Tflag, sizeof(ptb->Tflag))) 689 err(1, "setsockopt IP_TOS"); 690 } 691 if (ptb->Tflag != -1 && ss.ss_family == AF_INET6) { 692 if (setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, 693 &ptb->Tflag, sizeof(ptb->Tflag))) 694 err(1, "setsockopt IPV6_TCLASS"); 695 } 696 /* Alloc client structure and register reading callback */ 697 if ((sc = calloc(1, sizeof(*sc))) == NULL) 698 err(1, "calloc"); 699 sc->fd = sock; 700 stats_prepare(sc); 701 event_set(&sc->ev, sc->fd, EV_READ | EV_PERSIST, 702 tcp_server_handle_sc, sc); 703 event_add(&sc->ev, NULL); 704 TAILQ_INSERT_TAIL(&sc_queue, sc, entry); 705 mainstats.nconns++; 706 set_slice_timer(mainstats.nconns > 0); 707 if (ptb->vflag) 708 warnx("Accepted connection from %s, fd = %d\n", tmp, sc->fd); 709 } 710 711 static void 712 server_init(struct addrinfo *aitop, struct statctx *udp_sc) 713 { 714 char tmp[128]; 715 int sock, on = 1; 716 struct addrinfo *ai; 717 struct event *ev; 718 nfds_t lnfds; 719 720 if (setpgid(0, 0) == -1) 721 err(1, "setpgid"); 722 723 lnfds = 0; 724 for (ai = aitop; ai != NULL; ai = ai->ai_next) { 725 saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp, sizeof(tmp)); 726 if (ptb->vflag) 727 fprintf(stderr, "Try to bind to %s\n", tmp); 728 if ((sock = socket(ai->ai_family, ai->ai_socktype, 729 ai->ai_protocol)) == -1) { 730 if (ai->ai_next == NULL) 731 err(1, "socket"); 732 if (ptb->vflag) 733 warn("socket"); 734 continue; 735 } 736 if (ptb->Vflag) { 737 if (setsockopt(sock, SOL_SOCKET, SO_RTABLE, 738 &ptb->Vflag, sizeof(ptb->Vflag)) == -1) { 739 if (errno == ENOPROTOOPT) 740 warn("set rtable"); 741 else 742 err(1, "setsockopt SO_RTABLE"); 743 } 744 } 745 if (ptb->Tflag != -1 && ai->ai_family == AF_INET) { 746 if (setsockopt(sock, IPPROTO_IP, IP_TOS, 747 &ptb->Tflag, sizeof(ptb->Tflag))) 748 err(1, "setsockopt IP_TOS"); 749 } 750 if (ptb->Tflag != -1 && ai->ai_family == AF_INET6) { 751 if (setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, 752 &ptb->Tflag, sizeof(ptb->Tflag))) 753 err(1, "setsockopt IPV6_TCLASS"); 754 } 755 if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 756 &on, sizeof(on)) == -1) 757 warn("reuse port"); 758 if (bind(sock, ai->ai_addr, ai->ai_addrlen) != 0) { 759 if (ai->ai_next == NULL) 760 err(1, "bind"); 761 if (ptb->vflag) 762 warn("bind"); 763 close(sock); 764 continue; 765 } 766 if (ptb->Sflag) { 767 if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, 768 &ptb->Sflag, sizeof(ptb->Sflag)) == -1) 769 warn("set receive buffer size"); 770 } 771 if (TCP_MODE) { 772 if (listen(sock, 64) == -1) { 773 if (ai->ai_next == NULL) 774 err(1, "listen"); 775 if (ptb->vflag) 776 warn("listen"); 777 close(sock); 778 continue; 779 } 780 } 781 if ((ev = calloc(1, sizeof(*ev))) == NULL) 782 err(1, "calloc"); 783 if (UDP_MODE) 784 event_set(ev, sock, EV_READ | EV_PERSIST, 785 udp_server_handle_sc, udp_sc); 786 else 787 event_set(ev, sock, EV_READ | EV_PERSIST, 788 tcp_server_accept, NULL); 789 event_add(ev, NULL); 790 if (ptb->vflag >= 3) 791 fprintf(stderr, "bound to fd %d\n", sock); 792 lnfds++; 793 } 794 freeaddrinfo(aitop); 795 if (lnfds == 0) 796 errx(1, "No working listen addresses found"); 797 } 798 799 static void 800 client_handle_sc(int fd, short event, void *v_sc) 801 { 802 struct statctx *sc = v_sc; 803 ssize_t n; 804 805 again: 806 if ((n = write(sc->fd, sc->buf, sc->buflen)) == -1) { 807 if (errno == EINTR || errno == EAGAIN || 808 (UDP_MODE && errno == ENOBUFS)) 809 goto again; 810 err(1, "write"); 811 } 812 if (TCP_MODE && n == 0) { 813 warnx("Remote end closed connection"); 814 exit(1); 815 } 816 if (ptb->vflag >= 3) 817 warnx("write: %zd bytes\n", n); 818 sc->bytes += n; 819 mainstats.slice_bytes += n; 820 if (UDP_MODE) 821 sc->udp_slice_pkts++; 822 } 823 824 static void 825 client_init(struct addrinfo *aitop, int nconn, struct statctx *udp_sc, 826 struct addrinfo *aib) 827 { 828 struct statctx *sc; 829 struct addrinfo *ai; 830 char tmp[128]; 831 int i, r, sock; 832 833 sc = udp_sc; 834 for (i = 0; i < nconn; i++) { 835 for (sock = -1, ai = aitop; ai != NULL; ai = ai->ai_next) { 836 saddr_ntop(ai->ai_addr, ai->ai_addrlen, tmp, 837 sizeof(tmp)); 838 if (ptb->vflag && i == 0) 839 fprintf(stderr, "Trying %s\n", tmp); 840 if ((sock = socket(ai->ai_family, ai->ai_socktype, 841 ai->ai_protocol)) == -1) { 842 if (ai->ai_next == NULL) 843 err(1, "socket"); 844 if (ptb->vflag) 845 warn("socket"); 846 continue; 847 } 848 if (aib != NULL) { 849 saddr_ntop(aib->ai_addr, aib->ai_addrlen, 850 tmp, sizeof(tmp)); 851 if (ptb->vflag) 852 fprintf(stderr, 853 "Try to bind to %s\n", tmp); 854 if (bind(sock, (struct sockaddr *)aib->ai_addr, 855 aib->ai_addrlen) == -1) 856 err(1, "bind"); 857 freeaddrinfo(aib); 858 } 859 if (ptb->Tflag != -1 && ai->ai_family == AF_INET) { 860 if (setsockopt(sock, IPPROTO_IP, IP_TOS, 861 &ptb->Tflag, sizeof(ptb->Tflag))) 862 err(1, "setsockopt IP_TOS"); 863 } 864 if (ptb->Tflag != -1 && ai->ai_family == AF_INET6) { 865 if (setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, 866 &ptb->Tflag, sizeof(ptb->Tflag))) 867 err(1, "setsockopt IPV6_TCLASS"); 868 } 869 if (ptb->Vflag) { 870 if (setsockopt(sock, SOL_SOCKET, SO_RTABLE, 871 &ptb->Vflag, sizeof(ptb->Vflag)) == -1) { 872 if (errno == ENOPROTOOPT) 873 warn("set rtable"); 874 else 875 err(1, "setsockopt SO_RTABLE"); 876 } 877 } 878 if (ptb->Sflag) { 879 if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, 880 &ptb->Sflag, sizeof(ptb->Sflag)) == -1) 881 warn("set TCP send buffer size"); 882 } 883 if (connect(sock, ai->ai_addr, ai->ai_addrlen) != 0) { 884 if (ai->ai_next == NULL) 885 err(1, "connect"); 886 if (ptb->vflag) 887 warn("connect"); 888 close(sock); 889 sock = -1; 890 continue; 891 } 892 break; 893 } 894 if (sock == -1) 895 errx(1, "No host found"); 896 if ((r = fcntl(sock, F_GETFL, 0)) == -1) 897 err(1, "fcntl(F_GETFL)"); 898 r |= O_NONBLOCK; 899 if (fcntl(sock, F_SETFL, r) == -1) 900 err(1, "fcntl(F_SETFL, O_NONBLOCK)"); 901 /* Alloc and prepare stats */ 902 if (TCP_MODE) { 903 if ((sc = calloc(1, sizeof(*sc))) == NULL) 904 err(1, "calloc"); 905 } 906 sc->fd = sock; 907 stats_prepare(sc); 908 event_set(&sc->ev, sc->fd, EV_WRITE | EV_PERSIST, 909 client_handle_sc, sc); 910 event_add(&sc->ev, NULL); 911 TAILQ_INSERT_TAIL(&sc_queue, sc, entry); 912 mainstats.nconns++; 913 set_slice_timer(mainstats.nconns > 0); 914 if (UDP_MODE) 915 break; 916 } 917 freeaddrinfo(aitop); 918 919 if (ptb->vflag && nconn > 1) 920 fprintf(stderr, "%u connections established\n", nconn); 921 } 922 923 static int 924 map_tos(char *s, int *val) 925 { 926 /* DiffServ Codepoints and other TOS mappings */ 927 const struct toskeywords { 928 const char *keyword; 929 int val; 930 } *t, toskeywords[] = { 931 { "af11", IPTOS_DSCP_AF11 }, 932 { "af12", IPTOS_DSCP_AF12 }, 933 { "af13", IPTOS_DSCP_AF13 }, 934 { "af21", IPTOS_DSCP_AF21 }, 935 { "af22", IPTOS_DSCP_AF22 }, 936 { "af23", IPTOS_DSCP_AF23 }, 937 { "af31", IPTOS_DSCP_AF31 }, 938 { "af32", IPTOS_DSCP_AF32 }, 939 { "af33", IPTOS_DSCP_AF33 }, 940 { "af41", IPTOS_DSCP_AF41 }, 941 { "af42", IPTOS_DSCP_AF42 }, 942 { "af43", IPTOS_DSCP_AF43 }, 943 { "critical", IPTOS_PREC_CRITIC_ECP }, 944 { "cs0", IPTOS_DSCP_CS0 }, 945 { "cs1", IPTOS_DSCP_CS1 }, 946 { "cs2", IPTOS_DSCP_CS2 }, 947 { "cs3", IPTOS_DSCP_CS3 }, 948 { "cs4", IPTOS_DSCP_CS4 }, 949 { "cs5", IPTOS_DSCP_CS5 }, 950 { "cs6", IPTOS_DSCP_CS6 }, 951 { "cs7", IPTOS_DSCP_CS7 }, 952 { "ef", IPTOS_DSCP_EF }, 953 { "inetcontrol", IPTOS_PREC_INTERNETCONTROL }, 954 { "lowdelay", IPTOS_LOWDELAY }, 955 { "netcontrol", IPTOS_PREC_NETCONTROL }, 956 { "reliability", IPTOS_RELIABILITY }, 957 { "throughput", IPTOS_THROUGHPUT }, 958 { NULL, -1 }, 959 }; 960 961 for (t = toskeywords; t->keyword != NULL; t++) { 962 if (strcmp(s, t->keyword) == 0) { 963 *val = t->val; 964 return (1); 965 } 966 } 967 968 return (0); 969 } 970 971 int 972 main(int argc, char **argv) 973 { 974 extern int optind; 975 extern char *optarg; 976 977 char kerr[_POSIX2_LINE_MAX], *tmp; 978 struct addrinfo *aitop, *aib, hints; 979 const char *errstr; 980 struct rlimit rl; 981 int ch, herr, nconn; 982 struct nlist nl[] = { { "_tcbtable" }, { "" } }; 983 const char *host = NULL, *port = DEFAULT_PORT, *srcbind = NULL; 984 struct event ev_sigint, ev_sigterm, ev_sighup; 985 struct statctx *udp_sc = NULL; 986 987 /* Init world */ 988 ptb = &tcpbench; 989 ptb->dummybuf_len = 0; 990 ptb->Sflag = ptb->sflag = ptb->vflag = ptb->Vflag = 0; 991 ptb->kvmh = NULL; 992 ptb->kvars = NULL; 993 ptb->rflag = DEFAULT_STATS_INTERVAL; 994 ptb->Tflag = -1; 995 nconn = 1; 996 aib = NULL; 997 998 while ((ch = getopt(argc, argv, "b:B:hlk:n:p:r:sS:T:uvV:")) != -1) { 999 switch (ch) { 1000 case 'b': 1001 srcbind = optarg; 1002 break; 1003 case 'l': 1004 list_kvars(); 1005 exit(0); 1006 case 'k': 1007 if ((tmp = strdup(optarg)) == NULL) 1008 errx(1, "strdup"); 1009 ptb->kvars = check_prepare_kvars(tmp); 1010 free(tmp); 1011 break; 1012 case 'r': 1013 ptb->rflag = strtonum(optarg, 0, 60 * 60 * 24 * 1000, 1014 &errstr); 1015 if (errstr != NULL) 1016 errx(1, "statistics interval is %s: %s", 1017 errstr, optarg); 1018 break; 1019 case 'p': 1020 port = optarg; 1021 break; 1022 case 's': 1023 ptb->sflag = 1; 1024 break; 1025 case 'S': 1026 ptb->Sflag = strtonum(optarg, 0, 1024*1024*1024, 1027 &errstr); 1028 if (errstr != NULL) 1029 errx(1, "receive space interval is %s: %s", 1030 errstr, optarg); 1031 break; 1032 case 'B': 1033 ptb->dummybuf_len = strtonum(optarg, 0, 1024*1024*1024, 1034 &errstr); 1035 if (errstr != NULL) 1036 errx(1, "read/write buffer size is %s: %s", 1037 errstr, optarg); 1038 break; 1039 case 'v': 1040 ptb->vflag++; 1041 break; 1042 case 'V': 1043 ptb->Vflag = (unsigned int)strtonum(optarg, 0, 1044 RT_TABLEID_MAX, &errstr); 1045 if (errstr) 1046 errx(1, "rtable value is %s: %s", 1047 errstr, optarg); 1048 break; 1049 case 'n': 1050 nconn = strtonum(optarg, 0, 65535, &errstr); 1051 if (errstr != NULL) 1052 errx(1, "number of connections is %s: %s", 1053 errstr, optarg); 1054 break; 1055 case 'u': 1056 ptb->uflag = 1; 1057 break; 1058 case 'T': 1059 if (map_tos(optarg, &ptb->Tflag)) 1060 break; 1061 errstr = NULL; 1062 if (strlen(optarg) > 1 && optarg[0] == '0' && 1063 optarg[1] == 'x') 1064 ptb->Tflag = (int)strtol(optarg, NULL, 16); 1065 else 1066 ptb->Tflag = (int)strtonum(optarg, 0, 255, 1067 &errstr); 1068 if (ptb->Tflag == -1 || ptb->Tflag > 255 || errstr) 1069 errx(1, "illegal tos value %s", optarg); 1070 break; 1071 case 'h': 1072 default: 1073 usage(); 1074 } 1075 } 1076 1077 argv += optind; 1078 argc -= optind; 1079 if ((argc != (ptb->sflag ? 0 : 1)) || 1080 (UDP_MODE && (ptb->kvars || nconn != 1))) 1081 usage(); 1082 1083 if (!ptb->sflag) 1084 host = argv[0]; 1085 /* 1086 * Rationale, 1087 * If TCP, use a big buffer with big reads/writes. 1088 * If UDP, use a big buffer in server and a buffer the size of a 1089 * ethernet packet. 1090 */ 1091 if (!ptb->dummybuf_len) { 1092 if (ptb->sflag || TCP_MODE) 1093 ptb->dummybuf_len = DEFAULT_BUF; 1094 else 1095 ptb->dummybuf_len = DEFAULT_UDP_PKT; 1096 } 1097 1098 bzero(&hints, sizeof(hints)); 1099 if (UDP_MODE) { 1100 hints.ai_socktype = SOCK_DGRAM; 1101 hints.ai_protocol = IPPROTO_UDP; 1102 } 1103 else { 1104 hints.ai_socktype = SOCK_STREAM; 1105 hints.ai_protocol = IPPROTO_TCP; 1106 } 1107 if (ptb->sflag) 1108 hints.ai_flags = AI_PASSIVE; 1109 if (srcbind != NULL) { 1110 hints.ai_flags |= AI_NUMERICHOST; 1111 herr = getaddrinfo(srcbind, NULL, &hints, &aib); 1112 hints.ai_flags &= ~AI_NUMERICHOST; 1113 if (herr != 0) { 1114 if (herr == EAI_SYSTEM) 1115 err(1, "getaddrinfo"); 1116 else 1117 errx(1, "getaddrinfo: %s", gai_strerror(herr)); 1118 } 1119 } 1120 if ((herr = getaddrinfo(host, port, &hints, &aitop)) != 0) { 1121 if (herr == EAI_SYSTEM) 1122 err(1, "getaddrinfo"); 1123 else 1124 errx(1, "getaddrinfo: %s", gai_strerror(herr)); 1125 } 1126 if (ptb->kvars) { 1127 if ((ptb->kvmh = kvm_openfiles(NULL, NULL, NULL, 1128 O_RDONLY, kerr)) == NULL) 1129 errx(1, "kvm_open: %s", kerr); 1130 drop_gid(); 1131 if (kvm_nlist(ptb->kvmh, nl) < 0 || nl[0].n_type == 0) 1132 errx(1, "kvm: no namelist"); 1133 ptb->ktcbtab = nl[0].n_value; 1134 } else 1135 drop_gid(); 1136 1137 if (getrlimit(RLIMIT_NOFILE, &rl) == -1) 1138 err(1, "getrlimit"); 1139 if (rl.rlim_cur < MAX_FD) 1140 rl.rlim_cur = MAX_FD; 1141 if (setrlimit(RLIMIT_NOFILE, &rl)) 1142 err(1, "setrlimit"); 1143 if (getrlimit(RLIMIT_NOFILE, &rl) == -1) 1144 err(1, "getrlimit"); 1145 1146 /* Init world */ 1147 TAILQ_INIT(&sc_queue); 1148 if ((ptb->dummybuf = malloc(ptb->dummybuf_len)) == NULL) 1149 err(1, "malloc"); 1150 arc4random_buf(ptb->dummybuf, ptb->dummybuf_len); 1151 1152 if (UDP_MODE) { 1153 if ((udp_sc = calloc(1, sizeof(*udp_sc))) == NULL) 1154 err(1, "calloc"); 1155 udp_sc->fd = -1; 1156 stats_prepare(udp_sc); 1157 } 1158 1159 /* Setup libevent and signals */ 1160 event_init(); 1161 signal_set(&ev_sigterm, SIGTERM, signal_handler, NULL); 1162 signal_set(&ev_sighup, SIGHUP, signal_handler, NULL); 1163 signal_set(&ev_sigint, SIGINT, signal_handler, NULL); 1164 signal_add(&ev_sigint, NULL); 1165 signal_add(&ev_sigterm, NULL); 1166 signal_add(&ev_sighup, NULL); 1167 signal(SIGPIPE, SIG_IGN); 1168 1169 if (TCP_MODE) 1170 print_tcp_header(); 1171 1172 if (UDP_MODE) 1173 evtimer_set(&mainstats.timer, udp_process_slice, udp_sc); 1174 else 1175 evtimer_set(&mainstats.timer, tcp_process_slice, NULL); 1176 1177 if (ptb->sflag) { 1178 server_init(aitop, udp_sc); 1179 } else 1180 client_init(aitop, nconn, udp_sc, aib); 1181 1182 /* libevent main loop*/ 1183 event_dispatch(); 1184 1185 return (0); 1186 } 1187