127f190a3SBjoern A. Zeeb /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3fe267a55SPedro F. Giffuni * 427f190a3SBjoern A. Zeeb * Copyright (c) 2006, Myricom Inc. 527f190a3SBjoern A. Zeeb * Copyright (c) 2008, Intel Corporation. 69ca874cfSHans Petter Selasky * Copyright (c) 2016-2021 Mellanox Technologies. 727f190a3SBjoern A. Zeeb * All rights reserved. 827f190a3SBjoern A. Zeeb * 927f190a3SBjoern A. Zeeb * Redistribution and use in source and binary forms, with or without 1027f190a3SBjoern A. Zeeb * modification, are permitted provided that the following conditions 1127f190a3SBjoern A. Zeeb * are met: 1227f190a3SBjoern A. Zeeb * 1. Redistributions of source code must retain the above copyright 1327f190a3SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer. 1427f190a3SBjoern A. Zeeb * 2. Redistributions in binary form must reproduce the above copyright 1527f190a3SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer in the 1627f190a3SBjoern A. Zeeb * documentation and/or other materials provided with the distribution. 1727f190a3SBjoern A. Zeeb * 1827f190a3SBjoern A. Zeeb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1927f190a3SBjoern A. Zeeb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2027f190a3SBjoern A. Zeeb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2127f190a3SBjoern A. Zeeb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2227f190a3SBjoern A. Zeeb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2327f190a3SBjoern A. Zeeb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2427f190a3SBjoern A. Zeeb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2527f190a3SBjoern A. Zeeb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2627f190a3SBjoern A. Zeeb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2727f190a3SBjoern A. Zeeb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2827f190a3SBjoern A. Zeeb * SUCH DAMAGE. 2927f190a3SBjoern A. Zeeb */ 306c5087a8SJack F Vogel 316c5087a8SJack F Vogel #ifndef _TCP_LRO_H_ 326c5087a8SJack F Vogel #define _TCP_LRO_H_ 336c5087a8SJack F Vogel 347127e6acSNavdeep Parhar #include <sys/time.h> 35e0d8add4SHans Petter Selasky #include <sys/param.h> 364f9c93f1SGleb Smirnoff #include <sys/mbuf.h> 37b4f60fabSMark Johnston #include <netinet/in.h> 387127e6acSNavdeep Parhar 39e936121dSHans Petter Selasky #ifndef TCP_LRO_ENTRIES 40e936121dSHans Petter Selasky /* Define default number of LRO entries per RX queue */ 41e936121dSHans Petter Selasky #define TCP_LRO_ENTRIES 8 42e936121dSHans Petter Selasky #endif 43e936121dSHans Petter Selasky 4469a34e8dSRandall Stewart /* 4569a34e8dSRandall Stewart * Flags for ACK entry for compression 4693e28d6eSRichard Scheffenegger * the bottom 12 bits has the th_x2|th_flags. 4769a34e8dSRandall Stewart * LRO itself adds only the TSTMP flags 4869a34e8dSRandall Stewart * to indicate if either of the types 4969a34e8dSRandall Stewart * of timestamps are filled and the 5069a34e8dSRandall Stewart * HAS_TSTMP option to indicate if the 5169a34e8dSRandall Stewart * TCP timestamp option is valid. 5269a34e8dSRandall Stewart * 5393e28d6eSRichard Scheffenegger * The other 1 flag bits are for processing 5469a34e8dSRandall Stewart * by a stack. 5569a34e8dSRandall Stewart * 5669a34e8dSRandall Stewart */ 5793e28d6eSRichard Scheffenegger #define TSTMP_LRO 0x1000 5893e28d6eSRichard Scheffenegger #define TSTMP_HDWR 0x2000 5993e28d6eSRichard Scheffenegger #define HAS_TSTMP 0x4000 60d7955cc0SRandall Stewart /* 61d7955cc0SRandall Stewart * Default number of interrupts on the same cpu in a row 62d7955cc0SRandall Stewart * that will cause us to declare a "affinity cpu". 63d7955cc0SRandall Stewart */ 64d7955cc0SRandall Stewart #define TCP_LRO_CPU_DECLARATION_THRESH 50 6569a34e8dSRandall Stewart 6669a34e8dSRandall Stewart struct inpcb; 6769a34e8dSRandall Stewart 68e0d8add4SHans Petter Selasky /* Precompute the LRO_RAW_ADDRESS_MAX value: */ 69e0d8add4SHans Petter Selasky #define LRO_RAW_ADDRESS_MAX \ 70e0d8add4SHans Petter Selasky howmany(12 + 2 * sizeof(struct in6_addr), sizeof(u_long)) 71e0d8add4SHans Petter Selasky 729ca874cfSHans Petter Selasky union lro_address { 73e0d8add4SHans Petter Selasky u_long raw[LRO_RAW_ADDRESS_MAX]; 749ca874cfSHans Petter Selasky struct { 75bb5cd80eSHans Petter Selasky uint8_t lro_type; /* internal */ 769ca874cfSHans Petter Selasky #define LRO_TYPE_NONE 0 779ca874cfSHans Petter Selasky #define LRO_TYPE_IPV4_TCP 1 789ca874cfSHans Petter Selasky #define LRO_TYPE_IPV6_TCP 2 799ca874cfSHans Petter Selasky #define LRO_TYPE_IPV4_UDP 3 809ca874cfSHans Petter Selasky #define LRO_TYPE_IPV6_UDP 4 81bb5cd80eSHans Petter Selasky uint8_t lro_flags; 82bb5cd80eSHans Petter Selasky #define LRO_FLAG_DECRYPTED 1 839ca874cfSHans Petter Selasky uint16_t vlan_id; /* VLAN identifier */ 849ca874cfSHans Petter Selasky uint16_t s_port; /* source TCP/UDP port */ 859ca874cfSHans Petter Selasky uint16_t d_port; /* destination TCP/UDP port */ 869ca874cfSHans Petter Selasky uint32_t vxlan_vni; /* VXLAN virtual network identifier */ 879ca874cfSHans Petter Selasky union { 889ca874cfSHans Petter Selasky struct in_addr v4; 899ca874cfSHans Petter Selasky struct in6_addr v6; 909ca874cfSHans Petter Selasky } s_addr; /* source IPv4/IPv6 address */ 919ca874cfSHans Petter Selasky union { 929ca874cfSHans Petter Selasky struct in_addr v4; 939ca874cfSHans Petter Selasky struct in6_addr v6; 949ca874cfSHans Petter Selasky } d_addr; /* destination IPv4/IPv6 address */ 959ca874cfSHans Petter Selasky }; 96e0d8add4SHans Petter Selasky }; 979ca874cfSHans Petter Selasky 98e0d8add4SHans Petter Selasky _Static_assert(sizeof(union lro_address) == sizeof(u_long) * LRO_RAW_ADDRESS_MAX, 99e0d8add4SHans Petter Selasky "The raw field in the lro_address union does not cover the whole structure."); 1009ca874cfSHans Petter Selasky 1019ca874cfSHans Petter Selasky /* Optimize address comparison by comparing one unsigned long at a time: */ 1029ca874cfSHans Petter Selasky 1039ca874cfSHans Petter Selasky static inline bool 1049ca874cfSHans Petter Selasky lro_address_compare(const union lro_address *pa, const union lro_address *pb) 1059ca874cfSHans Petter Selasky { 1069ca874cfSHans Petter Selasky if (pa->lro_type == LRO_TYPE_NONE && pb->lro_type == LRO_TYPE_NONE) { 1079ca874cfSHans Petter Selasky return (true); 1089ca874cfSHans Petter Selasky } else for (unsigned i = 0; i < LRO_RAW_ADDRESS_MAX; i++) { 1099ca874cfSHans Petter Selasky if (pa->raw[i] != pb->raw[i]) 1109ca874cfSHans Petter Selasky return (false); 1119ca874cfSHans Petter Selasky } 1129ca874cfSHans Petter Selasky return (true); 1139ca874cfSHans Petter Selasky } 1149ca874cfSHans Petter Selasky 1159ca874cfSHans Petter Selasky struct lro_parser { 1169ca874cfSHans Petter Selasky union lro_address data; 1179ca874cfSHans Petter Selasky union { 1189ca874cfSHans Petter Selasky uint8_t *l3; 1199ca874cfSHans Petter Selasky struct ip *ip4; 1209ca874cfSHans Petter Selasky struct ip6_hdr *ip6; 1219ca874cfSHans Petter Selasky }; 1229ca874cfSHans Petter Selasky union { 1239ca874cfSHans Petter Selasky uint8_t *l4; 1249ca874cfSHans Petter Selasky struct tcphdr *tcp; 1259ca874cfSHans Petter Selasky struct udphdr *udp; 1269ca874cfSHans Petter Selasky }; 1279ca874cfSHans Petter Selasky uint16_t total_hdr_len; 1289ca874cfSHans Petter Selasky }; 1299ca874cfSHans Petter Selasky 1309ca874cfSHans Petter Selasky /* This structure is zeroed frequently, try to keep it small. */ 1311ea44822SSepherosa Ziehau struct lro_entry { 1321ea44822SSepherosa Ziehau LIST_ENTRY(lro_entry) next; 13305cde7efSSepherosa Ziehau LIST_ENTRY(lro_entry) hash_next; 1346c5087a8SJack F Vogel struct mbuf *m_head; 1356c5087a8SJack F Vogel struct mbuf *m_tail; 136e57b2d0eSRandall Stewart struct mbuf *m_last_mbuf; 1379ca874cfSHans Petter Selasky struct lro_parser outer; 1389ca874cfSHans Petter Selasky struct lro_parser inner; 13962b5b6ecSBjoern A. Zeeb uint32_t next_seq; /* tcp_seq */ 14062b5b6ecSBjoern A. Zeeb uint32_t ack_seq; /* tcp_seq */ 14162b5b6ecSBjoern A. Zeeb uint32_t tsval; 14262b5b6ecSBjoern A. Zeeb uint32_t tsecr; 1439ca874cfSHans Petter Selasky uint16_t compressed; 1449ca874cfSHans Petter Selasky uint16_t uncompressed; 14562b5b6ecSBjoern A. Zeeb uint16_t window; 14693e28d6eSRichard Scheffenegger uint16_t flags : 12, /* 12 TCP header bits */ 14793e28d6eSRichard Scheffenegger timestamp : 1, 14893e28d6eSRichard Scheffenegger needs_merge : 1, 14993e28d6eSRichard Scheffenegger reserved : 2; /* unused */ 150b45daaeaSRandall Stewart struct bintime alloc_time; /* time when entry was allocated */ 1516c5087a8SJack F Vogel }; 1526c5087a8SJack F Vogel 1539ca874cfSHans Petter Selasky LIST_HEAD(lro_head, lro_entry); 15462b5b6ecSBjoern A. Zeeb 155fc271df3SHans Petter Selasky struct lro_mbuf_sort { 156fc271df3SHans Petter Selasky uint64_t seq; 157fc271df3SHans Petter Selasky struct mbuf *mb; 158fc271df3SHans Petter Selasky }; 159fc271df3SHans Petter Selasky 16062b5b6ecSBjoern A. Zeeb /* NB: This is part of driver structs. */ 1616c5087a8SJack F Vogel struct lro_ctrl { 1626c5087a8SJack F Vogel struct ifnet *ifp; 163fc271df3SHans Petter Selasky struct lro_mbuf_sort *lro_mbuf_data; 164b45daaeaSRandall Stewart struct bintime lro_last_queue_time; /* last time data was queued */ 165e936121dSHans Petter Selasky uint64_t lro_queued; 166e936121dSHans Petter Selasky uint64_t lro_flushed; 167e936121dSHans Petter Selasky uint64_t lro_bad_csum; 168e936121dSHans Petter Selasky unsigned lro_cnt; 169e936121dSHans Petter Selasky unsigned lro_mbuf_count; 170e936121dSHans Petter Selasky unsigned lro_mbuf_max; 1717ae3d4bfSSepherosa Ziehau unsigned short lro_ackcnt_lim; /* max # of aggregated ACKs */ 172d7955cc0SRandall Stewart unsigned short lro_cpu; /* Guess at the cpu we have affinity too */ 1737ae3d4bfSSepherosa Ziehau unsigned lro_length_lim; /* max len of aggregated data */ 17405cde7efSSepherosa Ziehau u_long lro_hashsz; 175d7955cc0SRandall Stewart uint32_t lro_last_cpu; 176d7955cc0SRandall Stewart uint32_t lro_cnt_of_same_cpu; 17705cde7efSSepherosa Ziehau struct lro_head *lro_hash; 1786c5087a8SJack F Vogel struct lro_head lro_active; 1796c5087a8SJack F Vogel struct lro_head lro_free; 180d7955cc0SRandall Stewart uint8_t lro_cpu_is_set; /* Flag to say its ok to set the CPU on the inp */ 1816c5087a8SJack F Vogel }; 1826c5087a8SJack F Vogel 18369a34e8dSRandall Stewart struct tcp_ackent { 18469a34e8dSRandall Stewart uint64_t timestamp; /* hardware or sofware timestamp, valid if TSTMP_LRO or TSTMP_HDRW set */ 18569a34e8dSRandall Stewart uint32_t seq; /* th_seq value */ 18669a34e8dSRandall Stewart uint32_t ack; /* th_ack value */ 18769a34e8dSRandall Stewart uint32_t ts_value; /* If ts option value, valid if HAS_TSTMP is set */ 18869a34e8dSRandall Stewart uint32_t ts_echo; /* If ts option echo, valid if HAS_TSTMP is set */ 18969a34e8dSRandall Stewart uint16_t win; /* TCP window */ 19069a34e8dSRandall Stewart uint16_t flags; /* Flags to say if TS is present and type of timestamp and th_flags */ 19169a34e8dSRandall Stewart uint8_t codepoint; /* IP level codepoint including ECN bits */ 19269a34e8dSRandall Stewart uint8_t ack_val_set; /* Classification of ack used by the stack */ 19369a34e8dSRandall Stewart uint8_t pad[2]; /* To 32 byte boundary */ 19469a34e8dSRandall Stewart }; 19569a34e8dSRandall Stewart 19669a34e8dSRandall Stewart /* We use two M_PROTO on the mbuf */ 19769a34e8dSRandall Stewart #define M_ACKCMP M_PROTO4 /* Indicates LRO is sending in a Ack-compression mbuf */ 19869a34e8dSRandall Stewart #define M_LRO_EHDRSTRP M_PROTO6 /* Indicates that LRO has stripped the etherenet header */ 19969a34e8dSRandall Stewart 2009ca874cfSHans Petter Selasky #define TCP_LRO_LENGTH_MAX (65535 - 255) /* safe value with room for outer headers */ 2017ae3d4bfSSepherosa Ziehau #define TCP_LRO_ACKCNT_MAX 65535 /* unlimited */ 2027ae3d4bfSSepherosa Ziehau 2034f9c93f1SGleb Smirnoff #define TCP_LRO_TS_OPTION ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |\ 2044f9c93f1SGleb Smirnoff (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP) 2054f9c93f1SGleb Smirnoff 2064f9c93f1SGleb Smirnoff static inline struct tcphdr * 2074f9c93f1SGleb Smirnoff tcp_lro_get_th(struct mbuf *m) 2084f9c93f1SGleb Smirnoff { 2094f9c93f1SGleb Smirnoff return ((struct tcphdr *)((char *)m->m_data + 2104f9c93f1SGleb Smirnoff m->m_pkthdr.lro_tcp_h_off)); 2114f9c93f1SGleb Smirnoff } 2124f9c93f1SGleb Smirnoff 2134f9c93f1SGleb Smirnoff extern long tcplro_stacks_wanting_mbufq; 2144f9c93f1SGleb Smirnoff 2156c5087a8SJack F Vogel int tcp_lro_init(struct lro_ctrl *); 216e936121dSHans Petter Selasky int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned); 2176c5087a8SJack F Vogel void tcp_lro_free(struct lro_ctrl *); 2187127e6acSNavdeep Parhar void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *); 219e936121dSHans Petter Selasky void tcp_lro_flush_all(struct lro_ctrl *); 220*2c6fc36aSGleb Smirnoff extern int (*tcp_lro_flush_tcphpts)(struct lro_ctrl *, struct lro_entry *); 2216c5087a8SJack F Vogel int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t); 222e936121dSHans Petter Selasky void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *); 223e57b2d0eSRandall Stewart void tcp_lro_reg_mbufq(void); 224e57b2d0eSRandall Stewart void tcp_lro_dereg_mbufq(void); 2256c5087a8SJack F Vogel 226489f0c3cSSepherosa Ziehau #define TCP_LRO_NO_ENTRIES -2 22762b5b6ecSBjoern A. Zeeb #define TCP_LRO_CANNOT -1 22862b5b6ecSBjoern A. Zeeb #define TCP_LRO_NOT_SUPPORTED 1 2296c5087a8SJack F Vogel 2306c5087a8SJack F Vogel #endif /* _TCP_LRO_H_ */ 231