/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Inspur Corporation
 */

#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_udp.h>

#include "gro_vxlan_udp4.h"

void *
gro_vxlan_udp4_tbl_create(uint16_t socket_id,
		uint16_t max_flow_num,
		uint16_t max_item_per_flow)
{
	struct gro_vxlan_udp4_tbl *tbl;
	size_t size;
	uint32_t entries_num, i;

	entries_num = max_flow_num * max_item_per_flow;
	entries_num = RTE_MIN(entries_num, GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM);

	if (entries_num == 0)
		return NULL;

	tbl = rte_zmalloc_socket(__func__,
			sizeof(struct gro_vxlan_udp4_tbl),
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (tbl == NULL)
		return NULL;

	size = sizeof(struct gro_vxlan_udp4_item) * entries_num;
	tbl->items = rte_zmalloc_socket(__func__,
			size,
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (tbl->items == NULL) {
		rte_free(tbl);
		return NULL;
	}
	tbl->max_item_num = entries_num;

	size = sizeof(struct gro_vxlan_udp4_flow) * entries_num;
	tbl->flows = rte_zmalloc_socket(__func__,
			size,
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (tbl->flows == NULL) {
		rte_free(tbl->items);
		rte_free(tbl);
		return NULL;
	}

	for (i = 0; i < entries_num; i++)
		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
	tbl->max_flow_num = entries_num;

	return tbl;
}

void
gro_vxlan_udp4_tbl_destroy(void *tbl)
{
	struct gro_vxlan_udp4_tbl *vxlan_tbl = tbl;

	if (vxlan_tbl) {
		rte_free(vxlan_tbl->items);
		rte_free(vxlan_tbl->flows);
	}
	rte_free(vxlan_tbl);
}

static inline uint32_t
find_an_empty_item(struct gro_vxlan_udp4_tbl *tbl)
{
	uint32_t max_item_num = tbl->max_item_num, i;

	for (i = 0; i < max_item_num; i++)
		if (tbl->items[i].inner_item.firstseg == NULL)
			return i;
	return INVALID_ARRAY_INDEX;
}

static inline uint32_t
find_an_empty_flow(struct gro_vxlan_udp4_tbl *tbl)
{
	uint32_t max_flow_num = tbl->max_flow_num, i;

	for (i = 0; i < max_flow_num; i++)
		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
			return i;
	return INVALID_ARRAY_INDEX;
}

static inline uint32_t
insert_new_item(struct gro_vxlan_udp4_tbl *tbl,
		struct rte_mbuf *pkt,
		uint64_t start_time,
		uint32_t prev_idx,
		uint16_t frag_offset,
		uint8_t is_last_frag)
{
	uint32_t item_idx;

	item_idx = find_an_empty_item(tbl);
	if (unlikely(item_idx == INVALID_ARRAY_INDEX))
		return INVALID_ARRAY_INDEX;

	tbl->items[item_idx].inner_item.firstseg = pkt;
	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
	tbl->items[item_idx].inner_item.start_time = start_time;
	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
	tbl->items[item_idx].inner_item.frag_offset = frag_offset;
	tbl->items[item_idx].inner_item.is_last_frag = is_last_frag;
	tbl->items[item_idx].inner_item.nb_merged = 1;
	tbl->item_num++;

	/* If the previous packet exists, chain the new one with it. */
	if (prev_idx != INVALID_ARRAY_INDEX) {
		tbl->items[item_idx].inner_item.next_pkt_idx =
			tbl->items[prev_idx].inner_item.next_pkt_idx;
		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
	}

	return item_idx;
}
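
/*
 * Illustrative example: the items of one flow form a singly linked
 * list that the reassemble path keeps sorted by ascending inner
 * frag_offset. Assuming three fragments with inner offsets 0, 1480
 * and 2960 arrive in that order, the flow's list becomes:
 *
 *   start_index -> [off 0] -> [off 1480] -> [off 2960, is_last_frag]
 *
 * insert_new_item() above splices a new node in after prev_idx;
 * delete_item() below unlinks a node and returns the next index.
 */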

static inline uint32_t
delete_item(struct gro_vxlan_udp4_tbl *tbl,
		uint32_t item_idx,
		uint32_t prev_item_idx)
{
	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;

	/* NULL indicates an empty item. */
	tbl->items[item_idx].inner_item.firstseg = NULL;
	tbl->item_num--;
	if (prev_item_idx != INVALID_ARRAY_INDEX)
		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;

	return next_idx;
}

static inline uint32_t
insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
		struct vxlan_udp4_flow_key *src,
		uint32_t item_idx)
{
	struct vxlan_udp4_flow_key *dst;
	uint32_t flow_idx;

	flow_idx = find_an_empty_flow(tbl);
	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
		return INVALID_ARRAY_INDEX;

	dst = &(tbl->flows[flow_idx].key);

	rte_ether_addr_copy(&(src->inner_key.eth_saddr),
			&(dst->inner_key.eth_saddr));
	rte_ether_addr_copy(&(src->inner_key.eth_daddr),
			&(dst->inner_key.eth_daddr));
	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
	dst->inner_key.ip_id = src->inner_key.ip_id;

	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
	rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
	rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
	dst->outer_ip_src_addr = src->outer_ip_src_addr;
	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
	dst->outer_dst_port = src->outer_dst_port;

	tbl->flows[flow_idx].start_index = item_idx;
	tbl->flow_num++;

	return flow_idx;
}

static inline int
is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
		struct vxlan_udp4_flow_key k2)
{
	/* For VXLAN packets, the outer UDP source port is calculated
	 * from the inner packet's RSS hash. The UDP source port of the
	 * first fragment can therefore differ from that of the other
	 * fragments even within the same flow, so we have to skip the
	 * outer UDP source port comparison here.
	 */
	return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
					&k2.outer_eth_saddr) &&
			rte_is_same_ether_addr(&k1.outer_eth_daddr,
					&k2.outer_eth_daddr) &&
			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
			(k1.outer_dst_port == k2.outer_dst_port) &&
			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
			is_same_udp4_flow(k1.inner_key, k2.inner_key));
}

static inline int
udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
		uint16_t frag_offset,
		uint16_t ip_dl)
{
	struct rte_mbuf *pkt = item->inner_item.firstseg;
	int cmp;
	uint16_t l2_offset;
	int ret = 0;

	/* Note: if the outer DF bit is set, i.e. outer_is_atomic is 1,
	 * we needn't compare outer_ip_id because the IDs are the same.
	 * For the case outer_is_atomic is 0, we also have no way to
	 * compare outer_ip_id because the difference between the
	 * outer_ip_ids of two received packets isn't always +/-1.
	 * So skip the outer_ip_id comparison here.
	 */

	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
	cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
			l2_offset);
	if (cmp > 0)
		/* Append the new packet. */
		ret = 1;
	else if (cmp < 0)
		/* Prepend the new packet. */
		ret = -1;

	return ret;
}
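
/*
 * Example of the neighbor check, assuming an item whose merged inner
 * payload currently covers bytes [0, 1480): a new fragment with
 * frag_offset 1480 is a right neighbor (return 1, append), while a
 * fragment covering [0, 1480) checked against an item starting at
 * offset 1480 is a left neighbor (return -1, prepend). A fragment
 * that abuts neither end returns 0 and is inserted as a separate item.
 */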

static inline int
merge_two_vxlan_udp4_packets(struct gro_vxlan_udp4_item *item,
		struct rte_mbuf *pkt,
		int cmp,
		uint16_t frag_offset,
		uint8_t is_last_frag)
{
	if (merge_two_udp4_packets(&item->inner_item, pkt, cmp, frag_offset,
				is_last_frag,
				pkt->outer_l2_len + pkt->outer_l3_len)) {
		return 1;
	}

	return 0;
}

static inline void
update_vxlan_header(struct gro_vxlan_udp4_item *item)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_udp_hdr *udp_hdr;
	struct rte_mbuf *pkt = item->inner_item.firstseg;
	uint16_t len;
	uint16_t frag_offset;

	/* Update the outer IPv4 header. */
	len = pkt->pkt_len - pkt->outer_l2_len;
	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
			pkt->outer_l2_len);
	ipv4_hdr->total_length = rte_cpu_to_be_16(len);

	/* Update the outer UDP header. */
	len -= pkt->outer_l3_len;
	udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
	udp_hdr->dgram_len = rte_cpu_to_be_16(len);

	/* Update the inner IPv4 header. */
	len -= pkt->l2_len;
	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
	ipv4_hdr->total_length = rte_cpu_to_be_16(len);

	/* Clear the MF bit if it is the last fragment. */
	if (item->inner_item.is_last_frag) {
		frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
		ipv4_hdr->fragment_offset =
			rte_cpu_to_be_16(frag_offset & ~RTE_IPV4_HDR_MF_FLAG);
	}
}
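
/*
 * Worked example for update_vxlan_header(), assuming typical header
 * sizes: outer_l2_len = 14, outer_l3_len = 20 and l2_len = 30 (8-byte
 * outer UDP header + 8-byte VXLAN header + 14-byte inner Ethernet
 * header), with two merged 1480-byte inner fragments and a 20-byte
 * inner IPv4 header, so pkt_len = 3044. The function then writes:
 *
 *   outer IPv4 total_length = 3044 - 14 = 3030
 *   outer UDP  dgram_len    = 3030 - 20 = 3010
 *   inner IPv4 total_length = 3010 - 30 = 2980 (20B header + 2960B data)
 */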

int32_t
gro_vxlan_udp4_reassemble(struct rte_mbuf *pkt,
		struct gro_vxlan_udp4_tbl *tbl,
		uint64_t start_time)
{
	struct rte_ether_hdr *outer_eth_hdr, *eth_hdr;
	struct rte_ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
	struct rte_udp_hdr *udp_hdr;
	struct rte_vxlan_hdr *vxlan_hdr;
	uint16_t frag_offset;
	uint8_t is_last_frag;
	int16_t ip_dl;
	uint16_t ip_id;

	struct vxlan_udp4_flow_key key;
	uint32_t cur_idx, prev_idx, item_idx;
	uint32_t i, max_flow_num, remaining_flow_num;
	int cmp;
	uint16_t hdr_len;
	uint8_t find;

	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
	outer_ipv4_hdr = (struct rte_ipv4_hdr *)((char *)outer_eth_hdr +
			pkt->outer_l2_len);

	udp_hdr = (struct rte_udp_hdr *)((char *)outer_ipv4_hdr +
			pkt->outer_l3_len);
	vxlan_hdr = (struct rte_vxlan_hdr *)((char *)udp_hdr +
			sizeof(struct rte_udp_hdr));
	eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_hdr +
			sizeof(struct rte_vxlan_hdr));
	/* l2_len = outer UDP header len + VXLAN header len + inner l2 len */
	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);

	/*
	 * Don't process the packet whose inner IP header is not
	 * fragmented.
	 */
	if (!is_ipv4_fragment(ipv4_hdr))
		return -1;

	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
		pkt->l3_len;
	/*
	 * Don't process the packet whose payload length is less than or
	 * equal to 0.
	 */
	if (pkt->pkt_len <= hdr_len)
		return -1;

	ip_dl = pkt->pkt_len - hdr_len;

	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
	frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
	is_last_frag = ((frag_offset & RTE_IPV4_HDR_MF_FLAG) == 0) ? 1 : 0;
	frag_offset = (uint16_t)(frag_offset & RTE_IPV4_HDR_OFFSET_MASK) << 3;

	rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.inner_key.eth_saddr));
	rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.inner_key.eth_daddr));
	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
	key.inner_key.ip_id = ip_id;

	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
	rte_ether_addr_copy(&(outer_eth_hdr->src_addr), &(key.outer_eth_saddr));
	rte_ether_addr_copy(&(outer_eth_hdr->dst_addr), &(key.outer_eth_daddr));
	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
	/* Note: it is unnecessary to save outer_src_port here because it
	 * can differ for VXLAN UDP fragments of the same flow.
	 */
	key.outer_dst_port = udp_hdr->dst_port;

	/* Search for a matched flow. */
	max_flow_num = tbl->max_flow_num;
	remaining_flow_num = tbl->flow_num;
	find = 0;
	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
			if (is_same_vxlan_udp4_flow(tbl->flows[i].key, key)) {
				find = 1;
				break;
			}
			remaining_flow_num--;
		}
	}

	/*
	 * Can't find a matched flow. Insert a new flow and store the
	 * packet into the flow.
	 */
	if (find == 0) {
		item_idx = insert_new_item(tbl, pkt, start_time,
				INVALID_ARRAY_INDEX, frag_offset,
				is_last_frag);
		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
			return -1;
		if (insert_new_flow(tbl, &key, item_idx) ==
				INVALID_ARRAY_INDEX) {
			/*
			 * Failed to insert a new flow, so delete the
			 * inserted packet.
			 */
			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
			return -1;
		}
		return 0;
	}

	/* Check all packets in the flow and try to find a neighbor. */
	cur_idx = tbl->flows[i].start_index;
	prev_idx = cur_idx;
	do {
		cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
				frag_offset, ip_dl);
		if (cmp) {
			if (merge_two_vxlan_udp4_packets(
						&(tbl->items[cur_idx]),
						pkt, cmp, frag_offset,
						is_last_frag)) {
				return 1;
			}
			/*
			 * Can't merge the two packets, as the merged
			 * packet length would exceed the max value.
			 * Insert the packet into the flow.
			 */
			if (insert_new_item(tbl, pkt, start_time, prev_idx,
						frag_offset, is_last_frag) ==
					INVALID_ARRAY_INDEX)
				return -1;
			return 0;
		}

		/* Ensure inserted items are ordered by frag_offset. */
		if (frag_offset <
				tbl->items[cur_idx].inner_item.frag_offset)
			break;

		prev_idx = cur_idx;
		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
	} while (cur_idx != INVALID_ARRAY_INDEX);

	/* Can't find a neighbor. Insert the packet into the flow. */
	if (cur_idx == tbl->flows[i].start_index) {
		/* Insert it before the first packet of the flow. */
		item_idx = insert_new_item(tbl, pkt, start_time,
				INVALID_ARRAY_INDEX, frag_offset,
				is_last_frag);
		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
			return -1;
		tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
		tbl->flows[i].start_index = item_idx;
	} else {
		if (insert_new_item(tbl, pkt, start_time, prev_idx,
					frag_offset, is_last_frag) ==
				INVALID_ARRAY_INDEX)
			return -1;
	}

	return 0;
}

static int
gro_vxlan_udp4_merge_items(struct gro_vxlan_udp4_tbl *tbl,
		uint32_t start_idx)
{
	uint16_t frag_offset;
	uint8_t is_last_frag;
	int16_t ip_dl;
	struct rte_mbuf *pkt;
	int cmp;
	uint32_t item_idx;
	uint16_t hdr_len;

	item_idx = tbl->items[start_idx].inner_item.next_pkt_idx;
	while (item_idx != INVALID_ARRAY_INDEX) {
		pkt = tbl->items[item_idx].inner_item.firstseg;
		hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
			pkt->l3_len;
		ip_dl = pkt->pkt_len - hdr_len;
		frag_offset = tbl->items[item_idx].inner_item.frag_offset;
		is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
		cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
				frag_offset, ip_dl);
		if (cmp) {
			if (merge_two_vxlan_udp4_packets(
						&(tbl->items[start_idx]),
						pkt, cmp, frag_offset,
						is_last_frag)) {
				item_idx = delete_item(tbl, item_idx,
						INVALID_ARRAY_INDEX);
				tbl->items[start_idx].inner_item.next_pkt_idx
					= item_idx;
			} else
				return 0;
		} else
			return 0;
	}

	return 0;
}

uint16_t
gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
		uint64_t flush_timestamp,
		struct rte_mbuf **out,
		uint16_t nb_out)
{
	uint16_t k = 0;
	uint32_t i, j;
	uint32_t max_flow_num = tbl->max_flow_num;

	for (i = 0; i < max_flow_num; i++) {
		if (unlikely(tbl->flow_num == 0))
			return k;

		j = tbl->flows[i].start_index;
		while (j != INVALID_ARRAY_INDEX) {
			if (tbl->items[j].inner_item.start_time <=
					flush_timestamp) {
				gro_vxlan_udp4_merge_items(tbl, j);
				out[k++] = tbl->items[j].inner_item.firstseg;
				if (tbl->items[j].inner_item.nb_merged > 1)
					update_vxlan_header(&(tbl->items[j]));
				/*
				 * Delete the item and get the next packet
				 * index.
				 */
				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
				tbl->flows[i].start_index = j;
				if (j == INVALID_ARRAY_INDEX)
					tbl->flow_num--;

				if (unlikely(k == nb_out))
					return k;
			} else
				/*
				 * Flushing packets does not strictly follow
				 * the timestamp. Once an item whose
				 * start_time is greater than flush_timestamp
				 * is found, the remaining packets of the
				 * flow are not flushed this time. So go on
				 * to check the other flows.
				 */
				break;
		}
	}
	return k;
}

uint32_t
gro_vxlan_udp4_tbl_pkt_count(void *tbl)
{
	struct gro_vxlan_udp4_tbl *gro_tbl = tbl;

	if (gro_tbl)
		return gro_tbl->item_num;

	return 0;
}
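
/*
 * Minimal usage sketch. This function, its name and the one-second
 * flush budget are hypothetical, and out[] is assumed to hold at
 * least nb_pkts entries; applications normally drive this table
 * through the generic rte_gro framework (rte_gro_reassemble() /
 * rte_gro_timeout_flush()) instead of calling these functions
 * directly.
 */
static __rte_unused uint16_t
vxlan_udp4_gro_burst(struct gro_vxlan_udp4_tbl *tbl,
		struct rte_mbuf **pkts, uint16_t nb_pkts,
		struct rte_mbuf **out, uint16_t nb_out)
{
	uint64_t now = rte_rdtsc();
	uint16_t i, nb_unprocessed = 0;
	int32_t ret;

	for (i = 0; i < nb_pkts; i++) {
		ret = gro_vxlan_udp4_reassemble(pkts[i], tbl, now);
		if (ret < 0)
			/* Not a reassemblable packet; pass it through. */
			out[nb_unprocessed++] = pkts[i];
		/* ret == 1: merged into an existing item;
		 * ret == 0: stored in the table for later merging.
		 * Either way the mbuf now belongs to the table.
		 */
	}

	/* Flush items older than one TSC second (hypothetical budget). */
	return nb_unprocessed + gro_vxlan_udp4_tbl_timeout_flush(tbl,
			now - rte_get_tsc_hz(),
			&out[nb_unprocessed], nb_out - nb_unprocessed);
}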