/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN9K_TX_H__
#define __CN9K_TX_H__

#include <rte_vect.h>

#define NIX_TX_OFFLOAD_NONE	      (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX	      (NIX_TX_OFFLOAD_SECURITY_F << 1)

/* Flag to control the xmit_prepare function.
 * Defined from the MSB end so that it is not mistaken
 * for an offload flag when picking the Tx function.
 */
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |        \
	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |               \
	 NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		int64_t avail;                                                 \
		/* Cached value is low, update fc_cache_pkts */                \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			avail = txq->nb_sqb_bufs_adj - *txq->fc_mem;           \
			/* Multiply with sqe_per_sqb to express in pkts */     \
			(txq)->fc_cache_pkts =                                 \
				(avail << (txq)->sqes_per_sqb_log2) - avail;   \
			/* Check it again for the room */                      \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)

/* Function to determine the number of Tx subdescriptors required when the
 * extension subdescriptor is enabled.
 */
static __rte_always_inline int
cn9k_nix_tx_ext_subs(const uint16_t flags)
{
	return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
		       ? 2
		       : ((flags &
			   (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
				  ? 1
				  : 0);
}
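
/* Illustrative note (assumes the usual roc_lmt_mov() behaviour of copying
 * 32 B plus 16 B per extension unit): the value returned above is the number
 * of extra 16 B subdescriptor slots needed on top of SEND_HDR + SG. For
 * example, VLAN/QinQ or TSO alone needs one SEND_EXT (returns 1), while the
 * timestamp offload additionally needs a SEND_MEM (returns 2).
 * cn9k_nix_xmit_one() passes this count straight to roc_lmt_mov() to size
 * the LMT copy.
 */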

static __rte_always_inline void
cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,
		     const uint16_t flags, const uint16_t static_sz)
{
	if (static_sz)
		cmd[0] = txq->send_hdr_w0;
	else
		cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
			 ((uint64_t)(cn9k_nix_tx_ext_subs(flags) + 1) << 40);
	cmd[1] = 0;

	if (flags & NIX_TX_NEED_EXT_HDR) {
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
			cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
		else
			cmd[2] = NIX_SUBDC_EXT << 60;
		cmd[3] = 0;
		cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	} else {
		cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	}
}

static __rte_always_inline void
cn9k_nix_free_extmbuf(struct rte_mbuf *m)
{
	struct rte_mbuf *m_next;
	while (m != NULL) {
		m_next = m->next;
		rte_pktmbuf_free_seg(m);
		m = m_next;
	}
}

static __rte_always_inline uint64_t
cn9k_nix_prefree_seg(struct rte_mbuf *m, struct rte_mbuf **extm, struct cn9k_eth_txq *txq,
		     struct nix_send_hdr_s *send_hdr, uint64_t *aura)
{
	struct rte_mbuf *prev;
	uint32_t sqe_id;

	if (RTE_MBUF_HAS_EXTBUF(m)) {
		if (unlikely(txq->tx_compl.ena == 0)) {
			m->next = *extm;
			*extm = m;
			return 1;
		}
		if (send_hdr->w0.pnc) {
			sqe_id = send_hdr->w1.sqe_id;
			prev = txq->tx_compl.ptr[sqe_id];
			m->next = prev;
			txq->tx_compl.ptr[sqe_id] = m;
		} else {
			sqe_id = __atomic_fetch_add(&txq->tx_compl.sqe_id, 1, __ATOMIC_RELAXED);
			send_hdr->w0.pnc = 1;
			send_hdr->w1.sqe_id = sqe_id &
				txq->tx_compl.nb_desc_mask;
			txq->tx_compl.ptr[send_hdr->w1.sqe_id] = m;
			m->next = NULL;
		}
		return 1;
	} else {
		return cnxk_nix_prefree_seg(m, aura);
	}
}
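
/* Illustrative note: the return value above is written by the callers into
 * the descriptor "don't free" bit (SEND_HDR W0 DF for single-segment packets,
 * or the per-segment invert-DF bits at 55+ of the SG word for multi-segment
 * packets). 1 means the NIX must not free the buffer (external buffer queued
 * on the extm/completion list, or reference handling done in software); 0
 * means the hardware returns the buffer to the aura selected in SEND_HDR on
 * transmit completion.
 */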

#if defined(RTE_ARCH_ARM64)
/* Only called for first segments of single segmented mbufs */
static __rte_always_inline void
cn9k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct rte_mbuf **extm, struct cn9k_eth_txq *txq,
			 uint64x2_t *senddesc01_w0, uint64x2_t *senddesc23_w0,
			 uint64x2_t *senddesc01_w1, uint64x2_t *senddesc23_w1)
{
	struct rte_mbuf **tx_compl_ptr = txq->tx_compl.ptr;
	uint32_t nb_desc_mask = txq->tx_compl.nb_desc_mask;
	bool tx_compl_ena = txq->tx_compl.ena;
	struct rte_mbuf *m0, *m1, *m2, *m3;
	struct rte_mbuf *cookie;
	uint64_t w0, w1, aura;
	uint64_t sqe_id;

	m0 = mbufs[0];
	m1 = mbufs[1];
	m2 = mbufs[2];
	m3 = mbufs[3];

	/* mbuf 0 */
	w0 = vgetq_lane_u64(*senddesc01_w0, 0);
	if (RTE_MBUF_HAS_EXTBUF(m0)) {
		w0 |= BIT_ULL(19);
		w1 = vgetq_lane_u64(*senddesc01_w1, 0);
		w1 &= ~0xFFFF000000000000UL;
		if (unlikely(!tx_compl_ena)) {
			m0->next = *extm;
			*extm = m0;
		} else {
			sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
							       rte_memory_order_relaxed);
			sqe_id = sqe_id & nb_desc_mask;
			/* Set PNC */
			w0 |= BIT_ULL(43);
			w1 |= sqe_id << 48;
			tx_compl_ptr[sqe_id] = m0;
			*senddesc01_w1 = vsetq_lane_u64(w1, *senddesc01_w1, 0);
		}
	} else {
		cookie = RTE_MBUF_DIRECT(m0) ? m0 : rte_mbuf_from_indirect(m0);
		aura = (w0 >> 20) & 0xFFFFF;
		w0 &= ~0xFFFFF00000UL;
		w0 |= cnxk_nix_prefree_seg(m0, &aura) << 19;
		w0 |= aura << 20;

		if ((w0 & BIT_ULL(19)) == 0)
			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
	}
	*senddesc01_w0 = vsetq_lane_u64(w0, *senddesc01_w0, 0);

	/* mbuf 1 */
	w0 = vgetq_lane_u64(*senddesc01_w0, 1);
	if (RTE_MBUF_HAS_EXTBUF(m1)) {
		w0 |= BIT_ULL(19);
		w1 = vgetq_lane_u64(*senddesc01_w1, 1);
		w1 &= ~0xFFFF000000000000UL;
		if (unlikely(!tx_compl_ena)) {
			m1->next = *extm;
			*extm = m1;
		} else {
			sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
							       rte_memory_order_relaxed);
			sqe_id = sqe_id & nb_desc_mask;
			/* Set PNC */
			w0 |= BIT_ULL(43);
			w1 |= sqe_id << 48;
			tx_compl_ptr[sqe_id] = m1;
			*senddesc01_w1 = vsetq_lane_u64(w1, *senddesc01_w1, 1);
		}
	} else {
		cookie = RTE_MBUF_DIRECT(m1) ? m1 : rte_mbuf_from_indirect(m1);
		aura = (w0 >> 20) & 0xFFFFF;
		w0 &= ~0xFFFFF00000UL;
		w0 |= cnxk_nix_prefree_seg(m1, &aura) << 19;
		w0 |= aura << 20;

		if ((w0 & BIT_ULL(19)) == 0)
			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
	}
	*senddesc01_w0 = vsetq_lane_u64(w0, *senddesc01_w0, 1);

	/* mbuf 2 */
	w0 = vgetq_lane_u64(*senddesc23_w0, 0);
	if (RTE_MBUF_HAS_EXTBUF(m2)) {
		w0 |= BIT_ULL(19);
		w1 = vgetq_lane_u64(*senddesc23_w1, 0);
		w1 &= ~0xFFFF000000000000UL;
		if (unlikely(!tx_compl_ena)) {
			m2->next = *extm;
			*extm = m2;
		} else {
			sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
							       rte_memory_order_relaxed);
			sqe_id = sqe_id & nb_desc_mask;
			/* Set PNC */
			w0 |= BIT_ULL(43);
			w1 |= sqe_id << 48;
			tx_compl_ptr[sqe_id] = m2;
			*senddesc23_w1 = vsetq_lane_u64(w1, *senddesc23_w1, 0);
		}
	} else {
		cookie = RTE_MBUF_DIRECT(m2) ? m2 : rte_mbuf_from_indirect(m2);
		aura = (w0 >> 20) & 0xFFFFF;
		w0 &= ~0xFFFFF00000UL;
		w0 |= cnxk_nix_prefree_seg(m2, &aura) << 19;
		w0 |= aura << 20;

		if ((w0 & BIT_ULL(19)) == 0)
			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
	}
	*senddesc23_w0 = vsetq_lane_u64(w0, *senddesc23_w0, 0);

	/* mbuf 3 */
	w0 = vgetq_lane_u64(*senddesc23_w0, 1);
	if (RTE_MBUF_HAS_EXTBUF(m3)) {
		w0 |= BIT_ULL(19);
		w1 = vgetq_lane_u64(*senddesc23_w1, 1);
		w1 &= ~0xFFFF000000000000UL;
		if (unlikely(!tx_compl_ena)) {
			m3->next = *extm;
			*extm = m3;
		} else {
			sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
							       rte_memory_order_relaxed);
			sqe_id = sqe_id & nb_desc_mask;
			/* Set PNC */
			w0 |= BIT_ULL(43);
			w1 |= sqe_id << 48;
			tx_compl_ptr[sqe_id] = m3;
			*senddesc23_w1 = vsetq_lane_u64(w1, *senddesc23_w1, 1);
		}
	} else {
		cookie = RTE_MBUF_DIRECT(m3) ? m3 : rte_mbuf_from_indirect(m3);
		aura = (w0 >> 20) & 0xFFFFF;
		w0 &= ~0xFFFFF00000UL;
		w0 |= cnxk_nix_prefree_seg(m3, &aura) << 19;
		w0 |= aura << 20;

		if ((w0 & BIT_ULL(19)) == 0)
			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
	}
	*senddesc23_w0 = vsetq_lane_u64(w0, *senddesc23_w0, 1);
#ifndef RTE_LIBRTE_MEMPOOL_DEBUG
	RTE_SET_USED(cookie);
#endif
}
#endif

static __rte_always_inline void
cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Reduce payload len from base headers */
		paylen = m->pkt_len - lso_sb;

		/* Get iplen position assuming no tunnel hdr */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       RTE_MBUF_F_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Update format for UDP tunneled packet */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Update iplen position to inner ip hdr */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
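
/* Worked example (illustrative): for a non-tunneled TCP/IPv4 TSO packet with
 * l2_len = 14, l3_len = 20 and l4_len = 20, lso_sb = 54 and
 * paylen = pkt_len - 54. That payload length is subtracted from the IPv4
 * total-length field above so that each hardware-generated segment carries a
 * consistent length; for UDP tunnels the outer IP and outer UDP lengths are
 * adjusted the same way.
 */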

static __rte_always_inline void
cn9k_nix_xmit_prepare(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
		      uint64_t *cmd, const uint16_t flags, const uint64_t lso_tun_fmt,
		      uint8_t mark_flag, uint64_t mark_fmt)
{
	uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;
	uint16_t mark_form = 0;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w0.mark_en = 0;
		send_hdr_ext->w1.u = 0;
		ol_flags = m->ol_flags;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		send_hdr->w0.pnc = 0;

	if (!(flags & NIX_TX_MULTI_SEG_F))
		send_hdr->w0.total = m->data_len;
	else
		send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO.
	 */

	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

		/* In case there is no tunnel header, shift the IL3/IL4 fields
		 * so that OL3/OL4 carry the header checksum information.
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));

	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present.
		 */

		/* Inner L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment it by 1 if it is IPV4 as 3 is with csum */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
	}

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
		const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
						  RTE_MBUF_F_TX_IPV6));

		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
		/* Fill for VLAN marking only when VLAN insertion enabled */
		mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
			     (send_hdr_ext->w1.vlan1_ins_ena ||
			      send_hdr_ext->w1.vlan0_ins_ena));
		/* Mask requested flags with packet data information */
		mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
		mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);

		mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
		mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
		markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);

		send_hdr_ext->w0.mark_en = !!mark_off;
		send_hdr_ext->w0.markform = mark_form & 0x7F;
		send_hdr_ext->w0.markptr = markptr;
	}

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uint16_t lso_sb;
		uint64_t mask;

		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel tso */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Update format for UDP tunneled packet */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		struct rte_mbuf *cookie;

		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);
		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			uint64_t aura;
			/* DF bit = 1 if refcount of current mbuf or parent mbuf
			 * is greater than 1
			 * DF bit = 0 otherwise
			 */
			aura = send_hdr->w0.aura;
			send_hdr->w0.df = cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
			send_hdr->w0.aura = aura;
			/* Ensure mbuf fields which got updated in
			 * cnxk_nix_prefree_seg are written before LMTST.
			 */
			rte_io_wmb();
		}
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
#else
		RTE_SET_USED(cookie);
#endif
	} else {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		/* NOFF is handled later for multi-seg */
	}
}

static __rte_always_inline void
cn9k_nix_xmit_prepare_tstamp(struct cn9k_eth_txq *txq, uint64_t *cmd,
			     const uint64_t ol_flags, const uint16_t no_segdw,
			     const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		struct nix_send_mem_s *send_mem;
		uint16_t off = (no_segdw - 1) << 1;
		const uint8_t is_ol_tstamp =
			!(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);

		send_mem = (struct nix_send_mem_s *)(cmd + off);

		/* For packets that do not have RTE_MBUF_F_TX_IEEE1588_TMST set,
		 * the Tx tstamp should not be recorded. Hence change the alg
		 * type to NIX_SENDMEMALG_SUB and point the send mem addr field
		 * to the next 8 bytes so that the actual Tx tstamp register
		 * address is not corrupted.
		 */
		send_mem->w0.cn9k.subdc = NIX_SUBDC_MEM;
		send_mem->w0.cn9k.alg =
			NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);

		send_mem->addr = (rte_iova_t)(((uint64_t *)txq->ts_mem) +
					      (is_ol_tstamp));
	}
}

static __rte_always_inline void
cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
		  const uint32_t flags)
{
	uint64_t lmt_status;

	do {
		roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
		lmt_status = roc_lmt_submit_ldeor(io_addr);
	} while (lmt_status == 0);
}

static __rte_always_inline void
cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
{
	roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
}

static __rte_always_inline void
cn9k_nix_sec_fc_wait_one(const struct cn9k_eth_txq *txq)
{
	uint64_t nb_desc = txq->cpt_desc;
	uint64_t *fc = txq->cpt_fc;

	while (nb_desc <= __atomic_load_n(fc, __ATOMIC_RELAXED))
		;
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
{
	return roc_lmt_submit_ldeor(io_addr);
}

static __rte_always_inline uint64_t
cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
{
	return roc_lmt_submit_ldeorl(io_addr);
}
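
/* Illustrative note (general LMTST behaviour, not stated in this file):
 * roc_lmt_submit_ldeor()/ldeorl() return the LMTST completion status, and a
 * zero result means the transfer did not complete (typically because the LMT
 * line was invalidated, e.g. by an intervening context switch). The command
 * must then be re-copied and re-submitted, which is why the xmit helpers
 * above and below loop until a non-zero status is seen.
 */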

static __rte_always_inline uint16_t
cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
		      uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	uint64_t prefree = 0, aura;
	struct rte_mbuf *cookie;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;

	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];

	/* Start from second segment, first segment is already there */
	i = 1;
	sg_u = sg->u;
	sg_u &= 0xFC0000000000FFFF;
	nb_segs = m->nb_segs - 1;
	m_next = m->next;
	slist = &cmd[3 + off + 1];

	cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
		aura = send_hdr->w0.aura;
		prefree = (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura) << 55);
		send_hdr->w0.aura = aura;
		sg_u |= prefree;
		rte_io_wmb();
	}

	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
	rte_io_wmb();
#else
	RTE_SET_USED(cookie);
#endif
#ifdef RTE_ENABLE_ASSERT
	m->next = NULL;
	m->nb_segs = 1;
#endif
	m = m_next;
	if (!m)
		goto done;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg_u |= (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, NULL) << (i + 55));
			/* Commit changes to mbuf */
			rte_io_wmb();
		}
		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
#ifdef RTE_ENABLE_ASSERT
		m->next = NULL;
#endif
		m = m_next;
	} while (nb_segs);

done:
	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

#ifdef RTE_ENABLE_ASSERT
	rte_io_wmb();
#endif
	return segdw;
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
{
	roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
		       uint16_t segdw)
{
	uint64_t lmt_status;

	do {
		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
		lmt_status = roc_lmt_submit_ldeor(io_addr);
	} while (lmt_status == 0);
}

static __rte_always_inline void
cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
			       rte_iova_t io_addr, uint16_t segdw)
{
	uint64_t lmt_status;

	rte_io_wmb();
	do {
		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
		lmt_status = roc_lmt_submit_ldeor(io_addr);
	} while (lmt_status == 0);
}
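
/* Usage sketch (illustrative only, not how the driver instantiates these
 * helpers): a caller would pass a scratch command buffer sized for the
 * SEND_HDR/SEND_EXT/SG/SEND_MEM words implied by the chosen flags, e.g.
 *
 *	uint64_t cmd[8 + CNXK_NIX_TX_MSEG_SG_DWORDS];	// assumed upper bound
 *	uint16_t nb = cn9k_nix_xmit_pkts(txq, tx_pkts, nb_pkts, cmd,
 *					 NIX_TX_OFFLOAD_L3_L4_CSUM_F);
 *
 * The flags argument is expected to be a compile-time constant so that the
 * unused offload branches in these always-inline helpers are eliminated.
 */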

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
		   uint64_t *cmd, const uint16_t flags)
{
	struct cn9k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uint64_t lso_tun_fmt = 0, mark_fmt = 0;
	void *lmt_addr = txq->lmt_addr;
	struct rte_mbuf *extm = NULL;
	uint8_t mark_flag = 0;
	uint16_t i;

	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
		handle_tx_completion_pkts(txq, 0);

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	cn9k_nix_tx_skeleton(txq, cmd, flags, 1);

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		lso_tun_fmt = txq->lso_tun_fmt;

		for (i = 0; i < pkts; i++)
			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		mark_fmt = txq->mark_fmt;
		mark_flag = txq->mark_flag;
	}

	/* Commit any changes to the packets here, as no further changes
	 * will be made unless NIX_TX_OFFLOAD_MBUF_NOFF_F is set.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	for (i = 0; i < pkts; i++) {
		cn9k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt,
				      mark_flag, mark_fmt);
		cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,
					     flags);
		cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
	}

	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
		cn9k_nix_free_extmbuf(extm);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	return pkts;
}

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
			uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct cn9k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uint64_t lso_tun_fmt = 0, mark_fmt = 0;
	void *lmt_addr = txq->lmt_addr;
	struct rte_mbuf *extm = NULL;
	uint8_t mark_flag = 0;
	uint16_t segdw;
	uint64_t i;

	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
		handle_tx_completion_pkts(txq, 0);

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	cn9k_nix_tx_skeleton(txq, cmd, flags, 1);

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		lso_tun_fmt = txq->lso_tun_fmt;

		for (i = 0; i < pkts; i++)
			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		mark_fmt = txq->mark_fmt;
		mark_flag = txq->mark_flag;
	}

	/* Commit any changes to the packets here, as no further changes
	 * will be made unless NIX_TX_OFFLOAD_MBUF_NOFF_F is set.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	for (i = 0; i < pkts; i++) {
		cn9k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt,
				      mark_flag, mark_fmt);
		segdw = cn9k_nix_prepare_mseg(txq, tx_pkts[i], &extm, cmd, flags);
		cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,
					     segdw, flags);
		cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
	}

	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
		cn9k_nix_free_extmbuf(extm);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	return pkts;
}
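
/* The vector path below mirrors the scalar helpers above but builds the
 * SEND_HDR/SG/EXT/MEM words for NIX_DESCS_PER_LOOP (4) packets at a time in
 * NEON registers; cmd0..cmd3 hold, per packet, the zipped descriptor word
 * pairs that are later stored to the LMT line.
 */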

#if defined(RTE_ARCH_ARM64)

static __rte_always_inline void
cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
		     union nix_send_ext_w0_u *w0, uint64_t ol_flags,
		     uint64_t flags)
{
	uint16_t lso_sb;
	uint64_t mask;

	if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
		return;

	mask = -(!w1->il3type);
	lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

	w0->u |= BIT(14);
	w0->lso_sb = lso_sb;
	w0->lso_mps = m->tso_segsz;
	w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
	w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

	/* Handle tunnel tso */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
		const uint8_t is_udp_tun =
			(CNXK_NIX_UDP_TUN_BITMASK >>
			 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
			0x1;

		w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
		w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
		/* Update format for UDP tunneled packet */
		w0->lso_format += is_udp_tun ? 2 : 6;

		w0->lso_format += !!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 1;
	}
}

static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
			       struct rte_mbuf *m, struct rte_mbuf **extm, uint64_t *cmd,
			       struct nix_send_hdr_s *send_hdr,
			       union nix_send_sg_s *sg, const uint32_t flags)
{
	struct rte_mbuf *m_next, *cookie;
	uint64_t *slist, sg_u, aura;
	uint16_t nb_segs;
	uint64_t segdw;
	int i = 1;

	send_hdr->w0.total = m->pkt_len;
	/* Clear sg->u header before use */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[0];

	sg_u = sg_u | ((uint64_t)m->data_len);

	nb_segs = m->nb_segs - 1;
	m_next = m->next;

	/* Set invert df if buffer is not to be freed by H/W */
	cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
		aura = send_hdr->w0.aura;
		sg_u |= (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura) << 55);
		send_hdr->w0.aura = aura;
	}
	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
	rte_io_wmb();
#else
	RTE_SET_USED(cookie);
#endif

#ifdef RTE_ENABLE_ASSERT
	m->next = NULL;
	m->nb_segs = 1;
#endif
	m = m_next;
	/* Fill mbuf segments */
	do {
		m_next = m->next;
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);
		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura) << (i + 55));
		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
#ifdef RTE_ENABLE_ASSERT
		m->next = NULL;
#endif
		m = m_next;
	} while (nb_segs);

	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];

	segdw += 2;
	/* Roundup extra dwords to multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Default dwords */
	segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
		 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

#ifdef RTE_ENABLE_ASSERT
	rte_io_wmb();
#endif
	return segdw;
}
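
/* Illustrative note: segdw is counted in 16 B units (the SEND descriptor
 * sizem1 granularity), not 8 B dwords. A two-segment packet, for example,
 * yields two units of SG/IOVA data plus one unit for SEND_HDR, plus optional
 * EXT/MEM units, matching the accounting at the end of the function above.
 */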

static __rte_always_inline uint8_t
cn9k_nix_prepare_mseg_vec(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
			  uint64_t *cmd, uint64x2_t *cmd0, uint64x2_t *cmd1, const uint32_t flags)
{
	struct nix_send_hdr_s send_hdr;
	struct rte_mbuf *cookie;
	union nix_send_sg_s sg;
	uint64_t aura;
	uint8_t ret;

	if (m->nb_segs == 1) {
		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			send_hdr.w0.u = vgetq_lane_u64(cmd0[0], 0);
			send_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);
			sg.u = vgetq_lane_u64(cmd1[0], 0);
			aura = send_hdr.w0.aura;
			sg.u |= (cn9k_nix_prefree_seg(m, extm, txq, &send_hdr, &aura) << 55);
			send_hdr.w0.aura = aura;
			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
			cmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);
			cmd0[0] = vsetq_lane_u64(send_hdr.w1.u, cmd0[0], 1);
		}

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1[0], 0);
		if (!(sg.u & (1ULL << 55)))
			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
		rte_io_wmb();
#else
		RTE_SET_USED(cookie);
#endif
		return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
		       !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	}

	send_hdr.w0.u = vgetq_lane_u64(cmd0[0], 0);
	send_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);
	sg.u = vgetq_lane_u64(cmd1[0], 0);

	ret = cn9k_nix_prepare_mseg_vec_list(txq, m, extm, cmd, &send_hdr, &sg, flags);

	cmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);
	cmd0[0] = vsetq_lane_u64(send_hdr.w1.u, cmd0[0], 1);
	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
	return ret;
}

#define NIX_DESCS_PER_LOOP 4
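
/* Illustrative note: one LMT line is 128 B, i.e. 8 of the 16 B units counted
 * in segdw[]. The "<= 8" checks below pack either all four single-segment
 * packets (4 x 2 units) or two multi-segment packets into a single LMTST;
 * anything larger is submitted one packet per LMTST.
 */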

static __rte_always_inline void
cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
			       uint64x2_t *cmd2, uint64x2_t *cmd3,
			       uint8_t *segdw,
			       uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
			       uint64_t *lmt_addr, rte_iova_t io_addr,
			       const uint32_t flags)
{
	uint64_t lmt_status;
	uint8_t j, off;

	if (!(flags & NIX_TX_NEED_EXT_HDR) &&
	    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
		/* No extra segments in the 4 consecutive packets. */
		if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
			do {
				vst1q_u64(lmt_addr, cmd0[0]);
				vst1q_u64(lmt_addr + 2, cmd1[0]);
				vst1q_u64(lmt_addr + 4, cmd0[1]);
				vst1q_u64(lmt_addr + 6, cmd1[1]);
				vst1q_u64(lmt_addr + 8, cmd0[2]);
				vst1q_u64(lmt_addr + 10, cmd1[2]);
				vst1q_u64(lmt_addr + 12, cmd0[3]);
				vst1q_u64(lmt_addr + 14, cmd1[3]);
				lmt_status = roc_lmt_submit_ldeor(io_addr);
			} while (lmt_status == 0);

			return;
		}
	}

	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
		/* Fit consecutive packets in same LMTLINE. */
		if ((segdw[j] + segdw[j + 1]) <= 8) {
again0:
			if ((flags & NIX_TX_NEED_EXT_HDR) &&
			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				/* Copy segs */
				off = segdw[j] - 4;
				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
				off <<= 1;
				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);

				vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
				roc_lmt_mov_seg(lmt_addr + 14 + off,
						slist[j + 1], segdw[j + 1] - 4);
				off += ((segdw[j + 1] - 4) << 1);
				vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				/* Copy segs */
				off = segdw[j] - 3;
				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
				off <<= 1;
				vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
				roc_lmt_mov_seg(lmt_addr + 12 + off,
						slist[j + 1], segdw[j + 1] - 3);
			} else {
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
				/* Copy segs */
				off = segdw[j] - 2;
				roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
				off <<= 1;
				vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
				roc_lmt_mov_seg(lmt_addr + 8 + off,
						slist[j + 1], segdw[j + 1] - 2);
			}
			lmt_status = roc_lmt_submit_ldeor(io_addr);
			if (lmt_status == 0)
				goto again0;
			j += 2;
		} else {
again1:
			if ((flags & NIX_TX_NEED_EXT_HDR) &&
			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				/* Copy segs */
				off = segdw[j] - 4;
				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
				off <<= 1;
				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				/* Copy segs */
				off = segdw[j] - 3;
				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
			} else {
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
				/* Copy segs */
				off = segdw[j] - 2;
				roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
			}
			lmt_status = roc_lmt_submit_ldeor(io_addr);
			if (lmt_status == 0)
				goto again1;
			j += 1;
		}
	}
}
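
/* The vector burst function below processes packets in groups of
 * NIX_DESCS_PER_LOOP. Register layout: senddesc*_w0/w1 carry SEND_HDR,
 * sgdesc*_w0/w1 carry SEND_SG, sendext*_w0/w1 carry SEND_EXT and
 * sendmem*_w0/w1 carry SEND_MEM; each uint64x2_t covers two of the four
 * packets ("01" and "23"). The leftover packets counted in pkts_left are
 * presumably finished off by the scalar path (not shown in this excerpt).
 */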

static __rte_always_inline uint16_t
cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
		cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint64x2_t sendext01_w0, sendext23_w0;
	uint64x2_t sendext01_w1, sendext23_w1;
	uint64x2_t sendmem01_w0, sendmem23_w0;
	uint64x2_t sendmem01_w1, sendmem23_w1;
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct cn9k_eth_txq *txq = tx_queue;
	uint64_t *lmt_addr = txq->lmt_addr;
	rte_iova_t io_addr = txq->io_addr;
	uint64x2_t ltypes01, ltypes23;
	struct rte_mbuf *extm = NULL;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uint64_t lmt_status, i;
	uint16_t pkts_left;

	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
		handle_tx_completion_pkts(txq, 0);

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
	pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		for (i = 0; i < pkts; i++)
			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	/* Commit any changes to the packets here, as no further changes
	 * will be made unless NIX_TX_OFFLOAD_MBUF_NOFF_F is set.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
	senddesc23_w0 = senddesc01_w0;

	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
	sgdesc23_w0 = sgdesc01_w0;

	if (flags & NIX_TX_NEED_EXT_HDR) {
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
						   BIT_ULL(15));
			sendmem01_w0 =
				vdupq_n_u64((NIX_SUBDC_MEM << 60) |
					    (NIX_SENDMEMALG_SETTSTMP << 56));
			sendmem23_w0 = sendmem01_w0;
			sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
			sendmem23_w1 = sendmem01_w1;
		} else {
			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
		}
		sendext23_w0 = sendext01_w0;

		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
			sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
		else
			sendext01_w1 = vdupq_n_u64(0);
		sendext23_w1 = sendext01_w1;
	}

	for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 =
			vbicq_u64(senddesc01_w0, vdupq_n_u64(0x800FFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;

		/* Clear vlan enables. */
		if (flags & NIX_TX_NEED_EXT_HDR) {
			sendext01_w1 = vbicq_u64(sendext01_w1,
						 vdupq_n_u64(0x3FFFF00FFFF00));
			sendext23_w1 = sendext01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			/* Reset send mem alg to SETTSTMP from SUB */
			sendmem01_w0 = vbicq_u64(sendmem01_w0,
						 vdupq_n_u64(BIT_ULL(59)));
			/* Reset send mem address to default. */
			sendmem01_w1 =
				vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
			sendmem23_w0 = sendmem01_w0;
			sendmem23_w1 = sendmem01_w1;
		}

		if (flags & NIX_TX_OFFLOAD_TSO_F) {
			/* Clear the LSO enable bit. */
			sendext01_w0 = vbicq_u64(sendext01_w0,
						 vdupq_n_u64(BIT_ULL(14)));
			sendext23_w0 = sendext01_w0;
		}

		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		/*
		 * Get each mbuf's ol_flags, iova, pktlen and dataoff:
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 =
			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
		len_olflags0 = vld1q_u64(mbuf0 + 3);
		dataoff_iova1 =
			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, vld1q_u64(mbuf1), 1);
		len_olflags1 = vld1q_u64(mbuf1 + 3);
		dataoff_iova2 =
			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, vld1q_u64(mbuf2), 1);
		len_olflags2 = vld1q_u64(mbuf2 + 3);
		dataoff_iova3 =
			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, vld1q_u64(mbuf3), 1);
		len_olflags3 = vld1q_u64(mbuf3 + 3);

		/* Move mbuf pointers to point to the pool field */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));

		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */

			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf0 + 2)
				     : "memory");

			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
				     : [a] "+w"(senddesc01_w1)
				     : [in] "r"(mbuf1 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf2 + 2)
				     : "memory");

			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
				     : [b] "+w"(senddesc23_w1)
				     : [in] "r"(mbuf3 + 2)
				     : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}

		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

		/*
		 * Pick only 16 bits of pktlen preset at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

		/* Move mbuf to point to pool_id. */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mempool, pool_id));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mempool, pool_id));

		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
				0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
				0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
				0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
				0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
				0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
				0x02, /* RTE_MBUF_F_TX_IPV4 */
				0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
				0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
				0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
				0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_TCP_CKSUM
				       */
				0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_SCTP_CKSUM
				       */
				0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
				       * RTE_MBUF_F_TX_UDP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte and rest
			 * don't care
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */

			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 56:63 of oltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
					0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
					0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
					0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
					0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
					0x02, /* RTE_MBUF_F_TX_IPV4 */
					0x12, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x22, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x32, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
					0x03, /* RTE_MBUF_F_TX_IPV4 |
					       * RTE_MBUF_F_TX_IP_CKSUM
					       */
					0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_TCP_CKSUM
					       */
					0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_SCTP_CKSUM
					       */
					0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
					       * RTE_MBUF_F_TX_UDP_CKSUM
					       */
				},

				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}};

			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1,
				0,
				1,
				0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1CF0020000000000,
				0x1CF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */

			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right oltype by 2 and iltype by 4
			 * to start oltype nibble from BIT(58)
			 * instead of BIT(56) and iltype nibble from BIT(48)
			 * instead of BIT(52).
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Pick only relevant fields i.e Bit 48:55 of iltype and
			 * Bit 56:63 of oltype and place it in corresponding
			 * place in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(
				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(
				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));

			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
		}

		xmask01 = vdupq_n_u64(0);
		xmask23 = xmask01;
		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf0)
			     : "memory");

		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
			     : [a] "+w"(xmask01)
			     : [in] "r"(mbuf1)
			     : "memory");

		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf2)
			     : "memory");

		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
			     : [b] "+w"(xmask23)
			     : [in] "r"(mbuf3)
			     : "memory");
		xmask01 = vshlq_n_u64(xmask01, 20);
		xmask23 = vshlq_n_u64(xmask23, 20);

		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);

		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
			/* Tx ol_flag for vlan. */
			const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
			/* Bit enable for VLAN1 */
			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
			/* Tx ol_flag for QnQ. */
			const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
			/* Bit enable for VLAN0 */
			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
			/* Load vlan values from packet. outer is VLAN 0 */
			uint64x2_t ext01 = {
				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
			};
			uint64x2_t ext23 = {
				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
			};

			/* Get ol_flags of the packets. */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/* ORR vlan outer/inner values into cmd. */
			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);

			/* Test for offload enable bits and generate masks. */
			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(xtmp128, olq),
						      mlq));
			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
						      mlv),
					    vandq_u64(vtstq_u64(ytmp128, olq),
						      mlq));

			/* Set vlan enable bits into cmd based on mask. */
			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
		}
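
		/* Note (mirrors the scalar path above): VLAN1 is the inner TCI
		 * inserted at ptr 12 and VLAN0 is the outer/QinQ TCI; bits 48
		 * and 49 of SEND_EXT W1 are the per-packet insertion enables
		 * derived from RTE_MBUF_F_TX_QINQ and RTE_MBUF_F_TX_VLAN
		 * respectively.
		 */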
*/ 1880 const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)}; 1881 /* Increment send mem address by 8. */ 1882 const uint64x2_t addr = {0x8, 0x8}; 1883 1884 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); 1885 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); 1886 1887 /* Check if timestamp is requested and generate inverted 1888 * mask as we need not make any changes to default cmd 1889 * value. 1890 */ 1891 xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128)); 1892 ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128)); 1893 1894 /* Change send mem address to an 8 byte offset when 1895 * TSTMP is disabled. 1896 */ 1897 sendmem01_w1 = vaddq_u64(sendmem01_w1, 1898 vandq_u64(xtmp128, addr)); 1899 sendmem23_w1 = vaddq_u64(sendmem23_w1, 1900 vandq_u64(ytmp128, addr)); 1901 /* Change send mem alg to SUB when TSTMP is disabled. */ 1902 sendmem01_w0 = vorrq_u64(sendmem01_w0, 1903 vandq_u64(xtmp128, alg)); 1904 sendmem23_w0 = vorrq_u64(sendmem23_w0, 1905 vandq_u64(ytmp128, alg)); 1906 1907 cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1); 1908 cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1); 1909 cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1); 1910 cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1); 1911 } 1912 1913 if (flags & NIX_TX_OFFLOAD_TSO_F) { 1914 uint64_t sx_w0[NIX_DESCS_PER_LOOP]; 1915 uint64_t sd_w1[NIX_DESCS_PER_LOOP]; 1916 1917 /* Extract SD W1 as we need to set L4 types. */ 1918 vst1q_u64(sd_w1, senddesc01_w1); 1919 vst1q_u64(sd_w1 + 2, senddesc23_w1); 1920 1921 /* Extract SX W0 as we need to set LSO fields. */ 1922 vst1q_u64(sx_w0, sendext01_w0); 1923 vst1q_u64(sx_w0 + 2, sendext23_w0); 1924 1925 /* Extract ol_flags. */ 1926 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); 1927 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); 1928 1929 /* Prepare individual mbufs. */ 1930 cn9k_nix_prepare_tso(tx_pkts[0], 1931 (union nix_send_hdr_w1_u *)&sd_w1[0], 1932 (union nix_send_ext_w0_u *)&sx_w0[0], 1933 vgetq_lane_u64(xtmp128, 0), flags); 1934 1935 cn9k_nix_prepare_tso(tx_pkts[1], 1936 (union nix_send_hdr_w1_u *)&sd_w1[1], 1937 (union nix_send_ext_w0_u *)&sx_w0[1], 1938 vgetq_lane_u64(xtmp128, 1), flags); 1939 1940 cn9k_nix_prepare_tso(tx_pkts[2], 1941 (union nix_send_hdr_w1_u *)&sd_w1[2], 1942 (union nix_send_ext_w0_u *)&sx_w0[2], 1943 vgetq_lane_u64(ytmp128, 0), flags); 1944 1945 cn9k_nix_prepare_tso(tx_pkts[3], 1946 (union nix_send_hdr_w1_u *)&sd_w1[3], 1947 (union nix_send_ext_w0_u *)&sx_w0[3], 1948 vgetq_lane_u64(ytmp128, 1), flags); 1949 1950 senddesc01_w1 = vld1q_u64(sd_w1); 1951 senddesc23_w1 = vld1q_u64(sd_w1 + 2); 1952 1953 sendext01_w0 = vld1q_u64(sx_w0); 1954 sendext23_w0 = vld1q_u64(sx_w0 + 2); 1955 } 1956 1957 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) && 1958 !(flags & NIX_TX_MULTI_SEG_F)) { 1959 /* Set don't free bit if reference count > 1 */ 1960 cn9k_nix_prefree_seg_vec(tx_pkts, &extm, txq, &senddesc01_w0, 1961 &senddesc23_w0, &senddesc01_w1, &senddesc23_w1); 1962 /* Ensuring mbuf fields which got updated in 1963 * cnxk_nix_prefree_seg are written before LMTST. 
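			 * rte_io_wmb() below provides that ordering with
			 * respect to the subsequent stores to the device.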
1964 */ 1965 rte_io_wmb(); 1966 } else if (!(flags & NIX_TX_MULTI_SEG_F)) { 1967 /* Move mbufs to iova */ 1968 mbuf0 = (uint64_t *)tx_pkts[0]; 1969 mbuf1 = (uint64_t *)tx_pkts[1]; 1970 mbuf2 = (uint64_t *)tx_pkts[2]; 1971 mbuf3 = (uint64_t *)tx_pkts[3]; 1972 1973 /* Mark mempool object as "put" since 1974 * it is freed by NIX 1975 */ 1976 RTE_MEMPOOL_CHECK_COOKIES( 1977 ((struct rte_mbuf *)mbuf0)->pool, 1978 (void **)&mbuf0, 1, 0); 1979 1980 RTE_MEMPOOL_CHECK_COOKIES( 1981 ((struct rte_mbuf *)mbuf1)->pool, 1982 (void **)&mbuf1, 1, 0); 1983 1984 RTE_MEMPOOL_CHECK_COOKIES( 1985 ((struct rte_mbuf *)mbuf2)->pool, 1986 (void **)&mbuf2, 1, 0); 1987 1988 RTE_MEMPOOL_CHECK_COOKIES( 1989 ((struct rte_mbuf *)mbuf3)->pool, 1990 (void **)&mbuf3, 1, 0); 1991 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG 1992 rte_io_wmb(); 1993 #endif 1994 } 1995 1996 /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */ 1997 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1); 1998 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1); 1999 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1); 2000 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1); 2001 2002 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1); 2003 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1); 2004 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); 2005 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); 2006 2007 if (flags & NIX_TX_NEED_EXT_HDR) { 2008 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); 2009 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); 2010 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); 2011 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); 2012 } 2013 2014 if (flags & NIX_TX_MULTI_SEG_F) { 2015 uint64_t seg_list[NIX_DESCS_PER_LOOP] 2016 [CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; 2017 uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1]; 2018 2019 /* Build mseg list for each packet individually. */ 2020 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) 2021 segdw[j] = cn9k_nix_prepare_mseg_vec(txq, 2022 tx_pkts[j], &extm, 2023 seg_list[j], &cmd0[j], 2024 &cmd1[j], flags); 2025 segdw[4] = 8; 2026 2027 /* Commit all changes to mbuf before LMTST. */ 2028 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) 2029 rte_io_wmb(); 2030 2031 cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3, 2032 segdw, seg_list, 2033 lmt_addr, io_addr, 2034 flags); 2035 } else if (flags & NIX_TX_NEED_EXT_HDR) { 2036 /* With ext header in the command we can no longer send 2037 * all 4 packets together since LMTLINE is 128bytes. 2038 * Split and Tx twice. 
2039 */ 2040 do { 2041 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { 2042 vst1q_u64(lmt_addr, cmd0[0]); 2043 vst1q_u64(lmt_addr + 2, cmd2[0]); 2044 vst1q_u64(lmt_addr + 4, cmd1[0]); 2045 vst1q_u64(lmt_addr + 6, cmd3[0]); 2046 vst1q_u64(lmt_addr + 8, cmd0[1]); 2047 vst1q_u64(lmt_addr + 10, cmd2[1]); 2048 vst1q_u64(lmt_addr + 12, cmd1[1]); 2049 vst1q_u64(lmt_addr + 14, cmd3[1]); 2050 } else { 2051 vst1q_u64(lmt_addr, cmd0[0]); 2052 vst1q_u64(lmt_addr + 2, cmd2[0]); 2053 vst1q_u64(lmt_addr + 4, cmd1[0]); 2054 vst1q_u64(lmt_addr + 6, cmd0[1]); 2055 vst1q_u64(lmt_addr + 8, cmd2[1]); 2056 vst1q_u64(lmt_addr + 10, cmd1[1]); 2057 } 2058 lmt_status = roc_lmt_submit_ldeor(io_addr); 2059 } while (lmt_status == 0); 2060 2061 do { 2062 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) { 2063 vst1q_u64(lmt_addr, cmd0[2]); 2064 vst1q_u64(lmt_addr + 2, cmd2[2]); 2065 vst1q_u64(lmt_addr + 4, cmd1[2]); 2066 vst1q_u64(lmt_addr + 6, cmd3[2]); 2067 vst1q_u64(lmt_addr + 8, cmd0[3]); 2068 vst1q_u64(lmt_addr + 10, cmd2[3]); 2069 vst1q_u64(lmt_addr + 12, cmd1[3]); 2070 vst1q_u64(lmt_addr + 14, cmd3[3]); 2071 } else { 2072 vst1q_u64(lmt_addr, cmd0[2]); 2073 vst1q_u64(lmt_addr + 2, cmd2[2]); 2074 vst1q_u64(lmt_addr + 4, cmd1[2]); 2075 vst1q_u64(lmt_addr + 6, cmd0[3]); 2076 vst1q_u64(lmt_addr + 8, cmd2[3]); 2077 vst1q_u64(lmt_addr + 10, cmd1[3]); 2078 } 2079 lmt_status = roc_lmt_submit_ldeor(io_addr); 2080 } while (lmt_status == 0); 2081 } else { 2082 do { 2083 vst1q_u64(lmt_addr, cmd0[0]); 2084 vst1q_u64(lmt_addr + 2, cmd1[0]); 2085 vst1q_u64(lmt_addr + 4, cmd0[1]); 2086 vst1q_u64(lmt_addr + 6, cmd1[1]); 2087 vst1q_u64(lmt_addr + 8, cmd0[2]); 2088 vst1q_u64(lmt_addr + 10, cmd1[2]); 2089 vst1q_u64(lmt_addr + 12, cmd0[3]); 2090 vst1q_u64(lmt_addr + 14, cmd1[3]); 2091 lmt_status = roc_lmt_submit_ldeor(io_addr); 2092 } while (lmt_status == 0); 2093 } 2094 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; 2095 } 2096 2097 if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena) 2098 cn9k_nix_free_extmbuf(extm); 2099 2100 if (unlikely(pkts_left)) { 2101 if (flags & NIX_TX_MULTI_SEG_F) 2102 pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, 2103 pkts_left, cmd, flags); 2104 else 2105 pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, 2106 cmd, flags); 2107 } 2108 2109 return pkts; 2110 } 2111 2112 #else 2113 static __rte_always_inline uint16_t 2114 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, 2115 uint16_t pkts, uint64_t *cmd, const uint16_t flags) 2116 { 2117 RTE_SET_USED(tx_queue); 2118 RTE_SET_USED(tx_pkts); 2119 RTE_SET_USED(pkts); 2120 RTE_SET_USED(cmd); 2121 RTE_SET_USED(flags); 2122 return 0; 2123 } 2124 #endif 2125 2126 #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F 2127 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F 2128 #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F 2129 #define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F 2130 #define TSO_F NIX_TX_OFFLOAD_TSO_F 2131 #define TSP_F NIX_TX_OFFLOAD_TSTAMP_F 2132 #define T_SEC_F NIX_TX_OFFLOAD_SECURITY_F 2133 2134 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */ 2135 #define NIX_TX_FASTPATH_MODES_0_15 \ 2136 T(no_offload, 6, NIX_TX_OFFLOAD_NONE) \ 2137 T(l3l4csum, 6, L3L4CSUM_F) \ 2138 T(ol3ol4csum, 6, OL3OL4CSUM_F) \ 2139 T(ol3ol4csum_l3l4csum, 6, OL3OL4CSUM_F | L3L4CSUM_F) \ 2140 T(vlan, 6, VLAN_F) \ 2141 T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F) \ 2142 T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F) \ 2143 T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2144 T(noff, 6, NOFF_F) \ 2145 T(noff_l3l4csum, 6, NOFF_F | L3L4CSUM_F) \ 2146 
T(noff_ol3ol4csum, 6, NOFF_F | OL3OL4CSUM_F) \ 2147 T(noff_ol3ol4csum_l3l4csum, 6, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2148 T(noff_vlan, 6, NOFF_F | VLAN_F) \ 2149 T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F) \ 2150 T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F) \ 2151 T(noff_vlan_ol3ol4csum_l3l4csum, 6, \ 2152 NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) 2153 2154 #define NIX_TX_FASTPATH_MODES_16_31 \ 2155 T(tso, 6, TSO_F) \ 2156 T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F) \ 2157 T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F) \ 2158 T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2159 T(tso_vlan, 6, TSO_F | VLAN_F) \ 2160 T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F) \ 2161 T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F) \ 2162 T(tso_vlan_ol3ol4csum_l3l4csum, 6, \ 2163 TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2164 T(tso_noff, 6, TSO_F | NOFF_F) \ 2165 T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F) \ 2166 T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F) \ 2167 T(tso_noff_ol3ol4csum_l3l4csum, 6, \ 2168 TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2169 T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F) \ 2170 T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \ 2171 T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \ 2172 T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6, \ 2173 TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) 2174 2175 #define NIX_TX_FASTPATH_MODES_32_47 \ 2176 T(ts, 8, TSP_F) \ 2177 T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F) \ 2178 T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F) \ 2179 T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2180 T(ts_vlan, 8, TSP_F | VLAN_F) \ 2181 T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F) \ 2182 T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F) \ 2183 T(ts_vlan_ol3ol4csum_l3l4csum, 8, \ 2184 TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2185 T(ts_noff, 8, TSP_F | NOFF_F) \ 2186 T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F) \ 2187 T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F) \ 2188 T(ts_noff_ol3ol4csum_l3l4csum, 8, \ 2189 TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2190 T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F) \ 2191 T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \ 2192 T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \ 2193 T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8, \ 2194 TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) 2195 2196 #define NIX_TX_FASTPATH_MODES_48_63 \ 2197 T(ts_tso, 8, TSP_F | TSO_F) \ 2198 T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F) \ 2199 T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F) \ 2200 T(ts_tso_ol3ol4csum_l3l4csum, 8, \ 2201 TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2202 T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F) \ 2203 T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \ 2204 T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \ 2205 T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8, \ 2206 TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2207 T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F) \ 2208 T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \ 2209 T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \ 2210 T(ts_tso_noff_ol3ol4csum_l3l4csum, 8, \ 2211 TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2212 T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F) \ 2213 T(ts_tso_noff_vlan_l3l4csum, 8, \ 2214 TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \ 2215 
T(ts_tso_noff_vlan_ol3ol4csum, 8, \ 2216 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \ 2217 T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8, \ 2218 TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) 2219 2220 #define NIX_TX_FASTPATH_MODES_64_79 \ 2221 T(sec, 6, T_SEC_F) \ 2222 T(sec_l3l4csum, 6, T_SEC_F | L3L4CSUM_F) \ 2223 T(sec_ol3ol4csum, 6, T_SEC_F | OL3OL4CSUM_F) \ 2224 T(sec_ol3ol4csum_l3l4csum, 6, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2225 T(sec_vlan, 6, T_SEC_F | VLAN_F) \ 2226 T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F) \ 2227 T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F) \ 2228 T(sec_vlan_ol3ol4csum_l3l4csum, 6, \ 2229 T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2230 T(sec_noff, 6, T_SEC_F | NOFF_F) \ 2231 T(sec_noff_l3l4csum, 6, T_SEC_F | NOFF_F | L3L4CSUM_F) \ 2232 T(sec_noff_ol3ol4csum, 6, T_SEC_F | NOFF_F | OL3OL4CSUM_F) \ 2233 T(sec_noff_ol3ol4csum_l3l4csum, 6, \ 2234 T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2235 T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F) \ 2236 T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F) \ 2237 T(sec_noff_vlan_ol3ol4csum, 6, \ 2238 T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \ 2239 T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6, \ 2240 T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) 2241 2242 #define NIX_TX_FASTPATH_MODES_80_95 \ 2243 T(sec_tso, 6, T_SEC_F | TSO_F) \ 2244 T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F) \ 2245 T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F) \ 2246 T(sec_tso_ol3ol4csum_l3l4csum, 6, \ 2247 T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2248 T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F) \ 2249 T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F) \ 2250 T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \ 2251 T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6, \ 2252 T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2253 T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F) \ 2254 T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F) \ 2255 T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \ 2256 T(sec_tso_noff_ol3ol4csum_l3l4csum, 6, \ 2257 T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2258 T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F) \ 2259 T(sec_tso_noff_vlan_l3l4csum, 6, \ 2260 T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \ 2261 T(sec_tso_noff_vlan_ol3ol4csum, 6, \ 2262 T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \ 2263 T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6, \ 2264 T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) 2265 2266 #define NIX_TX_FASTPATH_MODES_96_111 \ 2267 T(sec_ts, 8, T_SEC_F | TSP_F) \ 2268 T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F) \ 2269 T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F) \ 2270 T(sec_ts_ol3ol4csum_l3l4csum, 8, \ 2271 T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2272 T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F) \ 2273 T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F) \ 2274 T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F) \ 2275 T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8, \ 2276 T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2277 T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F) \ 2278 T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F) \ 2279 T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F) \ 2280 T(sec_ts_noff_ol3ol4csum_l3l4csum, 8, \ 2281 T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2282 T(sec_ts_noff_vlan, 8, 
T_SEC_F | TSP_F | NOFF_F | VLAN_F) \ 2283 T(sec_ts_noff_vlan_l3l4csum, 8, \ 2284 T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F) \ 2285 T(sec_ts_noff_vlan_ol3ol4csum, 8, \ 2286 T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \ 2287 T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8, \ 2288 T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) 2289 2290 #define NIX_TX_FASTPATH_MODES_112_127 \ 2291 T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F) \ 2292 T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F) \ 2293 T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F) \ 2294 T(sec_ts_tso_ol3ol4csum_l3l4csum, 8, \ 2295 T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2296 T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F) \ 2297 T(sec_ts_tso_vlan_l3l4csum, 8, \ 2298 T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F) \ 2299 T(sec_ts_tso_vlan_ol3ol4csum, 8, \ 2300 T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \ 2301 T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8, \ 2302 T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2303 T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F) \ 2304 T(sec_ts_tso_noff_l3l4csum, 8, \ 2305 T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F) \ 2306 T(sec_ts_tso_noff_ol3ol4csum, 8, \ 2307 T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \ 2308 T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8, \ 2309 T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \ 2310 T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F) \ 2311 T(sec_ts_tso_noff_vlan_l3l4csum, 8, \ 2312 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F) \ 2313 T(sec_ts_tso_noff_vlan_ol3ol4csum, 8, \ 2314 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \ 2315 T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8, \ 2316 T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | \ 2317 L3L4CSUM_F) 2318 2319 #define NIX_TX_FASTPATH_MODES \ 2320 NIX_TX_FASTPATH_MODES_0_15 \ 2321 NIX_TX_FASTPATH_MODES_16_31 \ 2322 NIX_TX_FASTPATH_MODES_32_47 \ 2323 NIX_TX_FASTPATH_MODES_48_63 \ 2324 NIX_TX_FASTPATH_MODES_64_79 \ 2325 NIX_TX_FASTPATH_MODES_80_95 \ 2326 NIX_TX_FASTPATH_MODES_96_111 \ 2327 NIX_TX_FASTPATH_MODES_112_127 2328 2329 #define T(name, sz, flags) \ 2330 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name( \ 2331 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ 2332 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name( \ 2333 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ 2334 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \ 2335 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \ 2336 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \ 2337 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); 2338 2339 NIX_TX_FASTPATH_MODES 2340 #undef T 2341 2342 #define NIX_TX_XMIT(fn, sz, flags) \ 2343 uint16_t __rte_noinline __rte_hot fn( \ 2344 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ 2345 { \ 2346 uint64_t cmd[sz]; \ 2347 /* For TSO inner checksum is a must */ \ 2348 if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ 2349 !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ 2350 return 0; \ 2351 return cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, \ 2352 flags); \ 2353 } 2354 2355 #define NIX_TX_XMIT_MSEG(fn, sz, flags) \ 2356 uint16_t __rte_noinline __rte_hot fn( \ 2357 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ 2358 { \ 2359 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ 2360 /* For TSO inner checksum is a must */ \ 2361 if (((flags) & 
NIX_TX_OFFLOAD_TSO_F) && \ 2362 !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ 2363 return 0; \ 2364 return cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ 2365 (flags) | NIX_TX_MULTI_SEG_F); \ 2366 } 2367 2368 #define NIX_TX_XMIT_VEC(fn, sz, flags) \ 2369 uint16_t __rte_noinline __rte_hot fn( \ 2370 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ 2371 { \ 2372 uint64_t cmd[sz]; \ 2373 /* For TSO inner checksum is a must */ \ 2374 if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ 2375 !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ 2376 return 0; \ 2377 return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ 2378 (flags)); \ 2379 } 2380 2381 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags) \ 2382 uint16_t __rte_noinline __rte_hot fn( \ 2383 void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \ 2384 { \ 2385 uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \ 2386 /* For TSO inner checksum is a must */ \ 2387 if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ 2388 !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ 2389 return 0; \ 2390 return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \ 2391 (flags) | \ 2392 NIX_TX_MULTI_SEG_F); \ 2393 } 2394 2395 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_all_offload(void *tx_queue, 2396 struct rte_mbuf **tx_pkts, 2397 uint16_t pkts); 2398 2399 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_all_offload(void *tx_queue, 2400 struct rte_mbuf **tx_pkts, 2401 uint16_t pkts); 2402 2403 #endif /* __CN9K_TX_H__ */ 2404
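
/*
 * Illustration only, not part of the driver: each T(name, sz, flags)
 * entry above pairs with one of the NIX_TX_XMIT* templates to stamp
 * out a fast-path function. Expanding, by hand,
 * NIX_TX_XMIT(cn9k_nix_xmit_pkts_vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)
 * gives roughly:
 *
 *	uint16_t __rte_noinline __rte_hot
 *	cn9k_nix_xmit_pkts_vlan_l3l4csum(void *tx_queue,
 *					 struct rte_mbuf **tx_pkts,
 *					 uint16_t pkts)
 *	{
 *		uint64_t cmd[6];
 *		// The TSO-without-inner-checksum guard compiles away here
 *		// since VLAN_F | L3L4CSUM_F does not include TSO_F.
 *		return cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd,
 *					  VLAN_F | L3L4CSUM_F);
 *	}
 */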