/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Huawei Technologies Co., Ltd
 */

#include <rte_mbuf.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#ifdef RTE_ARCH_ARM64
#include <arm_neon.h>
#endif

#include "base/hinic_compat.h"
#include "base/hinic_pmd_hwdev.h"
#include "base/hinic_pmd_hwif.h"
#include "base/hinic_pmd_wq.h"
#include "base/hinic_pmd_nicio.h"
#include "base/hinic_pmd_niccfg.h"
#include "hinic_pmd_ethdev.h"
#include "hinic_pmd_tx.h"

/* packet header and tx offload info */
#define ETHER_LEN_NO_VLAN		14
#define ETHER_LEN_WITH_VLAN		18
#define VXLANLEN			8
#define MAX_PLD_OFFSET			221
#define MAX_SINGLE_SGE_SIZE		65536
#define TSO_ENABLE			1
#define TX_MSS_DEFAULT			0x3E00
#define TX_MSS_MIN			0x50

#define HINIC_NONTSO_PKT_MAX_SGE	17	/* non-tso max sge 17 */
#define HINIC_NONTSO_SEG_NUM_INVALID(num)	\
		((num) > HINIC_NONTSO_PKT_MAX_SGE)

#define HINIC_TSO_PKT_MAX_SGE		127	/* tso max sge 127 */
#define HINIC_TSO_SEG_NUM_INVALID(num)	((num) > HINIC_TSO_PKT_MAX_SGE)

/* sizeof(struct hinic_sq_bufdesc) == 16, shift 4 */
#define HINIC_BUF_DESC_SIZE(nr_descs)	(SIZE_8BYTES(((u32)nr_descs) << 4))

#define MASKED_SQ_IDX(sq, idx)		((idx) & (sq)->wq->mask)

/* SQ_CTRL */
#define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT		0
#define SQ_CTRL_TASKSECT_LEN_SHIFT		16
#define SQ_CTRL_DATA_FORMAT_SHIFT		22
#define SQ_CTRL_LEN_SHIFT			29
#define SQ_CTRL_OWNER_SHIFT			31

#define SQ_CTRL_BUFDESC_SECT_LEN_MASK		0xFFU
#define SQ_CTRL_TASKSECT_LEN_MASK		0x1FU
#define SQ_CTRL_DATA_FORMAT_MASK		0x1U
#define SQ_CTRL_LEN_MASK			0x3U
#define SQ_CTRL_OWNER_MASK			0x1U

#define SQ_CTRL_SET(val, member)	\
	(((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)

#define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT		2
#define SQ_CTRL_QUEUE_INFO_UFO_SHIFT		10
#define SQ_CTRL_QUEUE_INFO_TSO_SHIFT		11
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT	12
#define SQ_CTRL_QUEUE_INFO_MSS_SHIFT		13
#define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT		27
#define SQ_CTRL_QUEUE_INFO_UC_SHIFT		28
#define SQ_CTRL_QUEUE_INFO_PRI_SHIFT		29

#define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK		0xFFU
#define SQ_CTRL_QUEUE_INFO_UFO_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_TSO_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	0x1U
#define SQ_CTRL_QUEUE_INFO_MSS_MASK		0x3FFFU
#define SQ_CTRL_QUEUE_INFO_SCTP_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_UC_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_PRI_MASK		0x7U

#define SQ_CTRL_QUEUE_INFO_SET(val, member)	\
	(((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<	\
	 SQ_CTRL_QUEUE_INFO_##member##_SHIFT)

#define SQ_CTRL_QUEUE_INFO_GET(val, member)	\
	(((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &	\
	 SQ_CTRL_QUEUE_INFO_##member##_MASK)

#define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)	\
	((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK <<	\
	 SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))
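
/* SQ task section: per-packet offload fields carried in wqe dwords 0/1/2/4 */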
#define SQ_TASK_INFO0_L2HDR_LEN_SHIFT		0
#define SQ_TASK_INFO0_L4OFFLOAD_SHIFT		8
#define SQ_TASK_INFO0_INNER_L3TYPE_SHIFT	10
#define SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT	12
#define SQ_TASK_INFO0_PARSE_FLAG_SHIFT		13
#define SQ_TASK_INFO0_UFO_AVD_SHIFT		14
#define SQ_TASK_INFO0_TSO_UFO_SHIFT		15
#define SQ_TASK_INFO0_VLAN_TAG_SHIFT		16

#define SQ_TASK_INFO0_L2HDR_LEN_MASK		0xFFU
#define SQ_TASK_INFO0_L4OFFLOAD_MASK		0x3U
#define SQ_TASK_INFO0_INNER_L3TYPE_MASK		0x3U
#define SQ_TASK_INFO0_VLAN_OFFLOAD_MASK		0x1U
#define SQ_TASK_INFO0_PARSE_FLAG_MASK		0x1U
#define SQ_TASK_INFO0_UFO_AVD_MASK		0x1U
#define SQ_TASK_INFO0_TSO_UFO_MASK		0x1U
#define SQ_TASK_INFO0_VLAN_TAG_MASK		0xFFFFU

#define SQ_TASK_INFO0_SET(val, member)	\
	(((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<	\
	 SQ_TASK_INFO0_##member##_SHIFT)

#define SQ_TASK_INFO1_MD_TYPE_SHIFT		8
#define SQ_TASK_INFO1_INNER_L4LEN_SHIFT		16
#define SQ_TASK_INFO1_INNER_L3LEN_SHIFT		24

#define SQ_TASK_INFO1_MD_TYPE_MASK		0xFFU
#define SQ_TASK_INFO1_INNER_L4LEN_MASK		0xFFU
#define SQ_TASK_INFO1_INNER_L3LEN_MASK		0xFFU

#define SQ_TASK_INFO1_SET(val, member)	\
	(((val) & SQ_TASK_INFO1_##member##_MASK) <<	\
	 SQ_TASK_INFO1_##member##_SHIFT)

#define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT	0
#define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT		8
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT	16
#define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT	24

#define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK		0xFFU
#define SQ_TASK_INFO2_OUTER_L3LEN_MASK		0xFFU
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK	0x7U
#define SQ_TASK_INFO2_OUTER_L3TYPE_MASK		0x3U

#define SQ_TASK_INFO2_SET(val, member)	\
	(((val) & SQ_TASK_INFO2_##member##_MASK) <<	\
	 SQ_TASK_INFO2_##member##_SHIFT)

#define SQ_TASK_INFO4_L2TYPE_SHIFT		31

#define SQ_TASK_INFO4_L2TYPE_MASK		0x1U

#define SQ_TASK_INFO4_SET(val, member)	\
	(((u32)(val) & SQ_TASK_INFO4_##member##_MASK) <<	\
	 SQ_TASK_INFO4_##member##_SHIFT)

/* SQ_DB */
#define SQ_DB_OFF			0x00000800
#define SQ_DB_INFO_HI_PI_SHIFT		0
#define SQ_DB_INFO_QID_SHIFT		8
#define SQ_DB_INFO_CFLAG_SHIFT		23
#define SQ_DB_INFO_COS_SHIFT		24
#define SQ_DB_INFO_TYPE_SHIFT		27

#define SQ_DB_INFO_HI_PI_MASK		0xFFU
#define SQ_DB_INFO_QID_MASK		0x3FFU
#define SQ_DB_INFO_CFLAG_MASK		0x1U
#define SQ_DB_INFO_COS_MASK		0x7U
#define SQ_DB_INFO_TYPE_MASK		0x1FU
#define SQ_DB_INFO_SET(val, member)	\
	(((u32)(val) & SQ_DB_INFO_##member##_MASK) <<	\
	 SQ_DB_INFO_##member##_SHIFT)

#define SQ_DB				1
#define SQ_CFLAG_DP			0	/* CFLAG_DATA_PATH */

#define SQ_DB_PI_LOW_MASK		0xFF
#define SQ_DB_PI_LOW(pi)		((pi) & SQ_DB_PI_LOW_MASK)
#define SQ_DB_PI_HI_SHIFT		8
#define SQ_DB_PI_HIGH(pi)		((pi) >> SQ_DB_PI_HI_SHIFT)
#define SQ_DB_ADDR(sq, pi)	\
	((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))

/* txq wq operations */
#define HINIC_GET_SQ_WQE_MASK(txq)	((txq)->wq->mask)

#define HINIC_GET_SQ_HW_CI(txq)	\
	((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))

#define HINIC_GET_SQ_LOCAL_CI(txq)	\
	(((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))

#define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)	\
	do {						\
		(txq)->wq->cons_idx += wqebb_cnt;	\
		(txq)->wq->delta += wqebb_cnt;		\
	} while (0)

#define HINIC_GET_SQ_FREE_WQEBBS(txq)	((txq)->wq->delta - 1)

#define HINIC_IS_SQ_EMPTY(txq)	(((txq)->wq->delta) == ((txq)->q_depth))

#define BUF_DESC_SIZE_SHIFT		4

#define HINIC_SQ_WQE_SIZE(num_sge)	\
	(sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +	\
	 (unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))

#define HINIC_SQ_WQEBB_CNT(num_sge)	\
	(int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)num_sge),	\
	      HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT)
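
/* WQE control, task and buffer-descriptor sections are built in CPU byte
 * order while the hardware expects big-endian 32-bit words. The helpers
 * below swap every 4-byte word in place, using SSE/NEON shuffles on
 * x86-64/arm64 (one 16-byte lane per shuffle, e.g. bytes 0,1,2,3 become
 * 3,2,1,0) and falling back to the generic hinic_cpu_to_be32() otherwise.
 */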
static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
{
#if defined(RTE_ARCH_X86_64)
	int i;
	__m128i *wqe_line = (__m128i *)data;
	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
					11, 4, 5, 6, 7, 0, 1, 2, 3);

	for (i = 0; i < nr_wqebb; i++) {
		/* convert 64B wqebb using 4 SSE instructions */
		wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
		wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
		wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
		wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
		wqe_line += 4;
	}
#elif defined(RTE_ARCH_ARM64)
	int i;
	uint8x16_t *wqe_line = (uint8x16_t *)data;
	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
					9, 8, 15, 14, 13, 12};

	for (i = 0; i < nr_wqebb; i++) {
		wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
		wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
		wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
		wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
		wqe_line += 4;
	}
#else
	hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
#endif
}

static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
{
#if defined(RTE_ARCH_X86_64)
	int i;
	__m128i *sge_line = (__m128i *)data;
	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
					11, 4, 5, 6, 7, 0, 1, 2, 3);

	for (i = 0; i < nr_sge; i++) {
		/* convert 16B sge using 1 SSE instruction */
		*sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
		sge_line++;
	}
#elif defined(RTE_ARCH_ARM64)
	int i;
	uint8x16_t *sge_line = (uint8x16_t *)data;
	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
					9, 8, 15, 14, 13, 12};

	for (i = 0; i < nr_sge; i++) {
		*sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
		sge_line++;
	}
#else
	hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
#endif
}

void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
{
	if (!txq || !stats) {
		PMD_DRV_LOG(ERR, "Txq or stats is NULL");
		return;
	}

	memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
}

void hinic_txq_stats_reset(struct hinic_txq *txq)
{
	struct hinic_txq_stats *txq_stats;

	if (txq == NULL)
		return;

	txq_stats = &txq->txq_stats;
	memset(txq_stats, 0, sizeof(*txq_stats));
}

static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
						  struct rte_mbuf *mbuf,
						  u16 sge_cnt)
{
	struct rte_mbuf *dst_mbuf;
	u32 offset = 0;
	u16 i;

	if (unlikely(!nic_dev->cpy_mpool))
		return NULL;

	dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
	if (unlikely(!dst_mbuf))
		return NULL;

	dst_mbuf->data_off = 0;
	for (i = 0; i < sge_cnt; i++) {
		rte_memcpy((char *)dst_mbuf->buf_addr + offset,
			   (char *)mbuf->buf_addr + mbuf->data_off,
			   mbuf->data_len);
		dst_mbuf->data_len += mbuf->data_len;
		offset += mbuf->data_len;
		mbuf = mbuf->next;
	}

	dst_mbuf->pkt_len = dst_mbuf->data_len;

	return dst_mbuf;
}
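
/* Fill one SQ buffer descriptor (IOVA + length) per mbuf segment. When the
 * wqe wraps past the end of the SQ ring (sqe_info->around is set), the
 * descriptors that land at the ring head are written there and converted to
 * big endian separately, because hinic_sq_wqe_cpu_to_be32() later only
 * covers the contiguous part at the ring tail. Segments beyond the supported
 * SGE count (cpy_mbuf_cnt != 0) are first coalesced into one bounce mbuf.
 */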
static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
					  struct rte_mbuf *mbuf,
					  struct hinic_sq_bufdesc *sges,
					  struct hinic_wqe_info *sqe_info)
{
	dma_addr_t dma_addr;
	u16 i, around_sges;
	u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
	u16 real_nb_segs = mbuf->nb_segs;
	struct hinic_sq_bufdesc *sge_idx = sges;

	if (unlikely(sqe_info->around)) {
		/* part of the wqe is at the sq bottom while the rest
		 * is at the sq head
		 */
		i = 0;
		for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
		     sge_idx++) {
			if (unlikely(mbuf == NULL)) {
				txq->txq_stats.mbuf_null++;
				return false;
			}

			dma_addr = rte_mbuf_data_iova(mbuf);
			if (unlikely(mbuf->data_len == 0)) {
				txq->txq_stats.sge_len0++;
				return false;
			}
			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
				      mbuf->data_len);
			mbuf = mbuf->next;
			i++;
		}

		around_sges = nb_segs - i;
		sge_idx = (struct hinic_sq_bufdesc *)
				((void *)txq->sq_head_addr);
		for (; i < nb_segs; i++) {
			if (unlikely(mbuf == NULL)) {
				txq->txq_stats.mbuf_null++;
				return false;
			}

			dma_addr = rte_mbuf_data_iova(mbuf);
			if (unlikely(mbuf->data_len == 0)) {
				txq->txq_stats.sge_len0++;
				return false;
			}
			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
				      mbuf->data_len);
			mbuf = mbuf->next;
			sge_idx++;
		}

		/* convert sges at the ring head to big endian */
		hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
	} else {
		/* wqe is in continuous space */
		for (i = 0; i < nb_segs; i++) {
			if (unlikely(mbuf == NULL)) {
				txq->txq_stats.mbuf_null++;
				return false;
			}

			dma_addr = rte_mbuf_data_iova(mbuf);
			if (unlikely(mbuf->data_len == 0)) {
				txq->txq_stats.sge_len0++;
				return false;
			}
			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
				      mbuf->data_len);
			mbuf = mbuf->next;
			sge_idx++;
		}
	}

	/* for now: non-tso packets with more than 17 sges are handled by
	 * copying the trailing mbuf segments into one buffer
	 */
	if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
		/* copy the excess mbuf segs into one valid buffer, which
		 * costs performance
		 */
		txq->txq_stats.cpy_pkts += 1;
		mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
					  real_nb_segs - nb_segs);
		if (unlikely(!mbuf))
			return false;

		txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;

		/* deal with the last mbuf */
		dma_addr = rte_mbuf_data_iova(mbuf);
		if (unlikely(mbuf->data_len == 0)) {
			txq->txq_stats.sge_len0++;
			return false;
		}
		hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
			      mbuf->data_len);
		if (unlikely(sqe_info->around))
			hinic_sge_cpu_to_be32((void *)sge_idx, 1);
	}

	return true;
}

static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
					    u32 queue_info, int nr_descs,
					    u8 owner)
{
	u32 ctrl_size, task_size, bufdesc_size;

	ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
	task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
	bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);

	ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
			SQ_CTRL_SET(task_size, TASKSECT_LEN) |
			SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
			SQ_CTRL_SET(ctrl_size, LEN) |
			SQ_CTRL_SET(owner, OWNER);

	ctrl->queue_info = queue_info;
	ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);

	if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
		ctrl->queue_info |=
			SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
	} else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
		/* mss should not be less than 80 */
		ctrl->queue_info =
			SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
		ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
	}
}
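
/* For TSO packets with at least HINIC_NONTSO_PKT_MAX_SGE segments, walk the
 * mbuf chain window by window: each group of up to 17 consecutive segments
 * must accumulate at least one MSS worth of data (plus the header payload
 * offset for the first group), which appears to be a hardware segmentation
 * restriction. If a group falls short, the remaining data is marked to be
 * coalesced into one bounce mbuf (cpy_mbuf_cnt = 1) when it fits within
 * HINIC_COPY_MBUF_SIZE; otherwise the packet is rejected.
 */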
static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
					  struct hinic_tx_offload_info *poff_info,
					  struct hinic_wqe_info *sqe_info)
{
	u32 total_len, limit_len, checked_len, left_len, adjust_mss;
	u32 i, first_mss_sges, left_sges;
	struct rte_mbuf *mbuf_head, *mbuf_pre;

	left_sges = mbuf->nb_segs;
	mbuf_head = mbuf;

	/* tso sge number validation */
	if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
		checked_len = 0;
		adjust_mss = mbuf->tso_segsz >= TX_MSS_MIN ?
				mbuf->tso_segsz : TX_MSS_MIN;
		limit_len = adjust_mss + poff_info->payload_offset;
		first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;

		/* each group of up to 17 continuous mbuf segs must be checked */
		while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
			/* the total length of this group of mbufs must be
			 * equal to or greater than limit_len
			 */
			total_len = 0;
			for (i = 0; i < first_mss_sges; i++) {
				total_len += mbuf->data_len;
				mbuf_pre = mbuf;
				mbuf = mbuf->next;
				if (total_len >= limit_len) {
					limit_len = adjust_mss;
					break;
				}
			}

			checked_len += total_len;

			/* try to copy if not valid */
			if (unlikely(first_mss_sges == i)) {
				left_sges -= first_mss_sges;
				checked_len -= mbuf_pre->data_len;

				left_len = mbuf_head->pkt_len - checked_len;
				if (left_len > HINIC_COPY_MBUF_SIZE)
					return false;

				sqe_info->sge_cnt = mbuf_head->nb_segs -
							left_sges;
				sqe_info->cpy_mbuf_cnt = 1;

				return true;
			}
			first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);

			/* continue with the next group of mbufs */
			left_sges -= (i + 1);
		} /* end of while */
	}

	sqe_info->sge_cnt = mbuf_head->nb_segs;
	return true;
}

static inline void
hinic_set_l4_csum_info(struct hinic_sq_task *task,
		       u32 *queue_info, struct hinic_tx_offload_info *poff_info)
{
	u32 tcp_udp_cs, sctp = 0;
	u16 l2hdr_len;

	if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
		sctp = 1;

	tcp_udp_cs = poff_info->inner_l4_tcp_udp;

	if (poff_info->tunnel_type == TUNNEL_UDP_CSUM ||
	    poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
		l2hdr_len = poff_info->outer_l2_len;

		task->pkt_info2 |=
			SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
			SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
		task->pkt_info2 |=
			SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
			SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
	} else {
		l2hdr_len = poff_info->inner_l2_len;
	}

	task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
	task->pkt_info1 |=
		SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
	task->pkt_info0 |=
		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
	task->pkt_info1 |=
		SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
	task->pkt_info0 |=
		SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
	*queue_info |=
		SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
		SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
		SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
}
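
/* TSO reuses the checksum task fields set above and additionally enables the
 * TSO flag and the MSS in queue_info; an MSS below TX_MSS_MIN is later raised
 * to the minimum in hinic_fill_sq_wqe_header().
 */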
static inline void
hinic_set_tso_info(struct hinic_sq_task *task,
		   u32 *queue_info, struct rte_mbuf *mbuf,
		   struct hinic_tx_offload_info *poff_info)
{
	hinic_set_l4_csum_info(task, queue_info, poff_info);

	/* wqe for tso */
	task->pkt_info0 |=
		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
	task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
	/* qsf was initialized in prepare_sq_wqe */
	*queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
}

static inline void
hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
			  u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
{
	task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
			   SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);

	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
}

static inline void
hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
			   struct hinic_sq_task *task, u32 *queue_info,
			   struct hinic_tx_offload_info *tx_off_info)
{
	u16 vlan_tag;
	uint64_t ol_flags = mbuf->ol_flags;

	/* clear DW0~2 of task section for offload */
	task->pkt_info0 = 0;
	task->pkt_info1 = 0;
	task->pkt_info2 = 0;

	/* Base VLAN */
	if (unlikely(ol_flags & RTE_MBUF_F_TX_VLAN)) {
		vlan_tag = mbuf->vlan_tci;
		hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
					  vlan_tag >> VLAN_PRIO_SHIFT);
	}

	/* neither checksum nor tso is requested */
	if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
		return;

	if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG))
		/* set tso info for task and qsf */
		hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
	else /* only l4 checksum offload is supported */
		hinic_set_l4_csum_info(task, queue_info, tx_off_info);
}

static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
{
	struct hinic_tx_info *tx_info;
	struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
	int i, nb_free = 0;
	u16 hw_ci, sw_ci, sq_mask;
	int wqebb_cnt = 0;

	hw_ci = HINIC_GET_SQ_HW_CI(txq);
	sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
	sq_mask = HINIC_GET_SQ_WQE_MASK(txq);

	for (i = 0; i < txq->tx_free_thresh; ++i) {
		tx_info = &txq->tx_info[sw_ci];
		if (hw_ci == sw_ci ||
		    (((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
			break;

		sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;

		if (unlikely(tx_info->cpy_mbuf != NULL)) {
			rte_pktmbuf_free(tx_info->cpy_mbuf);
			tx_info->cpy_mbuf = NULL;
		}

		wqebb_cnt += tx_info->wqebb_cnt;
		mbuf = tx_info->mbuf;

		if (likely(mbuf->nb_segs == 1)) {
			m = rte_pktmbuf_prefree_seg(mbuf);
			tx_info->mbuf = NULL;

			if (unlikely(m == NULL))
				continue;

			mbuf_free[nb_free++] = m;
			if (unlikely(m->pool != mbuf_free[0]->pool ||
			    nb_free >= HINIC_MAX_TX_FREE_BULK)) {
				rte_mempool_put_bulk(mbuf_free[0]->pool,
					(void **)mbuf_free, (nb_free - 1));
				nb_free = 0;
				mbuf_free[nb_free++] = m;
			}
		} else {
			rte_pktmbuf_free(mbuf);
			tx_info->mbuf = NULL;
		}
	}

	if (nb_free > 0)
		rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
				     nb_free);

	HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
}
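
/* Reserve wqebb_cnt WQE building blocks at the current producer index. The
 * owner bit flips whenever the reserved region reaches the end of the ring,
 * and if the region actually crosses the end, wqe_info->around is set and
 * seq_wqebbs holds only the contiguous count up to the ring end so that SGE
 * filling and byte-order conversion can handle the wrapped part separately.
 */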
static inline struct hinic_sq_wqe *
hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
		 struct hinic_wqe_info *wqe_info)
{
	u32 cur_pi, end_pi;
	u16 remain_wqebbs;
	struct hinic_sq *sq = txq->sq;
	struct hinic_wq *wq = txq->wq;

	/* record current pi */
	cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
	end_pi = cur_pi + wqebb_cnt;

	/* update next pi and delta */
	wq->prod_idx += wqebb_cnt;
	wq->delta -= wqebb_cnt;

	/* return current pi and owner */
	wqe_info->pi = cur_pi;
	wqe_info->owner = sq->owner;
	wqe_info->around = 0;
	wqe_info->seq_wqebbs = wqebb_cnt;

	if (unlikely(end_pi >= txq->q_depth)) {
		/* update owner of next prod_idx */
		sq->owner = !sq->owner;

		/* turn around to head */
		if (unlikely(end_pi > txq->q_depth)) {
			wqe_info->around = 1;
			remain_wqebbs = txq->q_depth - cur_pi;
			wqe_info->seq_wqebbs = remain_wqebbs;
		}
	}

	return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
}

static inline uint16_t
hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
{
	struct ipv4_psd_header {
		uint32_t src_addr; /* IP address of source host. */
		uint32_t dst_addr; /* IP address of destination host. */
		uint8_t zero;      /* zero. */
		uint8_t proto;     /* L4 protocol type. */
		uint16_t len;      /* L4 length. */
	} psd_hdr;

	psd_hdr.src_addr = ipv4_hdr->src_addr;
	psd_hdr.dst_addr = ipv4_hdr->dst_addr;
	psd_hdr.zero = 0;
	psd_hdr.proto = ipv4_hdr->next_proto_id;
	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		psd_hdr.len = 0;
	} else {
		psd_hdr.len =
		rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
				 rte_ipv4_hdr_len(ipv4_hdr));
	}
	return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
}

static inline uint16_t
hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
{
	uint32_t sum;
	struct {
		uint32_t len;   /* L4 length. */
		uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
	} psd_hdr;

	psd_hdr.proto = (ipv6_hdr->proto << 24);
	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
		psd_hdr.len = 0;
	else
		psd_hdr.len = ipv6_hdr->payload_len;

	sum = __rte_raw_cksum(&ipv6_hdr->src_addr,
		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
	return __rte_raw_cksum_reduce(sum);
}

static inline void hinic_get_outer_cs_pld_offset(struct rte_mbuf *m,
					struct hinic_tx_offload_info *off_info)
{
	uint64_t ol_flags = m->ol_flags;

	if ((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_UDP_CKSUM)
		off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
					   m->l2_len + m->l3_len;
	else if ((ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) ||
		 (ol_flags & RTE_MBUF_F_TX_TCP_SEG))
		off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
					   m->l2_len + m->l3_len + m->l4_len;
}

static inline void hinic_get_pld_offset(struct rte_mbuf *m,
					struct hinic_tx_offload_info *off_info)
{
	uint64_t ol_flags = m->ol_flags;

	if (((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_UDP_CKSUM) ||
	    ((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_SCTP_CKSUM))
		off_info->payload_offset = m->l2_len + m->l3_len;
	else if ((ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) ||
		 (ol_flags & RTE_MBUF_F_TX_TCP_SEG))
		off_info->payload_offset = m->l2_len + m->l3_len +
					   m->l4_len;
}
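
/* Parse the outer Ethernet (and optional VLAN) and IP headers directly from
 * the packet data to derive outer_l2_len/outer_l3_len; this is used on the
 * VXLAN path when outer checksum offload is not requested and the outer
 * length fields of the mbuf may not have been filled by the application.
 */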
static inline void hinic_analyze_tx_info(struct rte_mbuf *mbuf,
					 struct hinic_tx_offload_info *off_info)
{
	struct rte_ether_hdr *eth_hdr;
	struct rte_vlan_hdr *vlan_hdr;
	struct rte_ipv4_hdr *ipv4_hdr;
	u16 eth_type;

	eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
	eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);

	if (eth_type == RTE_ETHER_TYPE_VLAN) {
		off_info->outer_l2_len = ETHER_LEN_WITH_VLAN;
		vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
		eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
	} else {
		off_info->outer_l2_len = ETHER_LEN_NO_VLAN;
	}

	if (eth_type == RTE_ETHER_TYPE_IPV4) {
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
						   off_info->outer_l2_len);
		off_info->outer_l3_len = rte_ipv4_hdr_len(ipv4_hdr);
	} else if (eth_type == RTE_ETHER_TYPE_IPV6) {
		/* ipv6 extension headers are not supported */
		off_info->outer_l3_len = sizeof(struct rte_ipv6_hdr);
	}
}

static inline void hinic_analyze_outer_ip_vxlan(struct rte_mbuf *mbuf,
					struct hinic_tx_offload_info *off_info)
{
	struct rte_ether_hdr *eth_hdr;
	struct rte_vlan_hdr *vlan_hdr;
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_udp_hdr *udp_hdr;
	u16 eth_type = 0;

	eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
	eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);

	if (eth_type == RTE_ETHER_TYPE_VLAN) {
		vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
		eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
	}

	if (eth_type == RTE_ETHER_TYPE_IPV4) {
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
						   mbuf->outer_l2_len);
		off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
		ipv4_hdr->hdr_checksum = 0;

		udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
						 mbuf->outer_l3_len);
		udp_hdr->dgram_cksum = 0;
	} else if (eth_type == RTE_ETHER_TYPE_IPV6) {
		off_info->outer_l3_type = IPV6_PKT;

		udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
						  (mbuf->outer_l2_len +
						   mbuf->outer_l3_len));
		udp_hdr->dgram_cksum = 0;
	}
}

static inline uint8_t hinic_analyze_l3_type(struct rte_mbuf *mbuf)
{
	uint8_t l3_type;
	uint64_t ol_flags = mbuf->ol_flags;

	if (ol_flags & RTE_MBUF_F_TX_IPV4)
		l3_type = (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) ?
			  IPV4_PKT_WITH_CHKSUM_OFFLOAD :
			  IPV4_PKT_NO_CHKSUM_OFFLOAD;
	else if (ol_flags & RTE_MBUF_F_TX_IPV6)
		l3_type = IPV6_PKT;
	else
		l3_type = UNKNOWN_L3TYPE;

	return l3_type;
}

static inline void hinic_calculate_tcp_checksum(struct rte_mbuf *mbuf,
					struct hinic_tx_offload_info *off_info,
					uint64_t inner_l3_offset)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	uint64_t ol_flags = mbuf->ol_flags;

	if (ol_flags & RTE_MBUF_F_TX_IPV4) {
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
						   inner_l3_offset);

		if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
			ipv4_hdr->hdr_checksum = 0;

		tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
						 mbuf->l3_len);
		tcp_hdr->cksum = hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
	} else {
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
						   inner_l3_offset);
		tcp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,
						  (inner_l3_offset +
						   mbuf->l3_len));
		tcp_hdr->cksum = hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
	}

	off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
	off_info->inner_l4_tcp_udp = 1;
}
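
/* As in the TCP case above, only the pseudo-header checksum is written into
 * the UDP header here; the hardware checksum offload is expected to complete
 * the sum over the L4 header and payload.
 */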
static inline void hinic_calculate_udp_checksum(struct rte_mbuf *mbuf,
					struct hinic_tx_offload_info *off_info,
					uint64_t inner_l3_offset)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_udp_hdr *udp_hdr;
	uint64_t ol_flags = mbuf->ol_flags;

	if (ol_flags & RTE_MBUF_F_TX_IPV4) {
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
						   inner_l3_offset);

		if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
			ipv4_hdr->hdr_checksum = 0;

		udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
						 mbuf->l3_len);
		udp_hdr->dgram_cksum = hinic_ipv4_phdr_cksum(ipv4_hdr,
							     ol_flags);
	} else {
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
						   inner_l3_offset);

		udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
						  (inner_l3_offset +
						   mbuf->l3_len));
		udp_hdr->dgram_cksum = hinic_ipv6_phdr_cksum(ipv6_hdr,
							     ol_flags);
	}

	off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
	off_info->inner_l4_tcp_udp = 1;
}

static inline void
hinic_calculate_sctp_checksum(struct hinic_tx_offload_info *off_info)
{
	off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
	off_info->inner_l4_tcp_udp = 0;
	off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
}

static inline void hinic_calculate_checksum(struct rte_mbuf *mbuf,
					struct hinic_tx_offload_info *off_info,
					uint64_t inner_l3_offset)
{
	uint64_t ol_flags = mbuf->ol_flags;

	switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
	case RTE_MBUF_F_TX_UDP_CKSUM:
		hinic_calculate_udp_checksum(mbuf, off_info, inner_l3_offset);
		break;

	case RTE_MBUF_F_TX_TCP_CKSUM:
		hinic_calculate_tcp_checksum(mbuf, off_info, inner_l3_offset);
		break;

	case RTE_MBUF_F_TX_SCTP_CKSUM:
		hinic_calculate_sctp_checksum(off_info);
		break;

	default:
		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
			hinic_calculate_tcp_checksum(mbuf, off_info,
						     inner_l3_offset);
		break;
	}
}
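
/* Validate the requested offloads and fill the per-packet offload info:
 * only VXLAN tunnels are supported, header lengths and the payload offset
 * are derived from the mbuf offload fields (or parsed from the headers when
 * needed), and the L4 pseudo-header checksum is seeded in software.
 */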
static inline int hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
					struct hinic_tx_offload_info *off_info)
{
	uint64_t inner_l3_offset;
	uint64_t ol_flags = m->ol_flags;

	/* Check whether the packet requests any of the supported offloads */
	if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
		return 0;

	/* Only vxlan tunnel offload is supported */
	if (unlikely((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) &&
		     !(ol_flags & RTE_MBUF_F_TX_TUNNEL_VXLAN)))
		return -ENOTSUP;

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
	if (rte_validate_tx_offload(m) != 0)
		return -EINVAL;
#endif

	if (ol_flags & RTE_MBUF_F_TX_TUNNEL_VXLAN) {
		off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;

		/* inner_l4_tcp_udp csum should be set to calculate the outer
		 * udp checksum when vxlan packets have no inner l3 and l4
		 */
		off_info->inner_l4_tcp_udp = 1;

		if ((ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) ||
		    (ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) ||
		    (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			inner_l3_offset = m->l2_len + m->outer_l2_len +
					  m->outer_l3_len;
			off_info->outer_l2_len = m->outer_l2_len;
			off_info->outer_l3_len = m->outer_l3_len;
			/* only vxlan tunneling packets are supported */
			off_info->inner_l2_len = m->l2_len - VXLANLEN -
						 sizeof(struct rte_udp_hdr);
			off_info->tunnel_length = m->l2_len;

			hinic_analyze_outer_ip_vxlan(m, off_info);

			hinic_get_outer_cs_pld_offset(m, off_info);
		} else {
			inner_l3_offset = m->l2_len;
			hinic_analyze_tx_info(m, off_info);
			/* only vxlan tunneling packets are supported */
			off_info->inner_l2_len = m->l2_len - VXLANLEN -
						 sizeof(struct rte_udp_hdr) -
						 off_info->outer_l2_len -
						 off_info->outer_l3_len;
			off_info->tunnel_length = m->l2_len -
						  off_info->outer_l2_len -
						  off_info->outer_l3_len;
			off_info->outer_l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;

			hinic_get_pld_offset(m, off_info);
		}
	} else {
		inner_l3_offset = m->l2_len;
		off_info->inner_l2_len = m->l2_len;
		off_info->tunnel_type = NOT_TUNNEL;

		hinic_get_pld_offset(m, off_info);
	}

	/* invalid udp or tcp header */
	if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
		return -EINVAL;

	off_info->inner_l3_len = m->l3_len;
	off_info->inner_l4_len = m->l4_len;
	off_info->inner_l3_type = hinic_analyze_l3_type(m);

	/* Process the pseudo-header checksum */
	hinic_calculate_checksum(m, off_info, inner_l3_offset);

	return 0;
}

static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
					    struct hinic_wqe_info *sqe_info,
					    struct hinic_tx_offload_info *off_info)
{
	u16 i, total_len, sge_cnt = mbuf_pkt->nb_segs;
	struct rte_mbuf *mbuf;
	int ret;

	memset(off_info, 0, sizeof(*off_info));

	ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
	if (unlikely(ret))
		return false;

	sqe_info->cpy_mbuf_cnt = 0;

	/* non tso mbuf */
	if (likely(!(mbuf_pkt->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) {
		if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
			/* a non tso packet must be shorter than 64KB */
			return false;
		} else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
			/* a non tso packet can use at most 17 sges; when the
			 * mbuf has more segs, the trailing ones are copied
			 * into one buffer
			 */
			total_len = 0;
			mbuf = mbuf_pkt;
			for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1); i++) {
				total_len += mbuf->data_len;
				mbuf = mbuf->next;
			}

			/* by default at most HINIC_COPY_MBUF_SIZE (4KB) of
			 * trailing mbuf data can be copied
			 */
			if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
			    mbuf_pkt->pkt_len)
				return false;

			sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
			sqe_info->cpy_mbuf_cnt = 1;
			return true;
		}

		/* valid non tso mbuf */
		sqe_info->sge_cnt = sge_cnt;
	} else {
		/* tso mbuf */
		if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
			/* too many mbuf segs */
			return false;

		/* check whether the tso mbuf segs are valid */
		if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
						     off_info, sqe_info)))
			return false;
	}

	return true;
}

static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
{
	u16 prod_idx;
	u32 hi_prod_idx;
	struct hinic_sq_db sq_db;

	prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
	hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);

	sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
			SQ_DB_INFO_SET(SQ_DB, TYPE) |
			SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
			SQ_DB_INFO_SET(cos, COS) |
			SQ_DB_INFO_SET(sq->q_id, QID);

	/* Data should be written to HW in Big Endian Format */
	sq_db.db_info = cpu_to_be32(sq_db.db_info);

	/* Write all before the doorbell */
	rte_wmb();
	writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
}
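
/* Burst transmit path. Completed descriptors are reclaimed first when the
 * free wqebb count drops below tx_free_thresh, then each packet is turned
 * into one wqe (offload parsing, wqebb reservation, SGE/task/control
 * sections, big-endian conversion). The loop stops at the first packet that
 * cannot be handled, and a single doorbell is rung for the whole burst.
 */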
u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
{
	int free_wqebb_cnt, wqe_wqebb_cnt;
	u32 queue_info, tx_bytes = 0;
	u16 nb_tx;
	struct hinic_wqe_info sqe_info;
	struct hinic_tx_offload_info off_info;
	struct rte_mbuf *mbuf_pkt;
	struct hinic_txq *txq = tx_queue;
	struct hinic_tx_info *tx_info;
	struct hinic_sq_wqe *sq_wqe;
	struct hinic_sq_task *task;

	/* reclaim tx mbufs before transmitting new packets */
	if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
		hinic_xmit_mbuf_cleanup(txq);

	/* tx loop routine */
	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		mbuf_pkt = *tx_pkts++;
		queue_info = 0;

		/* 1. parse sge and tx offload info from mbuf */
		if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
						       &sqe_info, &off_info))) {
			txq->txq_stats.off_errs++;
			break;
		}

		/* 2. try to get enough wqebbs */
		wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
		free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
		if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
			/* reclaim again */
			hinic_xmit_mbuf_cleanup(txq);
			free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
			if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
				txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
				break;
			}
		}

		/* 3. get the sq tail wqe address from the wqe page;
		 * the sq now has enough wqebbs for this packet
		 */
		sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);

		/* 4. fill sq wqe sge section */
		if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
						     sq_wqe->buf_descs,
						     &sqe_info))) {
			hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
					    wqe_wqebb_cnt, sqe_info.owner);
			txq->txq_stats.off_errs++;
			break;
		}

		/* 5. fill sq wqe task section and queue info */
		task = &sq_wqe->task;

		/* tx packet offload configure */
		hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
					   &off_info);

		/* 6. record tx info */
		tx_info = &txq->tx_info[sqe_info.pi];
		tx_info->mbuf = mbuf_pkt;
		tx_info->wqebb_cnt = wqe_wqebb_cnt;

		/* 7. fill sq wqe header section */
		hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
					 sqe_info.sge_cnt, sqe_info.owner);

		/* 8. convert the contiguous (or bottom) part of the wqe to
		 * big endian byte order
		 */
		hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);

		tx_bytes += mbuf_pkt->pkt_len;
	}

	/* 9. write sq doorbell in burst mode */
	if (nb_tx) {
		hinic_sq_write_db(txq->sq, txq->cos);

		txq->txq_stats.packets += nb_tx;
		txq->txq_stats.bytes += tx_bytes;
	}
	txq->txq_stats.burst_pkts = nb_tx;

	return nb_tx;
}

void hinic_free_all_tx_mbufs(struct hinic_txq *txq)
{
	u16 ci;
	struct hinic_nic_dev *nic_dev = txq->nic_dev;
	struct hinic_tx_info *tx_info;
	int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
						   txq->q_id) + 1;

	while (free_wqebbs < txq->q_depth) {
		ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);

		tx_info = &txq->tx_info[ci];

		if (unlikely(tx_info->cpy_mbuf != NULL)) {
			rte_pktmbuf_free(tx_info->cpy_mbuf);
			tx_info->cpy_mbuf = NULL;
		}

		rte_pktmbuf_free(tx_info->mbuf);
		hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
					 tx_info->wqebb_cnt);

		free_wqebbs += tx_info->wqebb_cnt;
		tx_info->mbuf = NULL;
	}
}

void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
{
	u16 q_id;
	struct hinic_nic_dev *nic_dev =
				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);

	for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
		if (eth_dev->data->tx_queues != NULL)
			eth_dev->data->tx_queues[q_id] = NULL;

		if (nic_dev->txqs[q_id] == NULL)
			continue;

		/* stop the tx queue and free its tx mbufs */
		hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
		hinic_free_tx_resources(nic_dev->txqs[q_id]);

		/* free txq */
		kfree(nic_dev->txqs[q_id]);
		nic_dev->txqs[q_id] = NULL;
	}
}

void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
{
	u16 q_id;
	struct hinic_nic_dev *nic_dev =
				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);

	for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
		/* stop the tx queue and free its tx mbufs */
		hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
}

int hinic_setup_tx_resources(struct hinic_txq *txq)
{
	u64 tx_info_sz;

	tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
	txq->tx_info = rte_zmalloc_socket("tx_info", tx_info_sz,
					  RTE_CACHE_LINE_SIZE, txq->socket_id);
	if (!txq->tx_info)
		return -ENOMEM;

	return HINIC_OK;
}

void hinic_free_tx_resources(struct hinic_txq *txq)
{
	if (txq->tx_info == NULL)
		return;

	rte_free(txq->tx_info);
	txq->tx_info = NULL;
}
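
/* Create the send queue: allocate the work queue (wqebb ring), map a
 * doorbell page, clear the hardware consumer index location that the device
 * writes back, and initialize the owner bit to 1 for the first pass of the
 * ring.
 */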
int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id,
		    u16 sq_depth, unsigned int socket_id)
{
	int err;
	struct hinic_nic_io *nic_io = hwdev->nic_io;
	struct hinic_qp *qp = &nic_io->qps[q_id];
	struct hinic_sq *sq = &qp->sq;
	void __iomem *db_addr;
	volatile u32 *ci_addr;

	sq->sq_depth = sq_depth;
	nic_io->sq_depth = sq_depth;

	/* alloc wq */
	err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
				HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth,
				socket_id);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
		return err;
	}

	/* alloc sq doorbell space */
	err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to init db addr");
		goto alloc_db_err;
	}

	/* clear hardware ci */
	ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
	*ci_addr = 0;

	sq->q_id = q_id;
	sq->wq = &nic_io->sq_wq[q_id];
	sq->owner = 1;
	sq->cons_idx_addr = (volatile u16 *)ci_addr;
	sq->db_addr = db_addr;

	return HINIC_OK;

alloc_db_err:
	hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);

	return err;
}

void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
{
	struct hinic_nic_io *nic_io;
	struct hinic_qp *qp;

	nic_io = hwdev->nic_io;
	qp = &nic_io->qps[q_id];

	if (qp->sq.wq == NULL)
		return;

	hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
	hinic_wq_free(nic_io->hwdev, qp->sq.wq);
	qp->sq.wq = NULL;
}