/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Huawei Technologies Co., Ltd
 */

#include <rte_mbuf.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#ifdef __ARM64_NEON__
#include <arm_neon.h>
#endif

#include "base/hinic_compat.h"
#include "base/hinic_pmd_hwdev.h"
#include "base/hinic_pmd_hwif.h"
#include "base/hinic_pmd_wq.h"
#include "base/hinic_pmd_nicio.h"
#include "base/hinic_pmd_niccfg.h"
#include "hinic_pmd_ethdev.h"
#include "hinic_pmd_tx.h"

/* packet header and tx offload info */
#define ETHER_LEN_NO_VLAN		14
#define ETHER_LEN_WITH_VLAN		18
#define HEADER_LEN_OFFSET		2
#define VXLANLEN			8
#define MAX_PLD_OFFSET			221
#define MAX_SINGLE_SGE_SIZE		65536
#define TSO_ENABLE			1
#define TX_MSS_DEFAULT			0x3E00
#define TX_MSS_MIN			0x50

#define HINIC_NONTSO_PKT_MAX_SGE		17	/* non-tso max sge 17 */
#define HINIC_NONTSO_SEG_NUM_INVALID(num)	\
	((num) > HINIC_NONTSO_PKT_MAX_SGE)

#define HINIC_TSO_PKT_MAX_SGE			127	/* tso max sge 127 */
#define HINIC_TSO_SEG_NUM_INVALID(num)	((num) > HINIC_TSO_PKT_MAX_SGE)

/* sizeof(struct hinic_sq_bufdesc) == 16, shift 4 */
#define HINIC_BUF_DESC_SIZE(nr_descs)	(SIZE_8BYTES(((u32)nr_descs) << 4))

#define MASKED_SQ_IDX(sq, idx)		((idx) & (sq)->wq->mask)

/* SQ_CTRL */
#define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT		0
#define SQ_CTRL_TASKSECT_LEN_SHIFT		16
#define SQ_CTRL_DATA_FORMAT_SHIFT		22
#define SQ_CTRL_LEN_SHIFT			29
#define SQ_CTRL_OWNER_SHIFT			31

#define SQ_CTRL_BUFDESC_SECT_LEN_MASK		0xFFU
#define SQ_CTRL_TASKSECT_LEN_MASK		0x1FU
#define SQ_CTRL_DATA_FORMAT_MASK		0x1U
#define SQ_CTRL_LEN_MASK			0x3U
#define SQ_CTRL_OWNER_MASK			0x1U

#define SQ_CTRL_SET(val, member)	\
	(((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)

#define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT		2
#define SQ_CTRL_QUEUE_INFO_UFO_SHIFT		10
#define SQ_CTRL_QUEUE_INFO_TSO_SHIFT		11
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT	12
#define SQ_CTRL_QUEUE_INFO_MSS_SHIFT		13
#define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT		27
#define SQ_CTRL_QUEUE_INFO_UC_SHIFT		28
#define SQ_CTRL_QUEUE_INFO_PRI_SHIFT		29

#define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK		0xFFU
#define SQ_CTRL_QUEUE_INFO_UFO_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_TSO_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	0x1U
#define SQ_CTRL_QUEUE_INFO_MSS_MASK		0x3FFFU
#define SQ_CTRL_QUEUE_INFO_SCTP_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_UC_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_PRI_MASK		0x7U

#define SQ_CTRL_QUEUE_INFO_SET(val, member)	\
	(((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<	\
	 SQ_CTRL_QUEUE_INFO_##member##_SHIFT)

#define SQ_CTRL_QUEUE_INFO_GET(val, member)	\
	(((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &	\
	 SQ_CTRL_QUEUE_INFO_##member##_MASK)

#define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)	\
	((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK <<	\
	 SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))

#define SQ_TASK_INFO0_L2HDR_LEN_SHIFT		0
#define SQ_TASK_INFO0_L4OFFLOAD_SHIFT		8
#define SQ_TASK_INFO0_INNER_L3TYPE_SHIFT	10
#define SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT	12
#define SQ_TASK_INFO0_PARSE_FLAG_SHIFT		13
#define SQ_TASK_INFO0_UFO_AVD_SHIFT		14
#define SQ_TASK_INFO0_TSO_UFO_SHIFT		15
#define SQ_TASK_INFO0_VLAN_TAG_SHIFT		16

#define SQ_TASK_INFO0_L2HDR_LEN_MASK		0xFFU
#define SQ_TASK_INFO0_L4OFFLOAD_MASK		0x3U
#define SQ_TASK_INFO0_INNER_L3TYPE_MASK		0x3U
#define SQ_TASK_INFO0_VLAN_OFFLOAD_MASK		0x1U
#define SQ_TASK_INFO0_PARSE_FLAG_MASK		0x1U
#define SQ_TASK_INFO0_UFO_AVD_MASK		0x1U
#define SQ_TASK_INFO0_TSO_UFO_MASK		0x1U
#define SQ_TASK_INFO0_VLAN_TAG_MASK		0xFFFFU

#define SQ_TASK_INFO0_SET(val, member)	\
	(((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<	\
	 SQ_TASK_INFO0_##member##_SHIFT)

#define SQ_TASK_INFO1_MD_TYPE_SHIFT		8
#define SQ_TASK_INFO1_INNER_L4LEN_SHIFT		16
#define SQ_TASK_INFO1_INNER_L3LEN_SHIFT		24

#define SQ_TASK_INFO1_MD_TYPE_MASK		0xFFU
#define SQ_TASK_INFO1_INNER_L4LEN_MASK		0xFFU
#define SQ_TASK_INFO1_INNER_L3LEN_MASK		0xFFU

#define SQ_TASK_INFO1_SET(val, member)	\
	(((val) & SQ_TASK_INFO1_##member##_MASK) <<	\
	 SQ_TASK_INFO1_##member##_SHIFT)

#define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT	0
#define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT		8
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT	16
#define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT	24

#define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK		0xFFU
#define SQ_TASK_INFO2_OUTER_L3LEN_MASK		0xFFU
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK	0x7U
#define SQ_TASK_INFO2_OUTER_L3TYPE_MASK		0x3U

#define SQ_TASK_INFO2_SET(val, member)	\
	(((val) & SQ_TASK_INFO2_##member##_MASK) <<	\
	 SQ_TASK_INFO2_##member##_SHIFT)

#define SQ_TASK_INFO4_L2TYPE_SHIFT		31

#define SQ_TASK_INFO4_L2TYPE_MASK		0x1U

#define SQ_TASK_INFO4_SET(val, member)	\
	(((u32)(val) & SQ_TASK_INFO4_##member##_MASK) <<	\
	 SQ_TASK_INFO4_##member##_SHIFT)

/* SQ_DB */
#define SQ_DB_OFF			0x00000800
#define SQ_DB_INFO_HI_PI_SHIFT		0
#define SQ_DB_INFO_QID_SHIFT		8
#define SQ_DB_INFO_CFLAG_SHIFT		23
#define SQ_DB_INFO_COS_SHIFT		24
#define SQ_DB_INFO_TYPE_SHIFT		27

#define SQ_DB_INFO_HI_PI_MASK		0xFFU
#define SQ_DB_INFO_QID_MASK		0x3FFU
#define SQ_DB_INFO_CFLAG_MASK		0x1U
#define SQ_DB_INFO_COS_MASK		0x7U
#define SQ_DB_INFO_TYPE_MASK		0x1FU
#define SQ_DB_INFO_SET(val, member)	\
	(((u32)(val) & SQ_DB_INFO_##member##_MASK) <<	\
	 SQ_DB_INFO_##member##_SHIFT)

#define SQ_DB		1
#define SQ_CFLAG_DP	0	/* CFLAG_DATA_PATH */

#define SQ_DB_PI_LOW_MASK	0xFF
#define SQ_DB_PI_LOW(pi)	((pi) & SQ_DB_PI_LOW_MASK)
#define SQ_DB_PI_HI_SHIFT	8
#define SQ_DB_PI_HIGH(pi)	((pi) >> SQ_DB_PI_HI_SHIFT)
#define SQ_DB_ADDR(sq, pi)	\
	((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))

/* txq wq operations */
#define HINIC_GET_SQ_WQE_MASK(txq)	((txq)->wq->mask)

#define HINIC_GET_SQ_HW_CI(txq)	\
	((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))

#define HINIC_GET_SQ_LOCAL_CI(txq)	\
	(((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))

#define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)	\
	do {						\
		(txq)->wq->cons_idx += wqebb_cnt;	\
		(txq)->wq->delta += wqebb_cnt;		\
	} while (0)

#define HINIC_GET_SQ_FREE_WQEBBS(txq)	((txq)->wq->delta - 1)

#define HINIC_IS_SQ_EMPTY(txq)	(((txq)->wq->delta) == ((txq)->q_depth))

#define BUF_DESC_SIZE_SHIFT	4

#define HINIC_SQ_WQE_SIZE(num_sge)	\
	(sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +	\
	 (unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))

#define HINIC_SQ_WQEBB_CNT(num_sge)	\
	(int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)num_sge),	\
	      HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT)


static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
{
#if defined(__X86_64_SSE__)
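	/* Byte-swap every 32-bit word of the WQE to big endian: the shuffle
	 * mask reverses the bytes within each 4-byte lane, and one 64B WQEBB
	 * is processed as four 16B vectors per loop iteration.
	 */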
	int i;
	__m128i *wqe_line = (__m128i *)data;
	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
					 11, 4, 5, 6, 7, 0, 1, 2, 3);

	for (i = 0; i < nr_wqebb; i++) {
		/* convert 64B wqebb using 4 SSE instructions */
		wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
		wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
		wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
		wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
		wqe_line += 4;
	}
#elif defined(__ARM64_NEON__)
	int i;
	uint8x16_t *wqe_line = (uint8x16_t *)data;
	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
				      9, 8, 15, 14, 13, 12};

	for (i = 0; i < nr_wqebb; i++) {
		wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
		wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
		wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
		wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
		wqe_line += 4;
	}
#else
	hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
#endif
}

static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
{
#if defined(__X86_64_SSE__)
	int i;
	__m128i *sge_line = (__m128i *)data;
	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
					 11, 4, 5, 6, 7, 0, 1, 2, 3);

	for (i = 0; i < nr_sge; i++) {
		/* convert 16B sge using 1 SSE instruction */
		*sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
		sge_line++;
	}
#elif defined(__ARM64_NEON__)
	int i;
	uint8x16_t *sge_line = (uint8x16_t *)data;
	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
				      9, 8, 15, 14, 13, 12};

	for (i = 0; i < nr_sge; i++) {
		*sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
		sge_line++;
	}
#else
	hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
#endif
}

void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
{
	if (!txq || !stats) {
		PMD_DRV_LOG(ERR, "Txq or stats is NULL");
		return;
	}

	memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
}

void hinic_txq_stats_reset(struct hinic_txq *txq)
{
	struct hinic_txq_stats *txq_stats;

	if (txq == NULL)
		return;

	txq_stats = &txq->txq_stats;
	memset(txq_stats, 0, sizeof(*txq_stats));
}

static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
						  struct rte_mbuf *mbuf,
						  u16 sge_cnt)
{
	struct rte_mbuf *dst_mbuf;
	u32 offset = 0;
	u16 i;

	if (unlikely(!nic_dev->cpy_mpool))
		return NULL;

	dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
	if (unlikely(!dst_mbuf))
		return NULL;

	dst_mbuf->data_off = 0;
	for (i = 0; i < sge_cnt; i++) {
		rte_memcpy((char *)dst_mbuf->buf_addr + offset,
			   (char *)mbuf->buf_addr + mbuf->data_off,
			   mbuf->data_len);
		dst_mbuf->data_len += mbuf->data_len;
		offset += mbuf->data_len;
		mbuf = mbuf->next;
	}

	dst_mbuf->pkt_len = dst_mbuf->data_len;

	return dst_mbuf;
}

static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
					  struct rte_mbuf *mbuf,
					  struct hinic_sq_bufdesc *sges,
					  struct hinic_wqe_info *sqe_info)
{
	dma_addr_t dma_addr;
	u16 i, around_sges;
	u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
	u16 real_nb_segs = mbuf->nb_segs;
	struct hinic_sq_bufdesc *sge_idx = sges;

	if (unlikely(sqe_info->around)) {
		/* part of the wqe is at the sq bottom while the rest
		 * is at the sq head
		 */
		i = 0;
		for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
		     sge_idx++) {
			if (unlikely(mbuf == NULL)) {
				txq->txq_stats.mbuf_null++;
				return false;
			}

			dma_addr = rte_mbuf_data_iova(mbuf);
			if (unlikely(mbuf->data_len == 0)) {
				txq->txq_stats.sge_len0++;
				return false;
			}
			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
				      mbuf->data_len);
			mbuf = mbuf->next;
			i++;
		}

		around_sges = nb_segs - i;
		sge_idx = (struct hinic_sq_bufdesc *)
				((void *)txq->sq_head_addr);
		for (; i < nb_segs; i++) {
			if (unlikely(mbuf == NULL)) {
				txq->txq_stats.mbuf_null++;
				return false;
			}

			dma_addr = rte_mbuf_data_iova(mbuf);
			if (unlikely(mbuf->data_len == 0)) {
				txq->txq_stats.sge_len0++;
				return false;
			}
			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
				      mbuf->data_len);
			mbuf = mbuf->next;
			sge_idx++;
		}

		/* convert sges at head to big endian */
		hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
	} else {
		/* wqe is in continuous space */
		for (i = 0; i < nb_segs; i++) {
			if (unlikely(mbuf == NULL)) {
				txq->txq_stats.mbuf_null++;
				return false;
			}

			dma_addr = rte_mbuf_data_iova(mbuf);
			if (unlikely(mbuf->data_len == 0)) {
				txq->txq_stats.sge_len0++;
				return false;
			}
			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
				      mbuf->data_len);
			mbuf = mbuf->next;
			sge_idx++;
		}
	}

	/* for now: support non-tso over 17 sges, copy the last 2 mbufs */
	if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
		/* copy invalid mbuf segs to a valid buffer,
		 * which costs performance
		 */
		txq->txq_stats.cpy_pkts += 1;
		mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
					  real_nb_segs - nb_segs);
		if (unlikely(!mbuf))
			return false;

		txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;

		/* deal with the last mbuf */
		dma_addr = rte_mbuf_data_iova(mbuf);
		if (unlikely(mbuf->data_len == 0)) {
			txq->txq_stats.sge_len0++;
			return false;
		}
		hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
			      mbuf->data_len);
		if (unlikely(sqe_info->around))
			hinic_sge_cpu_to_be32((void *)sge_idx, 1);
	}

	return true;
}

static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
					    u32 queue_info, int nr_descs,
					    u8 owner)
{
	u32 ctrl_size, task_size, bufdesc_size;

	ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
	task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
	bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);

	ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
			 SQ_CTRL_SET(task_size, TASKSECT_LEN) |
			 SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
			 SQ_CTRL_SET(ctrl_size, LEN) |
			 SQ_CTRL_SET(owner, OWNER);

	ctrl->queue_info = queue_info;
	ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);

	if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
		ctrl->queue_info |=
			SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
	} else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
		/* mss should not be less than 80 */
		ctrl->queue_info =
			SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
		ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
	}
}

static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
					  struct hinic_tx_offload_info *poff_info,
					  struct hinic_wqe_info *sqe_info)
{
	u32 total_len, limit_len, checked_len, left_len, adjust_mss;
	u32 i, first_mss_sges, left_sges;
	struct rte_mbuf *mbuf_head, *mbuf_pre;

	left_sges = mbuf->nb_segs;
	mbuf_head = mbuf;

	/* tso sge number validation */
	if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
		checked_len = 0;
		adjust_mss = mbuf->tso_segsz >= TX_MSS_MIN ?
				mbuf->tso_segsz : TX_MSS_MIN;
		limit_len = adjust_mss + poff_info->payload_offset;
		first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;

		/* every continuous 17 mbuf segs must be checked once */
		while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
			/* the total length of the first 16 mbufs must be
			 * equal to or greater than limit_len
			 */
			total_len = 0;
			for (i = 0; i < first_mss_sges; i++) {
				total_len += mbuf->data_len;
				mbuf_pre = mbuf;
				mbuf = mbuf->next;
				if (total_len >= limit_len) {
					limit_len = adjust_mss;
					break;
				}
			}

			checked_len += total_len;

			/* try to copy if not valid */
			if (unlikely(first_mss_sges == i)) {
				left_sges -= first_mss_sges;
				checked_len -= mbuf_pre->data_len;

				left_len = mbuf_head->pkt_len - checked_len;
				if (left_len > HINIC_COPY_MBUF_SIZE)
					return false;

				sqe_info->sge_cnt = mbuf_head->nb_segs -
							left_sges;
				sqe_info->cpy_mbuf_cnt = 1;

				return true;
			}
			first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);

			/* continue with the next 16 mbufs */
			left_sges -= (i + 1);
		} /* end of while */
	}

	sqe_info->sge_cnt = mbuf_head->nb_segs;
	return true;
}

static inline void
hinic_set_l4_csum_info(struct hinic_sq_task *task,
		       u32 *queue_info, struct hinic_tx_offload_info *poff_info)
{
	u32 tcp_udp_cs, sctp = 0;
	u16 l2hdr_len;

	if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
		sctp = 1;

	tcp_udp_cs = poff_info->inner_l4_tcp_udp;

	if (poff_info->tunnel_type == TUNNEL_UDP_CSUM ||
	    poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
		l2hdr_len = poff_info->outer_l2_len;

		task->pkt_info2 |=
			SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
			SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
		task->pkt_info2 |=
			SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
			SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
	} else {
		l2hdr_len = poff_info->inner_l2_len;
	}

	task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
	task->pkt_info1 |=
		SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
	task->pkt_info0 |=
		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
	task->pkt_info1 |=
		SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
	task->pkt_info0 |=
		SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
	*queue_info |=
		SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
		SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
		SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
}

static inline void
hinic_set_tso_info(struct hinic_sq_task *task,
		   u32 *queue_info, struct rte_mbuf *mbuf,
		   struct hinic_tx_offload_info *poff_info)
{
	hinic_set_l4_csum_info(task, queue_info, poff_info);

	/* wqe for tso */
	task->pkt_info0 |=
		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
	task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
	/* qsf was initialized in prepare_sq_wqe */
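	/* clear the default MSS and replace it with this packet's tso_segsz */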
	*queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
}

static inline void
hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
			  u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
{
	task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
			   SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);

	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
}

static inline void
hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
			   struct hinic_sq_task *task, u32 *queue_info,
			   struct hinic_tx_offload_info *tx_off_info)
{
	u16 vlan_tag;
	uint64_t ol_flags = mbuf->ol_flags;

	/* clear DW0~2 of task section for offload */
	task->pkt_info0 = 0;
	task->pkt_info1 = 0;
	task->pkt_info2 = 0;

	/* Base VLAN */
	if (unlikely(ol_flags & PKT_TX_VLAN_PKT)) {
		vlan_tag = mbuf->vlan_tci;
		hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
					  vlan_tag >> VLAN_PRIO_SHIFT);
	}

	/* neither checksum nor tso offload */
	if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
		return;

	if ((ol_flags & PKT_TX_TCP_SEG))
		/* set tso info for task and qsf */
		hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
	else /* just support l4 checksum offload */
		hinic_set_l4_csum_info(task, queue_info, tx_off_info);
}

static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
{
	struct hinic_tx_info *tx_info;
	struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
	int i, nb_free = 0;
	u16 hw_ci, sw_ci, sq_mask;
	int wqebb_cnt = 0;

	hw_ci = HINIC_GET_SQ_HW_CI(txq);
	sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
	sq_mask = HINIC_GET_SQ_WQE_MASK(txq);

	for (i = 0; i < txq->tx_free_thresh; ++i) {
		tx_info = &txq->tx_info[sw_ci];
		if (hw_ci == sw_ci ||
			(((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
			break;

		sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;

		if (unlikely(tx_info->cpy_mbuf != NULL)) {
			rte_pktmbuf_free(tx_info->cpy_mbuf);
			tx_info->cpy_mbuf = NULL;
		}

		wqebb_cnt += tx_info->wqebb_cnt;
		mbuf = tx_info->mbuf;

		if (likely(mbuf->nb_segs == 1)) {
			m = rte_pktmbuf_prefree_seg(mbuf);
			tx_info->mbuf = NULL;

			if (unlikely(m == NULL))
				continue;

			mbuf_free[nb_free++] = m;
			if (unlikely(m->pool != mbuf_free[0]->pool ||
				nb_free >= HINIC_MAX_TX_FREE_BULK)) {
				rte_mempool_put_bulk(mbuf_free[0]->pool,
					(void **)mbuf_free, (nb_free - 1));
				nb_free = 0;
				mbuf_free[nb_free++] = m;
			}
		} else {
			rte_pktmbuf_free(mbuf);
			tx_info->mbuf = NULL;
		}
	}

	if (nb_free > 0)
		rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
				     nb_free);

	HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
}

static inline struct hinic_sq_wqe *
hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
		 struct hinic_wqe_info *wqe_info)
{
	u32 cur_pi, end_pi;
	u16 remain_wqebbs;
	struct hinic_sq *sq = txq->sq;
	struct hinic_wq *wq = txq->wq;

	/* record current pi */
	cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
	end_pi = cur_pi + wqebb_cnt;

	/* update next pi and delta */
	wq->prod_idx += wqebb_cnt;
	wq->delta -= wqebb_cnt;

	/* return current pi and owner */
	wqe_info->pi = cur_pi;
	wqe_info->owner = sq->owner;
	wqe_info->around = 0;
	wqe_info->seq_wqebbs = wqebb_cnt;

	if (unlikely(end_pi >= txq->q_depth)) {
		/* update owner of next prod_idx */
		sq->owner = !sq->owner;

		/* turn around to head */
		if (unlikely(end_pi > txq->q_depth)) {
			wqe_info->around = 1;
			remain_wqebbs = txq->q_depth - cur_pi;
			wqe_info->seq_wqebbs = remain_wqebbs;
		}
	}

	return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
}

static inline uint16_t
hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
{
	struct ipv4_psd_header {
		uint32_t src_addr; /* IP address of source host. */
		uint32_t dst_addr; /* IP address of destination host. */
		uint8_t zero;      /* zero. */
		uint8_t proto;     /* L4 protocol type. */
		uint16_t len;      /* L4 length. */
	} psd_hdr;
	uint8_t ihl;

	psd_hdr.src_addr = ipv4_hdr->src_addr;
	psd_hdr.dst_addr = ipv4_hdr->dst_addr;
	psd_hdr.zero = 0;
	psd_hdr.proto = ipv4_hdr->next_proto_id;
	if (ol_flags & PKT_TX_TCP_SEG) {
		psd_hdr.len = 0;
	} else {
		/* ipv4_hdr->version_ihl is a big-endian uint8_t; the ihl
		 * field occupies the lower 4 bits, in units of 4 bytes
		 */
		ihl = (ipv4_hdr->version_ihl & 0xF) << 2;
		psd_hdr.len =
			rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
					 ihl);
	}
	return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
}

static inline uint16_t
hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
{
	uint32_t sum;
	struct {
		uint32_t len;   /* L4 length. */
		uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
	} psd_hdr;

	psd_hdr.proto = (ipv6_hdr->proto << 24);
	if (ol_flags & PKT_TX_TCP_SEG)
		psd_hdr.len = 0;
	else
		psd_hdr.len = ipv6_hdr->payload_len;

	sum = __rte_raw_cksum(ipv6_hdr->src_addr,
		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
	return __rte_raw_cksum_reduce(sum);
}

static inline void hinic_get_outer_cs_pld_offset(struct rte_mbuf *m,
						 struct hinic_tx_offload_info *off_info)
{
	uint64_t ol_flags = m->ol_flags;

	if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
		off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
					   m->l2_len + m->l3_len;
	else if ((ol_flags & PKT_TX_TCP_CKSUM) || (ol_flags & PKT_TX_TCP_SEG))
		off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
					   m->l2_len + m->l3_len + m->l4_len;
}

static inline void hinic_get_pld_offset(struct rte_mbuf *m,
					struct hinic_tx_offload_info *off_info)
{
	uint64_t ol_flags = m->ol_flags;

	if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
		off_info->payload_offset = m->l2_len + m->l3_len;
	else if ((ol_flags & PKT_TX_TCP_CKSUM) || (ol_flags & PKT_TX_TCP_SEG))
		off_info->payload_offset = m->l2_len + m->l3_len +
					   m->l4_len;
}

static inline void hinic_analyze_tx_info(struct rte_mbuf *mbuf,
					 struct hinic_tx_offload_info *off_info)
{
	struct rte_ether_hdr *eth_hdr;
	struct rte_vlan_hdr *vlan_hdr;
	struct rte_ipv4_hdr *ip4h;
	u16 pkt_type;
	u8 *hdr;

	hdr = (u8 *)rte_pktmbuf_mtod(mbuf, u8*);
	eth_hdr = (struct rte_ether_hdr *)hdr;
	pkt_type = rte_be_to_cpu_16(eth_hdr->ether_type);

	if (pkt_type == RTE_ETHER_TYPE_VLAN) {
		off_info->outer_l2_len = ETHER_LEN_WITH_VLAN;
		vlan_hdr = (struct rte_vlan_hdr *)(hdr + 1);
		pkt_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
	} else {
		off_info->outer_l2_len = ETHER_LEN_NO_VLAN;
	}

	if (pkt_type == RTE_ETHER_TYPE_IPV4) {
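		/* IHL is in units of 4 bytes, so shifting by
		 * HEADER_LEN_OFFSET (2) gives the header length in bytes
		 */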
		ip4h = (struct rte_ipv4_hdr *)(hdr + off_info->outer_l2_len);
		off_info->outer_l3_len = (ip4h->version_ihl & 0xf) <<
					 HEADER_LEN_OFFSET;
	} else if (pkt_type == RTE_ETHER_TYPE_IPV6) {
		/* ipv6 extension headers are not supported */
		off_info->outer_l3_len = sizeof(struct rte_ipv6_hdr);
	}
}

static inline void hinic_analyze_outer_ip_vxlan(struct rte_mbuf *mbuf,
						struct hinic_tx_offload_info *off_info)
{
	struct rte_ether_hdr *eth_hdr;
	struct rte_vlan_hdr *vlan_hdr;
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_udp_hdr *udp_hdr;
	u16 eth_type = 0;

	eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
	eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);

	if (eth_type == RTE_ETHER_TYPE_VLAN) {
		vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
		eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
	}

	if (eth_type == RTE_ETHER_TYPE_IPV4) {
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
						   mbuf->outer_l2_len);
		off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
		ipv4_hdr->hdr_checksum = 0;

		udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
						 mbuf->outer_l3_len);
		udp_hdr->dgram_cksum = 0;
	} else if (eth_type == RTE_ETHER_TYPE_IPV6) {
		off_info->outer_l3_type = IPV6_PKT;

		udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
						  (mbuf->outer_l2_len +
						   mbuf->outer_l3_len));
		udp_hdr->dgram_cksum = 0;
	}
}

static inline uint8_t hinic_analyze_l3_type(struct rte_mbuf *mbuf)
{
	uint8_t l3_type;
	uint64_t ol_flags = mbuf->ol_flags;

	if (ol_flags & PKT_TX_IPV4)
		l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
			  IPV4_PKT_WITH_CHKSUM_OFFLOAD :
			  IPV4_PKT_NO_CHKSUM_OFFLOAD;
	else if (ol_flags & PKT_TX_IPV6)
		l3_type = IPV6_PKT;
	else
		l3_type = UNKNOWN_L3TYPE;

	return l3_type;
}

static inline void hinic_calculate_tcp_checksum(struct rte_mbuf *mbuf,
						struct hinic_tx_offload_info *off_info,
						uint64_t inner_l3_offset)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	uint64_t ol_flags = mbuf->ol_flags;

	if (ol_flags & PKT_TX_IPV4) {
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
						   inner_l3_offset);

		if (ol_flags & PKT_TX_IP_CKSUM)
			ipv4_hdr->hdr_checksum = 0;

		tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
						 mbuf->l3_len);
		tcp_hdr->cksum = hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
	} else {
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
						   inner_l3_offset);
		tcp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,
						  (inner_l3_offset +
						   mbuf->l3_len));
		tcp_hdr->cksum = hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
	}

	off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
	off_info->inner_l4_tcp_udp = 1;
}

static inline void hinic_calculate_udp_checksum(struct rte_mbuf *mbuf,
						struct hinic_tx_offload_info *off_info,
						uint64_t inner_l3_offset)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_udp_hdr *udp_hdr;
	uint64_t ol_flags = mbuf->ol_flags;

	if (ol_flags & PKT_TX_IPV4) {
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
						   inner_l3_offset);

		if (ol_flags & PKT_TX_IP_CKSUM)
			ipv4_hdr->hdr_checksum = 0;

		udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
						 mbuf->l3_len);
		udp_hdr->dgram_cksum = hinic_ipv4_phdr_cksum(ipv4_hdr,
							     ol_flags);
	} else {
		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
						   inner_l3_offset);

		udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
						  (inner_l3_offset +
						   mbuf->l3_len));
		udp_hdr->dgram_cksum = hinic_ipv6_phdr_cksum(ipv6_hdr,
							     ol_flags);
	}

	off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
	off_info->inner_l4_tcp_udp = 1;
}

static inline void
hinic_calculate_sctp_checksum(struct hinic_tx_offload_info *off_info)
{
	off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
	off_info->inner_l4_tcp_udp = 0;
	off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
}

static inline void hinic_calculate_checksum(struct rte_mbuf *mbuf,
					    struct hinic_tx_offload_info *off_info,
					    uint64_t inner_l3_offset)
{
	uint64_t ol_flags = mbuf->ol_flags;

	switch (ol_flags & PKT_TX_L4_MASK) {
	case PKT_TX_UDP_CKSUM:
		hinic_calculate_udp_checksum(mbuf, off_info, inner_l3_offset);
		break;

	case PKT_TX_TCP_CKSUM:
		hinic_calculate_tcp_checksum(mbuf, off_info, inner_l3_offset);
		break;

	case PKT_TX_SCTP_CKSUM:
		hinic_calculate_sctp_checksum(off_info);
		break;

	default:
		if (ol_flags & PKT_TX_TCP_SEG)
			hinic_calculate_tcp_checksum(mbuf, off_info,
						     inner_l3_offset);
		break;
	}
}

static inline int hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
					       struct hinic_tx_offload_info *off_info)
{
	uint64_t inner_l3_offset;
	uint64_t ol_flags = m->ol_flags;

	/* Check whether the packet sets any supported offload flags */
	if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
		return 0;

	/* Support only vxlan offload */
	if (unlikely((ol_flags & PKT_TX_TUNNEL_MASK) &&
		     !(ol_flags & PKT_TX_TUNNEL_VXLAN)))
		return -ENOTSUP;

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
	if (rte_validate_tx_offload(m) != 0)
		return -EINVAL;
#endif

	if (ol_flags & PKT_TX_TUNNEL_VXLAN) {
		off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;

		/* inner_l4_tcp_udp csum must be set so the outer udp
		 * checksum is calculated for vxlan packets without
		 * inner l3 and l4 headers
		 */
		off_info->inner_l4_tcp_udp = 1;

		if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
		    (ol_flags & PKT_TX_OUTER_IPV6) ||
		    (ol_flags & PKT_TX_TCP_SEG)) {
			inner_l3_offset = m->l2_len + m->outer_l2_len +
					  m->outer_l3_len;
			off_info->outer_l2_len = m->outer_l2_len;
			off_info->outer_l3_len = m->outer_l3_len;
			/* just support vxlan tunneling pkt */
			off_info->inner_l2_len = m->l2_len - VXLANLEN -
						 sizeof(struct rte_udp_hdr);
			off_info->tunnel_length = m->l2_len;

			hinic_analyze_outer_ip_vxlan(m, off_info);

			hinic_get_outer_cs_pld_offset(m, off_info);
		} else {
			inner_l3_offset = m->l2_len;
			hinic_analyze_tx_info(m, off_info);
			/* just support vxlan tunneling pkt */
			off_info->inner_l2_len = m->l2_len - VXLANLEN -
						 sizeof(struct rte_udp_hdr) -
						 off_info->outer_l2_len -
						 off_info->outer_l3_len;
			off_info->tunnel_length = m->l2_len -
						  off_info->outer_l2_len -
						  off_info->outer_l3_len;
			off_info->outer_l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;

			hinic_get_pld_offset(m, off_info);
		}
	} else {
		inner_l3_offset = m->l2_len;
		off_info->inner_l2_len = m->l2_len;
		off_info->tunnel_type = NOT_TUNNEL;

		hinic_get_pld_offset(m, off_info);
	}

	/* invalid udp or tcp header */
	if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
		return -EINVAL;

	off_info->inner_l3_len = m->l3_len;
	off_info->inner_l4_len = m->l4_len;
	off_info->inner_l3_type = hinic_analyze_l3_type(m);

	/* Process the pseudo-header checksum */
	hinic_calculate_checksum(m, off_info, inner_l3_offset);

	return 0;
}

static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
					    struct hinic_wqe_info *sqe_info,
					    struct hinic_tx_offload_info *off_info)
{
	u16 i, total_len, sge_cnt = mbuf_pkt->nb_segs;
	struct rte_mbuf *mbuf;
	int ret;

	memset(off_info, 0, sizeof(*off_info));

	ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
	if (unlikely(ret))
		return false;

	sqe_info->cpy_mbuf_cnt = 0;

	/* non tso mbuf */
	if (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {
		if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
			/* non tso packet len must be less than 64KB */
			return false;
		} else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
			/* non tso packet buffer number must be less than 17;
			 * an mbuf with more than 17 segs must be copied
			 * into one buffer
			 */
			total_len = 0;
			mbuf = mbuf_pkt;
			for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1); i++) {
				total_len += mbuf->data_len;
				mbuf = mbuf->next;
			}

			/* default support copy total 4k mbuf segs */
			if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
			    mbuf_pkt->pkt_len)
				return false;

			sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
			sqe_info->cpy_mbuf_cnt = 1;
			return true;
		}

		/* valid non tso mbuf */
		sqe_info->sge_cnt = sge_cnt;
	} else {
		/* tso mbuf */
		if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
			/* too many mbuf segs */
			return false;

		/* check tso mbuf segs are valid or not */
		if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
						     off_info, sqe_info)))
			return false;
	}

	return true;
}

static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
{
	u16 prod_idx;
	u32 hi_prod_idx;
	struct hinic_sq_db sq_db;

	prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
	hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);

	sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
			SQ_DB_INFO_SET(SQ_DB, TYPE) |
			SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
			SQ_DB_INFO_SET(cos, COS) |
			SQ_DB_INFO_SET(sq->q_id, QID);

	/* Data should be written to HW in Big Endian Format */
	sq_db.db_info = cpu_to_be32(sq_db.db_info);

	/* Write all before the doorbell */
	rte_wmb();
	writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
}

u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
{
	int free_wqebb_cnt, wqe_wqebb_cnt;
	u32 queue_info, tx_bytes = 0;
	u16 nb_tx;
	struct hinic_wqe_info sqe_info;
	struct hinic_tx_offload_info off_info;
	struct rte_mbuf *mbuf_pkt;
	struct hinic_txq *txq = tx_queue;
	struct hinic_tx_info *tx_info;
	struct hinic_sq_wqe *sq_wqe;
	struct hinic_sq_task *task;

	/* reclaim tx mbuf before xmit new packet */
	if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
		hinic_xmit_mbuf_cleanup(txq);

	/* tx loop routine */
	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		mbuf_pkt = *tx_pkts++;
		queue_info = 0;

		/* 1. parse sge and tx offload info from mbuf */
		if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
						       &sqe_info, &off_info))) {
			txq->txq_stats.off_errs++;
			break;
		}

		/* 2. try to get enough wqebbs */
		wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
		free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
		if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
			/* reclaim again */
			hinic_xmit_mbuf_cleanup(txq);
			free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
			if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
				txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
				break;
			}
		}

		/* 3. get the sq tail wqe address from the wqe page;
		 * the sq has enough wqebbs for this packet
		 */
		sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);

		/* 4. fill sq wqe sge section */
		if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
						     sq_wqe->buf_descs,
						     &sqe_info))) {
			hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
					    wqe_wqebb_cnt, sqe_info.owner);
			txq->txq_stats.off_errs++;
			break;
		}

		/* 5. fill sq wqe task section and queue info */
		task = &sq_wqe->task;

		/* tx packet offload configuration */
		hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
					   &off_info);

		/* 6. record tx info */
		tx_info = &txq->tx_info[sqe_info.pi];
		tx_info->mbuf = mbuf_pkt;
		tx_info->wqebb_cnt = wqe_wqebb_cnt;

		/* 7. fill sq wqe header section */
		hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
					 sqe_info.sge_cnt, sqe_info.owner);

		/* 8. convert the continuous or bottom wqe byte order
		 * to big endian
		 */
		hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);

		tx_bytes += mbuf_pkt->pkt_len;
	}

	/* 9. write sq doorbell in burst mode */
	if (nb_tx) {
		hinic_sq_write_db(txq->sq, txq->cos);

		txq->txq_stats.packets += nb_tx;
		txq->txq_stats.bytes += tx_bytes;
	}
	txq->txq_stats.burst_pkts = nb_tx;

	return nb_tx;
}

void hinic_free_all_tx_mbufs(struct hinic_txq *txq)
{
	u16 ci;
	struct hinic_nic_dev *nic_dev = txq->nic_dev;
	struct hinic_tx_info *tx_info;
	int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
						   txq->q_id) + 1;

	while (free_wqebbs < txq->q_depth) {
		ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);

		tx_info = &txq->tx_info[ci];

		if (unlikely(tx_info->cpy_mbuf != NULL)) {
			rte_pktmbuf_free(tx_info->cpy_mbuf);
			tx_info->cpy_mbuf = NULL;
		}

		rte_pktmbuf_free(tx_info->mbuf);
		hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
					 tx_info->wqebb_cnt);

		free_wqebbs += tx_info->wqebb_cnt;
		tx_info->mbuf = NULL;
	}
}

void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
{
	u16 q_id;
	struct hinic_nic_dev *nic_dev =
				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);

	for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
		if (eth_dev->data->tx_queues != NULL)
			eth_dev->data->tx_queues[q_id] = NULL;

		if (nic_dev->txqs[q_id] == NULL)
			continue;

		/* stop the tx queue and free its mbufs */
		hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
		hinic_free_tx_resources(nic_dev->txqs[q_id]);

		/* free txq */
		kfree(nic_dev->txqs[q_id]);
		nic_dev->txqs[q_id] = NULL;
	}
}

void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
{
	u16 q_id;
	struct hinic_nic_dev *nic_dev =
				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);

	for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
		/* stop the tx queue and free its mbufs */
		hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
}

int hinic_setup_tx_resources(struct hinic_txq *txq)
{
	u64 tx_info_sz;

	tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
	txq->tx_info = rte_zmalloc_socket("tx_info", tx_info_sz,
					  RTE_CACHE_LINE_SIZE, txq->socket_id);
	if (!txq->tx_info)
		return -ENOMEM;

	return HINIC_OK;
}

void hinic_free_tx_resources(struct hinic_txq *txq)
{
	if (txq->tx_info == NULL)
		return;

	rte_free(txq->tx_info);
	txq->tx_info = NULL;
}

int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id,
		    u16 sq_depth, unsigned int socket_id)
{
	int err;
	struct hinic_nic_io *nic_io = hwdev->nic_io;
	struct hinic_qp *qp = &nic_io->qps[q_id];
	struct hinic_sq *sq = &qp->sq;
	void __iomem *db_addr;
	volatile u32 *ci_addr;

	sq->sq_depth = sq_depth;
	nic_io->sq_depth = sq_depth;

	/* alloc wq */
	err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
				HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth,
				socket_id);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
		return err;
	}

	/* alloc sq doorbell space */
	err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to init db addr");
		goto alloc_db_err;
	}

	/* clear hardware ci */
	ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
	*ci_addr = 0;

	sq->q_id = q_id;
	sq->wq = &nic_io->sq_wq[q_id];
	sq->owner = 1;
	sq->cons_idx_addr = (volatile u16 *)ci_addr;
	sq->db_addr = db_addr;

	return HINIC_OK;

alloc_db_err:
	hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);

	return err;
}

void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
{
	struct hinic_nic_io *nic_io;
	struct hinic_qp *qp;

	nic_io = hwdev->nic_io;
	qp = &nic_io->qps[q_id];

	if (qp->sq.wq == NULL)
		return;

	hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
	hinic_wq_free(nic_io->hwdev, qp->sq.wq);
	qp->sq.wq = NULL;
}