xref: /dpdk/drivers/net/hns3/hns3_rxtx.c (revision 9e991f217fc8719e38a812dc280dba5f84db9f59)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018-2019 Hisilicon Limited.
3  */
4 
5 #include <stdarg.h>
6 #include <stdbool.h>
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <unistd.h>
10 #include <inttypes.h>
11 #include <rte_bus_pci.h>
12 #include <rte_byteorder.h>
13 #include <rte_common.h>
14 #include <rte_cycles.h>
15 #include <rte_dev.h>
16 #include <rte_eal.h>
17 #include <rte_ether.h>
18 #include <rte_vxlan.h>
19 #include <rte_ethdev_driver.h>
20 #include <rte_io.h>
21 #include <rte_ip.h>
22 #include <rte_gre.h>
23 #include <rte_net.h>
24 #include <rte_malloc.h>
25 #include <rte_pci.h>
26 
27 #include "hns3_ethdev.h"
28 #include "hns3_rxtx.h"
29 #include "hns3_regs.h"
30 #include "hns3_logs.h"
31 
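/*
 * HNS3_CFG_DESC_NUM() encodes a descriptor count as (num / 8 - 1), which is
 * the form written to the ring BD number registers below.
 */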
32 #define HNS3_CFG_DESC_NUM(num)	((num) / 8 - 1)
33 #define DEFAULT_RX_FREE_THRESH	32
34 
35 static void
36 hns3_rx_queue_release_mbufs(struct hns3_rx_queue *rxq)
37 {
38 	uint16_t i;
39 
40 	/* Note: Fake rx queue will not enter here */
41 	if (rxq->sw_ring) {
42 		for (i = 0; i < rxq->nb_rx_desc; i++) {
43 			if (rxq->sw_ring[i].mbuf) {
44 				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
45 				rxq->sw_ring[i].mbuf = NULL;
46 			}
47 		}
48 	}
49 }
50 
51 static void
52 hns3_tx_queue_release_mbufs(struct hns3_tx_queue *txq)
53 {
54 	uint16_t i;
55 
56 	/* Note: Fake tx queue will not enter here */
57 	if (txq->sw_ring) {
58 		for (i = 0; i < txq->nb_tx_desc; i++) {
59 			if (txq->sw_ring[i].mbuf) {
60 				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
61 				txq->sw_ring[i].mbuf = NULL;
62 			}
63 		}
64 	}
65 }
66 
67 static void
68 hns3_rx_queue_release(void *queue)
69 {
70 	struct hns3_rx_queue *rxq = queue;
71 	if (rxq) {
72 		hns3_rx_queue_release_mbufs(rxq);
73 		if (rxq->mz)
74 			rte_memzone_free(rxq->mz);
75 		if (rxq->sw_ring)
76 			rte_free(rxq->sw_ring);
77 		rte_free(rxq);
78 	}
79 }
80 
81 static void
82 hns3_tx_queue_release(void *queue)
83 {
84 	struct hns3_tx_queue *txq = queue;
85 	if (txq) {
86 		hns3_tx_queue_release_mbufs(txq);
87 		if (txq->mz)
88 			rte_memzone_free(txq->mz);
89 		if (txq->sw_ring)
90 			rte_free(txq->sw_ring);
91 		rte_free(txq);
92 	}
93 }
94 
95 void
96 hns3_dev_rx_queue_release(void *queue)
97 {
98 	struct hns3_rx_queue *rxq = queue;
99 	struct hns3_adapter *hns;
100 
101 	if (rxq == NULL)
102 		return;
103 
104 	hns = rxq->hns;
105 	rte_spinlock_lock(&hns->hw.lock);
106 	hns3_rx_queue_release(queue);
107 	rte_spinlock_unlock(&hns->hw.lock);
108 }
109 
110 void
111 hns3_dev_tx_queue_release(void *queue)
112 {
113 	struct hns3_tx_queue *txq = queue;
114 	struct hns3_adapter *hns;
115 
116 	if (txq == NULL)
117 		return;
118 
119 	hns = txq->hns;
120 	rte_spinlock_lock(&hns->hw.lock);
121 	hns3_tx_queue_release(queue);
122 	rte_spinlock_unlock(&hns->hw.lock);
123 }
124 
125 static void
126 hns3_fake_rx_queue_release(struct hns3_rx_queue *queue)
127 {
128 	struct hns3_rx_queue *rxq = queue;
129 	struct hns3_adapter *hns;
130 	struct hns3_hw *hw;
131 	uint16_t idx;
132 
133 	if (rxq == NULL)
134 		return;
135 
136 	hns = rxq->hns;
137 	hw = &hns->hw;
138 	idx = rxq->queue_id;
139 	if (hw->fkq_data.rx_queues[idx]) {
140 		hns3_rx_queue_release(hw->fkq_data.rx_queues[idx]);
141 		hw->fkq_data.rx_queues[idx] = NULL;
142 	}
143 
144 	/* free fake rx queue arrays */
145 	if (idx == (hw->fkq_data.nb_fake_rx_queues - 1)) {
146 		hw->fkq_data.nb_fake_rx_queues = 0;
147 		rte_free(hw->fkq_data.rx_queues);
148 		hw->fkq_data.rx_queues = NULL;
149 	}
150 }
151 
152 static void
153 hns3_fake_tx_queue_release(struct hns3_tx_queue *queue)
154 {
155 	struct hns3_tx_queue *txq = queue;
156 	struct hns3_adapter *hns;
157 	struct hns3_hw *hw;
158 	uint16_t idx;
159 
160 	if (txq == NULL)
161 		return;
162 
163 	hns = txq->hns;
164 	hw = &hns->hw;
165 	idx = txq->queue_id;
166 	if (hw->fkq_data.tx_queues[idx]) {
167 		hns3_tx_queue_release(hw->fkq_data.tx_queues[idx]);
168 		hw->fkq_data.tx_queues[idx] = NULL;
169 	}
170 
171 	/* free fake tx queue arrays */
172 	if (idx == (hw->fkq_data.nb_fake_tx_queues - 1)) {
173 		hw->fkq_data.nb_fake_tx_queues = 0;
174 		rte_free(hw->fkq_data.tx_queues);
175 		hw->fkq_data.tx_queues = NULL;
176 	}
177 }
178 
179 static void
180 hns3_free_rx_queues(struct rte_eth_dev *dev)
181 {
182 	struct hns3_adapter *hns = dev->data->dev_private;
183 	struct hns3_fake_queue_data *fkq_data;
184 	struct hns3_hw *hw = &hns->hw;
185 	uint16_t nb_rx_q;
186 	uint16_t i;
187 
188 	nb_rx_q = hw->data->nb_rx_queues;
189 	for (i = 0; i < nb_rx_q; i++) {
190 		if (dev->data->rx_queues[i]) {
191 			hns3_rx_queue_release(dev->data->rx_queues[i]);
192 			dev->data->rx_queues[i] = NULL;
193 		}
194 	}
195 
196 	/* Free fake Rx queues */
197 	fkq_data = &hw->fkq_data;
198 	for (i = 0; i < fkq_data->nb_fake_rx_queues; i++) {
199 		if (fkq_data->rx_queues[i])
200 			hns3_fake_rx_queue_release(fkq_data->rx_queues[i]);
201 	}
202 }
203 
204 static void
205 hns3_free_tx_queues(struct rte_eth_dev *dev)
206 {
207 	struct hns3_adapter *hns = dev->data->dev_private;
208 	struct hns3_fake_queue_data *fkq_data;
209 	struct hns3_hw *hw = &hns->hw;
210 	uint16_t nb_tx_q;
211 	uint16_t i;
212 
213 	nb_tx_q = hw->data->nb_tx_queues;
214 	for (i = 0; i < nb_tx_q; i++) {
215 		if (dev->data->tx_queues[i]) {
216 			hns3_tx_queue_release(dev->data->tx_queues[i]);
217 			dev->data->tx_queues[i] = NULL;
218 		}
219 	}
220 
221 	/* Free fake Tx queues */
222 	fkq_data = &hw->fkq_data;
223 	for (i = 0; i < fkq_data->nb_fake_tx_queues; i++) {
224 		if (fkq_data->tx_queues[i])
225 			hns3_fake_tx_queue_release(fkq_data->tx_queues[i]);
226 	}
227 }
228 
229 void
230 hns3_free_all_queues(struct rte_eth_dev *dev)
231 {
232 	hns3_free_rx_queues(dev);
233 	hns3_free_tx_queues(dev);
234 }
235 
236 static int
237 hns3_alloc_rx_queue_mbufs(struct hns3_hw *hw, struct hns3_rx_queue *rxq)
238 {
239 	struct rte_mbuf *mbuf;
240 	uint64_t dma_addr;
241 	uint16_t i;
242 
243 	for (i = 0; i < rxq->nb_rx_desc; i++) {
244 		mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
245 		if (unlikely(mbuf == NULL)) {
246 			hns3_err(hw, "Failed to allocate RXD[%d] for rx queue!",
247 				 i);
248 			hns3_rx_queue_release_mbufs(rxq);
249 			return -ENOMEM;
250 		}
251 
252 		rte_mbuf_refcnt_set(mbuf, 1);
253 		mbuf->next = NULL;
254 		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
255 		mbuf->nb_segs = 1;
256 		mbuf->port = rxq->port_id;
257 
258 		rxq->sw_ring[i].mbuf = mbuf;
259 		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
260 		rxq->rx_ring[i].addr = dma_addr;
261 		rxq->rx_ring[i].rx.bd_base_info = 0;
262 	}
263 
264 	return 0;
265 }
266 
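/*
 * Map an Rx buffer size to the BD size type written to
 * HNS3_RING_RX_BD_LEN_REG; unsupported sizes fall back to the 2048-byte type.
 */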
267 static int
268 hns3_buf_size2type(uint32_t buf_size)
269 {
270 	int bd_size_type;
271 
272 	switch (buf_size) {
273 	case 512:
274 		bd_size_type = HNS3_BD_SIZE_512_TYPE;
275 		break;
276 	case 1024:
277 		bd_size_type = HNS3_BD_SIZE_1024_TYPE;
278 		break;
279 	case 4096:
280 		bd_size_type = HNS3_BD_SIZE_4096_TYPE;
281 		break;
282 	default:
283 		bd_size_type = HNS3_BD_SIZE_2048_TYPE;
284 	}
285 
286 	return bd_size_type;
287 }
288 
289 static void
290 hns3_init_rx_queue_hw(struct hns3_rx_queue *rxq)
291 {
292 	uint32_t rx_buf_len = rxq->rx_buf_len;
293 	uint64_t dma_addr = rxq->rx_ring_phys_addr;
294 
295 	hns3_write_dev(rxq, HNS3_RING_RX_BASEADDR_L_REG, (uint32_t)dma_addr);
296 	hns3_write_dev(rxq, HNS3_RING_RX_BASEADDR_H_REG,
297 		       (uint32_t)((dma_addr >> 31) >> 1));
298 
299 	hns3_write_dev(rxq, HNS3_RING_RX_BD_LEN_REG,
300 		       hns3_buf_size2type(rx_buf_len));
301 	hns3_write_dev(rxq, HNS3_RING_RX_BD_NUM_REG,
302 		       HNS3_CFG_DESC_NUM(rxq->nb_rx_desc));
303 }
304 
305 static void
306 hns3_init_tx_queue_hw(struct hns3_tx_queue *txq)
307 {
308 	uint64_t dma_addr = txq->tx_ring_phys_addr;
309 
310 	hns3_write_dev(txq, HNS3_RING_TX_BASEADDR_L_REG, (uint32_t)dma_addr);
311 	hns3_write_dev(txq, HNS3_RING_TX_BASEADDR_H_REG,
312 		       (uint32_t)((dma_addr >> 31) >> 1));
313 
314 	hns3_write_dev(txq, HNS3_RING_TX_BD_NUM_REG,
315 		       HNS3_CFG_DESC_NUM(txq->nb_tx_desc));
316 }
317 
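/*
 * Enable or disable the ring control block of every configured queue pair,
 * covering both real and fake queues. When enabling, queues marked for
 * deferred start are skipped.
 */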
318 static void
319 hns3_enable_all_queues(struct hns3_hw *hw, bool en)
320 {
321 	uint16_t nb_rx_q = hw->data->nb_rx_queues;
322 	uint16_t nb_tx_q = hw->data->nb_tx_queues;
323 	struct hns3_rx_queue *rxq;
324 	struct hns3_tx_queue *txq;
325 	uint32_t rcb_reg;
326 	int i;
327 
328 	for (i = 0; i < hw->cfg_max_queues; i++) {
329 		if (i < nb_rx_q)
330 			rxq = hw->data->rx_queues[i];
331 		else
332 			rxq = hw->fkq_data.rx_queues[i - nb_rx_q];
333 		if (i < nb_tx_q)
334 			txq = hw->data->tx_queues[i];
335 		else
336 			txq = hw->fkq_data.tx_queues[i - nb_tx_q];
337 		if (rxq == NULL || txq == NULL ||
338 		    (en && (rxq->rx_deferred_start || txq->tx_deferred_start)))
339 			continue;
340 
341 		rcb_reg = hns3_read_dev(rxq, HNS3_RING_EN_REG);
342 		if (en)
343 			rcb_reg |= BIT(HNS3_RING_EN_B);
344 		else
345 			rcb_reg &= ~BIT(HNS3_RING_EN_B);
346 		hns3_write_dev(rxq, HNS3_RING_EN_REG, rcb_reg);
347 	}
348 }
349 
350 static int
351 hns3_tqp_enable(struct hns3_hw *hw, uint16_t queue_id, bool enable)
352 {
353 	struct hns3_cfg_com_tqp_queue_cmd *req;
354 	struct hns3_cmd_desc desc;
355 	int ret;
356 
357 	req = (struct hns3_cfg_com_tqp_queue_cmd *)desc.data;
358 
359 	hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_CFG_COM_TQP_QUEUE, false);
360 	req->tqp_id = rte_cpu_to_le_16(queue_id & HNS3_RING_ID_MASK);
361 	req->stream_id = 0;
362 	hns3_set_bit(req->enable, HNS3_TQP_ENABLE_B, enable ? 1 : 0);
363 
364 	ret = hns3_cmd_send(hw, &desc, 1);
365 	if (ret)
366 		hns3_err(hw, "TQP enable fail, ret = %d", ret);
367 
368 	return ret;
369 }
370 
371 static int
372 hns3_send_reset_tqp_cmd(struct hns3_hw *hw, uint16_t queue_id, bool enable)
373 {
374 	struct hns3_reset_tqp_queue_cmd *req;
375 	struct hns3_cmd_desc desc;
376 	int ret;
377 
378 	hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_RESET_TQP_QUEUE, false);
379 
380 	req = (struct hns3_reset_tqp_queue_cmd *)desc.data;
381 	req->tqp_id = rte_cpu_to_le_16(queue_id & HNS3_RING_ID_MASK);
382 	hns3_set_bit(req->reset_req, HNS3_TQP_RESET_B, enable ? 1 : 0);
383 
384 	ret = hns3_cmd_send(hw, &desc, 1);
385 	if (ret)
386 		hns3_err(hw, "Send tqp reset cmd error, ret = %d", ret);
387 
388 	return ret;
389 }
390 
391 static int
392 hns3_get_reset_status(struct hns3_hw *hw, uint16_t queue_id)
393 {
394 	struct hns3_reset_tqp_queue_cmd *req;
395 	struct hns3_cmd_desc desc;
396 	int ret;
397 
398 	hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_RESET_TQP_QUEUE, true);
399 
400 	req = (struct hns3_reset_tqp_queue_cmd *)desc.data;
401 	req->tqp_id = rte_cpu_to_le_16(queue_id & HNS3_RING_ID_MASK);
402 
403 	ret = hns3_cmd_send(hw, &desc, 1);
404 	if (ret) {
405 		hns3_err(hw, "Get reset status error, ret = %d", ret);
406 		return ret;
407 	}
408 
409 	return hns3_get_bit(req->ready_to_reset, HNS3_TQP_RESET_B);
410 }
411 
412 static int
413 hns3_reset_tqp(struct hns3_hw *hw, uint16_t queue_id)
414 {
415 #define HNS3_TQP_RESET_TRY_MS	200
416 	uint64_t end;
417 	int reset_status;
418 	int ret;
419 
420 	ret = hns3_tqp_enable(hw, queue_id, false);
421 	if (ret)
422 		return ret;
423 
424 	/*
425 	 * In the current version, VF is not supported when the PF is driven by
426 	 * the DPDK driver: all task queue pairs are mapped to the PF function,
427 	 * so the PF's queue id equals the global queue id in the PF range.
428 	 */
429 	ret = hns3_send_reset_tqp_cmd(hw, queue_id, true);
430 	if (ret) {
431 		hns3_err(hw, "Send reset tqp cmd fail, ret = %d", ret);
432 		return ret;
433 	}
434 	ret = -ETIMEDOUT;
435 	end = get_timeofday_ms() + HNS3_TQP_RESET_TRY_MS;
436 	do {
437 		/* Wait for tqp hw reset */
438 		rte_delay_ms(HNS3_POLL_RESPONE_MS);
439 		reset_status = hns3_get_reset_status(hw, queue_id);
440 		if (reset_status) {
441 			ret = 0;
442 			break;
443 		}
444 	} while (get_timeofday_ms() < end);
445 
446 	if (ret) {
447 		hns3_err(hw, "Reset TQP fail, ret = %d", ret);
448 		return ret;
449 	}
450 
451 	ret = hns3_send_reset_tqp_cmd(hw, queue_id, false);
452 	if (ret)
453 		hns3_err(hw, "Failed to deassert the soft reset, ret = %d", ret);
454 
455 	return ret;
456 }
457 
458 static int
459 hns3vf_reset_tqp(struct hns3_hw *hw, uint16_t queue_id)
460 {
461 	uint8_t msg_data[2];
462 	int ret;
463 
464 	/* Disable VF's queue before send queue reset msg to PF */
465 	ret = hns3_tqp_enable(hw, queue_id, false);
466 	if (ret)
467 		return ret;
468 
469 	memcpy(msg_data, &queue_id, sizeof(uint16_t));
470 
471 	return hns3_send_mbx_msg(hw, HNS3_MBX_QUEUE_RESET, 0, msg_data,
472 				 sizeof(msg_data), true, NULL, 0);
473 }
474 
475 static int
476 hns3_reset_queue(struct hns3_adapter *hns, uint16_t queue_id)
477 {
478 	struct hns3_hw *hw = &hns->hw;
479 	if (hns->is_vf)
480 		return hns3vf_reset_tqp(hw, queue_id);
481 	else
482 		return hns3_reset_tqp(hw, queue_id);
483 }
484 
485 int
486 hns3_reset_all_queues(struct hns3_adapter *hns)
487 {
488 	struct hns3_hw *hw = &hns->hw;
489 	int ret, i;
490 
491 	for (i = 0; i < hw->cfg_max_queues; i++) {
492 		ret = hns3_reset_queue(hns, i);
493 		if (ret) {
494 			hns3_err(hw, "Failed to reset No.%d queue: %d", i, ret);
495 			return ret;
496 		}
497 	}
498 	return 0;
499 }
500 
501 void
502 hns3_tqp_intr_enable(struct hns3_hw *hw, uint16_t tpq_int_num, bool en)
503 {
504 	uint32_t addr, value;
505 
506 	addr = HNS3_TQP_INTR_CTRL_REG + tpq_int_num * HNS3_VECTOR_REG_OFFSET;
507 	value = en ? 1 : 0;
508 
509 	hns3_write_dev(hw, addr, value);
510 }
511 
512 int
513 hns3_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
514 {
515 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
516 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
517 	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
518 
519 	if (dev->data->dev_conf.intr_conf.rxq == 0)
520 		return -ENOTSUP;
521 
522 	/* enable the vectors */
523 	hns3_tqp_intr_enable(hw, queue_id, true);
524 
525 	return rte_intr_ack(intr_handle);
526 }
527 
528 int
529 hns3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
530 {
531 	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
532 
533 	if (dev->data->dev_conf.intr_conf.rxq == 0)
534 		return -ENOTSUP;
535 
536 	/* disable the vectors */
537 	hns3_tqp_intr_enable(hw, queue_id, false);
538 
539 	return 0;
540 }
541 
542 static int
543 hns3_dev_rx_queue_start(struct hns3_adapter *hns, uint16_t idx)
544 {
545 	struct hns3_hw *hw = &hns->hw;
546 	struct hns3_rx_queue *rxq;
547 	int ret;
548 
549 	PMD_INIT_FUNC_TRACE();
550 
551 	rxq = (struct hns3_rx_queue *)hw->data->rx_queues[idx];
552 	ret = hns3_alloc_rx_queue_mbufs(hw, rxq);
553 	if (ret) {
554 		hns3_err(hw, "Failed to alloc mbuf for No.%d rx queue: %d",
555 			 idx, ret);
556 		return ret;
557 	}
558 
559 	rxq->next_to_use = 0;
560 	rxq->next_to_clean = 0;
561 	rxq->nb_rx_hold = 0;
562 	hns3_init_rx_queue_hw(rxq);
563 
564 	return 0;
565 }
566 
567 static void
568 hns3_fake_rx_queue_start(struct hns3_adapter *hns, uint16_t idx)
569 {
570 	struct hns3_hw *hw = &hns->hw;
571 	struct hns3_rx_queue *rxq;
572 
573 	rxq = (struct hns3_rx_queue *)hw->fkq_data.rx_queues[idx];
574 	rxq->next_to_use = 0;
575 	rxq->next_to_clean = 0;
576 	rxq->nb_rx_hold = 0;
577 	hns3_init_rx_queue_hw(rxq);
578 }
579 
580 static void
581 hns3_init_tx_queue(struct hns3_tx_queue *queue)
582 {
583 	struct hns3_tx_queue *txq = queue;
584 	struct hns3_desc *desc;
585 	int i;
586 
587 	/* Clear tx bd */
588 	desc = txq->tx_ring;
589 	for (i = 0; i < txq->nb_tx_desc; i++) {
590 		desc->tx.tp_fe_sc_vld_ra_ri = 0;
591 		desc++;
592 	}
593 
594 	txq->next_to_use = 0;
595 	txq->next_to_clean = 0;
596 	txq->tx_bd_ready = txq->nb_tx_desc - 1;
597 	hns3_init_tx_queue_hw(txq);
598 }
599 
600 static void
601 hns3_dev_tx_queue_start(struct hns3_adapter *hns, uint16_t idx)
602 {
603 	struct hns3_hw *hw = &hns->hw;
604 	struct hns3_tx_queue *txq;
605 
606 	txq = (struct hns3_tx_queue *)hw->data->tx_queues[idx];
607 	hns3_init_tx_queue(txq);
608 }
609 
610 static void
611 hns3_fake_tx_queue_start(struct hns3_adapter *hns, uint16_t idx)
612 {
613 	struct hns3_hw *hw = &hns->hw;
614 	struct hns3_tx_queue *txq;
615 
616 	txq = (struct hns3_tx_queue *)hw->fkq_data.tx_queues[idx];
617 	hns3_init_tx_queue(txq);
618 }
619 
620 static void
621 hns3_init_tx_ring_tc(struct hns3_adapter *hns)
622 {
623 	struct hns3_hw *hw = &hns->hw;
624 	struct hns3_tx_queue *txq;
625 	int i, num;
626 
627 	for (i = 0; i < HNS3_MAX_TC_NUM; i++) {
628 		struct hns3_tc_queue_info *tc_queue = &hw->tc_queue[i];
629 		int j;
630 
631 		if (!tc_queue->enable)
632 			continue;
633 
634 		for (j = 0; j < tc_queue->tqp_count; j++) {
635 			num = tc_queue->tqp_offset + j;
636 			txq = (struct hns3_tx_queue *)hw->data->tx_queues[num];
637 			if (txq == NULL)
638 				continue;
639 
640 			hns3_write_dev(txq, HNS3_RING_TX_TC_REG, tc_queue->tc);
641 		}
642 	}
643 }
644 
645 static int
646 hns3_start_rx_queues(struct hns3_adapter *hns)
647 {
648 	struct hns3_hw *hw = &hns->hw;
649 	struct hns3_rx_queue *rxq;
650 	int i, j;
651 	int ret;
652 
653 	/* Initialize RSS for queues */
654 	ret = hns3_config_rss(hns);
655 	if (ret) {
656 		hns3_err(hw, "Failed to configure rss %d", ret);
657 		return ret;
658 	}
659 
660 	for (i = 0; i < hw->data->nb_rx_queues; i++) {
661 		rxq = (struct hns3_rx_queue *)hw->data->rx_queues[i];
662 		if (rxq == NULL || rxq->rx_deferred_start)
663 			continue;
664 		ret = hns3_dev_rx_queue_start(hns, i);
665 		if (ret) {
666 			hns3_err(hw, "Failed to start No.%d rx queue: %d", i,
667 				 ret);
668 			goto out;
669 		}
670 	}
671 
672 	for (i = 0; i < hw->fkq_data.nb_fake_rx_queues; i++) {
673 		rxq = (struct hns3_rx_queue *)hw->fkq_data.rx_queues[i];
674 		if (rxq == NULL || rxq->rx_deferred_start)
675 			continue;
676 		hns3_fake_rx_queue_start(hns, i);
677 	}
678 	return 0;
679 
680 out:
681 	for (j = 0; j < i; j++) {
682 		rxq = (struct hns3_rx_queue *)hw->data->rx_queues[j];
683 		hns3_rx_queue_release_mbufs(rxq);
684 	}
685 
686 	return ret;
687 }
688 
689 static void
690 hns3_start_tx_queues(struct hns3_adapter *hns)
691 {
692 	struct hns3_hw *hw = &hns->hw;
693 	struct hns3_tx_queue *txq;
694 	int i;
695 
696 	for (i = 0; i < hw->data->nb_tx_queues; i++) {
697 		txq = (struct hns3_tx_queue *)hw->data->tx_queues[i];
698 		if (txq == NULL || txq->tx_deferred_start)
699 			continue;
700 		hns3_dev_tx_queue_start(hns, i);
701 	}
702 
703 	for (i = 0; i < hw->fkq_data.nb_fake_tx_queues; i++) {
704 		txq = (struct hns3_tx_queue *)hw->fkq_data.tx_queues[i];
705 		if (txq == NULL || txq->tx_deferred_start)
706 			continue;
707 		hns3_fake_tx_queue_start(hns, i);
708 	}
709 
710 	hns3_init_tx_ring_tc(hns);
711 }
712 
713 int
714 hns3_start_queues(struct hns3_adapter *hns, bool reset_queue)
715 {
716 	struct hns3_hw *hw = &hns->hw;
717 	int ret;
718 
719 	if (reset_queue) {
720 		ret = hns3_reset_all_queues(hns);
721 		if (ret) {
722 			hns3_err(hw, "Failed to reset all queues %d", ret);
723 			return ret;
724 		}
725 	}
726 
727 	ret = hns3_start_rx_queues(hns);
728 	if (ret) {
729 		hns3_err(hw, "Failed to start rx queues: %d", ret);
730 		return ret;
731 	}
732 
733 	hns3_start_tx_queues(hns);
734 	hns3_enable_all_queues(hw, true);
735 
736 	return 0;
737 }
738 
739 int
740 hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue)
741 {
742 	struct hns3_hw *hw = &hns->hw;
743 	int ret;
744 
745 	hns3_enable_all_queues(hw, false);
746 	if (reset_queue) {
747 		ret = hns3_reset_all_queues(hns);
748 		if (ret) {
749 			hns3_err(hw, "Failed to reset all queues %d", ret);
750 			return ret;
751 		}
752 	}
753 	return 0;
754 }
755 
756 static void*
757 hns3_alloc_rxq_and_dma_zone(struct rte_eth_dev *dev,
758 			    struct hns3_queue_info *q_info)
759 {
760 	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
761 	const struct rte_memzone *rx_mz;
762 	struct hns3_rx_queue *rxq;
763 	unsigned int rx_desc;
764 
765 	rxq = rte_zmalloc_socket(q_info->type, sizeof(struct hns3_rx_queue),
766 				 RTE_CACHE_LINE_SIZE, q_info->socket_id);
767 	if (rxq == NULL) {
768 		hns3_err(hw, "Failed to allocate memory for No.%d rx ring!",
769 			 q_info->idx);
770 		return NULL;
771 	}
772 
773 	/* Allocate rx ring hardware descriptors. */
774 	rxq->queue_id = q_info->idx;
775 	rxq->nb_rx_desc = q_info->nb_desc;
776 	rx_desc = rxq->nb_rx_desc * sizeof(struct hns3_desc);
777 	rx_mz = rte_eth_dma_zone_reserve(dev, q_info->ring_name, q_info->idx,
778 					 rx_desc, HNS3_RING_BASE_ALIGN,
779 					 q_info->socket_id);
780 	if (rx_mz == NULL) {
781 		hns3_err(hw, "Failed to reserve DMA memory for No.%d rx ring!",
782 			 q_info->idx);
783 		hns3_rx_queue_release(rxq);
784 		return NULL;
785 	}
786 	rxq->mz = rx_mz;
787 	rxq->rx_ring = (struct hns3_desc *)rx_mz->addr;
788 	rxq->rx_ring_phys_addr = rx_mz->iova;
789 
790 	hns3_dbg(hw, "No.%d rx descriptors iova 0x%" PRIx64, q_info->idx,
791 		 rxq->rx_ring_phys_addr);
792 
793 	return rxq;
794 }
795 
796 static int
797 hns3_fake_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
798 			 uint16_t nb_desc, unsigned int socket_id)
799 {
800 	struct hns3_adapter *hns = dev->data->dev_private;
801 	struct hns3_hw *hw = &hns->hw;
802 	struct hns3_queue_info q_info;
803 	struct hns3_rx_queue *rxq;
804 	uint16_t nb_rx_q;
805 
806 	if (hw->fkq_data.rx_queues[idx]) {
807 		hns3_rx_queue_release(hw->fkq_data.rx_queues[idx]);
808 		hw->fkq_data.rx_queues[idx] = NULL;
809 	}
810 
811 	q_info.idx = idx;
812 	q_info.socket_id = socket_id;
813 	q_info.nb_desc = nb_desc;
814 	q_info.type = "hns3 fake RX queue";
815 	q_info.ring_name = "rx_fake_ring";
816 	rxq = hns3_alloc_rxq_and_dma_zone(dev, &q_info);
817 	if (rxq == NULL) {
818 		hns3_err(hw, "Failed to setup No.%d fake rx ring.", idx);
819 		return -ENOMEM;
820 	}
821 
822 	/* No need to allocate sw_ring; upper applications don't use it */
823 	rxq->sw_ring = NULL;
824 
825 	rxq->hns = hns;
826 	rxq->rx_deferred_start = false;
827 	rxq->port_id = dev->data->port_id;
828 	rxq->configured = true;
829 	nb_rx_q = dev->data->nb_rx_queues;
830 	rxq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET +
831 				(nb_rx_q + idx) * HNS3_TQP_REG_SIZE);
832 	rxq->rx_buf_len = hw->rx_buf_len;
833 
834 	rte_spinlock_lock(&hw->lock);
835 	hw->fkq_data.rx_queues[idx] = rxq;
836 	rte_spinlock_unlock(&hw->lock);
837 
838 	return 0;
839 }
840 
841 static void*
842 hns3_alloc_txq_and_dma_zone(struct rte_eth_dev *dev,
843 			    struct hns3_queue_info *q_info)
844 {
845 	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
846 	const struct rte_memzone *tx_mz;
847 	struct hns3_tx_queue *txq;
848 	struct hns3_desc *desc;
849 	unsigned int tx_desc;
850 	int i;
851 
852 	txq = rte_zmalloc_socket(q_info->type, sizeof(struct hns3_tx_queue),
853 				 RTE_CACHE_LINE_SIZE, q_info->socket_id);
854 	if (txq == NULL) {
855 		hns3_err(hw, "Failed to allocate memory for No.%d tx ring!",
856 			 q_info->idx);
857 		return NULL;
858 	}
859 
860 	/* Allocate tx ring hardware descriptors. */
861 	txq->queue_id = q_info->idx;
862 	txq->nb_tx_desc = q_info->nb_desc;
863 	tx_desc = txq->nb_tx_desc * sizeof(struct hns3_desc);
864 	tx_mz = rte_eth_dma_zone_reserve(dev, q_info->ring_name, q_info->idx,
865 					 tx_desc, HNS3_RING_BASE_ALIGN,
866 					 q_info->socket_id);
867 	if (tx_mz == NULL) {
868 		hns3_err(hw, "Failed to reserve DMA memory for No.%d tx ring!",
869 			 q_info->idx);
870 		hns3_tx_queue_release(txq);
871 		return NULL;
872 	}
873 	txq->mz = tx_mz;
874 	txq->tx_ring = (struct hns3_desc *)tx_mz->addr;
875 	txq->tx_ring_phys_addr = tx_mz->iova;
876 
877 	hns3_dbg(hw, "No.%d tx descriptors iova 0x%" PRIx64, q_info->idx,
878 		 txq->tx_ring_phys_addr);
879 
880 	/* Clear tx bd */
881 	desc = txq->tx_ring;
882 	for (i = 0; i < txq->nb_tx_desc; i++) {
883 		desc->tx.tp_fe_sc_vld_ra_ri = 0;
884 		desc++;
885 	}
886 
887 	return txq;
888 }
889 
890 static int
891 hns3_fake_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
892 			 uint16_t nb_desc, unsigned int socket_id)
893 {
894 	struct hns3_adapter *hns = dev->data->dev_private;
895 	struct hns3_hw *hw = &hns->hw;
896 	struct hns3_queue_info q_info;
897 	struct hns3_tx_queue *txq;
898 	uint16_t nb_tx_q;
899 
900 	if (hw->fkq_data.tx_queues[idx] != NULL) {
901 		hns3_tx_queue_release(hw->fkq_data.tx_queues[idx]);
902 		hw->fkq_data.tx_queues[idx] = NULL;
903 	}
904 
905 	q_info.idx = idx;
906 	q_info.socket_id = socket_id;
907 	q_info.nb_desc = nb_desc;
908 	q_info.type = "hns3 fake TX queue";
909 	q_info.ring_name = "tx_fake_ring";
910 	txq = hns3_alloc_txq_and_dma_zone(dev, &q_info);
911 	if (txq == NULL) {
912 		hns3_err(hw, "Failed to setup No.%d fake tx ring.", idx);
913 		return -ENOMEM;
914 	}
915 
916 	/* No need to allocate sw_ring; upper applications don't use it */
917 	txq->sw_ring = NULL;
918 
919 	txq->hns = hns;
920 	txq->tx_deferred_start = false;
921 	txq->port_id = dev->data->port_id;
922 	txq->configured = true;
923 	nb_tx_q = dev->data->nb_tx_queues;
924 	txq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET +
925 				(nb_tx_q + idx) * HNS3_TQP_REG_SIZE);
926 
927 	rte_spinlock_lock(&hw->lock);
928 	hw->fkq_data.tx_queues[idx] = txq;
929 	rte_spinlock_unlock(&hw->lock);
930 
931 	return 0;
932 }
933 
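/*
 * (Re)size the fake Rx queue pointer array: allocate it on first use, grow or
 * shrink it on reconfiguration, and free it when nb_queues is zero.
 */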
934 static int
935 hns3_fake_rx_queue_config(struct hns3_hw *hw, uint16_t nb_queues)
936 {
937 	uint16_t old_nb_queues = hw->fkq_data.nb_fake_rx_queues;
938 	void **rxq;
939 	uint8_t i;
940 
941 	if (hw->fkq_data.rx_queues == NULL && nb_queues != 0) {
942 		/* first time configuration */
943 
944 		uint32_t size;
945 		size = sizeof(hw->fkq_data.rx_queues[0]) * nb_queues;
946 		hw->fkq_data.rx_queues = rte_zmalloc("fake_rx_queues", size,
947 						     RTE_CACHE_LINE_SIZE);
948 		if (hw->fkq_data.rx_queues == NULL) {
949 			hw->fkq_data.nb_fake_rx_queues = 0;
950 			return -ENOMEM;
951 		}
952 	} else if (hw->fkq_data.rx_queues != NULL && nb_queues != 0) {
953 		/* re-configure */
954 
955 		rxq = hw->fkq_data.rx_queues;
956 		for (i = nb_queues; i < old_nb_queues; i++)
957 			hns3_dev_rx_queue_release(rxq[i]);
958 
959 		rxq = rte_realloc(rxq, sizeof(rxq[0]) * nb_queues,
960 				  RTE_CACHE_LINE_SIZE);
961 		if (rxq == NULL)
962 			return -ENOMEM;
963 		if (nb_queues > old_nb_queues) {
964 			uint16_t new_qs = nb_queues - old_nb_queues;
965 			memset(rxq + old_nb_queues, 0, sizeof(rxq[0]) * new_qs);
966 		}
967 
968 		hw->fkq_data.rx_queues = rxq;
969 	} else if (hw->fkq_data.rx_queues != NULL && nb_queues == 0) {
970 		rxq = hw->fkq_data.rx_queues;
971 		for (i = nb_queues; i < old_nb_queues; i++)
972 			hns3_dev_rx_queue_release(rxq[i]);
973 
974 		rte_free(hw->fkq_data.rx_queues);
975 		hw->fkq_data.rx_queues = NULL;
976 	}
977 
978 	hw->fkq_data.nb_fake_rx_queues = nb_queues;
979 
980 	return 0;
981 }
982 
983 static int
984 hns3_fake_tx_queue_config(struct hns3_hw *hw, uint16_t nb_queues)
985 {
986 	uint16_t old_nb_queues = hw->fkq_data.nb_fake_tx_queues;
987 	void **txq;
988 	uint8_t i;
989 
990 	if (hw->fkq_data.tx_queues == NULL && nb_queues != 0) {
991 		/* first time configuration */
992 
993 		uint32_t size;
994 		size = sizeof(hw->fkq_data.tx_queues[0]) * nb_queues;
995 		hw->fkq_data.tx_queues = rte_zmalloc("fake_tx_queues", size,
996 						     RTE_CACHE_LINE_SIZE);
997 		if (hw->fkq_data.tx_queues == NULL) {
998 			hw->fkq_data.nb_fake_tx_queues = 0;
999 			return -ENOMEM;
1000 		}
1001 	} else if (hw->fkq_data.tx_queues != NULL && nb_queues != 0) {
1002 		/* re-configure */
1003 
1004 		txq = hw->fkq_data.tx_queues;
1005 		for (i = nb_queues; i < old_nb_queues; i++)
1006 			hns3_dev_tx_queue_release(txq[i]);
1007 		txq = rte_realloc(txq, sizeof(txq[0]) * nb_queues,
1008 				  RTE_CACHE_LINE_SIZE);
1009 		if (txq == NULL)
1010 			return -ENOMEM;
1011 		if (nb_queues > old_nb_queues) {
1012 			uint16_t new_qs = nb_queues - old_nb_queues;
1013 			memset(txq + old_nb_queues, 0, sizeof(txq[0]) * new_qs);
1014 		}
1015 
1016 		hw->fkq_data.tx_queues = txq;
1017 	} else if (hw->fkq_data.tx_queues != NULL && nb_queues == 0) {
1018 		txq = hw->fkq_data.tx_queues;
1019 		for (i = nb_queues; i < old_nb_queues; i++)
1020 			hns3_dev_tx_queue_release(txq[i]);
1021 
1022 		rte_free(hw->fkq_data.tx_queues);
1023 		hw->fkq_data.tx_queues = NULL;
1024 	}
1025 	hw->fkq_data.nb_fake_tx_queues = nb_queues;
1026 
1027 	return 0;
1028 }
1029 
1030 int
1031 hns3_set_fake_rx_or_tx_queues(struct rte_eth_dev *dev, uint16_t nb_rx_q,
1032 			      uint16_t nb_tx_q)
1033 {
1034 	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1035 	uint16_t rx_need_add_nb_q;
1036 	uint16_t tx_need_add_nb_q;
1037 	uint16_t port_id;
1038 	uint16_t q;
1039 	int ret;
1040 
1041 	/* Setup new number of fake RX/TX queues and reconfigure device. */
1042 	hw->cfg_max_queues = RTE_MAX(nb_rx_q, nb_tx_q);
1043 	rx_need_add_nb_q = hw->cfg_max_queues - nb_rx_q;
1044 	tx_need_add_nb_q = hw->cfg_max_queues - nb_tx_q;
1045 	ret = hns3_fake_rx_queue_config(hw, rx_need_add_nb_q);
1046 	if (ret) {
1047 		hns3_err(hw, "Failed to configure fake rx queues: %d", ret);
1048 		goto cfg_fake_rx_q_fail;
1049 	}
1050 
1051 	ret = hns3_fake_tx_queue_config(hw, tx_need_add_nb_q);
1052 	if (ret) {
1053 		hns3_err(hw, "Failed to configure fake tx queues: %d", ret);
1054 		goto cfg_fake_tx_q_fail;
1055 	}
1056 
1057 	/* Allocate and set up fake RX queue per Ethernet port. */
1058 	port_id = hw->data->port_id;
1059 	for (q = 0; q < rx_need_add_nb_q; q++) {
1060 		ret = hns3_fake_rx_queue_setup(dev, q, HNS3_MIN_RING_DESC,
1061 					       rte_eth_dev_socket_id(port_id));
1062 		if (ret)
1063 			goto setup_fake_rx_q_fail;
1064 	}
1065 
1066 	/* Allocate and set up fake TX queue per Ethernet port. */
1067 	for (q = 0; q < tx_need_add_nb_q; q++) {
1068 		ret = hns3_fake_tx_queue_setup(dev, q, HNS3_MIN_RING_DESC,
1069 					       rte_eth_dev_socket_id(port_id));
1070 		if (ret)
1071 			goto setup_fake_tx_q_fail;
1072 	}
1073 
1074 	return 0;
1075 
1076 setup_fake_tx_q_fail:
1077 setup_fake_rx_q_fail:
1078 	(void)hns3_fake_tx_queue_config(hw, 0);
1079 cfg_fake_tx_q_fail:
1080 	(void)hns3_fake_rx_queue_config(hw, 0);
1081 cfg_fake_rx_q_fail:
1082 	hw->cfg_max_queues = 0;
1083 
1084 	return ret;
1085 }
1086 
1087 void
1088 hns3_dev_release_mbufs(struct hns3_adapter *hns)
1089 {
1090 	struct rte_eth_dev_data *dev_data = hns->hw.data;
1091 	struct hns3_rx_queue *rxq;
1092 	struct hns3_tx_queue *txq;
1093 	int i;
1094 
1095 	if (dev_data->rx_queues)
1096 		for (i = 0; i < dev_data->nb_rx_queues; i++) {
1097 			rxq = dev_data->rx_queues[i];
1098 			if (rxq == NULL || rxq->rx_deferred_start)
1099 				continue;
1100 			hns3_rx_queue_release_mbufs(rxq);
1101 		}
1102 
1103 	if (dev_data->tx_queues)
1104 		for (i = 0; i < dev_data->nb_tx_queues; i++) {
1105 			txq = dev_data->tx_queues[i];
1106 			if (txq == NULL || txq->tx_deferred_start)
1107 				continue;
1108 			hns3_tx_queue_release_mbufs(txq);
1109 		}
1110 }
1111 
1112 int
1113 hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
1114 		    unsigned int socket_id, const struct rte_eth_rxconf *conf,
1115 		    struct rte_mempool *mp)
1116 {
1117 	struct hns3_adapter *hns = dev->data->dev_private;
1118 	struct hns3_hw *hw = &hns->hw;
1119 	struct hns3_queue_info q_info;
1120 	struct hns3_rx_queue *rxq;
1121 	int rx_entry_len;
1122 
1123 	if (dev->data->dev_started) {
1124 		hns3_err(hw, "rx_queue_setup after dev_start not supported");
1125 		return -EINVAL;
1126 	}
1127 
1128 	if (nb_desc > HNS3_MAX_RING_DESC || nb_desc < HNS3_MIN_RING_DESC ||
1129 	    nb_desc % HNS3_ALIGN_RING_DESC) {
1130 		hns3_err(hw, "Number (%u) of rx descriptors is invalid",
1131 			 nb_desc);
1132 		return -EINVAL;
1133 	}
1134 
1135 	if (dev->data->rx_queues[idx]) {
1136 		hns3_rx_queue_release(dev->data->rx_queues[idx]);
1137 		dev->data->rx_queues[idx] = NULL;
1138 	}
1139 
1140 	q_info.idx = idx;
1141 	q_info.socket_id = socket_id;
1142 	q_info.nb_desc = nb_desc;
1143 	q_info.type = "hns3 RX queue";
1144 	q_info.ring_name = "rx_ring";
1145 	rxq = hns3_alloc_rxq_and_dma_zone(dev, &q_info);
1146 	if (rxq == NULL) {
1147 		hns3_err(hw,
1148 			 "Failed to alloc mem and reserve DMA mem for rx ring!");
1149 		return -ENOMEM;
1150 	}
1151 
1152 	rxq->hns = hns;
1153 	rxq->mb_pool = mp;
1154 	if (conf->rx_free_thresh <= 0)
1155 		rxq->rx_free_thresh = DEFAULT_RX_FREE_THRESH;
1156 	else
1157 		rxq->rx_free_thresh = conf->rx_free_thresh;
1158 	rxq->rx_deferred_start = conf->rx_deferred_start;
1159 
1160 	rx_entry_len = sizeof(struct hns3_entry) * rxq->nb_rx_desc;
1161 	rxq->sw_ring = rte_zmalloc_socket("hns3 RX sw ring", rx_entry_len,
1162 					  RTE_CACHE_LINE_SIZE, socket_id);
1163 	if (rxq->sw_ring == NULL) {
1164 		hns3_err(hw, "Failed to allocate memory for rx sw ring!");
1165 		hns3_rx_queue_release(rxq);
1166 		return -ENOMEM;
1167 	}
1168 
1169 	rxq->next_to_use = 0;
1170 	rxq->next_to_clean = 0;
1171 	rxq->nb_rx_hold = 0;
1172 	rxq->pkt_first_seg = NULL;
1173 	rxq->pkt_last_seg = NULL;
1174 	rxq->port_id = dev->data->port_id;
1175 	rxq->configured = true;
1176 	rxq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET +
1177 				idx * HNS3_TQP_REG_SIZE);
1178 	rxq->rx_buf_len = hw->rx_buf_len;
1179 	rxq->l2_errors = 0;
1180 	rxq->pkt_len_errors = 0;
1181 	rxq->l3_csum_erros = 0;
1182 	rxq->l4_csum_erros = 0;
1183 	rxq->ol3_csum_erros = 0;
1184 	rxq->ol4_csum_erros = 0;
1185 
1186 	rte_spinlock_lock(&hw->lock);
1187 	dev->data->rx_queues[idx] = rxq;
1188 	rte_spinlock_unlock(&hw->lock);
1189 
1190 	return 0;
1191 }
1192 
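/*
 * Translate the packet type fields of an Rx BD (l234_info/ol_info) into an
 * RTE_PTYPE_* value; when an outer (tunnel) L4 type is present, the inner
 * tables describe the encapsulated headers.
 */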
1193 static inline uint32_t
1194 rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint32_t ol_info)
1195 {
1196 #define HNS3_L2TBL_NUM	4
1197 #define HNS3_L3TBL_NUM	16
1198 #define HNS3_L4TBL_NUM	16
1199 #define HNS3_OL3TBL_NUM	16
1200 #define HNS3_OL4TBL_NUM	16
1201 	uint32_t pkt_type = 0;
1202 	uint32_t l2id, l3id, l4id;
1203 	uint32_t ol3id, ol4id;
1204 
1205 	static const uint32_t l2table[HNS3_L2TBL_NUM] = {
1206 		RTE_PTYPE_L2_ETHER,
1207 		RTE_PTYPE_L2_ETHER_VLAN,
1208 		RTE_PTYPE_L2_ETHER_QINQ,
1209 		0
1210 	};
1211 
1212 	static const uint32_t l3table[HNS3_L3TBL_NUM] = {
1213 		RTE_PTYPE_L3_IPV4,
1214 		RTE_PTYPE_L3_IPV6,
1215 		RTE_PTYPE_L2_ETHER_ARP,
1216 		RTE_PTYPE_L2_ETHER,
1217 		RTE_PTYPE_L3_IPV4_EXT,
1218 		RTE_PTYPE_L3_IPV6_EXT,
1219 		RTE_PTYPE_L2_ETHER_LLDP,
1220 		0, 0, 0, 0, 0, 0, 0, 0, 0
1221 	};
1222 
1223 	static const uint32_t l4table[HNS3_L4TBL_NUM] = {
1224 		RTE_PTYPE_L4_UDP,
1225 		RTE_PTYPE_L4_TCP,
1226 		RTE_PTYPE_TUNNEL_GRE,
1227 		RTE_PTYPE_L4_SCTP,
1228 		RTE_PTYPE_L4_IGMP,
1229 		RTE_PTYPE_L4_ICMP,
1230 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1231 	};
1232 
1233 	static const uint32_t inner_l2table[HNS3_L2TBL_NUM] = {
1234 		RTE_PTYPE_INNER_L2_ETHER,
1235 		RTE_PTYPE_INNER_L2_ETHER_VLAN,
1236 		RTE_PTYPE_INNER_L2_ETHER_QINQ,
1237 		0
1238 	};
1239 
1240 	static const uint32_t inner_l3table[HNS3_L3TBL_NUM] = {
1241 		RTE_PTYPE_INNER_L3_IPV4,
1242 		RTE_PTYPE_INNER_L3_IPV6,
1243 		0,
1244 		RTE_PTYPE_INNER_L2_ETHER,
1245 		RTE_PTYPE_INNER_L3_IPV4_EXT,
1246 		RTE_PTYPE_INNER_L3_IPV6_EXT,
1247 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1248 	};
1249 
1250 	static const uint32_t inner_l4table[HNS3_L4TBL_NUM] = {
1251 		RTE_PTYPE_INNER_L4_UDP,
1252 		RTE_PTYPE_INNER_L4_TCP,
1253 		RTE_PTYPE_TUNNEL_GRE,
1254 		RTE_PTYPE_INNER_L4_SCTP,
1255 		RTE_PTYPE_L4_IGMP,
1256 		RTE_PTYPE_INNER_L4_ICMP,
1257 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1258 	};
1259 
1260 	static const uint32_t ol3table[HNS3_OL3TBL_NUM] = {
1261 		RTE_PTYPE_L3_IPV4,
1262 		RTE_PTYPE_L3_IPV6,
1263 		0, 0,
1264 		RTE_PTYPE_L3_IPV4_EXT,
1265 		RTE_PTYPE_L3_IPV6_EXT,
1266 		0, 0, 0, 0, 0, 0, 0, 0, 0,
1267 		RTE_PTYPE_UNKNOWN
1268 	};
1269 
1270 	static const uint32_t ol4table[HNS3_OL4TBL_NUM] = {
1271 		0,
1272 		RTE_PTYPE_TUNNEL_VXLAN,
1273 		RTE_PTYPE_TUNNEL_NVGRE,
1274 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1275 	};
1276 
1277 	l2id = hns3_get_field(pkt_info, HNS3_RXD_STRP_TAGP_M,
1278 			      HNS3_RXD_STRP_TAGP_S);
1279 	l3id = hns3_get_field(pkt_info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S);
1280 	l4id = hns3_get_field(pkt_info, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S);
1281 	ol3id = hns3_get_field(ol_info, HNS3_RXD_OL3ID_M, HNS3_RXD_OL3ID_S);
1282 	ol4id = hns3_get_field(ol_info, HNS3_RXD_OL4ID_M, HNS3_RXD_OL4ID_S);
1283 
1284 	if (ol4table[ol4id])
1285 		pkt_type |= (inner_l2table[l2id] | inner_l3table[l3id] |
1286 			     inner_l4table[l4id] | ol3table[ol3id] |
1287 			     ol4table[ol4id]);
1288 	else
1289 		pkt_type |= (l2table[l2id] | l3table[l3id] | l4table[l4id]);
1290 	return pkt_type;
1291 }
1292 
1293 const uint32_t *
1294 hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev)
1295 {
1296 	static const uint32_t ptypes[] = {
1297 		RTE_PTYPE_L2_ETHER,
1298 		RTE_PTYPE_L2_ETHER_VLAN,
1299 		RTE_PTYPE_L2_ETHER_QINQ,
1300 		RTE_PTYPE_L2_ETHER_LLDP,
1301 		RTE_PTYPE_L2_ETHER_ARP,
1302 		RTE_PTYPE_L3_IPV4,
1303 		RTE_PTYPE_L3_IPV4_EXT,
1304 		RTE_PTYPE_L3_IPV6,
1305 		RTE_PTYPE_L3_IPV6_EXT,
1306 		RTE_PTYPE_L4_IGMP,
1307 		RTE_PTYPE_L4_ICMP,
1308 		RTE_PTYPE_L4_SCTP,
1309 		RTE_PTYPE_L4_TCP,
1310 		RTE_PTYPE_L4_UDP,
1311 		RTE_PTYPE_TUNNEL_GRE,
1312 		RTE_PTYPE_UNKNOWN
1313 	};
1314 
1315 	if (dev->rx_pkt_burst == hns3_recv_pkts)
1316 		return ptypes;
1317 
1318 	return NULL;
1319 }
1320 
1321 static void
1322 hns3_clean_rx_buffers(struct hns3_rx_queue *rxq, int count)
1323 {
1324 	rxq->next_to_use += count;
1325 	if (rxq->next_to_use >= rxq->nb_rx_desc)
1326 		rxq->next_to_use -= rxq->nb_rx_desc;
1327 
1328 	hns3_write_dev(rxq, HNS3_RING_RX_HEAD_REG, count);
1329 }
1330 
1331 static int
1332 hns3_handle_bdinfo(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm,
1333 		   uint32_t bd_base_info, uint32_t l234_info,
1334 		   uint32_t *cksum_err)
1335 {
1336 	uint32_t tmp = 0;
1337 
1338 	if (unlikely(l234_info & BIT(HNS3_RXD_L2E_B))) {
1339 		rxq->l2_errors++;
1340 		return -EINVAL;
1341 	}
1342 
1343 	if (unlikely(rxm->pkt_len == 0 ||
1344 		(l234_info & BIT(HNS3_RXD_TRUNCAT_B)))) {
1345 		rxq->pkt_len_errors++;
1346 		return -EINVAL;
1347 	}
1348 
1349 	if (bd_base_info & BIT(HNS3_RXD_L3L4P_B)) {
1350 		if (unlikely(l234_info & BIT(HNS3_RXD_L3E_B))) {
1351 			rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;
1352 			rxq->l3_csum_erros++;
1353 			tmp |= HNS3_L3_CKSUM_ERR;
1354 		}
1355 
1356 		if (unlikely(l234_info & BIT(HNS3_RXD_L4E_B))) {
1357 			rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
1358 			rxq->l4_csum_erros++;
1359 			tmp |= HNS3_L4_CKSUM_ERR;
1360 		}
1361 
1362 		if (unlikely(l234_info & BIT(HNS3_RXD_OL3E_B))) {
1363 			rxq->ol3_csum_erros++;
1364 			tmp |= HNS3_OUTER_L3_CKSUM_ERR;
1365 		}
1366 
1367 		if (unlikely(l234_info & BIT(HNS3_RXD_OL4E_B))) {
1368 			rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
1369 			rxq->ol4_csum_erros++;
1370 			tmp |= HNS3_OUTER_L4_CKSUM_ERR;
1371 		}
1372 	}
1373 	*cksum_err = tmp;
1374 
1375 	return 0;
1376 }
1377 
1378 static void
1379 hns3_rx_set_cksum_flag(struct rte_mbuf *rxm, uint64_t packet_type,
1380 		       const uint32_t cksum_err)
1381 {
1382 	if (unlikely((packet_type & RTE_PTYPE_TUNNEL_MASK))) {
1383 		if (likely(packet_type & RTE_PTYPE_INNER_L3_MASK) &&
1384 		    (cksum_err & HNS3_L3_CKSUM_ERR) == 0)
1385 			rxm->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1386 		if (likely(packet_type & RTE_PTYPE_INNER_L4_MASK) &&
1387 		    (cksum_err & HNS3_L4_CKSUM_ERR) == 0)
1388 			rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1389 		if (likely(packet_type & RTE_PTYPE_L4_MASK) &&
1390 		    (cksum_err & HNS3_OUTER_L4_CKSUM_ERR) == 0)
1391 			rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
1392 	} else {
1393 		if (likely(packet_type & RTE_PTYPE_L3_MASK) &&
1394 		    (cksum_err & HNS3_L3_CKSUM_ERR) == 0)
1395 			rxm->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1396 		if (likely(packet_type & RTE_PTYPE_L4_MASK) &&
1397 		    (cksum_err & HNS3_L4_CKSUM_ERR) == 0)
1398 			rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1399 	}
1400 }
1401 
1402 uint16_t
1403 hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1404 {
1405 	volatile struct hns3_desc *rx_ring;  /* RX ring (desc) */
1406 	volatile struct hns3_desc *rxdp;     /* pointer of the current desc */
1407 	struct hns3_rx_queue *rxq;      /* RX queue */
1408 	struct hns3_entry *sw_ring;
1409 	struct hns3_entry *rxe;
1410 	struct rte_mbuf *first_seg;
1411 	struct rte_mbuf *last_seg;
1412 	struct hns3_desc rxd;
1413 	struct rte_mbuf *nmb;           /* pointer of the new mbuf */
1414 	struct rte_mbuf *rxm;
1415 	struct rte_eth_dev *dev;
1416 	uint32_t bd_base_info;
1417 	uint32_t cksum_err;
1418 	uint32_t l234_info;
1419 	uint32_t ol_info;
1420 	uint64_t dma_addr;
1421 	uint16_t data_len;
1422 	uint16_t nb_rx_bd;
1423 	uint16_t pkt_len;
1424 	uint16_t nb_rx;
1425 	uint16_t rx_id;
1426 	int ret;
1427 
1428 	nb_rx = 0;
1429 	nb_rx_bd = 0;
1430 	rxq = rx_queue;
1431 	dev = &rte_eth_devices[rxq->port_id];
1432 
1433 	rx_id = rxq->next_to_clean;
1434 	rx_ring = rxq->rx_ring;
1435 	first_seg = rxq->pkt_first_seg;
1436 	last_seg = rxq->pkt_last_seg;
1437 	sw_ring = rxq->sw_ring;
1438 
1439 	while (nb_rx < nb_pkts) {
1440 		rxdp = &rx_ring[rx_id];
1441 		bd_base_info = rte_le_to_cpu_32(rxdp->rx.bd_base_info);
1442 		if (unlikely(!hns3_get_bit(bd_base_info, HNS3_RXD_VLD_B)))
1443 			break;
1444 		/*
1445 		 * The interactive process between software and hardware of
1446 		 * receiving a new packet in hns3 network engine:
1447 		 * 1. Hardware network engine firstly writes the packet content
1448 		 *    to the memory pointed by the 'addr' field of the Rx Buffer
1449 		 *    Descriptor, then fills the result of parsing the
1450 		 *    packet, including the valid field, into the Rx Buffer
1451 		 *    Descriptor in one write operation.
1452 		 * 2. The driver reads the Rx BD's valid field in a loop to
1453 		 *    check whether it's valid; if valid, it assigns a new
1454 		 *    address to the addr field, clears the valid field, gets
1455 		 *    the other information of the packet by parsing the Rx
1456 		 *    BD's other fields, and finally writes back the number of
1457 		 *    Rx BDs processed by the driver to the HNS3_RING_RX_HEAD_REG
1458 		 *    register to inform hardware.
1459 		 * In the above process, the ordering is very important. We must
1460 		 * make sure that the CPU reads the Rx BD's other fields only
1461 		 * after the Rx BD is valid.
1462 		 *
1463 		 * There are two types of re-ordering: compiler re-ordering and
1464 		 * CPU re-ordering under the ARMv8 architecture.
1465 		 * 1. We use volatile to deal with compiler re-ordering, which
1466 		 *    is why rx_ring/rxdp are defined with volatile.
1467 		 * 2. We commonly use a memory barrier to deal with CPU
1468 		 *    re-ordering, but its cost is high.
1469 		 *
1470 		 * To avoid the high cost of using a memory barrier, we
1471 		 * use the data dependency order under the ARMv8 architecture,
1472 		 * for example:
1473 		 *      instr01: load A
1474 		 *      instr02: load B <- A
1475 		 * the instr02 will always execute after instr01.
1476 		 *
1477 		 * To construct the data dependency ordering, we use the
1478 		 * following assignment:
1479 		 *      rxd = rxdp[(bd_base_info & (1u << HNS3_RXD_VLD_B)) -
1480 		 *                 (1u<<HNS3_RXD_VLD_B)]
1481 		 * Using gcc compiler under the ARMv8 architecture, the related
1482 		 * assembly code example as follows:
1483 		 * note: (1u << HNS3_RXD_VLD_B) equals 0x10
1484 		 *      instr01: ldr w26, [x22, #28]  --read bd_base_info
1485 		 *      instr02: and w0, w26, #0x10   --calc bd_base_info & 0x10
1486 		 *      instr03: sub w0, w0, #0x10    --calc (bd_base_info &
1487 		 *                                            0x10) - 0x10
1488 		 *      instr04: add x0, x22, x0, lsl #5 --calc copy source addr
1489 		 *      instr05: ldp x2, x3, [x0]
1490 		 *      instr06: stp x2, x3, [x29, #256] --copy BD's [0 ~ 15]B
1491 		 *      instr07: ldp x4, x5, [x0, #16]
1492 		 *      instr08: stp x4, x5, [x29, #272] --copy BD's [16 ~ 31]B
1493 		 * instr05~08 depend on x0's value, x0 depends on w26's value,
1494 		 * and w26 is the bd_base_info; this forms the data dependency
1495 		 * ordering.
1496 		 * note: if the BD is valid, (bd_base_info & (1u<<HNS3_RXD_VLD_B))
1497 		 *       - (1u<<HNS3_RXD_VLD_B) will always be zero, so the
1498 		 *       assignment is correct.
1499 		 *
1500 		 * So we use the data dependency ordering instead of memory
1501 		 * barrier to improve receive performance.
1502 		 */
1503 		rxd = rxdp[(bd_base_info & (1u << HNS3_RXD_VLD_B)) -
1504 			   (1u << HNS3_RXD_VLD_B)];
1505 
1506 		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1507 		if (unlikely(nmb == NULL)) {
1508 			dev->data->rx_mbuf_alloc_failed++;
1509 			break;
1510 		}
1511 
1512 		nb_rx_bd++;
1513 		rxe = &sw_ring[rx_id];
1514 		rx_id++;
1515 		if (unlikely(rx_id == rxq->nb_rx_desc))
1516 			rx_id = 0;
1517 
1518 		rte_prefetch0(sw_ring[rx_id].mbuf);
1519 		if ((rx_id & 0x3) == 0) {
1520 			rte_prefetch0(&rx_ring[rx_id]);
1521 			rte_prefetch0(&sw_ring[rx_id]);
1522 		}
1523 
1524 		rxm = rxe->mbuf;
1525 		rxe->mbuf = nmb;
1526 
1527 		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1528 		rxdp->rx.bd_base_info = 0;
1529 		rxdp->addr = dma_addr;
1530 
1531 		/* Load remaining descriptor data and extract necessary fields */
1532 		data_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.size));
1533 		l234_info = rte_le_to_cpu_32(rxd.rx.l234_info);
1534 		ol_info = rte_le_to_cpu_32(rxd.rx.ol_info);
1535 
1536 		if (first_seg == NULL) {
1537 			first_seg = rxm;
1538 			first_seg->nb_segs = 1;
1539 		} else {
1540 			first_seg->nb_segs++;
1541 			last_seg->next = rxm;
1542 		}
1543 
1544 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1545 		rxm->data_len = data_len;
1546 
1547 		if (!hns3_get_bit(bd_base_info, HNS3_RXD_FE_B)) {
1548 			last_seg = rxm;
1549 			continue;
1550 		}
1551 
1552 		/* The last buffer of the received packet */
1553 		pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.pkt_len));
1554 		first_seg->pkt_len = pkt_len;
1555 		first_seg->port = rxq->port_id;
1556 		first_seg->hash.rss = rte_le_to_cpu_32(rxd.rx.rss_hash);
1557 		first_seg->ol_flags |= PKT_RX_RSS_HASH;
1558 		if (unlikely(hns3_get_bit(bd_base_info, HNS3_RXD_LUM_B))) {
1559 			first_seg->hash.fdir.hi =
1560 				rte_le_to_cpu_32(rxd.rx.fd_id);
1561 			first_seg->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
1562 		}
1563 		rxm->next = NULL;
1564 
1565 		ret = hns3_handle_bdinfo(rxq, first_seg, bd_base_info,
1566 					 l234_info, &cksum_err);
1567 		if (unlikely(ret))
1568 			goto pkt_err;
1569 
1570 		first_seg->packet_type = rxd_pkt_info_to_pkt_type(l234_info,
1571 								  ol_info);
1572 
1573 		if (bd_base_info & BIT(HNS3_RXD_L3L4P_B))
1574 			hns3_rx_set_cksum_flag(rxm, first_seg->packet_type,
1575 					       cksum_err);
1576 
1577 		first_seg->vlan_tci = rte_le_to_cpu_16(rxd.rx.vlan_tag);
1578 		first_seg->vlan_tci_outer =
1579 			rte_le_to_cpu_16(rxd.rx.ot_vlan_tag);
1580 		rx_pkts[nb_rx++] = first_seg;
1581 		first_seg = NULL;
1582 		continue;
1583 pkt_err:
1584 		rte_pktmbuf_free(first_seg);
1585 		first_seg = NULL;
1586 	}
1587 
1588 	rxq->next_to_clean = rx_id;
1589 	rxq->pkt_first_seg = first_seg;
1590 	rxq->pkt_last_seg = last_seg;
1591 
1592 	nb_rx_bd = nb_rx_bd + rxq->nb_rx_hold;
1593 	if (nb_rx_bd > rxq->rx_free_thresh) {
1594 		hns3_clean_rx_buffers(rxq, nb_rx_bd);
1595 		nb_rx_bd = 0;
1596 	}
1597 	rxq->nb_rx_hold = nb_rx_bd;
1598 
1599 	return nb_rx;
1600 }
1601 
1602 int
1603 hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
1604 		    unsigned int socket_id, const struct rte_eth_txconf *conf)
1605 {
1606 	struct hns3_adapter *hns = dev->data->dev_private;
1607 	struct hns3_hw *hw = &hns->hw;
1608 	struct hns3_queue_info q_info;
1609 	struct hns3_tx_queue *txq;
1610 	int tx_entry_len;
1611 
1612 	if (dev->data->dev_started) {
1613 		hns3_err(hw, "tx_queue_setup after dev_start not supported");
1614 		return -EINVAL;
1615 	}
1616 
1617 	if (nb_desc > HNS3_MAX_RING_DESC || nb_desc < HNS3_MIN_RING_DESC ||
1618 	    nb_desc % HNS3_ALIGN_RING_DESC) {
1619 		hns3_err(hw, "Number (%u) of tx descriptors is invalid",
1620 			    nb_desc);
1621 		return -EINVAL;
1622 	}
1623 
1624 	if (dev->data->tx_queues[idx] != NULL) {
1625 		hns3_tx_queue_release(dev->data->tx_queues[idx]);
1626 		dev->data->tx_queues[idx] = NULL;
1627 	}
1628 
1629 	q_info.idx = idx;
1630 	q_info.socket_id = socket_id;
1631 	q_info.nb_desc = nb_desc;
1632 	q_info.type = "hns3 TX queue";
1633 	q_info.ring_name = "tx_ring";
1634 	txq = hns3_alloc_txq_and_dma_zone(dev, &q_info);
1635 	if (txq == NULL) {
1636 		hns3_err(hw,
1637 			 "Failed to alloc mem and reserve DMA mem for tx ring!");
1638 		return -ENOMEM;
1639 	}
1640 
1641 	txq->tx_deferred_start = conf->tx_deferred_start;
1642 	tx_entry_len = sizeof(struct hns3_entry) * txq->nb_tx_desc;
1643 	txq->sw_ring = rte_zmalloc_socket("hns3 TX sw ring", tx_entry_len,
1644 					  RTE_CACHE_LINE_SIZE, socket_id);
1645 	if (txq->sw_ring == NULL) {
1646 		hns3_err(hw, "Failed to allocate memory for tx sw ring!");
1647 		hns3_tx_queue_release(txq);
1648 		return -ENOMEM;
1649 	}
1650 
1651 	txq->hns = hns;
1652 	txq->next_to_use = 0;
1653 	txq->next_to_clean = 0;
1654 	txq->tx_bd_ready = txq->nb_tx_desc - 1;
1655 	txq->port_id = dev->data->port_id;
1656 	txq->configured = true;
1657 	txq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET +
1658 				idx * HNS3_TQP_REG_SIZE);
1659 	rte_spinlock_lock(&hw->lock);
1660 	dev->data->tx_queues[idx] = txq;
1661 	rte_spinlock_unlock(&hw->lock);
1662 
1663 	return 0;
1664 }
1665 
1666 static inline void
1667 hns3_queue_xmit(struct hns3_tx_queue *txq, uint32_t buf_num)
1668 {
1669 	hns3_write_dev(txq, HNS3_RING_TX_TAIL_REG, buf_num);
1670 }
1671 
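/*
 * Reclaim Tx BDs whose valid bit has been cleared by hardware: free the
 * attached mbufs and advance next_to_clean and tx_bd_ready.
 */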
1672 static void
1673 hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
1674 {
1675 	uint16_t tx_next_clean = txq->next_to_clean;
1676 	uint16_t tx_next_use   = txq->next_to_use;
1677 	uint16_t tx_bd_ready   = txq->tx_bd_ready;
1678 	uint16_t tx_bd_max     = txq->nb_tx_desc;
1679 	struct hns3_entry *tx_bak_pkt = &txq->sw_ring[tx_next_clean];
1680 	struct hns3_desc *desc = &txq->tx_ring[tx_next_clean];
1681 	struct rte_mbuf *mbuf;
1682 
1683 	while ((!hns3_get_bit(desc->tx.tp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B)) &&
1684 		tx_next_use != tx_next_clean) {
1685 		mbuf = tx_bak_pkt->mbuf;
1686 		if (mbuf) {
1687 			rte_pktmbuf_free_seg(mbuf);
1688 			tx_bak_pkt->mbuf = NULL;
1689 		}
1690 
1691 		desc++;
1692 		tx_bak_pkt++;
1693 		tx_next_clean++;
1694 		tx_bd_ready++;
1695 
1696 		if (tx_next_clean >= tx_bd_max) {
1697 			tx_next_clean = 0;
1698 			desc = txq->tx_ring;
1699 			tx_bak_pkt = txq->sw_ring;
1700 		}
1701 	}
1702 
1703 	txq->next_to_clean = tx_next_clean;
1704 	txq->tx_bd_ready   = tx_bd_ready;
1705 }
1706 
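/*
 * Fill one Tx BD for the given mbuf segment: set the buffer address and
 * length, mark the BD valid, record the payload length on the first segment,
 * and set the frag-end and VLAN fields on the last segment.
 */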
1707 static void
1708 fill_desc(struct hns3_tx_queue *txq, uint16_t tx_desc_id, struct rte_mbuf *rxm,
1709 	  bool first, int offset)
1710 {
1711 	struct hns3_desc *tx_ring = txq->tx_ring;
1712 	struct hns3_desc *desc = &tx_ring[tx_desc_id];
1713 	uint8_t frag_end = rxm->next == NULL ? 1 : 0;
1714 	uint16_t size = rxm->data_len;
1715 	uint16_t rrcfv = 0;
1716 	uint64_t ol_flags = rxm->ol_flags;
1717 	uint32_t hdr_len;
1718 	uint32_t paylen;
1719 	uint32_t tmp;
1720 
1721 	desc->addr = rte_mbuf_data_iova(rxm) + offset;
1722 	desc->tx.send_size = rte_cpu_to_le_16(size);
1723 	hns3_set_bit(rrcfv, HNS3_TXD_VLD_B, 1);
1724 
1725 	if (first) {
1726 		hdr_len = rxm->l2_len + rxm->l3_len + rxm->l4_len;
1727 		hdr_len += (ol_flags & PKT_TX_TUNNEL_MASK) ?
1728 			   rxm->outer_l2_len + rxm->outer_l3_len : 0;
1729 		paylen = rxm->pkt_len - hdr_len;
1730 		desc->tx.paylen = rte_cpu_to_le_32(paylen);
1731 	}
1732 
1733 	hns3_set_bit(rrcfv, HNS3_TXD_FE_B, frag_end);
1734 	desc->tx.tp_fe_sc_vld_ra_ri = rte_cpu_to_le_16(rrcfv);
1735 
1736 	if (frag_end) {
1737 		if (ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_QINQ_PKT)) {
1738 			tmp = rte_le_to_cpu_32(desc->tx.type_cs_vlan_tso_len);
1739 			hns3_set_bit(tmp, HNS3_TXD_VLAN_B, 1);
1740 			desc->tx.type_cs_vlan_tso_len = rte_cpu_to_le_32(tmp);
1741 			desc->tx.vlan_tag = rte_cpu_to_le_16(rxm->vlan_tci);
1742 		}
1743 
1744 		if (ol_flags & PKT_TX_QINQ_PKT) {
1745 			tmp = rte_le_to_cpu_32(desc->tx.ol_type_vlan_len_msec);
1746 			hns3_set_bit(tmp, HNS3_TXD_OVLAN_B, 1);
1747 			desc->tx.ol_type_vlan_len_msec = rte_cpu_to_le_32(tmp);
1748 			desc->tx.outer_vlan_tag =
1749 				rte_cpu_to_le_16(rxm->vlan_tci_outer);
1750 		}
1751 	}
1752 }
1753 
1754 static int
1755 hns3_tx_alloc_mbufs(struct hns3_tx_queue *txq, struct rte_mempool *mb_pool,
1756 		    uint16_t nb_new_buf, struct rte_mbuf **alloc_mbuf)
1757 {
1758 	struct rte_mbuf *new_mbuf = NULL;
1759 	struct rte_eth_dev *dev;
1760 	struct rte_mbuf *temp;
1761 	struct hns3_hw *hw;
1762 	uint16_t i;
1763 
1764 	/* Allocate enough mbufs */
1765 	for (i = 0; i < nb_new_buf; i++) {
1766 		temp = rte_pktmbuf_alloc(mb_pool);
1767 		if (unlikely(temp == NULL)) {
1768 			dev = &rte_eth_devices[txq->port_id];
1769 			hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1770 			hns3_err(hw, "Failed to alloc TX mbuf port_id=%d, "
1771 				     "queue_id=%d in reassemble tx pkts.",
1772 				     txq->port_id, txq->queue_id);
1773 			rte_pktmbuf_free(new_mbuf);
1774 			return -ENOMEM;
1775 		}
1776 		temp->next = new_mbuf;
1777 		new_mbuf = temp;
1778 	}
1779 
1780 	if (new_mbuf == NULL)
1781 		return -ENOMEM;
1782 
1783 	new_mbuf->nb_segs = nb_new_buf;
1784 	*alloc_mbuf = new_mbuf;
1785 
1786 	return 0;
1787 }
1788 
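/*
 * Copy an over-segmented packet into a newly allocated mbuf chain in which
 * every segment except possibly the last carries a full buffer's worth of
 * data, then free the original packet.
 */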
1789 static int
1790 hns3_reassemble_tx_pkts(void *tx_queue, struct rte_mbuf *tx_pkt,
1791 			struct rte_mbuf **new_pkt)
1792 {
1793 	struct hns3_tx_queue *txq = tx_queue;
1794 	struct rte_mempool *mb_pool;
1795 	struct rte_mbuf *new_mbuf;
1796 	struct rte_mbuf *temp_new;
1797 	struct rte_mbuf *temp;
1798 	uint16_t last_buf_len;
1799 	uint16_t nb_new_buf;
1800 	uint16_t buf_size;
1801 	uint16_t buf_len;
1802 	uint16_t len_s;
1803 	uint16_t len_d;
1804 	uint16_t len;
1805 	uint16_t i;
1806 	int ret;
1807 	char *s;
1808 	char *d;
1809 
1810 	mb_pool = tx_pkt->pool;
1811 	buf_size = tx_pkt->buf_len - RTE_PKTMBUF_HEADROOM;
1812 	nb_new_buf = (tx_pkt->pkt_len - 1) / buf_size + 1;
1813 
1814 	last_buf_len = tx_pkt->pkt_len % buf_size;
1815 	if (last_buf_len == 0)
1816 		last_buf_len = buf_size;
1817 
1818 	/* Allocate enough mbufs */
1819 	ret = hns3_tx_alloc_mbufs(txq, mb_pool, nb_new_buf, &new_mbuf);
1820 	if (ret)
1821 		return ret;
1822 
1823 	/* Copy the original packet content to the new mbufs */
1824 	temp = tx_pkt;
1825 	s = rte_pktmbuf_mtod(temp, char *);
1826 	len_s = temp->data_len;
1827 	temp_new = new_mbuf;
1828 	for (i = 0; i < nb_new_buf; i++) {
1829 		d = rte_pktmbuf_mtod(temp_new, char *);
1830 		if (i < nb_new_buf - 1)
1831 			buf_len = buf_size;
1832 		else
1833 			buf_len = last_buf_len;
1834 		len_d = buf_len;
1835 
1836 		while (len_d) {
1837 			len = RTE_MIN(len_s, len_d);
1838 			memcpy(d, s, len);
1839 			s = s + len;
1840 			d = d + len;
1841 			len_d = len_d - len;
1842 			len_s = len_s - len;
1843 
1844 			if (len_s == 0) {
1845 				temp = temp->next;
1846 				if (temp == NULL)
1847 					break;
1848 				s = rte_pktmbuf_mtod(temp, char *);
1849 				len_s = temp->data_len;
1850 			}
1851 		}
1852 
1853 		temp_new->data_len = buf_len;
1854 		temp_new = temp_new->next;
1855 	}
1856 
1857 	/* free original mbufs */
1858 	rte_pktmbuf_free(tx_pkt);
1859 
1860 	*new_pkt = new_mbuf;
1861 
1862 	return 0;
1863 }
1864 
1865 static void
1866 hns3_parse_outer_params(uint64_t ol_flags, uint32_t *ol_type_vlan_len_msec)
1867 {
1868 	uint32_t tmp = *ol_type_vlan_len_msec;
1869 
1870 	/* (outer) IP header type */
1871 	if (ol_flags & PKT_TX_OUTER_IPV4) {
1872 		/* OL3 header size, defined in 4 bytes */
1873 		hns3_set_field(tmp, HNS3_TXD_L3LEN_M, HNS3_TXD_L3LEN_S,
1874 			       sizeof(struct rte_ipv4_hdr) >> HNS3_L3_LEN_UNIT);
1875 		if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
1876 			hns3_set_field(tmp, HNS3_TXD_OL3T_M,
1877 				       HNS3_TXD_OL3T_S, HNS3_OL3T_IPV4_CSUM);
1878 		else
1879 			hns3_set_field(tmp, HNS3_TXD_OL3T_M, HNS3_TXD_OL3T_S,
1880 				       HNS3_OL3T_IPV4_NO_CSUM);
1881 	} else if (ol_flags & PKT_TX_OUTER_IPV6) {
1882 		hns3_set_field(tmp, HNS3_TXD_OL3T_M, HNS3_TXD_OL3T_S,
1883 			       HNS3_OL3T_IPV6);
1884 		/* OL3 header size, defined in 4 bytes */
1885 		hns3_set_field(tmp, HNS3_TXD_L3LEN_M, HNS3_TXD_L3LEN_S,
1886 			       sizeof(struct rte_ipv6_hdr) >> HNS3_L3_LEN_UNIT);
1887 	}
1888 
1889 	*ol_type_vlan_len_msec = tmp;
1890 }
1891 
1892 static int
1893 hns3_parse_inner_params(uint64_t ol_flags, uint32_t *ol_type_vlan_len_msec,
1894 			struct rte_net_hdr_lens *hdr_lens)
1895 {
1896 	uint32_t tmp = *ol_type_vlan_len_msec;
1897 	uint8_t l4_len;
1898 
1899 	/* OL2 header size, defined in 2 bytes */
1900 	hns3_set_field(tmp, HNS3_TXD_L2LEN_M, HNS3_TXD_L2LEN_S,
1901 		       sizeof(struct rte_ether_hdr) >> HNS3_L2_LEN_UNIT);
1902 
1903 	/* L4TUNT: L4 Tunneling Type */
1904 	switch (ol_flags & PKT_TX_TUNNEL_MASK) {
1905 	case PKT_TX_TUNNEL_GENEVE:
1906 	case PKT_TX_TUNNEL_VXLAN:
1907 		/* MAC-in-UDP tunnelling packet, including VXLAN */
1908 		hns3_set_field(tmp, HNS3_TXD_TUNTYPE_M, HNS3_TXD_TUNTYPE_S,
1909 			       HNS3_TUN_MAC_IN_UDP);
1910 		/*
1911 		 * OL4 header size, in units of 4 bytes; it covers the outer
1912 		 * L4 (UDP) header and the tunnel header.
1913 		 */
1914 		hns3_set_field(tmp, HNS3_TXD_L4LEN_M, HNS3_TXD_L4LEN_S,
1915 			       (uint8_t)RTE_ETHER_VXLAN_HLEN >>
1916 			       HNS3_L4_LEN_UNIT);
1917 		break;
1918 	case PKT_TX_TUNNEL_GRE:
1919 		hns3_set_field(tmp, HNS3_TXD_TUNTYPE_M, HNS3_TXD_TUNTYPE_S,
1920 			       HNS3_TUN_NVGRE);
1921 		/*
1922 		 * OL4 header size, in units of 4 bytes; it covers the outer
1923 		 * L4 (GRE) header and the tunnel header.
1924 		 */
1925 		l4_len = hdr_lens->l4_len + hdr_lens->tunnel_len;
1926 		hns3_set_field(tmp, HNS3_TXD_L4LEN_M, HNS3_TXD_L4LEN_S,
1927 			       l4_len >> HNS3_L4_LEN_UNIT);
1928 		break;
1929 	default:
1930 		/* For tunnel types other than UDP-based or GRE, drop the packet */
1931 		return -EINVAL;
1932 	}
1933 
1934 	*ol_type_vlan_len_msec = tmp;
1935 
1936 	return 0;
1937 }
1938 
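/*
 * Parse the outer and tunnel offload parameters of a tunnel packet and
 * merge them into the ol_type_vlan_len_msec field of the Tx descriptor.
 */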
1939 static int
1940 hns3_parse_tunneling_params(struct hns3_tx_queue *txq, uint16_t tx_desc_id,
1941 			    uint64_t ol_flags,
1942 			    struct rte_net_hdr_lens *hdr_lens)
1943 {
1944 	struct hns3_desc *tx_ring = txq->tx_ring;
1945 	struct hns3_desc *desc = &tx_ring[tx_desc_id];
1946 	uint32_t value = 0;
1947 	int ret;
1948 
1949 	hns3_parse_outer_params(ol_flags, &value);
1950 	ret = hns3_parse_inner_params(ol_flags, &value, hdr_lens);
1951 	if (ret)
1952 		return -EINVAL;
1953 
1954 	desc->tx.ol_type_vlan_len_msec |= rte_cpu_to_le_32(value);
1955 
1956 	return 0;
1957 }
1958 
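/*
 * Set the L3 type and L3 header length, and for IPv4 the L3 checksum
 * enable bit, according to the PKT_TX_IPV4/IPV6/IP_CKSUM offload flags.
 */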
1959 static void
1960 hns3_parse_l3_cksum_params(uint64_t ol_flags, uint32_t *type_cs_vlan_tso_len)
1961 {
1962 	uint32_t tmp;
1963 
1964 	/* Enable L3 checksum offloads */
1965 	if (ol_flags & PKT_TX_IPV4) {
1966 		tmp = *type_cs_vlan_tso_len;
1967 		hns3_set_field(tmp, HNS3_TXD_L3T_M, HNS3_TXD_L3T_S,
1968 			       HNS3_L3T_IPV4);
1969 		/* Inner (or normal) L3 header size, in units of 4 bytes */
1970 		hns3_set_field(tmp, HNS3_TXD_L3LEN_M, HNS3_TXD_L3LEN_S,
1971 			       sizeof(struct rte_ipv4_hdr) >> HNS3_L3_LEN_UNIT);
1972 		if (ol_flags & PKT_TX_IP_CKSUM)
1973 			hns3_set_bit(tmp, HNS3_TXD_L3CS_B, 1);
1974 		*type_cs_vlan_tso_len = tmp;
1975 	} else if (ol_flags & PKT_TX_IPV6) {
1976 		tmp = *type_cs_vlan_tso_len;
1977 		/* L3T: IPv6 has no L3 header checksum to offload */
1978 		hns3_set_field(tmp, HNS3_TXD_L3T_M, HNS3_TXD_L3T_S,
1979 			       HNS3_L3T_IPV6);
1980 		/* Inner (or normal) L3 header size, in units of 4 bytes */
1981 		hns3_set_field(tmp, HNS3_TXD_L3LEN_M, HNS3_TXD_L3LEN_S,
1982 			       sizeof(struct rte_ipv6_hdr) >> HNS3_L3_LEN_UNIT);
1983 		*type_cs_vlan_tso_len = tmp;
1984 	}
1985 }
1986 
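/*
 * Set the L4 type, L4 header length and L4 checksum enable bit for
 * TCP, UDP or SCTP checksum offload requests.
 */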
1987 static void
1988 hns3_parse_l4_cksum_params(uint64_t ol_flags, uint32_t *type_cs_vlan_tso_len)
1989 {
1990 	uint32_t tmp;
1991 
1992 	/* Enable L4 checksum offloads */
1993 	switch (ol_flags & PKT_TX_L4_MASK) {
1994 	case PKT_TX_TCP_CKSUM:
1995 		tmp = *type_cs_vlan_tso_len;
1996 		hns3_set_field(tmp, HNS3_TXD_L4T_M, HNS3_TXD_L4T_S,
1997 			       HNS3_L4T_TCP);
1998 		hns3_set_bit(tmp, HNS3_TXD_L4CS_B, 1);
1999 		hns3_set_field(tmp, HNS3_TXD_L4LEN_M, HNS3_TXD_L4LEN_S,
2000 			       sizeof(struct rte_tcp_hdr) >> HNS3_L4_LEN_UNIT);
2001 		*type_cs_vlan_tso_len = tmp;
2002 		break;
2003 	case PKT_TX_UDP_CKSUM:
2004 		tmp = *type_cs_vlan_tso_len;
2005 		hns3_set_field(tmp, HNS3_TXD_L4T_M, HNS3_TXD_L4T_S,
2006 			       HNS3_L4T_UDP);
2007 		hns3_set_bit(tmp, HNS3_TXD_L4CS_B, 1);
2008 		hns3_set_field(tmp, HNS3_TXD_L4LEN_M, HNS3_TXD_L4LEN_S,
2009 			       sizeof(struct rte_udp_hdr) >> HNS3_L4_LEN_UNIT);
2010 		*type_cs_vlan_tso_len = tmp;
2011 		break;
2012 	case PKT_TX_SCTP_CKSUM:
2013 		tmp = *type_cs_vlan_tso_len;
2014 		hns3_set_field(tmp, HNS3_TXD_L4T_M, HNS3_TXD_L4T_S,
2015 			       HNS3_L4T_SCTP);
2016 		hns3_set_bit(tmp, HNS3_TXD_L4CS_B, 1);
2017 		hns3_set_field(tmp, HNS3_TXD_L4LEN_M, HNS3_TXD_L4LEN_S,
2018 			       sizeof(struct rte_sctp_hdr) >> HNS3_L4_LEN_UNIT);
2019 		*type_cs_vlan_tso_len = tmp;
2020 		break;
2021 	default:
2022 		break;
2023 	}
2024 }
2025 
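/*
 * Fill the type_cs_vlan_tso_len field of the Tx descriptor with the
 * L2/L3/L4 lengths and checksum enable bits derived from ol_flags.
 */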
2026 static void
2027 hns3_txd_enable_checksum(struct hns3_tx_queue *txq, uint16_t tx_desc_id,
2028 			 uint64_t ol_flags)
2029 {
2030 	struct hns3_desc *tx_ring = txq->tx_ring;
2031 	struct hns3_desc *desc = &tx_ring[tx_desc_id];
2032 	uint32_t value = 0;
2033 
2034 	/* Inner (or normal) L2 header size, in units of 2 bytes */
2035 	hns3_set_field(value, HNS3_TXD_L2LEN_M, HNS3_TXD_L2LEN_S,
2036 		       sizeof(struct rte_ether_hdr) >> HNS3_L2_LEN_UNIT);
2037 
2038 	hns3_parse_l3_cksum_params(ol_flags, &value);
2039 	hns3_parse_l4_cksum_params(ol_flags, &value);
2040 
2041 	desc->tx.type_cs_vlan_tso_len |= rte_cpu_to_le_32(value);
2042 }
2043 
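/*
 * Tx prepare callback: check the packet length and, in debug builds, the
 * Tx offload flags, then fix up the pseudo-header checksums required by
 * hardware checksum offload. Returns the number of packets that passed
 * the checks; rte_errno is set for the first packet that failed.
 *
 * A typical caller pattern (illustrative only) is:
 *	nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *	nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */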
2044 uint16_t
2045 hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
2046 	       uint16_t nb_pkts)
2047 {
2048 	struct rte_mbuf *m;
2049 	uint16_t i;
2050 	int ret;
2051 
2052 	for (i = 0; i < nb_pkts; i++) {
2053 		m = tx_pkts[i];
2054 
2055 		/* Check the packet against the minimum Ethernet frame length */
2056 		if (m->pkt_len < RTE_ETHER_MIN_LEN) {
2057 			rte_errno = EINVAL;
2058 			return i;
2059 		}
2060 
2061 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
2062 		ret = rte_validate_tx_offload(m);
2063 		if (ret != 0) {
2064 			rte_errno = -ret;
2065 			return i;
2066 		}
2067 #endif
2068 		ret = rte_net_intel_cksum_prepare(m);
2069 		if (ret != 0) {
2070 			rte_errno = -ret;
2071 			return i;
2072 		}
2073 	}
2074 
2075 	return i;
2076 }
2077 
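/*
 * Fill tunneling parameters when the packet carries a tunnel offload flag
 * and enable L3/L4 checksum offload in the Tx descriptor when requested.
 */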
2078 static int
2079 hns3_parse_cksum(struct hns3_tx_queue *txq, uint16_t tx_desc_id,
2080 		 const struct rte_mbuf *m, struct rte_net_hdr_lens *hdr_lens)
2081 {
2082 	/* Fill in tunneling parameters if necessary */
2083 	if (m->ol_flags & PKT_TX_TUNNEL_MASK) {
2084 		(void)rte_net_get_ptype(m, hdr_lens, RTE_PTYPE_ALL_MASK);
2085 		if (hns3_parse_tunneling_params(txq, tx_desc_id, m->ol_flags,
2086 						hdr_lens))
2087 			return -EINVAL;
2088 	}
2089 	/* Enable checksum offloading */
2090 	if (m->ol_flags & HNS3_TX_CKSUM_OFFLOAD_MASK)
2091 		hns3_txd_enable_checksum(txq, tx_desc_id, m->ol_flags);
2092 
2093 	return 0;
2094 }
2095 
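/*
 * Tx burst callback: reclaim finished descriptors, then for each packet
 * pad runt frames to HNS3_MIN_PKT_SIZE, copy over-segmented packets into
 * at most HNS3_MAX_TX_BD_PER_PKT buffers, fill the checksum/tunnel
 * offload fields and one BD per segment, and finally notify hardware of
 * the whole burst via hns3_queue_xmit().
 */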
2096 uint16_t
2097 hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2098 {
2099 	struct rte_net_hdr_lens hdr_lens = {0};
2100 	struct hns3_tx_queue *txq = tx_queue;
2101 	struct hns3_entry *tx_bak_pkt;
2102 	struct rte_mbuf *new_pkt;
2103 	struct rte_mbuf *tx_pkt;
2104 	struct rte_mbuf *m_seg;
2105 	uint32_t nb_hold = 0;
2106 	uint16_t tx_next_use;
2107 	uint16_t tx_pkt_num;
2108 	uint16_t tx_bd_max;
2109 	uint16_t nb_buf;
2110 	uint16_t nb_tx;
2111 	uint16_t i;
2112 
2113 	/* Reclaim mbufs of Tx descriptors that hardware has completed */
2114 	hns3_tx_free_useless_buffer(txq);
2115 
2116 	tx_next_use = txq->next_to_use;
2117 	tx_bd_max   = txq->nb_tx_desc;
2118 	tx_pkt_num  = nb_pkts;
2119 
2120 	/* send packets */
2121 	tx_bak_pkt = &txq->sw_ring[tx_next_use];
2122 	for (nb_tx = 0; nb_tx < tx_pkt_num; nb_tx++) {
2123 		tx_pkt = *tx_pkts++;
2124 
2125 		nb_buf = tx_pkt->nb_segs;
2126 
2127 		if (nb_buf > txq->tx_bd_ready) {
2128 			if (nb_tx == 0)
2129 				return 0;
2130 
2131 			goto end_of_tx;
2132 		}
2133 
2134 		/*
2135 		 * If the packet length is greater than HNS3_MAX_FRAME_LEN
2136 		 * supported by the driver, the packet is not transmitted.
2137 		 */
2138 		if (unlikely(rte_pktmbuf_pkt_len(tx_pkt) > HNS3_MAX_FRAME_LEN))
2139 			break;
2140 
2141 		/*
2142 		 * If the packet length is less than the minimum packet size
2143 		 * (HNS3_MIN_PKT_SIZE), the driver pads it with zeros.
2144 		 */
2145 		if (unlikely(rte_pktmbuf_pkt_len(tx_pkt) < HNS3_MIN_PKT_SIZE)) {
2146 			uint16_t add_len;
2147 			char *appended;
2148 
2149 			add_len = HNS3_MIN_PKT_SIZE -
2150 					 rte_pktmbuf_pkt_len(tx_pkt);
2151 			appended = rte_pktmbuf_append(tx_pkt, add_len);
2152 			if (appended == NULL)
2153 				break;
2154 
2155 			memset(appended, 0, add_len);
2156 		}
2157 
2158 		m_seg = tx_pkt;
2159 		if (unlikely(nb_buf > HNS3_MAX_TX_BD_PER_PKT)) {
2160 			if (hns3_reassemble_tx_pkts(txq, tx_pkt, &new_pkt))
2161 				goto end_of_tx;
2162 			m_seg = new_pkt;
2163 			nb_buf = m_seg->nb_segs;
2164 		}
2165 
2166 		if (hns3_parse_cksum(txq, tx_next_use, m_seg, &hdr_lens))
2167 			goto end_of_tx;
2168 
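		/* Fill one Tx BD per mbuf segment of the packet */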
2169 		i = 0;
2170 		do {
2171 			fill_desc(txq, tx_next_use, m_seg, (i == 0), 0);
2172 			tx_bak_pkt->mbuf = m_seg;
2173 			m_seg = m_seg->next;
2174 			tx_next_use++;
2175 			tx_bak_pkt++;
2176 			if (tx_next_use >= tx_bd_max) {
2177 				tx_next_use = 0;
2178 				tx_bak_pkt = txq->sw_ring;
2179 			}
2180 
2181 			i++;
2182 		} while (m_seg != NULL);
2183 
2184 		nb_hold += i;
2185 		txq->next_to_use = tx_next_use;
2186 		txq->tx_bd_ready -= i;
2187 	}
2188 
2189 end_of_tx:
2190 
2191 	if (likely(nb_tx))
2192 		hns3_queue_xmit(txq, nb_hold);
2193 
2194 	return nb_tx;
2195 }
2196 
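/* Dummy Rx/Tx burst function used while the port is stopped or resetting. */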
2197 static uint16_t
2198 hns3_dummy_rxtx_burst(void *dpdk_txq __rte_unused,
2199 		      struct rte_mbuf **pkts __rte_unused,
2200 		      uint16_t pkts_n __rte_unused)
2201 {
2202 	return 0;
2203 }
2204 
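/*
 * Install the real Rx/Tx burst and Tx prepare callbacks when the adapter
 * is started and not resetting; otherwise install the dummy callbacks.
 */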
2205 void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev)
2206 {
2207 	struct hns3_adapter *hns = eth_dev->data->dev_private;
2208 
2209 	if (hns->hw.adapter_state == HNS3_NIC_STARTED &&
2210 	    rte_atomic16_read(&hns->hw.reset.resetting) == 0) {
2211 		eth_dev->rx_pkt_burst = hns3_recv_pkts;
2212 		eth_dev->tx_pkt_burst = hns3_xmit_pkts;
2213 		eth_dev->tx_pkt_prepare = hns3_prep_pkts;
2214 	} else {
2215 		eth_dev->rx_pkt_burst = hns3_dummy_rxtx_burst;
2216 		eth_dev->tx_pkt_burst = hns3_dummy_rxtx_burst;
2217 		eth_dev->tx_pkt_prepare = hns3_dummy_rxtx_burst;
2218 	}
2219 }
2220