xref: /dpdk/drivers/common/qat/qat_qp.c (revision 68a03efeed657e6e05f281479b33b51102797e15)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2015-2018 Intel Corporation
3  */
4 
5 #include <rte_common.h>
6 #include <rte_cycles.h>
7 #include <rte_dev.h>
8 #include <rte_malloc.h>
9 #include <rte_memzone.h>
10 #include <rte_pci.h>
11 #include <rte_bus_pci.h>
12 #include <rte_atomic.h>
13 #include <rte_prefetch.h>
14 
15 #include "qat_logs.h"
16 #include "qat_device.h"
17 #include "qat_qp.h"
18 #include "qat_sym.h"
19 #include "qat_asym.h"
20 #include "qat_comp.h"
21 #include "adf_transport_access_macros.h"
22 
23 #define QAT_CQ_MAX_DEQ_RETRIES 10
24 
25 #define ADF_MAX_DESC				4096
26 #define ADF_MIN_DESC				128
27 
28 #define ADF_ARB_REG_SLOT			0x1000
29 #define ADF_ARB_RINGSRVARBEN_OFFSET		0x19C
30 
31 #define WRITE_CSR_ARB_RINGSRVARBEN(csr_addr, index, value) \
32 	ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \
33 	(ADF_ARB_REG_SLOT * index), value)
34 
35 __extension__
36 const struct qat_qp_hw_data qat_gen1_qps[QAT_MAX_SERVICES]
37 					 [ADF_MAX_QPS_ON_ANY_SERVICE] = {
38 	/* queue pairs which provide an asymmetric crypto service */
39 	[QAT_SERVICE_ASYMMETRIC] = {
40 		{
41 			.service_type = QAT_SERVICE_ASYMMETRIC,
42 			.hw_bundle_num = 0,
43 			.tx_ring_num = 0,
44 			.rx_ring_num = 8,
45 			.tx_msg_size = 64,
46 			.rx_msg_size = 32,
47 
48 		}, {
49 			.service_type = QAT_SERVICE_ASYMMETRIC,
50 			.hw_bundle_num = 0,
51 			.tx_ring_num = 1,
52 			.rx_ring_num = 9,
53 			.tx_msg_size = 64,
54 			.rx_msg_size = 32,
55 		}
56 	},
57 	/* queue pairs which provide a symmetric crypto service */
58 	[QAT_SERVICE_SYMMETRIC] = {
59 		{
60 			.service_type = QAT_SERVICE_SYMMETRIC,
61 			.hw_bundle_num = 0,
62 			.tx_ring_num = 2,
63 			.rx_ring_num = 10,
64 			.tx_msg_size = 128,
65 			.rx_msg_size = 32,
66 		},
67 		{
68 			.service_type = QAT_SERVICE_SYMMETRIC,
69 			.hw_bundle_num = 0,
70 			.tx_ring_num = 3,
71 			.rx_ring_num = 11,
72 			.tx_msg_size = 128,
73 			.rx_msg_size = 32,
74 		}
75 	},
76 	/* queue pairs which provide a compression service */
77 	[QAT_SERVICE_COMPRESSION] = {
78 		{
79 			.service_type = QAT_SERVICE_COMPRESSION,
80 			.hw_bundle_num = 0,
81 			.tx_ring_num = 6,
82 			.rx_ring_num = 14,
83 			.tx_msg_size = 128,
84 			.rx_msg_size = 32,
85 		}, {
86 			.service_type = QAT_SERVICE_COMPRESSION,
87 			.hw_bundle_num = 0,
88 			.tx_ring_num = 7,
89 			.rx_ring_num = 15,
90 			.tx_msg_size = 128,
91 			.rx_msg_size = 32,
92 		}
93 	}
94 };
95 
96 __extension__
97 const struct qat_qp_hw_data qat_gen3_qps[QAT_MAX_SERVICES]
98 					 [ADF_MAX_QPS_ON_ANY_SERVICE] = {
99 	/* queue pairs which provide an asymmetric crypto service */
100 	[QAT_SERVICE_ASYMMETRIC] = {
101 		{
102 			.service_type = QAT_SERVICE_ASYMMETRIC,
103 			.hw_bundle_num = 0,
104 			.tx_ring_num = 0,
105 			.rx_ring_num = 4,
106 			.tx_msg_size = 64,
107 			.rx_msg_size = 32,
108 		}
109 	},
110 	/* queue pairs which provide a symmetric crypto service */
111 	[QAT_SERVICE_SYMMETRIC] = {
112 		{
113 			.service_type = QAT_SERVICE_SYMMETRIC,
114 			.hw_bundle_num = 0,
115 			.tx_ring_num = 1,
116 			.rx_ring_num = 5,
117 			.tx_msg_size = 128,
118 			.rx_msg_size = 32,
119 		}
120 	},
121 	/* queue pairs which provide a compression service */
122 	[QAT_SERVICE_COMPRESSION] = {
123 		{
124 			.service_type = QAT_SERVICE_COMPRESSION,
125 			.hw_bundle_num = 0,
126 			.tx_ring_num = 3,
127 			.rx_ring_num = 7,
128 			.tx_msg_size = 128,
129 			.rx_msg_size = 32,
130 		}
131 	}
132 };
133 
134 static int qat_qp_check_queue_alignment(uint64_t phys_addr,
135 	uint32_t queue_size_bytes);
136 static void qat_queue_delete(struct qat_queue *queue);
137 static int qat_queue_create(struct qat_pci_device *qat_dev,
138 	struct qat_queue *queue, struct qat_qp_config *, uint8_t dir);
139 static int adf_verify_queue_size(uint32_t msg_size, uint32_t msg_num,
140 	uint32_t *queue_size_for_csr);
141 static void adf_configure_queues(struct qat_qp *queue);
142 static void adf_queue_arb_enable(struct qat_queue *txq, void *base_addr,
143 	rte_spinlock_t *lock);
144 static void adf_queue_arb_disable(struct qat_queue *txq, void *base_addr,
145 	rte_spinlock_t *lock);
146 
147 
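/* Count how many entries in the per-service hw_data table are configured for
 * the given service type.
 */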
148 int qat_qps_per_service(const struct qat_qp_hw_data *qp_hw_data,
149 		enum qat_service_type service)
150 {
151 	int i, count;
152 
153 	for (i = 0, count = 0; i < ADF_MAX_QPS_ON_ANY_SERVICE; i++)
154 		if (qp_hw_data[i].service_type == service)
155 			count++;
156 	return count;
157 }
158 
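/* Reserve an IOVA-contiguous memzone for a ring. An existing memzone with the
 * same name is re-used if it is large enough and on a suitable socket;
 * an incompatible existing memzone makes the call fail.
 */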
159 static const struct rte_memzone *
160 queue_dma_zone_reserve(const char *queue_name, uint32_t queue_size,
161 			int socket_id)
162 {
163 	const struct rte_memzone *mz;
164 
165 	mz = rte_memzone_lookup(queue_name);
166 	if (mz != NULL) {
167 		if (((size_t)queue_size <= mz->len) &&
168 				((socket_id == SOCKET_ID_ANY) ||
169 					(socket_id == mz->socket_id))) {
170 			QAT_LOG(DEBUG, "re-use memzone already "
171 					"allocated for %s", queue_name);
172 			return mz;
173 		}
174 
175 		QAT_LOG(ERR, "Incompatible memzone already "
176 				"allocated %s, size %u, socket %d. "
177 				"Requested size %u, socket %d",
178 				queue_name, (uint32_t)mz->len,
179 				mz->socket_id, queue_size, socket_id);
180 		return NULL;
181 	}
182 
183 	QAT_LOG(DEBUG, "Allocate memzone for %s, size %u on socket %d",
184 					queue_name, queue_size, socket_id);
185 	return rte_memzone_reserve_aligned(queue_name, queue_size,
186 		socket_id, RTE_MEMZONE_IOVA_CONTIG, queue_size);
187 }
188 
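/* Create a queue pair: allocate the qp structure and the per-descriptor op
 * cookie pointers, create the tx and rx hardware rings, enable arbitration on
 * the tx ring and populate the op cookie mempool.
 */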
189 int qat_qp_setup(struct qat_pci_device *qat_dev,
190 		struct qat_qp **qp_addr,
191 		uint16_t queue_pair_id,
192 		struct qat_qp_config *qat_qp_conf)
193 
194 {
195 	struct qat_qp *qp;
196 	struct rte_pci_device *pci_dev =
197 			qat_pci_devs[qat_dev->qat_dev_id].pci_dev;
198 	char op_cookie_pool_name[RTE_RING_NAMESIZE];
199 	uint32_t i;
200 
201 	QAT_LOG(DEBUG, "Setup qp %u on qat pci device %d gen %d",
202 		queue_pair_id, qat_dev->qat_dev_id, qat_dev->qat_dev_gen);
203 
204 	if ((qat_qp_conf->nb_descriptors > ADF_MAX_DESC) ||
205 		(qat_qp_conf->nb_descriptors < ADF_MIN_DESC)) {
206 		QAT_LOG(ERR, "Can't create qp for %u descriptors",
207 				qat_qp_conf->nb_descriptors);
208 		return -EINVAL;
209 	}
210 
211 	if (pci_dev->mem_resource[0].addr == NULL) {
212 		QAT_LOG(ERR, "Could not find VF config space "
213 				"(UIO driver attached?).");
214 		return -EINVAL;
215 	}
216 
217 	/* Allocate the queue pair data structure. */
218 	qp = rte_zmalloc_socket("qat PMD qp metadata",
219 				sizeof(*qp), RTE_CACHE_LINE_SIZE,
220 				qat_qp_conf->socket_id);
221 	if (qp == NULL) {
222 		QAT_LOG(ERR, "Failed to alloc mem for qp struct");
223 		return -ENOMEM;
224 	}
225 	qp->nb_descriptors = qat_qp_conf->nb_descriptors;
226 	qp->op_cookies = rte_zmalloc_socket("qat PMD op cookie pointer",
227 			qat_qp_conf->nb_descriptors * sizeof(*qp->op_cookies),
228 			RTE_CACHE_LINE_SIZE, qat_qp_conf->socket_id);
229 	if (qp->op_cookies == NULL) {
230 		QAT_LOG(ERR, "Failed to alloc mem for cookie");
231 		rte_free(qp);
232 		return -ENOMEM;
233 	}
234 
235 	qp->mmap_bar_addr = pci_dev->mem_resource[0].addr;
236 	qp->enqueued = qp->dequeued = 0;
237 
238 	if (qat_queue_create(qat_dev, &(qp->tx_q), qat_qp_conf,
239 					ADF_RING_DIR_TX) != 0) {
240 		QAT_LOG(ERR, "Tx queue create failed "
241 				"queue_pair_id=%u", queue_pair_id);
242 		goto create_err;
243 	}
244 
245 	qp->max_inflights = ADF_MAX_INFLIGHTS(qp->tx_q.queue_size,
246 				ADF_BYTES_TO_MSG_SIZE(qp->tx_q.msg_size));
247 
248 	if (qp->max_inflights < 2) {
249 		QAT_LOG(ERR, "Invalid num inflights");
250 		qat_queue_delete(&(qp->tx_q));
251 		goto create_err;
252 	}
253 
254 	if (qat_queue_create(qat_dev, &(qp->rx_q), qat_qp_conf,
255 					ADF_RING_DIR_RX) != 0) {
256 		QAT_LOG(ERR, "Rx queue create failed "
257 				"queue_pair_id=%hu", queue_pair_id);
258 		qat_queue_delete(&(qp->tx_q));
259 		goto create_err;
260 	}
261 
262 	adf_configure_queues(qp);
263 	adf_queue_arb_enable(&qp->tx_q, qp->mmap_bar_addr,
264 					&qat_dev->arb_csr_lock);
265 
266 	snprintf(op_cookie_pool_name, RTE_RING_NAMESIZE,
267 					"%s%d_cookies_%s_qp%hu",
268 		pci_dev->driver->driver.name, qat_dev->qat_dev_id,
269 		qat_qp_conf->service_str, queue_pair_id);
270 
271 	QAT_LOG(DEBUG, "cookiepool: %s", op_cookie_pool_name);
272 	qp->op_cookie_pool = rte_mempool_lookup(op_cookie_pool_name);
273 	if (qp->op_cookie_pool == NULL)
274 		qp->op_cookie_pool = rte_mempool_create(op_cookie_pool_name,
275 				qp->nb_descriptors,
276 				qat_qp_conf->cookie_size, 64, 0,
277 				NULL, NULL, NULL, NULL,
278 				pci_dev->device.numa_node,
279 				0);
280 	if (!qp->op_cookie_pool) {
281 		QAT_LOG(ERR, "QAT PMD Cannot create"
282 				" op mempool");
283 		goto create_err;
284 	}
285 
286 	for (i = 0; i < qp->nb_descriptors; i++) {
287 		if (rte_mempool_get(qp->op_cookie_pool, &qp->op_cookies[i])) {
288 			QAT_LOG(ERR, "QAT PMD Cannot get op_cookie");
289 			goto create_err;
290 		}
291 		memset(qp->op_cookies[i], 0, qat_qp_conf->cookie_size);
292 	}
293 
294 	qp->qat_dev_gen = qat_dev->qat_dev_gen;
295 	qp->service_type = qat_qp_conf->hw->service_type;
296 	qp->qat_dev = qat_dev;
297 
298 	QAT_LOG(DEBUG, "QP setup complete: id: %d, cookiepool: %s",
299 			queue_pair_id, op_cookie_pool_name);
300 
301 	*qp_addr = qp;
302 	return 0;
303 
304 create_err:
305 	if (qp->op_cookie_pool)
306 		rte_mempool_free(qp->op_cookie_pool);
307 	rte_free(qp->op_cookies);
308 	rte_free(qp);
309 	return -EFAULT;
310 }
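
/* Illustrative usage sketch, not part of this file: the config values, the
 * hw table entry and the cookie struct below are assumptions chosen for the
 * example; real callers take them from the service-specific setup code.
 *
 *	struct qat_qp *qp = NULL;
 *	struct qat_qp_config conf = {
 *		.hw = &qat_gen1_qps[QAT_SERVICE_SYMMETRIC][0],
 *		.nb_descriptors = 512,
 *		.cookie_size = sizeof(struct qat_sym_op_cookie),
 *		.socket_id = SOCKET_ID_ANY,
 *		.service_str = "sym",
 *	};
 *
 *	if (qat_qp_setup(qat_dev, &qp, 0, &conf) != 0)
 *		return -EIO;
 *
 * On success *qp_addr (&qp above) points at the new queue pair, which is then
 * used with qat_enqueue_op_burst()/qat_dequeue_op_burst().
 */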
311 
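/* Tear down a queue pair. Returns -EAGAIN if responses are still pending;
 * otherwise deletes both rings, disables tx arbitration and frees the op
 * cookies, their mempool and the qp structure.
 */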
312 int qat_qp_release(struct qat_qp **qp_addr)
313 {
314 	struct qat_qp *qp = *qp_addr;
315 	uint32_t i;
316 
317 	if (qp == NULL) {
318 		QAT_LOG(DEBUG, "qp already freed");
319 		return 0;
320 	}
321 
322 	QAT_LOG(DEBUG, "Free qp on qat_pci device %d",
323 				qp->qat_dev->qat_dev_id);
324 
325 	/* Don't free memory if there are still responses to be processed */
326 	if ((qp->enqueued - qp->dequeued) == 0) {
327 		qat_queue_delete(&(qp->tx_q));
328 		qat_queue_delete(&(qp->rx_q));
329 	} else {
330 		return -EAGAIN;
331 	}
332 
333 	adf_queue_arb_disable(&(qp->tx_q), qp->mmap_bar_addr,
334 					&qp->qat_dev->arb_csr_lock);
335 
336 	for (i = 0; i < qp->nb_descriptors; i++)
337 		rte_mempool_put(qp->op_cookie_pool, qp->op_cookies[i]);
338 
339 	if (qp->op_cookie_pool)
340 		rte_mempool_free(qp->op_cookie_pool);
341 
342 	rte_free(qp->op_cookies);
343 	rte_free(qp);
344 	*qp_addr = NULL;
345 	return 0;
346 }
347 
348 
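/* Free the memzone backing a ring. The ring memory is first overwritten with
 * an unused pattern.
 */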
349 static void qat_queue_delete(struct qat_queue *queue)
350 {
351 	const struct rte_memzone *mz;
352 	int status = 0;
353 
354 	if (queue == NULL) {
355 		QAT_LOG(DEBUG, "Invalid queue");
356 		return;
357 	}
358 	QAT_LOG(DEBUG, "Free ring %d, memzone: %s",
359 			queue->hw_queue_number, queue->memz_name);
360 
361 	mz = rte_memzone_lookup(queue->memz_name);
362 	if (mz != NULL)	{
363 		/* Write an unused pattern to the queue memory. */
364 		memset(queue->base_addr, 0x7F, queue->queue_size);
365 		status = rte_memzone_free(mz);
366 		if (status != 0)
367 			QAT_LOG(ERR, "Error %d on freeing queue %s",
368 					status, queue->memz_name);
369 	} else {
370 		QAT_LOG(DEBUG, "queue %s doesn't exist",
371 				queue->memz_name);
372 	}
373 }
374 
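/* Create a single hardware ring (tx or rx): reserve its memory, verify size
 * and alignment, and program the ring base CSR.
 */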
375 static int
376 qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue,
377 		struct qat_qp_config *qp_conf, uint8_t dir)
378 {
379 	uint64_t queue_base;
380 	void *io_addr;
381 	const struct rte_memzone *qp_mz;
382 	struct rte_pci_device *pci_dev =
383 			qat_pci_devs[qat_dev->qat_dev_id].pci_dev;
384 	int ret = 0;
385 	uint16_t desc_size = (dir == ADF_RING_DIR_TX ?
386 			qp_conf->hw->tx_msg_size : qp_conf->hw->rx_msg_size);
387 	uint32_t queue_size_bytes = (qp_conf->nb_descriptors)*(desc_size);
388 
389 	queue->hw_bundle_number = qp_conf->hw->hw_bundle_num;
390 	queue->hw_queue_number = (dir == ADF_RING_DIR_TX ?
391 			qp_conf->hw->tx_ring_num : qp_conf->hw->rx_ring_num);
392 
393 	if (desc_size > ADF_MSG_SIZE_TO_BYTES(ADF_MAX_MSG_SIZE)) {
394 		QAT_LOG(ERR, "Invalid descriptor size %d", desc_size);
395 		return -EINVAL;
396 	}
397 
398 	/*
399 	 * Allocate a memzone for the queue - create a unique name.
400 	 */
401 	snprintf(queue->memz_name, sizeof(queue->memz_name),
402 			"%s_%d_%s_%s_%d_%d",
403 		pci_dev->driver->driver.name, qat_dev->qat_dev_id,
404 		qp_conf->service_str, "qp_mem",
405 		queue->hw_bundle_number, queue->hw_queue_number);
406 	qp_mz = queue_dma_zone_reserve(queue->memz_name, queue_size_bytes,
407 			pci_dev->device.numa_node);
408 	if (qp_mz == NULL) {
409 		QAT_LOG(ERR, "Failed to allocate ring memzone");
410 		return -ENOMEM;
411 	}
412 
413 	queue->base_addr = (char *)qp_mz->addr;
414 	queue->base_phys_addr = qp_mz->iova;
415 	if (qat_qp_check_queue_alignment(queue->base_phys_addr,
416 			queue_size_bytes)) {
417 		QAT_LOG(ERR, "Invalid alignment on queue create "
418 					"0x%"PRIx64,
419 					queue->base_phys_addr);
420 		ret = -EFAULT;
421 		goto queue_create_err;
422 	}
423 
424 	if (adf_verify_queue_size(desc_size, qp_conf->nb_descriptors,
425 			&(queue->queue_size)) != 0) {
426 		QAT_LOG(ERR, "Invalid num inflights");
427 		ret = -EINVAL;
428 		goto queue_create_err;
429 	}
430 
431 	queue->modulo_mask = (1 << ADF_RING_SIZE_MODULO(queue->queue_size)) - 1;
432 	queue->head = 0;
433 	queue->tail = 0;
434 	queue->msg_size = desc_size;
435 
436 	/* For fast calculation of the cookie index; relies on msg_size being 2^n */
437 	queue->trailz = __builtin_ctz(desc_size);
438 
439 	/*
440 	 * Write an unused pattern to the queue memory.
441 	 */
442 	memset(queue->base_addr, 0x7F, queue_size_bytes);
443 
444 	queue_base = BUILD_RING_BASE_ADDR(queue->base_phys_addr,
445 					queue->queue_size);
446 
447 	io_addr = pci_dev->mem_resource[0].addr;
448 
449 	WRITE_CSR_RING_BASE(io_addr, queue->hw_bundle_number,
450 			queue->hw_queue_number, queue_base);
451 
452 	QAT_LOG(DEBUG, "RING: Name:%s, size in CSR: %u, in bytes %u,"
453 		" nb msgs %u, msg_size %u, modulo mask %u",
454 			queue->memz_name,
455 			queue->queue_size, queue_size_bytes,
456 			qp_conf->nb_descriptors, desc_size,
457 			queue->modulo_mask);
458 
459 	return 0;
460 
461 queue_create_err:
462 	rte_memzone_free(qp_mz);
463 	return ret;
464 }
465 
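/* A ring base address must be naturally aligned to the ring size in bytes
 * (ring sizes are powers of two).
 */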
466 static int qat_qp_check_queue_alignment(uint64_t phys_addr,
467 					uint32_t queue_size_bytes)
468 {
469 	if (((queue_size_bytes - 1) & phys_addr) != 0)
470 		return -EINVAL;
471 	return 0;
472 }
473 
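/* Translate a ring size in bytes (msg_size * msg_num) into the encoded size
 * value written to the ring CSRs; fails unless the size is an exact match.
 */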
474 static int adf_verify_queue_size(uint32_t msg_size, uint32_t msg_num,
475 	uint32_t *p_queue_size_for_csr)
476 {
477 	uint8_t i = ADF_MIN_RING_SIZE;
478 
479 	for (; i <= ADF_MAX_RING_SIZE; i++)
480 		if ((msg_size * msg_num) ==
481 				(uint32_t)ADF_SIZE_TO_RING_SIZE_IN_BYTES(i)) {
482 			*p_queue_size_for_csr = i;
483 			return 0;
484 		}
485 	QAT_LOG(ERR, "Invalid ring size %u", msg_size * msg_num);
486 	return -EINVAL;
487 }
488 
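/* Set this tx ring's bit in the bundle's arbiter enable CSR so the hardware
 * starts servicing the ring; the lock protects the read-modify-write.
 */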
489 static void adf_queue_arb_enable(struct qat_queue *txq, void *base_addr,
490 					rte_spinlock_t *lock)
491 {
492 	uint32_t arb_csr_offset =  ADF_ARB_RINGSRVARBEN_OFFSET +
493 					(ADF_ARB_REG_SLOT *
494 							txq->hw_bundle_number);
495 	uint32_t value;
496 
497 	rte_spinlock_lock(lock);
498 	value = ADF_CSR_RD(base_addr, arb_csr_offset);
499 	value |= (0x01 << txq->hw_queue_number);
500 	ADF_CSR_WR(base_addr, arb_csr_offset, value);
501 	rte_spinlock_unlock(lock);
502 }
503 
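/* Clear this tx ring's bit in the bundle's arbiter enable CSR so the hardware
 * stops servicing the ring.
 */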
504 static void adf_queue_arb_disable(struct qat_queue *txq, void *base_addr,
505 					rte_spinlock_t *lock)
506 {
507 	uint32_t arb_csr_offset =  ADF_ARB_RINGSRVARBEN_OFFSET +
508 					(ADF_ARB_REG_SLOT *
509 							txq->hw_bundle_number);
510 	uint32_t value;
511 
512 	rte_spinlock_lock(lock);
513 	value = ADF_CSR_RD(base_addr, arb_csr_offset);
514 	value &= ~(0x01 << txq->hw_queue_number);
515 	ADF_CSR_WR(base_addr, arb_csr_offset, value);
516 	rte_spinlock_unlock(lock);
517 }
518 
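/* Program the ring config CSRs for both rings of the qp; the rx ring is also
 * given near-watermark settings.
 */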
519 static void adf_configure_queues(struct qat_qp *qp)
520 {
521 	uint32_t queue_config;
522 	struct qat_queue *queue = &qp->tx_q;
523 
524 	queue_config = BUILD_RING_CONFIG(queue->queue_size);
525 
526 	WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr, queue->hw_bundle_number,
527 			queue->hw_queue_number, queue_config);
528 
529 	queue = &qp->rx_q;
530 	queue_config =
531 			BUILD_RESP_RING_CONFIG(queue->queue_size,
532 					ADF_RING_NEAR_WATERMARK_512,
533 					ADF_RING_NEAR_WATERMARK_0);
534 
535 	WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr, queue->hw_bundle_number,
536 			queue->hw_queue_number, queue_config);
537 }
538 
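/* Wrap a ring offset using the precomputed mask (ring sizes are powers of two). */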
539 static inline uint32_t adf_modulo(uint32_t data, uint32_t modulo_mask)
540 {
541 	return data & modulo_mask;
542 }
543 
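/* Publish the shadow tail to the tx ring tail CSR so the hardware sees the
 * newly written requests.
 */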
544 static inline void
545 txq_write_tail(struct qat_qp *qp, struct qat_queue *q) {
546 	WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number,
547 			q->hw_queue_number, q->tail);
548 	q->csr_tail = q->tail;
549 }
550 
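/* Mark the responses processed since the last CSR update as empty (handling
 * wrap-around) and advance the rx head CSR to the current head.
 */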
551 static inline
552 void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
553 {
554 	uint32_t old_head, new_head;
555 	uint32_t max_head;
556 
557 	old_head = q->csr_head;
558 	new_head = q->head;
559 	max_head = qp->nb_descriptors * q->msg_size;
560 
561 	/* write out free descriptors */
562 	void *cur_desc = (uint8_t *)q->base_addr + old_head;
563 
564 	if (new_head < old_head) {
565 		memset(cur_desc, ADF_RING_EMPTY_SIG_BYTE, max_head - old_head);
566 		memset(q->base_addr, ADF_RING_EMPTY_SIG_BYTE, new_head);
567 	} else {
568 		memset(cur_desc, ADF_RING_EMPTY_SIG_BYTE, new_head - old_head);
569 	}
570 	q->nb_processed_responses = 0;
571 	q->csr_head = new_head;
572 
573 	/* write current head to CSR */
574 	WRITE_CSR_RING_HEAD(qp->mmap_bar_addr, q->hw_bundle_number,
575 			    q->hw_queue_number, new_head);
576 }
577 
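/* Common enqueue path: build one request descriptor per op directly into the
 * tx ring, limited by the free in-flight slots, then kick the tail CSR once
 * for the whole burst.
 */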
578 uint16_t
579 qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
580 {
581 	register struct qat_queue *queue;
582 	struct qat_qp *tmp_qp = (struct qat_qp *)qp;
583 	register uint32_t nb_ops_sent = 0;
584 	register int ret = -1;
585 	uint16_t nb_ops_possible = nb_ops;
586 	register uint8_t *base_addr;
587 	register uint32_t tail;
588 
589 	if (unlikely(nb_ops == 0))
590 		return 0;
591 
592 	/* read params used a lot in main loop into registers */
593 	queue = &(tmp_qp->tx_q);
594 	base_addr = (uint8_t *)queue->base_addr;
595 	tail = queue->tail;
596 
597 	/* Find how many can actually fit on the ring */
598 	{
599 		/* dequeued can only be written by one thread, but it may not
600 		 * be this thread. As it's 4-byte aligned it will be read
601 		 * atomically here by any Intel CPU.
602 		 * enqueued can wrap before dequeued, but cannot
603 		 * lap it as var size of enq/deq (uint32_t) > var size of
604 		 * max_inflights (uint16_t). In reality inflights is never
605 		 * even as big as max uint16_t, as it's <= ADF_MAX_DESC.
606 		 * On wrapping, the calculation still returns the correct
607 		 * positive value as all three vars are unsigned.
608 		 */
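		/* e.g. after enqueued wraps: enqueued == 2, dequeued == 0xFFFFFFFE,
		 * so 2 - 0xFFFFFFFE evaluates to 4 in uint32_t arithmetic.
		 */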
609 		uint32_t inflights =
610 			tmp_qp->enqueued - tmp_qp->dequeued;
611 
612 		if ((inflights + nb_ops) > tmp_qp->max_inflights) {
613 			nb_ops_possible = tmp_qp->max_inflights - inflights;
614 			if (nb_ops_possible == 0)
615 				return 0;
616 		}
617 		/* QAT has plenty of work queued already, so don't waste cycles
618 		 * enqueueing, wait until the application has gathered a bigger
619 		 * burst or some completed ops have been dequeued
620 		 */
621 		if (tmp_qp->min_enq_burst_threshold && inflights >
622 				QAT_QP_MIN_INFL_THRESHOLD && nb_ops_possible <
623 				tmp_qp->min_enq_burst_threshold) {
624 			tmp_qp->stats.threshold_hit_count++;
625 			return 0;
626 		}
627 	}
628 
629 #ifdef BUILD_QAT_SYM
630 	if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC)
631 		qat_sym_preprocess_requests(ops, nb_ops_possible);
632 #endif
633 
634 	while (nb_ops_sent != nb_ops_possible) {
635 		if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC) {
636 #ifdef BUILD_QAT_SYM
637 			ret = qat_sym_build_request(*ops, base_addr + tail,
638 				tmp_qp->op_cookies[tail >> queue->trailz],
639 				tmp_qp->qat_dev_gen);
640 #endif
641 		} else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION) {
642 			ret = qat_comp_build_request(*ops, base_addr + tail,
643 				tmp_qp->op_cookies[tail >> queue->trailz],
644 				tmp_qp->qat_dev_gen);
645 		} else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC) {
646 #ifdef BUILD_QAT_ASYM
647 			ret = qat_asym_build_request(*ops, base_addr + tail,
648 				tmp_qp->op_cookies[tail >> queue->trailz],
649 				tmp_qp->qat_dev_gen);
650 #endif
651 		}
652 		if (ret != 0) {
653 			tmp_qp->stats.enqueue_err_count++;
654 			/* This message cannot be enqueued */
655 			if (nb_ops_sent == 0)
656 				return 0;
657 			goto kick_tail;
658 		}
659 
660 		tail = adf_modulo(tail + queue->msg_size, queue->modulo_mask);
661 		ops++;
662 		nb_ops_sent++;
663 	}
664 kick_tail:
665 	queue->tail = tail;
666 	tmp_qp->enqueued += nb_ops_sent;
667 	tmp_qp->stats.enqueued_count += nb_ops_sent;
668 	txq_write_tail(tmp_qp, queue);
669 	return nb_ops_sent;
670 }
671 
672 /* Use this for compression only - but keep consistent with above common
673  * function as much as possible.
674  */
675 uint16_t
676 qat_enqueue_comp_op_burst(void *qp, void **ops, uint16_t nb_ops)
677 {
678 	register struct qat_queue *queue;
679 	struct qat_qp *tmp_qp = (struct qat_qp *)qp;
680 	register uint32_t nb_ops_sent = 0;
681 	register int nb_desc_to_build;
682 	uint16_t nb_ops_possible = nb_ops;
683 	register uint8_t *base_addr;
684 	register uint32_t tail;
685 
686 	int descriptors_built, total_descriptors_built = 0;
687 	int nb_remaining_descriptors;
688 	int overflow = 0;
689 
690 	if (unlikely(nb_ops == 0))
691 		return 0;
692 
693 	/* read params used a lot in main loop into registers */
694 	queue = &(tmp_qp->tx_q);
695 	base_addr = (uint8_t *)queue->base_addr;
696 	tail = queue->tail;
697 
698 	/* Find how many can actually fit on the ring */
699 	{
700 		/* dequeued can only be written by one thread, but it may not
701 		 * be this thread. As it's 4-byte aligned it will be read
702 		 * atomically here by any Intel CPU.
703 		 * enqueued can wrap before dequeued, but cannot
704 		 * lap it as var size of enq/deq (uint32_t) > var size of
705 		 * max_inflights (uint16_t). In reality inflights is never
706 		 * even as big as max uint16_t, as it's <= ADF_MAX_DESC.
707 		 * On wrapping, the calculation still returns the correct
708 		 * positive value as all three vars are unsigned.
709 		 */
710 		uint32_t inflights =
711 			tmp_qp->enqueued - tmp_qp->dequeued;
712 
713 		/* Find how many can actually fit on the ring */
714 		overflow = (inflights + nb_ops) - tmp_qp->max_inflights;
715 		if (overflow > 0) {
716 			nb_ops_possible = nb_ops - overflow;
717 			if (nb_ops_possible == 0)
718 				return 0;
719 		}
720 
721 		/* QAT has plenty of work queued already, so don't waste cycles
722 		 * enqueueing, wait until the application has gathered a bigger
723 		 * burst or some completed ops have been dequeued
724 		 */
725 		if (tmp_qp->min_enq_burst_threshold && inflights >
726 				QAT_QP_MIN_INFL_THRESHOLD && nb_ops_possible <
727 				tmp_qp->min_enq_burst_threshold) {
728 			tmp_qp->stats.threshold_hit_count++;
729 			return 0;
730 		}
731 	}
732 
733 	/* At this point nb_ops_possible is assuming a 1:1 mapping
734 	 * between ops and descriptors.
735 	 * Fewer may be sent if some ops have to be split.
736 	 * nb_ops_possible is <= burst size.
737 	 * Find out how many spaces are actually available on the qp in case
738 	 * more are needed.
739 	 */
740 	nb_remaining_descriptors = nb_ops_possible
741 			 + ((overflow >= 0) ? 0 : overflow * (-1));
742 	QAT_DP_LOG(DEBUG, "Nb ops requested %d, nb descriptors remaining %d",
743 			nb_ops, nb_remaining_descriptors);
744 
745 	while (nb_ops_sent != nb_ops_possible &&
746 				nb_remaining_descriptors > 0) {
747 		struct qat_comp_op_cookie *cookie =
748 				tmp_qp->op_cookies[tail >> queue->trailz];
749 
750 		descriptors_built = 0;
751 
752 		QAT_DP_LOG(DEBUG, "--- data length: %u",
753 			   ((struct rte_comp_op *)*ops)->src.length);
754 
755 		nb_desc_to_build = qat_comp_build_request(*ops,
756 				base_addr + tail, cookie, tmp_qp->qat_dev_gen);
757 		QAT_DP_LOG(DEBUG, "%d descriptors built, %d remaining, "
758 			"%d ops sent, %d descriptors needed",
759 			total_descriptors_built, nb_remaining_descriptors,
760 			nb_ops_sent, nb_desc_to_build);
761 
762 		if (unlikely(nb_desc_to_build < 0)) {
763 			/* this message cannot be enqueued */
764 			tmp_qp->stats.enqueue_err_count++;
765 			if (nb_ops_sent == 0)
766 				return 0;
767 			goto kick_tail;
768 		} else if (unlikely(nb_desc_to_build > 1)) {
769 			/* this op is too big and must be split - get more
770 			 * descriptors and retry
771 			 */
772 
773 			QAT_DP_LOG(DEBUG, "Build %d descriptors for this op",
774 					nb_desc_to_build);
775 
776 			nb_remaining_descriptors -= nb_desc_to_build;
777 			if (nb_remaining_descriptors >= 0) {
778 				/* There are enough remaining descriptors
779 				 * so retry
780 				 */
781 				int ret2 = qat_comp_build_multiple_requests(
782 						*ops, tmp_qp, tail,
783 						nb_desc_to_build);
784 
785 				if (unlikely(ret2 < 1)) {
786 					QAT_DP_LOG(DEBUG,
787 							"Failed to build (%d) descriptors, status %d",
788 							nb_desc_to_build, ret2);
789 
790 					qat_comp_free_split_op_memzones(cookie,
791 							nb_desc_to_build - 1);
792 
793 					tmp_qp->stats.enqueue_err_count++;
794 
795 					/* This message cannot be enqueued */
796 					if (nb_ops_sent == 0)
797 						return 0;
798 					goto kick_tail;
799 				} else {
800 					descriptors_built = ret2;
801 					total_descriptors_built +=
802 							descriptors_built;
803 					nb_remaining_descriptors -=
804 							descriptors_built;
805 					QAT_DP_LOG(DEBUG,
806 							"Multiple descriptors (%d) built ok",
807 							descriptors_built);
808 				}
809 			} else {
810 				QAT_DP_LOG(ERR, "For the current op, number of requested descriptors (%d) "
811 						"exceeds number of available descriptors (%d)",
812 						nb_desc_to_build,
813 						nb_remaining_descriptors +
814 							nb_desc_to_build);
815 
816 				qat_comp_free_split_op_memzones(cookie,
817 						nb_desc_to_build - 1);
818 
819 				/* Not enough extra descriptors */
820 				if (nb_ops_sent == 0)
821 					return 0;
822 				goto kick_tail;
823 			}
824 		} else {
825 			descriptors_built = 1;
826 			total_descriptors_built++;
827 			nb_remaining_descriptors--;
828 			QAT_DP_LOG(DEBUG, "Single descriptor built ok");
829 		}
830 
831 		tail = adf_modulo(tail + (queue->msg_size * descriptors_built),
832 				  queue->modulo_mask);
833 		ops++;
834 		nb_ops_sent++;
835 	}
836 
837 kick_tail:
838 	queue->tail = tail;
839 	tmp_qp->enqueued += total_descriptors_built;
840 	tmp_qp->stats.enqueued_count += nb_ops_sent;
841 	txq_write_tail(tmp_qp, queue);
842 	return nb_ops_sent;
843 }
844 
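/* Walk the rx ring from the current head, handing each firmware response to
 * the service-specific handler until an empty slot or nb_ops is reached. The
 * head CSR is only written back after a batch of responses to limit MMIO
 * writes.
 */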
845 uint16_t
846 qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
847 {
848 	struct qat_queue *rx_queue;
849 	struct qat_qp *tmp_qp = (struct qat_qp *)qp;
850 	uint32_t head;
851 	uint32_t op_resp_counter = 0, fw_resp_counter = 0;
852 	uint8_t *resp_msg;
853 	int nb_fw_responses;
854 
855 	rx_queue = &(tmp_qp->rx_q);
856 	head = rx_queue->head;
857 	resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head;
858 
859 	while (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG &&
860 			op_resp_counter != nb_ops) {
861 
862 		nb_fw_responses = 1;
863 
864 		if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC)
865 			qat_sym_process_response(ops, resp_msg);
866 		else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION)
867 			nb_fw_responses = qat_comp_process_response(
868 				ops, resp_msg,
869 				tmp_qp->op_cookies[head >> rx_queue->trailz],
870 				&tmp_qp->stats.dequeue_err_count);
871 #ifdef BUILD_QAT_ASYM
872 		else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC)
873 			qat_asym_process_response(ops, resp_msg,
874 				tmp_qp->op_cookies[head >> rx_queue->trailz]);
875 #endif
876 
877 		head = adf_modulo(head + rx_queue->msg_size,
878 				  rx_queue->modulo_mask);
879 
880 		resp_msg = (uint8_t *)rx_queue->base_addr + head;
881 
882 		if (nb_fw_responses) {
883 			/* only move on to next op if one was ready to return
884 			 * to API
885 			 */
886 			ops++;
887 			op_resp_counter++;
888 		}
889 
890 		 /* A compression op may be broken up into multiple fw requests.
891 		  * Only count fw responses as complete once ALL the responses
892 		  * associated with an op have been processed, as the cookie
893 		  * data from the first response must be available until
894 		  * finished with all firmware responses.
895 		  */
896 		fw_resp_counter += nb_fw_responses;
897 
898 		rx_queue->nb_processed_responses++;
899 	}
900 
901 	tmp_qp->dequeued += fw_resp_counter;
902 	tmp_qp->stats.dequeued_count += op_resp_counter;
903 
904 	rx_queue->head = head;
905 	if (rx_queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH)
906 		rxq_free_desc(tmp_qp, rx_queue);
907 
908 	QAT_DP_LOG(DEBUG, "Dequeue burst return: %u, QAT responses: %u",
909 			op_resp_counter, fw_resp_counter);
910 
911 	return op_resp_counter;
912 }
913 
914 /* This is almost the same as dequeue_op_burst, but without the atomics,
915  * without stats and without the op. It dequeues a single response.
916  */
917 static uint8_t
918 qat_cq_dequeue_response(struct qat_qp *qp, void *out_data)
919 {
920 	uint8_t result = 0;
921 	uint8_t retries = 0;
922 	struct qat_queue *queue = &(qp->rx_q);
923 	struct icp_qat_fw_comn_resp *resp_msg = (struct icp_qat_fw_comn_resp *)
924 			((uint8_t *)queue->base_addr + queue->head);
925 
926 	while (retries++ < QAT_CQ_MAX_DEQ_RETRIES &&
927 			*(uint32_t *)resp_msg == ADF_RING_EMPTY_SIG) {
928 		/* loop waiting for response until we reach the timeout */
929 		rte_delay_ms(20);
930 	}
931 
932 	if (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG) {
933 		/* response received */
934 		result = 1;
935 
936 		/* check status flag */
937 		if (ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(
938 				resp_msg->comn_hdr.comn_status) ==
939 				ICP_QAT_FW_COMN_STATUS_FLAG_OK) {
940 			/* success */
941 			memcpy(out_data, resp_msg, queue->msg_size);
942 		} else {
943 			memset(out_data, 0, queue->msg_size);
944 		}
945 
946 		queue->head = adf_modulo(queue->head + queue->msg_size,
947 				queue->modulo_mask);
948 		rxq_free_desc(qp, queue);
949 	}
950 
951 	return result;
952 }
953 
954 /* Sends a NULL message and extracts QAT fw version from the response.
955  * Used to determine detailed capabilities based on the fw version number.
956  * This assumes that there are no inflight messages, i.e. assumes there's space
957  * on the qp, one message is sent and only one response collected.
958  * Returns the fw version number, 0 for an unknown version, or a negative error code.
959  */
960 int
961 qat_cq_get_fw_version(struct qat_qp *qp)
962 {
963 	struct qat_queue *queue = &(qp->tx_q);
964 	uint8_t *base_addr = (uint8_t *)queue->base_addr;
965 	struct icp_qat_fw_comn_req null_msg;
966 	struct icp_qat_fw_comn_resp response;
967 
968 	/* prepare the NULL request */
969 	memset(&null_msg, 0, sizeof(null_msg));
970 	null_msg.comn_hdr.hdr_flags =
971 		ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET);
972 	null_msg.comn_hdr.service_type = ICP_QAT_FW_COMN_REQ_NULL;
973 	null_msg.comn_hdr.service_cmd_id = ICP_QAT_FW_NULL_REQ_SERV_ID;
974 
975 #if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
976 	QAT_DP_HEXDUMP_LOG(DEBUG, "NULL request", &null_msg, sizeof(null_msg));
977 #endif
978 
979 	/* send the NULL request */
980 	memcpy(base_addr + queue->tail, &null_msg, sizeof(null_msg));
981 	queue->tail = adf_modulo(queue->tail + queue->msg_size,
982 			queue->modulo_mask);
983 	txq_write_tail(qp, queue);
984 
985 	/* receive a response */
986 	if (qat_cq_dequeue_response(qp, &response)) {
987 
988 #if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
989 		QAT_DP_HEXDUMP_LOG(DEBUG, "NULL response:", &response,
990 				sizeof(response));
991 #endif
992 		/* if LW0 bit 24 is set - then the fw version was returned */
993 		if (QAT_FIELD_GET(response.comn_hdr.hdr_flags,
994 				ICP_QAT_FW_COMN_NULL_VERSION_FLAG_BITPOS,
995 				ICP_QAT_FW_COMN_NULL_VERSION_FLAG_MASK))
996 			return response.resrvd[0]; /* return LW4 */
997 		else
998 			return 0; /* not set - we don't know fw version */
999 	}
1000 
1001 	QAT_LOG(ERR, "No response received");
1002 	return -EINVAL;
1003 }
1004 
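/* Weak stub, overridden by the real implementation when the compression
 * service is compiled in.
 */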
1005 __rte_weak int
1006 qat_comp_process_response(void **op __rte_unused, uint8_t *resp __rte_unused,
1007 			  void *op_cookie __rte_unused,
1008 			  uint64_t *dequeue_err_count __rte_unused)
1009 {
1010 	return  0;
1011 	return 0;
1012