/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */

#include <ethdev_driver.h>
#include <rte_io.h>

#include "mana.h"
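
/*
 * Return a pointer to the slot in the work queue where the next WQE will
 * be written. The queue size is expected to be a power of two, so masking
 * the byte offset with (size - 1) wraps it within the ring buffer.
 */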
uint8_t *
gdma_get_wqe_pointer(struct mana_gdma_queue *queue)
{
	uint32_t offset_in_bytes =
		(queue->head * GDMA_WQE_ALIGNMENT_UNIT_SIZE) &
		(queue->size - 1);

	DP_LOG(DEBUG, "txq sq_head %u sq_size %u offset_in_bytes %u",
	       queue->head, queue->size, offset_in_bytes);

	if (offset_in_bytes + GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue->size)
		DP_LOG(ERR, "fatal error: offset_in_bytes %u too big",
		       offset_in_bytes);

	return ((uint8_t *)queue->buffer) + offset_in_bytes;
}
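
/*
 * Write the GDMA DMA out-of-band header, followed by the request's inline
 * client OOB data padded with zeros up to client_oob_size, at the start of
 * the WQE. Returns the number of bytes written.
 */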
static uint32_t
write_dma_client_oob(uint8_t *work_queue_buffer_pointer,
		     const struct gdma_work_request *work_request,
		     uint32_t client_oob_size)
{
	uint8_t *p = work_queue_buffer_pointer;

	struct gdma_wqe_dma_oob *header = (struct gdma_wqe_dma_oob *)p;

	memset(header, 0, sizeof(struct gdma_wqe_dma_oob));
	header->num_sgl_entries = work_request->num_sgl_elements;
	header->inline_client_oob_size_in_dwords =
		client_oob_size / sizeof(uint32_t);
	header->client_data_unit = work_request->client_data_unit;

	DP_LOG(DEBUG, "queue buf %p sgl %u oob_h %u du %u oob_buf %p oob_b %u",
	       work_queue_buffer_pointer, header->num_sgl_entries,
	       header->inline_client_oob_size_in_dwords,
	       header->client_data_unit, work_request->inline_oob_data,
	       work_request->inline_oob_size_in_bytes);

	p += sizeof(struct gdma_wqe_dma_oob);
	if (work_request->inline_oob_data &&
	    work_request->inline_oob_size_in_bytes > 0) {
		memcpy(p, work_request->inline_oob_data,
		       work_request->inline_oob_size_in_bytes);
		if (client_oob_size > work_request->inline_oob_size_in_bytes)
			memset(p + work_request->inline_oob_size_in_bytes, 0,
			       client_oob_size -
			       work_request->inline_oob_size_in_bytes);
	}

	return sizeof(struct gdma_wqe_dma_oob) + client_oob_size;
}
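
/*
 * Copy the scatter-gather list of a work request into the WQE, wrapping
 * around to the head of the work queue buffer when the list crosses the
 * end of the queue. An empty SGL is replaced with a single dummy entry as
 * required by the spec. Returns the size of the part copied after any
 * wrap-around.
 */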
static uint32_t
write_scatter_gather_list(uint8_t *work_queue_head_pointer,
			  uint8_t *work_queue_end_pointer,
			  uint8_t *work_queue_cur_pointer,
			  struct gdma_work_request *work_request)
{
	struct gdma_sgl_element *sge_list;
	struct gdma_sgl_element dummy_sgl[1];
	uint8_t *address;
	uint32_t size;
	uint32_t num_sge;
	uint32_t size_to_queue_end;
	uint32_t sge_list_size;

	DP_LOG(DEBUG, "work_queue_cur_pointer %p work_request->flags %x",
	       work_queue_cur_pointer, work_request->flags);

	num_sge = work_request->num_sgl_elements;
	sge_list = work_request->sgl;
	size_to_queue_end = (uint32_t)(work_queue_end_pointer -
				       work_queue_cur_pointer);

	if (num_sge == 0) {
		/* Per spec, the case of an empty SGL should be handled as
		 * follows to avoid corrupted WQE errors:
		 * Write one dummy SGL entry
		 * Set the address to 1, leave the rest as 0
		 */
		dummy_sgl[num_sge].address = 1;
		dummy_sgl[num_sge].size = 0;
		dummy_sgl[num_sge].memory_key = 0;
		num_sge++;
		sge_list = dummy_sgl;
	}

	sge_list_size = 0;
	{
		address = (uint8_t *)sge_list;
		size = sizeof(struct gdma_sgl_element) * num_sge;
		if (size_to_queue_end < size) {
			memcpy(work_queue_cur_pointer, address,
			       size_to_queue_end);
			work_queue_cur_pointer = work_queue_head_pointer;
			address += size_to_queue_end;
			size -= size_to_queue_end;
		}

		memcpy(work_queue_cur_pointer, address, size);
		sge_list_size = size;
	}

	DP_LOG(DEBUG, "sge %u address 0x%" PRIx64 " size %u key %u list_s %u",
	       num_sge, sge_list->address, sge_list->size,
	       sge_list->memory_key, sge_list_size);

	return sge_list_size;
}

/*
 * Post a work request to the queue. On success, *wqe_size_in_bu is set to
 * the size of the WQE in basic units and the queue head is advanced.
 * Returns -EBUSY when the queue does not have enough free space for the
 * WQE.
 */
int
gdma_post_work_request(struct mana_gdma_queue *queue,
		       struct gdma_work_request *work_req,
		       uint32_t *wqe_size_in_bu)
{
	uint32_t client_oob_size =
		work_req->inline_oob_size_in_bytes >
				INLINE_OOB_SMALL_SIZE_IN_BYTES ?
			INLINE_OOB_LARGE_SIZE_IN_BYTES :
			INLINE_OOB_SMALL_SIZE_IN_BYTES;

	uint32_t sgl_data_size = sizeof(struct gdma_sgl_element) *
			RTE_MAX((uint32_t)1, work_req->num_sgl_elements);
	uint32_t wqe_size =
		RTE_ALIGN(sizeof(struct gdma_wqe_dma_oob) +
				client_oob_size + sgl_data_size,
			  GDMA_WQE_ALIGNMENT_UNIT_SIZE);
	uint8_t *wq_buffer_pointer;
	uint32_t queue_free_units = queue->count - (queue->head - queue->tail);

	if (wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue_free_units) {
		DP_LOG(DEBUG, "WQE size %u queue count %u head %u tail %u",
		       wqe_size, queue->count, queue->head, queue->tail);
		return -EBUSY;
	}

	DP_LOG(DEBUG, "client_oob_size %u sgl_data_size %u wqe_size %u",
	       client_oob_size, sgl_data_size, wqe_size);

	*wqe_size_in_bu = wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;

	wq_buffer_pointer = gdma_get_wqe_pointer(queue);
	wq_buffer_pointer += write_dma_client_oob(wq_buffer_pointer, work_req,
						  client_oob_size);
	if (wq_buffer_pointer >= ((uint8_t *)queue->buffer) + queue->size)
		wq_buffer_pointer -= queue->size;

	write_scatter_gather_list((uint8_t *)queue->buffer,
				  (uint8_t *)queue->buffer + queue->size,
				  wq_buffer_pointer, work_req);

	queue->head += wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;

	return 0;
}

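/*
 * 32-bit builds ring the "short" 32-bit doorbell format defined below;
 * 64-bit builds use the full 64-bit doorbell entries in the #else branch.
 */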
#ifdef RTE_ARCH_32
union gdma_short_doorbell_entry {
	uint32_t     as_uint32;

	struct {
		uint32_t tail_ptr_incr	: 16; /* Number of CQEs */
		uint32_t id		: 12;
		uint32_t reserved	: 3;
		uint32_t arm		: 1;
	} cq;

	struct {
		uint32_t tail_ptr_incr	: 16; /* In number of bytes */
		uint32_t id		: 12;
		uint32_t reserved	: 4;
	} rq;

	struct {
		uint32_t tail_ptr_incr	: 16; /* In number of bytes */
		uint32_t id		: 12;
		uint32_t reserved	: 4;
	} sq;

	struct {
		uint32_t tail_ptr_incr	: 16; /* Number of EQEs */
		uint32_t id		: 12;
		uint32_t reserved	: 3;
		uint32_t arm		: 1;
	} eq;
}; /* HW DATA */

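/* Byte offsets of the short doorbell registers within the doorbell page */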
enum {
	DOORBELL_SHORT_OFFSET_SQ = 0x10,
	DOORBELL_SHORT_OFFSET_RQ = 0x410,
	DOORBELL_SHORT_OFFSET_CQ = 0x810,
	DOORBELL_SHORT_OFFSET_EQ = 0xFF0,
};

/*
 * Write to the hardware doorbell to notify it of new activity.
 */
int
mana_ring_short_doorbell(void *db_page, enum gdma_queue_types queue_type,
			 uint32_t queue_id, uint32_t tail_incr, uint8_t arm)
{
	uint8_t *addr = db_page;
	union gdma_short_doorbell_entry e = {};

	if ((queue_id & ~GDMA_SHORT_DB_QID_MASK) ||
	    (tail_incr & ~GDMA_SHORT_DB_INC_MASK)) {
		DP_LOG(ERR, "%s: queue_id %u or "
		       "tail_incr %u overflowed, queue type %d",
		       __func__, queue_id, tail_incr, queue_type);
		return -EINVAL;
	}

	switch (queue_type) {
	case GDMA_QUEUE_SEND:
		e.sq.id = queue_id;
		e.sq.tail_ptr_incr = tail_incr;
		addr += DOORBELL_SHORT_OFFSET_SQ;
		break;

	case GDMA_QUEUE_RECEIVE:
		e.rq.id = queue_id;
		e.rq.tail_ptr_incr = tail_incr;
		addr += DOORBELL_SHORT_OFFSET_RQ;
		break;

	case GDMA_QUEUE_COMPLETION:
		e.cq.id = queue_id;
		e.cq.tail_ptr_incr = tail_incr;
		e.cq.arm = arm;
		addr += DOORBELL_SHORT_OFFSET_CQ;
		break;

	default:
		DP_LOG(ERR, "Unsupported queue type %d", queue_type);
		return -1;
	}

	/* Ensure all writes are done before ringing doorbell */
	rte_wmb();

	DP_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u arm %u",
	       db_page, addr, queue_id, queue_type, tail_incr, arm);

	rte_write32(e.as_uint32, addr);
	return 0;
}
#else
union gdma_doorbell_entry {
	uint64_t     as_uint64;

	struct {
		uint64_t id		: 24;
		uint64_t reserved	: 8;
		uint64_t tail_ptr	: 31;
		uint64_t arm		: 1;
	} cq;

	struct {
		uint64_t id		: 24;
		uint64_t wqe_cnt	: 8;
		uint64_t tail_ptr	: 32;
	} rq;

	struct {
		uint64_t id		: 24;
		uint64_t reserved	: 8;
		uint64_t tail_ptr	: 32;
	} sq;

	struct {
		uint64_t id		: 16;
		uint64_t reserved	: 16;
		uint64_t tail_ptr	: 31;
		uint64_t arm		: 1;
	} eq;
}; /* HW DATA */

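/* Byte offsets of the doorbell registers within the doorbell page */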
enum {
	DOORBELL_OFFSET_SQ = 0x0,
	DOORBELL_OFFSET_RQ = 0x400,
	DOORBELL_OFFSET_CQ = 0x800,
	DOORBELL_OFFSET_EQ = 0xFF8,
};

/*
 * Write to the hardware doorbell to notify it of new activity.
 */
int
mana_ring_doorbell(void *db_page, enum gdma_queue_types queue_type,
		   uint32_t queue_id, uint32_t tail, uint8_t arm)
{
	uint8_t *addr = db_page;
	union gdma_doorbell_entry e = {};

	switch (queue_type) {
	case GDMA_QUEUE_SEND:
		e.sq.id = queue_id;
		e.sq.tail_ptr = tail;
		addr += DOORBELL_OFFSET_SQ;
		break;

	case GDMA_QUEUE_RECEIVE:
		e.rq.id = queue_id;
		e.rq.tail_ptr = tail;
		e.rq.wqe_cnt = arm;
		addr += DOORBELL_OFFSET_RQ;
		break;

	case GDMA_QUEUE_COMPLETION:
		e.cq.id = queue_id;
		e.cq.tail_ptr = tail;
		e.cq.arm = arm;
		addr += DOORBELL_OFFSET_CQ;
		break;

	default:
		DP_LOG(ERR, "Unsupported queue type %d", queue_type);
		return -1;
	}

	/* Ensure all writes are done before ringing doorbell */
	rte_wmb();

	DP_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u arm %u",
	       db_page, addr, queue_id, queue_type, tail, arm);

	rte_write64(e.as_uint64, addr);
	return 0;
}
#endif

/*
 * Poll the completion queue and copy up to max_comp completions into
 * gdma_comp. The owner bits in each CQE encode how many times the queue
 * has wrapped: an entry whose owner bits still match the previous pass is
 * not yet valid, and a mismatch with both the previous and the current
 * pass indicates a CQ overflow. Returns the number of completions
 * harvested.
 */
uint32_t
gdma_poll_completion_queue(struct mana_gdma_queue *cq,
			   struct gdma_comp *gdma_comp, uint32_t max_comp)
{
	struct gdma_hardware_completion_entry *cqe;
	uint32_t new_owner_bits, old_owner_bits;
	uint32_t cqe_owner_bits;
	uint32_t num_comp = 0;
	struct gdma_hardware_completion_entry *buffer = cq->buffer;

	while (num_comp < max_comp) {
		cqe = &buffer[cq->head % cq->count];
		new_owner_bits = (cq->head / cq->count) &
					COMPLETION_QUEUE_OWNER_MASK;
		old_owner_bits = (cq->head / cq->count - 1) &
					COMPLETION_QUEUE_OWNER_MASK;
		cqe_owner_bits = cqe->owner_bits;

		DP_LOG(DEBUG, "comp cqe bits 0x%x owner bits 0x%x",
			cqe_owner_bits, old_owner_bits);

		/* No new entry */
		if (cqe_owner_bits == old_owner_bits)
			break;

		if (cqe_owner_bits != new_owner_bits) {
			DRV_LOG(ERR, "CQ overflowed, ID %u cqe 0x%x new 0x%x",
				cq->id, cqe_owner_bits, new_owner_bits);
			break;
		}

		gdma_comp[num_comp].cqe_data = cqe->dma_client_data;
		num_comp++;

		cq->head++;

		DP_LOG(DEBUG, "comp new 0x%x old 0x%x cqe 0x%x wq %u sq %u head %u",
		       new_owner_bits, old_owner_bits, cqe_owner_bits,
		       cqe->wq_num, cqe->is_sq, cq->head);
	}

	/* Make sure the CQE owner bits are checked before we access the data
	 * in CQE
	 */
	rte_rmb();

	return num_comp;
}
395