/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */

#include <ethdev_driver.h>
#include <rte_io.h>

#include "mana.h"

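/*
 * Return a pointer into the work queue buffer at the current head position.
 * The head counts GDMA_WQE_ALIGNMENT_UNIT_SIZE units and the byte offset
 * wraps around the queue size, which is assumed to be a power of two
 * (the offset is computed by masking with queue->size - 1).
 */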
uint8_t *
gdma_get_wqe_pointer(struct mana_gdma_queue *queue)
{
	uint32_t offset_in_bytes =
		(queue->head * GDMA_WQE_ALIGNMENT_UNIT_SIZE) &
		(queue->size - 1);

	DP_LOG(DEBUG, "txq sq_head %u sq_size %u offset_in_bytes %u",
	       queue->head, queue->size, offset_in_bytes);

	if (offset_in_bytes + GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue->size)
		DP_LOG(ERR, "fatal error: offset_in_bytes %u too big",
		       offset_in_bytes);

	return ((uint8_t *)queue->buffer) + offset_in_bytes;
}

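/*
 * Write the GDMA DMA out-of-band header, followed by the inline client OOB
 * data (zero-padded up to client_oob_size), at the given work queue position.
 * Return the number of bytes written.
 */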
static uint32_t
write_dma_client_oob(uint8_t *work_queue_buffer_pointer,
		     const struct gdma_work_request *work_request,
		     uint32_t client_oob_size)
{
	uint8_t *p = work_queue_buffer_pointer;

	struct gdma_wqe_dma_oob *header = (struct gdma_wqe_dma_oob *)p;

	memset(header, 0, sizeof(struct gdma_wqe_dma_oob));
	header->num_sgl_entries = work_request->num_sgl_elements;
	header->inline_client_oob_size_in_dwords =
		client_oob_size / sizeof(uint32_t);
	header->client_data_unit = work_request->client_data_unit;

	DP_LOG(DEBUG, "queue buf %p sgl %u oob_h %u du %u oob_buf %p oob_b %u",
	       work_queue_buffer_pointer, header->num_sgl_entries,
	       header->inline_client_oob_size_in_dwords,
	       header->client_data_unit, work_request->inline_oob_data,
	       work_request->inline_oob_size_in_bytes);

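	/* Copy the inline client OOB data after the header and zero-pad it
	 * up to client_oob_size.
	 */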
	p += sizeof(struct gdma_wqe_dma_oob);
	if (work_request->inline_oob_data &&
	    work_request->inline_oob_size_in_bytes > 0) {
		memcpy(p, work_request->inline_oob_data,
		       work_request->inline_oob_size_in_bytes);
		if (client_oob_size > work_request->inline_oob_size_in_bytes)
			memset(p + work_request->inline_oob_size_in_bytes, 0,
			       client_oob_size -
			       work_request->inline_oob_size_in_bytes);
	}

	return sizeof(struct gdma_wqe_dma_oob) + client_oob_size;
}

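/*
 * Copy the scatter-gather list of a work request into the work queue,
 * wrapping around to the start of the ring buffer if the list crosses the
 * end of the queue.
 */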
static uint32_t
write_scatter_gather_list(uint8_t *work_queue_head_pointer,
			  uint8_t *work_queue_end_pointer,
			  uint8_t *work_queue_cur_pointer,
			  struct gdma_work_request *work_request)
{
	struct gdma_sgl_element *sge_list;
	struct gdma_sgl_element dummy_sgl[1];
	uint8_t *address;
	uint32_t size;
	uint32_t num_sge;
	uint32_t size_to_queue_end;
	uint32_t sge_list_size;

	DP_LOG(DEBUG, "work_queue_cur_pointer %p work_request->flags %x",
	       work_queue_cur_pointer, work_request->flags);

	num_sge = work_request->num_sgl_elements;
	sge_list = work_request->sgl;
	size_to_queue_end = (uint32_t)(work_queue_end_pointer -
				       work_queue_cur_pointer);

	if (num_sge == 0) {
		/* Per spec, the case of an empty SGL should be handled as
		 * follows to avoid corrupted WQE errors:
		 * Write one dummy SGL entry
		 * Set the address to 1, leave the rest as 0
		 */
		dummy_sgl[num_sge].address = 1;
		dummy_sgl[num_sge].size = 0;
		dummy_sgl[num_sge].memory_key = 0;
		num_sge++;
		sge_list = dummy_sgl;
	}

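	/* Copy the SGL into the queue, splitting the copy in two if it
	 * wraps past the end of the ring buffer.
	 */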
	sge_list_size = 0;
	{
		address = (uint8_t *)sge_list;
		size = sizeof(struct gdma_sgl_element) * num_sge;
		if (size_to_queue_end < size) {
			memcpy(work_queue_cur_pointer, address,
			       size_to_queue_end);
			work_queue_cur_pointer = work_queue_head_pointer;
			address += size_to_queue_end;
			size -= size_to_queue_end;
		}

		memcpy(work_queue_cur_pointer, address, size);
		sge_list_size = size;
	}

	DP_LOG(DEBUG, "sge %u address 0x%" PRIx64 " size %u key %u list_s %u",
	       num_sge, sge_list->address, sge_list->size,
	       sge_list->memory_key, sge_list_size);

	return sge_list_size;
}

/*
 * Post a work request to the work queue. On success, return 0 and report
 * the WQE size in basic units through wqe_size_in_bu; return -EBUSY if the
 * queue does not have enough free space.
 */
int
gdma_post_work_request(struct mana_gdma_queue *queue,
		       struct gdma_work_request *work_req,
		       uint32_t *wqe_size_in_bu)
{
	uint32_t client_oob_size =
		work_req->inline_oob_size_in_bytes >
			INLINE_OOB_SMALL_SIZE_IN_BYTES ?
		INLINE_OOB_LARGE_SIZE_IN_BYTES :
		INLINE_OOB_SMALL_SIZE_IN_BYTES;

	uint32_t sgl_data_size = sizeof(struct gdma_sgl_element) *
			RTE_MAX((uint32_t)1, work_req->num_sgl_elements);
	uint32_t wqe_size =
		RTE_ALIGN(sizeof(struct gdma_wqe_dma_oob) +
			  client_oob_size + sgl_data_size,
			  GDMA_WQE_ALIGNMENT_UNIT_SIZE);
	uint8_t *wq_buffer_pointer;
	uint32_t queue_free_units = queue->count - (queue->head - queue->tail);

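	/* The queue head and tail count WQE basic units; make sure the new
	 * WQE fits in the free space before writing it.
	 */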
	if (wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue_free_units) {
		DP_LOG(DEBUG, "WQE size %u queue count %u head %u tail %u",
		       wqe_size, queue->count, queue->head, queue->tail);
		return -EBUSY;
	}

	DP_LOG(DEBUG, "client_oob_size %u sgl_data_size %u wqe_size %u",
	       client_oob_size, sgl_data_size, wqe_size);

	*wqe_size_in_bu = wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;

	wq_buffer_pointer = gdma_get_wqe_pointer(queue);
	wq_buffer_pointer += write_dma_client_oob(wq_buffer_pointer, work_req,
						  client_oob_size);
	if (wq_buffer_pointer >= ((uint8_t *)queue->buffer) + queue->size)
		wq_buffer_pointer -= queue->size;

	write_scatter_gather_list((uint8_t *)queue->buffer,
				  (uint8_t *)queue->buffer + queue->size,
				  wq_buffer_pointer, work_req);

	queue->head += wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;

	return 0;
}

#ifdef RTE_ARCH_32
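/* On 32-bit builds the doorbell is rung with a single 32-bit write, using
 * the "short" doorbell format with narrower queue ID and tail increment
 * fields.
 */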
union gdma_short_doorbell_entry {
	uint32_t as_uint32;

	struct {
		uint32_t tail_ptr_incr : 16; /* Number of CQEs */
		uint32_t id : 12;
		uint32_t reserved : 3;
		uint32_t arm : 1;
	} cq;

	struct {
		uint32_t tail_ptr_incr : 16; /* In number of bytes */
		uint32_t id : 12;
		uint32_t reserved : 4;
	} rq;

	struct {
		uint32_t tail_ptr_incr : 16; /* In number of bytes */
		uint32_t id : 12;
		uint32_t reserved : 4;
	} sq;

	struct {
		uint32_t tail_ptr_incr : 16; /* Number of EQEs */
		uint32_t id : 12;
		uint32_t reserved : 3;
		uint32_t arm : 1;
	} eq;
}; /* HW DATA */

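/* Offsets of the short doorbell registers within the doorbell page */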
enum {
	DOORBELL_SHORT_OFFSET_SQ = 0x10,
	DOORBELL_SHORT_OFFSET_RQ = 0x410,
	DOORBELL_SHORT_OFFSET_CQ = 0x810,
	DOORBELL_SHORT_OFFSET_EQ = 0xFF0,
};

/*
 * Write to the hardware doorbell to notify new activity.
 * Return -EINVAL if the queue ID or tail increment does not fit in the
 * short doorbell fields.
 */
int
mana_ring_short_doorbell(void *db_page, enum gdma_queue_types queue_type,
			 uint32_t queue_id, uint32_t tail_incr, uint8_t arm)
{
	uint8_t *addr = db_page;
	union gdma_short_doorbell_entry e = {};

	if ((queue_id & ~GDMA_SHORT_DB_QID_MASK) ||
	    (tail_incr & ~GDMA_SHORT_DB_INC_MASK)) {
		DP_LOG(ERR, "%s: queue_id %u or "
		       "tail_incr %u overflowed, queue type %d",
		       __func__, queue_id, tail_incr, queue_type);
		return -EINVAL;
	}

	switch (queue_type) {
	case GDMA_QUEUE_SEND:
		e.sq.id = queue_id;
		e.sq.tail_ptr_incr = tail_incr;
		addr += DOORBELL_SHORT_OFFSET_SQ;
		break;

	case GDMA_QUEUE_RECEIVE:
		e.rq.id = queue_id;
		e.rq.tail_ptr_incr = tail_incr;
		addr += DOORBELL_SHORT_OFFSET_RQ;
		break;

	case GDMA_QUEUE_COMPLETION:
		e.cq.id = queue_id;
		e.cq.tail_ptr_incr = tail_incr;
		e.cq.arm = arm;
		addr += DOORBELL_SHORT_OFFSET_CQ;
		break;

	default:
		DP_LOG(ERR, "Unsupported queue type %d", queue_type);
		return -1;
	}

	/* Ensure all writes are done before ringing doorbell */
	rte_wmb();

	DP_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u arm %u",
	       db_page, addr, queue_id, queue_type, tail_incr, arm);

	rte_write32(e.as_uint32, addr);
	return 0;
}
#else
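/* On 64-bit builds the full doorbell entry is written with a single
 * 64-bit write.
 */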
union gdma_doorbell_entry {
	uint64_t as_uint64;

	struct {
		uint64_t id : 24;
		uint64_t reserved : 8;
		uint64_t tail_ptr : 31;
		uint64_t arm : 1;
	} cq;

	struct {
		uint64_t id : 24;
		uint64_t wqe_cnt : 8;
		uint64_t tail_ptr : 32;
	} rq;

	struct {
		uint64_t id : 24;
		uint64_t reserved : 8;
		uint64_t tail_ptr : 32;
	} sq;

	struct {
		uint64_t id : 16;
		uint64_t reserved : 16;
		uint64_t tail_ptr : 31;
		uint64_t arm : 1;
	} eq;
}; /* HW DATA */

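/* Offsets of the doorbell registers within the doorbell page */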
enum {
	DOORBELL_OFFSET_SQ = 0x0,
	DOORBELL_OFFSET_RQ = 0x400,
	DOORBELL_OFFSET_CQ = 0x800,
	DOORBELL_OFFSET_EQ = 0xFF8,
};

/*
 * Write to hardware doorbell to notify new activity.
 */
int
mana_ring_doorbell(void *db_page, enum gdma_queue_types queue_type,
		   uint32_t queue_id, uint32_t tail, uint8_t arm)
{
	uint8_t *addr = db_page;
	union gdma_doorbell_entry e = {};

	switch (queue_type) {
	case GDMA_QUEUE_SEND:
		e.sq.id = queue_id;
		e.sq.tail_ptr = tail;
		addr += DOORBELL_OFFSET_SQ;
		break;

	case GDMA_QUEUE_RECEIVE:
		e.rq.id = queue_id;
		e.rq.tail_ptr = tail;
		e.rq.wqe_cnt = arm;
		addr += DOORBELL_OFFSET_RQ;
		break;

	case GDMA_QUEUE_COMPLETION:
		e.cq.id = queue_id;
		e.cq.tail_ptr = tail;
		e.cq.arm = arm;
		addr += DOORBELL_OFFSET_CQ;
		break;

	default:
		DP_LOG(ERR, "Unsupported queue type %d", queue_type);
		return -1;
	}

	/* Ensure all writes are done before ringing doorbell */
	rte_wmb();

	DP_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u arm %u",
	       db_page, addr, queue_id, queue_type, tail, arm);

	rte_write64(e.as_uint64, addr);
	return 0;
}
#endif

/*
 * Poll the completion queue and copy up to max_comp completions to
 * gdma_comp. Return the number of completions found.
 */
uint32_t
gdma_poll_completion_queue(struct mana_gdma_queue *cq,
			   struct gdma_comp *gdma_comp, uint32_t max_comp)
{
	struct gdma_hardware_completion_entry *cqe;
	uint32_t new_owner_bits, old_owner_bits;
	uint32_t cqe_owner_bits;
	uint32_t num_comp = 0;
	struct gdma_hardware_completion_entry *buffer = cq->buffer;

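	/* The owner bits of a CQE advance by one each time the hardware wraps
	 * around the queue. A CQE is new when its owner bits match the value
	 * expected for the current pass; matching the previous pass means no
	 * new entry, and any other value indicates the CQ has overflowed.
	 */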
	while (num_comp < max_comp) {
		cqe = &buffer[cq->head % cq->count];
		new_owner_bits = (cq->head / cq->count) &
				 COMPLETION_QUEUE_OWNER_MASK;
		old_owner_bits = (cq->head / cq->count - 1) &
				 COMPLETION_QUEUE_OWNER_MASK;
		cqe_owner_bits = cqe->owner_bits;

		DP_LOG(DEBUG, "comp cqe bits 0x%x owner bits 0x%x",
		       cqe_owner_bits, old_owner_bits);

		/* No new entry */
		if (cqe_owner_bits == old_owner_bits)
			break;

		if (cqe_owner_bits != new_owner_bits) {
			DRV_LOG(ERR, "CQ overflowed, ID %u cqe 0x%x new 0x%x",
				cq->id, cqe_owner_bits, new_owner_bits);
			break;
		}

		gdma_comp[num_comp].cqe_data = cqe->dma_client_data;
		num_comp++;

		cq->head++;

		DP_LOG(DEBUG, "comp new 0x%x old 0x%x cqe 0x%x wq %u sq %u head %u",
		       new_owner_bits, old_owner_bits, cqe_owner_bits,
		       cqe->wq_num, cqe->is_sq, cq->head);
	}

	/* Make sure the CQE owner bits are checked before we access the data
	 * in CQE
	 */
	rte_rmb();

	return num_comp;
}