/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */

#include <ethdev_driver.h>
#include <rte_io.h>

#include "mana.h"

uint8_t *
gdma_get_wqe_pointer(struct mana_gdma_queue *queue)
{
	uint32_t offset_in_bytes =
		(queue->head * GDMA_WQE_ALIGNMENT_UNIT_SIZE) &
		(queue->size - 1);

	DP_LOG(DEBUG, "txq sq_head %u sq_size %u offset_in_bytes %u",
	       queue->head, queue->size, offset_in_bytes);

	if (offset_in_bytes + GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue->size)
		DP_LOG(ERR, "fatal error: offset_in_bytes %u too big",
		       offset_in_bytes);

	return ((uint8_t *)queue->buffer) + offset_in_bytes;
}

static uint32_t
write_dma_client_oob(uint8_t *work_queue_buffer_pointer,
		     const struct gdma_work_request *work_request,
		     uint32_t client_oob_size)
{
	uint8_t *p = work_queue_buffer_pointer;

	struct gdma_wqe_dma_oob *header = (struct gdma_wqe_dma_oob *)p;

	memset(header, 0, sizeof(struct gdma_wqe_dma_oob));
	header->num_sgl_entries = work_request->num_sgl_elements;
	header->inline_client_oob_size_in_dwords =
		client_oob_size / sizeof(uint32_t);
	header->client_data_unit = work_request->client_data_unit;

	DP_LOG(DEBUG, "queue buf %p sgl %u oob_h %u du %u oob_buf %p oob_b %u",
	       work_queue_buffer_pointer, header->num_sgl_entries,
	       header->inline_client_oob_size_in_dwords,
	       header->client_data_unit, work_request->inline_oob_data,
	       work_request->inline_oob_size_in_bytes);

	p += sizeof(struct gdma_wqe_dma_oob);
	if (work_request->inline_oob_data &&
	    work_request->inline_oob_size_in_bytes > 0) {
		memcpy(p, work_request->inline_oob_data,
		       work_request->inline_oob_size_in_bytes);
		if (client_oob_size > work_request->inline_oob_size_in_bytes)
			memset(p + work_request->inline_oob_size_in_bytes, 0,
			       client_oob_size -
			       work_request->inline_oob_size_in_bytes);
	}

	return sizeof(struct gdma_wqe_dma_oob) + client_oob_size;
}

static uint32_t
write_scatter_gather_list(uint8_t *work_queue_head_pointer,
			  uint8_t *work_queue_end_pointer,
			  uint8_t *work_queue_cur_pointer,
			  struct gdma_work_request *work_request)
{
	struct gdma_sgl_element *sge_list;
	struct gdma_sgl_element dummy_sgl[1];
	uint8_t *address;
	uint32_t size;
	uint32_t num_sge;
	uint32_t size_to_queue_end;
	uint32_t sge_list_size;

	DP_LOG(DEBUG, "work_queue_cur_pointer %p work_request->flags %x",
	       work_queue_cur_pointer, work_request->flags);

	num_sge = work_request->num_sgl_elements;
	sge_list = work_request->sgl;
	size_to_queue_end = (uint32_t)(work_queue_end_pointer -
				       work_queue_cur_pointer);

	if (num_sge == 0) {
		/* Per spec, the case of an empty SGL should be handled as
		 * follows to avoid corrupted WQE errors:
		 * Write one dummy SGL entry
		 * Set the address to 1, leave the rest as 0
		 */
		dummy_sgl[num_sge].address = 1;
		dummy_sgl[num_sge].size = 0;
		dummy_sgl[num_sge].memory_key = 0;
		num_sge++;
		sge_list = dummy_sgl;
	}

	sge_list_size = 0;
	{
		address = (uint8_t *)sge_list;
		size = sizeof(struct gdma_sgl_element) * num_sge;
		if (size_to_queue_end < size) {
			memcpy(work_queue_cur_pointer, address,
			       size_to_queue_end);
			work_queue_cur_pointer = work_queue_head_pointer;
			address += size_to_queue_end;
			size -= size_to_queue_end;
		}

		memcpy(work_queue_cur_pointer, address, size);
		sge_list_size = size;
	}

	DP_LOG(DEBUG, "sge %u address 0x%" PRIx64 " size %u key %u list_s %u",
	       num_sge, sge_list->address, sge_list->size,
	       sge_list->memory_key, sge_list_size);

	return sge_list_size;
}
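
/*
 * Illustrative layout of a single WQE as assembled by write_dma_client_oob()
 * and write_scatter_gather_list() on behalf of gdma_post_work_request()
 * below (a sketch only; exact sizes depend on the request and on the
 * definitions in mana.h):
 *
 *	+-------------------------------+ <- gdma_get_wqe_pointer(queue)
 *	| struct gdma_wqe_dma_oob       |
 *	+-------------------------------+
 *	| inline client OOB             | INLINE_OOB_SMALL_SIZE_IN_BYTES or
 *	|                               | INLINE_OOB_LARGE_SIZE_IN_BYTES
 *	+-------------------------------+
 *	| struct gdma_sgl_element[]     | may wrap to the start of the queue
 *	+-------------------------------+
 *	| padding up to a multiple of   |
 *	| GDMA_WQE_ALIGNMENT_UNIT_SIZE  |
 *	+-------------------------------+
 */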

/*
 * Post a work request to the queue.
 */
int
gdma_post_work_request(struct mana_gdma_queue *queue,
		       struct gdma_work_request *work_req,
		       uint32_t *wqe_size_in_bu)
{
	uint32_t client_oob_size =
		work_req->inline_oob_size_in_bytes >
			INLINE_OOB_SMALL_SIZE_IN_BYTES ?
			INLINE_OOB_LARGE_SIZE_IN_BYTES :
			INLINE_OOB_SMALL_SIZE_IN_BYTES;

	uint32_t sgl_data_size = sizeof(struct gdma_sgl_element) *
		RTE_MAX((uint32_t)1, work_req->num_sgl_elements);
	uint32_t wqe_size =
		RTE_ALIGN(sizeof(struct gdma_wqe_dma_oob) +
			  client_oob_size + sgl_data_size,
			  GDMA_WQE_ALIGNMENT_UNIT_SIZE);
	uint8_t *wq_buffer_pointer;
	uint32_t queue_free_units = queue->count - (queue->head - queue->tail);

	if (wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE > queue_free_units) {
		DP_LOG(DEBUG, "WQE size %u queue count %u head %u tail %u",
		       wqe_size, queue->count, queue->head, queue->tail);
		return -EBUSY;
	}

	DP_LOG(DEBUG, "client_oob_size %u sgl_data_size %u wqe_size %u",
	       client_oob_size, sgl_data_size, wqe_size);

	*wqe_size_in_bu = wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;

	wq_buffer_pointer = gdma_get_wqe_pointer(queue);
	wq_buffer_pointer += write_dma_client_oob(wq_buffer_pointer, work_req,
						  client_oob_size);
	if (wq_buffer_pointer >= ((uint8_t *)queue->buffer) + queue->size)
		wq_buffer_pointer -= queue->size;

	write_scatter_gather_list((uint8_t *)queue->buffer,
				  (uint8_t *)queue->buffer + queue->size,
				  wq_buffer_pointer, work_req);

	queue->head += wqe_size / GDMA_WQE_ALIGNMENT_UNIT_SIZE;

	return 0;
}
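
/*
 * A minimal usage sketch for the 64-bit doorbell path (illustrative only,
 * not the driver's actual TX/RX code, which batches many work requests
 * before ringing the doorbell and uses mana_ring_short_doorbell() on 32-bit
 * builds). "txq", "db_page" and "work_req" are hypothetical names, and the
 * SQ doorbell tail is assumed to be the queue head expressed in bytes:
 *
 *	uint32_t wqe_size_in_bu;
 *
 *	if (gdma_post_work_request(&txq->gdma_sq, &work_req,
 *				   &wqe_size_in_bu) == 0)
 *		mana_ring_doorbell(db_page, GDMA_QUEUE_SEND,
 *				   txq->gdma_sq.id,
 *				   txq->gdma_sq.head *
 *					GDMA_WQE_ALIGNMENT_UNIT_SIZE,
 *				   0);
 */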

#ifdef RTE_ARCH_32
union gdma_short_doorbell_entry {
	uint32_t as_uint32;

	struct {
		uint32_t tail_ptr_incr	: 16; /* Number of CQEs */
		uint32_t id		: 12;
		uint32_t reserved	: 3;
		uint32_t arm		: 1;
	} cq;

	struct {
		uint32_t tail_ptr_incr	: 16; /* In number of bytes */
		uint32_t id		: 12;
		uint32_t reserved	: 4;
	} rq;

	struct {
		uint32_t tail_ptr_incr	: 16; /* In number of bytes */
		uint32_t id		: 12;
		uint32_t reserved	: 4;
	} sq;

	struct {
		uint32_t tail_ptr_incr	: 16; /* Number of EQEs */
		uint32_t id		: 12;
		uint32_t reserved	: 3;
		uint32_t arm		: 1;
	} eq;
}; /* HW DATA */

enum {
	DOORBELL_SHORT_OFFSET_SQ = 0x10,
	DOORBELL_SHORT_OFFSET_RQ = 0x410,
	DOORBELL_SHORT_OFFSET_CQ = 0x810,
	DOORBELL_SHORT_OFFSET_EQ = 0xFF0,
};

/*
 * Write to hardware doorbell to notify new activity.
 */
int
mana_ring_short_doorbell(void *db_page, enum gdma_queue_types queue_type,
			 uint32_t queue_id, uint32_t tail_incr, uint8_t arm)
{
	uint8_t *addr = db_page;
	union gdma_short_doorbell_entry e = {};

	if ((queue_id & ~GDMA_SHORT_DB_QID_MASK) ||
	    (tail_incr & ~GDMA_SHORT_DB_INC_MASK)) {
		DP_LOG(ERR, "%s: queue_id %u or tail_incr %u overflowed, queue type %d",
		       __func__, queue_id, tail_incr, queue_type);
		return -EINVAL;
	}

	switch (queue_type) {
	case GDMA_QUEUE_SEND:
		e.sq.id = queue_id;
		e.sq.tail_ptr_incr = tail_incr;
		addr += DOORBELL_SHORT_OFFSET_SQ;
		break;

	case GDMA_QUEUE_RECEIVE:
		e.rq.id = queue_id;
		e.rq.tail_ptr_incr = tail_incr;
		addr += DOORBELL_SHORT_OFFSET_RQ;
		break;

	case GDMA_QUEUE_COMPLETION:
		e.cq.id = queue_id;
		e.cq.tail_ptr_incr = tail_incr;
		e.cq.arm = arm;
		addr += DOORBELL_SHORT_OFFSET_CQ;
		break;

	default:
		DP_LOG(ERR, "Unsupported queue type %d", queue_type);
		return -1;
	}

	/* Ensure all writes are done before ringing doorbell */
	rte_wmb();

	DP_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u arm %u",
	       db_page, addr, queue_id, queue_type, tail_incr, arm);

	rte_write32(e.as_uint32, addr);
	return 0;
}
#else
union gdma_doorbell_entry {
	uint64_t as_uint64;

	struct {
		uint64_t id		: 24;
		uint64_t reserved	: 8;
		uint64_t tail_ptr	: 31;
		uint64_t arm		: 1;
	} cq;

	struct {
		uint64_t id		: 24;
		uint64_t wqe_cnt	: 8;
		uint64_t tail_ptr	: 32;
	} rq;

	struct {
		uint64_t id		: 24;
		uint64_t reserved	: 8;
		uint64_t tail_ptr	: 32;
	} sq;

	struct {
		uint64_t id		: 16;
		uint64_t reserved	: 16;
		uint64_t tail_ptr	: 31;
		uint64_t arm		: 1;
	} eq;
}; /* HW DATA */

enum {
	DOORBELL_OFFSET_SQ = 0x0,
	DOORBELL_OFFSET_RQ = 0x400,
	DOORBELL_OFFSET_CQ = 0x800,
	DOORBELL_OFFSET_EQ = 0xFF8,
};

/*
 * Write to hardware doorbell to notify new activity.
 */
int
mana_ring_doorbell(void *db_page, enum gdma_queue_types queue_type,
		   uint32_t queue_id, uint32_t tail, uint8_t arm)
{
	uint8_t *addr = db_page;
	union gdma_doorbell_entry e = {};

	switch (queue_type) {
	case GDMA_QUEUE_SEND:
		e.sq.id = queue_id;
		e.sq.tail_ptr = tail;
		addr += DOORBELL_OFFSET_SQ;
		break;

	case GDMA_QUEUE_RECEIVE:
		e.rq.id = queue_id;
		e.rq.tail_ptr = tail;
		e.rq.wqe_cnt = arm;
		addr += DOORBELL_OFFSET_RQ;
		break;

	case GDMA_QUEUE_COMPLETION:
		e.cq.id = queue_id;
		e.cq.tail_ptr = tail;
		e.cq.arm = arm;
		addr += DOORBELL_OFFSET_CQ;
		break;

	default:
		DP_LOG(ERR, "Unsupported queue type %d", queue_type);
		return -1;
	}

	/* Ensure all writes are done before ringing doorbell */
	rte_wmb();

	DP_LOG(DEBUG, "db_page %p addr %p queue_id %u type %u tail %u arm %u",
	       db_page, addr, queue_id, queue_type, tail, arm);

	rte_write64(e.as_uint64, addr);
	return 0;
}
#endif
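
/*
 * Completion entries are validated with owner bits rather than a producer
 * index. For a queue of cq->count entries, the owner value expected on the
 * current pass over the ring is (cq->head / cq->count) masked with
 * COMPLETION_QUEUE_OWNER_MASK, and the value from the previous pass is one
 * less. A CQE still carrying the previous value has not been written yet;
 * one carrying neither the previous nor the expected value means the queue
 * overflowed. As a worked example, assuming the mask is 0x7 (the actual
 * value comes from mana.h) and cq->count is 64: at cq->head == 130 the
 * expected owner bits are (130 / 64) & 0x7 == 2 and the previous pass's
 * bits are 1.
 */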

/*
 * Poll completion queue for completions.
 */
uint32_t
gdma_poll_completion_queue(struct mana_gdma_queue *cq,
			   struct gdma_comp *gdma_comp, uint32_t max_comp)
{
	struct gdma_hardware_completion_entry *cqe;
	uint32_t new_owner_bits, old_owner_bits;
	uint32_t cqe_owner_bits;
	uint32_t num_comp = 0;
	struct gdma_hardware_completion_entry *buffer = cq->buffer;

	while (num_comp < max_comp) {
		cqe = &buffer[cq->head % cq->count];
		new_owner_bits = (cq->head / cq->count) &
			COMPLETION_QUEUE_OWNER_MASK;
		old_owner_bits = (cq->head / cq->count - 1) &
			COMPLETION_QUEUE_OWNER_MASK;
		cqe_owner_bits = cqe->owner_bits;

		DP_LOG(DEBUG, "comp cqe bits 0x%x owner bits 0x%x",
		       cqe_owner_bits, old_owner_bits);

		/* No new entry */
		if (cqe_owner_bits == old_owner_bits)
			break;

		if (cqe_owner_bits != new_owner_bits) {
			DRV_LOG(ERR, "CQ overflowed, ID %u cqe 0x%x new 0x%x",
				cq->id, cqe_owner_bits, new_owner_bits);
			break;
		}

		gdma_comp[num_comp].cqe_data = cqe->dma_client_data;
		num_comp++;

		cq->head++;

		DP_LOG(DEBUG, "comp new 0x%x old 0x%x cqe 0x%x wq %u sq %u head %u",
		       new_owner_bits, old_owner_bits, cqe_owner_bits,
		       cqe->wq_num, cqe->is_sq, cq->head);
	}

	/* Make sure the CQE owner bits are checked before we access the data
	 * in CQE
	 */
	rte_rmb();

	return num_comp;
}
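
/*
 * A minimal polling sketch (illustrative only; the driver's RX/TX
 * completion handlers do per-entry bookkeeping and may also ring the
 * completion queue doorbell afterwards). "cq", "comps", MAX_COMP and
 * handle_completion() are hypothetical names:
 *
 *	struct gdma_comp comps[MAX_COMP];
 *	uint32_t i, n;
 *
 *	n = gdma_poll_completion_queue(cq, comps, MAX_COMP);
 *	for (i = 0; i < n; i++)
 *		handle_completion(comps[i].cqe_data);
 */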