/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2017 Intel Corporation
 */

#include <rte_ring.h>
#include <rte_hash_crc.h>
#include <rte_event_ring.h>
#include "sw_evdev.h"
#include "iq_chunk.h"

#define SW_IQS_MASK (SW_IQS_MAX-1)

/* Retrieve the highest priority IQ, or SW_IQS_MAX if no packets are
 * available. Doing the ctz twice is faster than caching the value due to
 * data dependencies.
 */
#define PKT_MASK_TO_IQ(pkts) \
	(__builtin_ctz(pkts | (1 << SW_IQS_MAX)))

#if SW_IQS_MAX != 4
#error Misconfigured PRIO_TO_IQ caused by SW_IQS_MAX value change
#endif
#define PRIO_TO_IQ(prio) (prio >> 6)

#define MAX_PER_IQ_DEQUEUE 48
#define FLOWID_MASK (SW_QID_NUM_FIDS-1)
/* use cheap bit mixing, we only need to lose a few bits */
#define SW_HASH_FLOWID(f) (((f) ^ (f >> 10)) & FLOWID_MASK)
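
/* Worked example of the two mappings above (illustration only): with
 * SW_IQS_MAX == 4,
 *	PRIO_TO_IQ(0)   == 0	highest priority -> IQ 0
 *	PRIO_TO_IQ(255) == 3	lowest priority  -> IQ 3
 *	PKT_MASK_TO_IQ(0x4) == 2	only IQ 2 has packets pending
 *	PKT_MASK_TO_IQ(0x0) == SW_IQS_MAX	no packets pending
 */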

static inline uint32_t
sw_schedule_atomic_to_cq(struct sw_evdev *sw, struct sw_qid * const qid,
		uint32_t iq_num, unsigned int count)
{
	struct rte_event qes[MAX_PER_IQ_DEQUEUE]; /* count <= MAX */
	struct rte_event blocked_qes[MAX_PER_IQ_DEQUEUE];
	uint32_t nb_blocked = 0;
	uint32_t i;

	if (count > MAX_PER_IQ_DEQUEUE)
		count = MAX_PER_IQ_DEQUEUE;

	/* This is the QID ID. The QID ID is static, hence it can be
	 * used to identify the stage of processing in history lists etc
	 */
	uint32_t qid_id = qid->id;

	iq_dequeue_burst(sw, &qid->iq[iq_num], qes, count);
	for (i = 0; i < count; i++) {
		const struct rte_event *qe = &qes[i];
		const uint16_t flow_id = SW_HASH_FLOWID(qes[i].flow_id);
		struct sw_fid_t *fid = &qid->fids[flow_id];
		int cq = fid->cq;

		if (cq < 0) {
			uint32_t cq_idx = qid->cq_next_tx++;
			if (qid->cq_next_tx == qid->cq_num_mapped_cqs)
				qid->cq_next_tx = 0;
			cq = qid->cq_map[cq_idx];

			/* find least used */
			int cq_free_cnt = sw->cq_ring_space[cq];
			for (cq_idx = 0; cq_idx < qid->cq_num_mapped_cqs;
					cq_idx++) {
				int test_cq = qid->cq_map[cq_idx];
				int test_cq_free = sw->cq_ring_space[test_cq];
				if (test_cq_free > cq_free_cnt) {
					cq = test_cq;
					cq_free_cnt = test_cq_free;
				}
			}

			fid->cq = cq; /* this pins early */
		}

		if (sw->cq_ring_space[cq] == 0 ||
				sw->ports[cq].inflights == SW_PORT_HIST_LIST) {
			blocked_qes[nb_blocked++] = *qe;
			continue;
		}

		struct sw_port *p = &sw->ports[cq];

		/* at this point we can queue up the packet on the cq_buf */
		fid->pcount++;
		p->cq_buf[p->cq_buf_count++] = *qe;
		p->inflights++;
		sw->cq_ring_space[cq]--;

		int head = (p->hist_head++ & (SW_PORT_HIST_LIST-1));
		p->hist_list[head].fid = flow_id;
		p->hist_list[head].qid = qid_id;

		p->stats.tx_pkts++;
		qid->stats.tx_pkts++;
		qid->to_port[cq]++;

		/* if we just filled in the last slot, flush the buffer */
		if (sw->cq_ring_space[cq] == 0) {
			struct rte_event_ring *worker = p->cq_worker_ring;
			rte_event_ring_enqueue_burst(worker, p->cq_buf,
					p->cq_buf_count,
					&sw->cq_ring_space[cq]);
			p->cq_buf_count = 0;
		}
	}
	iq_put_back(sw, &qid->iq[iq_num], blocked_qes, nb_blocked);

	return count - nb_blocked;
}
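
/* Schedule up to "count" events from the given IQ of a parallel (ordered or
 * unordered) QID. Each event is sent to the next mapped CQ in round-robin
 * order, skipping CQs whose worker ring or history list is full; for ordered
 * QIDs a reorder-buffer entry is reserved per event. Returns the number of
 * events scheduled.
 */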
static inline uint32_t
sw_schedule_parallel_to_cq(struct sw_evdev *sw, struct sw_qid * const qid,
		uint32_t iq_num, unsigned int count, int keep_order)
{
	uint32_t i;
	uint32_t cq_idx = qid->cq_next_tx;

	/* This is the QID ID. The QID ID is static, hence it can be
	 * used to identify the stage of processing in history lists etc
	 */
	uint32_t qid_id = qid->id;

	if (count > MAX_PER_IQ_DEQUEUE)
		count = MAX_PER_IQ_DEQUEUE;

	if (keep_order)
		/* only schedule as many as we have reorder buffer entries */
		count = RTE_MIN(count,
				rte_ring_count(qid->reorder_buffer_freelist));

	for (i = 0; i < count; i++) {
		const struct rte_event *qe = iq_peek(&qid->iq[iq_num]);
		uint32_t cq_check_count = 0;
		uint32_t cq;

		/*
		 * for parallel, just send to next available CQ in round-robin
		 * fashion. So scan for an available CQ. If all CQs are full
		 * just return and move on to next QID
		 */
		do {
			if (++cq_check_count > qid->cq_num_mapped_cqs)
				goto exit;
			cq = qid->cq_map[cq_idx];
			if (++cq_idx == qid->cq_num_mapped_cqs)
				cq_idx = 0;
		} while (rte_event_ring_free_count(
				sw->ports[cq].cq_worker_ring) == 0 ||
				sw->ports[cq].inflights == SW_PORT_HIST_LIST);

		struct sw_port *p = &sw->ports[cq];
		if (sw->cq_ring_space[cq] == 0 ||
				p->inflights == SW_PORT_HIST_LIST)
			break;

		sw->cq_ring_space[cq]--;

		qid->stats.tx_pkts++;

		const int head = (p->hist_head & (SW_PORT_HIST_LIST-1));
		p->hist_list[head].fid = SW_HASH_FLOWID(qe->flow_id);
		p->hist_list[head].qid = qid_id;

		if (keep_order)
			rte_ring_sc_dequeue(qid->reorder_buffer_freelist,
					(void *)&p->hist_list[head].rob_entry);

		sw->ports[cq].cq_buf[sw->ports[cq].cq_buf_count++] = *qe;
		iq_pop(sw, &qid->iq[iq_num]);

		rte_compiler_barrier();
		p->inflights++;
		p->stats.tx_pkts++;
		p->hist_head++;
	}
exit:
	qid->cq_next_tx = cq_idx;
	return i;
}
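
/* Schedule events from the given IQ of a directed (single-link) QID: the
 * events go straight to the one mapped CQ, burst-dequeued from the IQ into
 * the port's cq_buf, limited by the cached CQ ring space. Returns the number
 * of events moved.
 */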
static uint32_t
sw_schedule_dir_to_cq(struct sw_evdev *sw, struct sw_qid * const qid,
		uint32_t iq_num, unsigned int count __rte_unused)
{
	uint32_t cq_id = qid->cq_map[0];
	struct sw_port *port = &sw->ports[cq_id];

	/* get max burst enq size for cq_ring */
	uint32_t count_free = sw->cq_ring_space[cq_id];
	if (count_free == 0)
		return 0;

	/* burst dequeue from the QID IQ ring */
	struct sw_iq *iq = &qid->iq[iq_num];
	uint32_t ret = iq_dequeue_burst(sw, iq,
			&port->cq_buf[port->cq_buf_count], count_free);
	port->cq_buf_count += ret;

	/* Update QID, Port and Total TX stats */
	qid->stats.tx_pkts += ret;
	port->stats.tx_pkts += ret;

	/* Subtract credits from cached value */
	sw->cq_ring_space[cq_id] -= ret;

	return ret;
}
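
/* Scan all QIDs in priority order and, for each, drain the highest-priority
 * IQ with packets pending into the mapped CQs, dispatching to the directed,
 * atomic or parallel helper according to the QID type. Returns the total
 * number of events moved to CQs in this pass.
 */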
static uint32_t
sw_schedule_qid_to_cq(struct sw_evdev *sw)
{
	uint32_t pkts = 0;
	uint32_t qid_idx;

	sw->sched_cq_qid_called++;

	for (qid_idx = 0; qid_idx < sw->qid_count; qid_idx++) {
		struct sw_qid *qid = sw->qids_prioritized[qid_idx];

		int type = qid->type;
		int iq_num = PKT_MASK_TO_IQ(qid->iq_pkt_mask);

		/* skip this qid if no packets are pending in any of its IQs */
		if (iq_num >= SW_IQS_MAX)
			continue;

		uint32_t pkts_done = 0;
		uint32_t count = iq_count(&qid->iq[iq_num]);

		if (count > 0) {
			if (type == SW_SCHED_TYPE_DIRECT)
				pkts_done += sw_schedule_dir_to_cq(sw, qid,
						iq_num, count);
			else if (type == RTE_SCHED_TYPE_ATOMIC)
				pkts_done += sw_schedule_atomic_to_cq(sw, qid,
						iq_num, count);
			else
				pkts_done += sw_schedule_parallel_to_cq(sw, qid,
						iq_num, count,
						type == RTE_SCHED_TYPE_ORDERED);
		}

		/* Check if the IQ that was polled is now empty, and unset it
		 * in the IQ mask if it's empty.
		 */
		int all_done = (pkts_done == count);

		qid->iq_pkt_mask &= ~(all_done << (iq_num));
		pkts += pkts_done;
	}

	return pkts;
}

/* This function will perform re-ordering of packets, and inject them into
 * the appropriate QID IQ. As LB and DIR QIDs are in the same array, but *NOT*
 * contiguous in that array, this function accepts a "range" of QIDs to scan.
 */
static uint16_t
sw_schedule_reorder(struct sw_evdev *sw, int qid_start, int qid_end)
{
	/* Perform egress reordering */
	struct rte_event *qe;
	uint32_t pkts_iter = 0;

	for (; qid_start < qid_end; qid_start++) {
		struct sw_qid *qid = &sw->qids[qid_start];
		int i, num_entries_in_use;

		if (qid->type != RTE_SCHED_TYPE_ORDERED)
			continue;

		num_entries_in_use = rte_ring_free_count(
					qid->reorder_buffer_freelist);

		for (i = 0; i < num_entries_in_use; i++) {
			struct reorder_buffer_entry *entry;
			int j;

			entry = &qid->reorder_buffer[qid->reorder_buffer_index];

			if (!entry->ready)
				break;

			for (j = 0; j < entry->num_fragments; j++) {
				uint16_t dest_qid;
				uint16_t dest_iq;

				int idx = entry->fragment_index + j;
				qe = &entry->fragments[idx];

				dest_qid = qe->queue_id;
				dest_iq  = PRIO_TO_IQ(qe->priority);

				if (dest_qid >= sw->qid_count) {
					sw->stats.rx_dropped++;
					continue;
				}

				pkts_iter++;

				struct sw_qid *q = &sw->qids[dest_qid];
				struct sw_iq *iq = &q->iq[dest_iq];

				/* we checked for space above, so enqueue must
				 * succeed
				 */
				iq_enqueue(sw, iq, qe);
				q->iq_pkt_mask |= (1 << (dest_iq));
				q->iq_pkt_count[dest_iq]++;
				q->stats.rx_pkts++;
			}

			entry->ready = (j != entry->num_fragments);
			entry->num_fragments -= j;
			entry->fragment_index += j;

			if (!entry->ready) {
				entry->fragment_index = 0;

				rte_ring_sp_enqueue(
						qid->reorder_buffer_freelist,
						entry);

				qid->reorder_buffer_index++;
				qid->reorder_buffer_index %= qid->window_size;
			}
		}
	}
	return pkts_iter;
}
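
/* Refill the port's shadow buffer (pp_buf) with a burst of events dequeued
 * from the port's rx worker ring.
 */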
static __rte_always_inline void
sw_refill_pp_buf(struct sw_evdev *sw, struct sw_port *port)
{
	RTE_SET_USED(sw);
	struct rte_event_ring *worker = port->rx_worker_ring;
	port->pp_buf_start = 0;
	port->pp_buf_count = rte_event_ring_dequeue_burst(worker, port->pp_buf,
			RTE_DIM(port->pp_buf), NULL);
}
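
/* Pull events from a load-balanced port into the device: when an event
 * carries the COMPLETE flag, release the oldest history-list entry
 * (decrementing the atomic flow's packet count, unpinning the flow when it
 * reaches zero and, for ordered queues, marking the reorder-buffer entry
 * ready); when an event carries the VALID flag, enqueue it into the
 * destination QID's IQ selected by the event priority. The allow_reorder
 * argument selects whether ordered-queue handling is performed.
 */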
static __rte_always_inline uint32_t
__pull_port_lb(struct sw_evdev *sw, uint32_t port_id, int allow_reorder)
{
	static struct reorder_buffer_entry dummy_rob;
	uint32_t pkts_iter = 0;
	struct sw_port *port = &sw->ports[port_id];

	/* If shadow ring has 0 pkts, pull from worker ring */
	if (port->pp_buf_count == 0)
		sw_refill_pp_buf(sw, port);

	while (port->pp_buf_count) {
		const struct rte_event *qe = &port->pp_buf[port->pp_buf_start];
		struct sw_hist_list_entry *hist_entry = NULL;
		uint8_t flags = qe->op;
		const uint16_t eop = !(flags & QE_FLAG_NOT_EOP);
		int needs_reorder = 0;
		/* if reordering is disabled, a PARTIAL event is treated as NEW */
		if (!allow_reorder && !eop)
			flags = QE_FLAG_VALID;

		/*
		 * if we don't have space for this packet in an IQ,
		 * then move on to next queue. Technically, for a
		 * packet that needs reordering, we don't need to check
		 * here, but it simplifies things not to special-case
		 */
		uint32_t iq_num = PRIO_TO_IQ(qe->priority);
		struct sw_qid *qid = &sw->qids[qe->queue_id];

		/* now process based on flags. Note that for directed
		 * queues, the enqueue_flush masks off all but the
		 * valid flag. This makes FWD and PARTIAL enqueues just
		 * NEW type, and makes DROPS no-op calls.
		 */
		if ((flags & QE_FLAG_COMPLETE) && port->inflights > 0) {
			const uint32_t hist_tail = port->hist_tail &
					(SW_PORT_HIST_LIST - 1);

			hist_entry = &port->hist_list[hist_tail];
			const uint32_t hist_qid = hist_entry->qid;
			const uint32_t hist_fid = hist_entry->fid;

			struct sw_fid_t *fid =
				&sw->qids[hist_qid].fids[hist_fid];
			fid->pcount -= eop;
			if (fid->pcount == 0)
				fid->cq = -1;

			if (allow_reorder) {
				/* set reorder ready if an ordered QID */
				uintptr_t rob_ptr =
					(uintptr_t)hist_entry->rob_entry;
				const uintptr_t valid = (rob_ptr != 0);
				needs_reorder = valid;
				rob_ptr |=
					((valid - 1) & (uintptr_t)&dummy_rob);
				struct reorder_buffer_entry *tmp_rob_ptr =
					(struct reorder_buffer_entry *)rob_ptr;
				tmp_rob_ptr->ready = eop * needs_reorder;
			}

			port->inflights -= eop;
			port->hist_tail += eop;
		}
		if (flags & QE_FLAG_VALID) {
			port->stats.rx_pkts++;

			if (allow_reorder && needs_reorder) {
				struct reorder_buffer_entry *rob_entry =
						hist_entry->rob_entry;

				hist_entry->rob_entry = NULL;
				/* Although fragmentation is not currently
				 * supported by the eventdev API, we support it
				 * here. Open: How do we alert the user that
				 * they've exceeded max frags?
				 */
				int num_frag = rob_entry->num_fragments;
				if (num_frag == SW_FRAGMENTS_MAX)
					sw->stats.rx_dropped++;
				else {
					int idx = rob_entry->num_fragments++;
					rob_entry->fragments[idx] = *qe;
				}
				goto end_qe;
			}

			/* Use the iq_num from above to push the QE
			 * into the qid at the right priority
			 */

			qid->iq_pkt_mask |= (1 << (iq_num));
			iq_enqueue(sw, &qid->iq[iq_num], qe);
			qid->iq_pkt_count[iq_num]++;
			qid->stats.rx_pkts++;
			pkts_iter++;
		}

end_qe:
		port->pp_buf_start++;
		port->pp_buf_count--;
	} /* while (avail_qes) */

	return pkts_iter;
}
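
/* The two wrappers below specialise __pull_port_lb() for ports with and
 * without ordered queues linked; since __pull_port_lb() is always inlined,
 * the allow_reorder branches are expected to be resolved at compile time.
 */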
static uint32_t
sw_schedule_pull_port_lb(struct sw_evdev *sw, uint32_t port_id)
{
	return __pull_port_lb(sw, port_id, 1);
}

static uint32_t
sw_schedule_pull_port_no_reorder(struct sw_evdev *sw, uint32_t port_id)
{
	return __pull_port_lb(sw, port_id, 0);
}

static uint32_t
sw_schedule_pull_port_dir(struct sw_evdev *sw, uint32_t port_id)
{
	uint32_t pkts_iter = 0;
	struct sw_port *port = &sw->ports[port_id];

	/* If shadow ring has 0 pkts, pull from worker ring */
	if (port->pp_buf_count == 0)
		sw_refill_pp_buf(sw, port);

	while (port->pp_buf_count) {
		const struct rte_event *qe = &port->pp_buf[port->pp_buf_start];
		uint8_t flags = qe->op;

		if ((flags & QE_FLAG_VALID) == 0)
			goto end_qe;

		uint32_t iq_num = PRIO_TO_IQ(qe->priority);
		struct sw_qid *qid = &sw->qids[qe->queue_id];
		struct sw_iq *iq = &qid->iq[iq_num];

		port->stats.rx_pkts++;

		/* Use the iq_num from above to push the QE
		 * into the qid at the right priority
		 */
		qid->iq_pkt_mask |= (1 << (iq_num));
		iq_enqueue(sw, iq, qe);
		qid->iq_pkt_count[iq_num]++;
		qid->stats.rx_pkts++;
		pkts_iter++;

end_qe:
		port->pp_buf_start++;
		port->pp_buf_count--;
	} /* while port->pp_buf_count */

	return pkts_iter;
}
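
/* Run one scheduling iteration for the device: pull new events from every
 * port's rx ring into the QID IQs, perform egress reordering, then schedule
 * events from the IQs to the consumer queues, repeating until roughly
 * sched_quanta events have been moved or no further progress is made.
 * Finally, flush each port's buffered CQ events to its worker ring.
 */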
void
sw_event_schedule(struct rte_eventdev *dev)
{
	struct sw_evdev *sw = sw_pmd_priv(dev);
	uint32_t in_pkts, out_pkts;
	uint32_t out_pkts_total = 0, in_pkts_total = 0;
	int32_t sched_quanta = sw->sched_quanta;
	uint32_t i;

	sw->sched_called++;
	if (!sw->started)
		return;

	do {
		uint32_t in_pkts_this_iteration = 0;

		/* Pull from rx_ring for ports */
		do {
			in_pkts = 0;
			for (i = 0; i < sw->port_count; i++)
				if (sw->ports[i].is_directed)
					in_pkts += sw_schedule_pull_port_dir(sw, i);
				else if (sw->ports[i].num_ordered_qids > 0)
					in_pkts += sw_schedule_pull_port_lb(sw, i);
				else
					in_pkts += sw_schedule_pull_port_no_reorder(sw, i);

			/* QID scan for re-ordered */
			in_pkts += sw_schedule_reorder(sw, 0,
					sw->qid_count);
			in_pkts_this_iteration += in_pkts;
		} while (in_pkts > 4 &&
				(int)in_pkts_this_iteration < sched_quanta);

		out_pkts = 0;
		out_pkts += sw_schedule_qid_to_cq(sw);
		out_pkts_total += out_pkts;
		in_pkts_total += in_pkts_this_iteration;

		if (in_pkts == 0 && out_pkts == 0)
			break;
	} while ((int)out_pkts_total < sched_quanta);

	/* push all the internal buffered QEs in port->cq_ring to the
	 * worker cores: i.e. do the ring transfers batched.
	 */
	for (i = 0; i < sw->port_count; i++) {
		struct rte_event_ring *worker = sw->ports[i].cq_worker_ring;
		rte_event_ring_enqueue_burst(worker, sw->ports[i].cq_buf,
				sw->ports[i].cq_buf_count,
				&sw->cq_ring_space[i]);
		sw->ports[i].cq_buf_count = 0;
	}

	sw->stats.tx_pkts += out_pkts_total;
	sw->stats.rx_pkts += in_pkts_total;

	sw->sched_no_iq_enqueues += (in_pkts_total == 0);
	sw->sched_no_cq_enqueues += (out_pkts_total == 0);

}
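
/*
 * Illustrative usage sketch (an assumption, not part of this driver): the
 * scheduling work above is normally exposed to applications through the
 * eventdev service-core framework, so rather than calling sw_event_schedule()
 * directly an application would typically map the device's service to a
 * service lcore, e.g.:
 *
 *	uint32_t service_id;
 *	if (rte_event_dev_service_id_get(dev_id, &service_id) == 0) {
 *		rte_service_runstate_set(service_id, 1);
 *		rte_service_map_lcore_set(service_id, service_lcore, 1);
 *		rte_service_lcore_start(service_lcore);
 *	}
 *
 * The dev_id and service_lcore values here are application-specific
 * placeholders.
 */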