/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <tmmintrin.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

#define RTE_VIRTIO_VPMD_RX_BURST 32
#define RTE_VIRTIO_DESC_PER_LOOP 8
#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST

#ifndef __INTEL_COMPILER
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif

int __attribute__((cold))
virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
	struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct vring_desc *start_dp;
	uint16_t desc_idx;

	desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
	dxp = &vq->vq_descx[desc_idx];
	dxp->cookie = (void *)cookie;
	vq->sw_ring[desc_idx] = cookie;

	start_dp = vq->vq_ring.desc;
	start_dp[desc_idx].addr = (uint64_t)((uintptr_t)cookie->buf_physaddr +
		RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size);
	start_dp[desc_idx].len = cookie->buf_len -
		RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;

	vq->vq_free_cnt--;
	vq->vq_avail_idx++;

	return 0;
}

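/*
 * Refill the RX ring in bulk: take RTE_VIRTIO_VPMD_RX_REARM_THRESH mbufs
 * from the mempool, record them in the shadow sw_ring, and point the
 * corresponding descriptors at their buffers, leaving room in the headroom
 * for the virtio-net header.  On allocation failure only the failure
 * statistic is updated and the ring is left untouched.
 */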
static inline void
virtio_rxq_rearm_vec(struct virtqueue *rxvq)
{
	int i;
	uint16_t desc_idx;
	struct rte_mbuf **sw_ring;
	struct vring_desc *start_dp;
	int ret;

	desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
	sw_ring = &rxvq->sw_ring[desc_idx];
	start_dp = &rxvq->vq_ring.desc[desc_idx];

	ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
		RTE_VIRTIO_VPMD_RX_REARM_THRESH);
	if (unlikely(ret)) {
		rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_VIRTIO_VPMD_RX_REARM_THRESH;
		return;
	}

	for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) {
		uintptr_t p;

		p = (uintptr_t)&sw_ring[i]->rearm_data;
		*(uint64_t *)p = rxvq->mbuf_initializer;

		start_dp[i].addr =
			(uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
			RTE_PKTMBUF_HEADROOM - rxvq->hw->vtnet_hdr_size);
		start_dp[i].len = sw_ring[i]->buf_len -
			RTE_PKTMBUF_HEADROOM + rxvq->hw->vtnet_hdr_size;
	}

	rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
	rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
	vq_update_avail_idx(rxvq);
}

/* virtio vPMD receive routine, only accepts nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP
 *
 * This routine is for non-mergeable RX, one desc for each guest buffer.
 * This routine is based on the RX ring layout optimization: each entry in the
 * avail ring points to the desc with the same index in the desc ring, and this
 * mapping is never changed by the driver.
 *
 * - if nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet
 */
uint16_t
virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
	uint16_t nb_pkts)
{
	struct virtqueue *rxvq = rx_queue;
	uint16_t nb_used;
	uint16_t desc_idx;
	struct vring_used_elem *rused;
	struct rte_mbuf **sw_ring;
	struct rte_mbuf **sw_ring_end;
	uint16_t nb_pkts_received;
	__m128i shuf_msk1, shuf_msk2, len_adjust;

	shuf_msk1 = _mm_set_epi8(
		0xFF, 0xFF, 0xFF, 0xFF,
		0xFF, 0xFF,		/* vlan tci */
		5, 4,			/* data len */
		0xFF, 0xFF, 5, 4,	/* pkt len */
		0xFF, 0xFF, 0xFF, 0xFF	/* packet type */
	);

	shuf_msk2 = _mm_set_epi8(
		0xFF, 0xFF, 0xFF, 0xFF,
		0xFF, 0xFF,		/* vlan tci */
		13, 12,			/* data len */
		0xFF, 0xFF, 13, 12,	/* pkt len */
		0xFF, 0xFF, 0xFF, 0xFF	/* packet type */
	);

	/* Subtract the header length.
	 * In which case would we need the header length in used->len?
	 */
	len_adjust = _mm_set_epi16(
		0, 0,
		0,
		(uint16_t)-rxvq->hw->vtnet_hdr_size,
		0, (uint16_t)-rxvq->hw->vtnet_hdr_size,
		0, 0);

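	/*
	 * Note on the masks above: each 16-byte load from the used ring
	 * covers two struct vring_used_elem entries (4-byte id/len pairs),
	 * so the len of the first element sits at bytes 4-5 and the len of
	 * the second at bytes 12-13.  shuf_msk1/shuf_msk2 scatter those
	 * 16-bit lengths into the pkt_len and data_len slots of
	 * rx_descriptor_fields1, zeroing everything else, and len_adjust
	 * then subtracts the virtio-net header size from both length fields.
	 */
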
	if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
		return 0;

	nb_used = *(volatile uint16_t *)&rxvq->vq_ring.used->idx -
		rxvq->vq_used_cons_idx;

	rte_compiler_barrier();

	if (unlikely(nb_used == 0))
		return 0;

	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
	nb_used = RTE_MIN(nb_used, nb_pkts);

	desc_idx = (uint16_t)(rxvq->vq_used_cons_idx & (rxvq->vq_nentries - 1));
	rused = &rxvq->vq_ring.used->ring[desc_idx];
	sw_ring = &rxvq->sw_ring[desc_idx];
	sw_ring_end = &rxvq->sw_ring[rxvq->vq_nentries];

	_mm_prefetch((const void *)rused, _MM_HINT_T0);

	if (rxvq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
		virtio_rxq_rearm_vec(rxvq);
		if (unlikely(virtqueue_kick_prepare(rxvq)))
			virtqueue_notify(rxvq);
	}

	for (nb_pkts_received = 0;
		nb_pkts_received < nb_used;) {
		__m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
		__m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
		__m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];

		mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0));
		desc[0] = _mm_loadu_si128((__m128i *)(rused + 0));
		_mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]);

		mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2));
		desc[1] = _mm_loadu_si128((__m128i *)(rused + 2));
		_mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]);

		mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4));
		desc[2] = _mm_loadu_si128((__m128i *)(rused + 4));
		_mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]);

		mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6));
		desc[3] = _mm_loadu_si128((__m128i *)(rused + 6));
		_mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]);

		pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2);
		pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1);
		pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust);
		pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust);
		_mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1,
			pkt_mb[1]);
		_mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1,
			pkt_mb[0]);

		pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2);
		pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1);
		pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust);
		pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust);
		_mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1,
			pkt_mb[3]);
		_mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1,
			pkt_mb[2]);

		pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2);
		pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1);
		pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust);
		pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust);
		_mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1,
			pkt_mb[5]);
		_mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1,
			pkt_mb[4]);

		pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2);
		pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1);
		pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust);
		pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust);
		_mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1,
			pkt_mb[7]);
		_mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1,
			pkt_mb[6]);

		if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
			if (sw_ring + nb_used <= sw_ring_end)
				nb_pkts_received += nb_used;
			else
				nb_pkts_received += sw_ring_end - sw_ring;
			break;
		} else {
			if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
				sw_ring_end)) {
				nb_pkts_received += sw_ring_end - sw_ring;
				break;
			} else {
				nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;

				rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
				sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
				rused += RTE_VIRTIO_DESC_PER_LOOP;
				nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
			}
		}
	}

	rxvq->vq_used_cons_idx += nb_pkts_received;
	rxvq->vq_free_cnt += nb_pkts_received;
	rxvq->packets += nb_pkts_received;
	return nb_pkts_received;
}

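/*
 * Simple TX path.  The ring is assumed to be laid out at queue start so that
 * each data descriptor in the first half of the desc ring is chained to a
 * pre-filled virtio-net header descriptor in the second half; every packet
 * therefore consumes two descriptors, which is why the free-count
 * bookkeeping below shifts by one.
 */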
#define VIRTIO_TX_FREE_THRESH 32
#define VIRTIO_TX_MAX_FREE_BUF_SZ 32
#define VIRTIO_TX_FREE_NR 32
/* TODO: vq->tx_free_cnt could mean the number of free slots, so we could avoid the shift */
static inline void
virtio_xmit_cleanup(struct virtqueue *vq)
{
	uint16_t i, desc_idx;
	int nb_free = 0;
	struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ];

	desc_idx = (uint16_t)(vq->vq_used_cons_idx &
		((vq->vq_nentries >> 1) - 1));
	m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
	m = __rte_pktmbuf_prefree_seg(m);
	if (likely(m != NULL)) {
		free[0] = m;
		nb_free = 1;
		for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
			m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
			m = __rte_pktmbuf_prefree_seg(m);
			if (likely(m != NULL)) {
				if (likely(m->pool == free[0]->pool))
					free[nb_free++] = m;
				else {
					rte_mempool_put_bulk(free[0]->pool,
						(void **)free, nb_free);
					free[0] = m;
					nb_free = 1;
				}
			}
		}
		rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
	} else {
		for (i = 1; i < VIRTIO_TX_FREE_NR; i++) {
			m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie;
			m = __rte_pktmbuf_prefree_seg(m);
			if (m != NULL)
				rte_mempool_put(m->pool, m);
		}
	}

	vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR;
	vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1);
}

uint16_t
virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
	uint16_t nb_pkts)
{
	struct virtqueue *txvq = tx_queue;
	uint16_t nb_used;
	uint16_t desc_idx;
	struct vring_desc *start_dp;
	uint16_t nb_tail, nb_commit;
	int i;
	uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1;

	nb_used = VIRTQUEUE_NUSED(txvq);
	rte_compiler_barrier();

	if (nb_used >= VIRTIO_TX_FREE_THRESH)
		virtio_xmit_cleanup(tx_queue);

	nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts);
	desc_idx = (uint16_t)(txvq->vq_avail_idx & desc_idx_max);
	start_dp = txvq->vq_ring.desc;
	nb_tail = (uint16_t)(desc_idx_max + 1 - desc_idx);

	if (nb_commit >= nb_tail) {
		for (i = 0; i < nb_tail; i++)
			txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
		for (i = 0; i < nb_tail; i++) {
			start_dp[desc_idx].addr =
				rte_mbuf_data_dma_addr(*tx_pkts);
			start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
			tx_pkts++;
			desc_idx++;
		}
		nb_commit -= nb_tail;
		desc_idx = 0;
	}
	for (i = 0; i < nb_commit; i++)
		txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
	for (i = 0; i < nb_commit; i++) {
		start_dp[desc_idx].addr = rte_mbuf_data_dma_addr(*tx_pkts);
		start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
		tx_pkts++;
		desc_idx++;
	}

	rte_compiler_barrier();

	txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1);
	txvq->vq_avail_idx += nb_pkts;
	txvq->vq_ring.avail->idx = txvq->vq_avail_idx;
	txvq->packets += nb_pkts;

	if (likely(nb_pkts)) {
		if (unlikely(virtqueue_kick_prepare(txvq)))
			virtqueue_notify(txvq);
	}

	return nb_pkts;
}

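/*
 * Build the 8-byte mbuf_initializer template used by virtio_rxq_rearm_vec():
 * a single 64-bit store of this value re-initializes the fields covered by
 * rearm_data (data_off, reference count, nb_segs and input port) on every
 * freshly allocated RX mbuf.
 */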
int __attribute__((cold))
virtio_rxq_vec_setup(struct virtqueue *rxq)
{
	uintptr_t p;
	struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */

	mb_def.nb_segs = 1;
	mb_def.data_off = RTE_PKTMBUF_HEADROOM;
	mb_def.port = rxq->port_id;
	rte_mbuf_refcnt_set(&mb_def, 1);

	/* prevent compiler reordering: rearm_data covers previous fields */
	rte_compiler_barrier();
	p = (uintptr_t)&mb_def.rearm_data;
	rxq->mbuf_initializer = *(uint64_t *)p;

	return 0;
}