/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */

#include <stdint.h>
#include <stdbool.h>
#include <linux/virtio_net.h>

#include <rte_mbuf.h>
#include <rte_memcpy.h>
#include <rte_vhost.h>

#include "main.h"

/*
 * A very simple vhost-user net driver implementation, without
 * any extra features enabled, such as TSO or mergeable Rx buffers.
 */

void
vs_vhost_net_setup(struct vhost_dev *dev)
{
	uint16_t i;
	int vid = dev->vid;
	struct vhost_queue *queue;
	int ret;

	RTE_LOG(INFO, VHOST_CONFIG,
		"setting builtin vhost-user net driver\n");

	rte_vhost_get_negotiated_features(vid, &dev->features);
	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
		dev->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		dev->hdr_len = sizeof(struct virtio_net_hdr);

	ret = rte_vhost_get_mem_table(vid, &dev->mem);
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "Failed to get "
			"VM memory layout for device(%d)\n", vid);
		return;
	}

	dev->nr_vrings = rte_vhost_get_vring_num(vid);
	for (i = 0; i < dev->nr_vrings; i++) {
		queue = &dev->queues[i];

		queue->last_used_idx = 0;
		queue->last_avail_idx = 0;
		rte_vhost_get_vhost_vring(vid, i, &queue->vr);
	}
}

void
vs_vhost_net_remove(struct vhost_dev *dev)
{
	free(dev->mem);
}

static __rte_always_inline int
enqueue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
	    struct rte_mbuf *m, uint16_t desc_idx)
{
	uint32_t desc_avail, desc_offset;
	uint64_t desc_chunck_len;
	uint32_t mbuf_avail, mbuf_offset;
	uint32_t cpy_len;
	struct vring_desc *desc;
	uint64_t desc_addr, desc_gaddr;
	struct virtio_net_hdr virtio_hdr = {0, 0, 0, 0, 0, 0};
	/* A counter to guard against a looping descriptor chain */
	uint16_t nr_desc = 1;

	desc = &vr->desc[desc_idx];
	desc_chunck_len = desc->len;
	desc_gaddr = desc->addr;
	desc_addr = rte_vhost_va_from_guest_pa(
			dev->mem, desc_gaddr, &desc_chunck_len);
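	/*
	 * Note that rte_vhost_va_from_guest_pa() may shrink 'desc_chunck_len'
	 * to the length of the largest host-contiguous region starting at
	 * 'desc_gaddr', and returns 0 when the address is not covered by the
	 * guest memory table; the copy paths below re-translate whenever the
	 * current chunk is exhausted.
	 */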
	/*
	 * The check of 'desc_addr' is placed outside the 'unlikely' macro to
	 * avoid a performance issue with some versions of gcc (4.8.4 and
	 * 5.3.0), which otherwise store the offset on the stack instead of
	 * in a register.
	 */
	if (unlikely(desc->len < dev->hdr_len) || !desc_addr)
		return -1;

	rte_prefetch0((void *)(uintptr_t)desc_addr);

	/* write virtio-net header */
	if (likely(desc_chunck_len >= dev->hdr_len)) {
		*(struct virtio_net_hdr *)(uintptr_t)desc_addr = virtio_hdr;
		desc_offset = dev->hdr_len;
	} else {
		uint64_t len;
		uint64_t remain = dev->hdr_len;
		uint64_t src = (uint64_t)(uintptr_t)&virtio_hdr, dst;
		uint64_t guest_addr = desc_gaddr;

		while (remain) {
			len = remain;
			dst = rte_vhost_va_from_guest_pa(dev->mem,
					guest_addr, &len);
			if (unlikely(!dst || !len))
				return -1;

			rte_memcpy((void *)(uintptr_t)dst,
					(void *)(uintptr_t)src,
					len);

			remain -= len;
			guest_addr += len;
			src += len;
		}

		desc_chunck_len = desc->len - dev->hdr_len;
		desc_gaddr += dev->hdr_len;
		desc_addr = rte_vhost_va_from_guest_pa(
				dev->mem, desc_gaddr,
				&desc_chunck_len);
		if (unlikely(!desc_addr))
			return -1;

		desc_offset = 0;
	}

	desc_avail = desc->len - dev->hdr_len;

	mbuf_avail = rte_pktmbuf_data_len(m);
	mbuf_offset = 0;
	while (mbuf_avail != 0 || m->next != NULL) {
		/* done with current mbuf, fetch next */
		if (mbuf_avail == 0) {
			m = m->next;

			mbuf_offset = 0;
			mbuf_avail = rte_pktmbuf_data_len(m);
		}

		/* done with current desc buf, fetch next */
		if (desc_avail == 0) {
			if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
				/* Not enough room in the vring buffer */
				return -1;
			}
			if (unlikely(desc->next >= vr->size ||
				     ++nr_desc > vr->size))
				return -1;

			desc = &vr->desc[desc->next];
			desc_chunck_len = desc->len;
			desc_gaddr = desc->addr;
			desc_addr = rte_vhost_va_from_guest_pa(
					dev->mem, desc_gaddr, &desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;

			desc_offset = 0;
			desc_avail = desc->len;
		} else if (unlikely(desc_chunck_len == 0)) {
			desc_chunck_len = desc_avail;
			desc_gaddr += desc_offset;
			desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
					desc_gaddr,
					&desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;

			desc_offset = 0;
		}

		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
			cpy_len);

		mbuf_avail -= cpy_len;
		mbuf_offset += cpy_len;
		desc_avail -= cpy_len;
		desc_offset += cpy_len;
		desc_chunck_len -= cpy_len;
	}

	return 0;
}

uint16_t
vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
		struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_queue *queue;
	struct rte_vhost_vring *vr;
	uint16_t avail_idx, free_entries, start_idx;
	uint16_t desc_indexes[MAX_PKT_BURST];
	uint16_t used_idx;
	uint32_t i;

	queue = &dev->queues[queue_id];
	vr = &queue->vr;

	avail_idx = __atomic_load_n(&vr->avail->idx, __ATOMIC_ACQUIRE);
	start_idx = queue->last_used_idx;
	free_entries = avail_idx - start_idx;
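	/*
	 * Both indexes are free-running 16-bit counters, so the subtraction
	 * above stays correct across wrap-around: e.g. avail_idx == 0x0001
	 * and start_idx == 0xffff still yields 2 free entries.
	 */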
	count = RTE_MIN(count, free_entries);
	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
	if (count == 0)
		return 0;

	/* Retrieve all of the desc indexes first to avoid caching issues. */
	rte_prefetch0(&vr->avail->ring[start_idx & (vr->size - 1)]);
	for (i = 0; i < count; i++) {
		used_idx = (start_idx + i) & (vr->size - 1);
		desc_indexes[i] = vr->avail->ring[used_idx];
		vr->used->ring[used_idx].id = desc_indexes[i];
		vr->used->ring[used_idx].len = pkts[i]->pkt_len +
			dev->hdr_len;
	}

	rte_prefetch0(&vr->desc[desc_indexes[0]]);
	for (i = 0; i < count; i++) {
		uint16_t desc_idx = desc_indexes[i];
		int err;

		err = enqueue_pkt(dev, vr, pkts[i], desc_idx);
		if (unlikely(err)) {
			used_idx = (start_idx + i) & (vr->size - 1);
			vr->used->ring[used_idx].len = dev->hdr_len;
		}

		if (i + 1 < count)
			rte_prefetch0(&vr->desc[desc_indexes[i+1]]);
	}

	__atomic_add_fetch(&vr->used->idx, count, __ATOMIC_RELEASE);
	queue->last_used_idx += count;

	rte_vhost_vring_call(dev->vid, queue_id);

	return count;
}

uint16_t
builtin_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
		struct rte_mbuf **pkts, uint32_t count)
{
	return vs_enqueue_pkts(dev, queue_id, pkts, count);
}

static __rte_always_inline int
dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
	    struct rte_mbuf *m, uint16_t desc_idx,
	    struct rte_mempool *mbuf_pool)
{
	struct vring_desc *desc;
	uint64_t desc_addr, desc_gaddr;
	uint32_t desc_avail, desc_offset;
	uint64_t desc_chunck_len;
	uint32_t mbuf_avail, mbuf_offset;
	uint32_t cpy_len;
	struct rte_mbuf *cur = m, *prev = m;
	/* A counter to guard against a looping descriptor chain */
	uint32_t nr_desc = 1;

	desc = &vr->desc[desc_idx];
	if (unlikely((desc->len < dev->hdr_len)) ||
			(desc->flags & VRING_DESC_F_INDIRECT))
		return -1;

	desc_chunck_len = desc->len;
	desc_gaddr = desc->addr;
	desc_addr = rte_vhost_va_from_guest_pa(
			dev->mem, desc_gaddr, &desc_chunck_len);
	if (unlikely(!desc_addr))
		return -1;

	/*
	 * We support neither ANY_LAYOUT nor VERSION_1, meaning a Tx packet
	 * from the guest must have at least 2 desc buffers: the first
	 * stores the header and the others store the data.
	 *
	 * And since we don't support TSO, we can simply skip the header.
	 */
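	/*
	 * Jump straight to the second descriptor, which holds the start of
	 * the packet data; the loop below walks the rest of the chain and
	 * copies it into 'm', chaining in extra mbufs whenever the current
	 * one fills up.
	 */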
283 */ 284 desc = &vr->desc[desc->next]; 285 desc_chunck_len = desc->len; 286 desc_gaddr = desc->addr; 287 desc_addr = rte_vhost_va_from_guest_pa( 288 dev->mem, desc_gaddr, &desc_chunck_len); 289 if (unlikely(!desc_addr)) 290 return -1; 291 rte_prefetch0((void *)(uintptr_t)desc_addr); 292 293 desc_offset = 0; 294 desc_avail = desc->len; 295 nr_desc += 1; 296 297 mbuf_offset = 0; 298 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; 299 while (1) { 300 cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail); 301 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, 302 mbuf_offset), 303 (void *)((uintptr_t)(desc_addr + desc_offset)), 304 cpy_len); 305 306 mbuf_avail -= cpy_len; 307 mbuf_offset += cpy_len; 308 desc_avail -= cpy_len; 309 desc_offset += cpy_len; 310 desc_chunck_len -= cpy_len; 311 312 /* This desc reaches to its end, get the next one */ 313 if (desc_avail == 0) { 314 if ((desc->flags & VRING_DESC_F_NEXT) == 0) 315 break; 316 317 if (unlikely(desc->next >= vr->size || 318 ++nr_desc > vr->size)) 319 return -1; 320 desc = &vr->desc[desc->next]; 321 322 desc_chunck_len = desc->len; 323 desc_gaddr = desc->addr; 324 desc_addr = rte_vhost_va_from_guest_pa( 325 dev->mem, desc_gaddr, &desc_chunck_len); 326 if (unlikely(!desc_addr)) 327 return -1; 328 rte_prefetch0((void *)(uintptr_t)desc_addr); 329 330 desc_offset = 0; 331 desc_avail = desc->len; 332 } else if (unlikely(desc_chunck_len == 0)) { 333 desc_chunck_len = desc_avail; 334 desc_gaddr += desc_offset; 335 desc_addr = rte_vhost_va_from_guest_pa(dev->mem, 336 desc_gaddr, 337 &desc_chunck_len); 338 if (unlikely(!desc_addr)) 339 return -1; 340 341 desc_offset = 0; 342 } 343 344 /* 345 * This mbuf reaches to its end, get a new one 346 * to hold more data. 347 */ 348 if (mbuf_avail == 0) { 349 cur = rte_pktmbuf_alloc(mbuf_pool); 350 if (unlikely(cur == NULL)) { 351 RTE_LOG(ERR, VHOST_DATA, "Failed to " 352 "allocate memory for mbuf.\n"); 353 return -1; 354 } 355 356 prev->next = cur; 357 prev->data_len = mbuf_offset; 358 m->nb_segs += 1; 359 m->pkt_len += mbuf_offset; 360 prev = cur; 361 362 mbuf_offset = 0; 363 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; 364 } 365 } 366 367 prev->data_len = mbuf_offset; 368 m->pkt_len += mbuf_offset; 369 370 return 0; 371 } 372 373 static uint16_t 374 vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id, 375 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 376 { 377 struct vhost_queue *queue; 378 struct rte_vhost_vring *vr; 379 uint32_t desc_indexes[MAX_PKT_BURST]; 380 uint32_t used_idx; 381 uint32_t i = 0; 382 uint16_t free_entries; 383 uint16_t avail_idx; 384 385 queue = &dev->queues[queue_id]; 386 vr = &queue->vr; 387 388 free_entries = __atomic_load_n(&vr->avail->idx, __ATOMIC_ACQUIRE) - 389 queue->last_avail_idx; 390 if (free_entries == 0) 391 return 0; 392 393 /* Prefetch available and used ring */ 394 avail_idx = queue->last_avail_idx & (vr->size - 1); 395 used_idx = queue->last_used_idx & (vr->size - 1); 396 rte_prefetch0(&vr->avail->ring[avail_idx]); 397 rte_prefetch0(&vr->used->ring[used_idx]); 398 399 count = RTE_MIN(count, MAX_PKT_BURST); 400 count = RTE_MIN(count, free_entries); 401 402 if (unlikely(count == 0)) 403 return 0; 404 405 /* 406 * Retrieve all of the head indexes first and pre-update used entries 407 * to avoid caching issues. 
408 */ 409 for (i = 0; i < count; i++) { 410 avail_idx = (queue->last_avail_idx + i) & (vr->size - 1); 411 used_idx = (queue->last_used_idx + i) & (vr->size - 1); 412 desc_indexes[i] = vr->avail->ring[avail_idx]; 413 414 vr->used->ring[used_idx].id = desc_indexes[i]; 415 vr->used->ring[used_idx].len = 0; 416 } 417 418 /* Prefetch descriptor index. */ 419 rte_prefetch0(&vr->desc[desc_indexes[0]]); 420 for (i = 0; i < count; i++) { 421 int err; 422 423 if (likely(i + 1 < count)) 424 rte_prefetch0(&vr->desc[desc_indexes[i + 1]]); 425 426 pkts[i] = rte_pktmbuf_alloc(mbuf_pool); 427 if (unlikely(pkts[i] == NULL)) { 428 RTE_LOG(ERR, VHOST_DATA, 429 "Failed to allocate memory for mbuf.\n"); 430 break; 431 } 432 433 err = dequeue_pkt(dev, vr, pkts[i], desc_indexes[i], mbuf_pool); 434 if (unlikely(err)) { 435 rte_pktmbuf_free(pkts[i]); 436 break; 437 } 438 439 } 440 441 queue->last_avail_idx += i; 442 queue->last_used_idx += i; 443 444 __atomic_add_fetch(&vr->used->idx, i, __ATOMIC_ACQ_REL); 445 446 rte_vhost_vring_call(dev->vid, queue_id); 447 448 return i; 449 } 450 451 uint16_t 452 builtin_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id, 453 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) 454 { 455 return vs_dequeue_pkts(dev, queue_id, mbuf_pool, pkts, count); 456 } 457