/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */

#include <stdint.h>
#include <stdbool.h>
#include <linux/virtio_net.h>

#include <rte_mbuf.h>
#include <rte_memcpy.h>
#include <rte_vhost.h>

#include "main.h"

/*
 * A very simple vhost-user net driver implementation, without
 * any extra features being enabled, such as TSO and mrg-Rx.
 */

void
vs_vhost_net_setup(struct vhost_dev *dev)
{
	uint16_t i;
	int vid = dev->vid;
	struct vhost_queue *queue;
	int ret;

	RTE_LOG(INFO, VHOST_CONFIG,
		"setting builtin vhost-user net driver\n");

	rte_vhost_get_negotiated_features(vid, &dev->features);
	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
		dev->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		dev->hdr_len = sizeof(struct virtio_net_hdr);

	ret = rte_vhost_get_mem_table(vid, &dev->mem);
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "Failed to get "
			"VM memory layout for device(%d)\n", vid);
		return;
	}

	dev->nr_vrings = rte_vhost_get_vring_num(vid);
	for (i = 0; i < dev->nr_vrings; i++) {
		queue = &dev->queues[i];

		queue->last_used_idx = 0;
		queue->last_avail_idx = 0;
		rte_vhost_get_vhost_vring(vid, i, &queue->vr);
	}
}

void
vs_vhost_net_remove(struct vhost_dev *dev)
{
	free(dev->mem);
}

static __rte_always_inline int
enqueue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
	    struct rte_mbuf *m, uint16_t desc_idx)
{
	uint32_t desc_avail, desc_offset;
	uint64_t desc_chunck_len;
	uint32_t mbuf_avail, mbuf_offset;
	uint32_t cpy_len;
	struct vring_desc *desc;
	uint64_t desc_addr, desc_gaddr;
	struct virtio_net_hdr virtio_hdr = {0, 0, 0, 0, 0, 0};
	/* A counter to avoid a dead loop in the desc chain */
	uint16_t nr_desc = 1;

	desc = &vr->desc[desc_idx];
	desc_chunck_len = desc->len;
	desc_gaddr = desc->addr;
	desc_addr = rte_vhost_va_from_guest_pa(
			dev->mem, desc_gaddr, &desc_chunck_len);
	/*
	 * The check of 'desc_addr' is placed outside of the 'unlikely' macro
	 * to avoid a performance issue with some versions of gcc (4.8.4 and
	 * 5.3.0), which otherwise store the offset on the stack instead of in
	 * a register.
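	 *
	 * Note that rte_vhost_va_from_guest_pa() also shrinks
	 * 'desc_chunck_len' to the size of the contiguous host mapping at
	 * 'desc_addr'; the copy paths below re-translate the address whenever
	 * the current chunk is exhausted.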
	 */
	if (unlikely(desc->len < dev->hdr_len) || !desc_addr)
		return -1;

	rte_prefetch0((void *)(uintptr_t)desc_addr);

	/* write virtio-net header */
	if (likely(desc_chunck_len >= dev->hdr_len)) {
		*(struct virtio_net_hdr *)(uintptr_t)desc_addr = virtio_hdr;
		desc_offset = dev->hdr_len;
	} else {
		uint64_t len;
		uint64_t remain = dev->hdr_len;
		uint64_t src = (uint64_t)(uintptr_t)&virtio_hdr, dst;
		uint64_t guest_addr = desc_gaddr;

		while (remain) {
			len = remain;
			dst = rte_vhost_va_from_guest_pa(dev->mem,
					guest_addr, &len);
			if (unlikely(!dst || !len))
				return -1;

			rte_memcpy((void *)(uintptr_t)dst,
					(void *)(uintptr_t)src,
					len);

			remain -= len;
			guest_addr += len;
			src += len;
		}

		desc_chunck_len = desc->len - dev->hdr_len;
		desc_gaddr += dev->hdr_len;
		desc_addr = rte_vhost_va_from_guest_pa(
				dev->mem, desc_gaddr,
				&desc_chunck_len);
		if (unlikely(!desc_addr))
			return -1;

		desc_offset = 0;
	}

	desc_avail = desc->len - dev->hdr_len;

	mbuf_avail = rte_pktmbuf_data_len(m);
	mbuf_offset = 0;
	while (mbuf_avail != 0 || m->next != NULL) {
		/* done with current mbuf, fetch next */
		if (mbuf_avail == 0) {
			m = m->next;

			mbuf_offset = 0;
			mbuf_avail = rte_pktmbuf_data_len(m);
		}

		/* done with current desc buf, fetch next */
		if (desc_avail == 0) {
			if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
				/* Not enough room in the vring buffer */
				return -1;
			}
			if (unlikely(desc->next >= vr->size ||
				     ++nr_desc > vr->size))
				return -1;

			desc = &vr->desc[desc->next];
			desc_chunck_len = desc->len;
			desc_gaddr = desc->addr;
			desc_addr = rte_vhost_va_from_guest_pa(
					dev->mem, desc_gaddr, &desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;

			desc_offset = 0;
			desc_avail = desc->len;
		} else if (unlikely(desc_chunck_len == 0)) {
			desc_chunck_len = desc_avail;
			desc_gaddr += desc_offset;
			desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
					desc_gaddr,
					&desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;

			desc_offset = 0;
		}

		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
			cpy_len);

		mbuf_avail -= cpy_len;
		mbuf_offset += cpy_len;
		desc_avail -= cpy_len;
		desc_offset += cpy_len;
		desc_chunck_len -= cpy_len;
	}

	return 0;
}

uint16_t
vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
		struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_queue *queue;
	struct rte_vhost_vring *vr;
	uint16_t avail_idx, free_entries, start_idx;
	uint16_t desc_indexes[MAX_PKT_BURST];
	uint16_t used_idx;
	uint32_t i;

	queue = &dev->queues[queue_id];
	vr = &queue->vr;

	avail_idx = __atomic_load_n(&vr->avail->idx, __ATOMIC_ACQUIRE);
	start_idx = queue->last_used_idx;
	free_entries = avail_idx - start_idx;
	count = RTE_MIN(count, free_entries);
	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
	if (count == 0)
		return 0;

	/*
	 * Retrieve all of the desc indexes first to avoid caching issues.
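	 * The used ring entries are pre-filled here as well, so the copy loop
	 * below only has to overwrite the reported length when enqueue_pkt()
	 * fails.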
	 */
	rte_prefetch0(&vr->avail->ring[start_idx & (vr->size - 1)]);
	for (i = 0; i < count; i++) {
		used_idx = (start_idx + i) & (vr->size - 1);
		desc_indexes[i] = vr->avail->ring[used_idx];
		vr->used->ring[used_idx].id = desc_indexes[i];
		vr->used->ring[used_idx].len = pkts[i]->pkt_len +
			dev->hdr_len;
	}

	rte_prefetch0(&vr->desc[desc_indexes[0]]);
	for (i = 0; i < count; i++) {
		uint16_t desc_idx = desc_indexes[i];
		int err;

		err = enqueue_pkt(dev, vr, pkts[i], desc_idx);
		if (unlikely(err)) {
			used_idx = (start_idx + i) & (vr->size - 1);
			vr->used->ring[used_idx].len = dev->hdr_len;
		}

		if (i + 1 < count)
			rte_prefetch0(&vr->desc[desc_indexes[i + 1]]);
	}

	__atomic_add_fetch(&vr->used->idx, count, __ATOMIC_RELEASE);
	queue->last_used_idx += count;

	rte_vhost_vring_call(dev->vid, queue_id);

	return count;
}

static __rte_always_inline int
dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
	    struct rte_mbuf *m, uint16_t desc_idx,
	    struct rte_mempool *mbuf_pool)
{
	struct vring_desc *desc;
	uint64_t desc_addr, desc_gaddr;
	uint32_t desc_avail, desc_offset;
	uint64_t desc_chunck_len;
	uint32_t mbuf_avail, mbuf_offset;
	uint32_t cpy_len;
	struct rte_mbuf *cur = m, *prev = m;
	/* A counter to avoid a dead loop in the desc chain */
	uint32_t nr_desc = 1;

	desc = &vr->desc[desc_idx];
	if (unlikely((desc->len < dev->hdr_len)) ||
			(desc->flags & VRING_DESC_F_INDIRECT))
		return -1;

	desc_chunck_len = desc->len;
	desc_gaddr = desc->addr;
	desc_addr = rte_vhost_va_from_guest_pa(
			dev->mem, desc_gaddr, &desc_chunck_len);
	if (unlikely(!desc_addr))
		return -1;

	/*
	 * We don't support ANY_LAYOUT or VERSION_1, meaning a Tx packet
	 * from the guest must have at least two desc buffers: the first
	 * for storing the header and the others for storing the data.
	 *
	 * And since we don't support TSO, we can simply skip the
	 * header.
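	 * The skipped descriptor carries only the 'struct virtio_net_hdr'
	 * written by the guest; its contents are ignored because this driver
	 * enables no offloads such as TSO (see the file header comment), so
	 * the payload copy starts from the next descriptor in the chain.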
	 */
	desc = &vr->desc[desc->next];
	desc_chunck_len = desc->len;
	desc_gaddr = desc->addr;
	desc_addr = rte_vhost_va_from_guest_pa(
			dev->mem, desc_gaddr, &desc_chunck_len);
	if (unlikely(!desc_addr))
		return -1;
	rte_prefetch0((void *)(uintptr_t)desc_addr);

	desc_offset = 0;
	desc_avail = desc->len;
	nr_desc += 1;

	mbuf_offset = 0;
	mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
	while (1) {
		cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
		rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
						   mbuf_offset),
			(void *)((uintptr_t)(desc_addr + desc_offset)),
			cpy_len);

		mbuf_avail -= cpy_len;
		mbuf_offset += cpy_len;
		desc_avail -= cpy_len;
		desc_offset += cpy_len;
		desc_chunck_len -= cpy_len;

		/* This desc reaches its end, get the next one */
		if (desc_avail == 0) {
			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
				break;

			if (unlikely(desc->next >= vr->size ||
				     ++nr_desc > vr->size))
				return -1;
			desc = &vr->desc[desc->next];

			desc_chunck_len = desc->len;
			desc_gaddr = desc->addr;
			desc_addr = rte_vhost_va_from_guest_pa(
					dev->mem, desc_gaddr, &desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;
			rte_prefetch0((void *)(uintptr_t)desc_addr);

			desc_offset = 0;
			desc_avail = desc->len;
		} else if (unlikely(desc_chunck_len == 0)) {
			desc_chunck_len = desc_avail;
			desc_gaddr += desc_offset;
			desc_addr = rte_vhost_va_from_guest_pa(dev->mem,
					desc_gaddr,
					&desc_chunck_len);
			if (unlikely(!desc_addr))
				return -1;

			desc_offset = 0;
		}

		/*
		 * This mbuf reaches its end, get a new one
		 * to hold more data.
		 */
		if (mbuf_avail == 0) {
			cur = rte_pktmbuf_alloc(mbuf_pool);
			if (unlikely(cur == NULL)) {
				RTE_LOG(ERR, VHOST_DATA, "Failed to "
					"allocate memory for mbuf.\n");
				return -1;
			}

			prev->next = cur;
			prev->data_len = mbuf_offset;
			m->nb_segs += 1;
			m->pkt_len += mbuf_offset;
			prev = cur;

			mbuf_offset = 0;
			mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
		}
	}

	prev->data_len = mbuf_offset;
	m->pkt_len += mbuf_offset;

	return 0;
}

uint16_t
vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
	struct vhost_queue *queue;
	struct rte_vhost_vring *vr;
	uint32_t desc_indexes[MAX_PKT_BURST];
	uint32_t used_idx;
	uint32_t i = 0;
	uint16_t free_entries;
	uint16_t avail_idx;

	queue = &dev->queues[queue_id];
	vr = &queue->vr;

	free_entries = __atomic_load_n(&vr->avail->idx, __ATOMIC_ACQUIRE) -
			queue->last_avail_idx;
	if (free_entries == 0)
		return 0;

	/* Prefetch available and used ring */
	avail_idx = queue->last_avail_idx & (vr->size - 1);
	used_idx = queue->last_used_idx & (vr->size - 1);
	rte_prefetch0(&vr->avail->ring[avail_idx]);
	rte_prefetch0(&vr->used->ring[used_idx]);

	count = RTE_MIN(count, MAX_PKT_BURST);
	count = RTE_MIN(count, free_entries);

	if (unlikely(count == 0))
		return 0;

	/*
	 * Retrieve all of the head indexes first and pre-update used entries
	 * to avoid caching issues.
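	 * The reported length stays 0 for every entry: on the guest Tx path
	 * the device only reads the buffers and writes nothing back into them.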
	 */
	for (i = 0; i < count; i++) {
		avail_idx = (queue->last_avail_idx + i) & (vr->size - 1);
		used_idx = (queue->last_used_idx + i) & (vr->size - 1);
		desc_indexes[i] = vr->avail->ring[avail_idx];

		vr->used->ring[used_idx].id = desc_indexes[i];
		vr->used->ring[used_idx].len = 0;
	}

	/* Prefetch descriptor index. */
	rte_prefetch0(&vr->desc[desc_indexes[0]]);
	for (i = 0; i < count; i++) {
		int err;

		if (likely(i + 1 < count))
			rte_prefetch0(&vr->desc[desc_indexes[i + 1]]);

		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
		if (unlikely(pkts[i] == NULL)) {
			RTE_LOG(ERR, VHOST_DATA,
				"Failed to allocate memory for mbuf.\n");
			break;
		}

		err = dequeue_pkt(dev, vr, pkts[i], desc_indexes[i], mbuf_pool);
		if (unlikely(err)) {
			rte_pktmbuf_free(pkts[i]);
			break;
		}
	}

	queue->last_avail_idx += i;
	queue->last_used_idx += i;

	__atomic_add_fetch(&vr->used->idx, i, __ATOMIC_ACQ_REL);

	rte_vhost_vring_call(dev->vid, queue_id);

	return i;
}
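
/*
 * Illustrative sketch only, not part of the original example: roughly how a
 * polling worker could drive the two burst functions above.  It assumes the
 * VIRTIO_RXQ/VIRTIO_TXQ queue ids and MAX_PKT_BURST from this sample's
 * main.h, plus a previously created mbuf pool, and is kept out of the build
 * on purpose.
 */
#if 0
static void
vs_poll_once(struct vhost_dev *dev, struct rte_mempool *mbuf_pool)
{
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	uint16_t nr, i;

	/* Drain packets the guest has transmitted on its Tx queue. */
	nr = vs_dequeue_pkts(dev, VIRTIO_TXQ, mbuf_pool, pkts, MAX_PKT_BURST);
	if (nr == 0)
		return;

	/*
	 * Echo them back on the guest's Rx queue.  vs_enqueue_pkts() copies
	 * the data into the guest ring, so the mbufs can be freed afterwards
	 * whether or not they were accepted.
	 */
	vs_enqueue_pkts(dev, VIRTIO_RXQ, pkts, nr);
	for (i = 0; i < nr; i++)
		rte_pktmbuf_free(pkts[i]);
}
#endif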