1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright (c) 2018, Microsoft Corporation.
3 * All Rights Reserved.
4 */
5
6 #include <unistd.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <sys/uio.h>
10
11 #include <rte_eal.h>
12 #include <rte_tailq.h>
13 #include <rte_log.h>
14 #include <rte_malloc.h>
15 #include <rte_atomic.h>
16 #include <rte_memory.h>
17 #include <rte_bus_vmbus.h>
18
19 #include "private.h"
20
/*
 * Atomically OR @mask into the 32-bit word at @addr.
 * Uses sequentially-consistent ordering so the host observes the
 * trigger bit only after all prior ring updates are visible.
 */
static inline void
vmbus_sync_set_bit(volatile RTE_ATOMIC(uint32_t) *addr, uint32_t mask)
{
	rte_atomic_fetch_or_explicit(addr, mask, rte_memory_order_seq_cst);
}
26
27 static inline void
vmbus_set_monitor(const struct vmbus_channel * channel,uint32_t monitor_id)28 vmbus_set_monitor(const struct vmbus_channel *channel, uint32_t monitor_id)
29 {
30 RTE_ATOMIC(uint32_t) *monitor_addr;
31 uint32_t monitor_mask;
32 unsigned int trigger_index;
33
34 trigger_index = monitor_id / HV_MON_TRIG_LEN;
35 monitor_mask = 1u << (monitor_id % HV_MON_TRIG_LEN);
36
37 monitor_addr = &channel->monitor_page->trigs[trigger_index].pending;
38 vmbus_sync_set_bit(monitor_addr, monitor_mask);
39 }
40
/* Notify the host by setting this channel's monitor trigger bit. */
static void
vmbus_set_event(const struct vmbus_channel *chan)
{
	vmbus_set_monitor(chan, chan->monitor_id);
}
46
47 /*
48 * Set the wait between when hypervisor examines the trigger.
49 */
50 void
rte_vmbus_set_latency(const struct rte_vmbus_device * dev,const struct vmbus_channel * chan,uint32_t latency)51 rte_vmbus_set_latency(const struct rte_vmbus_device *dev,
52 const struct vmbus_channel *chan,
53 uint32_t latency)
54 {
55 uint32_t trig_idx = chan->monitor_id / VMBUS_MONTRIG_LEN;
56 uint32_t trig_offs = chan->monitor_id % VMBUS_MONTRIG_LEN;
57
58 if (latency >= UINT16_MAX * 100) {
59 VMBUS_LOG(ERR, "invalid latency value %u", latency);
60 return;
61 }
62
63 if (trig_idx >= VMBUS_MONTRIGS_MAX) {
64 VMBUS_LOG(ERR, "invalid monitor trigger %u",
65 trig_idx);
66 return;
67 }
68
69 /* Host value is expressed in 100 nanosecond units */
70 dev->monitor_page->lat[trig_idx][trig_offs] = latency / 100;
71 }
72
73 /*
74 * Notify host that there are data pending on our TX bufring.
75 *
76 * Since this in userspace, rely on the monitor page.
77 * Can't do a hypercall from userspace.
78 */
79 void
rte_vmbus_chan_signal_tx(const struct vmbus_channel * chan)80 rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan)
81 {
82 const struct vmbus_br *tbr = &chan->txbr;
83
84 /* Make sure all updates are done before signaling host */
85 rte_smp_wmb();
86
87 /* If host is ignoring interrupts? */
88 if (tbr->vbr->imask)
89 return;
90
91 vmbus_set_event(chan);
92 }
93
94
95 /* Do a simple send directly using transmit ring. */
rte_vmbus_chan_send(struct vmbus_channel * chan,uint16_t type,void * data,uint32_t dlen,uint64_t xactid,uint32_t flags,bool * need_sig)96 int rte_vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
97 void *data, uint32_t dlen,
98 uint64_t xactid, uint32_t flags, bool *need_sig)
99 {
100 struct vmbus_chanpkt pkt;
101 unsigned int pktlen, pad_pktlen;
102 const uint32_t hlen = sizeof(pkt);
103 bool send_evt = false;
104 uint64_t pad = 0;
105 struct iovec iov[3];
106 int error;
107
108 pktlen = hlen + dlen;
109 pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
110
111 pkt.hdr.type = type;
112 pkt.hdr.flags = flags;
113 pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
114 pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
115 pkt.hdr.xactid = xactid;
116
117 iov[0].iov_base = &pkt;
118 iov[0].iov_len = hlen;
119 iov[1].iov_base = data;
120 iov[1].iov_len = dlen;
121 iov[2].iov_base = &pad;
122 iov[2].iov_len = pad_pktlen - pktlen;
123
124 error = vmbus_txbr_write(&chan->txbr, iov, 3, &send_evt);
125
126 /*
127 * caller sets need_sig to non-NULL if it will handle
128 * signaling if required later.
129 * if need_sig is NULL, signal now if needed.
130 */
131 if (need_sig)
132 *need_sig |= send_evt;
133 else if (error == 0 && send_evt)
134 rte_vmbus_chan_signal_tx(chan);
135 return error;
136 }
137
138 /* Do a scatter/gather send where the descriptor points to data. */
rte_vmbus_chan_send_sglist(struct vmbus_channel * chan,struct vmbus_gpa sg[],uint32_t sglen,void * data,uint32_t dlen,uint64_t xactid,bool * need_sig)139 int rte_vmbus_chan_send_sglist(struct vmbus_channel *chan,
140 struct vmbus_gpa sg[], uint32_t sglen,
141 void *data, uint32_t dlen,
142 uint64_t xactid, bool *need_sig)
143 {
144 struct vmbus_chanpkt_sglist pkt;
145 unsigned int pktlen, pad_pktlen, hlen;
146 bool send_evt = false;
147 struct iovec iov[4];
148 uint64_t pad = 0;
149 int error;
150
151 hlen = offsetof(struct vmbus_chanpkt_sglist, gpa[sglen]);
152 pktlen = hlen + dlen;
153 pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
154
155 pkt.hdr.type = VMBUS_CHANPKT_TYPE_GPA;
156 pkt.hdr.flags = VMBUS_CHANPKT_FLAG_RC;
157 pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
158 pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
159 pkt.hdr.xactid = xactid;
160 pkt.rsvd = 0;
161 pkt.gpa_cnt = sglen;
162
163 iov[0].iov_base = &pkt;
164 iov[0].iov_len = sizeof(pkt);
165 iov[1].iov_base = sg;
166 iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
167 iov[2].iov_base = data;
168 iov[2].iov_len = dlen;
169 iov[3].iov_base = &pad;
170 iov[3].iov_len = pad_pktlen - pktlen;
171
172 error = vmbus_txbr_write(&chan->txbr, iov, 4, &send_evt);
173
174 /* if caller is batching, just propagate the status */
175 if (need_sig)
176 *need_sig |= send_evt;
177 else if (error == 0 && send_evt)
178 rte_vmbus_chan_signal_tx(chan);
179 return error;
180 }
181
rte_vmbus_chan_rx_empty(const struct vmbus_channel * channel)182 bool rte_vmbus_chan_rx_empty(const struct vmbus_channel *channel)
183 {
184 const struct vmbus_br *br = &channel->rxbr;
185
186 rte_smp_rmb();
187 return br->vbr->rindex == br->vbr->windex;
188 }
189
/*
 * Signal host after reading N bytes.
 *
 * The host advertises (via pending_send) how many bytes it is waiting
 * to write; only wake it when our read just opened up enough room.
 * Barrier placement is load-bearing: pending_send must be read after
 * our new read index is published.
 */
void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read)
{
	struct vmbus_br *rbr = &chan->rxbr;
	uint32_t write_sz, pending_sz;

	/* No need for signaling on older versions */
	if (!rbr->vbr->feature_bits.feat_pending_send_sz)
		return;

	/* Make sure reading of pending happens after new read index */
	rte_smp_mb();

	pending_sz = rbr->vbr->pending_send;
	/* Host has nothing queued, so it is not blocked waiting on us */
	if (!pending_sz)
		return;

	rte_smp_rmb();
	write_sz = vmbus_br_availwrite(rbr, rbr->vbr->windex);

	/* If there was space before then host was not blocked */
	if (write_sz - bytes_read > pending_sz)
		return;

	/* If pending write will not fit */
	if (write_sz <= pending_sz)
		return;

	vmbus_set_event(chan);
}
220
rte_vmbus_chan_recv(struct vmbus_channel * chan,void * data,uint32_t * len,uint64_t * request_id)221 int rte_vmbus_chan_recv(struct vmbus_channel *chan, void *data, uint32_t *len,
222 uint64_t *request_id)
223 {
224 struct vmbus_chanpkt_hdr pkt;
225 uint32_t dlen, hlen, bufferlen = *len;
226 int error;
227
228 *len = 0;
229
230 error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
231 if (error)
232 return error;
233
234 if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
235 VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
236 /* XXX this channel is dead actually. */
237 return -EIO;
238 }
239
240 if (unlikely(pkt.hlen > pkt.tlen)) {
241 VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
242 pkt.hlen, pkt.tlen);
243 return -EIO;
244 }
245
246 /* Length are in quad words */
247 hlen = pkt.hlen << VMBUS_CHANPKT_SIZE_SHIFT;
248 dlen = (pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT) - hlen;
249 *len = dlen;
250
251 /* If caller buffer is not large enough */
252 if (unlikely(dlen > bufferlen))
253 return -ENOBUFS;
254
255 if (request_id)
256 *request_id = pkt.xactid;
257
258 /* Read data and skip packet header */
259 error = vmbus_rxbr_read(&chan->rxbr, data, dlen, hlen);
260 if (error)
261 return error;
262
263 rte_vmbus_chan_signal_read(chan, dlen + hlen + sizeof(uint64_t));
264 return 0;
265 }
266
267 /* TODO: replace this with inplace ring buffer (no copy) */
rte_vmbus_chan_recv_raw(struct vmbus_channel * chan,void * data,uint32_t * len)268 int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan,
269 void *data, uint32_t *len)
270 {
271 struct vmbus_chanpkt_hdr pkt;
272 uint32_t dlen, bufferlen = *len;
273 int error;
274
275 error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
276 if (error)
277 return error;
278
279 if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
280 VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
281 /* XXX this channel is dead actually. */
282 return -EIO;
283 }
284
285 if (unlikely(pkt.hlen > pkt.tlen)) {
286 VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
287 pkt.hlen, pkt.tlen);
288 return -EIO;
289 }
290
291 /* Length are in quad words */
292 dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
293 *len = dlen;
294
295 /* If caller buffer is not large enough */
296 if (unlikely(dlen > bufferlen))
297 return -ENOBUFS;
298
299 /* Read data and skip packet header */
300 error = vmbus_rxbr_read(&chan->rxbr, data, dlen, 0);
301 if (error)
302 return error;
303
304 /* Return the number of bytes read */
305 return dlen + sizeof(uint64_t);
306 }
307
vmbus_chan_create(const struct rte_vmbus_device * device,uint16_t relid,uint16_t subid,uint8_t monitor_id,struct vmbus_channel ** new_chan)308 int vmbus_chan_create(const struct rte_vmbus_device *device,
309 uint16_t relid, uint16_t subid, uint8_t monitor_id,
310 struct vmbus_channel **new_chan)
311 {
312 struct vmbus_channel *chan;
313 int err;
314
315 chan = rte_zmalloc_socket("VMBUS", sizeof(*chan), RTE_CACHE_LINE_SIZE,
316 device->device.numa_node);
317 if (!chan)
318 return -ENOMEM;
319
320 STAILQ_INIT(&chan->subchannel_list);
321 chan->device = device;
322 chan->subchannel_id = subid;
323 chan->relid = relid;
324 chan->monitor_id = monitor_id;
325 chan->monitor_page = device->monitor_page;
326 *new_chan = chan;
327
328 err = vmbus_uio_map_rings(chan);
329 if (err) {
330 rte_free(chan);
331 return err;
332 }
333
334 return 0;
335 }
336
337 /* Setup the primary channel */
rte_vmbus_chan_open(struct rte_vmbus_device * device,struct vmbus_channel ** new_chan)338 int rte_vmbus_chan_open(struct rte_vmbus_device *device,
339 struct vmbus_channel **new_chan)
340 {
341 struct mapped_vmbus_resource *uio_res;
342 int err;
343
344 uio_res = vmbus_uio_find_resource(device);
345 if (!uio_res) {
346 VMBUS_LOG(ERR, "can't find uio resource");
347 return -EINVAL;
348 }
349
350 err = vmbus_chan_create(device, device->relid, 0,
351 device->monitor_id, new_chan);
352 if (!err)
353 device->primary = *new_chan;
354
355 return err;
356 }
357
rte_vmbus_max_channels(const struct rte_vmbus_device * device)358 int rte_vmbus_max_channels(const struct rte_vmbus_device *device)
359 {
360 if (vmbus_uio_subchannels_supported(device, device->primary))
361 return VMBUS_MAX_CHANNELS;
362 else
363 return 1;
364 }
365
366 /* Setup secondary channel */
rte_vmbus_subchan_open(struct vmbus_channel * primary,struct vmbus_channel ** new_chan)367 int rte_vmbus_subchan_open(struct vmbus_channel *primary,
368 struct vmbus_channel **new_chan)
369 {
370 struct vmbus_channel *chan;
371 int err;
372
373 err = vmbus_uio_get_subchan(primary, &chan);
374 if (err)
375 return err;
376
377 STAILQ_INSERT_TAIL(&primary->subchannel_list, chan, next);
378 *new_chan = chan;
379 return 0;
380 }
381
/* Return the subchannel index (0 for the primary channel). */
uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan)
{
	return chan->subchannel_id;
}
386
rte_vmbus_chan_close(struct vmbus_channel * chan)387 void rte_vmbus_chan_close(struct vmbus_channel *chan)
388 {
389 const struct rte_vmbus_device *device = chan->device;
390 struct vmbus_channel *primary = device->primary;
391
392 /*
393 * intentionally leak primary channel because
394 * secondary may still reference it
395 */
396 if (chan != primary) {
397 STAILQ_REMOVE(&primary->subchannel_list, chan,
398 vmbus_channel, next);
399 rte_free(chan);
400 }
401
402 }
403
vmbus_dump_ring(FILE * f,const char * id,const struct vmbus_br * br)404 static void vmbus_dump_ring(FILE *f, const char *id, const struct vmbus_br *br)
405 {
406 const struct vmbus_bufring *vbr = br->vbr;
407 struct vmbus_chanpkt_hdr pkt;
408
409 fprintf(f, "%s windex=%u rindex=%u mask=%u pending=%u feature=%#x\n",
410 id, vbr->windex, vbr->rindex, vbr->imask,
411 vbr->pending_send, vbr->feature_bits.value);
412 fprintf(f, " size=%u avail write=%u read=%u\n",
413 br->dsize, vmbus_br_availwrite(br, vbr->windex),
414 vmbus_br_availread(br));
415
416 if (vmbus_rxbr_peek(br, &pkt, sizeof(pkt)) == 0)
417 fprintf(f, " pkt type %#x len %u flags %#x xactid %#"PRIx64"\n",
418 pkt.type,
419 pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT,
420 pkt.flags, pkt.xactid);
421 }
422
/* Dump channel identity plus both ring buffers (RX first, then TX). */
void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan)
{
	fprintf(f, "channel[%u] relid=%u monitor=%u\n",
		chan->subchannel_id, chan->relid, chan->monitor_id);
	vmbus_dump_ring(f, "rxbr", &chan->rxbr);
	vmbus_dump_ring(f, "txbr", &chan->txbr);
}
430