/* xref: /dpdk/drivers/bus/vmbus/vmbus_channel.c (revision e12a0166c80f65e35408f4715b2f3a60763c3741) */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2018, Microsoft Corporation.
 * All Rights Reserved.
 */

#include <unistd.h>
#include <stdint.h>
#include <string.h>
#include <sys/uio.h>

#include <rte_eal.h>
#include <rte_tailq.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_memory.h>
#include <rte_bus_vmbus.h>

#include "private.h"

/*
 * Atomically OR 'mask' into the 32-bit word at 'addr'.
 * Uses sequentially-consistent ordering so the store is visible to the
 * hypervisor before any subsequent operation on this CPU.
 */
static inline void
vmbus_sync_set_bit(volatile RTE_ATOMIC(uint32_t) *addr, uint32_t mask)
{
	rte_atomic_fetch_or_explicit(addr, mask, rte_memory_order_seq_cst);
}

27 static inline void
vmbus_set_monitor(const struct vmbus_channel * channel,uint32_t monitor_id)28 vmbus_set_monitor(const struct vmbus_channel *channel, uint32_t monitor_id)
29 {
30 	RTE_ATOMIC(uint32_t) *monitor_addr;
31 	uint32_t monitor_mask;
32 	unsigned int trigger_index;
33 
34 	trigger_index = monitor_id / HV_MON_TRIG_LEN;
35 	monitor_mask = 1u << (monitor_id % HV_MON_TRIG_LEN);
36 
37 	monitor_addr = &channel->monitor_page->trigs[trigger_index].pending;
38 	vmbus_sync_set_bit(monitor_addr, monitor_mask);
39 }
40 
/* Signal the host that this channel has pending data (via monitor page). */
static void
vmbus_set_event(const struct vmbus_channel *chan)
{
	vmbus_set_monitor(chan, chan->monitor_id);
}

47 /*
48  * Set the wait between when hypervisor examines the trigger.
49  */
50 void
rte_vmbus_set_latency(const struct rte_vmbus_device * dev,const struct vmbus_channel * chan,uint32_t latency)51 rte_vmbus_set_latency(const struct rte_vmbus_device *dev,
52 		      const struct vmbus_channel *chan,
53 		      uint32_t latency)
54 {
55 	uint32_t trig_idx = chan->monitor_id / VMBUS_MONTRIG_LEN;
56 	uint32_t trig_offs = chan->monitor_id % VMBUS_MONTRIG_LEN;
57 
58 	if (latency >= UINT16_MAX * 100) {
59 		VMBUS_LOG(ERR, "invalid latency value %u", latency);
60 		return;
61 	}
62 
63 	if (trig_idx >= VMBUS_MONTRIGS_MAX) {
64 		VMBUS_LOG(ERR, "invalid monitor trigger %u",
65 			  trig_idx);
66 		return;
67 	}
68 
69 	/* Host value is expressed in 100 nanosecond units */
70 	dev->monitor_page->lat[trig_idx][trig_offs] = latency / 100;
71 }
72 
73 /*
74  * Notify host that there are data pending on our TX bufring.
75  *
76  * Since this in userspace, rely on the monitor page.
77  * Can't do a hypercall from userspace.
78  */
79 void
rte_vmbus_chan_signal_tx(const struct vmbus_channel * chan)80 rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan)
81 {
82 	const struct vmbus_br *tbr = &chan->txbr;
83 
84 	/* Make sure all updates are done before signaling host */
85 	rte_smp_wmb();
86 
87 	/* If host is ignoring interrupts? */
88 	if (tbr->vbr->imask)
89 		return;
90 
91 	vmbus_set_event(chan);
92 }
93 
94 
95 /* Do a simple send directly using transmit ring. */
rte_vmbus_chan_send(struct vmbus_channel * chan,uint16_t type,void * data,uint32_t dlen,uint64_t xactid,uint32_t flags,bool * need_sig)96 int rte_vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
97 			void *data, uint32_t dlen,
98 			uint64_t xactid, uint32_t flags, bool *need_sig)
99 {
100 	struct vmbus_chanpkt pkt;
101 	unsigned int pktlen, pad_pktlen;
102 	const uint32_t hlen = sizeof(pkt);
103 	bool send_evt = false;
104 	uint64_t pad = 0;
105 	struct iovec iov[3];
106 	int error;
107 
108 	pktlen = hlen + dlen;
109 	pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
110 
111 	pkt.hdr.type = type;
112 	pkt.hdr.flags = flags;
113 	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
114 	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
115 	pkt.hdr.xactid = xactid;
116 
117 	iov[0].iov_base = &pkt;
118 	iov[0].iov_len = hlen;
119 	iov[1].iov_base = data;
120 	iov[1].iov_len = dlen;
121 	iov[2].iov_base = &pad;
122 	iov[2].iov_len = pad_pktlen - pktlen;
123 
124 	error = vmbus_txbr_write(&chan->txbr, iov, 3, &send_evt);
125 
126 	/*
127 	 * caller sets need_sig to non-NULL if it will handle
128 	 * signaling if required later.
129 	 * if need_sig is NULL, signal now if needed.
130 	 */
131 	if (need_sig)
132 		*need_sig |= send_evt;
133 	else if (error == 0 && send_evt)
134 		rte_vmbus_chan_signal_tx(chan);
135 	return error;
136 }
137 
138 /* Do a scatter/gather send where the descriptor points to data. */
rte_vmbus_chan_send_sglist(struct vmbus_channel * chan,struct vmbus_gpa sg[],uint32_t sglen,void * data,uint32_t dlen,uint64_t xactid,bool * need_sig)139 int rte_vmbus_chan_send_sglist(struct vmbus_channel *chan,
140 			       struct vmbus_gpa sg[], uint32_t sglen,
141 			       void *data, uint32_t dlen,
142 			       uint64_t xactid, bool *need_sig)
143 {
144 	struct vmbus_chanpkt_sglist pkt;
145 	unsigned int pktlen, pad_pktlen, hlen;
146 	bool send_evt = false;
147 	struct iovec iov[4];
148 	uint64_t pad = 0;
149 	int error;
150 
151 	hlen = offsetof(struct vmbus_chanpkt_sglist, gpa[sglen]);
152 	pktlen = hlen + dlen;
153 	pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
154 
155 	pkt.hdr.type = VMBUS_CHANPKT_TYPE_GPA;
156 	pkt.hdr.flags = VMBUS_CHANPKT_FLAG_RC;
157 	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
158 	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
159 	pkt.hdr.xactid = xactid;
160 	pkt.rsvd = 0;
161 	pkt.gpa_cnt = sglen;
162 
163 	iov[0].iov_base = &pkt;
164 	iov[0].iov_len = sizeof(pkt);
165 	iov[1].iov_base = sg;
166 	iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
167 	iov[2].iov_base = data;
168 	iov[2].iov_len = dlen;
169 	iov[3].iov_base = &pad;
170 	iov[3].iov_len = pad_pktlen - pktlen;
171 
172 	error = vmbus_txbr_write(&chan->txbr, iov, 4, &send_evt);
173 
174 	/* if caller is batching, just propagate the status */
175 	if (need_sig)
176 		*need_sig |= send_evt;
177 	else if (error == 0 && send_evt)
178 		rte_vmbus_chan_signal_tx(chan);
179 	return error;
180 }
181 
rte_vmbus_chan_rx_empty(const struct vmbus_channel * channel)182 bool rte_vmbus_chan_rx_empty(const struct vmbus_channel *channel)
183 {
184 	const struct vmbus_br *br = &channel->rxbr;
185 
186 	rte_smp_rmb();
187 	return br->vbr->rindex == br->vbr->windex;
188 }
189 
/* Signal host after reading N bytes
 *
 * The host publishes 'pending_send' — how much space it needs to make
 * progress — and only then does it block.  Signal only when our read of
 * 'bytes_read' bytes is what first made that much space available;
 * otherwise the host either was never blocked or still cannot fit its
 * pending write.  The barrier ordering below is load-bearing: the
 * pending_send read must happen after the updated read index is visible.
 */
void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read)
{
	struct vmbus_br *rbr = &chan->rxbr;
	uint32_t write_sz, pending_sz;

	/* No need for signaling on older versions */
	if (!rbr->vbr->feature_bits.feat_pending_send_sz)
		return;

	/* Make sure reading of pending happens after new read index */
	rte_smp_mb();

	pending_sz = rbr->vbr->pending_send;
	if (!pending_sz)
		return;

	rte_smp_rmb();
	write_sz = vmbus_br_availwrite(rbr, rbr->vbr->windex);

	/* If there was space before then host was not blocked */
	if (write_sz - bytes_read > pending_sz)
		return;

	/* If pending write will not fit */
	if (write_sz <= pending_sz)
		return;

	vmbus_set_event(chan);
}

rte_vmbus_chan_recv(struct vmbus_channel * chan,void * data,uint32_t * len,uint64_t * request_id)221 int rte_vmbus_chan_recv(struct vmbus_channel *chan, void *data, uint32_t *len,
222 			uint64_t *request_id)
223 {
224 	struct vmbus_chanpkt_hdr pkt;
225 	uint32_t dlen, hlen, bufferlen = *len;
226 	int error;
227 
228 	*len = 0;
229 
230 	error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
231 	if (error)
232 		return error;
233 
234 	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
235 		VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
236 		/* XXX this channel is dead actually. */
237 		return -EIO;
238 	}
239 
240 	if (unlikely(pkt.hlen > pkt.tlen)) {
241 		VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
242 			  pkt.hlen, pkt.tlen);
243 		return -EIO;
244 	}
245 
246 	/* Length are in quad words */
247 	hlen = pkt.hlen << VMBUS_CHANPKT_SIZE_SHIFT;
248 	dlen = (pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT) - hlen;
249 	*len = dlen;
250 
251 	/* If caller buffer is not large enough */
252 	if (unlikely(dlen > bufferlen))
253 		return -ENOBUFS;
254 
255 	if (request_id)
256 		*request_id = pkt.xactid;
257 
258 	/* Read data and skip packet header */
259 	error = vmbus_rxbr_read(&chan->rxbr, data, dlen, hlen);
260 	if (error)
261 		return error;
262 
263 	rte_vmbus_chan_signal_read(chan, dlen + hlen + sizeof(uint64_t));
264 	return 0;
265 }
266 
267 /* TODO: replace this with inplace ring buffer (no copy) */
rte_vmbus_chan_recv_raw(struct vmbus_channel * chan,void * data,uint32_t * len)268 int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan,
269 			    void *data, uint32_t *len)
270 {
271 	struct vmbus_chanpkt_hdr pkt;
272 	uint32_t dlen, bufferlen = *len;
273 	int error;
274 
275 	error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
276 	if (error)
277 		return error;
278 
279 	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
280 		VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
281 		/* XXX this channel is dead actually. */
282 		return -EIO;
283 	}
284 
285 	if (unlikely(pkt.hlen > pkt.tlen)) {
286 		VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
287 			pkt.hlen, pkt.tlen);
288 		return -EIO;
289 	}
290 
291 	/* Length are in quad words */
292 	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
293 	*len = dlen;
294 
295 	/* If caller buffer is not large enough */
296 	if (unlikely(dlen > bufferlen))
297 		return -ENOBUFS;
298 
299 	/* Read data and skip packet header */
300 	error = vmbus_rxbr_read(&chan->rxbr, data, dlen, 0);
301 	if (error)
302 		return error;
303 
304 	/* Return the number of bytes read */
305 	return dlen + sizeof(uint64_t);
306 }
307 
vmbus_chan_create(const struct rte_vmbus_device * device,uint16_t relid,uint16_t subid,uint8_t monitor_id,struct vmbus_channel ** new_chan)308 int vmbus_chan_create(const struct rte_vmbus_device *device,
309 		      uint16_t relid, uint16_t subid, uint8_t monitor_id,
310 		      struct vmbus_channel **new_chan)
311 {
312 	struct vmbus_channel *chan;
313 	int err;
314 
315 	chan = rte_zmalloc_socket("VMBUS", sizeof(*chan), RTE_CACHE_LINE_SIZE,
316 				  device->device.numa_node);
317 	if (!chan)
318 		return -ENOMEM;
319 
320 	STAILQ_INIT(&chan->subchannel_list);
321 	chan->device = device;
322 	chan->subchannel_id = subid;
323 	chan->relid = relid;
324 	chan->monitor_id = monitor_id;
325 	chan->monitor_page = device->monitor_page;
326 	*new_chan = chan;
327 
328 	err = vmbus_uio_map_rings(chan);
329 	if (err) {
330 		rte_free(chan);
331 		return err;
332 	}
333 
334 	return 0;
335 }
336 
337 /* Setup the primary channel */
rte_vmbus_chan_open(struct rte_vmbus_device * device,struct vmbus_channel ** new_chan)338 int rte_vmbus_chan_open(struct rte_vmbus_device *device,
339 			struct vmbus_channel **new_chan)
340 {
341 	struct mapped_vmbus_resource *uio_res;
342 	int err;
343 
344 	uio_res = vmbus_uio_find_resource(device);
345 	if (!uio_res) {
346 		VMBUS_LOG(ERR, "can't find uio resource");
347 		return -EINVAL;
348 	}
349 
350 	err = vmbus_chan_create(device, device->relid, 0,
351 				device->monitor_id, new_chan);
352 	if (!err)
353 		device->primary = *new_chan;
354 
355 	return err;
356 }
357 
rte_vmbus_max_channels(const struct rte_vmbus_device * device)358 int rte_vmbus_max_channels(const struct rte_vmbus_device *device)
359 {
360 	if (vmbus_uio_subchannels_supported(device, device->primary))
361 		return VMBUS_MAX_CHANNELS;
362 	else
363 		return 1;
364 }
365 
366 /* Setup secondary channel */
rte_vmbus_subchan_open(struct vmbus_channel * primary,struct vmbus_channel ** new_chan)367 int rte_vmbus_subchan_open(struct vmbus_channel *primary,
368 			   struct vmbus_channel **new_chan)
369 {
370 	struct vmbus_channel *chan;
371 	int err;
372 
373 	err = vmbus_uio_get_subchan(primary, &chan);
374 	if (err)
375 		return err;
376 
377 	STAILQ_INSERT_TAIL(&primary->subchannel_list, chan, next);
378 	*new_chan = chan;
379 	return 0;
380 }
381 
/* Return the sub-channel index (0 for the primary channel). */
uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan)
{
	return chan->subchannel_id;
}

rte_vmbus_chan_close(struct vmbus_channel * chan)387 void rte_vmbus_chan_close(struct vmbus_channel *chan)
388 {
389 	const struct rte_vmbus_device *device = chan->device;
390 	struct vmbus_channel *primary = device->primary;
391 
392 	/*
393 	 * intentionally leak primary channel because
394 	 * secondary may still reference it
395 	 */
396 	if (chan != primary) {
397 		STAILQ_REMOVE(&primary->subchannel_list, chan,
398 			      vmbus_channel, next);
399 		rte_free(chan);
400 	}
401 
402 }
403 
vmbus_dump_ring(FILE * f,const char * id,const struct vmbus_br * br)404 static void vmbus_dump_ring(FILE *f, const char *id, const struct vmbus_br *br)
405 {
406 	const struct vmbus_bufring *vbr = br->vbr;
407 	struct vmbus_chanpkt_hdr pkt;
408 
409 	fprintf(f, "%s windex=%u rindex=%u mask=%u pending=%u feature=%#x\n",
410 		id, vbr->windex, vbr->rindex, vbr->imask,
411 		vbr->pending_send, vbr->feature_bits.value);
412 	fprintf(f, " size=%u avail write=%u read=%u\n",
413 		br->dsize, vmbus_br_availwrite(br, vbr->windex),
414 		vmbus_br_availread(br));
415 
416 	if (vmbus_rxbr_peek(br, &pkt, sizeof(pkt)) == 0)
417 		fprintf(f, "  pkt type %#x len %u flags %#x xactid %#"PRIx64"\n",
418 			pkt.type,
419 			pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT,
420 			pkt.flags, pkt.xactid);
421 }
422 
rte_vmbus_chan_dump(FILE * f,const struct vmbus_channel * chan)423 void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan)
424 {
425 	fprintf(f, "channel[%u] relid=%u monitor=%u\n",
426 		chan->subchannel_id, chan->relid, chan->monitor_id);
427 	vmbus_dump_ring(f, "rxbr", &chan->rxbr);
428 	vmbus_dump_ring(f, "txbr", &chan->txbr);
429 }
430