/*-
 * Copyright (c) 2009-2012 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * Copyright (c) 2016 Mike Belopuhov <mike@esdenera.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * The OpenBSD port was done under funding by Esdenera Networks GmbH.
 */

#include <sys/param.h>

/* Hyper-V requires locked atomic operations */
#ifndef MULTIPROCESSOR
#define _HYPERVMPATOMICS
#define MULTIPROCESSOR
#endif
#include <sys/atomic.h>
#ifdef _HYPERVMPATOMICS
#undef MULTIPROCESSOR
#undef _HYPERVMPATOMICS
#endif
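
/*
 * On uniprocessor kernels the atomic ops would otherwise compile down
 * to unlocked instructions.  The ring buffers and event pages touched
 * here are shared with the host, which updates them from other physical
 * CPUs, so the locked variants are needed even on UP -- hence the
 * temporary MULTIPROCESSOR define around <sys/atomic.h> above.
 */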

#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/timetc.h>
#include <sys/task.h>
#include <sys/syslog.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>

#include <uvm/uvm_extern.h>

#include <machine/i82489var.h>

#include <dev/rndvar.h>

#include <dev/pv/pvvar.h>
#include <dev/pv/pvreg.h>
#include <dev/pv/hypervreg.h>
#include <dev/pv/hypervvar.h>

/* Command submission flags */
#define HCF_SLEEPOK	0x0001	/* M_WAITOK */
#define HCF_NOSLEEP	0x0002	/* M_NOWAIT */
#define HCF_NOREPLY	0x0004

struct hv_softc *hv_sc;

int	hv_match(struct device *, void *, void *);
void	hv_attach(struct device *, struct device *, void *);
void	hv_deferred(void *);
void	hv_fake_version(struct hv_softc *);
u_int	hv_gettime(struct timecounter *);
int	hv_init_hypercall(struct hv_softc *);
uint64_t hv_hypercall(struct hv_softc *, uint64_t, void *, void *);
int	hv_init_interrupts(struct hv_softc *);
int	hv_init_synic(struct hv_softc *);
int	hv_cmd(struct hv_softc *, void *, size_t, void *, size_t, int);
int	hv_start(struct hv_softc *, struct hv_msg *);
int	hv_reply(struct hv_softc *, struct hv_msg *);
uint16_t hv_intr_signal(struct hv_softc *, void *);
void	hv_intr(void);
void	hv_event_intr(struct hv_softc *);
void	hv_message_intr(struct hv_softc *);
int	hv_vmbus_connect(struct hv_softc *);
void	hv_channel_response(struct hv_softc *, struct hv_channel_msg_header *);
void	hv_channel_offer(struct hv_softc *, struct hv_channel_msg_header *);
void	hv_channel_delivered(struct hv_softc *, struct hv_channel_msg_header *);
int	hv_channel_scan(struct hv_softc *);
void	hv_process_offer(struct hv_softc *, struct hv_offer *);
struct hv_channel *
	hv_channel_lookup(struct hv_softc *, uint32_t);
int	hv_channel_ring_create(struct hv_channel *, uint32_t, uint32_t);
void	hv_channel_ring_destroy(struct hv_channel *);
void	hv_attach_internal(struct hv_softc *);
void	hv_heartbeat(void *);
void	hv_kvp_init(struct hv_channel *);
void	hv_kvp(void *);
int	hv_kvop(void *, int, char *, char *, size_t);
void	hv_shutdown_init(struct hv_channel *);
void	hv_shutdown(void *);
void	hv_timesync_init(struct hv_channel *);
void	hv_timesync(void *);
int	hv_attach_devices(struct hv_softc *);

struct {
	int		  hmd_response;
	int		  hmd_request;
	void		(*hmd_handler)(struct hv_softc *,
			    struct hv_channel_msg_header *);
} hv_msg_dispatch[] = {
	{ HV_CHANMSG_INVALID,			0, NULL },
	{ HV_CHANMSG_OFFER_CHANNEL,		0, hv_channel_offer },
	{ HV_CHANMSG_RESCIND_CHANNEL_OFFER,	0, NULL },
	{ HV_CHANMSG_REQUEST_OFFERS,		HV_CHANMSG_OFFER_CHANNEL,
	  NULL },
	{ HV_CHANMSG_ALL_OFFERS_DELIVERED,	0,
	  hv_channel_delivered },
	{ HV_CHANMSG_OPEN_CHANNEL,		0, NULL },
	{ HV_CHANMSG_OPEN_CHANNEL_RESULT,	HV_CHANMSG_OPEN_CHANNEL,
	  hv_channel_response },
	{ HV_CHANMSG_CLOSE_CHANNEL,		0, NULL },
	{ HV_CHANMSG_GPADL_HEADER,		0, NULL },
	{ HV_CHANMSG_GPADL_BODY,		0, NULL },
	{ HV_CHANMSG_GPADL_CREATED,		HV_CHANMSG_GPADL_HEADER,
	  hv_channel_response },
	{ HV_CHANMSG_GPADL_TEARDOWN,		0, NULL },
	{ HV_CHANMSG_GPADL_TORNDOWN,		HV_CHANMSG_GPADL_TEARDOWN,
	  hv_channel_response },
	{ HV_CHANMSG_REL_ID_RELEASED,		0, NULL },
	{ HV_CHANMSG_INITIATED_CONTACT,		0, NULL },
	{ HV_CHANMSG_VERSION_RESPONSE,		HV_CHANMSG_INITIATED_CONTACT,
	  hv_channel_response },
	{ HV_CHANMSG_UNLOAD,			0, NULL },
};

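/*
 * The reference counter (HV_X64_MSR_TIME_REF_COUNT) ticks in 100ns
 * units, i.e. at 10MHz, which is the frequency below; the counter is
 * truncated to 32 bits, matching the 0xffffffff mask.
 */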
struct timecounter hv_timecounter = {
	hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001
};

struct cfdriver hyperv_cd = {
	NULL, "hyperv", DV_DULL
};

const struct cfattach hyperv_ca = {
	sizeof(struct hv_softc), hv_match, hv_attach
};

int
hv_match(struct device *parent, void *match, void *aux)
{
	struct pv_attach_args *pva = aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_HYPERV];

	if (hv->hv_base == 0)
		return (0);

	return (1);
}

void
hv_attach(struct device *parent, struct device *self, void *aux)
{
	struct hv_softc *sc = (struct hv_softc *)self;
	struct pv_attach_args *pva = aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_HYPERV];

	sc->sc_pvbus = hv;
	sc->sc_dmat = pva->pva_dmat;

	printf("\n");

	hv_fake_version(sc);

	tc_init(&hv_timecounter);

	if (hv_init_hypercall(sc))
		return;

	/* Wire it up to the global */
	hv_sc = sc;

	if (hv_init_interrupts(sc))
		return;

	startuphook_establish(hv_deferred, sc);
}

void
hv_deferred(void *arg)
{
	struct hv_softc *sc = arg;

	if (hv_vmbus_connect(sc))
		return;

	if (hv_channel_scan(sc))
		return;

	hv_attach_internal(sc);

	if (hv_attach_devices(sc))
		return;
}

void
hv_fake_version(struct hv_softc *sc)
{
	uint64_t ver;

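	/*
	 * Guest OS ID layout, as built below: the top 16 bits (0x8200)
	 * advertise an open-source OS and bits 23:16 carry the major
	 * version -- i.e. we pose as FreeBSD 10.
	 */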
	ver = 0x8200ULL << 48;
	ver |= 10 << 16;
	wrmsr(HV_X64_MSR_GUEST_OS_ID, ver);
}

u_int
hv_gettime(struct timecounter *tc)
{
	u_int now = rdmsr(HV_X64_MSR_TIME_REF_COUNT);

	return (now);
}

int
hv_init_hypercall(struct hv_softc *sc)
{
	extern void *hv_hypercall_page;
	uint64_t msr;
	paddr_t pa;

	sc->sc_hc = &hv_hypercall_page;

	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_hc, &pa)) {
		printf(": hypercall page PA extraction failed\n");
		return (-1);
	}

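	/*
	 * Illustration (assuming the customary 12-bit PASHIFT): with
	 * the hypercall page at PA 0x3000, atop(pa) is 3 and the MSR is
	 * written with 0x3001 -- guest PFN in the upper bits, enable in
	 * bit 0.
	 */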
	msr = (atop(pa) << HV_X64_MSR_HYPERCALL_PASHIFT) |
	    HV_X64_MSR_HYPERCALL_ENABLED;
	wrmsr(HV_X64_MSR_HYPERCALL, msr);

	if (!(rdmsr(HV_X64_MSR_HYPERCALL) & HV_X64_MSR_HYPERCALL_ENABLED)) {
		printf(": failed to set up a hypercall page\n");
		return (-1);
	}

	return (0);
}

uint64_t
hv_hypercall(struct hv_softc *sc, uint64_t control, void *input,
    void *output)
{
	paddr_t input_pa = 0, output_pa = 0;
	uint64_t status = 0;

	if (input != NULL &&
	    pmap_extract(pmap_kernel(), (vaddr_t)input, &input_pa) == 0) {
		printf("%s: hypercall input PA extraction failed\n",
		    sc->sc_dev.dv_xname);
		return (~HV_STATUS_SUCCESS);
	}

	if (output != NULL &&
	    pmap_extract(pmap_kernel(), (vaddr_t)output, &output_pa) == 0) {
		printf("%s: hypercall output PA extraction failed\n",
		    sc->sc_dev.dv_xname);
		return (~HV_STATUS_SUCCESS);
	}

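	/*
	 * Calling convention, as encoded by the asm constraints below:
	 * on amd64 the control word goes in %rcx, input PA in %rdx,
	 * output PA in %r8 and the status comes back in %rax; on i386
	 * control is passed in %edx:%eax, input PA in %ebx:%ecx, output
	 * PA in %edi:%esi and the status returns in %edx:%eax.
	 */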
#ifdef __amd64__
	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_pa) : "r8");
	__asm__ __volatile__ ("call *%3" : "=a" (status) : "c" (control),
	    "d" (input_pa), "m" (sc->sc_hc));
#else  /* __i386__ */
	{
		uint32_t control_hi = control >> 32;
		uint32_t control_lo = control & 0xffffffff;
		uint32_t status_hi = 1;
		uint32_t status_lo = 1;

		__asm__ __volatile__ ("call *%8" :
		    "=d" (status_hi), "=a"(status_lo) :
		    "d" (control_hi), "a" (control_lo),
		    "b" (0), "c" (input_pa), "D" (0), "S" (output_pa),
		    "m" (sc->sc_hc));

		status = status_lo | ((uint64_t)status_hi << 32);
	}
#endif	/* __amd64__ */

	return (status);
}

int
hv_init_interrupts(struct hv_softc *sc)
{
	struct cpu_info *ci = curcpu();
	int cpu = CPU_INFO_UNIT(ci);

	sc->sc_idtvec = LAPIC_HYPERV_VECTOR;

	TAILQ_INIT(&sc->sc_reqs);
	mtx_init(&sc->sc_reqlck, IPL_NET);

	TAILQ_INIT(&sc->sc_rsps);
	mtx_init(&sc->sc_rsplck, IPL_NET);

	sc->sc_simp[cpu] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
	if (sc->sc_simp[cpu] == NULL) {
		printf(": failed to allocate SIMP\n");
		return (-1);
	}

	sc->sc_siep[cpu] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
	if (sc->sc_siep[cpu] == NULL) {
		printf(": failed to allocate SIEP\n");
		km_free(sc->sc_simp[cpu], PAGE_SIZE, &kv_any, &kp_zero);
		return (-1);
	}

	sc->sc_proto = HV_VMBUS_VERSION_WS2008;

	return (hv_init_synic(sc));
}

int
hv_init_synic(struct hv_softc *sc)
{
	struct cpu_info *ci = curcpu();
	int cpu = CPU_INFO_UNIT(ci);
	uint64_t simp, siefp, sctrl, sint, version;
	paddr_t pa;

	version = rdmsr(HV_X64_MSR_SVERSION);

	/*
	 * Setup the Synic's message page
	 */
	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_simp[cpu], &pa)) {
		printf(": SIMP PA extraction failed\n");
		return (-1);
	}
	simp = rdmsr(HV_X64_MSR_SIMP);
	simp &= (1 << HV_X64_MSR_SIMP_PASHIFT) - 1;
	simp |= (atop(pa) << HV_X64_MSR_SIMP_PASHIFT);
	simp |= HV_X64_MSR_SIMP_ENABLED;
	wrmsr(HV_X64_MSR_SIMP, simp);
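
	/*
	 * For example (assuming a 12-bit PASHIFT): with the SIMP page
	 * at PA 0x5000 and the MSR previously reading 0, the value
	 * written is (5 << 12) | 1 -- the new PFN on top, the enable
	 * bit at the bottom, and the reserved low bits preserved by the
	 * read-modify-write above.
	 */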

	/*
	 * Setup the Synic's event page
	 */
	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_siep[cpu], &pa)) {
		printf(": SIEP PA extraction failed\n");
		return (-1);
	}
	siefp = rdmsr(HV_X64_MSR_SIEFP);
	siefp &= (1 << HV_X64_MSR_SIEFP_PASHIFT) - 1;
	siefp |= (atop(pa) << HV_X64_MSR_SIEFP_PASHIFT);
	siefp |= HV_X64_MSR_SIEFP_ENABLED;
	wrmsr(HV_X64_MSR_SIEFP, siefp);

	/* HV_SHARED_SINT_IDT_VECTOR + 0x20 */
	sint = sc->sc_idtvec | HV_X64_MSR_SINT_AUTOEOI;
	wrmsr(HV_X64_MSR_SINT0 + HV_MESSAGE_SINT, sint);

	/* Enable the global synic bit */
	sctrl = rdmsr(HV_X64_MSR_SCONTROL);
	sctrl |= HV_X64_MSR_SCONTROL_ENABLED;
	wrmsr(HV_X64_MSR_SCONTROL, sctrl);

	sc->sc_vcpus[cpu] = rdmsr(HV_X64_MSR_VP_INDEX);

	DPRINTF("vcpu%u: SIMP %#llx SIEFP %#llx SCTRL %#llx\n",
	    sc->sc_vcpus[cpu], simp, siefp, sctrl);

	return (0);
}

int
hv_cmd(struct hv_softc *sc, void *cmd, size_t cmdlen, void *rsp,
    size_t rsplen, int flags)
{
	struct hv_msg msg;
	int rv;

	if (cmdlen > HV_MESSAGE_PAYLOAD) {
		printf("%s: payload too large (%zu)\n", sc->sc_dev.dv_xname,
		    cmdlen);
		return (EMSGSIZE);
	}

	memset(&msg, 0, sizeof(msg));

	msg.msg_req.payload_size = cmdlen;
	memcpy(msg.msg_req.payload, cmd, cmdlen);

	if (!(flags & HCF_NOREPLY)) {
		msg.msg_rsp = rsp;
		msg.msg_rsplen = rsplen;
	} else
		msg.msg_flags |= MSGF_NOQUEUE;

	if (flags & HCF_NOSLEEP)
		msg.msg_flags |= MSGF_NOSLEEP;

	if ((rv = hv_start(sc, &msg)) != 0)
		return (rv);
	return (hv_reply(sc, &msg));
}

int
hv_start(struct hv_softc *sc, struct hv_msg *msg)
{
	const int delays[] = { 100, 100, 100, 500, 500, 5000, 5000, 5000 };
	const char *wchan = "hvstart";
	uint16_t status;
	int i, s;

	msg->msg_req.connection_id = HV_MESSAGE_CONNECTION_ID;
	msg->msg_req.message_type = 1;

	if (!(msg->msg_flags & MSGF_NOQUEUE)) {
		mtx_enter(&sc->sc_reqlck);
		TAILQ_INSERT_TAIL(&sc->sc_reqs, msg, msg_entry);
		mtx_leave(&sc->sc_reqlck);
	}

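	/*
	 * HV_STATUS_INSUFFICIENT_BUFFERS means the host has run out of
	 * message slots; retry with escalating delays (in microseconds,
	 * per the table above), draining our own queue by hand while we
	 * cannot sleep.
	 */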
	for (i = 0; i < nitems(delays); i++) {
		status = hv_hypercall(sc, HV_CALL_POST_MESSAGE,
		    &msg->msg_req, NULL);
		if (status != HV_STATUS_INSUFFICIENT_BUFFERS)
			break;
		if (msg->msg_flags & MSGF_NOSLEEP) {
			delay(delays[i]);
			s = splnet();
			hv_intr();
			splx(s);
		} else
			tsleep(wchan, PRIBIO, wchan, 1);
	}
	if (status != 0) {
		printf("%s: posting vmbus message failed with %d\n",
		    sc->sc_dev.dv_xname, status);
		if (!(msg->msg_flags & MSGF_NOQUEUE)) {
			mtx_enter(&sc->sc_reqlck);
			TAILQ_REMOVE(&sc->sc_reqs, msg, msg_entry);
			mtx_leave(&sc->sc_reqlck);
		}
		return (EIO);
	}

	return (0);
}
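
/*
 * hv_start() leaves the message queued on sc_reqs; when the host
 * responds, hv_channel_response() (called from the message interrupt)
 * moves it over to sc_rsps and issues a wakeup.  hv_reply() below polls
 * for that hand-off and, on timeout, pulls the request back off
 * sc_reqs.
 */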

int
hv_reply(struct hv_softc *sc, struct hv_msg *msg)
{
	const char *wchan = "hvreply";
	struct hv_msg *m, *tmp;
	int i, s;

	if (msg->msg_flags & MSGF_NOQUEUE)
		return (0);

	for (i = 0; i < 1000; i++) {
		mtx_enter(&sc->sc_rsplck);
		TAILQ_FOREACH_SAFE(m, &sc->sc_rsps, msg_entry, tmp) {
			if (m == msg) {
				TAILQ_REMOVE(&sc->sc_rsps, m, msg_entry);
				break;
			}
		}
		mtx_leave(&sc->sc_rsplck);
		if (m != NULL)
			return (0);
		if (msg->msg_flags & MSGF_NOSLEEP) {
			delay(100000);
			s = splnet();
			hv_intr();
			splx(s);
		} else {
			s = tsleep(msg, PRIBIO | PCATCH, wchan, 1);
			if (s != EWOULDBLOCK)
				return (EINTR);
		}
	}
	mtx_enter(&sc->sc_reqlck);
	TAILQ_FOREACH_SAFE(m, &sc->sc_reqs, msg_entry, tmp) {
		if (m == msg) {
			TAILQ_REMOVE(&sc->sc_reqs, m, msg_entry);
			break;
		}
	}
	mtx_leave(&sc->sc_reqlck);
	return (ETIMEDOUT);
}

uint16_t
hv_intr_signal(struct hv_softc *sc, void *con)
{
	uint64_t status;

	status = hv_hypercall(sc, HV_CALL_SIGNAL_EVENT, con, NULL);
	return ((uint16_t)status);
}

void
hv_intr(void)
{
	struct hv_softc *sc = hv_sc;

	hv_event_intr(sc);
	hv_message_intr(sc);
}

void
hv_event_intr(struct hv_softc *sc)
{
	struct hv_synic_event_flags *evt;
	struct cpu_info *ci = curcpu();
	int cpu = CPU_INFO_UNIT(ci);
	int bit, dword, maxdword, relid;
	struct hv_channel *ch;
	uint32_t *revents;

	evt = (struct hv_synic_event_flags *)sc->sc_siep[cpu] + HV_MESSAGE_SINT;
	if ((sc->sc_proto == HV_VMBUS_VERSION_WS2008) ||
	    (sc->sc_proto == HV_VMBUS_VERSION_WIN7)) {
		if (atomic_clearbit_ptr(&evt->flags[0], 0) == 0)
			return;
		maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
		/*
		 * The receive half of the event page is PAGE_SIZE / 2
		 * bytes, viewed as an array of 32-bit flag words.
		 */
		revents = sc->sc_revents;
	} else {
		maxdword = nitems(evt->flags);
		/*
		 * On hosts running Win8 or newer the event page can be
		 * checked directly to find the id of the channel with
		 * a pending interrupt.
		 */
		revents = &evt->flags[0];
	}

	for (dword = 0; dword < maxdword; dword++) {
		if (revents[dword] == 0)
			continue;
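		/*
		 * Each 32-bit word carries the pending flags of 32
		 * channels: relid = dword * 32 + bit, so e.g. relid 37
		 * lives in dword 1, bit 5.
		 */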
		for (bit = 0; bit < 32; bit++) {
			if (!atomic_clearbit_ptr(&revents[dword], bit))
				continue;
			relid = (dword << 5) + bit;
			/* vmbus channel protocol message */
			if (relid == 0)
				continue;
			ch = hv_channel_lookup(sc, relid);
			if (ch == NULL) {
				printf("%s: unhandled event on %u\n",
				    sc->sc_dev.dv_xname, relid);
				continue;
			}
			if (ch->ch_state != HV_CHANSTATE_OPENED) {
				printf("%s: channel %u is not active\n",
				    sc->sc_dev.dv_xname, relid);
				continue;
			}
			ch->ch_evcnt.ec_count++;
			if (ch->ch_handler)
				ch->ch_handler(ch->ch_ctx);
		}
	}
}

void
hv_message_intr(struct hv_softc *sc)
{
	struct hv_vmbus_message *msg;
	struct hv_channel_msg_header *hdr;
	struct cpu_info *ci = curcpu();
	int cpu = CPU_INFO_UNIT(ci);

	for (;;) {
		msg = (struct hv_vmbus_message *)sc->sc_simp[cpu] +
		    HV_MESSAGE_SINT;
		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE)
			break;

		hdr = (struct hv_channel_msg_header *)msg->payload;
		if (hdr->message_type >= HV_CHANMSG_COUNT) {
			printf("%s: unhandled message type %d flags %#x\n",
			    sc->sc_dev.dv_xname, hdr->message_type,
			    msg->header.message_flags);
			goto skip;
		}
		if (hv_msg_dispatch[hdr->message_type].hmd_handler)
			hv_msg_dispatch[hdr->message_type].hmd_handler(sc, hdr);
		else
			printf("%s: unhandled message type %d\n",
			    sc->sc_dev.dv_xname, hdr->message_type);
 skip:
		msg->header.message_type = HV_MESSAGE_TYPE_NONE;
		membar_sync();
		if (msg->header.message_flags & HV_SYNIC_MHF_PENDING)
			wrmsr(HV_X64_MSR_EOM, 0);
	}
}

void
hv_channel_response(struct hv_softc *sc, struct hv_channel_msg_header *rsphdr)
{
	struct hv_msg *msg, *tmp;
	struct hv_channel_msg_header *reqhdr;
	int req;

	req = hv_msg_dispatch[rsphdr->message_type].hmd_request;
	mtx_enter(&sc->sc_reqlck);
	TAILQ_FOREACH_SAFE(msg, &sc->sc_reqs, msg_entry, tmp) {
		reqhdr = (struct hv_channel_msg_header *)&msg->msg_req.payload;
		if (reqhdr->message_type == req) {
			TAILQ_REMOVE(&sc->sc_reqs, msg, msg_entry);
			break;
		}
	}
	mtx_leave(&sc->sc_reqlck);
	if (msg != NULL) {
		memcpy(msg->msg_rsp, rsphdr, msg->msg_rsplen);
		mtx_enter(&sc->sc_rsplck);
		TAILQ_INSERT_TAIL(&sc->sc_rsps, msg, msg_entry);
		mtx_leave(&sc->sc_rsplck);
		wakeup(msg);
	}
}

void
hv_channel_offer(struct hv_softc *sc, struct hv_channel_msg_header *hdr)
{
	struct hv_offer *co;

	co = malloc(sizeof(*co), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (co == NULL) {
		printf("%s: failed to allocate an offer object\n",
		    sc->sc_dev.dv_xname);
		return;
	}

	memcpy(&co->co_chan, hdr, sizeof(co->co_chan));

	mtx_enter(&sc->sc_offerlck);
	SIMPLEQ_INSERT_TAIL(&sc->sc_offers, co, co_entry);
	mtx_leave(&sc->sc_offerlck);
}

void
hv_channel_delivered(struct hv_softc *sc, struct hv_channel_msg_header *hdr)
{
	atomic_setbits_int(&sc->sc_flags, HSF_OFFERS_DELIVERED);
	wakeup(&sc->sc_offers);
}

int
hv_vmbus_connect(struct hv_softc *sc)
{
	const uint32_t versions[] = { HV_VMBUS_VERSION_WIN8_1,
	    HV_VMBUS_VERSION_WIN8, HV_VMBUS_VERSION_WIN7,
	    HV_VMBUS_VERSION_WS2008, HV_VMBUS_VERSION_INVALID
	};
	struct hv_channel_initiate_contact cmd;
	struct hv_channel_version_response rsp;
	paddr_t epa, mpa1, mpa2;
	int i;

	sc->sc_events = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
	if (sc->sc_events == NULL) {
		printf(": failed to allocate channel port events page\n");
		goto errout;
	}
	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_events, &epa)) {
		printf(": channel port events page PA extraction failed\n");
		goto errout;
	}

	sc->sc_wevents = (uint32_t *)sc->sc_events;
	sc->sc_revents = (uint32_t *)((caddr_t)sc->sc_events +
	    (PAGE_SIZE >> 1));

	sc->sc_monitor[0] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
	if (sc->sc_monitor[0] == NULL) {
		printf(": failed to allocate monitor page 1\n");
		goto errout;
	}
	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_monitor[0], &mpa1)) {
		printf(": monitor page 1 PA extraction failed\n");
		goto errout;
	}

	sc->sc_monitor[1] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
	if (sc->sc_monitor[1] == NULL) {
		printf(": failed to allocate monitor page 2\n");
		goto errout;
	}
	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_monitor[1], &mpa2)) {
		printf(": monitor page 2 PA extraction failed\n");
		goto errout;
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.hdr.message_type = HV_CHANMSG_INITIATED_CONTACT;
	cmd.interrupt_page = (uint64_t)epa;
	cmd.monitor_page_1 = (uint64_t)mpa1;
	cmd.monitor_page_2 = (uint64_t)mpa2;

	memset(&rsp, 0, sizeof(rsp));

	for (i = 0; versions[i] != HV_VMBUS_VERSION_INVALID; i++) {
		cmd.vmbus_version_requested = versions[i];
		if (hv_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp),
		    HCF_NOSLEEP)) {
			DPRINTF("%s: INITIATED_CONTACT failed\n",
			    sc->sc_dev.dv_xname);
			goto errout;
		}
		if (rsp.version_supported) {
			sc->sc_flags |= HSF_CONNECTED;
			sc->sc_proto = versions[i];
			sc->sc_handle = 0xe1e10 - 1; /* magic! */
			DPRINTF("%s: protocol version %#x\n",
			    sc->sc_dev.dv_xname, versions[i]);
			break;
		}
	}
	if (versions[i] == HV_VMBUS_VERSION_INVALID) {
		printf("%s: failed to negotiate protocol version\n",
		    sc->sc_dev.dv_xname);
		goto errout;
	}

	return (0);

 errout:
	if (sc->sc_events) {
		km_free(sc->sc_events, PAGE_SIZE, &kv_any, &kp_zero);
		sc->sc_events = NULL;
		sc->sc_wevents = NULL;
		sc->sc_revents = NULL;
	}
	if (sc->sc_monitor[0]) {
		km_free(sc->sc_monitor[0], PAGE_SIZE, &kv_any, &kp_zero);
		sc->sc_monitor[0] = NULL;
	}
	if (sc->sc_monitor[1]) {
		km_free(sc->sc_monitor[1], PAGE_SIZE, &kv_any, &kp_zero);
		sc->sc_monitor[1] = NULL;
	}
	return (-1);
}

const struct hv_guid hv_guid_network = {
	{ 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
	  0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e }
};
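
/*
 * The first three GUID groups are stored little-endian (see guidprint()
 * below), so the byte array above reads back as
 * f8615163-df3e-46c5-913f-f2d2f965ed0e, the class id Hyper-V advertises
 * for synthetic network devices.
 */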

const struct hv_guid hv_guid_ide = {
	{ 0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
	  0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 }
};

const struct hv_guid hv_guid_scsi = {
	{ 0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
	  0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f }
};

const struct hv_guid hv_guid_shutdown = {
	{ 0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49,
	  0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb }
};

const struct hv_guid hv_guid_timesync = {
	{ 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49,
	  0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf }
};

const struct hv_guid hv_guid_heartbeat = {
	{ 0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e,
	  0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d }
};

const struct hv_guid hv_guid_kvp = {
	{ 0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d,
	  0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6 }
};

#ifdef HYPERV_DEBUG
const struct hv_guid hv_guid_vss = {
	{ 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42,
	  0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 }
};

const struct hv_guid hv_guid_dynmem = {
	{ 0xdc, 0x74, 0x50, 0x52, 0x85, 0x89, 0xe2, 0x46,
	  0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02 }
};

const struct hv_guid hv_guid_mouse = {
	{ 0x9e, 0xb6, 0xa8, 0xcf, 0x4a, 0x5b, 0xc0, 0x4c,
	  0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a }
};

const struct hv_guid hv_guid_kbd = {
	{ 0x6d, 0xad, 0x12, 0xf9, 0x17, 0x2b, 0xea, 0x48,
	  0xbd, 0x65, 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84 }
};

const struct hv_guid hv_guid_video = {
	{ 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a,
	  0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 }
};

const struct hv_guid hv_guid_fc = {
	{ 0x4a, 0xcc, 0x9b, 0x2f, 0x69, 0x00, 0xf3, 0x4a,
	  0xb7, 0x6b, 0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda }
};

const struct hv_guid hv_guid_fcopy = {
	{ 0xe3, 0x4b, 0xd1, 0x34, 0xe4, 0xde, 0xc8, 0x41,
	  0x9a, 0xe7, 0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92 }
};

const struct hv_guid hv_guid_pcie = {
	{ 0x1d, 0xf6, 0xc4, 0x44, 0x44, 0x44, 0x00, 0x44,
	  0x9d, 0x52, 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f }
};

const struct hv_guid hv_guid_netdir = {
	{ 0x3d, 0xaf, 0x2e, 0x8c, 0xa7, 0x32, 0x09, 0x4b,
	  0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 }
};

const struct hv_guid hv_guid_rdesktop = {
	{ 0xf4, 0xac, 0x6a, 0x27, 0x15, 0xac, 0x6c, 0x42,
	  0x98, 0xdd, 0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe }
};

/* Automatic Virtual Machine Activation (AVMA) Services */
const struct hv_guid hv_guid_avma1 = {
	{ 0x55, 0xb2, 0x87, 0x44, 0x8c, 0xb8, 0x3f, 0x40,
	  0xbb, 0x51, 0xd1, 0xf6, 0x9c, 0xf1, 0x7f, 0x87 }
};

const struct hv_guid hv_guid_avma2 = {
	{ 0xf4, 0xba, 0x75, 0x33, 0x15, 0x9e, 0x30, 0x4b,
	  0xb7, 0x65, 0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b }
};

const struct hv_guid hv_guid_avma3 = {
	{ 0xa0, 0x1f, 0x22, 0x99, 0xad, 0x24, 0xe2, 0x11,
	  0xbe, 0x98, 0x00, 0x1a, 0xa0, 0x1b, 0xbf, 0x6e }
};

const struct hv_guid hv_guid_avma4 = {
	{ 0x16, 0x57, 0xe6, 0xf8, 0xb3, 0x3c, 0x06, 0x4a,
	  0x9a, 0x60, 0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5 }
};

static inline char *
guidprint(struct hv_guid *a)
{
	/* 3     0  5  4 7 6  8 9  10        15 */
	/* 33221100-5544-7766-9988-FFEEDDCCBBAA */
	static char buf[16 * 2 + 4 + 1];
	int i, j = 0;

	for (i = 3; i != -1; i -= 1, j += 2)
		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
	buf[j++] = '-';
	for (i = 5; i != 3; i -= 1, j += 2)
		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
	buf[j++] = '-';
	for (i = 7; i != 5; i -= 1, j += 2)
		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
	buf[j++] = '-';
	for (i = 8; i < 10; i += 1, j += 2)
		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
	buf[j++] = '-';
	for (i = 10; i < 16; i += 1, j += 2)
		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
	return (&buf[0]);
}
#endif	/* HYPERV_DEBUG */

void
hv_guid_sprint(struct hv_guid *guid, char *str, size_t size)
{
	const struct {
		const struct hv_guid	*guid;
		const char		*ident;
	} map[] = {
		{ &hv_guid_network,	"network" },
		{ &hv_guid_ide,		"ide" },
		{ &hv_guid_scsi,	"scsi" },
		{ &hv_guid_shutdown,	"shutdown" },
		{ &hv_guid_timesync,	"timesync" },
		{ &hv_guid_heartbeat,	"heartbeat" },
		{ &hv_guid_kvp,		"kvp" },
#ifdef HYPERV_DEBUG
		{ &hv_guid_vss,		"vss" },
		{ &hv_guid_dynmem,	"dynamic-memory" },
		{ &hv_guid_mouse,	"mouse" },
		{ &hv_guid_kbd,		"keyboard" },
		{ &hv_guid_video,	"video" },
		{ &hv_guid_fc,		"fiber-channel" },
		{ &hv_guid_fcopy,	"file-copy" },
		{ &hv_guid_pcie,	"pcie-passthrough" },
		{ &hv_guid_netdir,	"network-direct" },
		{ &hv_guid_rdesktop,	"remote-desktop" },
		{ &hv_guid_avma1,	"avma-1" },
		{ &hv_guid_avma2,	"avma-2" },
		{ &hv_guid_avma3,	"avma-3" },
		{ &hv_guid_avma4,	"avma-4" },
#endif
	};
	int i;

	for (i = 0; i < nitems(map); i++) {
		if (memcmp(guid, map[i].guid, sizeof(*guid)) == 0) {
			strlcpy(str, map[i].ident, size);
			return;
		}
	}
#ifdef HYPERV_DEBUG
	strlcpy(str, guidprint(guid), size);
#endif
}

int
hv_channel_scan(struct hv_softc *sc)
{
	struct hv_channel_msg_header hdr;
	struct hv_channel_offer_channel rsp;
	struct hv_offer *co;

	SIMPLEQ_INIT(&sc->sc_offers);
	mtx_init(&sc->sc_offerlck, IPL_NET);

	hdr.message_type = HV_CHANMSG_REQUEST_OFFERS;
	hdr.padding = 0;

	if (hv_cmd(sc, &hdr, sizeof(hdr), &rsp, sizeof(rsp), HCF_NOREPLY)) {
		DPRINTF("%s: REQUEST_OFFERS failed\n", sc->sc_dev.dv_xname);
		return (-1);
	}

	while ((sc->sc_flags & HSF_OFFERS_DELIVERED) == 0)
		tsleep(&sc->sc_offers, PRIBIO, "hvoffers", 1);

	TAILQ_INIT(&sc->sc_channels);
	mtx_init(&sc->sc_channelck, IPL_NET);

	mtx_enter(&sc->sc_offerlck);
	while (!SIMPLEQ_EMPTY(&sc->sc_offers)) {
		co = SIMPLEQ_FIRST(&sc->sc_offers);
		SIMPLEQ_REMOVE_HEAD(&sc->sc_offers, co_entry);
		mtx_leave(&sc->sc_offerlck);

		hv_process_offer(sc, co);
		free(co, M_DEVBUF, sizeof(*co));

		mtx_enter(&sc->sc_offerlck);
	}
	mtx_leave(&sc->sc_offerlck);

	return (0);
}

void
hv_process_offer(struct hv_softc *sc, struct hv_offer *co)
{
	struct hv_channel *ch, *nch;

	nch = malloc(sizeof(*nch), M_DEVBUF, M_ZERO | M_NOWAIT);
	if (nch == NULL) {
		printf("%s: failed to allocate memory for the channel\n",
		    sc->sc_dev.dv_xname);
		return;
	}
	nch->ch_sc = sc;
	hv_guid_sprint(&co->co_chan.offer.interface_type, nch->ch_ident,
	    sizeof(nch->ch_ident));

	/*
	 * By default we setup state to enable batched reading.
	 * A specific service can choose to disable this prior
	 * to opening the channel.
	 */
	nch->ch_flags |= CHF_BATCHED;

	KASSERT((((vaddr_t)&nch->ch_sigevt) & 0x7) == 0);
	memset(&nch->ch_sigevt, 0, sizeof(nch->ch_sigevt));
	nch->ch_sigevt.connection_id = HV_EVENT_CONNECTION_ID;

	if (sc->sc_proto != HV_VMBUS_VERSION_WS2008) {
		if (co->co_chan.is_dedicated_interrupt)
			nch->ch_flags |= CHF_DEDICATED;
		nch->ch_sigevt.connection_id = co->co_chan.connection_id;
	}

	if (co->co_chan.monitor_allocated) {
		nch->ch_mgroup = co->co_chan.monitor_id >> 5;
		nch->ch_mindex = co->co_chan.monitor_id & 0x1f;
		nch->ch_flags |= CHF_MONITOR;
	}

	nch->ch_relid = co->co_chan.child_rel_id;

	memcpy(&nch->ch_type, &co->co_chan.offer.interface_type,
	    sizeof(nch->ch_type));
	memcpy(&nch->ch_inst, &co->co_chan.offer.interface_instance,
	    sizeof(nch->ch_inst));

	mtx_enter(&sc->sc_channelck);
	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
		if (!memcmp(&ch->ch_type, &nch->ch_type, sizeof(ch->ch_type)) &&
		    !memcmp(&ch->ch_inst, &nch->ch_inst, sizeof(ch->ch_inst)))
			break;
	}
	if (ch != NULL) {
		if (co->co_chan.offer.sub_channel_index == 0) {
			printf("%s: duplicate offer \"%s\"\n",
			    sc->sc_dev.dv_xname, nch->ch_ident);
			mtx_leave(&sc->sc_channelck);
			free(nch, M_DEVBUF, sizeof(*nch));
			return;
		}
#ifdef HYPERV_DEBUG
		printf("%s: subchannel %u for \"%s\"\n", sc->sc_dev.dv_xname,
		    co->co_chan.offer.sub_channel_index, ch->ch_ident);
#endif
		mtx_leave(&sc->sc_channelck);
		free(nch, M_DEVBUF, sizeof(*nch));
		return;
	}

	nch->ch_state = HV_CHANSTATE_OFFERED;

	TAILQ_INSERT_TAIL(&sc->sc_channels, nch, ch_entry);
	mtx_leave(&sc->sc_channelck);

#ifdef HYPERV_DEBUG
	printf("%s: channel %u: \"%s\"", sc->sc_dev.dv_xname, nch->ch_relid,
	    nch->ch_ident);
	if (co->co_chan.monitor_allocated)
		printf(", monitor %u\n", co->co_chan.monitor_id);
	else
		printf("\n");
#endif
}

struct hv_channel *
hv_channel_lookup(struct hv_softc *sc, uint32_t relid)
{
	struct hv_channel *ch;

	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
		if (ch->ch_relid == relid)
			return (ch);
	}
	return (NULL);
}

int
hv_channel_ring_create(struct hv_channel *ch, uint32_t sndbuflen,
    uint32_t rcvbuflen)
{
	struct hv_softc *sc = ch->ch_sc;

	sndbuflen = roundup(sndbuflen, PAGE_SIZE);
	rcvbuflen = roundup(rcvbuflen, PAGE_SIZE);
	ch->ch_ring = km_alloc(sndbuflen + rcvbuflen, &kv_any, &kp_zero,
	    cold ? &kd_nowait : &kd_waitok);
	if (ch->ch_ring == NULL) {
		printf("%s: failed to allocate channel ring\n",
		    sc->sc_dev.dv_xname);
		return (-1);
	}
	ch->ch_ring_size = sndbuflen + rcvbuflen;
	ch->ch_ring_npg = ch->ch_ring_size >> PAGE_SHIFT;

	memset(&ch->ch_wrd, 0, sizeof(ch->ch_wrd));
	ch->ch_wrd.rd_ring = (struct hv_ring_buffer *)ch->ch_ring;
	ch->ch_wrd.rd_size = sndbuflen;
	ch->ch_wrd.rd_data_size = sndbuflen - sizeof(struct hv_ring_buffer);
	mtx_init(&ch->ch_wrd.rd_lock, IPL_NET);

	memset(&ch->ch_rrd, 0, sizeof(ch->ch_rrd));
	ch->ch_rrd.rd_ring = (struct hv_ring_buffer *)((uint8_t *)ch->ch_ring +
	    sndbuflen);
	ch->ch_rrd.rd_size = rcvbuflen;
	ch->ch_rrd.rd_data_size = rcvbuflen - sizeof(struct hv_ring_buffer);
	mtx_init(&ch->ch_rrd.rd_lock, IPL_NET);

	if (hv_handle_alloc(ch, ch->ch_ring, sndbuflen + rcvbuflen,
	    &ch->ch_ring_hndl)) {
		printf("%s: failed to obtain a PA handle for the ring\n",
		    sc->sc_dev.dv_xname);
		hv_channel_ring_destroy(ch);
		return (-1);
	}

	return (0);
}

void
hv_channel_ring_destroy(struct hv_channel *ch)
{
	km_free(ch->ch_ring, ch->ch_wrd.rd_size + ch->ch_rrd.rd_size,
	    &kv_any, &kp_zero);
	ch->ch_ring = NULL;
	hv_handle_free(ch, ch->ch_ring_hndl);

	memset(&ch->ch_wrd, 0, sizeof(ch->ch_wrd));
	memset(&ch->ch_rrd, 0, sizeof(ch->ch_rrd));
}

int
hv_channel_open(struct hv_channel *ch, void *udata, size_t udatalen,
    void (*handler)(void *), void *arg)
{
	struct hv_softc *sc = ch->ch_sc;
	struct hv_channel_open cmd;
	struct hv_channel_open_result rsp;
	int rv;

	if (ch->ch_ring == NULL &&
	    hv_channel_ring_create(ch, PAGE_SIZE * 4, PAGE_SIZE * 4)) {
		DPRINTF(": failed to create channel ring\n");
		return (-1);
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.header.message_type = HV_CHANMSG_OPEN_CHANNEL;
	cmd.open_id = ch->ch_relid;
	cmd.child_rel_id = ch->ch_relid;
	cmd.ring_buffer_gpadl_handle = ch->ch_ring_hndl;
	cmd.downstream_ring_buffer_page_offset =
	    ch->ch_wrd.rd_size >> PAGE_SHIFT;
	cmd.target_vcpu = ch->ch_vcpu;

	if (udata && udatalen > 0)
		memcpy(&cmd.user_data, udata, udatalen);

	memset(&rsp, 0, sizeof(rsp));

	ch->ch_handler = handler;
	ch->ch_ctx = arg;

	ch->ch_state = HV_CHANSTATE_OPENED;

	rv = hv_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp), 0);
	if (rv) {
		hv_channel_ring_destroy(ch);
		DPRINTF("%s: OPEN_CHANNEL failed with %d\n",
		    sc->sc_dev.dv_xname, rv);
		ch->ch_handler = NULL;
		ch->ch_ctx = NULL;
		ch->ch_state = HV_CHANSTATE_OFFERED;
		return (-1);
	}

	return (0);
}

int
hv_channel_close(struct hv_channel *ch)
{
	struct hv_softc *sc = ch->ch_sc;
	struct hv_channel_close cmd;
	int rv;

	memset(&cmd, 0, sizeof(cmd));
	cmd.header.message_type = HV_CHANMSG_CLOSE_CHANNEL;
	cmd.child_rel_id = ch->ch_relid;

	ch->ch_state = HV_CHANSTATE_CLOSING;
	rv = hv_cmd(sc, &cmd, sizeof(cmd), NULL, 0, HCF_NOREPLY);
	if (rv) {
		DPRINTF("%s: CLOSE_CHANNEL failed with %d\n",
		    sc->sc_dev.dv_xname, rv);
		return (-1);
	}
	ch->ch_state = HV_CHANSTATE_CLOSED;
	hv_channel_ring_destroy(ch);
	return (0);
}

static inline void
hv_channel_setevent(struct hv_softc *sc, struct hv_channel *ch)
{
	struct hv_monitor_trigger_group *mtg;

	/* Each uint32_t represents 32 channels */
	atomic_setbit_ptr((uint32_t *)sc->sc_wevents + (ch->ch_relid >> 5),
	    ch->ch_relid & 31);
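
	/*
	 * Monitored channels only set a bit in the shared monitor
	 * trigger page and let the host pick it up on its own schedule
	 * (cheaper, but higher latency); everything else signals the
	 * host right away with the SIGNAL_EVENT hypercall.
	 */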
	if (ch->ch_flags & CHF_MONITOR) {
		mtg = &sc->sc_monitor[1]->trigger_group[ch->ch_mgroup];
		atomic_setbit_ptr((uint32_t *)&mtg->pending, ch->ch_mindex);
	} else
		hv_intr_signal(sc, &ch->ch_sigevt);
}

static inline void
hv_ring_put(struct hv_ring_data *wrd, uint8_t *data, uint32_t datalen)
{
	int left = MIN(datalen, wrd->rd_data_size - wrd->rd_prod);

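	/*
	 * Split the copy at the end of the buffer.  E.g. with
	 * rd_data_size 12288, rd_prod 12280 and datalen 16: left is 8,
	 * so 8 bytes land at the tail, 8 wrap around to offset 0, and
	 * rd_prod ends up at 8.
	 */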
	memcpy(&wrd->rd_ring->buffer[wrd->rd_prod], data, left);
	memcpy(&wrd->rd_ring->buffer[0], data + left, datalen - left);
	wrd->rd_prod += datalen;
	if (wrd->rd_prod >= wrd->rd_data_size)
		wrd->rd_prod -= wrd->rd_data_size;
}

static inline void
hv_ring_get(struct hv_ring_data *rrd, uint8_t *data, uint32_t datalen,
    int peek)
{
	int left = MIN(datalen, rrd->rd_data_size - rrd->rd_cons);

	memcpy(data, &rrd->rd_ring->buffer[rrd->rd_cons], left);
	memcpy(data + left, &rrd->rd_ring->buffer[0], datalen - left);
	if (!peek) {
		rrd->rd_cons += datalen;
		if (rrd->rd_cons >= rrd->rd_data_size)
			rrd->rd_cons -= rrd->rd_data_size;
	}
}

#define	HV_BYTES_AVAIL_TO_WRITE(r, w, z)			\
	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))

static inline void
hv_ring_avail(struct hv_ring_data *rd, uint32_t *towrite, uint32_t *toread)
{
	uint32_t ridx = rd->rd_ring->read_index;
	uint32_t widx = rd->rd_ring->write_index;
	uint32_t r, w;

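	/*
	 * E.g. read_index 100, write_index 300, rd_data_size 12288:
	 * 200 bytes are waiting to be read, 12088 may still be written.
	 */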
	w = HV_BYTES_AVAIL_TO_WRITE(ridx, widx, rd->rd_data_size);
	r = rd->rd_data_size - w;
	if (towrite)
		*towrite = w;
	if (toread)
		*toread = r;
}

int
hv_ring_write(struct hv_ring_data *wrd, struct iovec *iov, int iov_cnt,
    int *needsig)
{
	uint64_t indices = 0;
	uint32_t avail, oprod, datalen = sizeof(indices);
	int i;

	for (i = 0; i < iov_cnt; i++)
		datalen += iov[i].iov_len;

	KASSERT(datalen <= wrd->rd_data_size);

	hv_ring_avail(wrd, &avail, NULL);
	if (avail < datalen) {
		printf("%s: avail %u datalen %u\n", __func__, avail, datalen);
		return (EAGAIN);
	}

	mtx_enter(&wrd->rd_lock);

	oprod = wrd->rd_prod;

	for (i = 0; i < iov_cnt; i++)
		hv_ring_put(wrd, iov[i].iov_base, iov[i].iov_len);

	indices = (uint64_t)wrd->rd_prod << 32;
	hv_ring_put(wrd, (uint8_t *)&indices, sizeof(indices));

	membar_sync();
	wrd->rd_ring->write_index = wrd->rd_prod;

	mtx_leave(&wrd->rd_lock);

	/* Signal when the ring transitions from being empty to non-empty */
	if (wrd->rd_ring->interrupt_mask == 0 &&
	    wrd->rd_ring->read_index == oprod)
		*needsig = 1;
	else
		*needsig = 0;

	return (0);
}

int
hv_channel_send(struct hv_channel *ch, void *data, uint32_t datalen,
    uint64_t rid, int type, uint32_t flags)
{
	struct hv_softc *sc = ch->ch_sc;
	struct hv_pktdesc d;
	struct iovec iov[3];
	uint32_t pktlen, pktlen_aligned;
	uint64_t zeropad = 0;
	int rv, needsig = 0;

	pktlen = sizeof(d) + datalen;
	pktlen_aligned = roundup(pktlen, sizeof(uint64_t));

	d.type = type;
	d.flags = flags;
	d.offset = sizeof(d) >> 3;
	d.length = pktlen_aligned >> 3;
	d.tid = rid;

	iov[0].iov_base = &d;
	iov[0].iov_len = sizeof(d);

	iov[1].iov_base = data;
	iov[1].iov_len = datalen;

	iov[2].iov_base = &zeropad;
	iov[2].iov_len = pktlen_aligned - pktlen;

	rv = hv_ring_write(&ch->ch_wrd, iov, 3, &needsig);
	if (rv == 0 && needsig)
		hv_channel_setevent(sc, ch);

	return (rv);
}
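
/*
 * Both hv_channel_send() above and hv_channel_sendbuf() below frame a
 * packet the same way: a descriptor, the payload and zero padding up to
 * the next 8-byte boundary, with the descriptor's offset and length
 * fields expressed in 8-byte units (hence the shifts by 3).
 */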

int
hv_channel_sendbuf(struct hv_channel *ch, struct hv_page_buffer *pb,
    uint32_t npb, void *data, uint32_t datalen, uint64_t rid)
{
	struct hv_softc *sc = ch->ch_sc;
	struct hv_gpadesc d;
	struct iovec iov[4];
	uint32_t buflen, pktlen, pktlen_aligned;
	uint64_t zeropad = 0;
	int rv, needsig = 0;

	buflen = sizeof(struct hv_page_buffer) * npb;
	pktlen = sizeof(d) + datalen + buflen;
	pktlen_aligned = roundup(pktlen, sizeof(uint64_t));

	d.type = HV_PKT_DATA_USING_GPA_DIRECT;
	d.flags = HV_PKTFLAG_COMPLETION_REQUESTED;
	d.offset = (sizeof(d) + buflen) >> 3;
	d.length = pktlen_aligned >> 3;
	d.tid = rid;
	d.range_count = npb;

	iov[0].iov_base = &d;
	iov[0].iov_len = sizeof(d);

	iov[1].iov_base = pb;
	iov[1].iov_len = buflen;

	iov[2].iov_base = data;
	iov[2].iov_len = datalen;

	iov[3].iov_base = &zeropad;
	iov[3].iov_len = pktlen_aligned - pktlen;

	rv = hv_ring_write(&ch->ch_wrd, iov, 4, &needsig);
	if (rv == 0 && needsig)
		hv_channel_setevent(sc, ch);

	return (rv);
}

int
hv_ring_peek(struct hv_ring_data *rrd, void *data, uint32_t datalen)
{
	uint32_t avail;

	KASSERT(datalen <= rrd->rd_data_size);

	hv_ring_avail(rrd, NULL, &avail);
	if (avail < datalen)
		return (EAGAIN);

	mtx_enter(&rrd->rd_lock);
	hv_ring_get(rrd, (uint8_t *)data, datalen, 1);
	mtx_leave(&rrd->rd_lock);
	return (0);
}

int
hv_ring_read(struct hv_ring_data *rrd, void *data, uint32_t datalen,
    uint32_t offset)
{
	uint64_t indices;
	uint32_t avail;

	KASSERT(datalen <= rrd->rd_data_size);

	hv_ring_avail(rrd, NULL, &avail);
	if (avail < datalen) {
		printf("%s: avail %u datalen %u\n", __func__, avail, datalen);
		return (EAGAIN);
	}

	mtx_enter(&rrd->rd_lock);

	if (offset) {
		rrd->rd_cons += offset;
		if (rrd->rd_cons >= rrd->rd_data_size)
			rrd->rd_cons -= rrd->rd_data_size;
	}

	hv_ring_get(rrd, (uint8_t *)data, datalen, 0);
	hv_ring_get(rrd, (uint8_t *)&indices, sizeof(indices), 0);

	membar_sync();
	rrd->rd_ring->read_index = rrd->rd_cons;

	mtx_leave(&rrd->rd_lock);

	return (0);
}

int
hv_channel_recv(struct hv_channel *ch, void *data, uint32_t datalen,
    uint32_t *rlen, uint64_t *rid, int raw)
{
	struct hv_pktdesc d;
	uint32_t offset, pktlen;
	int rv;

	*rlen = 0;

	if ((rv = hv_ring_peek(&ch->ch_rrd, &d, sizeof(d))) != 0)
		return (rv);

	offset = raw ? 0 : (d.offset << 3);
	pktlen = (d.length << 3) - offset;
	if (pktlen > datalen) {
		printf("%s: pktlen %u datalen %u\n", __func__, pktlen, datalen);
		return (EINVAL);
	}

	rv = hv_ring_read(&ch->ch_rrd, data, pktlen, offset);
	if (rv == 0) {
		*rlen = pktlen;
		*rid = d.tid;
	}

	return (rv);
}

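/*
 * A GPADL (guest physical address descriptor list) hands a guest buffer
 * to the host page by page: as many PFNs as possible travel in the
 * GPADL_HEADER message, the rest follow in GPADL_BODY messages, and the
 * host acknowledges the whole list with a GPADL_CREATED reply carrying
 * the same handle.
 */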
int
hv_handle_alloc(struct hv_channel *ch, void *buffer, uint32_t buflen,
    uint32_t *handle)
{
	struct hv_softc *sc = ch->ch_sc;
	struct hv_gpadl_header *hdr;
	struct hv_gpadl_body *body, *cmd;
	struct hv_gpadl_created rsp;
	struct hv_msg *msg;
	int i, j, last, left, rv;
	int bodylen = 0, ncmds = 0, pfn = 0;
	int waitok = cold ? M_NOWAIT : M_WAITOK;
	uint64_t *frames;
	paddr_t pa;
	/* Total number of pages to reference */
	int total = atop(buflen);
	/* Number of pages that will fit the header */
	int inhdr = MIN(total, HV_NPFNHDR);

	KASSERT((buflen & (PAGE_SIZE - 1)) == 0);

	if ((msg = malloc(sizeof(*msg), M_DEVBUF, M_ZERO | waitok)) == NULL)
		return (ENOMEM);

	/* Prepare array of frame addresses */
	if ((frames = mallocarray(total, sizeof(*frames), M_DEVBUF, M_ZERO |
	    waitok)) == NULL) {
		free(msg, M_DEVBUF, sizeof(*msg));
		return (ENOMEM);
	}
	for (i = 0; i < total; i++) {
		if (!pmap_extract(pmap_kernel(), (vaddr_t)buffer +
		    PAGE_SIZE * i, &pa)) {
			free(msg, M_DEVBUF, sizeof(*msg));
			free(frames, M_DEVBUF, total * sizeof(*frames));
			return (EFAULT);
		}
		frames[i] = atop(pa);
	}

	msg->msg_req.payload_size = sizeof(struct hv_gpadl_header) +
	    sizeof(struct hv_gpa_range) + inhdr * sizeof(uint64_t);
	hdr = (struct hv_gpadl_header *)msg->msg_req.payload;
	msg->msg_rsp = &rsp;
	msg->msg_rsplen = sizeof(rsp);
	if (waitok == M_NOWAIT)
		msg->msg_flags = MSGF_NOSLEEP;

	left = total - inhdr;

	/* Allocate additional gpadl_body structures if required */
	if (left > 0) {
		ncmds = MAX(1, howmany(left, HV_NPFNBODY));
		bodylen = ncmds * HV_MESSAGE_PAYLOAD;
		body = malloc(bodylen, M_DEVBUF, M_ZERO | waitok);
		if (body == NULL) {
			free(msg, M_DEVBUF, sizeof(*msg));
			free(frames, M_DEVBUF, atop(buflen) * sizeof(*frames));
			return (ENOMEM);
		}
	}

	*handle = atomic_inc_int_nv(&sc->sc_handle);

	hdr->header.message_type = HV_CHANMSG_GPADL_HEADER;
	hdr->child_rel_id = ch->ch_relid;
	hdr->gpadl = *handle;

	/* Single range for a contiguous buffer */
	hdr->range_count = 1;
	hdr->range_buf_len = sizeof(struct hv_gpa_range) + total *
	    sizeof(uint64_t);
	hdr->range[0].byte_offset = 0;
	hdr->range[0].byte_count = buflen;

	/* Fit as many pages as possible into the header */
	for (i = 0; i < inhdr; i++)
		hdr->range[0].pfn_array[i] = frames[pfn++];

	for (i = 0; i < ncmds; i++) {
		cmd = (struct hv_gpadl_body *)((caddr_t)body +
		    HV_MESSAGE_PAYLOAD * i);
		cmd->header.message_type = HV_CHANMSG_GPADL_BODY;
		cmd->gpadl = *handle;
		last = MIN(left, HV_NPFNBODY);
		for (j = 0; j < last; j++)
			cmd->pfn[j] = frames[pfn++];
		left -= last;
	}

	rv = hv_start(sc, msg);
	if (rv != 0) {
		DPRINTF("%s: GPADL_HEADER failed\n", sc->sc_dev.dv_xname);
		goto out;
	}
	for (i = 0; i < ncmds; i++) {
		int cmdlen = sizeof(*cmd);
		cmd = (struct hv_gpadl_body *)((caddr_t)body +
		    HV_MESSAGE_PAYLOAD * i);
		/* Last element can be short */
		if (i == ncmds - 1)
			cmdlen += last * sizeof(uint64_t);
		else
			cmdlen += HV_NPFNBODY * sizeof(uint64_t);
		rv = hv_cmd(sc, cmd, cmdlen, NULL, 0, waitok | HCF_NOREPLY);
		if (rv != 0) {
			DPRINTF("%s: GPADL_BODY (iteration %d/%d) failed "
			    "with %d\n", sc->sc_dev.dv_xname, i, ncmds, rv);
			goto out;
		}
	}
	rv = hv_reply(sc, msg);
	if (rv != 0)
		DPRINTF("%s: GPADL allocation failed with %d\n",
		    sc->sc_dev.dv_xname, rv);

 out:
	free(msg, M_DEVBUF, sizeof(*msg));
	free(frames, M_DEVBUF, total * sizeof(*frames));
	if (bodylen > 0)
		free(body, M_DEVBUF, bodylen);
	if (rv != 0)
		return (rv);

	KASSERT(*handle == rsp.gpadl);

	return (0);
}

void
hv_handle_free(struct hv_channel *ch, uint32_t handle)
{
	struct hv_softc *sc = ch->ch_sc;
	struct hv_gpadl_teardown cmd;
	struct hv_gpadl_torndown rsp;
	int rv;

	memset(&cmd, 0, sizeof(cmd));
	cmd.header.message_type = HV_CHANMSG_GPADL_TEARDOWN;
	cmd.child_rel_id = ch->ch_relid;
	cmd.gpadl = handle;

	rv = hv_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp), 0);
	if (rv)
		DPRINTF("%s: GPADL_TEARDOWN failed with %d\n",
		    sc->sc_dev.dv_xname, rv);
}

const struct {
	const char		 *id_name;
	const struct hv_guid	 *id_type;
	void			(*id_init)(struct hv_channel *);
	void			(*id_handler)(void *);
} hv_internal_devs[] = {
	{ "heartbeat",	&hv_guid_heartbeat, NULL,		hv_heartbeat },
	{ "kvp",	&hv_guid_kvp,	    hv_kvp_init,	hv_kvp },
	{ "shutdown",	&hv_guid_shutdown,  hv_shutdown_init,	hv_shutdown },
	{ "timesync",	&hv_guid_timesync,  hv_timesync_init,	hv_timesync }
};

void
hv_attach_internal(struct hv_softc *sc)
{
	struct hv_channel *ch;
	int i;

	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
		if (ch->ch_state != HV_CHANSTATE_OFFERED)
			continue;
		if (ch->ch_flags & CHF_MONITOR)
			continue;
		for (i = 0; i < nitems(hv_internal_devs); i++) {
			if (memcmp(hv_internal_devs[i].id_type, &ch->ch_type,
			    sizeof(ch->ch_type)) != 0)
				continue;
			/*
			 * These services are not performance critical and
			 * do not need batched reading. Furthermore, some
			 * services such as KVP can only handle one message
			 * from the host at a time.
			 */
			ch->ch_flags &= ~CHF_BATCHED;

			if (hv_internal_devs[i].id_init)
				hv_internal_devs[i].id_init(ch);

			ch->ch_buf = km_alloc(PAGE_SIZE, &kv_any, &kp_zero,
			    (cold ? &kd_nowait : &kd_waitok));
			if (ch->ch_buf == NULL) {
				printf("%s: failed to allocate channel data "
				    "buffer for \"%s\"\n", sc->sc_dev.dv_xname,
				    hv_internal_devs[i].id_name);
				continue;
			}
			ch->ch_buflen = PAGE_SIZE;

			if (hv_channel_open(ch, NULL, 0,
			    hv_internal_devs[i].id_handler, ch)) {
				km_free(ch->ch_buf, PAGE_SIZE, &kv_any,
				    &kp_zero);
				ch->ch_buf = NULL;
				ch->ch_buflen = 0;
				printf("%s: failed to open channel for "
				    "\"%s\"\n", sc->sc_dev.dv_xname,
				    hv_internal_devs[i].id_name);
				break;
			}
			evcount_attach(&ch->ch_evcnt,
			    hv_internal_devs[i].id_name, &sc->sc_idtvec);
			break;
		}
	}
}

int
hv_service_common(struct hv_channel *ch, uint32_t *rlen, uint64_t *rid,
    struct hv_icmsg_hdr **hdr)
{
	struct hv_icmsg_negotiate *msg;
	int rv;

	rv = hv_channel_recv(ch, ch->ch_buf, ch->ch_buflen, rlen, rid, 0);
	if ((rv && rv != EAGAIN) || *rlen == 0)
		return (rv);
	*hdr = (struct hv_icmsg_hdr *)&ch->ch_buf[sizeof(struct hv_pipe_hdr)];
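	/*
	 * Every Integration Component channel starts out with a version
	 * negotiation.  The reply built below advertises framework and
	 * message version 3.0 when the host offers it and falls back to
	 * 1.0 otherwise.
	 */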
	if ((*hdr)->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
		msg = (struct hv_icmsg_negotiate *)(*hdr + 1);
		if (msg->icframe_vercnt >= 2 &&
		    msg->icversion_data[1].major == 3) {
			msg->icversion_data[0].major = 3;
			msg->icversion_data[0].minor = 0;
			msg->icversion_data[1].major = 3;
			msg->icversion_data[1].minor = 0;
		} else {
			msg->icversion_data[0].major = 1;
			msg->icversion_data[0].minor = 0;
			msg->icversion_data[1].major = 1;
			msg->icversion_data[1].minor = 0;
		}
		msg->icframe_vercnt = 1;
		msg->icmsg_vercnt = 1;
		(*hdr)->icmsgsize = 0x10;
	}
	return (0);
}

void
hv_heartbeat(void *arg)
{
	struct hv_channel *ch = arg;
	struct hv_softc *sc = ch->ch_sc;
	struct hv_icmsg_hdr *hdr;
	struct hv_heartbeat_msg *msg;
	uint64_t rid;
	uint32_t rlen;
	int rv;

	rv = hv_service_common(ch, &rlen, &rid, &hdr);
	if (rv || rlen == 0) {
		if (rv != EAGAIN)
			printf("heartbeat: rv=%d rlen=%u\n", rv, rlen);
		return;
	}
	if (hdr->icmsgtype == HV_ICMSGTYPE_HEARTBEAT) {
		msg = (struct hv_heartbeat_msg *)(hdr + 1);
		msg->seq_num += 1;
	} else if (hdr->icmsgtype != HV_ICMSGTYPE_NEGOTIATE) {
		printf("%s: unhandled heartbeat message type %u\n",
		    sc->sc_dev.dv_xname, hdr->icmsgtype);
	}
	hdr->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
	hv_channel_send(ch, ch->ch_buf, rlen, rid, HV_PKT_DATA_IN_BAND, 0);
}

void
hv_kvp_init(struct hv_channel *ch)
{
	struct hv_softc *sc = ch->ch_sc;

	sc->sc_pvbus->hv_kvop = hv_kvop;
	sc->sc_pvbus->hv_arg = sc;
}

void
hv_kvp(void *arg)
{
}

int
hv_kvop(void *arg, int op, char *key, char *value, size_t valuelen)
{
	switch (op) {
	case PVBUS_KVWRITE:
	case PVBUS_KVREAD:
	default:
		return (EOPNOTSUPP);
	}
}

static void
hv_shutdown_task(void *arg)
{
	extern int allowpowerdown;

	if (allowpowerdown == 0)
		return;

	suspend_randomness();

	log(LOG_KERN | LOG_NOTICE, "Shutting down in response to "
	    "request from Hyper-V host\n");
	prsignal(initprocess, SIGUSR2);
}

void
hv_shutdown_init(struct hv_channel *ch)
{
	struct hv_softc *sc = ch->ch_sc;

	task_set(&sc->sc_sdtask, hv_shutdown_task, sc);
}

void
hv_shutdown(void *arg)
{
	struct hv_channel *ch = arg;
	struct hv_softc *sc = ch->ch_sc;
	struct hv_icmsg_hdr *hdr;
	struct hv_shutdown_msg *msg;
	uint64_t rid;
	uint32_t rlen;
	int rv, shutdown = 0;

	rv = hv_service_common(ch, &rlen, &rid, &hdr);
	if (rv || rlen == 0) {
		if (rv != EAGAIN)
			printf("shutdown: rv=%d rlen=%u\n", rv, rlen);
		return;
	}
	if (hdr->icmsgtype == HV_ICMSGTYPE_SHUTDOWN) {
		msg = (struct hv_shutdown_msg *)(hdr + 1);
		if (msg->flags == 0 || msg->flags == 1) {
			shutdown = 1;
			hdr->status = HV_S_OK;
		} else
			hdr->status = HV_E_FAIL;
	} else if (hdr->icmsgtype != HV_ICMSGTYPE_NEGOTIATE) {
		printf("%s: unhandled shutdown message type %u\n",
		    sc->sc_dev.dv_xname, hdr->icmsgtype);
	}

	hdr->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
	hv_channel_send(ch, ch->ch_buf, rlen, rid, HV_PKT_DATA_IN_BAND, 0);

	if (shutdown)
		task_add(systq, &sc->sc_sdtask);
}

void
hv_timesync_init(struct hv_channel *ch)
{
	struct hv_softc *sc = ch->ch_sc;

	strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname,
	    sizeof(sc->sc_sensordev.xname));

	sc->sc_sensor.type = SENSOR_TIMEDELTA;
	sc->sc_sensor.status = SENSOR_S_UNKNOWN;

	sensor_attach(&sc->sc_sensordev, &sc->sc_sensor);
	sensordev_install(&sc->sc_sensordev);
}

void
hv_timesync(void *arg)
{
	struct hv_channel *ch = arg;
	struct hv_softc *sc = ch->ch_sc;
	struct hv_icmsg_hdr *hdr;
	struct hv_timesync_msg *msg;
	struct timespec guest, host, diff;
	uint64_t tns;
	uint64_t rid;
	uint32_t rlen;
	int rv;

	rv = hv_service_common(ch, &rlen, &rid, &hdr);
	if (rv || rlen == 0) {
		if (rv != EAGAIN)
			printf("timesync: rv=%d rlen=%u\n", rv, rlen);
		return;
	}
	if (hdr->icmsgtype == HV_ICMSGTYPE_TIMESYNC) {
		msg = (struct hv_timesync_msg *)(hdr + 1);
		if (msg->flags == HV_TIMESYNC_SYNC ||
		    msg->flags == HV_TIMESYNC_SAMPLE) {
			microtime(&sc->sc_sensor.tv);
			nanotime(&guest);
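			/*
			 * parent_time is in Windows FILETIME format:
			 * 100ns units counted from Jan 1, 1601.  The
			 * constant below is the Unix epoch expressed
			 * in those units.
			 */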
			tns = (msg->parent_time - 116444736000000000LL) * 100;
			host.tv_sec = tns / 1000000000LL;
			host.tv_nsec = tns % 1000000000LL;
			timespecsub(&guest, &host, &diff);
			sc->sc_sensor.value = (int64_t)diff.tv_sec *
			    1000000000LL + diff.tv_nsec;
			sc->sc_sensor.status = SENSOR_S_OK;
		}
	} else if (hdr->icmsgtype != HV_ICMSGTYPE_NEGOTIATE) {
		printf("%s: unhandled timesync message type %u\n",
		    sc->sc_dev.dv_xname, hdr->icmsgtype);
	}

	hdr->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
	hv_channel_send(ch, ch->ch_buf, rlen, rid, HV_PKT_DATA_IN_BAND, 0);
}

static int
hv_attach_print(void *aux, const char *name)
{
	struct hv_attach_args *aa = aux;

	if (name)
		printf("\"%s\" at %s", aa->aa_ident, name);

	return (UNCONF);
}

int
hv_attach_devices(struct hv_softc *sc)
{
	struct hv_dev *dv;
	struct hv_channel *ch;

	SLIST_INIT(&sc->sc_devs);
	mtx_init(&sc->sc_devlck, IPL_NET);

	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
		if (ch->ch_state != HV_CHANSTATE_OFFERED)
			continue;
#if 0
		if (strcmp(ch->ch_ident, "network") != 0 &&
		    strcmp(ch->ch_ident, "scsi") != 0 &&
		    strcmp(ch->ch_ident, "ide") != 0)
			continue;
#else
		if (!(ch->ch_flags & CHF_MONITOR))
			continue;
#endif
		dv = malloc(sizeof(*dv), M_DEVBUF, M_ZERO | M_NOWAIT);
		if (dv == NULL) {
			printf("%s: failed to allocate device object\n",
			    sc->sc_dev.dv_xname);
			return (-1);
		}
		dv->dv_aa.aa_parent = sc;
		dv->dv_aa.aa_type = &ch->ch_type;
		dv->dv_aa.aa_inst = &ch->ch_inst;
		dv->dv_aa.aa_ident = ch->ch_ident;
		dv->dv_aa.aa_chan = ch;
		dv->dv_aa.aa_dmat = sc->sc_dmat;
		mtx_enter(&sc->sc_devlck);
		SLIST_INSERT_HEAD(&sc->sc_devs, dv, dv_entry);
		mtx_leave(&sc->sc_devlck);
		config_found((struct device *)sc, &dv->dv_aa, hv_attach_print);
	}
	return (0);
}