xref: /openbsd-src/sys/dev/pv/hyperv.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*-
2  * Copyright (c) 2009-2012 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * Copyright (c) 2016 Mike Belopuhov <mike@esdenera.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice unmodified, this list of conditions, and the following
13  *    disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * The OpenBSD port was done under funding by Esdenera Networks GmbH.
32  */
33 
34 #include <sys/param.h>
35 
36 /* Hyperv requires locked atomic operations */
37 #ifndef MULTIPROCESSOR
38 #define _HYPERVMPATOMICS
39 #define MULTIPROCESSOR
40 #endif
41 #include <sys/atomic.h>
42 #ifdef _HYPERVMPATOMICS
43 #undef MULTIPROCESSOR
44 #undef _HYPERVMPATOMICS
45 #endif
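/*
 * Illustrative note (not compiled): on a uniprocessor kernel,
 * <sys/atomic.h> may drop the x86 "lock" prefix, so an operation such
 * as atomic_inc_int(&x) can compile to a plain "incl".  The hypervisor
 * touches the same memory from other virtual CPUs, hence the dance
 * above to force the MULTIPROCESSOR (locked) variants:
 *
 *	atomic_inc_int(&x);	MP build: lock incl (x)	-- safe here
 *				UP build: incl (x)	-- not safe here
 */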
46 
47 #include <sys/systm.h>
48 #include <sys/proc.h>
49 #include <sys/signal.h>
50 #include <sys/signalvar.h>
51 #include <sys/malloc.h>
52 #include <sys/kernel.h>
53 #include <sys/device.h>
54 #include <sys/timetc.h>
55 #include <sys/task.h>
56 #include <sys/syslog.h>
57 
58 #include <machine/bus.h>
59 #include <machine/cpu.h>
60 #include <machine/cpufunc.h>
61 
62 #include <uvm/uvm_extern.h>
63 
64 #include <machine/i82489var.h>
65 
66 #include <dev/rndvar.h>
67 
68 #include <dev/pv/pvvar.h>
69 #include <dev/pv/pvreg.h>
70 #include <dev/pv/hypervreg.h>
71 #include <dev/pv/hypervvar.h>
72 
73 /* Command submission flags */
74 #define HCF_SLEEPOK	0x0001	/* M_WAITOK */
75 #define HCF_NOSLEEP	0x0002	/* M_NOWAIT */
76 #define HCF_NOREPLY	0x0004
77 
78 struct hv_softc *hv_sc;
79 
80 int 	hv_match(struct device *, void *, void *);
81 void	hv_attach(struct device *, struct device *, void *);
82 void	hv_deferred(void *);
83 void	hv_fake_version(struct hv_softc *);
84 u_int	hv_gettime(struct timecounter *);
85 int	hv_init_hypercall(struct hv_softc *);
86 uint64_t hv_hypercall(struct hv_softc *, uint64_t, void *, void *);
87 int	hv_init_interrupts(struct hv_softc *);
88 int	hv_init_synic(struct hv_softc *);
89 int	hv_cmd(struct hv_softc *, void *, size_t, void *, size_t, int);
90 int	hv_start(struct hv_softc *, struct hv_msg *);
91 int	hv_reply(struct hv_softc *, struct hv_msg *);
92 uint16_t hv_intr_signal(struct hv_softc *, void *);
93 void	hv_intr(void);
94 void	hv_event_intr(struct hv_softc *);
95 void	hv_message_intr(struct hv_softc *);
96 int	hv_vmbus_connect(struct hv_softc *);
97 void	hv_channel_response(struct hv_softc *, struct vmbus_chanmsg_hdr *);
98 void	hv_channel_offer(struct hv_softc *, struct vmbus_chanmsg_hdr *);
99 void	hv_channel_delivered(struct hv_softc *, struct vmbus_chanmsg_hdr *);
100 int	hv_channel_scan(struct hv_softc *);
101 void	hv_process_offer(struct hv_softc *, struct hv_offer *);
102 struct hv_channel *
103 	hv_channel_lookup(struct hv_softc *, uint32_t);
104 int	hv_channel_ring_create(struct hv_channel *, uint32_t, uint32_t);
105 void	hv_channel_ring_destroy(struct hv_channel *);
106 void	hv_attach_internal(struct hv_softc *);
107 void	hv_heartbeat(void *);
108 void	hv_kvp_init(struct hv_channel *);
109 void	hv_kvp(void *);
110 int	hv_kvop(void *, int, char *, char *, size_t);
111 void	hv_shutdown_init(struct hv_channel *);
112 void	hv_shutdown(void *);
113 void	hv_timesync_init(struct hv_channel *);
114 void	hv_timesync(void *);
115 int	hv_attach_devices(struct hv_softc *);
116 
117 struct {
118 	int		  hmd_response;
119 	int		  hmd_request;
120 	void		(*hmd_handler)(struct hv_softc *,
121 			    struct vmbus_chanmsg_hdr *);
122 } hv_msg_dispatch[] = {
123 	{ 0,					0, NULL },
124 	{ VMBUS_CHANMSG_CHOFFER,		0, hv_channel_offer },
125 	{ VMBUS_CHANMSG_CHRESCIND,		0, NULL },
126 	{ VMBUS_CHANMSG_CHREQUEST,		VMBUS_CHANMSG_CHOFFER,
127 	  NULL },
128 	{ VMBUS_CHANMSG_CHOFFER_DONE,		0,
129 	  hv_channel_delivered },
130 	{ VMBUS_CHANMSG_CHOPEN,			0, NULL },
131 	{ VMBUS_CHANMSG_CHOPEN_RESP,		VMBUS_CHANMSG_CHOPEN,
132 	  hv_channel_response },
133 	{ VMBUS_CHANMSG_CHCLOSE,		0, NULL },
134 	{ VMBUS_CHANMSG_GPADL_CONN,		0, NULL },
135 	{ VMBUS_CHANMSG_GPADL_SUBCONN,		0, NULL },
136 	{ VMBUS_CHANMSG_GPADL_CONNRESP,		VMBUS_CHANMSG_GPADL_CONN,
137 	  hv_channel_response },
138 	{ VMBUS_CHANMSG_GPADL_DISCONN,		0, NULL },
139 	{ VMBUS_CHANMSG_GPADL_DISCONNRESP,	VMBUS_CHANMSG_GPADL_DISCONN,
140 	  hv_channel_response },
141 	{ VMBUS_CHANMSG_CHFREE,			0, NULL },
142 	{ VMBUS_CHANMSG_CONNECT,		0, NULL },
143 	{ VMBUS_CHANMSG_CONNECT_RESP,		VMBUS_CHANMSG_CONNECT,
144 	  hv_channel_response },
145 	{ VMBUS_CHANMSG_DISCONNECT,		0, NULL },
146 };
147 
148 struct timecounter hv_timecounter = {
149 	hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001
150 };
151 
152 struct cfdriver hyperv_cd = {
153 	NULL, "hyperv", DV_DULL
154 };
155 
156 const struct cfattach hyperv_ca = {
157 	sizeof(struct hv_softc), hv_match, hv_attach
158 };
159 
160 int
161 hv_match(struct device *parent, void *match, void *aux)
162 {
163 	struct pv_attach_args *pva = aux;
164 	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_HYPERV];
165 
166 	if (hv->hv_base == 0)
167 		return (0);
168 
169 	return (1);
170 }
171 
172 void
173 hv_attach(struct device *parent, struct device *self, void *aux)
174 {
175 	struct hv_softc *sc = (struct hv_softc *)self;
176 	struct pv_attach_args *pva = aux;
177 	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_HYPERV];
178 
179 	sc->sc_pvbus = hv;
180 	sc->sc_dmat = pva->pva_dmat;
181 
182 	printf("\n");
183 
184 	hv_fake_version(sc);
185 
186 	tc_init(&hv_timecounter);
187 
188 	if (hv_init_hypercall(sc))
189 		return;
190 
191 	/* Wire it up to the global */
192 	hv_sc = sc;
193 
194 	if (hv_init_interrupts(sc))
195 		return;
196 
197 	startuphook_establish(hv_deferred, sc);
198 }
199 
200 void
201 hv_deferred(void *arg)
202 {
203 	struct hv_softc *sc = arg;
204 
205 	if (hv_vmbus_connect(sc))
206 		return;
207 
208 	if (hv_channel_scan(sc))
209 		return;
210 
211 	hv_attach_internal(sc);
212 
213 	if (hv_attach_devices(sc))
214 		return;
215 }
216 
217 void
218 hv_fake_version(struct hv_softc *sc)
219 {
220 	uint64_t ver;
221 
222 	/* Pose as FreeBSD 10, per the open-source guest OS ID convention */
223 	ver = 0x8200ULL << 48;
224 	ver |= 10 << 16;
225 	wrmsr(MSR_HV_GUEST_OS_ID, ver);
226 }
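/*
 * Layout sketch of the MSR_HV_GUEST_OS_ID value built above (my reading
 * of the open-source guest ID convention; the exact field positions are
 * an assumption, not a spec quote): bit 63 marks an open-source guest,
 * the high word 0x8200 encodes the FreeBSD OS type and bits 23:16 carry
 * the major version, so the host sees "FreeBSD 10":
 *
 *	ver = (0x8200ULL << 48) | (10 << 16);
 */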
227 
228 u_int
229 hv_gettime(struct timecounter *tc)
230 {
231 	u_int now = rdmsr(MSR_HV_TIME_REF_COUNT);
232 
233 	return (now);
234 }
235 
236 int
237 hv_init_hypercall(struct hv_softc *sc)
238 {
239 	extern void *hv_hypercall_page;
240 	uint64_t msr;
241 	paddr_t pa;
242 
243 	sc->sc_hc = &hv_hypercall_page;
244 
245 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_hc, &pa)) {
246 		printf(": hypercall page PA extraction failed\n");
247 		return (-1);
248 	}
249 
250 	msr = (atop(pa) << MSR_HV_HYPERCALL_PGSHIFT) | MSR_HV_HYPERCALL_ENABLE;
251 	wrmsr(MSR_HV_HYPERCALL, msr);
252 
253 	if (!(rdmsr(MSR_HV_HYPERCALL) & MSR_HV_HYPERCALL_ENABLE)) {
254 		printf(": failed to set up a hypercall page\n");
255 		return (-1);
256 	}
257 
258 	return (0);
259 }
260 
261 uint64_t
262 hv_hypercall(struct hv_softc *sc, uint64_t control, void *input,
263     void *output)
264 {
265 	paddr_t input_pa = 0, output_pa = 0;
266 	uint64_t status = 0;
267 
268 	if (input != NULL &&
269 	    pmap_extract(pmap_kernel(), (vaddr_t)input, &input_pa) == 0) {
270 		printf("%s: hypercall input PA extraction failed\n",
271 		    sc->sc_dev.dv_xname);
272 		return (~HYPERCALL_STATUS_SUCCESS);
273 	}
274 
275 	if (output != NULL &&
276 	    pmap_extract(pmap_kernel(), (vaddr_t)output, &output_pa) == 0) {
277 		printf("%s: hypercall output PA extraction failed\n",
278 		    sc->sc_dev.dv_xname);
279 		return (~HYPERCALL_STATUS_SUCCESS);
280 	}
281 
282 #ifdef __amd64__
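	/*
	 * 64-bit hypercall convention (as documented in the Hyper-V
	 * TLFS): control word in %rcx, input PA in %rdx, output PA in
	 * %r8, result status returned in %rax.
	 */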
283 	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_pa) : "r8");
284 	__asm__ __volatile__ ("call *%3" : "=a" (status) : "c" (control),
285 	    "d" (input_pa), "m" (sc->sc_hc));
286 #else  /* __i386__ */
287 	{
288 		uint32_t control_hi = control >> 32;
289 		uint32_t control_lo = control & 0xffffffff;
290 		uint32_t status_hi = 1;
291 		uint32_t status_lo = 1;
292 
293 		__asm__ __volatile__ ("call *%8" :
294 		    "=d" (status_hi), "=a"(status_lo) :
295 		    "d" (control_hi), "a" (control_lo),
296 		    "b" (0), "c" (input_pa), "D" (0), "S" (output_pa),
297 		    "m" (sc->sc_hc));
298 
299 		status = status_lo | ((uint64_t)status_hi << 32);
300 	}
301 #endif	/* __amd64__ */
302 
303 	return (status);
304 }
305 
306 int
307 hv_init_interrupts(struct hv_softc *sc)
308 {
309 	struct cpu_info *ci = curcpu();
310 	int cpu = CPU_INFO_UNIT(ci);
311 
312 	sc->sc_idtvec = LAPIC_HYPERV_VECTOR;
313 
314 	TAILQ_INIT(&sc->sc_reqs);
315 	mtx_init(&sc->sc_reqlck, IPL_NET);
316 
317 	TAILQ_INIT(&sc->sc_rsps);
318 	mtx_init(&sc->sc_rsplck, IPL_NET);
319 
320 	sc->sc_simp[cpu] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
321 	if (sc->sc_simp[cpu] == NULL) {
322 		printf(": failed to allocate SIMP\n");
323 		return (-1);
324 	}
325 
326 	sc->sc_siep[cpu] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
327 	if (sc->sc_siep[cpu] == NULL) {
328 		printf(": failed to allocate SIEP\n");
329 		km_free(sc->sc_simp[cpu], PAGE_SIZE, &kv_any, &kp_zero);
330 		return (-1);
331 	}
332 
333 	sc->sc_proto = VMBUS_VERSION_WS2008;
334 
335 	return (hv_init_synic(sc));
336 }
337 
338 int
339 hv_init_synic(struct hv_softc *sc)
340 {
341 	struct cpu_info *ci = curcpu();
342 	int cpu = CPU_INFO_UNIT(ci);
343 	uint64_t simp, siefp, sctrl, sint;
344 	paddr_t pa;
345 
346 	/*
347 	 * Set up the SynIC message page
348 	 */
349 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_simp[cpu], &pa)) {
350 		printf(": SIMP PA extraction failed\n");
351 		return (-1);
352 	}
353 	simp = rdmsr(MSR_HV_SIMP);
354 	simp &= (1 << MSR_HV_SIMP_PGSHIFT) - 1;
355 	simp |= (atop(pa) << MSR_HV_SIMP_PGSHIFT);
356 	simp |= MSR_HV_SIMP_ENABLE;
357 	wrmsr(MSR_HV_SIMP, simp);
358 
359 	/*
360 	 * Set up the SynIC event page
361 	 */
362 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_siep[cpu], &pa)) {
363 		printf(": SIEP PA extraction failed\n");
364 		return (-1);
365 	}
366 	siefp = rdmsr(MSR_HV_SIEFP);
367 	siefp &= (1 << MSR_HV_SIEFP_PGSHIFT) - 1;
368 	siefp |= (atop(pa) << MSR_HV_SIEFP_PGSHIFT);
369 	siefp |= MSR_HV_SIEFP_ENABLE;
370 	wrmsr(MSR_HV_SIEFP, siefp);
371 
372 	/*
373 	 * Configure and unmask SINT for message and event flags
374 	 */
375 	sint = rdmsr(MSR_HV_SINT0 + VMBUS_SINT_MESSAGE);
376 	sint = sc->sc_idtvec | MSR_HV_SINT_AUTOEOI |
377 	    (sint & MSR_HV_SINT_RSVD_MASK);
378 	wrmsr(MSR_HV_SINT0 + VMBUS_SINT_MESSAGE, sint);
379 
380 	/* Enable the global synic bit */
381 	sctrl = rdmsr(MSR_HV_SCONTROL);
382 	sctrl |= MSR_HV_SCTRL_ENABLE;
383 	wrmsr(MSR_HV_SCONTROL, sctrl);
384 
385 	sc->sc_vcpus[cpu] = rdmsr(MSR_HV_VP_INDEX);
386 
387 	DPRINTF("vcpu%u: SIMP %#llx SIEFP %#llx SCTRL %#llx\n",
388 	    sc->sc_vcpus[cpu], simp, siefp, sctrl);
389 
390 	return (0);
391 }
392 
393 int
394 hv_cmd(struct hv_softc *sc, void *cmd, size_t cmdlen, void *rsp,
395     size_t rsplen, int flags)
396 {
397 	struct hv_msg msg;
398 	int rv;
399 
400 	if (cmdlen > VMBUS_MSG_DSIZE_MAX) {
401 		printf("%s: payload too large (%lu)\n", sc->sc_dev.dv_xname,
402 		    cmdlen);
403 		return (EMSGSIZE);
404 	}
405 
406 	memset(&msg, 0, sizeof(msg));
407 
408 	msg.msg_req.hc_dsize = cmdlen;
409 	memcpy(msg.msg_req.hc_data, cmd, cmdlen);
410 
411 	if (!(flags & HCF_NOREPLY)) {
412 		msg.msg_rsp = rsp;
413 		msg.msg_rsplen = rsplen;
414 	} else
415 		msg.msg_flags |= MSGF_NOQUEUE;
416 
417 	if (flags & HCF_NOSLEEP)
418 		msg.msg_flags |= MSGF_NOSLEEP;
419 
420 	if ((rv = hv_start(sc, &msg)) != 0)
421 		return (rv);
422 	return (hv_reply(sc, &msg));
423 }
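/*
 * Illustrative hv_cmd() usage, mirroring hv_vmbus_connect() below
 * (a sketch, not additional driver code):
 *
 *	struct vmbus_chanmsg_connect cmd;
 *	struct vmbus_chanmsg_connect_resp rsp;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_CONNECT;
 *	error = hv_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp),
 *	    HCF_NOSLEEP);
 */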
424 
425 int
426 hv_start(struct hv_softc *sc, struct hv_msg *msg)
427 {
428 	const int delays[] = { 100, 100, 100, 500, 500, 5000, 5000, 5000 };
429 	const char *wchan = "hvstart";
430 	uint16_t status;
431 	int i, s;
432 
433 	msg->msg_req.hc_connid = VMBUS_CONNID_MESSAGE;
434 	msg->msg_req.hc_msgtype = 1;	/* vmbus channel message */
435 
436 	if (!(msg->msg_flags & MSGF_NOQUEUE)) {
437 		mtx_enter(&sc->sc_reqlck);
438 		TAILQ_INSERT_TAIL(&sc->sc_reqs, msg, msg_entry);
439 		mtx_leave(&sc->sc_reqlck);
440 	}
441 
442 	for (i = 0; i < nitems(delays); i++) {
443 		status = hv_hypercall(sc, HYPERCALL_POST_MESSAGE,
444 		    &msg->msg_req, NULL);
445 		if (status == HYPERCALL_STATUS_SUCCESS)
446 			break;
447 		if (msg->msg_flags & MSGF_NOSLEEP) {
448 			delay(delays[i]);
449 			s = splnet();
450 			hv_intr();
451 			splx(s);
452 		} else
453 			tsleep(wchan, PRIBIO, wchan, 1);
454 	}
455 	if (status != HYPERCALL_STATUS_SUCCESS) {
456 		printf("%s: posting vmbus message failed with %d\n",
457 		    sc->sc_dev.dv_xname, status);
458 		if (!(msg->msg_flags & MSGF_NOQUEUE)) {
459 			mtx_enter(&sc->sc_reqlck);
460 			TAILQ_REMOVE(&sc->sc_reqs, msg, msg_entry);
461 			mtx_leave(&sc->sc_reqlck);
462 		}
463 		return (EIO);
464 	}
465 
466 	return (0);
467 }
468 
469 int
470 hv_reply(struct hv_softc *sc, struct hv_msg *msg)
471 {
472 	const char *wchan = "hvreply";
473 	struct hv_msg *m, *tmp;
474 	int i, s;
475 
476 	if (msg->msg_flags & MSGF_NOQUEUE)
477 		return (0);
478 
479 	for (i = 0; i < 1000; i++) {
480 		mtx_enter(&sc->sc_rsplck);
481 		TAILQ_FOREACH_SAFE(m, &sc->sc_rsps, msg_entry, tmp) {
482 			if (m == msg) {
483 				TAILQ_REMOVE(&sc->sc_rsps, m, msg_entry);
484 				break;
485 			}
486 		}
487 		mtx_leave(&sc->sc_rsplck);
488 		if (m != NULL)
489 			return (0);
490 		if (msg->msg_flags & MSGF_NOSLEEP) {
491 			delay(100000);
492 			s = splnet();
493 			hv_intr();
494 			splx(s);
495 		} else {
496 			s = tsleep(msg, PRIBIO | PCATCH, wchan, 1);
497 			if (s == EINTR || s == ERESTART)
498 				return (EINTR);
499 		}
500 	}
501 	mtx_enter(&sc->sc_rsplck);
502 	TAILQ_FOREACH_SAFE(m, &sc->sc_reqs, msg_entry, tmp) {
503 		if (m == msg) {
504 			TAILQ_REMOVE(&sc->sc_reqs, m, msg_entry);
505 			break;
506 		}
507 	}
508 	mtx_leave(&sc->sc_rsplck);
509 	return (ETIMEDOUT);
510 }
511 
512 uint16_t
513 hv_intr_signal(struct hv_softc *sc, void *con)
514 {
515 	uint64_t status;
516 
517 	status = hv_hypercall(sc, HYPERCALL_SIGNAL_EVENT, con, NULL);
518 	return ((uint16_t)status);
519 }
520 
521 void
522 hv_intr(void)
523 {
524 	struct hv_softc *sc = hv_sc;
525 
526 	hv_event_intr(sc);
527 	hv_message_intr(sc);
528 }
529 
530 void
531 hv_event_intr(struct hv_softc *sc)
532 {
533 	struct vmbus_evtflags *evt;
534 	struct cpu_info *ci = curcpu();
535 	int cpu = CPU_INFO_UNIT(ci);
536 	int bit, row, maxrow, chanid;
537 	struct hv_channel *ch;
538 	u_long *revents, pending;
539 
540 	evt = (struct vmbus_evtflags *)sc->sc_siep[cpu] +
541 	    VMBUS_SINT_MESSAGE;
542 	if ((sc->sc_proto == VMBUS_VERSION_WS2008) ||
543 	    (sc->sc_proto == VMBUS_VERSION_WIN7)) {
544 		if (!test_bit(0, &evt->evt_flags[0]))
545 			return;
546 		clear_bit(0, &evt->evt_flags[0]);
547 		maxrow = VMBUS_CHAN_MAX_COMPAT / VMBUS_EVTFLAG_LEN;
548 		/*
549 		 * Receive flags live in the second half of the event page
550 		 */
551 		revents = sc->sc_revents;
552 	} else {
553 		maxrow = nitems(evt->evt_flags);
554 		/*
555 		 * On Host with Win8 or above, the event page can be
556 		 * checked directly to get the id of the channel
557 		 * that has the pending interrupt.
558 		 */
559 		revents = &evt->evt_flags[0];
560 	}
561 
562 	for (row = 0; row < maxrow; row++) {
563 		if (revents[row] == 0)
564 			continue;
565 		pending = atomic_swap_ulong(&revents[row], 0);
566 		for (bit = 0; pending > 0; pending >>= 1, bit++) {
567 			if ((pending & 1) == 0)
568 				continue;
569 			chanid = (row * LONG_BIT) + bit;
570 			/* vmbus channel protocol message */
571 			if (chanid == 0)
572 				continue;
573 			ch = hv_channel_lookup(sc, chanid);
574 			if (ch == NULL) {
575 				printf("%s: unhandled event on %d\n",
576 				    sc->sc_dev.dv_xname, chanid);
577 				continue;
578 			}
579 			if (ch->ch_state != HV_CHANSTATE_OPENED) {
580 				printf("%s: channel %d is not active\n",
581 				    sc->sc_dev.dv_xname, chanid);
582 				continue;
583 			}
584 			ch->ch_evcnt.ec_count++;
585 			if (ch->ch_handler)
586 				ch->ch_handler(ch->ch_ctx);
587 		}
588 	}
589 }
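/*
 * Worked example of the scan above: on amd64, LONG_BIT is 64, so a
 * pending bit at row 1, bit 3 selects chanid 1 * 64 + 3 == 67; each
 * u_long row of the event flags covers LONG_BIT consecutive channels.
 */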
590 
591 void
592 hv_message_intr(struct hv_softc *sc)
593 {
594 	struct vmbus_message *msg;
595 	struct vmbus_chanmsg_hdr *hdr;
596 	struct cpu_info *ci = curcpu();
597 	int cpu = CPU_INFO_UNIT(ci);
598 
599 	for (;;) {
600 		msg = (struct vmbus_message *)sc->sc_simp[cpu] +
601 		    VMBUS_SINT_MESSAGE;
602 		if (msg->msg_type == VMBUS_MSGTYPE_NONE)
603 			break;
604 
605 		hdr = (struct vmbus_chanmsg_hdr *)msg->msg_data;
606 		if (hdr->chm_type >= VMBUS_CHANMSG_COUNT) {
607 			printf("%s: unhandled message type %u flags %#x\n",
608 			    sc->sc_dev.dv_xname, hdr->chm_type,
609 			    msg->msg_flags);
610 			goto skip;
611 		}
612 		if (hv_msg_dispatch[hdr->chm_type].hmd_handler)
613 			hv_msg_dispatch[hdr->chm_type].hmd_handler(sc, hdr);
614 		else
615 			printf("%s: unhandled message type %u\n",
616 			    sc->sc_dev.dv_xname, hdr->chm_type);
617  skip:
618 		msg->msg_type = VMBUS_MSGTYPE_NONE;
619 		membar_sync();
620 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING)
621 			wrmsr(MSR_HV_EOM, 0);
622 	}
623 }
624 
625 void
626 hv_channel_response(struct hv_softc *sc, struct vmbus_chanmsg_hdr *rsphdr)
627 {
628 	struct hv_msg *msg, *tmp;
629 	struct vmbus_chanmsg_hdr *reqhdr;
630 	int req;
631 
632 	req = hv_msg_dispatch[rsphdr->chm_type].hmd_request;
633 	mtx_enter(&sc->sc_reqlck);
634 	TAILQ_FOREACH_SAFE(msg, &sc->sc_reqs, msg_entry, tmp) {
635 		reqhdr = (struct vmbus_chanmsg_hdr *)&msg->msg_req.hc_data;
636 		if (reqhdr->chm_type == req) {
637 			TAILQ_REMOVE(&sc->sc_reqs, msg, msg_entry);
638 			break;
639 		}
640 	}
641 	mtx_leave(&sc->sc_reqlck);
642 	if (msg != NULL) {
643 		memcpy(msg->msg_rsp, rsphdr, msg->msg_rsplen);
644 		mtx_enter(&sc->sc_rsplck);
645 		TAILQ_INSERT_TAIL(&sc->sc_rsps, msg, msg_entry);
646 		mtx_leave(&sc->sc_rsplck);
647 		wakeup(msg);
648 	}
649 }
650 
651 void
652 hv_channel_offer(struct hv_softc *sc, struct vmbus_chanmsg_hdr *hdr)
653 {
654 	struct hv_offer *co;
655 
656 	co = malloc(sizeof(*co), M_DEVBUF, M_NOWAIT | M_ZERO);
657 	if (co == NULL) {
658 		printf("%s: failed to allocate an offer object\n",
659 		    sc->sc_dev.dv_xname);
660 		return;
661 	}
662 
663 	memcpy(&co->co_chan, hdr, sizeof(co->co_chan));
664 
665 	mtx_enter(&sc->sc_offerlck);
666 	SIMPLEQ_INSERT_TAIL(&sc->sc_offers, co, co_entry);
667 	mtx_leave(&sc->sc_offerlck);
668 }
669 
670 void
671 hv_channel_delivered(struct hv_softc *sc, struct vmbus_chanmsg_hdr *hdr)
672 {
673 	atomic_setbits_int(&sc->sc_flags, HSF_OFFERS_DELIVERED);
674 	wakeup(&sc->sc_offers);
675 }
676 
677 int
678 hv_vmbus_connect(struct hv_softc *sc)
679 {
680 	const uint32_t versions[] = {
681 		VMBUS_VERSION_WIN8_1, VMBUS_VERSION_WIN8,
682 		VMBUS_VERSION_WIN7, VMBUS_VERSION_WS2008
683 	};
684 	struct vmbus_chanmsg_connect cmd;
685 	struct vmbus_chanmsg_connect_resp rsp;
686 	paddr_t epa, mpa1, mpa2;
687 	int i;
688 
689 	sc->sc_events = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
690 	if (sc->sc_events == NULL) {
691 		printf(": failed to allocate channel port events page\n");
692 		goto errout;
693 	}
694 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_events, &epa)) {
695 		printf(": channel port events page PA extraction failed\n");
696 		goto errout;
697 	}
698 
699 	sc->sc_wevents = (u_long *)sc->sc_events;
700 	sc->sc_revents = (u_long *)((caddr_t)sc->sc_events + (PAGE_SIZE >> 1));
701 
702 	sc->sc_monitor[0] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
703 	if (sc->sc_monitor[0] == NULL) {
704 		printf(": failed to allocate monitor page 1\n");
705 		goto errout;
706 	}
707 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_monitor[0], &mpa1)) {
708 		printf(": monitor page 1 PA extraction failed\n");
709 		goto errout;
710 	}
711 
712 	sc->sc_monitor[1] = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
713 	if (sc->sc_monitor[1] == NULL) {
714 		printf(": failed to allocate monitor page 2\n");
715 		goto errout;
716 	}
717 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_monitor[1], &mpa2)) {
718 		printf(": monitor page 2 PA extraction failed\n");
719 		goto errout;
720 	}
721 
722 	memset(&cmd, 0, sizeof(cmd));
723 	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_CONNECT;
724 	cmd.chm_evtflags = (uint64_t)epa;
725 	cmd.chm_mnf1 = (uint64_t)mpa1;
726 	cmd.chm_mnf2 = (uint64_t)mpa2;
727 
728 	memset(&rsp, 0, sizeof(rsp));
729 
730 	for (i = 0; i < nitems(versions); i++) {
731 		cmd.chm_ver = versions[i];
732 		if (hv_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp),
733 		    HCF_NOSLEEP)) {
734 			DPRINTF("%s: CONNECT failed\n",
735 			    sc->sc_dev.dv_xname);
736 			goto errout;
737 		}
738 		if (rsp.chm_done) {
739 			sc->sc_flags |= HSF_CONNECTED;
740 			sc->sc_proto = versions[i];
741 			sc->sc_handle = VMBUS_GPADL_START;
742 			DPRINTF("%s: protocol version %#x\n",
743 			    sc->sc_dev.dv_xname, versions[i]);
744 			break;
745 		}
746 	}
747 	if (i == nitems(versions)) {
748 		printf("%s: failed to negotiate protocol version\n",
749 		    sc->sc_dev.dv_xname);
750 		goto errout;
751 	}
752 
753 	return (0);
754 
755  errout:
756 	if (sc->sc_events) {
757 		km_free(sc->sc_events, PAGE_SIZE, &kv_any, &kp_zero);
758 		sc->sc_events = NULL;
759 		sc->sc_wevents = NULL;
760 		sc->sc_revents = NULL;
761 	}
762 	if (sc->sc_monitor[0]) {
763 		km_free(sc->sc_monitor[0], PAGE_SIZE, &kv_any, &kp_zero);
764 		sc->sc_monitor[0] = NULL;
765 	}
766 	if (sc->sc_monitor[1]) {
767 		km_free(sc->sc_monitor[1], PAGE_SIZE, &kv_any, &kp_zero);
768 		sc->sc_monitor[1] = NULL;
769 	}
770 	return (-1);
771 }
772 
773 const struct hv_guid hv_guid_network = {
774 	{ 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
775 	  0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e }
776 };
777 
778 const struct hv_guid hv_guid_ide = {
779 	{ 0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
780 	  0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 }
781 };
782 
783 const struct hv_guid hv_guid_scsi = {
784 	{ 0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
785 	  0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f }
786 };
787 
788 const struct hv_guid hv_guid_shutdown = {
789 	{ 0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49,
790 	  0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb }
791 };
792 
793 const struct hv_guid hv_guid_timesync = {
794 	{ 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49,
795 	  0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf }
796 };
797 
798 const struct hv_guid hv_guid_heartbeat = {
799 	{ 0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e,
800 	  0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d }
801 };
802 
803 const struct hv_guid hv_guid_kvp = {
804 	{ 0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d,
805 	  0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6 }
806 };
807 
808 #ifdef HYPERV_DEBUG
809 const struct hv_guid hv_guid_vss = {
810 	{ 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42,
811 	  0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 }
812 };
813 
814 const struct hv_guid hv_guid_dynmem = {
815 	{ 0xdc, 0x74, 0x50, 0x52, 0x85, 0x89, 0xe2, 0x46,
816 	  0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02 }
817 };
818 
819 const struct hv_guid hv_guid_mouse = {
820 	{ 0x9e, 0xb6, 0xa8, 0xcf, 0x4a, 0x5b, 0xc0, 0x4c,
821 	  0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a }
822 };
823 
824 const struct hv_guid hv_guid_kbd = {
825 	{ 0x6d, 0xad, 0x12, 0xf9, 0x17, 0x2b, 0xea, 0x48,
826 	  0xbd, 0x65, 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84 }
827 };
828 
829 const struct hv_guid hv_guid_video = {
830 	{ 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a,
831 	  0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 }
832 };
833 
834 const struct hv_guid hv_guid_fc = {
835 	{ 0x4a, 0xcc, 0x9b, 0x2f, 0x69, 0x00, 0xf3, 0x4a,
836 	  0xb7, 0x6b, 0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda }
837 };
838 
839 const struct hv_guid hv_guid_fcopy = {
840 	{ 0xe3, 0x4b, 0xd1, 0x34, 0xe4, 0xde, 0xc8, 0x41,
841 	  0x9a, 0xe7, 0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92 }
842 };
843 
844 const struct hv_guid hv_guid_pcie = {
845 	{ 0x1d, 0xf6, 0xc4, 0x44, 0x44, 0x44, 0x00, 0x44,
846 	  0x9d, 0x52, 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f }
847 };
848 
849 const struct hv_guid hv_guid_netdir = {
850 	{ 0x3d, 0xaf, 0x2e, 0x8c, 0xa7, 0x32, 0x09, 0x4b,
851 	  0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 }
852 };
853 
854 const struct hv_guid hv_guid_rdesktop = {
855 	{ 0xf4, 0xac, 0x6a, 0x27, 0x15, 0xac, 0x6c, 0x42,
856 	  0x98, 0xdd, 0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe }
857 };
858 
859 /* Automatic Virtual Machine Activation (AVMA) Services */
860 const struct hv_guid hv_guid_avma1 = {
861 	{ 0x55, 0xb2, 0x87, 0x44, 0x8c, 0xb8, 0x3f, 0x40,
862 	  0xbb, 0x51, 0xd1, 0xf6, 0x9c, 0xf1, 0x7f, 0x87 }
863 };
864 
865 const struct hv_guid hv_guid_avma2 = {
866 	{ 0xf4, 0xba, 0x75, 0x33, 0x15, 0x9e, 0x30, 0x4b,
867 	  0xb7, 0x65, 0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b }
868 };
869 
870 const struct hv_guid hv_guid_avma3 = {
871 	{ 0xa0, 0x1f, 0x22, 0x99, 0xad, 0x24, 0xe2, 0x11,
872 	  0xbe, 0x98, 0x00, 0x1a, 0xa0, 0x1b, 0xbf, 0x6e }
873 };
874 
875 const struct hv_guid hv_guid_avma4 = {
876 	{ 0x16, 0x57, 0xe6, 0xf8, 0xb3, 0x3c, 0x06, 0x4a,
877 	  0x9a, 0x60, 0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5 }
878 };
879 
880 static inline char *
881 guidprint(struct hv_guid *a)
882 {
883 	/* 3     0  5  4 7 6  8 9  10        15 */
884 	/* 33221100-5544-7766-9988-FFEEDDCCBBAA */
885 	static char buf[16 * 2 + 4 + 1];
886 	int i, j = 0;
887 
888 	for (i = 3; i != -1; i -= 1, j += 2)
889 		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
890 	buf[j++] = '-';
891 	for (i = 5; i != 3; i -= 1, j += 2)
892 		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
893 	buf[j++] = '-';
894 	for (i = 7; i != 5; i -= 1, j += 2)
895 		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
896 	buf[j++] = '-';
897 	for (i = 8; i < 10; i += 1, j += 2)
898 		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
899 	buf[j++] = '-';
900 	for (i = 10; i < 16; i += 1, j += 2)
901 		snprintf(&buf[j], 3, "%02x", (uint8_t)a->data[i]);
902 	return (&buf[0]);
903 }
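/*
 * Example: the mixed-endian shuffle above renders hv_guid_network as
 * "f8615163-df3e-46c5-913f-f2d2f965ed0e".
 */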
904 #endif	/* HYPERV_DEBUG */
905 
906 void
907 hv_guid_sprint(struct hv_guid *guid, char *str, size_t size)
908 {
909 	const struct {
910 		const struct hv_guid	*guid;
911 		const char		*ident;
912 	} map[] = {
913 		{ &hv_guid_network,	"network" },
914 		{ &hv_guid_ide,		"ide" },
915 		{ &hv_guid_scsi,	"scsi" },
916 		{ &hv_guid_shutdown,	"shutdown" },
917 		{ &hv_guid_timesync,	"timesync" },
918 		{ &hv_guid_heartbeat,	"heartbeat" },
919 		{ &hv_guid_kvp,		"kvp" },
920 #ifdef HYPERV_DEBUG
921 		{ &hv_guid_vss,		"vss" },
922 		{ &hv_guid_dynmem,	"dynamic-memory" },
923 		{ &hv_guid_mouse,	"mouse" },
924 		{ &hv_guid_kbd,		"keyboard" },
925 		{ &hv_guid_video,	"video" },
926 		{ &hv_guid_fc,		"fiber-channel" },
927 		{ &hv_guid_fcopy,	"file-copy" },
928 		{ &hv_guid_pcie,	"pcie-passthrough" },
929 		{ &hv_guid_netdir,	"network-direct" },
930 		{ &hv_guid_rdesktop,	"remote-desktop" },
931 		{ &hv_guid_avma1,	"avma-1" },
932 		{ &hv_guid_avma2,	"avma-2" },
933 		{ &hv_guid_avma3,	"avma-3" },
934 		{ &hv_guid_avma4,	"avma-4" },
935 #endif
936 	};
937 	int i;
938 
939 	for (i = 0; i < nitems(map); i++) {
940 		if (memcmp(guid, map[i].guid, sizeof(*guid)) == 0) {
941 			strlcpy(str, map[i].ident, size);
942 			return;
943 		}
944 	}
945 #ifdef HYPERV_DEBUG
946 	strlcpy(str, guidprint(guid), size);
947 #endif
948 }
949 
950 int
951 hv_channel_scan(struct hv_softc *sc)
952 {
953 	struct vmbus_chanmsg_hdr hdr;
954 	struct vmbus_chanmsg_choffer rsp;
955 	struct hv_offer *co;
956 
957 	SIMPLEQ_INIT(&sc->sc_offers);
958 	mtx_init(&sc->sc_offerlck, IPL_NET);
959 
960 	memset(&hdr, 0, sizeof(hdr));
961 	hdr.chm_type = VMBUS_CHANMSG_CHREQUEST;
962 
963 	if (hv_cmd(sc, &hdr, sizeof(hdr), &rsp, sizeof(rsp), HCF_NOREPLY)) {
964 		DPRINTF("%s: CHREQUEST failed\n", sc->sc_dev.dv_xname);
965 		return (-1);
966 	}
967 
968 	while ((sc->sc_flags & HSF_OFFERS_DELIVERED) == 0)
969 		tsleep(&sc->sc_offers, PRIBIO, "hvoffers", 1);
970 
971 	TAILQ_INIT(&sc->sc_channels);
972 	mtx_init(&sc->sc_channelck, IPL_NET);
973 
974 	mtx_enter(&sc->sc_offerlck);
975 	while (!SIMPLEQ_EMPTY(&sc->sc_offers)) {
976 		co = SIMPLEQ_FIRST(&sc->sc_offers);
977 		SIMPLEQ_REMOVE_HEAD(&sc->sc_offers, co_entry);
978 		mtx_leave(&sc->sc_offerlck);
979 
980 		hv_process_offer(sc, co);
981 		free(co, M_DEVBUF, sizeof(*co));
982 
983 		mtx_enter(&sc->sc_offerlck);
984 	}
985 	mtx_leave(&sc->sc_offerlck);
986 
987 	return (0);
988 }
989 
990 void
991 hv_process_offer(struct hv_softc *sc, struct hv_offer *co)
992 {
993 	struct hv_channel *ch, *nch;
994 
995 	nch = malloc(sizeof(*nch), M_DEVBUF, M_ZERO | M_NOWAIT);
996 	if (nch == NULL) {
997 		printf("%s: failed to allocate memory for the channel\n",
998 		    sc->sc_dev.dv_xname);
999 		return;
1000 	}
1001 	nch->ch_sc = sc;
1002 	hv_guid_sprint(&co->co_chan.chm_chtype, nch->ch_ident,
1003 	    sizeof(nch->ch_ident));
1004 
1005 	/*
1006 	 * By default we set up state to enable batched reading.
1007 	 * A specific service can choose to disable this prior
1008 	 * to opening the channel.
1009 	 */
1010 	nch->ch_flags |= CHF_BATCHED;
1011 
1012 	KASSERT((((vaddr_t)&nch->ch_monprm) & 0x7) == 0);
1013 	memset(&nch->ch_monprm, 0, sizeof(nch->ch_monprm));
1014 	nch->ch_monprm.mp_connid = VMBUS_CONNID_EVENT;
1015 
1016 	if (sc->sc_proto != VMBUS_VERSION_WS2008)
1017 		nch->ch_monprm.mp_connid = co->co_chan.chm_connid;
1018 
1019 	if (co->co_chan.chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) {
1020 		nch->ch_mgroup = co->co_chan.chm_montrig / VMBUS_MONTRIG_LEN;
1021 		nch->ch_mindex = co->co_chan.chm_montrig % VMBUS_MONTRIG_LEN;
1022 		nch->ch_flags |= CHF_MONITOR;
1023 	}
1024 
1025 	nch->ch_id = co->co_chan.chm_chanid;
1026 
1027 	memcpy(&nch->ch_type, &co->co_chan.chm_chtype, sizeof(nch->ch_type));
1028 	memcpy(&nch->ch_inst, &co->co_chan.chm_chinst, sizeof(nch->ch_inst));
1029 
1030 	mtx_enter(&sc->sc_channelck);
1031 	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
1032 		if (!memcmp(&ch->ch_type, &nch->ch_type, sizeof(ch->ch_type)) &&
1033 		    !memcmp(&ch->ch_inst, &nch->ch_inst, sizeof(ch->ch_inst)))
1034 			break;
1035 	}
1036 	if (ch != NULL) {
1037 		if (co->co_chan.chm_subidx == 0) {
1038 			printf("%s: unknown offer \"%s\"\n",
1039 			    sc->sc_dev.dv_xname, nch->ch_ident);
1040 			mtx_leave(&sc->sc_channelck);
1041 			free(nch, M_DEVBUF, sizeof(*nch));
1042 			return;
1043 		}
1044 #ifdef HYPERV_DEBUG
1045 		printf("%s: subchannel %u for \"%s\"\n", sc->sc_dev.dv_xname,
1046 		    co->co_chan.chm_subidx, ch->ch_ident);
1047 #endif
1048 		mtx_leave(&sc->sc_channelck);
1049 		free(nch, M_DEVBUF, sizeof(*nch));
1050 		return;
1051 	}
1052 
1053 	nch->ch_state = HV_CHANSTATE_OFFERED;
1054 
1055 	TAILQ_INSERT_TAIL(&sc->sc_channels, nch, ch_entry);
1056 	mtx_leave(&sc->sc_channelck);
1057 
1058 #ifdef HYPERV_DEBUG
1059 	printf("%s: channel %u: \"%s\"", sc->sc_dev.dv_xname, nch->ch_id,
1060 	    nch->ch_ident);
1061 	if (nch->ch_flags & CHF_MONITOR)
1062 		printf(", monitor %u\n", co->co_chan.chm_montrig);
1063 	else
1064 		printf("\n");
1065 #endif
1066 }
1067 
1068 struct hv_channel *
1069 hv_channel_lookup(struct hv_softc *sc, uint32_t relid)
1070 {
1071 	struct hv_channel *ch;
1072 
1073 	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
1074 		if (ch->ch_id == relid)
1075 			return (ch);
1076 	}
1077 	return (NULL);
1078 }
1079 
1080 int
1081 hv_channel_ring_create(struct hv_channel *ch, uint32_t sndbuflen,
1082     uint32_t rcvbuflen)
1083 {
1084 	struct hv_softc *sc = ch->ch_sc;
1085 
1086 	sndbuflen = roundup(sndbuflen, PAGE_SIZE);
1087 	rcvbuflen = roundup(rcvbuflen, PAGE_SIZE);
1088 	ch->ch_ring = km_alloc(sndbuflen + rcvbuflen, &kv_any, &kp_zero,
1089 	    cold ? &kd_nowait : &kd_waitok);
1090 	if (ch->ch_ring == NULL) {
1091 		printf("%s: failed to allocate channel ring\n",
1092 		    sc->sc_dev.dv_xname);
1093 		return (-1);
1094 	}
1095 	ch->ch_ring_size = sndbuflen + rcvbuflen;
1096 	ch->ch_ring_npg = ch->ch_ring_size >> PAGE_SHIFT;
1097 
1098 	memset(&ch->ch_wrd, 0, sizeof(ch->ch_wrd));
1099 	ch->ch_wrd.rd_ring = (struct vmbus_bufring *)ch->ch_ring;
1100 	ch->ch_wrd.rd_size = sndbuflen;
1101 	ch->ch_wrd.rd_data_size = sndbuflen - sizeof(struct vmbus_bufring);
1102 	mtx_init(&ch->ch_wrd.rd_lock, IPL_NET);
1103 
1104 	memset(&ch->ch_rrd, 0, sizeof(ch->ch_rrd));
1105 	ch->ch_rrd.rd_ring = (struct vmbus_bufring *)((uint8_t *)ch->ch_ring +
1106 	    sndbuflen);
1107 	ch->ch_rrd.rd_size = rcvbuflen;
1108 	ch->ch_rrd.rd_data_size = rcvbuflen - sizeof(struct vmbus_bufring);
1109 	mtx_init(&ch->ch_rrd.rd_lock, IPL_NET);
1110 
1111 	if (hv_handle_alloc(ch, ch->ch_ring, sndbuflen + rcvbuflen,
1112 	    &ch->ch_ring_gpadl)) {
1113 		printf("%s: failed to obtain a PA handle for the ring\n",
1114 		    sc->sc_dev.dv_xname);
1115 		hv_channel_ring_destroy(ch);
1116 		return (-1);
1117 	}
1118 
1119 	return (0);
1120 }
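/*
 * Resulting ch_ring layout (illustrative):
 *
 *	+---------------------------+---------------------------+
 *	| vmbus_bufring + send data | vmbus_bufring + recv data |
 *	+---------------------------+---------------------------+
 *	0                      sndbuflen         sndbuflen+rcvbuflen
 */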
1121 
1122 void
1123 hv_channel_ring_destroy(struct hv_channel *ch)
1124 {
1125 	km_free(ch->ch_ring, ch->ch_wrd.rd_size + ch->ch_rrd.rd_size,
1126 	    &kv_any, &kp_zero);
1127 	ch->ch_ring = NULL;
1128 	hv_handle_free(ch, ch->ch_ring_gpadl);
1129 
1130 	memset(&ch->ch_wrd, 0, sizeof(ch->ch_wrd));
1131 	memset(&ch->ch_rrd, 0, sizeof(ch->ch_rrd));
1132 }
1133 
1134 int
1135 hv_channel_open(struct hv_channel *ch, void *udata, size_t udatalen,
1136     void (*handler)(void *), void *arg)
1137 {
1138 	struct hv_softc *sc = ch->ch_sc;
1139 	struct vmbus_chanmsg_chopen cmd;
1140 	struct vmbus_chanmsg_chopen_resp rsp;
1141 	int rv;
1142 
1143 	if (ch->ch_ring == NULL &&
1144 	    hv_channel_ring_create(ch, PAGE_SIZE * 4, PAGE_SIZE * 4)) {
1145 		DPRINTF(": failed to create channel ring\n");
1146 		return (-1);
1147 	}
1148 
1149 	memset(&cmd, 0, sizeof(cmd));
1150 	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_CHOPEN;
1151 	cmd.chm_openid = ch->ch_id;
1152 	cmd.chm_chanid = ch->ch_id;
1153 	cmd.chm_gpadl = ch->ch_ring_gpadl;
1154 	cmd.chm_txbr_pgcnt = ch->ch_wrd.rd_size >> PAGE_SHIFT;
1155 	cmd.chm_vcpuid = ch->ch_vcpu;
1156 
1157 	if (udata && udatalen > 0)
1158 		memcpy(cmd.chm_udata, udata, udatalen);
1159 
1160 	memset(&rsp, 0, sizeof(rsp));
1161 
1162 	ch->ch_handler = handler;
1163 	ch->ch_ctx = arg;
1164 
1165 	ch->ch_state = HV_CHANSTATE_OPENED;
1166 
1167 	rv = hv_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp), 0);
1168 	if (rv) {
1169 		hv_channel_ring_destroy(ch);
1170 		DPRINTF("%s: CHOPEN failed with %d\n",
1171 		    sc->sc_dev.dv_xname, rv);
1172 		ch->ch_handler = NULL;
1173 		ch->ch_ctx = NULL;
1174 		ch->ch_state = HV_CHANSTATE_OFFERED;
1175 		return (-1);
1176 	}
1177 
1178 	return (0);
1179 }
1180 
1181 int
1182 hv_channel_close(struct hv_channel *ch)
1183 {
1184 	struct hv_softc *sc = ch->ch_sc;
1185 	struct vmbus_chanmsg_chclose cmd;
1186 	int rv;
1187 
1188 	memset(&cmd, 0, sizeof(cmd));
1189 	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_CHCLOSE;
1190 	cmd.chm_chanid = ch->ch_id;
1191 
1192 	ch->ch_state = HV_CHANSTATE_CLOSING;
1193 	rv = hv_cmd(sc, &cmd, sizeof(cmd), NULL, 0, HCF_NOREPLY);
1194 	if (rv) {
1195 		DPRINTF("%s: CHCLOSE failed with %d\n",
1196 		    sc->sc_dev.dv_xname, rv);
1197 		return (-1);
1198 	}
1199 	ch->ch_state = HV_CHANSTATE_CLOSED;
1200 	hv_channel_ring_destroy(ch);
1201 	return (0);
1202 }
1203 
1204 static inline void
1205 hv_channel_setevent(struct hv_softc *sc, struct hv_channel *ch)
1206 {
1207 	struct vmbus_mon_trig *mtg;
1208 
1209 	/* Each uint32_t represents 32 channels */
1210 	set_bit(ch->ch_id, sc->sc_wevents);
1211 	if (ch->ch_flags & CHF_MONITOR) {
1212 		mtg = &sc->sc_monitor[1]->mnf_trigs[ch->ch_mgroup];
1213 		set_bit(ch->ch_mindex, &mtg->mt_pending);
1214 	} else
1215 		hv_intr_signal(sc, &ch->ch_monprm);
1216 }
1217 
1218 static inline void
1219 hv_ring_put(struct hv_ring_data *wrd, uint8_t *data, uint32_t datalen)
1220 {
1221 	int left = MIN(datalen, wrd->rd_data_size - wrd->rd_prod);
1222 
1223 	memcpy(&wrd->rd_ring->br_data[wrd->rd_prod], data, left);
1224 	memcpy(&wrd->rd_ring->br_data[0], data + left, datalen - left);
1225 	wrd->rd_prod += datalen;
1226 	wrd->rd_prod %= wrd->rd_data_size;
1227 }
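/*
 * Wraparound example for the above: with rd_data_size == 4096,
 * rd_prod == 4000 and datalen == 200, "left" is 96 bytes copied at
 * offset 4000, the remaining 104 land at offset 0 and rd_prod becomes
 * (4000 + 200) % 4096 == 104.
 */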
1228 
1229 static inline void
1230 hv_ring_get(struct hv_ring_data *rrd, uint8_t *data, uint32_t datalen,
1231     int peek)
1232 {
1233 	int left = MIN(datalen, rrd->rd_data_size - rrd->rd_cons);
1234 
1235 	memcpy(data, &rrd->rd_ring->br_data[rrd->rd_cons], left);
1236 	memcpy(data + left, &rrd->rd_ring->br_data[0], datalen - left);
1237 	if (!peek) {
1238 		rrd->rd_cons += datalen;
1239 		rrd->rd_cons %= rrd->rd_data_size;
1240 	}
1241 }
1242 
1243 #define	HV_BYTES_AVAIL_TO_WRITE(r, w, z)			\
1244 	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
1245 
1246 static inline void
1247 hv_ring_avail(struct hv_ring_data *rd, uint32_t *towrite, uint32_t *toread)
1248 {
1249 	uint32_t ridx = rd->rd_ring->br_rindex;
1250 	uint32_t widx = rd->rd_ring->br_windex;
1251 	uint32_t r, w;
1252 
1253 	w =  HV_BYTES_AVAIL_TO_WRITE(ridx, widx, rd->rd_data_size);
1254 	r = rd->rd_data_size - w;
1255 	if (towrite)
1256 		*towrite = w;
1257 	if (toread)
1258 		*toread = r;
1259 }
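/*
 * Example: with br_rindex == 100, br_windex == 300 and a 4096 byte
 * data area, writable space is 4096 - (300 - 100) == 3896 and readable
 * data is 4096 - 3896 == 200 bytes.
 */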
1260 
1261 int
1262 hv_ring_write(struct hv_ring_data *wrd, struct iovec *iov, int iov_cnt,
1263     int *needsig)
1264 {
1265 	uint64_t indices = 0;
1266 	uint32_t avail, oprod, datalen = sizeof(indices);
1267 	int i;
1268 
1269 	for (i = 0; i < iov_cnt; i++)
1270 		datalen += iov[i].iov_len;
1271 
1272 	KASSERT(datalen <= wrd->rd_data_size);
1273 
1274 	hv_ring_avail(wrd, &avail, NULL);
1275 	if (avail < datalen) {
1276 		printf("%s: avail %u datalen %u\n", __func__, avail, datalen);
1277 		return (EAGAIN);
1278 	}
1279 
1280 	mtx_enter(&wrd->rd_lock);
1281 
1282 	oprod = wrd->rd_prod;
1283 
1284 	for (i = 0; i < iov_cnt; i++)
1285 		hv_ring_put(wrd, iov[i].iov_base, iov[i].iov_len);
1286 
1287 	indices = (uint64_t)wrd->rd_prod << 32;
1288 	hv_ring_put(wrd, (uint8_t *)&indices, sizeof(indices));
1289 
1290 	membar_sync();
1291 	wrd->rd_ring->br_windex = wrd->rd_prod;
1292 
1293 	mtx_leave(&wrd->rd_lock);
1294 
1295 	/* Signal when the ring transitions from being empty to non-empty */
1296 	if (wrd->rd_ring->br_imask == 0 &&
1297 	    wrd->rd_ring->br_rindex == oprod)
1298 		*needsig = 1;
1299 	else
1300 		*needsig = 0;
1301 
1302 	return (0);
1303 }
1304 
1305 int
1306 hv_channel_send(struct hv_channel *ch, void *data, uint32_t datalen,
1307     uint64_t rid, int type, uint32_t flags)
1308 {
1309 	struct hv_softc *sc = ch->ch_sc;
1310 	struct vmbus_chanpkt cp;
1311 	struct iovec iov[3];
1312 	uint32_t pktlen, pktlen_aligned;
1313 	uint64_t zeropad = 0;
1314 	int rv, needsig = 0;
1315 
1316 	pktlen = sizeof(cp) + datalen;
1317 	pktlen_aligned = roundup(pktlen, sizeof(uint64_t));
1318 
1319 	cp.cp_hdr.cph_type = type;
1320 	cp.cp_hdr.cph_flags = flags;
1321 	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_hlen, sizeof(cp));
1322 	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_tlen, pktlen_aligned);
1323 	cp.cp_hdr.cph_tid = rid;
1324 
1325 	iov[0].iov_base = &cp;
1326 	iov[0].iov_len = sizeof(cp);
1327 
1328 	iov[1].iov_base = data;
1329 	iov[1].iov_len = datalen;
1330 
1331 	iov[2].iov_base = &zeropad;
1332 	iov[2].iov_len = pktlen_aligned - pktlen;
1333 
1334 	rv = hv_ring_write(&ch->ch_wrd, iov, 3, &needsig);
1335 	if (rv == 0 && needsig)
1336 		hv_channel_setevent(sc, ch);
1337 
1338 	return (rv);
1339 }
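/*
 * On-ring packet layout produced above (illustrative; cph_hlen and
 * cph_tlen are stored in the shifted units implied by
 * VMBUS_CHANPKT_SETLEN, hence the zero pad to a qword boundary):
 *
 *	| vmbus_chanpkt hdr | payload (datalen) | zero pad to 8 bytes |
 */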
1340 
1341 int
1342 hv_channel_send_sgl(struct hv_channel *ch, struct vmbus_gpa *sgl,
1343     uint32_t nsge, void *data, uint32_t datalen, uint64_t rid)
1344 {
1345 	struct hv_softc *sc = ch->ch_sc;
1346 	struct vmbus_chanpkt_sglist cp;
1347 	struct iovec iov[4];
1348 	uint32_t buflen, pktlen, pktlen_aligned;
1349 	uint64_t zeropad = 0;
1350 	int rv, needsig = 0;
1351 
1352 	buflen = sizeof(struct vmbus_gpa) * nsge;
1353 	pktlen = sizeof(cp) + datalen + buflen;
1354 	pktlen_aligned = roundup(pktlen, sizeof(uint64_t));
1355 
1356 	cp.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA;
1357 	cp.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC;
1358 	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_hlen, sizeof(cp) + buflen);
1359 	VMBUS_CHANPKT_SETLEN(cp.cp_hdr.cph_tlen, pktlen_aligned);
1360 	cp.cp_hdr.cph_tid = rid;
1361 	cp.cp_gpa_cnt = nsge;
1362 
1363 	iov[0].iov_base = &cp;
1364 	iov[0].iov_len = sizeof(cp);
1365 
1366 	iov[1].iov_base = sgl;
1367 	iov[1].iov_len = buflen;
1368 
1369 	iov[2].iov_base = data;
1370 	iov[2].iov_len = datalen;
1371 
1372 	iov[3].iov_base = &zeropad;
1373 	iov[3].iov_len = pktlen_aligned - pktlen;
1374 
1375 	rv = hv_ring_write(&ch->ch_wrd, iov, 4, &needsig);
1376 	if (rv == 0 && needsig)
1377 		hv_channel_setevent(sc, ch);
1378 
1379 	return (rv);
1380 }
1381 
1382 int
1383 hv_ring_peek(struct hv_ring_data *rrd, void *data, uint32_t datalen)
1384 {
1385 	uint32_t avail;
1386 
1387 	KASSERT(datalen <= rrd->rd_data_size);
1388 
1389 	hv_ring_avail(rrd, NULL, &avail);
1390 	if (avail < datalen)
1391 		return (EAGAIN);
1392 
1393 	mtx_enter(&rrd->rd_lock);
1394 	hv_ring_get(rrd, (uint8_t *)data, datalen, 1);
1395 	mtx_leave(&rrd->rd_lock);
1396 	return (0);
1397 }
1398 
1399 int
1400 hv_ring_read(struct hv_ring_data *rrd, void *data, uint32_t datalen,
1401     uint32_t offset)
1402 {
1403 	uint64_t indices;
1404 	uint32_t avail;
1405 
1406 	KASSERT(datalen <= rrd->rd_data_size);
1407 
1408 	hv_ring_avail(rrd, NULL, &avail);
1409 	if (avail < datalen) {
1410 		printf("%s: avail %u datalen %u\n", __func__, avail, datalen);
1411 		return (EAGAIN);
1412 	}
1413 
1414 	mtx_enter(&rrd->rd_lock);
1415 
1416 	if (offset) {
1417 		rrd->rd_cons += offset;
1418 		rrd->rd_cons %= rrd->rd_data_size;
1419 	}
1420 
1421 	hv_ring_get(rrd, (uint8_t *)data, datalen, 0);
1422 	hv_ring_get(rrd, (uint8_t *)&indices, sizeof(indices), 0);
1423 
1424 	membar_sync();
1425 	rrd->rd_ring->br_rindex = rrd->rd_cons;
1426 
1427 	mtx_leave(&rrd->rd_lock);
1428 
1429 	return (0);
1430 }
1431 
1432 int
1433 hv_channel_recv(struct hv_channel *ch, void *data, uint32_t datalen,
1434     uint32_t *rlen, uint64_t *rid, int raw)
1435 {
1436 	struct vmbus_chanpkt_hdr cph;
1437 	uint32_t offset, pktlen;
1438 	int rv;
1439 
1440 	*rlen = 0;
1441 
1442 	if ((rv = hv_ring_peek(&ch->ch_rrd, &cph, sizeof(cph))) != 0)
1443 		return (rv);
1444 
1445 	offset = raw ? 0 : VMBUS_CHANPKT_GETLEN(cph.cph_hlen);
1446 	pktlen = VMBUS_CHANPKT_GETLEN(cph.cph_tlen) - offset;
1447 	if (pktlen > datalen) {
1448 		printf("%s: pktlen %u datalen %u\n", __func__, pktlen, datalen);
1449 		return (EINVAL);
1450 	}
1451 
1452 	rv = hv_ring_read(&ch->ch_rrd, data, pktlen, offset);
1453 	if (rv == 0) {
1454 		*rlen = pktlen;
1455 		*rid = cph.cph_tid;
1456 	}
1457 
1458 	return (rv);
1459 }
1460 
1461 int
1462 hv_handle_alloc(struct hv_channel *ch, void *buffer, uint32_t buflen,
1463     uint32_t *handle)
1464 {
1465 	struct hv_softc *sc = ch->ch_sc;
1466 	struct vmbus_chanmsg_gpadl_conn *hdr;
1467 	struct vmbus_chanmsg_gpadl_subconn *cmd;
1468 	struct vmbus_chanmsg_gpadl_connresp rsp;
1469 	struct hv_msg *msg;
1470 	int i, j, last, left, rv;
1471 	int bodylen = 0, ncmds = 0, pfn = 0;
1472 	int waitok = cold ? M_NOWAIT : M_WAITOK;
1473 	uint64_t *frames;
1474 	paddr_t pa;
1475 	caddr_t body;
1476 	/* Total number of pages to reference */
1477 	int total = atop(buflen);
1478 	/* Number of pages that will fit the header */
1479 	int inhdr = MIN(total, HV_NPFNHDR);
1480 
1481 	KASSERT((buflen & (PAGE_SIZE - 1)) == 0);
1482 
1483 	if ((msg = malloc(sizeof(*msg), M_DEVBUF, M_ZERO | waitok)) == NULL)
1484 		return (ENOMEM);
1485 
1486 	/* Prepare array of frame addresses */
1487 	if ((frames = mallocarray(total, sizeof(*frames), M_DEVBUF, M_ZERO |
1488 	    waitok)) == NULL) {
1489 		free(msg, M_DEVBUF, sizeof(*msg));
1490 		return (ENOMEM);
1491 	}
1492 	for (i = 0; i < total; i++) {
1493 		if (!pmap_extract(pmap_kernel(), (vaddr_t)buffer +
1494 		    PAGE_SIZE * i, &pa)) {
1495 			free(msg, M_DEVBUF, sizeof(*msg));
1496 			free(frames, M_DEVBUF, total * sizeof(*frames));
1497 			return (EFAULT);
1498 		}
1499 		frames[i] = atop(pa);
1500 	}
1501 
1502 	msg->msg_req.hc_dsize = sizeof(struct vmbus_chanmsg_gpadl_conn) +
1503 	    inhdr * sizeof(uint64_t);	/* gpa_range is embedded in the hdr */
1504 	hdr = (struct vmbus_chanmsg_gpadl_conn *)msg->msg_req.hc_data;
1505 	msg->msg_rsp = &rsp;
1506 	msg->msg_rsplen = sizeof(rsp);
1507 	if (!waitok)
1508 		msg->msg_flags = MSGF_NOSLEEP;
1509 
1510 	left = total - inhdr;
1511 
1512 	/* Allocate additional gpadl_body structures if required */
1513 	if (left > 0) {
1514 		ncmds = MAX(1, howmany(left, HV_NPFNBODY));
1515 		bodylen = ncmds * VMBUS_MSG_DSIZE_MAX;
1516 		body = malloc(bodylen, M_DEVBUF, M_ZERO | waitok);
1517 		if (body == NULL) {
1518 			free(msg, M_DEVBUF, sizeof(*msg));
1519 			free(frames, M_DEVBUF, atop(buflen) * sizeof(*frames));
1520 			return (ENOMEM);
1521 		}
1522 	}
1523 
1524 	*handle = atomic_inc_int_nv(&sc->sc_handle);
1525 
1526 	hdr->chm_hdr.chm_type = VMBUS_CHANMSG_GPADL_CONN;
1527 	hdr->chm_chanid = ch->ch_id;
1528 	hdr->chm_gpadl = *handle;
1529 
1530 	/* Single range for a contiguous buffer */
1531 	hdr->chm_range_cnt = 1;
1532 	hdr->chm_range_len = sizeof(struct vmbus_gpa_range) + total *
1533 	    sizeof(uint64_t);
1534 	hdr->chm_range.gpa_ofs = 0;
1535 	hdr->chm_range.gpa_len = buflen;
1536 
1537 	/* Fit as many pages as possible into the header */
1538 	for (i = 0; i < inhdr; i++)
1539 		hdr->chm_range.gpa_page[i] = frames[pfn++];
1540 
1541 	for (i = 0; i < ncmds; i++) {
1542 		cmd = (struct vmbus_chanmsg_gpadl_subconn *)(body +
1543 		    VMBUS_MSG_DSIZE_MAX * i);
1544 		cmd->chm_hdr.chm_type = VMBUS_CHANMSG_GPADL_SUBCONN;
1545 		cmd->chm_gpadl = *handle;
1546 		last = MIN(left, HV_NPFNBODY);
1547 		for (j = 0; j < last; j++)
1548 			cmd->chm_gpa_page[j] = frames[pfn++];
1549 		left -= last;
1550 	}
1551 
1552 	rv = hv_start(sc, msg);
1553 	if (rv != 0) {
1554 		DPRINTF("%s: GPADL_CONN failed\n", sc->sc_dev.dv_xname);
1555 		goto out;
1556 	}
1557 	for (i = 0; i < ncmds; i++) {
1558 		int cmdlen = sizeof(*cmd);
1559 		cmd = (struct vmbus_chanmsg_gpadl_subconn *)(body +
1560 		    VMBUS_MSG_DSIZE_MAX * i);
1561 		/* Last element can be short */
1562 		if (i == ncmds - 1)
1563 			cmdlen += last * sizeof(uint64_t);
1564 		else
1565 			cmdlen += HV_NPFNBODY * sizeof(uint64_t);
1566 		rv = hv_cmd(sc, cmd, cmdlen, NULL, 0, waitok | HCF_NOREPLY);
1567 		if (rv != 0) {
1568 			DPRINTF("%s: GPADL_SUBCONN (iteration %d/%d) failed "
1569 			    "with %d\n", sc->sc_dev.dv_xname, i, ncmds, rv);
1570 			goto out;
1571 		}
1572 	}
1573 	rv = hv_reply(sc, msg);
1574 	if (rv != 0)
1575 		DPRINTF("%s: GPADL allocation failed with %d\n",
1576 		    sc->sc_dev.dv_xname, rv);
1577 
1578  out:
1579 	free(msg, M_DEVBUF, sizeof(*msg));
1580 	free(frames, M_DEVBUF, total * sizeof(*frames));
1581 	if (bodylen > 0)
1582 		free(body, M_DEVBUF, bodylen);
1583 	if (rv != 0)
1584 		return (rv);
1585 
1586 	KASSERT(*handle == rsp.chm_gpadl);
1587 
1588 	return (0);
1589 }
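/*
 * Message flow sketch for the GPADL setup above:
 *
 *	guest: GPADL_CONN     (handle, range, first HV_NPFNHDR frames)
 *	guest: GPADL_SUBCONN  (up to HV_NPFNBODY frames each, as needed)
 *	host:  GPADL_CONNRESP (echoes the handle, checked by KASSERT)
 */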
1590 
1591 void
1592 hv_handle_free(struct hv_channel *ch, uint32_t handle)
1593 {
1594 	struct hv_softc *sc = ch->ch_sc;
1595 	struct vmbus_chanmsg_gpadl_disconn cmd;
1596 	struct vmbus_chanmsg_gpadl_disconn rsp;
1597 	int rv;
1598 
1599 	memset(&cmd, 0, sizeof(cmd));
1600 	cmd.chm_hdr.chm_type = VMBUS_CHANMSG_GPADL_DISCONN;
1601 	cmd.chm_chanid = ch->ch_id;
1602 	cmd.chm_gpadl = handle;
1603 
1604 	rv = hv_cmd(sc, &cmd, sizeof(cmd), &rsp, sizeof(rsp), 0);
1605 	if (rv)
1606 		DPRINTF("%s: GPADL_DISCONN failed with %d\n",
1607 		    sc->sc_dev.dv_xname, rv);
1608 }
1609 
1610 const struct {
1611 	const char		 *id_name;
1612 	const struct hv_guid	 *id_type;
1613 	void			(*id_init)(struct hv_channel *);
1614 	void			(*id_handler)(void *);
1615 } hv_internal_devs[] = {
1616 	{ "heartbeat",	&hv_guid_heartbeat, NULL,		hv_heartbeat },
1617 	{ "kvp",	&hv_guid_kvp,	    hv_kvp_init,	hv_kvp },
1618 	{ "shutdown",	&hv_guid_shutdown,  hv_shutdown_init,	hv_shutdown },
1619 	{ "timesync",	&hv_guid_timesync,  hv_timesync_init,	hv_timesync }
1620 };
1621 
1622 void
1623 hv_attach_internal(struct hv_softc *sc)
1624 {
1625 	struct hv_channel *ch;
1626 	int i;
1627 
1628 	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
1629 		if (ch->ch_state != HV_CHANSTATE_OFFERED)
1630 			continue;
1631 		if (ch->ch_flags & CHF_MONITOR)
1632 			continue;
1633 		for (i = 0; i < nitems(hv_internal_devs); i++) {
1634 			if (memcmp(hv_internal_devs[i].id_type, &ch->ch_type,
1635 			    sizeof(ch->ch_type)) != 0)
1636 				continue;
1637 			/*
1638 			 * These services are not performance critical and
1639 			 * do not need batched reading. Furthermore, some
1640 			 * services such as KVP can only handle one message
1641 			 * from the host at a time.
1642 			 */
1643 			ch->ch_flags &= ~CHF_BATCHED;
1644 
1645 			if (hv_internal_devs[i].id_init)
1646 				hv_internal_devs[i].id_init(ch);
1647 
1648 			ch->ch_buf = km_alloc(PAGE_SIZE, &kv_any, &kp_zero,
1649 			    (cold ? &kd_nowait : &kd_waitok));
1650 			if (ch->ch_buf == NULL) {
1651 				hv_channel_ring_destroy(ch);
1652 				printf("%s: failed to allocate channel data "
1653 				    "buffer for \"%s\"", sc->sc_dev.dv_xname,
1654 				    hv_internal_devs[i].id_name);
1655 				continue;
1656 			}
1657 			ch->ch_buflen = PAGE_SIZE;
1658 
1659 			if (hv_channel_open(ch, NULL, 0,
1660 			    hv_internal_devs[i].id_handler, ch)) {
1661 				km_free(ch->ch_buf, PAGE_SIZE, &kv_any,
1662 				    &kp_zero);
1663 				ch->ch_buf = NULL;
1664 				ch->ch_buflen = 0;
1665 				printf("%s: failed to open channel for \"%s\"\n",
1666 				    sc->sc_dev.dv_xname,
1667 				    hv_internal_devs[i].id_name);
1668 			}
1669 			evcount_attach(&ch->ch_evcnt,
1670 			    hv_internal_devs[i].id_name, &sc->sc_idtvec);
1671 			break;
1672 		}
1673 	}
1674 }
1675 
1676 int
1677 hv_service_common(struct hv_channel *ch, uint32_t *rlen, uint64_t *rid,
1678     struct hv_icmsg_hdr **hdr)
1679 {
1680 	struct hv_icmsg_negotiate *msg;
1681 	int rv;
1682 
1683 	rv = hv_channel_recv(ch, ch->ch_buf, ch->ch_buflen, rlen, rid, 0);
1684 	if (rv || *rlen == 0)
1685 		return (rv);
1686 	*hdr = (struct hv_icmsg_hdr *)&ch->ch_buf[sizeof(struct hv_pipe_hdr)];
1687 	if ((*hdr)->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
1688 		msg = (struct hv_icmsg_negotiate *)(*hdr + 1);
1689 		if (msg->icframe_vercnt >= 2 &&
1690 		    msg->icversion_data[1].major == 3) {
1691 			msg->icversion_data[0].major = 3;
1692 			msg->icversion_data[0].minor = 0;
1693 			msg->icversion_data[1].major = 3;
1694 			msg->icversion_data[1].minor = 0;
1695 		} else {
1696 			msg->icversion_data[0].major = 1;
1697 			msg->icversion_data[0].minor = 0;
1698 			msg->icversion_data[1].major = 1;
1699 			msg->icversion_data[1].minor = 0;
1700 		}
1701 		msg->icframe_vercnt = 1;
1702 		msg->icmsg_vercnt = 1;
1703 		(*hdr)->icmsgsize = 0x10;
1704 	}
1705 	return (0);
1706 }
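/*
 * Negotiation sketch: the host lists the framework and message versions
 * it supports; the code above answers with a single pick (3.0 when
 * offered, 1.0 otherwise) and sets icframe_vercnt and icmsg_vercnt to 1
 * to signal that one version was chosen.
 */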
1707 
1708 void
1709 hv_heartbeat(void *arg)
1710 {
1711 	struct hv_channel *ch = arg;
1712 	struct hv_softc *sc = ch->ch_sc;
1713 	struct hv_icmsg_hdr *hdr;
1714 	struct hv_heartbeat_msg *msg;
1715 	uint64_t rid;
1716 	uint32_t rlen;
1717 	int rv;
1718 
1719 	rv = hv_service_common(ch, &rlen, &rid, &hdr);
1720 	if (rv || rlen == 0) {
1721 		if (rv != EAGAIN)
1722 			printf("heartbeat: rv=%d rlen=%u\n", rv, rlen);
1723 		return;
1724 	}
1725 	if (hdr->icmsgtype == HV_ICMSGTYPE_HEARTBEAT) {
1726 		msg = (struct hv_heartbeat_msg *)(hdr + 1);
1727 		msg->seq_num += 1;
1728 	} else if (hdr->icmsgtype != HV_ICMSGTYPE_NEGOTIATE) {
1729 		printf("%s: unhandled heartbeat message type %u\n",
1730 		    sc->sc_dev.dv_xname, hdr->icmsgtype);
1731 	}
1732 	hdr->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
1733 	hv_channel_send(ch, ch->ch_buf, rlen, rid, VMBUS_CHANPKT_TYPE_INBAND, 0);
1734 }
1735 
1736 void
1737 hv_kvp_init(struct hv_channel *ch)
1738 {
1739 	struct hv_softc *sc = ch->ch_sc;
1740 
1741 	sc->sc_pvbus->hv_kvop = hv_kvop;
1742 	sc->sc_pvbus->hv_arg = sc;
1743 }
1744 
1745 void
1746 hv_kvp(void *arg)
1747 {
1748 }
1749 
1750 int
1751 hv_kvop(void *arg, int op, char *key, char *value, size_t valuelen)
1752 {
1753 	switch (op) {
1754 	case PVBUS_KVWRITE:
1755 	case PVBUS_KVREAD:
1756 	default:
1757 		return (EOPNOTSUPP);
1758 	}
1759 }
1760 
1761 static void
1762 hv_shutdown_task(void *arg)
1763 {
1764 	extern int allowpowerdown;
1765 
1766 	if (allowpowerdown == 0)
1767 		return;
1768 
1769 	suspend_randomness();
1770 
1771 	log(LOG_KERN | LOG_NOTICE, "Shutting down in response to "
1772 	    "request from Hyper-V host\n");
1773 	prsignal(initprocess, SIGUSR2);
1774 }
1775 
1776 void
1777 hv_shutdown_init(struct hv_channel *ch)
1778 {
1779 	struct hv_softc *sc = ch->ch_sc;
1780 
1781 	task_set(&sc->sc_sdtask, hv_shutdown_task, sc);
1782 }
1783 
1784 void
1785 hv_shutdown(void *arg)
1786 {
1787 	struct hv_channel *ch = arg;
1788 	struct hv_softc *sc = ch->ch_sc;
1789 	struct hv_icmsg_hdr *hdr;
1790 	struct hv_shutdown_msg *msg;
1791 	uint64_t rid;
1792 	uint32_t rlen;
1793 	int rv, shutdown = 0;
1794 
1795 	rv = hv_service_common(ch, &rlen, &rid, &hdr);
1796 	if (rv || rlen == 0) {
1797 		if (rv != EAGAIN)
1798 			printf("shutdown: rv=%d rlen=%u\n", rv, rlen);
1799 		return;
1800 	}
1801 	if (hdr->icmsgtype == HV_ICMSGTYPE_SHUTDOWN) {
1802 		msg = (struct hv_shutdown_msg *)(hdr + 1);
1803 		if (msg->flags == 0 || msg->flags == 1) {
1804 			shutdown = 1;
1805 			hdr->status = HV_ICMSG_STATUS_OK;
1806 		} else
1807 			hdr->status = HV_ICMSG_STATUS_FAIL;
1808 	} else if (hdr->icmsgtype != HV_ICMSGTYPE_NEGOTIATE) {
1809 		printf("%s: unhandled shutdown message type %u\n",
1810 		    sc->sc_dev.dv_xname, hdr->icmsgtype);
1811 	}
1812 
1813 	hdr->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
1814 	hv_channel_send(ch, ch->ch_buf, rlen, rid, VMBUS_CHANPKT_TYPE_INBAND, 0);
1815 
1816 	if (shutdown)
1817 		task_add(systq, &sc->sc_sdtask);
1818 }
1819 
1820 void
1821 hv_timesync_init(struct hv_channel *ch)
1822 {
1823 	struct hv_softc *sc = ch->ch_sc;
1824 
1825 	strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname,
1826 	    sizeof(sc->sc_sensordev.xname));
1827 
1828 	sc->sc_sensor.type = SENSOR_TIMEDELTA;
1829 	sc->sc_sensor.status = SENSOR_S_UNKNOWN;
1830 
1831 	sensor_attach(&sc->sc_sensordev, &sc->sc_sensor);
1832 	sensordev_install(&sc->sc_sensordev);
1833 }
1834 
1835 void
1836 hv_timesync(void *arg)
1837 {
1838 	struct hv_channel *ch = arg;
1839 	struct hv_softc *sc = ch->ch_sc;
1840 	struct hv_icmsg_hdr *hdr;
1841 	struct hv_timesync_msg *msg;
1842 	struct timespec guest, host, diff;
1843 	uint64_t tns;
1844 	uint64_t rid;
1845 	uint32_t rlen;
1846 	int rv;
1847 
1848 	rv = hv_service_common(ch, &rlen, &rid, &hdr);
1849 	if (rv || rlen == 0) {
1850 		if (rv != EAGAIN)
1851 			printf("timesync: rv=%d rlen=%u\n", rv, rlen);
1852 		return;
1853 	}
1854 	if (hdr->icmsgtype == HV_ICMSGTYPE_TIMESYNC) {
1855 		msg = (struct hv_timesync_msg *)(hdr + 1);
1856 		if (msg->flags == HV_TIMESYNC_SYNC ||
1857 		    msg->flags == HV_TIMESYNC_SAMPLE) {
1858 			microtime(&sc->sc_sensor.tv);
1859 			nanotime(&guest);
1860 			tns = (msg->parent_time - 116444736000000000LL) * 100;
1861 			host.tv_sec = tns / 1000000000LL;
1862 			host.tv_nsec = tns % 1000000000LL;
1863 			timespecsub(&guest, &host, &diff);
1864 			sc->sc_sensor.value = (int64_t)diff.tv_sec *
1865 			    1000000000LL + diff.tv_nsec;
1866 			sc->sc_sensor.status = SENSOR_S_OK;
1867 		}
1868 	} else if (hdr->icmsgtype != HV_ICMSGTYPE_NEGOTIATE) {
1869 		printf("%s: unhandled timesync message type %u\n",
1870 		    sc->sc_dev.dv_xname, hdr->icmsgtype);
1871 	}
1872 
1873 	hdr->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
1874 	hv_channel_send(ch, ch->ch_buf, rlen, rid, VMBUS_CHANPKT_TYPE_INBAND, 0);
1875 }
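/*
 * The constant above converts Windows FILETIME to Unix time: the host
 * reports 100 ns units since 1601-01-01, and 116444736000000000 is the
 * number of such units between 1601 and the Unix epoch, so e.g.
 * parent_time == 116444736000000000 maps to 1970-01-01T00:00:00Z.
 */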
1876 
1877 static int
1878 hv_attach_print(void *aux, const char *name)
1879 {
1880 	struct hv_attach_args *aa = aux;
1881 
1882 	if (name)
1883 		printf("\"%s\" at %s", aa->aa_ident, name);
1884 
1885 	return (UNCONF);
1886 }
1887 
1888 int
1889 hv_attach_devices(struct hv_softc *sc)
1890 {
1891 	struct hv_dev *dv;
1892 	struct hv_channel *ch;
1893 
1894 	SLIST_INIT(&sc->sc_devs);
1895 	mtx_init(&sc->sc_devlck, IPL_NET);
1896 
1897 	TAILQ_FOREACH(ch, &sc->sc_channels, ch_entry) {
1898 		if (ch->ch_state != HV_CHANSTATE_OFFERED)
1899 			continue;
1900 		if (!(ch->ch_flags & CHF_MONITOR))
1901 			continue;
1902 		dv = malloc(sizeof(*dv), M_DEVBUF, M_ZERO | M_NOWAIT);
1903 		if (dv == NULL) {
1904 			printf("%s: failed to allocate device object\n",
1905 			    sc->sc_dev.dv_xname);
1906 			return (-1);
1907 		}
1908 		dv->dv_aa.aa_parent = sc;
1909 		dv->dv_aa.aa_type = &ch->ch_type;
1910 		dv->dv_aa.aa_inst = &ch->ch_inst;
1911 		dv->dv_aa.aa_ident = ch->ch_ident;
1912 		dv->dv_aa.aa_chan = ch;
1913 		dv->dv_aa.aa_dmat = sc->sc_dmat;
1914 		mtx_enter(&sc->sc_devlck);
1915 		SLIST_INSERT_HEAD(&sc->sc_devs, dv, dv_entry);
1916 		mtx_leave(&sc->sc_devlck);
1917 		config_found((struct device *)sc, &dv->dv_aa, hv_attach_print);
1918 	}
1919 	return (0);
1920 }
1921 
1922 void
1923 hv_evcount_attach(struct hv_channel *ch, const char *name)
1924 {
1925 	struct hv_softc *sc = ch->ch_sc;
1926 
1927 	evcount_attach(&ch->ch_evcnt, name, &sc->sc_idtvec);
1928 }
1929