xref: /openbsd-src/sys/dev/pv/pvclock.c (revision 7350f337b9e3eb4461d99580e625c7ef148d107c)
1 /*	$OpenBSD: pvclock.c,v 1.4 2019/05/13 15:40:34 pd Exp $	*/
2 
3 /*
4  * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #if !defined(__i386__) && !defined(__amd64__)
20 #error pvclock(4) is only supported on i386 and amd64
21 #endif
22 
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/kernel.h>
26 #include <sys/timetc.h>
27 #include <sys/timeout.h>
28 #include <sys/malloc.h>
29 #include <sys/atomic.h>
30 
31 #include <machine/cpu.h>
32 #include <uvm/uvm_extern.h>
33 
34 #include <dev/pv/pvvar.h>
35 #include <dev/pv/pvreg.h>
36 
37 struct pvclock_softc {
38 	struct device		 sc_dev;
39 	void			*sc_time;
40 	paddr_t			 sc_paddr;
41 	struct timecounter	*sc_tc;
42 };
43 
44 #define DEVNAME(_s)			((_s)->sc_dev.dv_xname)
45 
46 int	 pvclock_match(struct device *, void *, void *);
47 void	 pvclock_attach(struct device *, struct device *, void *);
48 int	 pvclock_activate(struct device *, int);
49 
50 uint	 pvclock_get_timecount(struct timecounter *);
51 void	 pvclock_read_time_info(struct pvclock_softc *,
52 	    struct pvclock_time_info *);
53 
54 static inline uint32_t
55 	 pvclock_read_begin(const struct pvclock_time_info *);
56 static inline int
57 	 pvclock_read_done(const struct pvclock_time_info *, uint32_t);
58 
59 struct cfattach pvclock_ca = {
60 	sizeof(struct pvclock_softc),
61 	pvclock_match,
62 	pvclock_attach,
63 	NULL,
64 	pvclock_activate
65 };
66 
67 struct cfdriver pvclock_cd = {
68 	NULL,
69 	"pvclock",
70 	DV_DULL
71 };
72 
73 struct timecounter pvclock_timecounter = {
74 	pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
75 };
76 
77 int
78 pvclock_match(struct device *parent, void *match, void *aux)
79 {
80 	struct pv_attach_args	*pva = aux;
81 	struct pvbus_hv		*hv;
82 
83 	/*
84 	 * pvclock is provided by different hypervisors, we currently
85 	 * only support the "kvmclock".
86 	 */
87 	hv = &pva->pva_hv[PVBUS_KVM];
88 	if (hv->hv_base == 0)
89 		hv = &pva->pva_hv[PVBUS_OPENBSD];
90 	if (hv->hv_base != 0) {
91 		/*
92 		 * We only implement support for the 2nd version of pvclock.
93 		 * The first version is basically the same but with different
94 		 * non-standard MSRs and it is deprecated.
95 		 */
96 		if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
97 			return (0);
98 
99 		/*
100 		 * Only the "stable" clock with a sync'ed TSC is supported.
101 		 * In this case the host guarantees that the TSC is constant
102 		 * and invariant, either by the underlying TSC or by passing
103 		 * on a synchronized value.
104 		 */
105 		if ((hv->hv_features &
106 		    (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
107 			return (0);
108 
109 		return (1);
110 	}
111 
112 	return (0);
113 }
114 
115 void
116 pvclock_attach(struct device *parent, struct device *self, void *aux)
117 {
118 	struct pvclock_softc		*sc = (struct pvclock_softc *)self;
119 	struct pvclock_time_info	*ti;
120 	paddr_t			 	 pa;
121 	uint32_t			 version;
122 	uint8_t				 flags;
123 
124 	if ((sc->sc_time = km_alloc(PAGE_SIZE,
125 	    &kv_any, &kp_zero, &kd_nowait)) == NULL) {
126 		printf(": time page allocation failed\n");
127 		return;
128 	}
129 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
130 		printf(": time page PA extraction failed\n");
131 		km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
132 		return;
133 	}
134 
135 	wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
136 	sc->sc_paddr = pa;
137 
138 	ti = sc->sc_time;
139 	do {
140 		version = pvclock_read_begin(ti);
141 		flags = ti->ti_flags;
142 	} while (!pvclock_read_done(ti, version));
143 
144 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
145 		wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
146 		km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
147 		printf(": unstable clock\n");
148 		return;
149 	}
150 
151 	sc->sc_tc = &pvclock_timecounter;
152 	sc->sc_tc->tc_name = DEVNAME(sc);
153 	sc->sc_tc->tc_frequency = 1000000000ULL;
154 	sc->sc_tc->tc_priv = sc;
155 
156 	/* Better than HPET but below TSC */
157 	sc->sc_tc->tc_quality = 1500;
158 
159 	tc_init(sc->sc_tc);
160 
161 	printf("\n");
162 }
163 
164 int
165 pvclock_activate(struct device *self, int act)
166 {
167 	struct pvclock_softc	*sc = (struct pvclock_softc *)self;
168 	int			 rv = 0;
169 	paddr_t			 pa = sc->sc_paddr;
170 
171 	switch (act) {
172 	case DVACT_POWERDOWN:
173 		wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
174 		break;
175 	case DVACT_RESUME:
176 		wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
177 		break;
178 	}
179 
180 	return (rv);
181 }
182 
183 static inline uint32_t
184 pvclock_read_begin(const struct pvclock_time_info *ti)
185 {
186 	uint32_t version = ti->ti_version & ~0x1;
187 	virtio_membar_sync();
188 	return (version);
189 }
190 
191 static inline int
192 pvclock_read_done(const struct pvclock_time_info *ti,
193     uint32_t version)
194 {
195 	virtio_membar_sync();
196 	return (ti->ti_version == version);
197 }
198 
199 uint
200 pvclock_get_timecount(struct timecounter *tc)
201 {
202 	struct pvclock_softc		*sc = tc->tc_priv;
203 	struct pvclock_time_info	*ti;
204 	uint64_t			 tsc_timestamp, system_time, delta, ctr;
205 	uint32_t			 version, mul_frac;
206 	int8_t				 shift;
207 	uint8_t				 flags;
208 
209 	ti = sc->sc_time;
210 	do {
211 		version = pvclock_read_begin(ti);
212 		system_time = ti->ti_system_time;
213 		tsc_timestamp = ti->ti_tsc_timestamp;
214 		mul_frac = ti->ti_tsc_to_system_mul;
215 		shift = ti->ti_tsc_shift;
216 		flags = ti->ti_flags;
217 	} while (!pvclock_read_done(ti, version));
218 
219 	/* This bit must be set as we attached based on the stable flag */
220 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0)
221 		panic("%s: unstable result on stable clock", DEVNAME(sc));
222 
223 	/*
224 	 * The algorithm is described in
225 	 * linux/Documentation/virtual/kvm/msr.txt
226 	 */
227 	delta = rdtsc() - tsc_timestamp;
228 	if (shift < 0)
229 		delta >>= -shift;
230 	else
231 		delta <<= shift;
232 	ctr = ((delta * mul_frac) >> 32) + system_time;
233 
234 	return (ctr);
235 }
236