xref: /openbsd-src/sys/dev/pv/pvclock.c (revision c90a81c56dcebd6a1b73fe4aff9b03385b8e63b3)
1 /*	$OpenBSD: pvclock.c,v 1.3 2018/12/05 18:02:51 reyk Exp $	*/
2 
3 /*
4  * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #if !defined(__i386__) && !defined(__amd64__)
20 #error pvclock(4) is only supported on i386 and amd64
21 #endif
22 
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/kernel.h>
26 #include <sys/timetc.h>
27 #include <sys/timeout.h>
28 #include <sys/malloc.h>
29 #include <sys/atomic.h>
30 
31 #include <machine/cpu.h>
32 #include <uvm/uvm_extern.h>
33 
34 #include <dev/pv/pvvar.h>
35 #include <dev/pv/pvreg.h>
36 
/*
 * Per-device driver state.
 *
 *  sc_dev	device header; pvclock_attach() and pvclock_activate() cast
 *		their struct device * argument to this softc.
 *  sc_time	page obtained from km_alloc() in pvclock_attach(); it is
 *		read through a struct pvclock_time_info pointer.
 *  sc_paddr	physical address of that page, as written (with or without
 *		PVCLOCK_SYSTEM_TIME_ENABLE) to KVM_MSR_SYSTEM_TIME.
 *  sc_tc	set to &pvclock_timecounter only after attach has accepted
 *		the clock.
 */
37 struct pvclock_softc {
38 	struct device		 sc_dev;
39 	void			*sc_time;
40 	paddr_t			 sc_paddr;
41 	struct timecounter	*sc_tc;
42 };
43 
/*
 * Packed wall-clock record: a version word followed by seconds and
 * nanoseconds.  No code visible in this file references it.
 * NOTE(review): presumably mirrors the hypervisor's wall-clock layout --
 * confirm against the KVM MSR documentation before using it.
 */
44 struct pvclock_wall_clock {
45 	uint32_t		 wc_version;
46 	uint32_t		 wc_sec;
47 	uint32_t		 wc_nsec;
48 } __packed;
49 
/*
 * Packed layout used to read the time page whose physical address is
 * handed to KVM_MSR_SYSTEM_TIME in pvclock_attach().
 * NOTE(review): the page is presumably updated by the hypervisor; nothing
 * in this file writes to it.
 *
 *  ti_version		sampled before and after each read by
 *			pvclock_read_begin()/pvclock_read_done(); the reader
 *			retries when the low bit was set or the value changed.
 *  ti_tsc_timestamp	TSC value subtracted from rdtsc() to form the delta.
 *  ti_system_time	base value added to the scaled delta.
 *  ti_tsc_to_system_mul multiplier for the delta; the product is shifted
 *			right by 32 bits.
 *  ti_tsc_shift	signed pre-scale applied to the delta before the
 *			multiply: negative shifts right, otherwise left.
 *  ti_flags		tested against PVCLOCK_FLAG_TSC_STABLE.
 *  ti_pad0, ti_pad	not accessed by this file.
 */
50 struct pvclock_time_info {
51 	uint32_t		 ti_version;
52 	uint32_t		 ti_pad0;
53 	uint64_t		 ti_tsc_timestamp;
54 	uint64_t		 ti_system_time;
55 	uint32_t		 ti_tsc_to_system_mul;
56 	int8_t			 ti_tsc_shift;
57 	uint8_t			 ti_flags;
58 	uint8_t			 ti_pad[2];
59 } __packed;
60 
/*
 * PVCLOCK_FLAG_TSC_STABLE	bit in ti_flags that attach and the
 *				timecounter read routine both require.
 * PVCLOCK_SYSTEM_TIME_ENABLE	low bit of the value written to
 *				KVM_MSR_SYSTEM_TIME: OR'ed into the page's
 *				physical address to enable the clock, masked
 *				out to disable it.
 * DEVNAME(_s)			device name string of softc _s.
 */
61 #define PVCLOCK_FLAG_TSC_STABLE		0x01
62 #define PVCLOCK_SYSTEM_TIME_ENABLE	0x01
63 #define DEVNAME(_s)			((_s)->sc_dev.dv_xname)
64 
/*
 * Forward declarations: the three autoconf entry points referenced by
 * pvclock_ca, the timecounter read routine, and the two helpers that
 * bracket a consistent read of the shared time record.
 * NOTE(review): pvclock_read_time_info() is declared here but no
 * definition is visible in this file -- confirm whether it is still needed.
 */
65 int	 pvclock_match(struct device *, void *, void *);
66 void	 pvclock_attach(struct device *, struct device *, void *);
67 int	 pvclock_activate(struct device *, int);
68 
69 uint	 pvclock_get_timecount(struct timecounter *);
70 void	 pvclock_read_time_info(struct pvclock_softc *,
71 	    struct pvclock_time_info *);
72 
73 static inline uint32_t
74 	 pvclock_read_begin(const struct pvclock_time_info *);
75 static inline int
76 	 pvclock_read_done(const struct pvclock_time_info *, uint32_t);
77 
78 struct cfattach pvclock_ca = {
79 	sizeof(struct pvclock_softc),
80 	pvclock_match,
81 	pvclock_attach,
82 	NULL,
83 	pvclock_activate
84 };
85 
86 struct cfdriver pvclock_cd = {
87 	NULL,
88 	"pvclock",
89 	DV_DULL
90 };
91 
92 struct timecounter pvclock_timecounter = {
93 	pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
94 };
95 
96 int
97 pvclock_match(struct device *parent, void *match, void *aux)
98 {
99 	struct pv_attach_args	*pva = aux;
100 	struct pvbus_hv		*hv;
101 
102 	/*
103 	 * pvclock is provided by different hypervisors, we currently
104 	 * only support the "kvmclock".
105 	 */
106 	hv = &pva->pva_hv[PVBUS_KVM];
107 	if (hv->hv_base != 0) {
108 		/*
109 		 * We only implement support for the 2nd version of pvclock.
110 		 * The first version is basically the same but with different
111 		 * non-standard MSRs and it is deprecated.
112 		 */
113 		if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
114 			return (0);
115 
116 		/*
117 		 * Only the "stable" clock with a sync'ed TSC is supported.
118 		 * In this case the host guarantees that the TSC is constant
119 		 * and invariant, either by the underlying TSC or by passing
120 		 * on a synchronized value.
121 		 */
122 		if ((hv->hv_features &
123 		    (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
124 			return (0);
125 
126 		return (1);
127 	}
128 
129 	return (0);
130 }
131 
132 void
133 pvclock_attach(struct device *parent, struct device *self, void *aux)
134 {
135 	struct pvclock_softc		*sc = (struct pvclock_softc *)self;
136 	struct pvclock_time_info	*ti;
137 	paddr_t			 	 pa;
138 	uint32_t			 version;
139 	uint8_t				 flags;
140 
141 	if ((sc->sc_time = km_alloc(PAGE_SIZE,
142 	    &kv_any, &kp_zero, &kd_nowait)) == NULL) {
143 		printf(": time page allocation failed\n");
144 		return;
145 	}
146 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
147 		printf(": time page PA extraction failed\n");
148 		km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
149 		return;
150 	}
151 
152 	wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
153 	sc->sc_paddr = pa;
154 
155 	ti = sc->sc_time;
156 	do {
157 		version = pvclock_read_begin(ti);
158 		flags = ti->ti_flags;
159 	} while (!pvclock_read_done(ti, version));
160 
161 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
162 		wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
163 		km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
164 		printf(": unstable clock\n");
165 		return;
166 	}
167 
168 	sc->sc_tc = &pvclock_timecounter;
169 	sc->sc_tc->tc_name = DEVNAME(sc);
170 	sc->sc_tc->tc_frequency = 1000000000ULL;
171 	sc->sc_tc->tc_priv = sc;
172 
173 	/* Better than HPET but below TSC */
174 	sc->sc_tc->tc_quality = 1500;
175 
176 	tc_init(sc->sc_tc);
177 
178 	printf("\n");
179 }
180 
181 int
182 pvclock_activate(struct device *self, int act)
183 {
184 	struct pvclock_softc	*sc = (struct pvclock_softc *)self;
185 	int			 rv = 0;
186 	paddr_t			 pa = sc->sc_paddr;
187 
188 	switch (act) {
189 	case DVACT_POWERDOWN:
190 		wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
191 		break;
192 	case DVACT_RESUME:
193 		wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
194 		break;
195 	}
196 
197 	return (rv);
198 }
199 
200 static inline uint32_t
201 pvclock_read_begin(const struct pvclock_time_info *ti)
202 {
203 	uint32_t version = ti->ti_version & ~0x1;
204 	virtio_membar_sync();
205 	return (version);
206 }
207 
208 static inline int
209 pvclock_read_done(const struct pvclock_time_info *ti,
210     uint32_t version)
211 {
212 	virtio_membar_sync();
213 	return (ti->ti_version == version);
214 }
215 
216 uint
217 pvclock_get_timecount(struct timecounter *tc)
218 {
219 	struct pvclock_softc		*sc = tc->tc_priv;
220 	struct pvclock_time_info	*ti;
221 	uint64_t			 tsc_timestamp, system_time, delta, ctr;
222 	uint32_t			 version, mul_frac;
223 	int8_t				 shift;
224 	uint8_t				 flags;
225 
226 	ti = sc->sc_time;
227 	do {
228 		version = pvclock_read_begin(ti);
229 		system_time = ti->ti_system_time;
230 		tsc_timestamp = ti->ti_tsc_timestamp;
231 		mul_frac = ti->ti_tsc_to_system_mul;
232 		shift = ti->ti_tsc_shift;
233 		flags = ti->ti_flags;
234 	} while (!pvclock_read_done(ti, version));
235 
236 	/* This bit must be set as we attached based on the stable flag */
237 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0)
238 		panic("%s: unstable result on stable clock", DEVNAME(sc));
239 
240 	/*
241 	 * The algorithm is described in
242 	 * linux/Documentation/virtual/kvm/msr.txt
243 	 */
244 	delta = rdtsc() - tsc_timestamp;
245 	if (shift < 0)
246 		delta >>= -shift;
247 	else
248 		delta <<= shift;
249 	ctr = ((delta * mul_frac) >> 32) + system_time;
250 
251 	return (ctr);
252 }
253