xref: /openbsd-src/sys/dev/pv/pvclock.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 /*	$OpenBSD: pvclock.c,v 1.9 2023/02/04 19:19:37 cheloha Exp $	*/
2 
3 /*
4  * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
/*
 * This driver uses x86-only primitives (rdtsc(), wrmsr()), so refuse to
 * build for anything but i386 and amd64.
 */
#if !(defined(__i386__) || defined(__amd64__))
#error pvclock(4) is only supported on i386 and amd64
#endif
22 
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/kernel.h>
26 #include <sys/timetc.h>
27 #include <sys/timeout.h>
28 #include <sys/malloc.h>
29 #include <sys/atomic.h>
30 
31 #include <machine/cpu.h>
32 #include <machine/atomic.h>
33 #include <uvm/uvm_extern.h>
34 
35 #include <dev/pv/pvvar.h>
36 #include <dev/pv/pvreg.h>
37 
38 uint pvclock_lastcount;
39 
40 struct pvclock_softc {
41 	struct device		 sc_dev;
42 	void			*sc_time;
43 	paddr_t			 sc_paddr;
44 	struct timecounter	*sc_tc;
45 };
46 
47 #define DEVNAME(_s)			((_s)->sc_dev.dv_xname)
48 
49 int	 pvclock_match(struct device *, void *, void *);
50 void	 pvclock_attach(struct device *, struct device *, void *);
51 int	 pvclock_activate(struct device *, int);
52 
53 uint	 pvclock_get_timecount(struct timecounter *);
54 void	 pvclock_read_time_info(struct pvclock_softc *,
55 	    struct pvclock_time_info *);
56 
57 static inline uint32_t
58 	 pvclock_read_begin(const struct pvclock_time_info *);
59 static inline int
60 	 pvclock_read_done(const struct pvclock_time_info *, uint32_t);
61 
62 const struct cfattach pvclock_ca = {
63 	sizeof(struct pvclock_softc),
64 	pvclock_match,
65 	pvclock_attach,
66 	NULL,
67 	pvclock_activate
68 };
69 
70 struct cfdriver pvclock_cd = {
71 	NULL,
72 	"pvclock",
73 	DV_DULL
74 };
75 
76 struct timecounter pvclock_timecounter = {
77 	.tc_get_timecount = pvclock_get_timecount,
78 	.tc_counter_mask = ~0u,
79 	.tc_frequency = 0,
80 	.tc_name = NULL,
81 	.tc_quality = -2000,
82 	.tc_priv = NULL,
83 	.tc_user = 0,
84 };
85 
86 int
87 pvclock_match(struct device *parent, void *match, void *aux)
88 {
89 	struct pv_attach_args	*pva = aux;
90 	struct pvbus_hv		*hv;
91 
92 	/*
93 	 * pvclock is provided by different hypervisors, we currently
94 	 * only support the "kvmclock".
95 	 */
96 	hv = &pva->pva_hv[PVBUS_KVM];
97 	if (hv->hv_base == 0)
98 		hv = &pva->pva_hv[PVBUS_OPENBSD];
99 	if (hv->hv_base != 0) {
100 		/*
101 		 * We only implement support for the 2nd version of pvclock.
102 		 * The first version is basically the same but with different
103 		 * non-standard MSRs and it is deprecated.
104 		 */
105 		if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
106 			return (0);
107 
108 		/*
109 		 * Only the "stable" clock with a sync'ed TSC is supported.
110 		 * In this case the host guarantees that the TSC is constant
111 		 * and invariant, either by the underlying TSC or by passing
112 		 * on a synchronized value.
113 		 */
114 		if ((hv->hv_features &
115 		    (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
116 			return (0);
117 
118 		return (1);
119 	}
120 
121 	return (0);
122 }
123 
124 void
125 pvclock_attach(struct device *parent, struct device *self, void *aux)
126 {
127 	struct pvclock_softc		*sc = (struct pvclock_softc *)self;
128 	struct pvclock_time_info	*ti;
129 	paddr_t			 	 pa;
130 	uint32_t			 version;
131 	uint8_t				 flags;
132 
133 	if ((sc->sc_time = km_alloc(PAGE_SIZE,
134 	    &kv_any, &kp_zero, &kd_nowait)) == NULL) {
135 		printf(": time page allocation failed\n");
136 		return;
137 	}
138 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
139 		printf(": time page PA extraction failed\n");
140 		km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
141 		return;
142 	}
143 
144 	wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
145 	sc->sc_paddr = pa;
146 
147 	ti = sc->sc_time;
148 	do {
149 		version = pvclock_read_begin(ti);
150 		flags = ti->ti_flags;
151 	} while (!pvclock_read_done(ti, version));
152 
153 	sc->sc_tc = &pvclock_timecounter;
154 	sc->sc_tc->tc_name = DEVNAME(sc);
155 	sc->sc_tc->tc_frequency = 1000000000ULL;
156 	sc->sc_tc->tc_priv = sc;
157 
158 	pvclock_lastcount = 0;
159 
160 	/* Better than HPET but below TSC */
161 	sc->sc_tc->tc_quality = 1500;
162 
163 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
164 		/* if tsc is not stable, set a lower priority */
165 		/* Better than i8254 but below HPET */
166 		sc->sc_tc->tc_quality = 500;
167 	}
168 
169 	tc_init(sc->sc_tc);
170 
171 	printf("\n");
172 }
173 
174 int
175 pvclock_activate(struct device *self, int act)
176 {
177 	struct pvclock_softc	*sc = (struct pvclock_softc *)self;
178 	int			 rv = 0;
179 	paddr_t			 pa = sc->sc_paddr;
180 
181 	switch (act) {
182 	case DVACT_POWERDOWN:
183 		wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
184 		break;
185 	case DVACT_RESUME:
186 		wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
187 		break;
188 	}
189 
190 	return (rv);
191 }
192 
193 static inline uint32_t
194 pvclock_read_begin(const struct pvclock_time_info *ti)
195 {
196 	uint32_t version = ti->ti_version & ~0x1;
197 	virtio_membar_sync();
198 	return (version);
199 }
200 
201 static inline int
202 pvclock_read_done(const struct pvclock_time_info *ti,
203     uint32_t version)
204 {
205 	virtio_membar_sync();
206 	return (ti->ti_version == version);
207 }
208 
209 uint
210 pvclock_get_timecount(struct timecounter *tc)
211 {
212 	struct pvclock_softc		*sc = tc->tc_priv;
213 	struct pvclock_time_info	*ti;
214 	uint64_t			 tsc_timestamp, system_time, delta, ctr;
215 	uint32_t			 version, mul_frac;
216 	int8_t				 shift;
217 	uint8_t				 flags;
218 
219 	ti = sc->sc_time;
220 	do {
221 		version = pvclock_read_begin(ti);
222 		system_time = ti->ti_system_time;
223 		tsc_timestamp = ti->ti_tsc_timestamp;
224 		mul_frac = ti->ti_tsc_to_system_mul;
225 		shift = ti->ti_tsc_shift;
226 		flags = ti->ti_flags;
227 	} while (!pvclock_read_done(ti, version));
228 
229 	/*
230 	 * The algorithm is described in
231 	 * linux/Documentation/virtual/kvm/msr.txt
232 	 */
233 	delta = rdtsc() - tsc_timestamp;
234 	if (shift < 0)
235 		delta >>= -shift;
236 	else
237 		delta <<= shift;
238 	ctr = ((delta * mul_frac) >> 32) + system_time;
239 
240 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
241 		return (ctr);
242 
243 	if (ctr < pvclock_lastcount)
244 		return (pvclock_lastcount);
245 
246 	atomic_swap_uint(&pvclock_lastcount, ctr);
247 
248 	return (ctr);
249 }
250