xref: /openbsd-src/sys/dev/pv/pvclock.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 /*	$OpenBSD: pvclock.c,v 1.7 2021/02/23 04:44:31 cheloha Exp $	*/
2 
3 /*
4  * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #if !defined(__i386__) && !defined(__amd64__)
20 #error pvclock(4) is only supported on i386 and amd64
21 #endif
22 
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/kernel.h>
26 #include <sys/timetc.h>
27 #include <sys/timeout.h>
28 #include <sys/malloc.h>
29 #include <sys/atomic.h>
30 
31 #include <machine/cpu.h>
32 #include <machine/atomic.h>
33 #include <uvm/uvm_extern.h>
34 
35 #include <dev/pv/pvvar.h>
36 #include <dev/pv/pvreg.h>
37 
/*
 * Last counter value stored by pvclock_get_timecount() on its
 * non-stable path; reset to 0 by pvclock_attach().
 */
unsigned int pvclock_lastcount;
39 
40 struct pvclock_softc {
41 	struct device		 sc_dev;
42 	void			*sc_time;
43 	paddr_t			 sc_paddr;
44 	struct timecounter	*sc_tc;
45 };
46 
/* Name string (dv_xname) of the device embedded in a softc. */
#define DEVNAME(_sc)			((_sc)->sc_dev.dv_xname)
48 
49 int	 pvclock_match(struct device *, void *, void *);
50 void	 pvclock_attach(struct device *, struct device *, void *);
51 int	 pvclock_activate(struct device *, int);
52 
53 uint	 pvclock_get_timecount(struct timecounter *);
54 void	 pvclock_read_time_info(struct pvclock_softc *,
55 	    struct pvclock_time_info *);
56 
57 static inline uint32_t
58 	 pvclock_read_begin(const struct pvclock_time_info *);
59 static inline int
60 	 pvclock_read_done(const struct pvclock_time_info *, uint32_t);
61 
62 struct cfattach pvclock_ca = {
63 	sizeof(struct pvclock_softc),
64 	pvclock_match,
65 	pvclock_attach,
66 	NULL,
67 	pvclock_activate
68 };
69 
70 struct cfdriver pvclock_cd = {
71 	NULL,
72 	"pvclock",
73 	DV_DULL
74 };
75 
76 struct timecounter pvclock_timecounter = {
77 	.tc_get_timecount = pvclock_get_timecount,
78 	.tc_poll_pps = NULL,
79 	.tc_counter_mask = ~0u,
80 	.tc_frequency = 0,
81 	.tc_name = NULL,
82 	.tc_quality = -2000,
83 	.tc_priv = NULL,
84 	.tc_user = 0,
85 };
86 
87 int
88 pvclock_match(struct device *parent, void *match, void *aux)
89 {
90 	struct pv_attach_args	*pva = aux;
91 	struct pvbus_hv		*hv;
92 
93 	/*
94 	 * pvclock is provided by different hypervisors, we currently
95 	 * only support the "kvmclock".
96 	 */
97 	hv = &pva->pva_hv[PVBUS_KVM];
98 	if (hv->hv_base == 0)
99 		hv = &pva->pva_hv[PVBUS_OPENBSD];
100 	if (hv->hv_base != 0) {
101 		/*
102 		 * We only implement support for the 2nd version of pvclock.
103 		 * The first version is basically the same but with different
104 		 * non-standard MSRs and it is deprecated.
105 		 */
106 		if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
107 			return (0);
108 
109 		/*
110 		 * Only the "stable" clock with a sync'ed TSC is supported.
111 		 * In this case the host guarantees that the TSC is constant
112 		 * and invariant, either by the underlying TSC or by passing
113 		 * on a synchronized value.
114 		 */
115 		if ((hv->hv_features &
116 		    (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
117 			return (0);
118 
119 		return (1);
120 	}
121 
122 	return (0);
123 }
124 
125 void
126 pvclock_attach(struct device *parent, struct device *self, void *aux)
127 {
128 	struct pvclock_softc		*sc = (struct pvclock_softc *)self;
129 	struct pvclock_time_info	*ti;
130 	paddr_t			 	 pa;
131 	uint32_t			 version;
132 	uint8_t				 flags;
133 
134 	if ((sc->sc_time = km_alloc(PAGE_SIZE,
135 	    &kv_any, &kp_zero, &kd_nowait)) == NULL) {
136 		printf(": time page allocation failed\n");
137 		return;
138 	}
139 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
140 		printf(": time page PA extraction failed\n");
141 		km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
142 		return;
143 	}
144 
145 	wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
146 	sc->sc_paddr = pa;
147 
148 	ti = sc->sc_time;
149 	do {
150 		version = pvclock_read_begin(ti);
151 		flags = ti->ti_flags;
152 	} while (!pvclock_read_done(ti, version));
153 
154 	sc->sc_tc = &pvclock_timecounter;
155 	sc->sc_tc->tc_name = DEVNAME(sc);
156 	sc->sc_tc->tc_frequency = 1000000000ULL;
157 	sc->sc_tc->tc_priv = sc;
158 
159 	pvclock_lastcount = 0;
160 
161 	/* Better than HPET but below TSC */
162 	sc->sc_tc->tc_quality = 1500;
163 
164 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
165 		/* if tsc is not stable, set a lower priority */
166 		/* Better than i8254 but below HPET */
167 		sc->sc_tc->tc_quality = 500;
168 	}
169 
170 	tc_init(sc->sc_tc);
171 
172 	printf("\n");
173 }
174 
175 int
176 pvclock_activate(struct device *self, int act)
177 {
178 	struct pvclock_softc	*sc = (struct pvclock_softc *)self;
179 	int			 rv = 0;
180 	paddr_t			 pa = sc->sc_paddr;
181 
182 	switch (act) {
183 	case DVACT_POWERDOWN:
184 		wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
185 		break;
186 	case DVACT_RESUME:
187 		wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
188 		break;
189 	}
190 
191 	return (rv);
192 }
193 
194 static inline uint32_t
195 pvclock_read_begin(const struct pvclock_time_info *ti)
196 {
197 	uint32_t version = ti->ti_version & ~0x1;
198 	virtio_membar_sync();
199 	return (version);
200 }
201 
202 static inline int
203 pvclock_read_done(const struct pvclock_time_info *ti,
204     uint32_t version)
205 {
206 	virtio_membar_sync();
207 	return (ti->ti_version == version);
208 }
209 
210 uint
211 pvclock_get_timecount(struct timecounter *tc)
212 {
213 	struct pvclock_softc		*sc = tc->tc_priv;
214 	struct pvclock_time_info	*ti;
215 	uint64_t			 tsc_timestamp, system_time, delta, ctr;
216 	uint32_t			 version, mul_frac;
217 	int8_t				 shift;
218 	uint8_t				 flags;
219 
220 	ti = sc->sc_time;
221 	do {
222 		version = pvclock_read_begin(ti);
223 		system_time = ti->ti_system_time;
224 		tsc_timestamp = ti->ti_tsc_timestamp;
225 		mul_frac = ti->ti_tsc_to_system_mul;
226 		shift = ti->ti_tsc_shift;
227 		flags = ti->ti_flags;
228 	} while (!pvclock_read_done(ti, version));
229 
230 	/*
231 	 * The algorithm is described in
232 	 * linux/Documentation/virtual/kvm/msr.txt
233 	 */
234 	delta = rdtsc() - tsc_timestamp;
235 	if (shift < 0)
236 		delta >>= -shift;
237 	else
238 		delta <<= shift;
239 	ctr = ((delta * mul_frac) >> 32) + system_time;
240 
241 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
242 		return (ctr);
243 
244 	if (ctr < pvclock_lastcount)
245 		return (pvclock_lastcount);
246 
247 	atomic_swap_uint(&pvclock_lastcount, ctr);
248 
249 	return (ctr);
250 }
251