xref: /openbsd-src/sys/dev/pv/pvclock.c (revision b4155af8a082e39401ba2bf7415523987f776dcf)
1 /*	$OpenBSD: pvclock.c,v 1.11 2024/05/24 10:05:55 jsg Exp $	*/
2 
3 /*
4  * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #if !defined(__i386__) && !defined(__amd64__)
20 #error pvclock(4) is only supported on i386 and amd64
21 #endif
22 
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/timetc.h>
26 
27 #include <machine/cpu.h>
28 #include <machine/atomic.h>
29 #include <uvm/uvm_extern.h>
30 
31 #include <dev/pv/pvvar.h>
32 #include <dev/pv/pvreg.h>
33 
34 uint pvclock_lastcount;
35 
36 struct pvclock_softc {
37 	struct device		 sc_dev;
38 	void			*sc_time;
39 	paddr_t			 sc_paddr;
40 	struct timecounter	*sc_tc;
41 };
42 
/* Device name string stored in the softc's embedded struct device. */
#define DEVNAME(_sc)			((_sc)->sc_dev.dv_xname)
44 
45 int	 pvclock_match(struct device *, void *, void *);
46 void	 pvclock_attach(struct device *, struct device *, void *);
47 int	 pvclock_activate(struct device *, int);
48 
49 uint	 pvclock_get_timecount(struct timecounter *);
50 
51 static inline uint32_t
52 	 pvclock_read_begin(const struct pvclock_time_info *);
53 static inline int
54 	 pvclock_read_done(const struct pvclock_time_info *, uint32_t);
55 
56 const struct cfattach pvclock_ca = {
57 	sizeof(struct pvclock_softc),
58 	pvclock_match,
59 	pvclock_attach,
60 	NULL,
61 	pvclock_activate
62 };
63 
64 struct cfdriver pvclock_cd = {
65 	NULL,
66 	"pvclock",
67 	DV_DULL
68 };
69 
70 struct timecounter pvclock_timecounter = {
71 	.tc_get_timecount = pvclock_get_timecount,
72 	.tc_counter_mask = ~0u,
73 	.tc_frequency = 0,
74 	.tc_name = NULL,
75 	.tc_quality = -2000,
76 	.tc_priv = NULL,
77 	.tc_user = 0,
78 };
79 
80 int
pvclock_match(struct device * parent,void * match,void * aux)81 pvclock_match(struct device *parent, void *match, void *aux)
82 {
83 	struct pv_attach_args	*pva = aux;
84 	struct pvbus_hv		*hv;
85 
86 	/*
87 	 * pvclock is provided by different hypervisors, we currently
88 	 * only support the "kvmclock".
89 	 */
90 	hv = &pva->pva_hv[PVBUS_KVM];
91 	if (hv->hv_base == 0)
92 		hv = &pva->pva_hv[PVBUS_OPENBSD];
93 	if (hv->hv_base != 0) {
94 		/*
95 		 * We only implement support for the 2nd version of pvclock.
96 		 * The first version is basically the same but with different
97 		 * non-standard MSRs and it is deprecated.
98 		 */
99 		if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
100 			return (0);
101 
102 		/*
103 		 * Only the "stable" clock with a sync'ed TSC is supported.
104 		 * In this case the host guarantees that the TSC is constant
105 		 * and invariant, either by the underlying TSC or by passing
106 		 * on a synchronized value.
107 		 */
108 		if ((hv->hv_features &
109 		    (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
110 			return (0);
111 
112 		return (1);
113 	}
114 
115 	return (0);
116 }
117 
118 void
pvclock_attach(struct device * parent,struct device * self,void * aux)119 pvclock_attach(struct device *parent, struct device *self, void *aux)
120 {
121 	struct pvclock_softc		*sc = (struct pvclock_softc *)self;
122 	struct pvclock_time_info	*ti;
123 	paddr_t			 	 pa;
124 	uint32_t			 version;
125 	uint8_t				 flags;
126 
127 	if ((sc->sc_time = km_alloc(PAGE_SIZE,
128 	    &kv_any, &kp_zero, &kd_nowait)) == NULL) {
129 		printf(": time page allocation failed\n");
130 		return;
131 	}
132 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
133 		printf(": time page PA extraction failed\n");
134 		km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
135 		return;
136 	}
137 
138 	wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
139 	sc->sc_paddr = pa;
140 
141 	ti = sc->sc_time;
142 	do {
143 		version = pvclock_read_begin(ti);
144 		flags = ti->ti_flags;
145 	} while (!pvclock_read_done(ti, version));
146 
147 	sc->sc_tc = &pvclock_timecounter;
148 	sc->sc_tc->tc_name = DEVNAME(sc);
149 	sc->sc_tc->tc_frequency = 1000000000ULL;
150 	sc->sc_tc->tc_priv = sc;
151 
152 	pvclock_lastcount = 0;
153 
154 	/* Better than HPET but below TSC */
155 	sc->sc_tc->tc_quality = 1500;
156 
157 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
158 		/* if tsc is not stable, set a lower priority */
159 		/* Better than i8254 but below HPET */
160 		sc->sc_tc->tc_quality = 500;
161 	}
162 
163 	tc_init(sc->sc_tc);
164 
165 	printf("\n");
166 }
167 
168 int
pvclock_activate(struct device * self,int act)169 pvclock_activate(struct device *self, int act)
170 {
171 	struct pvclock_softc	*sc = (struct pvclock_softc *)self;
172 	int			 rv = 0;
173 	paddr_t			 pa = sc->sc_paddr;
174 
175 	switch (act) {
176 	case DVACT_POWERDOWN:
177 		wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
178 		break;
179 	case DVACT_RESUME:
180 		wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
181 		break;
182 	}
183 
184 	return (rv);
185 }
186 
187 static inline uint32_t
pvclock_read_begin(const struct pvclock_time_info * ti)188 pvclock_read_begin(const struct pvclock_time_info *ti)
189 {
190 	uint32_t version = ti->ti_version & ~0x1;
191 	virtio_membar_sync();
192 	return (version);
193 }
194 
195 static inline int
pvclock_read_done(const struct pvclock_time_info * ti,uint32_t version)196 pvclock_read_done(const struct pvclock_time_info *ti,
197     uint32_t version)
198 {
199 	virtio_membar_sync();
200 	return (ti->ti_version == version);
201 }
202 
203 uint
pvclock_get_timecount(struct timecounter * tc)204 pvclock_get_timecount(struct timecounter *tc)
205 {
206 	struct pvclock_softc		*sc = tc->tc_priv;
207 	struct pvclock_time_info	*ti;
208 	uint64_t			 tsc_timestamp, system_time, delta, ctr;
209 	uint32_t			 version, mul_frac;
210 	int8_t				 shift;
211 	uint8_t				 flags;
212 
213 	ti = sc->sc_time;
214 	do {
215 		version = pvclock_read_begin(ti);
216 		system_time = ti->ti_system_time;
217 		tsc_timestamp = ti->ti_tsc_timestamp;
218 		mul_frac = ti->ti_tsc_to_system_mul;
219 		shift = ti->ti_tsc_shift;
220 		flags = ti->ti_flags;
221 	} while (!pvclock_read_done(ti, version));
222 
223 	/*
224 	 * The algorithm is described in
225 	 * linux/Documentation/virtual/kvm/msr.txt
226 	 */
227 	delta = rdtsc() - tsc_timestamp;
228 	if (shift < 0)
229 		delta >>= -shift;
230 	else
231 		delta <<= shift;
232 	ctr = ((delta * mul_frac) >> 32) + system_time;
233 
234 	if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
235 		return (ctr);
236 
237 	if (ctr < pvclock_lastcount)
238 		return (pvclock_lastcount);
239 
240 	atomic_swap_uint(&pvclock_lastcount, ctr);
241 
242 	return (ctr);
243 }
244