1 /* $OpenBSD: pvclock.c,v 1.11 2024/05/24 10:05:55 jsg Exp $ */
2
3 /*
4 * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #if !defined(__i386__) && !defined(__amd64__)
20 #error pvclock(4) is only supported on i386 and amd64
21 #endif
22
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/timetc.h>
26
27 #include <machine/cpu.h>
28 #include <machine/atomic.h>
29 #include <uvm/uvm_extern.h>
30
31 #include <dev/pv/pvvar.h>
32 #include <dev/pv/pvreg.h>
33
34 uint pvclock_lastcount;
35
36 struct pvclock_softc {
37 struct device sc_dev;
38 void *sc_time;
39 paddr_t sc_paddr;
40 struct timecounter *sc_tc;
41 };
42
/* Device name string (dv_xname) of a softc pointer. */
#define DEVNAME(_s)	((_s)->sc_dev.dv_xname)
44
45 int pvclock_match(struct device *, void *, void *);
46 void pvclock_attach(struct device *, struct device *, void *);
47 int pvclock_activate(struct device *, int);
48
49 uint pvclock_get_timecount(struct timecounter *);
50
51 static inline uint32_t
52 pvclock_read_begin(const struct pvclock_time_info *);
53 static inline int
54 pvclock_read_done(const struct pvclock_time_info *, uint32_t);
55
56 const struct cfattach pvclock_ca = {
57 sizeof(struct pvclock_softc),
58 pvclock_match,
59 pvclock_attach,
60 NULL,
61 pvclock_activate
62 };
63
64 struct cfdriver pvclock_cd = {
65 NULL,
66 "pvclock",
67 DV_DULL
68 };
69
70 struct timecounter pvclock_timecounter = {
71 .tc_get_timecount = pvclock_get_timecount,
72 .tc_counter_mask = ~0u,
73 .tc_frequency = 0,
74 .tc_name = NULL,
75 .tc_quality = -2000,
76 .tc_priv = NULL,
77 .tc_user = 0,
78 };
79
80 int
pvclock_match(struct device * parent,void * match,void * aux)81 pvclock_match(struct device *parent, void *match, void *aux)
82 {
83 struct pv_attach_args *pva = aux;
84 struct pvbus_hv *hv;
85
86 /*
87 * pvclock is provided by different hypervisors, we currently
88 * only support the "kvmclock".
89 */
90 hv = &pva->pva_hv[PVBUS_KVM];
91 if (hv->hv_base == 0)
92 hv = &pva->pva_hv[PVBUS_OPENBSD];
93 if (hv->hv_base != 0) {
94 /*
95 * We only implement support for the 2nd version of pvclock.
96 * The first version is basically the same but with different
97 * non-standard MSRs and it is deprecated.
98 */
99 if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
100 return (0);
101
102 /*
103 * Only the "stable" clock with a sync'ed TSC is supported.
104 * In this case the host guarantees that the TSC is constant
105 * and invariant, either by the underlying TSC or by passing
106 * on a synchronized value.
107 */
108 if ((hv->hv_features &
109 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
110 return (0);
111
112 return (1);
113 }
114
115 return (0);
116 }
117
118 void
pvclock_attach(struct device * parent,struct device * self,void * aux)119 pvclock_attach(struct device *parent, struct device *self, void *aux)
120 {
121 struct pvclock_softc *sc = (struct pvclock_softc *)self;
122 struct pvclock_time_info *ti;
123 paddr_t pa;
124 uint32_t version;
125 uint8_t flags;
126
127 if ((sc->sc_time = km_alloc(PAGE_SIZE,
128 &kv_any, &kp_zero, &kd_nowait)) == NULL) {
129 printf(": time page allocation failed\n");
130 return;
131 }
132 if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
133 printf(": time page PA extraction failed\n");
134 km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
135 return;
136 }
137
138 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
139 sc->sc_paddr = pa;
140
141 ti = sc->sc_time;
142 do {
143 version = pvclock_read_begin(ti);
144 flags = ti->ti_flags;
145 } while (!pvclock_read_done(ti, version));
146
147 sc->sc_tc = &pvclock_timecounter;
148 sc->sc_tc->tc_name = DEVNAME(sc);
149 sc->sc_tc->tc_frequency = 1000000000ULL;
150 sc->sc_tc->tc_priv = sc;
151
152 pvclock_lastcount = 0;
153
154 /* Better than HPET but below TSC */
155 sc->sc_tc->tc_quality = 1500;
156
157 if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
158 /* if tsc is not stable, set a lower priority */
159 /* Better than i8254 but below HPET */
160 sc->sc_tc->tc_quality = 500;
161 }
162
163 tc_init(sc->sc_tc);
164
165 printf("\n");
166 }
167
168 int
pvclock_activate(struct device * self,int act)169 pvclock_activate(struct device *self, int act)
170 {
171 struct pvclock_softc *sc = (struct pvclock_softc *)self;
172 int rv = 0;
173 paddr_t pa = sc->sc_paddr;
174
175 switch (act) {
176 case DVACT_POWERDOWN:
177 wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
178 break;
179 case DVACT_RESUME:
180 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
181 break;
182 }
183
184 return (rv);
185 }
186
187 static inline uint32_t
pvclock_read_begin(const struct pvclock_time_info * ti)188 pvclock_read_begin(const struct pvclock_time_info *ti)
189 {
190 uint32_t version = ti->ti_version & ~0x1;
191 virtio_membar_sync();
192 return (version);
193 }
194
195 static inline int
pvclock_read_done(const struct pvclock_time_info * ti,uint32_t version)196 pvclock_read_done(const struct pvclock_time_info *ti,
197 uint32_t version)
198 {
199 virtio_membar_sync();
200 return (ti->ti_version == version);
201 }
202
203 uint
pvclock_get_timecount(struct timecounter * tc)204 pvclock_get_timecount(struct timecounter *tc)
205 {
206 struct pvclock_softc *sc = tc->tc_priv;
207 struct pvclock_time_info *ti;
208 uint64_t tsc_timestamp, system_time, delta, ctr;
209 uint32_t version, mul_frac;
210 int8_t shift;
211 uint8_t flags;
212
213 ti = sc->sc_time;
214 do {
215 version = pvclock_read_begin(ti);
216 system_time = ti->ti_system_time;
217 tsc_timestamp = ti->ti_tsc_timestamp;
218 mul_frac = ti->ti_tsc_to_system_mul;
219 shift = ti->ti_tsc_shift;
220 flags = ti->ti_flags;
221 } while (!pvclock_read_done(ti, version));
222
223 /*
224 * The algorithm is described in
225 * linux/Documentation/virtual/kvm/msr.txt
226 */
227 delta = rdtsc() - tsc_timestamp;
228 if (shift < 0)
229 delta >>= -shift;
230 else
231 delta <<= shift;
232 ctr = ((delta * mul_frac) >> 32) + system_time;
233
234 if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
235 return (ctr);
236
237 if (ctr < pvclock_lastcount)
238 return (pvclock_lastcount);
239
240 atomic_swap_uint(&pvclock_lastcount, ctr);
241
242 return (ctr);
243 }
244