1 /* $OpenBSD: pvclock.c,v 1.3 2018/12/05 18:02:51 reyk Exp $ */ 2 3 /* 4 * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #if !defined(__i386__) && !defined(__amd64__) 20 #error pvclock(4) is only supported on i386 and amd64 21 #endif 22 23 #include <sys/param.h> 24 #include <sys/systm.h> 25 #include <sys/kernel.h> 26 #include <sys/timetc.h> 27 #include <sys/timeout.h> 28 #include <sys/malloc.h> 29 #include <sys/atomic.h> 30 31 #include <machine/cpu.h> 32 #include <uvm/uvm_extern.h> 33 34 #include <dev/pv/pvvar.h> 35 #include <dev/pv/pvreg.h> 36 37 struct pvclock_softc { 38 struct device sc_dev; 39 void *sc_time; 40 paddr_t sc_paddr; 41 struct timecounter *sc_tc; 42 }; 43 44 struct pvclock_wall_clock { 45 uint32_t wc_version; 46 uint32_t wc_sec; 47 uint32_t wc_nsec; 48 } __packed; 49 50 struct pvclock_time_info { 51 uint32_t ti_version; 52 uint32_t ti_pad0; 53 uint64_t ti_tsc_timestamp; 54 uint64_t ti_system_time; 55 uint32_t ti_tsc_to_system_mul; 56 int8_t ti_tsc_shift; 57 uint8_t ti_flags; 58 uint8_t ti_pad[2]; 59 } __packed; 60 61 #define PVCLOCK_FLAG_TSC_STABLE 0x01 62 #define PVCLOCK_SYSTEM_TIME_ENABLE 0x01 63 #define DEVNAME(_s) ((_s)->sc_dev.dv_xname) 64 65 int pvclock_match(struct device *, void *, void *); 66 void pvclock_attach(struct device *, struct device *, void *); 67 int pvclock_activate(struct device *, int); 68 69 uint pvclock_get_timecount(struct timecounter *); 70 void pvclock_read_time_info(struct pvclock_softc *, 71 struct pvclock_time_info *); 72 73 static inline uint32_t 74 pvclock_read_begin(const struct pvclock_time_info *); 75 static inline int 76 pvclock_read_done(const struct pvclock_time_info *, uint32_t); 77 78 struct cfattach pvclock_ca = { 79 sizeof(struct pvclock_softc), 80 pvclock_match, 81 pvclock_attach, 82 NULL, 83 pvclock_activate 84 }; 85 86 struct cfdriver pvclock_cd = { 87 NULL, 88 "pvclock", 89 DV_DULL 90 }; 91 92 struct timecounter pvclock_timecounter = { 93 pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL 94 }; 95 96 int 97 pvclock_match(struct device *parent, void *match, void *aux) 98 { 99 struct pv_attach_args *pva = aux; 100 struct pvbus_hv *hv; 101 102 /* 103 * pvclock is provided by different hypervisors, we currently 104 * only support the "kvmclock". 105 */ 106 hv = &pva->pva_hv[PVBUS_KVM]; 107 if (hv->hv_base != 0) { 108 /* 109 * We only implement support for the 2nd version of pvclock. 110 * The first version is basically the same but with different 111 * non-standard MSRs and it is deprecated. 112 */ 113 if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0) 114 return (0); 115 116 /* 117 * Only the "stable" clock with a sync'ed TSC is supported. 118 * In this case the host guarantees that the TSC is constant 119 * and invariant, either by the underlying TSC or by passing 120 * on a synchronized value. 121 */ 122 if ((hv->hv_features & 123 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0) 124 return (0); 125 126 return (1); 127 } 128 129 return (0); 130 } 131 132 void 133 pvclock_attach(struct device *parent, struct device *self, void *aux) 134 { 135 struct pvclock_softc *sc = (struct pvclock_softc *)self; 136 struct pvclock_time_info *ti; 137 paddr_t pa; 138 uint32_t version; 139 uint8_t flags; 140 141 if ((sc->sc_time = km_alloc(PAGE_SIZE, 142 &kv_any, &kp_zero, &kd_nowait)) == NULL) { 143 printf(": time page allocation failed\n"); 144 return; 145 } 146 if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) { 147 printf(": time page PA extraction failed\n"); 148 km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero); 149 return; 150 } 151 152 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); 153 sc->sc_paddr = pa; 154 155 ti = sc->sc_time; 156 do { 157 version = pvclock_read_begin(ti); 158 flags = ti->ti_flags; 159 } while (!pvclock_read_done(ti, version)); 160 161 if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) { 162 wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE); 163 km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero); 164 printf(": unstable clock\n"); 165 return; 166 } 167 168 sc->sc_tc = &pvclock_timecounter; 169 sc->sc_tc->tc_name = DEVNAME(sc); 170 sc->sc_tc->tc_frequency = 1000000000ULL; 171 sc->sc_tc->tc_priv = sc; 172 173 /* Better than HPET but below TSC */ 174 sc->sc_tc->tc_quality = 1500; 175 176 tc_init(sc->sc_tc); 177 178 printf("\n"); 179 } 180 181 int 182 pvclock_activate(struct device *self, int act) 183 { 184 struct pvclock_softc *sc = (struct pvclock_softc *)self; 185 int rv = 0; 186 paddr_t pa = sc->sc_paddr; 187 188 switch (act) { 189 case DVACT_POWERDOWN: 190 wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE); 191 break; 192 case DVACT_RESUME: 193 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); 194 break; 195 } 196 197 return (rv); 198 } 199 200 static inline uint32_t 201 pvclock_read_begin(const struct pvclock_time_info *ti) 202 { 203 uint32_t version = ti->ti_version & ~0x1; 204 virtio_membar_sync(); 205 return (version); 206 } 207 208 static inline int 209 pvclock_read_done(const struct pvclock_time_info *ti, 210 uint32_t version) 211 { 212 virtio_membar_sync(); 213 return (ti->ti_version == version); 214 } 215 216 uint 217 pvclock_get_timecount(struct timecounter *tc) 218 { 219 struct pvclock_softc *sc = tc->tc_priv; 220 struct pvclock_time_info *ti; 221 uint64_t tsc_timestamp, system_time, delta, ctr; 222 uint32_t version, mul_frac; 223 int8_t shift; 224 uint8_t flags; 225 226 ti = sc->sc_time; 227 do { 228 version = pvclock_read_begin(ti); 229 system_time = ti->ti_system_time; 230 tsc_timestamp = ti->ti_tsc_timestamp; 231 mul_frac = ti->ti_tsc_to_system_mul; 232 shift = ti->ti_tsc_shift; 233 flags = ti->ti_flags; 234 } while (!pvclock_read_done(ti, version)); 235 236 /* This bit must be set as we attached based on the stable flag */ 237 if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) 238 panic("%s: unstable result on stable clock", DEVNAME(sc)); 239 240 /* 241 * The algorithm is described in 242 * linux/Documentation/virtual/kvm/msr.txt 243 */ 244 delta = rdtsc() - tsc_timestamp; 245 if (shift < 0) 246 delta >>= -shift; 247 else 248 delta <<= shift; 249 ctr = ((delta * mul_frac) >> 32) + system_time; 250 251 return (ctr); 252 } 253