1 /* $OpenBSD: pvclock.c,v 1.6 2020/07/06 13:33:09 pirofti Exp $ */ 2 3 /* 4 * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #if !defined(__i386__) && !defined(__amd64__) 20 #error pvclock(4) is only supported on i386 and amd64 21 #endif 22 23 #include <sys/param.h> 24 #include <sys/systm.h> 25 #include <sys/kernel.h> 26 #include <sys/timetc.h> 27 #include <sys/timeout.h> 28 #include <sys/malloc.h> 29 #include <sys/atomic.h> 30 31 #include <machine/cpu.h> 32 #include <machine/atomic.h> 33 #include <uvm/uvm_extern.h> 34 35 #include <dev/pv/pvvar.h> 36 #include <dev/pv/pvreg.h> 37 38 uint pvclock_lastcount; 39 40 struct pvclock_softc { 41 struct device sc_dev; 42 void *sc_time; 43 paddr_t sc_paddr; 44 struct timecounter *sc_tc; 45 }; 46 47 #define DEVNAME(_s) ((_s)->sc_dev.dv_xname) 48 49 int pvclock_match(struct device *, void *, void *); 50 void pvclock_attach(struct device *, struct device *, void *); 51 int pvclock_activate(struct device *, int); 52 53 uint pvclock_get_timecount(struct timecounter *); 54 void pvclock_read_time_info(struct pvclock_softc *, 55 struct pvclock_time_info *); 56 57 static inline uint32_t 58 pvclock_read_begin(const struct pvclock_time_info *); 59 static inline int 60 pvclock_read_done(const struct pvclock_time_info *, uint32_t); 61 62 struct cfattach pvclock_ca = { 63 sizeof(struct pvclock_softc), 64 pvclock_match, 65 pvclock_attach, 66 NULL, 67 pvclock_activate 68 }; 69 70 struct cfdriver pvclock_cd = { 71 NULL, 72 "pvclock", 73 DV_DULL 74 }; 75 76 struct timecounter pvclock_timecounter = { 77 pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL, 0 78 }; 79 80 int 81 pvclock_match(struct device *parent, void *match, void *aux) 82 { 83 struct pv_attach_args *pva = aux; 84 struct pvbus_hv *hv; 85 86 /* 87 * pvclock is provided by different hypervisors, we currently 88 * only support the "kvmclock". 89 */ 90 hv = &pva->pva_hv[PVBUS_KVM]; 91 if (hv->hv_base == 0) 92 hv = &pva->pva_hv[PVBUS_OPENBSD]; 93 if (hv->hv_base != 0) { 94 /* 95 * We only implement support for the 2nd version of pvclock. 96 * The first version is basically the same but with different 97 * non-standard MSRs and it is deprecated. 98 */ 99 if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0) 100 return (0); 101 102 /* 103 * Only the "stable" clock with a sync'ed TSC is supported. 104 * In this case the host guarantees that the TSC is constant 105 * and invariant, either by the underlying TSC or by passing 106 * on a synchronized value. 107 */ 108 if ((hv->hv_features & 109 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0) 110 return (0); 111 112 return (1); 113 } 114 115 return (0); 116 } 117 118 void 119 pvclock_attach(struct device *parent, struct device *self, void *aux) 120 { 121 struct pvclock_softc *sc = (struct pvclock_softc *)self; 122 struct pvclock_time_info *ti; 123 paddr_t pa; 124 uint32_t version; 125 uint8_t flags; 126 127 if ((sc->sc_time = km_alloc(PAGE_SIZE, 128 &kv_any, &kp_zero, &kd_nowait)) == NULL) { 129 printf(": time page allocation failed\n"); 130 return; 131 } 132 if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) { 133 printf(": time page PA extraction failed\n"); 134 km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero); 135 return; 136 } 137 138 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); 139 sc->sc_paddr = pa; 140 141 ti = sc->sc_time; 142 do { 143 version = pvclock_read_begin(ti); 144 flags = ti->ti_flags; 145 } while (!pvclock_read_done(ti, version)); 146 147 sc->sc_tc = &pvclock_timecounter; 148 sc->sc_tc->tc_name = DEVNAME(sc); 149 sc->sc_tc->tc_frequency = 1000000000ULL; 150 sc->sc_tc->tc_priv = sc; 151 152 pvclock_lastcount = 0; 153 154 /* Better than HPET but below TSC */ 155 sc->sc_tc->tc_quality = 1500; 156 157 if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) { 158 /* if tsc is not stable, set a lower priority */ 159 /* Better than i8254 but below HPET */ 160 sc->sc_tc->tc_quality = 500; 161 } 162 163 tc_init(sc->sc_tc); 164 165 printf("\n"); 166 } 167 168 int 169 pvclock_activate(struct device *self, int act) 170 { 171 struct pvclock_softc *sc = (struct pvclock_softc *)self; 172 int rv = 0; 173 paddr_t pa = sc->sc_paddr; 174 175 switch (act) { 176 case DVACT_POWERDOWN: 177 wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE); 178 break; 179 case DVACT_RESUME: 180 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); 181 break; 182 } 183 184 return (rv); 185 } 186 187 static inline uint32_t 188 pvclock_read_begin(const struct pvclock_time_info *ti) 189 { 190 uint32_t version = ti->ti_version & ~0x1; 191 virtio_membar_sync(); 192 return (version); 193 } 194 195 static inline int 196 pvclock_read_done(const struct pvclock_time_info *ti, 197 uint32_t version) 198 { 199 virtio_membar_sync(); 200 return (ti->ti_version == version); 201 } 202 203 uint 204 pvclock_get_timecount(struct timecounter *tc) 205 { 206 struct pvclock_softc *sc = tc->tc_priv; 207 struct pvclock_time_info *ti; 208 uint64_t tsc_timestamp, system_time, delta, ctr; 209 uint32_t version, mul_frac; 210 int8_t shift; 211 uint8_t flags; 212 213 ti = sc->sc_time; 214 do { 215 version = pvclock_read_begin(ti); 216 system_time = ti->ti_system_time; 217 tsc_timestamp = ti->ti_tsc_timestamp; 218 mul_frac = ti->ti_tsc_to_system_mul; 219 shift = ti->ti_tsc_shift; 220 flags = ti->ti_flags; 221 } while (!pvclock_read_done(ti, version)); 222 223 /* 224 * The algorithm is described in 225 * linux/Documentation/virtual/kvm/msr.txt 226 */ 227 delta = rdtsc() - tsc_timestamp; 228 if (shift < 0) 229 delta >>= -shift; 230 else 231 delta <<= shift; 232 ctr = ((delta * mul_frac) >> 32) + system_time; 233 234 if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0) 235 return (ctr); 236 237 if (ctr < pvclock_lastcount) 238 return (pvclock_lastcount); 239 240 atomic_swap_uint(&pvclock_lastcount, ctr); 241 242 return (ctr); 243 } 244