1f0bc4ed1SChristos Margiolis /* 2f0bc4ed1SChristos Margiolis * SPDX-License-Identifier: CDDL 1.0 3f0bc4ed1SChristos Margiolis * 49310bf54SChristos Margiolis * Copyright (c) 2022 Christos Margiolis <christos@FreeBSD.org> 59310bf54SChristos Margiolis * Copyright (c) 2023 The FreeBSD Foundation 69310bf54SChristos Margiolis * 79310bf54SChristos Margiolis * Portions of this software were developed by Christos Margiolis 89310bf54SChristos Margiolis * <christos@FreeBSD.org> under sponsorship from the FreeBSD Foundation. 9f0bc4ed1SChristos Margiolis */ 10f0bc4ed1SChristos Margiolis 11f0bc4ed1SChristos Margiolis #include <sys/param.h> 12f0bc4ed1SChristos Margiolis #include <sys/systm.h> 13f0bc4ed1SChristos Margiolis #include <sys/conf.h> 14f0bc4ed1SChristos Margiolis #include <sys/kernel.h> 15f0bc4ed1SChristos Margiolis #include <sys/linker.h> 16f0bc4ed1SChristos Margiolis #include <sys/module.h> 17f0bc4ed1SChristos Margiolis 18f0bc4ed1SChristos Margiolis #include <sys/dtrace.h> 19f0bc4ed1SChristos Margiolis 20f0bc4ed1SChristos Margiolis #include "kinst.h" 21f0bc4ed1SChristos Margiolis 22f0bc4ed1SChristos Margiolis MALLOC_DEFINE(M_KINST, "kinst", "Kernel Instruction Tracing"); 23f0bc4ed1SChristos Margiolis 24f0bc4ed1SChristos Margiolis static d_open_t kinst_open; 25f0bc4ed1SChristos Margiolis static d_close_t kinst_close; 26f0bc4ed1SChristos Margiolis static d_ioctl_t kinst_ioctl; 27f0bc4ed1SChristos Margiolis 28f0bc4ed1SChristos Margiolis static void kinst_provide_module(void *, modctl_t *); 29f0bc4ed1SChristos Margiolis static void kinst_getargdesc(void *, dtrace_id_t, void *, 30f0bc4ed1SChristos Margiolis dtrace_argdesc_t *); 31f0bc4ed1SChristos Margiolis static void kinst_destroy(void *, dtrace_id_t, void *); 32f0bc4ed1SChristos Margiolis static void kinst_enable(void *, dtrace_id_t, void *); 33f0bc4ed1SChristos Margiolis static void kinst_disable(void *, dtrace_id_t, void *); 34f0bc4ed1SChristos Margiolis static int kinst_load(void *); 35f0bc4ed1SChristos Margiolis static int kinst_unload(void *); 36f0bc4ed1SChristos Margiolis static int kinst_modevent(module_t, int, void *); 37f0bc4ed1SChristos Margiolis 38f0bc4ed1SChristos Margiolis static dtrace_pattr_t kinst_attr = { 39f0bc4ed1SChristos Margiolis { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 40f0bc4ed1SChristos Margiolis { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 41f0bc4ed1SChristos Margiolis { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 42f0bc4ed1SChristos Margiolis { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 43f0bc4ed1SChristos Margiolis { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 44f0bc4ed1SChristos Margiolis }; 45f0bc4ed1SChristos Margiolis 46778b7437SMark Johnston static const dtrace_pops_t kinst_pops = { 47f0bc4ed1SChristos Margiolis .dtps_provide = NULL, 48f0bc4ed1SChristos Margiolis .dtps_provide_module = kinst_provide_module, 49f0bc4ed1SChristos Margiolis .dtps_enable = kinst_enable, 50f0bc4ed1SChristos Margiolis .dtps_disable = kinst_disable, 51f0bc4ed1SChristos Margiolis .dtps_suspend = NULL, 52f0bc4ed1SChristos Margiolis .dtps_resume = NULL, 53f0bc4ed1SChristos Margiolis .dtps_getargdesc = kinst_getargdesc, 54f0bc4ed1SChristos Margiolis .dtps_getargval = NULL, 55f0bc4ed1SChristos Margiolis .dtps_usermode = NULL, 56f0bc4ed1SChristos Margiolis .dtps_destroy = kinst_destroy 57f0bc4ed1SChristos Margiolis }; 58f0bc4ed1SChristos Margiolis 59f0bc4ed1SChristos Margiolis static struct cdevsw kinst_cdevsw = { 60f0bc4ed1SChristos Margiolis .d_name = "kinst", 61f0bc4ed1SChristos Margiolis .d_version = D_VERSION, 62f0bc4ed1SChristos Margiolis .d_flags = D_TRACKCLOSE, 63f0bc4ed1SChristos Margiolis .d_open = kinst_open, 64f0bc4ed1SChristos Margiolis .d_close = kinst_close, 65f0bc4ed1SChristos Margiolis .d_ioctl = kinst_ioctl, 66f0bc4ed1SChristos Margiolis }; 67f0bc4ed1SChristos Margiolis 68f0bc4ed1SChristos Margiolis static dtrace_provider_id_t kinst_id; 69f0bc4ed1SChristos Margiolis struct kinst_probe_list *kinst_probetab; 70f0bc4ed1SChristos Margiolis static struct cdev *kinst_cdev; 71f0bc4ed1SChristos Margiolis 725c134fbaSChristos Margiolis /* 735c134fbaSChristos Margiolis * Tracing memcpy() will crash the kernel when kinst tries to trace an instance 745c134fbaSChristos Margiolis * of the memcpy() calls in kinst_invop(). To fix this, we can use 755c134fbaSChristos Margiolis * kinst_memcpy() in those cases, with its arguments marked as 'volatile' to 765c134fbaSChristos Margiolis * "outsmart" the compiler and avoid having it replaced by a regular memcpy(). 775c134fbaSChristos Margiolis */ 785c134fbaSChristos Margiolis volatile void * 795c134fbaSChristos Margiolis kinst_memcpy(volatile void *dst, volatile const void *src, size_t len) 805c134fbaSChristos Margiolis { 815c134fbaSChristos Margiolis volatile const unsigned char *src0; 825c134fbaSChristos Margiolis volatile unsigned char *dst0; 835c134fbaSChristos Margiolis 845c134fbaSChristos Margiolis src0 = src; 855c134fbaSChristos Margiolis dst0 = dst; 865c134fbaSChristos Margiolis 875c134fbaSChristos Margiolis while (len--) 885c134fbaSChristos Margiolis *dst0++ = *src0++; 895c134fbaSChristos Margiolis 905c134fbaSChristos Margiolis return (dst); 915c134fbaSChristos Margiolis } 925c134fbaSChristos Margiolis 93d434607bSChristos Margiolis bool 949c80ad68SChristos Margiolis kinst_excluded(const char *name) 959c80ad68SChristos Margiolis { 969c80ad68SChristos Margiolis if (kinst_md_excluded(name)) 97d434607bSChristos Margiolis return (true); 989c80ad68SChristos Margiolis 999c80ad68SChristos Margiolis /* 100eb1413c9SChristos Margiolis * cpu_switch() can cause a crash if it modifies the value of curthread 101eb1413c9SChristos Margiolis * while in probe context. 102eb1413c9SChristos Margiolis */ 103eb1413c9SChristos Margiolis if (strcmp(name, "cpu_switch") == 0) 104eb1413c9SChristos Margiolis return (true); 105eb1413c9SChristos Margiolis 106eb1413c9SChristos Margiolis /* 1079c80ad68SChristos Margiolis * Anything beginning with "dtrace_" may be called from probe context 1089c80ad68SChristos Margiolis * unless it explicitly indicates that it won't be called from probe 1099c80ad68SChristos Margiolis * context by using the prefix "dtrace_safe_". 1109c80ad68SChristos Margiolis */ 1119c80ad68SChristos Margiolis if (strncmp(name, "dtrace_", strlen("dtrace_")) == 0 && 1129c80ad68SChristos Margiolis strncmp(name, "dtrace_safe_", strlen("dtrace_safe_")) != 0) 113d434607bSChristos Margiolis return (true); 1149c80ad68SChristos Margiolis 1159c80ad68SChristos Margiolis /* 1169c80ad68SChristos Margiolis * Omit instrumentation of functions that are probably in DDB. It 1179c80ad68SChristos Margiolis * makes it too hard to debug broken kinst. 1189c80ad68SChristos Margiolis * 1199c80ad68SChristos Margiolis * NB: kdb_enter() can be excluded, but its call to printf() can't be. 1209c80ad68SChristos Margiolis * This is generally OK since we're not yet in debugging context. 1219c80ad68SChristos Margiolis */ 1229c80ad68SChristos Margiolis if (strncmp(name, "db_", strlen("db_")) == 0 || 1239c80ad68SChristos Margiolis strncmp(name, "kdb_", strlen("kdb_")) == 0) 124d434607bSChristos Margiolis return (true); 1259c80ad68SChristos Margiolis 1269c80ad68SChristos Margiolis /* 1279c80ad68SChristos Margiolis * Lock owner methods may be called from probe context. 1289c80ad68SChristos Margiolis */ 1299c80ad68SChristos Margiolis if (strcmp(name, "owner_mtx") == 0 || 1309c80ad68SChristos Margiolis strcmp(name, "owner_rm") == 0 || 1319c80ad68SChristos Margiolis strcmp(name, "owner_rw") == 0 || 1329c80ad68SChristos Margiolis strcmp(name, "owner_sx") == 0) 133d434607bSChristos Margiolis return (true); 1349c80ad68SChristos Margiolis 1359c80ad68SChristos Margiolis /* 136*fdeb273dSMark Johnston * The KMSAN runtime can't be instrumented safely. 137*fdeb273dSMark Johnston */ 138*fdeb273dSMark Johnston if (strncmp(name, "__msan", 6) == 0 || 139*fdeb273dSMark Johnston strncmp(name, "kmsan_", 6) == 0) 140*fdeb273dSMark Johnston return (1); 141*fdeb273dSMark Johnston 142*fdeb273dSMark Johnston /* 1439c80ad68SChristos Margiolis * When DTrace is built into the kernel we need to exclude the kinst 1449c80ad68SChristos Margiolis * functions from instrumentation. 1459c80ad68SChristos Margiolis */ 1469c80ad68SChristos Margiolis #ifndef _KLD_MODULE 1479c80ad68SChristos Margiolis if (strncmp(name, "kinst_", strlen("kinst_")) == 0) 148d434607bSChristos Margiolis return (true); 1499c80ad68SChristos Margiolis #endif 1509c80ad68SChristos Margiolis 1519c80ad68SChristos Margiolis if (strcmp(name, "trap_check") == 0) 152d434607bSChristos Margiolis return (true); 1539c80ad68SChristos Margiolis 154d434607bSChristos Margiolis return (false); 1559c80ad68SChristos Margiolis } 1569c80ad68SChristos Margiolis 157f0bc4ed1SChristos Margiolis void 158f0bc4ed1SChristos Margiolis kinst_probe_create(struct kinst_probe *kp, linker_file_t lf) 159f0bc4ed1SChristos Margiolis { 160f0bc4ed1SChristos Margiolis kp->kp_id = dtrace_probe_create(kinst_id, lf->filename, 161f0bc4ed1SChristos Margiolis kp->kp_func, kp->kp_name, 3, kp); 162f0bc4ed1SChristos Margiolis 163f0bc4ed1SChristos Margiolis LIST_INSERT_HEAD(KINST_GETPROBE(kp->kp_patchpoint), kp, kp_hashnext); 164f0bc4ed1SChristos Margiolis } 165f0bc4ed1SChristos Margiolis 166f0bc4ed1SChristos Margiolis static int 167f0bc4ed1SChristos Margiolis kinst_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, 168f0bc4ed1SChristos Margiolis struct thread *td __unused) 169f0bc4ed1SChristos Margiolis { 170f0bc4ed1SChristos Margiolis return (0); 171f0bc4ed1SChristos Margiolis } 172f0bc4ed1SChristos Margiolis 173f0bc4ed1SChristos Margiolis static int 174f0bc4ed1SChristos Margiolis kinst_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused, 175f0bc4ed1SChristos Margiolis struct thread *td __unused) 176f0bc4ed1SChristos Margiolis { 177f0bc4ed1SChristos Margiolis dtrace_condense(kinst_id); 178f0bc4ed1SChristos Margiolis return (0); 179f0bc4ed1SChristos Margiolis } 180f0bc4ed1SChristos Margiolis 181f0bc4ed1SChristos Margiolis static int 182f0bc4ed1SChristos Margiolis kinst_linker_file_cb(linker_file_t lf, void *arg) 183f0bc4ed1SChristos Margiolis { 184f0bc4ed1SChristos Margiolis dtrace_kinst_probedesc_t *pd; 185f0bc4ed1SChristos Margiolis 186f0bc4ed1SChristos Margiolis pd = arg; 187f0bc4ed1SChristos Margiolis if (pd->kpd_mod[0] != '\0' && strcmp(pd->kpd_mod, lf->filename) != 0) 188f0bc4ed1SChristos Margiolis return (0); 189f0bc4ed1SChristos Margiolis 190f0bc4ed1SChristos Margiolis /* 191f0bc4ed1SChristos Margiolis * Invoke kinst_make_probe_function() once for each function symbol in 192f0bc4ed1SChristos Margiolis * the module "lf". 193f0bc4ed1SChristos Margiolis */ 194f0bc4ed1SChristos Margiolis return (linker_file_function_listall(lf, kinst_make_probe, arg)); 195f0bc4ed1SChristos Margiolis } 196f0bc4ed1SChristos Margiolis 197f0bc4ed1SChristos Margiolis static int 198f0bc4ed1SChristos Margiolis kinst_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, 199f0bc4ed1SChristos Margiolis int flags __unused, struct thread *td __unused) 200f0bc4ed1SChristos Margiolis { 201f0bc4ed1SChristos Margiolis dtrace_kinst_probedesc_t *pd; 202f0bc4ed1SChristos Margiolis int error = 0; 203f0bc4ed1SChristos Margiolis 204f0bc4ed1SChristos Margiolis switch (cmd) { 205f0bc4ed1SChristos Margiolis case KINSTIOC_MAKEPROBE: 206f0bc4ed1SChristos Margiolis pd = (dtrace_kinst_probedesc_t *)addr; 207f0bc4ed1SChristos Margiolis pd->kpd_func[sizeof(pd->kpd_func) - 1] = '\0'; 208f0bc4ed1SChristos Margiolis pd->kpd_mod[sizeof(pd->kpd_mod) - 1] = '\0'; 209f0bc4ed1SChristos Margiolis 210f0bc4ed1SChristos Margiolis /* Loop over all functions in the kernel and loaded modules. */ 211f0bc4ed1SChristos Margiolis error = linker_file_foreach(kinst_linker_file_cb, pd); 212f0bc4ed1SChristos Margiolis break; 213f0bc4ed1SChristos Margiolis default: 214f0bc4ed1SChristos Margiolis error = ENOTTY; 215f0bc4ed1SChristos Margiolis break; 216f0bc4ed1SChristos Margiolis } 217f0bc4ed1SChristos Margiolis 218f0bc4ed1SChristos Margiolis return (error); 219f0bc4ed1SChristos Margiolis } 220f0bc4ed1SChristos Margiolis 221f0bc4ed1SChristos Margiolis static void 222f0bc4ed1SChristos Margiolis kinst_provide_module(void *arg, modctl_t *lf) 223f0bc4ed1SChristos Margiolis { 224f0bc4ed1SChristos Margiolis } 225f0bc4ed1SChristos Margiolis 226f0bc4ed1SChristos Margiolis static void 227f0bc4ed1SChristos Margiolis kinst_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) 228f0bc4ed1SChristos Margiolis { 229f0bc4ed1SChristos Margiolis desc->dtargd_ndx = DTRACE_ARGNONE; 230f0bc4ed1SChristos Margiolis } 231f0bc4ed1SChristos Margiolis 232f0bc4ed1SChristos Margiolis static void 233f0bc4ed1SChristos Margiolis kinst_destroy(void *arg, dtrace_id_t id, void *parg) 234f0bc4ed1SChristos Margiolis { 235f0bc4ed1SChristos Margiolis struct kinst_probe *kp = parg; 236f0bc4ed1SChristos Margiolis 237f0bc4ed1SChristos Margiolis LIST_REMOVE(kp, kp_hashnext); 2385b701ed1SChristos Margiolis #ifndef __amd64__ 2395b701ed1SChristos Margiolis kinst_trampoline_dealloc(kp->kp_tramp); 2405b701ed1SChristos Margiolis #endif 241f0bc4ed1SChristos Margiolis free(kp, M_KINST); 242f0bc4ed1SChristos Margiolis } 243f0bc4ed1SChristos Margiolis 244f0bc4ed1SChristos Margiolis static void 245f0bc4ed1SChristos Margiolis kinst_enable(void *arg, dtrace_id_t id, void *parg) 246f0bc4ed1SChristos Margiolis { 247f0bc4ed1SChristos Margiolis struct kinst_probe *kp = parg; 248a72edfeaSMateusz Guzik static bool warned = false; 249a72edfeaSMateusz Guzik 250a72edfeaSMateusz Guzik if (!warned) { 251a72edfeaSMateusz Guzik KINST_LOG( 252a72edfeaSMateusz Guzik "kinst: This provider is experimental, exercise caution"); 253a72edfeaSMateusz Guzik warned = true; 254a72edfeaSMateusz Guzik } 255f0bc4ed1SChristos Margiolis 256f0bc4ed1SChristos Margiolis kinst_patch_tracepoint(kp, kp->kp_patchval); 257f0bc4ed1SChristos Margiolis } 258f0bc4ed1SChristos Margiolis 259f0bc4ed1SChristos Margiolis static void 260f0bc4ed1SChristos Margiolis kinst_disable(void *arg, dtrace_id_t id, void *parg) 261f0bc4ed1SChristos Margiolis { 262f0bc4ed1SChristos Margiolis struct kinst_probe *kp = parg; 263f0bc4ed1SChristos Margiolis 264f0bc4ed1SChristos Margiolis kinst_patch_tracepoint(kp, kp->kp_savedval); 265f0bc4ed1SChristos Margiolis } 266f0bc4ed1SChristos Margiolis 267f0bc4ed1SChristos Margiolis static int 268f0bc4ed1SChristos Margiolis kinst_load(void *dummy) 269f0bc4ed1SChristos Margiolis { 270f0bc4ed1SChristos Margiolis int error; 271f0bc4ed1SChristos Margiolis 272f0bc4ed1SChristos Margiolis error = kinst_trampoline_init(); 273f0bc4ed1SChristos Margiolis if (error != 0) 274f0bc4ed1SChristos Margiolis return (error); 27584d7fe4aSMark Johnston error = kinst_md_init(); 27684d7fe4aSMark Johnston if (error != 0) { 27784d7fe4aSMark Johnston kinst_trampoline_deinit(); 27884d7fe4aSMark Johnston return (error); 27984d7fe4aSMark Johnston } 280f0bc4ed1SChristos Margiolis 281f0bc4ed1SChristos Margiolis error = dtrace_register("kinst", &kinst_attr, DTRACE_PRIV_USER, NULL, 282f0bc4ed1SChristos Margiolis &kinst_pops, NULL, &kinst_id); 283f0bc4ed1SChristos Margiolis if (error != 0) { 28484d7fe4aSMark Johnston kinst_md_deinit(); 285f0bc4ed1SChristos Margiolis kinst_trampoline_deinit(); 286f0bc4ed1SChristos Margiolis return (error); 287f0bc4ed1SChristos Margiolis } 288f0bc4ed1SChristos Margiolis kinst_probetab = malloc(KINST_PROBETAB_MAX * 289f0bc4ed1SChristos Margiolis sizeof(struct kinst_probe_list), M_KINST, M_WAITOK | M_ZERO); 290f0bc4ed1SChristos Margiolis for (int i = 0; i < KINST_PROBETAB_MAX; i++) 291f0bc4ed1SChristos Margiolis LIST_INIT(&kinst_probetab[i]); 292f0bc4ed1SChristos Margiolis kinst_cdev = make_dev(&kinst_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 293f0bc4ed1SChristos Margiolis "dtrace/kinst"); 294f0bc4ed1SChristos Margiolis dtrace_invop_add(kinst_invop); 295f0bc4ed1SChristos Margiolis return (0); 296f0bc4ed1SChristos Margiolis } 297f0bc4ed1SChristos Margiolis 298f0bc4ed1SChristos Margiolis static int 299f0bc4ed1SChristos Margiolis kinst_unload(void *dummy) 300f0bc4ed1SChristos Margiolis { 301f0bc4ed1SChristos Margiolis free(kinst_probetab, M_KINST); 30284d7fe4aSMark Johnston kinst_md_deinit(); 303f0bc4ed1SChristos Margiolis kinst_trampoline_deinit(); 304f0bc4ed1SChristos Margiolis dtrace_invop_remove(kinst_invop); 305f0bc4ed1SChristos Margiolis destroy_dev(kinst_cdev); 306f0bc4ed1SChristos Margiolis 307f0bc4ed1SChristos Margiolis return (dtrace_unregister(kinst_id)); 308f0bc4ed1SChristos Margiolis } 309f0bc4ed1SChristos Margiolis 310f0bc4ed1SChristos Margiolis static int 311f0bc4ed1SChristos Margiolis kinst_modevent(module_t mod __unused, int type, void *data __unused) 312f0bc4ed1SChristos Margiolis { 313f0bc4ed1SChristos Margiolis int error = 0; 314f0bc4ed1SChristos Margiolis 315f0bc4ed1SChristos Margiolis switch (type) { 316f0bc4ed1SChristos Margiolis case MOD_LOAD: 317f0bc4ed1SChristos Margiolis break; 318f0bc4ed1SChristos Margiolis case MOD_UNLOAD: 319f0bc4ed1SChristos Margiolis break; 320f0bc4ed1SChristos Margiolis case MOD_SHUTDOWN: 321f0bc4ed1SChristos Margiolis break; 322f0bc4ed1SChristos Margiolis default: 323f0bc4ed1SChristos Margiolis error = EOPNOTSUPP; 324f0bc4ed1SChristos Margiolis break; 325f0bc4ed1SChristos Margiolis } 326f0bc4ed1SChristos Margiolis 327f0bc4ed1SChristos Margiolis return (error); 328f0bc4ed1SChristos Margiolis } 329f0bc4ed1SChristos Margiolis 330f0bc4ed1SChristos Margiolis SYSINIT(kinst_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_load, NULL); 331f0bc4ed1SChristos Margiolis SYSUNINIT(kinst_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_unload, 332f0bc4ed1SChristos Margiolis NULL); 333f0bc4ed1SChristos Margiolis 334f0bc4ed1SChristos Margiolis DEV_MODULE(kinst, kinst_modevent, NULL); 335f0bc4ed1SChristos Margiolis MODULE_VERSION(kinst, 1); 336f0bc4ed1SChristos Margiolis MODULE_DEPEND(kinst, dtrace, 1, 1, 1); 337f0bc4ed1SChristos Margiolis MODULE_DEPEND(kinst, opensolaris, 1, 1, 1); 338