1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy 22eda14cbcSMatt Macy /* 23eda14cbcSMatt Macy * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 24eda14cbcSMatt Macy * Copyright (c) 2011, 2018 by Delphix. All rights reserved. 25eda14cbcSMatt Macy * Copyright (c) 2014 Integros [integros.com] 26eda14cbcSMatt Macy * Copyright 2017 Joyent, Inc. 27eda14cbcSMatt Macy */ 28eda14cbcSMatt Macy 29eda14cbcSMatt Macy #include <sys/spa.h> 30eda14cbcSMatt Macy #include <sys/spa_impl.h> 31eda14cbcSMatt Macy #include <sys/zap.h> 32eda14cbcSMatt Macy #include <sys/dsl_synctask.h> 33eda14cbcSMatt Macy #include <sys/dmu_tx.h> 34eda14cbcSMatt Macy #include <sys/dmu_objset.h> 35eda14cbcSMatt Macy #include <sys/dsl_dataset.h> 36eda14cbcSMatt Macy #include <sys/dsl_dir.h> 37eda14cbcSMatt Macy #include <sys/cmn_err.h> 38eda14cbcSMatt Macy #include <sys/sunddi.h> 39eda14cbcSMatt Macy #include <sys/cred.h> 40eda14cbcSMatt Macy #include "zfs_comutil.h" 41eda14cbcSMatt Macy #include "zfs_gitrev.h" 42eda14cbcSMatt Macy #ifdef _KERNEL 43eda14cbcSMatt Macy #include <sys/zone.h> 44eda14cbcSMatt Macy #endif 45eda14cbcSMatt Macy 46eda14cbcSMatt Macy /* 47eda14cbcSMatt Macy * Routines to manage the on-disk history log. 48eda14cbcSMatt Macy * 49eda14cbcSMatt Macy * The history log is stored as a dmu object containing 50eda14cbcSMatt Macy * <packed record length, record nvlist> tuples. 51eda14cbcSMatt Macy * 52eda14cbcSMatt Macy * Where "record nvlist" is an nvlist containing uint64_ts and strings, and 53eda14cbcSMatt Macy * "packed record length" is the packed length of the "record nvlist" stored 54eda14cbcSMatt Macy * as a little endian uint64_t. 55eda14cbcSMatt Macy * 56eda14cbcSMatt Macy * The log is implemented as a ring buffer, though the original creation 57eda14cbcSMatt Macy * of the pool ('zpool create') is never overwritten. 58eda14cbcSMatt Macy * 59eda14cbcSMatt Macy * The history log is tracked as object 'spa_t::spa_history'. The bonus buffer 60eda14cbcSMatt Macy * of 'spa_history' stores the offsets for logging/retrieving history as 61eda14cbcSMatt Macy * 'spa_history_phys_t'. 'sh_pool_create_len' is the ending offset in bytes of 62eda14cbcSMatt Macy * where the 'zpool create' record is stored. This allows us to never 63eda14cbcSMatt Macy * overwrite the original creation of the pool. 'sh_phys_max_off' is the 64eda14cbcSMatt Macy * physical ending offset in bytes of the log. This tells you the length of 65eda14cbcSMatt Macy * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record 66eda14cbcSMatt Macy * is added, 'sh_eof' is incremented by the size of the record. 67eda14cbcSMatt Macy * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes). 68eda14cbcSMatt Macy * This is where the consumer should start reading from after reading in 69eda14cbcSMatt Macy * the 'zpool create' portion of the log. 70eda14cbcSMatt Macy * 71eda14cbcSMatt Macy * 'sh_records_lost' keeps track of how many records have been overwritten 72eda14cbcSMatt Macy * and permanently lost. 73eda14cbcSMatt Macy */ 74eda14cbcSMatt Macy 75eda14cbcSMatt Macy /* convert a logical offset to physical */ 76eda14cbcSMatt Macy static uint64_t 77eda14cbcSMatt Macy spa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp) 78eda14cbcSMatt Macy { 79eda14cbcSMatt Macy uint64_t phys_len; 80eda14cbcSMatt Macy 81eda14cbcSMatt Macy phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len; 82eda14cbcSMatt Macy return ((log_off - shpp->sh_pool_create_len) % phys_len 83eda14cbcSMatt Macy + shpp->sh_pool_create_len); 84eda14cbcSMatt Macy } 85eda14cbcSMatt Macy 86eda14cbcSMatt Macy void 87eda14cbcSMatt Macy spa_history_create_obj(spa_t *spa, dmu_tx_t *tx) 88eda14cbcSMatt Macy { 89eda14cbcSMatt Macy dmu_buf_t *dbp; 90eda14cbcSMatt Macy spa_history_phys_t *shpp; 91eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 92eda14cbcSMatt Macy 93eda14cbcSMatt Macy ASSERT0(spa->spa_history); 94eda14cbcSMatt Macy spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY, 95eda14cbcSMatt Macy SPA_OLD_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS, 96eda14cbcSMatt Macy sizeof (spa_history_phys_t), tx); 97eda14cbcSMatt Macy 98eda14cbcSMatt Macy VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, 99eda14cbcSMatt Macy DMU_POOL_HISTORY, sizeof (uint64_t), 1, 100eda14cbcSMatt Macy &spa->spa_history, tx)); 101eda14cbcSMatt Macy 102eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); 103eda14cbcSMatt Macy ASSERT3U(dbp->db_size, >=, sizeof (spa_history_phys_t)); 104eda14cbcSMatt Macy 105eda14cbcSMatt Macy shpp = dbp->db_data; 106eda14cbcSMatt Macy dmu_buf_will_dirty(dbp, tx); 107eda14cbcSMatt Macy 108eda14cbcSMatt Macy /* 109eda14cbcSMatt Macy * Figure out maximum size of history log. We set it at 110eda14cbcSMatt Macy * 0.1% of pool size, with a max of 1G and min of 128KB. 111eda14cbcSMatt Macy */ 112eda14cbcSMatt Macy shpp->sh_phys_max_off = 113eda14cbcSMatt Macy metaslab_class_get_dspace(spa_normal_class(spa)) / 1000; 114eda14cbcSMatt Macy shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30); 115eda14cbcSMatt Macy shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10); 116eda14cbcSMatt Macy 117eda14cbcSMatt Macy dmu_buf_rele(dbp, FTAG); 118eda14cbcSMatt Macy } 119eda14cbcSMatt Macy 120eda14cbcSMatt Macy /* 121eda14cbcSMatt Macy * Change 'sh_bof' to the beginning of the next record. 122eda14cbcSMatt Macy */ 123eda14cbcSMatt Macy static int 124eda14cbcSMatt Macy spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp) 125eda14cbcSMatt Macy { 126eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 127eda14cbcSMatt Macy uint64_t firstread, reclen, phys_bof; 128eda14cbcSMatt Macy char buf[sizeof (reclen)]; 129eda14cbcSMatt Macy int err; 130eda14cbcSMatt Macy 131eda14cbcSMatt Macy phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp); 132eda14cbcSMatt Macy firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof); 133eda14cbcSMatt Macy 134eda14cbcSMatt Macy if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread, 135eda14cbcSMatt Macy buf, DMU_READ_PREFETCH)) != 0) 136eda14cbcSMatt Macy return (err); 137eda14cbcSMatt Macy if (firstread != sizeof (reclen)) { 138eda14cbcSMatt Macy if ((err = dmu_read(mos, spa->spa_history, 139eda14cbcSMatt Macy shpp->sh_pool_create_len, sizeof (reclen) - firstread, 140eda14cbcSMatt Macy buf + firstread, DMU_READ_PREFETCH)) != 0) 141eda14cbcSMatt Macy return (err); 142eda14cbcSMatt Macy } 143eda14cbcSMatt Macy 144eda14cbcSMatt Macy reclen = LE_64(*((uint64_t *)buf)); 145eda14cbcSMatt Macy shpp->sh_bof += reclen + sizeof (reclen); 146eda14cbcSMatt Macy shpp->sh_records_lost++; 147eda14cbcSMatt Macy return (0); 148eda14cbcSMatt Macy } 149eda14cbcSMatt Macy 150eda14cbcSMatt Macy static int 151eda14cbcSMatt Macy spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, 152eda14cbcSMatt Macy dmu_tx_t *tx) 153eda14cbcSMatt Macy { 154eda14cbcSMatt Macy uint64_t firstwrite, phys_eof; 155eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 156eda14cbcSMatt Macy int err; 157eda14cbcSMatt Macy 158eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&spa->spa_history_lock)); 159eda14cbcSMatt Macy 160eda14cbcSMatt Macy /* see if we need to reset logical BOF */ 161eda14cbcSMatt Macy while (shpp->sh_phys_max_off - shpp->sh_pool_create_len - 162eda14cbcSMatt Macy (shpp->sh_eof - shpp->sh_bof) <= len) { 163eda14cbcSMatt Macy if ((err = spa_history_advance_bof(spa, shpp)) != 0) { 164eda14cbcSMatt Macy return (err); 165eda14cbcSMatt Macy } 166eda14cbcSMatt Macy } 167eda14cbcSMatt Macy 168eda14cbcSMatt Macy phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); 169eda14cbcSMatt Macy firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof); 170eda14cbcSMatt Macy shpp->sh_eof += len; 171eda14cbcSMatt Macy dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx); 172eda14cbcSMatt Macy 173eda14cbcSMatt Macy len -= firstwrite; 174eda14cbcSMatt Macy if (len > 0) { 175eda14cbcSMatt Macy /* write out the rest at the beginning of physical file */ 176eda14cbcSMatt Macy dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len, 177eda14cbcSMatt Macy len, (char *)buf + firstwrite, tx); 178eda14cbcSMatt Macy } 179eda14cbcSMatt Macy 180eda14cbcSMatt Macy return (0); 181eda14cbcSMatt Macy } 182eda14cbcSMatt Macy 183eda14cbcSMatt Macy /* 184eda14cbcSMatt Macy * Post a history sysevent. 185eda14cbcSMatt Macy * 186eda14cbcSMatt Macy * The nvlist_t* passed into this function will be transformed into a new 187eda14cbcSMatt Macy * nvlist where: 188eda14cbcSMatt Macy * 189eda14cbcSMatt Macy * 1. Nested nvlists will be flattened to a single level 190eda14cbcSMatt Macy * 2. Keys will have their names normalized (to remove any problematic 191eda14cbcSMatt Macy * characters, such as whitespace) 192eda14cbcSMatt Macy * 193eda14cbcSMatt Macy * The nvlist_t passed into this function will duplicated and should be freed 194eda14cbcSMatt Macy * by caller. 195eda14cbcSMatt Macy * 196eda14cbcSMatt Macy */ 197eda14cbcSMatt Macy static void 198eda14cbcSMatt Macy spa_history_log_notify(spa_t *spa, nvlist_t *nvl) 199eda14cbcSMatt Macy { 200eda14cbcSMatt Macy nvlist_t *hist_nvl = fnvlist_alloc(); 201eda14cbcSMatt Macy uint64_t uint64; 202eda14cbcSMatt Macy char *string; 203eda14cbcSMatt Macy 204eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_CMD, &string) == 0) 205eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_CMD, string); 206eda14cbcSMatt Macy 207eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0) 208eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string); 209eda14cbcSMatt Macy 210eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_ZONE, &string) == 0) 211eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_ZONE, string); 212eda14cbcSMatt Macy 213eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_HOST, &string) == 0) 214eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_HOST, string); 215eda14cbcSMatt Macy 216eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME, &string) == 0) 217eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_DSNAME, string); 218eda14cbcSMatt Macy 219eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR, &string) == 0) 220eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_STR, string); 221eda14cbcSMatt Macy 222eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL, &string) == 0) 223eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_IOCTL, string); 224eda14cbcSMatt Macy 225eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0) 226eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string); 227eda14cbcSMatt Macy 228eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID, &uint64) == 0) 229eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_DSID, uint64); 230eda14cbcSMatt Macy 231eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG, &uint64) == 0) 232eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TXG, uint64); 233eda14cbcSMatt Macy 234eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TIME, &uint64) == 0) 235eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TIME, uint64); 236eda14cbcSMatt Macy 237eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_WHO, &uint64) == 0) 238eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_WHO, uint64); 239eda14cbcSMatt Macy 240eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_INT_EVENT, &uint64) == 0) 241eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_INT_EVENT, uint64); 242eda14cbcSMatt Macy 243eda14cbcSMatt Macy spa_event_notify(spa, NULL, hist_nvl, ESC_ZFS_HISTORY_EVENT); 244eda14cbcSMatt Macy 245eda14cbcSMatt Macy nvlist_free(hist_nvl); 246eda14cbcSMatt Macy } 247eda14cbcSMatt Macy 248eda14cbcSMatt Macy /* 249eda14cbcSMatt Macy * Write out a history event. 250eda14cbcSMatt Macy */ 251eda14cbcSMatt Macy /*ARGSUSED*/ 252eda14cbcSMatt Macy static void 253eda14cbcSMatt Macy spa_history_log_sync(void *arg, dmu_tx_t *tx) 254eda14cbcSMatt Macy { 255eda14cbcSMatt Macy nvlist_t *nvl = arg; 256eda14cbcSMatt Macy spa_t *spa = dmu_tx_pool(tx)->dp_spa; 257eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 258eda14cbcSMatt Macy dmu_buf_t *dbp; 259eda14cbcSMatt Macy spa_history_phys_t *shpp; 260eda14cbcSMatt Macy size_t reclen; 261eda14cbcSMatt Macy uint64_t le_len; 262eda14cbcSMatt Macy char *record_packed = NULL; 263eda14cbcSMatt Macy int ret; 264eda14cbcSMatt Macy 265eda14cbcSMatt Macy /* 266eda14cbcSMatt Macy * If we have an older pool that doesn't have a command 267eda14cbcSMatt Macy * history object, create it now. 268eda14cbcSMatt Macy */ 269eda14cbcSMatt Macy mutex_enter(&spa->spa_history_lock); 270eda14cbcSMatt Macy if (!spa->spa_history) 271eda14cbcSMatt Macy spa_history_create_obj(spa, tx); 272eda14cbcSMatt Macy mutex_exit(&spa->spa_history_lock); 273eda14cbcSMatt Macy 274eda14cbcSMatt Macy /* 275eda14cbcSMatt Macy * Get the offset of where we need to write via the bonus buffer. 276eda14cbcSMatt Macy * Update the offset when the write completes. 277eda14cbcSMatt Macy */ 278eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); 279eda14cbcSMatt Macy shpp = dbp->db_data; 280eda14cbcSMatt Macy 281eda14cbcSMatt Macy dmu_buf_will_dirty(dbp, tx); 282eda14cbcSMatt Macy 283eda14cbcSMatt Macy #ifdef ZFS_DEBUG 284eda14cbcSMatt Macy { 285eda14cbcSMatt Macy dmu_object_info_t doi; 286eda14cbcSMatt Macy dmu_object_info_from_db(dbp, &doi); 287eda14cbcSMatt Macy ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); 288eda14cbcSMatt Macy } 289eda14cbcSMatt Macy #endif 290eda14cbcSMatt Macy 291eda14cbcSMatt Macy fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec()); 292eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname()->nodename); 293eda14cbcSMatt Macy 294eda14cbcSMatt Macy if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { 295eda14cbcSMatt Macy zfs_dbgmsg("command: %s", 296eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); 297eda14cbcSMatt Macy } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) { 298eda14cbcSMatt Macy if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { 299eda14cbcSMatt Macy zfs_dbgmsg("txg %lld %s %s (id %llu) %s", 300eda14cbcSMatt Macy fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), 301eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), 302eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), 303eda14cbcSMatt Macy fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID), 304eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); 305eda14cbcSMatt Macy } else { 306eda14cbcSMatt Macy zfs_dbgmsg("txg %lld %s %s", 307eda14cbcSMatt Macy fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), 308eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), 309eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); 310eda14cbcSMatt Macy } 311eda14cbcSMatt Macy /* 312eda14cbcSMatt Macy * The history sysevent is posted only for internal history 313eda14cbcSMatt Macy * messages to show what has happened, not how it happened. For 314eda14cbcSMatt Macy * example, the following command: 315eda14cbcSMatt Macy * 316eda14cbcSMatt Macy * # zfs destroy -r tank/foo 317eda14cbcSMatt Macy * 318eda14cbcSMatt Macy * will result in one sysevent posted per dataset that is 319eda14cbcSMatt Macy * destroyed as a result of the command - which could be more 320eda14cbcSMatt Macy * than one event in total. By contrast, if the sysevent was 321eda14cbcSMatt Macy * posted as a result of the ZPOOL_HIST_CMD key being present 322eda14cbcSMatt Macy * it would result in only one sysevent being posted with the 323eda14cbcSMatt Macy * full command line arguments, requiring the consumer to know 324eda14cbcSMatt Macy * how to parse and understand zfs(1M) command invocations. 325eda14cbcSMatt Macy */ 326eda14cbcSMatt Macy spa_history_log_notify(spa, nvl); 327eda14cbcSMatt Macy } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { 328eda14cbcSMatt Macy zfs_dbgmsg("ioctl %s", 329eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); 330eda14cbcSMatt Macy } 331eda14cbcSMatt Macy 332eda14cbcSMatt Macy VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE, 333eda14cbcSMatt Macy KM_SLEEP), ==, 0); 334eda14cbcSMatt Macy 335eda14cbcSMatt Macy mutex_enter(&spa->spa_history_lock); 336eda14cbcSMatt Macy 337eda14cbcSMatt Macy /* write out the packed length as little endian */ 338eda14cbcSMatt Macy le_len = LE_64((uint64_t)reclen); 339eda14cbcSMatt Macy ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx); 340eda14cbcSMatt Macy if (!ret) 341eda14cbcSMatt Macy ret = spa_history_write(spa, record_packed, reclen, shpp, tx); 342eda14cbcSMatt Macy 343eda14cbcSMatt Macy /* The first command is the create, which we keep forever */ 344eda14cbcSMatt Macy if (ret == 0 && shpp->sh_pool_create_len == 0 && 345eda14cbcSMatt Macy nvlist_exists(nvl, ZPOOL_HIST_CMD)) { 346eda14cbcSMatt Macy shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof; 347eda14cbcSMatt Macy } 348eda14cbcSMatt Macy 349eda14cbcSMatt Macy mutex_exit(&spa->spa_history_lock); 350eda14cbcSMatt Macy fnvlist_pack_free(record_packed, reclen); 351eda14cbcSMatt Macy dmu_buf_rele(dbp, FTAG); 352eda14cbcSMatt Macy fnvlist_free(nvl); 353eda14cbcSMatt Macy } 354eda14cbcSMatt Macy 355eda14cbcSMatt Macy /* 356eda14cbcSMatt Macy * Write out a history event. 357eda14cbcSMatt Macy */ 358eda14cbcSMatt Macy int 359eda14cbcSMatt Macy spa_history_log(spa_t *spa, const char *msg) 360eda14cbcSMatt Macy { 361eda14cbcSMatt Macy int err; 362eda14cbcSMatt Macy nvlist_t *nvl = fnvlist_alloc(); 363eda14cbcSMatt Macy 364eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); 365eda14cbcSMatt Macy err = spa_history_log_nvl(spa, nvl); 366eda14cbcSMatt Macy fnvlist_free(nvl); 367eda14cbcSMatt Macy return (err); 368eda14cbcSMatt Macy } 369eda14cbcSMatt Macy 370eda14cbcSMatt Macy int 371eda14cbcSMatt Macy spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) 372eda14cbcSMatt Macy { 373eda14cbcSMatt Macy int err = 0; 374eda14cbcSMatt Macy dmu_tx_t *tx; 375eda14cbcSMatt Macy nvlist_t *nvarg, *in_nvl = NULL; 376eda14cbcSMatt Macy 377eda14cbcSMatt Macy if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) 378eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 379eda14cbcSMatt Macy 380eda14cbcSMatt Macy err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl); 381eda14cbcSMatt Macy if (err == 0) { 382eda14cbcSMatt Macy (void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS); 383eda14cbcSMatt Macy } 384eda14cbcSMatt Macy 385eda14cbcSMatt Macy tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 386eda14cbcSMatt Macy err = dmu_tx_assign(tx, TXG_WAIT); 387eda14cbcSMatt Macy if (err) { 388eda14cbcSMatt Macy dmu_tx_abort(tx); 389eda14cbcSMatt Macy return (err); 390eda14cbcSMatt Macy } 391eda14cbcSMatt Macy 392eda14cbcSMatt Macy VERIFY0(nvlist_dup(nvl, &nvarg, KM_SLEEP)); 393eda14cbcSMatt Macy if (spa_history_zone() != NULL) { 394eda14cbcSMatt Macy fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, 395eda14cbcSMatt Macy spa_history_zone()); 396eda14cbcSMatt Macy } 397eda14cbcSMatt Macy fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); 398eda14cbcSMatt Macy 399eda14cbcSMatt Macy /* Kick this off asynchronously; errors are ignored. */ 400*2c48331dSMatt Macy dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, nvarg, tx); 401eda14cbcSMatt Macy dmu_tx_commit(tx); 402eda14cbcSMatt Macy 403eda14cbcSMatt Macy /* spa_history_log_sync will free nvl */ 404eda14cbcSMatt Macy return (err); 405eda14cbcSMatt Macy } 406eda14cbcSMatt Macy 407eda14cbcSMatt Macy /* 408eda14cbcSMatt Macy * Read out the command history. 409eda14cbcSMatt Macy */ 410eda14cbcSMatt Macy int 411eda14cbcSMatt Macy spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) 412eda14cbcSMatt Macy { 413eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 414eda14cbcSMatt Macy dmu_buf_t *dbp; 415eda14cbcSMatt Macy uint64_t read_len, phys_read_off, phys_eof; 416eda14cbcSMatt Macy uint64_t leftover = 0; 417eda14cbcSMatt Macy spa_history_phys_t *shpp; 418eda14cbcSMatt Macy int err; 419eda14cbcSMatt Macy 420eda14cbcSMatt Macy /* 421eda14cbcSMatt Macy * If the command history doesn't exist (older pool), 422eda14cbcSMatt Macy * that's ok, just return ENOENT. 423eda14cbcSMatt Macy */ 424eda14cbcSMatt Macy if (!spa->spa_history) 425eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 426eda14cbcSMatt Macy 427eda14cbcSMatt Macy /* 428eda14cbcSMatt Macy * The history is logged asynchronously, so when they request 429eda14cbcSMatt Macy * the first chunk of history, make sure everything has been 430eda14cbcSMatt Macy * synced to disk so that we get it. 431eda14cbcSMatt Macy */ 432eda14cbcSMatt Macy if (*offp == 0 && spa_writeable(spa)) 433eda14cbcSMatt Macy txg_wait_synced(spa_get_dsl(spa), 0); 434eda14cbcSMatt Macy 435eda14cbcSMatt Macy if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0) 436eda14cbcSMatt Macy return (err); 437eda14cbcSMatt Macy shpp = dbp->db_data; 438eda14cbcSMatt Macy 439eda14cbcSMatt Macy #ifdef ZFS_DEBUG 440eda14cbcSMatt Macy { 441eda14cbcSMatt Macy dmu_object_info_t doi; 442eda14cbcSMatt Macy dmu_object_info_from_db(dbp, &doi); 443eda14cbcSMatt Macy ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); 444eda14cbcSMatt Macy } 445eda14cbcSMatt Macy #endif 446eda14cbcSMatt Macy 447eda14cbcSMatt Macy mutex_enter(&spa->spa_history_lock); 448eda14cbcSMatt Macy phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); 449eda14cbcSMatt Macy 450eda14cbcSMatt Macy if (*offp < shpp->sh_pool_create_len) { 451eda14cbcSMatt Macy /* read in just the zpool create history */ 452eda14cbcSMatt Macy phys_read_off = *offp; 453eda14cbcSMatt Macy read_len = MIN(*len, shpp->sh_pool_create_len - 454eda14cbcSMatt Macy phys_read_off); 455eda14cbcSMatt Macy } else { 456eda14cbcSMatt Macy /* 457eda14cbcSMatt Macy * Need to reset passed in offset to BOF if the passed in 458eda14cbcSMatt Macy * offset has since been overwritten. 459eda14cbcSMatt Macy */ 460eda14cbcSMatt Macy *offp = MAX(*offp, shpp->sh_bof); 461eda14cbcSMatt Macy phys_read_off = spa_history_log_to_phys(*offp, shpp); 462eda14cbcSMatt Macy 463eda14cbcSMatt Macy /* 464eda14cbcSMatt Macy * Read up to the minimum of what the user passed down or 465eda14cbcSMatt Macy * the EOF (physical or logical). If we hit physical EOF, 466eda14cbcSMatt Macy * use 'leftover' to read from the physical BOF. 467eda14cbcSMatt Macy */ 468eda14cbcSMatt Macy if (phys_read_off <= phys_eof) { 469eda14cbcSMatt Macy read_len = MIN(*len, phys_eof - phys_read_off); 470eda14cbcSMatt Macy } else { 471eda14cbcSMatt Macy read_len = MIN(*len, 472eda14cbcSMatt Macy shpp->sh_phys_max_off - phys_read_off); 473eda14cbcSMatt Macy if (phys_read_off + *len > shpp->sh_phys_max_off) { 474eda14cbcSMatt Macy leftover = MIN(*len - read_len, 475eda14cbcSMatt Macy phys_eof - shpp->sh_pool_create_len); 476eda14cbcSMatt Macy } 477eda14cbcSMatt Macy } 478eda14cbcSMatt Macy } 479eda14cbcSMatt Macy 480eda14cbcSMatt Macy /* offset for consumer to use next */ 481eda14cbcSMatt Macy *offp += read_len + leftover; 482eda14cbcSMatt Macy 483eda14cbcSMatt Macy /* tell the consumer how much you actually read */ 484eda14cbcSMatt Macy *len = read_len + leftover; 485eda14cbcSMatt Macy 486eda14cbcSMatt Macy if (read_len == 0) { 487eda14cbcSMatt Macy mutex_exit(&spa->spa_history_lock); 488eda14cbcSMatt Macy dmu_buf_rele(dbp, FTAG); 489eda14cbcSMatt Macy return (0); 490eda14cbcSMatt Macy } 491eda14cbcSMatt Macy 492eda14cbcSMatt Macy err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf, 493eda14cbcSMatt Macy DMU_READ_PREFETCH); 494eda14cbcSMatt Macy if (leftover && err == 0) { 495eda14cbcSMatt Macy err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len, 496eda14cbcSMatt Macy leftover, buf + read_len, DMU_READ_PREFETCH); 497eda14cbcSMatt Macy } 498eda14cbcSMatt Macy mutex_exit(&spa->spa_history_lock); 499eda14cbcSMatt Macy 500eda14cbcSMatt Macy dmu_buf_rele(dbp, FTAG); 501eda14cbcSMatt Macy return (err); 502eda14cbcSMatt Macy } 503eda14cbcSMatt Macy 504eda14cbcSMatt Macy /* 505eda14cbcSMatt Macy * The nvlist will be consumed by this call. 506eda14cbcSMatt Macy */ 507eda14cbcSMatt Macy static void 508eda14cbcSMatt Macy log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, 509eda14cbcSMatt Macy dmu_tx_t *tx, const char *fmt, va_list adx) 510eda14cbcSMatt Macy { 511eda14cbcSMatt Macy char *msg; 512eda14cbcSMatt Macy 513eda14cbcSMatt Macy /* 514eda14cbcSMatt Macy * If this is part of creating a pool, not everything is 515eda14cbcSMatt Macy * initialized yet, so don't bother logging the internal events. 516eda14cbcSMatt Macy * Likewise if the pool is not writeable. 517eda14cbcSMatt Macy */ 518eda14cbcSMatt Macy if (spa_is_initializing(spa) || !spa_writeable(spa)) { 519eda14cbcSMatt Macy fnvlist_free(nvl); 520eda14cbcSMatt Macy return; 521eda14cbcSMatt Macy } 522eda14cbcSMatt Macy 523eda14cbcSMatt Macy msg = kmem_vasprintf(fmt, adx); 524eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); 525eda14cbcSMatt Macy kmem_strfree(msg); 526eda14cbcSMatt Macy 527eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); 528eda14cbcSMatt Macy fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); 529eda14cbcSMatt Macy 530eda14cbcSMatt Macy if (dmu_tx_is_syncing(tx)) { 531eda14cbcSMatt Macy spa_history_log_sync(nvl, tx); 532eda14cbcSMatt Macy } else { 533eda14cbcSMatt Macy dsl_sync_task_nowait(spa_get_dsl(spa), 534*2c48331dSMatt Macy spa_history_log_sync, nvl, tx); 535eda14cbcSMatt Macy } 536eda14cbcSMatt Macy /* spa_history_log_sync() will free nvl */ 537eda14cbcSMatt Macy } 538eda14cbcSMatt Macy 539eda14cbcSMatt Macy void 540eda14cbcSMatt Macy spa_history_log_internal(spa_t *spa, const char *operation, 541eda14cbcSMatt Macy dmu_tx_t *tx, const char *fmt, ...) 542eda14cbcSMatt Macy { 543eda14cbcSMatt Macy dmu_tx_t *htx = tx; 544eda14cbcSMatt Macy va_list adx; 545eda14cbcSMatt Macy 546eda14cbcSMatt Macy /* create a tx if we didn't get one */ 547eda14cbcSMatt Macy if (tx == NULL) { 548eda14cbcSMatt Macy htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 549eda14cbcSMatt Macy if (dmu_tx_assign(htx, TXG_WAIT) != 0) { 550eda14cbcSMatt Macy dmu_tx_abort(htx); 551eda14cbcSMatt Macy return; 552eda14cbcSMatt Macy } 553eda14cbcSMatt Macy } 554eda14cbcSMatt Macy 555eda14cbcSMatt Macy va_start(adx, fmt); 556eda14cbcSMatt Macy log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx); 557eda14cbcSMatt Macy va_end(adx); 558eda14cbcSMatt Macy 559eda14cbcSMatt Macy /* if we didn't get a tx from the caller, commit the one we made */ 560eda14cbcSMatt Macy if (tx == NULL) 561eda14cbcSMatt Macy dmu_tx_commit(htx); 562eda14cbcSMatt Macy } 563eda14cbcSMatt Macy 564eda14cbcSMatt Macy void 565eda14cbcSMatt Macy spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, 566eda14cbcSMatt Macy dmu_tx_t *tx, const char *fmt, ...) 567eda14cbcSMatt Macy { 568eda14cbcSMatt Macy va_list adx; 569eda14cbcSMatt Macy char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 570eda14cbcSMatt Macy nvlist_t *nvl = fnvlist_alloc(); 571eda14cbcSMatt Macy 572eda14cbcSMatt Macy ASSERT(tx != NULL); 573eda14cbcSMatt Macy 574eda14cbcSMatt Macy dsl_dataset_name(ds, namebuf); 575eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); 576eda14cbcSMatt Macy fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); 577eda14cbcSMatt Macy 578eda14cbcSMatt Macy va_start(adx, fmt); 579eda14cbcSMatt Macy log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); 580eda14cbcSMatt Macy va_end(adx); 581eda14cbcSMatt Macy } 582eda14cbcSMatt Macy 583eda14cbcSMatt Macy void 584eda14cbcSMatt Macy spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, 585eda14cbcSMatt Macy dmu_tx_t *tx, const char *fmt, ...) 586eda14cbcSMatt Macy { 587eda14cbcSMatt Macy va_list adx; 588eda14cbcSMatt Macy char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 589eda14cbcSMatt Macy nvlist_t *nvl = fnvlist_alloc(); 590eda14cbcSMatt Macy 591eda14cbcSMatt Macy ASSERT(tx != NULL); 592eda14cbcSMatt Macy 593eda14cbcSMatt Macy dsl_dir_name(dd, namebuf); 594eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); 595eda14cbcSMatt Macy fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, 596eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_head_dataset_obj); 597eda14cbcSMatt Macy 598eda14cbcSMatt Macy va_start(adx, fmt); 599eda14cbcSMatt Macy log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); 600eda14cbcSMatt Macy va_end(adx); 601eda14cbcSMatt Macy } 602eda14cbcSMatt Macy 603eda14cbcSMatt Macy void 604eda14cbcSMatt Macy spa_history_log_version(spa_t *spa, const char *operation, dmu_tx_t *tx) 605eda14cbcSMatt Macy { 606eda14cbcSMatt Macy utsname_t *u = utsname(); 607eda14cbcSMatt Macy 608eda14cbcSMatt Macy spa_history_log_internal(spa, operation, tx, 609eda14cbcSMatt Macy "pool version %llu; software version %s; uts %s %s %s %s", 610eda14cbcSMatt Macy (u_longlong_t)spa_version(spa), ZFS_META_GITREV, 611eda14cbcSMatt Macy u->nodename, u->release, u->version, u->machine); 612eda14cbcSMatt Macy } 613eda14cbcSMatt Macy 614eda14cbcSMatt Macy #ifndef _KERNEL 615eda14cbcSMatt Macy const char * 616eda14cbcSMatt Macy spa_history_zone(void) 617eda14cbcSMatt Macy { 618eda14cbcSMatt Macy return (NULL); 619eda14cbcSMatt Macy } 620eda14cbcSMatt Macy #endif 621eda14cbcSMatt Macy 622eda14cbcSMatt Macy #if defined(_KERNEL) 623eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_create_obj); 624eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_get); 625eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_log); 626eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_log_internal); 627eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_log_version); 628eda14cbcSMatt Macy #endif 629