1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy 22eda14cbcSMatt Macy /* 23eda14cbcSMatt Macy * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 24eda14cbcSMatt Macy * Copyright (c) 2011, 2018 by Delphix. All rights reserved. 25eda14cbcSMatt Macy * Copyright (c) 2014 Integros [integros.com] 26eda14cbcSMatt Macy * Copyright 2017 Joyent, Inc. 27eda14cbcSMatt Macy */ 28eda14cbcSMatt Macy 29eda14cbcSMatt Macy #include <sys/spa.h> 30eda14cbcSMatt Macy #include <sys/spa_impl.h> 31eda14cbcSMatt Macy #include <sys/zap.h> 32eda14cbcSMatt Macy #include <sys/dsl_synctask.h> 33eda14cbcSMatt Macy #include <sys/dmu_tx.h> 34eda14cbcSMatt Macy #include <sys/dmu_objset.h> 35eda14cbcSMatt Macy #include <sys/dsl_dataset.h> 36eda14cbcSMatt Macy #include <sys/dsl_dir.h> 37eda14cbcSMatt Macy #include <sys/cmn_err.h> 38eda14cbcSMatt Macy #include <sys/sunddi.h> 39eda14cbcSMatt Macy #include <sys/cred.h> 40eda14cbcSMatt Macy #include "zfs_comutil.h" 41eda14cbcSMatt Macy #include "zfs_gitrev.h" 42eda14cbcSMatt Macy #ifdef _KERNEL 43eda14cbcSMatt Macy #include <sys/zone.h> 44eda14cbcSMatt Macy #endif 45eda14cbcSMatt Macy 46eda14cbcSMatt Macy /* 47eda14cbcSMatt Macy * Routines to manage the on-disk history log. 48eda14cbcSMatt Macy * 49eda14cbcSMatt Macy * The history log is stored as a dmu object containing 50eda14cbcSMatt Macy * <packed record length, record nvlist> tuples. 51eda14cbcSMatt Macy * 52eda14cbcSMatt Macy * Where "record nvlist" is an nvlist containing uint64_ts and strings, and 53eda14cbcSMatt Macy * "packed record length" is the packed length of the "record nvlist" stored 54eda14cbcSMatt Macy * as a little endian uint64_t. 55eda14cbcSMatt Macy * 56eda14cbcSMatt Macy * The log is implemented as a ring buffer, though the original creation 57eda14cbcSMatt Macy * of the pool ('zpool create') is never overwritten. 58eda14cbcSMatt Macy * 59eda14cbcSMatt Macy * The history log is tracked as object 'spa_t::spa_history'. The bonus buffer 60eda14cbcSMatt Macy * of 'spa_history' stores the offsets for logging/retrieving history as 61eda14cbcSMatt Macy * 'spa_history_phys_t'. 'sh_pool_create_len' is the ending offset in bytes of 62eda14cbcSMatt Macy * where the 'zpool create' record is stored. This allows us to never 63eda14cbcSMatt Macy * overwrite the original creation of the pool. 'sh_phys_max_off' is the 64eda14cbcSMatt Macy * physical ending offset in bytes of the log. This tells you the length of 65eda14cbcSMatt Macy * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record 66eda14cbcSMatt Macy * is added, 'sh_eof' is incremented by the size of the record. 67eda14cbcSMatt Macy * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes). 68eda14cbcSMatt Macy * This is where the consumer should start reading from after reading in 69eda14cbcSMatt Macy * the 'zpool create' portion of the log. 70eda14cbcSMatt Macy * 71eda14cbcSMatt Macy * 'sh_records_lost' keeps track of how many records have been overwritten 72eda14cbcSMatt Macy * and permanently lost. 73eda14cbcSMatt Macy */ 74eda14cbcSMatt Macy 75eda14cbcSMatt Macy /* convert a logical offset to physical */ 76eda14cbcSMatt Macy static uint64_t 77eda14cbcSMatt Macy spa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp) 78eda14cbcSMatt Macy { 79eda14cbcSMatt Macy uint64_t phys_len; 80eda14cbcSMatt Macy 81eda14cbcSMatt Macy phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len; 82eda14cbcSMatt Macy return ((log_off - shpp->sh_pool_create_len) % phys_len 83eda14cbcSMatt Macy + shpp->sh_pool_create_len); 84eda14cbcSMatt Macy } 85eda14cbcSMatt Macy 86eda14cbcSMatt Macy void 87eda14cbcSMatt Macy spa_history_create_obj(spa_t *spa, dmu_tx_t *tx) 88eda14cbcSMatt Macy { 89eda14cbcSMatt Macy dmu_buf_t *dbp; 90eda14cbcSMatt Macy spa_history_phys_t *shpp; 91eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 92eda14cbcSMatt Macy 93eda14cbcSMatt Macy ASSERT0(spa->spa_history); 94eda14cbcSMatt Macy spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY, 95eda14cbcSMatt Macy SPA_OLD_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS, 96eda14cbcSMatt Macy sizeof (spa_history_phys_t), tx); 97eda14cbcSMatt Macy 98eda14cbcSMatt Macy VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, 99eda14cbcSMatt Macy DMU_POOL_HISTORY, sizeof (uint64_t), 1, 100eda14cbcSMatt Macy &spa->spa_history, tx)); 101eda14cbcSMatt Macy 102eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); 103eda14cbcSMatt Macy ASSERT3U(dbp->db_size, >=, sizeof (spa_history_phys_t)); 104eda14cbcSMatt Macy 105eda14cbcSMatt Macy shpp = dbp->db_data; 106eda14cbcSMatt Macy dmu_buf_will_dirty(dbp, tx); 107eda14cbcSMatt Macy 108eda14cbcSMatt Macy /* 109eda14cbcSMatt Macy * Figure out maximum size of history log. We set it at 110eda14cbcSMatt Macy * 0.1% of pool size, with a max of 1G and min of 128KB. 111eda14cbcSMatt Macy */ 112eda14cbcSMatt Macy shpp->sh_phys_max_off = 113eda14cbcSMatt Macy metaslab_class_get_dspace(spa_normal_class(spa)) / 1000; 114eda14cbcSMatt Macy shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30); 115eda14cbcSMatt Macy shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10); 116eda14cbcSMatt Macy 117eda14cbcSMatt Macy dmu_buf_rele(dbp, FTAG); 118eda14cbcSMatt Macy } 119eda14cbcSMatt Macy 120eda14cbcSMatt Macy /* 121eda14cbcSMatt Macy * Change 'sh_bof' to the beginning of the next record. 122eda14cbcSMatt Macy */ 123eda14cbcSMatt Macy static int 124eda14cbcSMatt Macy spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp) 125eda14cbcSMatt Macy { 126eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 127eda14cbcSMatt Macy uint64_t firstread, reclen, phys_bof; 128eda14cbcSMatt Macy char buf[sizeof (reclen)]; 129eda14cbcSMatt Macy int err; 130eda14cbcSMatt Macy 131eda14cbcSMatt Macy phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp); 132eda14cbcSMatt Macy firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof); 133eda14cbcSMatt Macy 134eda14cbcSMatt Macy if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread, 135eda14cbcSMatt Macy buf, DMU_READ_PREFETCH)) != 0) 136eda14cbcSMatt Macy return (err); 137eda14cbcSMatt Macy if (firstread != sizeof (reclen)) { 138eda14cbcSMatt Macy if ((err = dmu_read(mos, spa->spa_history, 139eda14cbcSMatt Macy shpp->sh_pool_create_len, sizeof (reclen) - firstread, 140eda14cbcSMatt Macy buf + firstread, DMU_READ_PREFETCH)) != 0) 141eda14cbcSMatt Macy return (err); 142eda14cbcSMatt Macy } 143eda14cbcSMatt Macy 144eda14cbcSMatt Macy reclen = LE_64(*((uint64_t *)buf)); 145eda14cbcSMatt Macy shpp->sh_bof += reclen + sizeof (reclen); 146eda14cbcSMatt Macy shpp->sh_records_lost++; 147eda14cbcSMatt Macy return (0); 148eda14cbcSMatt Macy } 149eda14cbcSMatt Macy 150eda14cbcSMatt Macy static int 151eda14cbcSMatt Macy spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, 152eda14cbcSMatt Macy dmu_tx_t *tx) 153eda14cbcSMatt Macy { 154eda14cbcSMatt Macy uint64_t firstwrite, phys_eof; 155eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 156eda14cbcSMatt Macy int err; 157eda14cbcSMatt Macy 158eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&spa->spa_history_lock)); 159eda14cbcSMatt Macy 160eda14cbcSMatt Macy /* see if we need to reset logical BOF */ 161eda14cbcSMatt Macy while (shpp->sh_phys_max_off - shpp->sh_pool_create_len - 162eda14cbcSMatt Macy (shpp->sh_eof - shpp->sh_bof) <= len) { 163eda14cbcSMatt Macy if ((err = spa_history_advance_bof(spa, shpp)) != 0) { 164eda14cbcSMatt Macy return (err); 165eda14cbcSMatt Macy } 166eda14cbcSMatt Macy } 167eda14cbcSMatt Macy 168eda14cbcSMatt Macy phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); 169eda14cbcSMatt Macy firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof); 170eda14cbcSMatt Macy shpp->sh_eof += len; 171eda14cbcSMatt Macy dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx); 172eda14cbcSMatt Macy 173eda14cbcSMatt Macy len -= firstwrite; 174eda14cbcSMatt Macy if (len > 0) { 175eda14cbcSMatt Macy /* write out the rest at the beginning of physical file */ 176eda14cbcSMatt Macy dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len, 177eda14cbcSMatt Macy len, (char *)buf + firstwrite, tx); 178eda14cbcSMatt Macy } 179eda14cbcSMatt Macy 180eda14cbcSMatt Macy return (0); 181eda14cbcSMatt Macy } 182eda14cbcSMatt Macy 183eda14cbcSMatt Macy /* 184eda14cbcSMatt Macy * Post a history sysevent. 185eda14cbcSMatt Macy * 186eda14cbcSMatt Macy * The nvlist_t* passed into this function will be transformed into a new 187eda14cbcSMatt Macy * nvlist where: 188eda14cbcSMatt Macy * 189eda14cbcSMatt Macy * 1. Nested nvlists will be flattened to a single level 190eda14cbcSMatt Macy * 2. Keys will have their names normalized (to remove any problematic 191eda14cbcSMatt Macy * characters, such as whitespace) 192eda14cbcSMatt Macy * 193eda14cbcSMatt Macy * The nvlist_t passed into this function will duplicated and should be freed 194eda14cbcSMatt Macy * by caller. 195eda14cbcSMatt Macy * 196eda14cbcSMatt Macy */ 197eda14cbcSMatt Macy static void 198eda14cbcSMatt Macy spa_history_log_notify(spa_t *spa, nvlist_t *nvl) 199eda14cbcSMatt Macy { 200eda14cbcSMatt Macy nvlist_t *hist_nvl = fnvlist_alloc(); 201eda14cbcSMatt Macy uint64_t uint64; 2022a58b312SMartin Matuska const char *string; 203eda14cbcSMatt Macy 204eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_CMD, &string) == 0) 205eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_CMD, string); 206eda14cbcSMatt Macy 207eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0) 208eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string); 209eda14cbcSMatt Macy 210eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_ZONE, &string) == 0) 211eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_ZONE, string); 212eda14cbcSMatt Macy 213eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_HOST, &string) == 0) 214eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_HOST, string); 215eda14cbcSMatt Macy 216eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME, &string) == 0) 217eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_DSNAME, string); 218eda14cbcSMatt Macy 219eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR, &string) == 0) 220eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_STR, string); 221eda14cbcSMatt Macy 222eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL, &string) == 0) 223eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_IOCTL, string); 224eda14cbcSMatt Macy 225eda14cbcSMatt Macy if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0) 226eda14cbcSMatt Macy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string); 227eda14cbcSMatt Macy 228eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID, &uint64) == 0) 229eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_DSID, uint64); 230eda14cbcSMatt Macy 231eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG, &uint64) == 0) 232eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TXG, uint64); 233eda14cbcSMatt Macy 234eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TIME, &uint64) == 0) 235eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TIME, uint64); 236eda14cbcSMatt Macy 237eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_WHO, &uint64) == 0) 238eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_WHO, uint64); 239eda14cbcSMatt Macy 240eda14cbcSMatt Macy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_INT_EVENT, &uint64) == 0) 241eda14cbcSMatt Macy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_INT_EVENT, uint64); 242eda14cbcSMatt Macy 243eda14cbcSMatt Macy spa_event_notify(spa, NULL, hist_nvl, ESC_ZFS_HISTORY_EVENT); 244eda14cbcSMatt Macy 245eda14cbcSMatt Macy nvlist_free(hist_nvl); 246eda14cbcSMatt Macy } 247eda14cbcSMatt Macy 248eda14cbcSMatt Macy /* 249eda14cbcSMatt Macy * Write out a history event. 250eda14cbcSMatt Macy */ 251eda14cbcSMatt Macy static void 252eda14cbcSMatt Macy spa_history_log_sync(void *arg, dmu_tx_t *tx) 253eda14cbcSMatt Macy { 254eda14cbcSMatt Macy nvlist_t *nvl = arg; 255eda14cbcSMatt Macy spa_t *spa = dmu_tx_pool(tx)->dp_spa; 256eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 257eda14cbcSMatt Macy dmu_buf_t *dbp; 258eda14cbcSMatt Macy spa_history_phys_t *shpp; 259eda14cbcSMatt Macy size_t reclen; 260eda14cbcSMatt Macy uint64_t le_len; 261eda14cbcSMatt Macy char *record_packed = NULL; 262eda14cbcSMatt Macy int ret; 263eda14cbcSMatt Macy 264eda14cbcSMatt Macy /* 265eda14cbcSMatt Macy * If we have an older pool that doesn't have a command 266eda14cbcSMatt Macy * history object, create it now. 267eda14cbcSMatt Macy */ 268eda14cbcSMatt Macy mutex_enter(&spa->spa_history_lock); 269eda14cbcSMatt Macy if (!spa->spa_history) 270eda14cbcSMatt Macy spa_history_create_obj(spa, tx); 271eda14cbcSMatt Macy mutex_exit(&spa->spa_history_lock); 272eda14cbcSMatt Macy 273eda14cbcSMatt Macy /* 274eda14cbcSMatt Macy * Get the offset of where we need to write via the bonus buffer. 275eda14cbcSMatt Macy * Update the offset when the write completes. 276eda14cbcSMatt Macy */ 277eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); 278eda14cbcSMatt Macy shpp = dbp->db_data; 279eda14cbcSMatt Macy 280eda14cbcSMatt Macy dmu_buf_will_dirty(dbp, tx); 281eda14cbcSMatt Macy 282eda14cbcSMatt Macy #ifdef ZFS_DEBUG 283eda14cbcSMatt Macy { 284eda14cbcSMatt Macy dmu_object_info_t doi; 285eda14cbcSMatt Macy dmu_object_info_from_db(dbp, &doi); 286eda14cbcSMatt Macy ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); 287eda14cbcSMatt Macy } 288eda14cbcSMatt Macy #endif 289eda14cbcSMatt Macy 290eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname()->nodename); 291eda14cbcSMatt Macy 292eda14cbcSMatt Macy if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { 293eda14cbcSMatt Macy zfs_dbgmsg("command: %s", 294eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); 295eda14cbcSMatt Macy } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) { 296eda14cbcSMatt Macy if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { 297eda14cbcSMatt Macy zfs_dbgmsg("txg %lld %s %s (id %llu) %s", 29833b8c039SMartin Matuska (longlong_t)fnvlist_lookup_uint64(nvl, 29933b8c039SMartin Matuska ZPOOL_HIST_TXG), 300eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), 301eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), 30233b8c039SMartin Matuska (u_longlong_t)fnvlist_lookup_uint64(nvl, 30333b8c039SMartin Matuska ZPOOL_HIST_DSID), 304eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); 305eda14cbcSMatt Macy } else { 306eda14cbcSMatt Macy zfs_dbgmsg("txg %lld %s %s", 30733b8c039SMartin Matuska (longlong_t)fnvlist_lookup_uint64(nvl, 30833b8c039SMartin Matuska ZPOOL_HIST_TXG), 309eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), 310eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); 311eda14cbcSMatt Macy } 312eda14cbcSMatt Macy /* 313eda14cbcSMatt Macy * The history sysevent is posted only for internal history 314eda14cbcSMatt Macy * messages to show what has happened, not how it happened. For 315eda14cbcSMatt Macy * example, the following command: 316eda14cbcSMatt Macy * 317eda14cbcSMatt Macy * # zfs destroy -r tank/foo 318eda14cbcSMatt Macy * 319eda14cbcSMatt Macy * will result in one sysevent posted per dataset that is 320eda14cbcSMatt Macy * destroyed as a result of the command - which could be more 321eda14cbcSMatt Macy * than one event in total. By contrast, if the sysevent was 322eda14cbcSMatt Macy * posted as a result of the ZPOOL_HIST_CMD key being present 323eda14cbcSMatt Macy * it would result in only one sysevent being posted with the 324eda14cbcSMatt Macy * full command line arguments, requiring the consumer to know 3257877fdebSMatt Macy * how to parse and understand zfs(8) command invocations. 326eda14cbcSMatt Macy */ 327eda14cbcSMatt Macy spa_history_log_notify(spa, nvl); 328eda14cbcSMatt Macy } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { 329eda14cbcSMatt Macy zfs_dbgmsg("ioctl %s", 330eda14cbcSMatt Macy fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); 331eda14cbcSMatt Macy } 332eda14cbcSMatt Macy 333eda14cbcSMatt Macy VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE, 334eda14cbcSMatt Macy KM_SLEEP), ==, 0); 335eda14cbcSMatt Macy 336eda14cbcSMatt Macy mutex_enter(&spa->spa_history_lock); 337eda14cbcSMatt Macy 338eda14cbcSMatt Macy /* write out the packed length as little endian */ 339eda14cbcSMatt Macy le_len = LE_64((uint64_t)reclen); 340eda14cbcSMatt Macy ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx); 341eda14cbcSMatt Macy if (!ret) 342eda14cbcSMatt Macy ret = spa_history_write(spa, record_packed, reclen, shpp, tx); 343eda14cbcSMatt Macy 344eda14cbcSMatt Macy /* The first command is the create, which we keep forever */ 345eda14cbcSMatt Macy if (ret == 0 && shpp->sh_pool_create_len == 0 && 346eda14cbcSMatt Macy nvlist_exists(nvl, ZPOOL_HIST_CMD)) { 347eda14cbcSMatt Macy shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof; 348eda14cbcSMatt Macy } 349eda14cbcSMatt Macy 350eda14cbcSMatt Macy mutex_exit(&spa->spa_history_lock); 351eda14cbcSMatt Macy fnvlist_pack_free(record_packed, reclen); 352eda14cbcSMatt Macy dmu_buf_rele(dbp, FTAG); 353eda14cbcSMatt Macy fnvlist_free(nvl); 354eda14cbcSMatt Macy } 355eda14cbcSMatt Macy 356eda14cbcSMatt Macy /* 357eda14cbcSMatt Macy * Write out a history event. 358eda14cbcSMatt Macy */ 359eda14cbcSMatt Macy int 360eda14cbcSMatt Macy spa_history_log(spa_t *spa, const char *msg) 361eda14cbcSMatt Macy { 362eda14cbcSMatt Macy int err; 363eda14cbcSMatt Macy nvlist_t *nvl = fnvlist_alloc(); 364eda14cbcSMatt Macy 365eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); 366eda14cbcSMatt Macy err = spa_history_log_nvl(spa, nvl); 367eda14cbcSMatt Macy fnvlist_free(nvl); 368eda14cbcSMatt Macy return (err); 369eda14cbcSMatt Macy } 370eda14cbcSMatt Macy 371eda14cbcSMatt Macy int 372eda14cbcSMatt Macy spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) 373eda14cbcSMatt Macy { 374eda14cbcSMatt Macy int err = 0; 375eda14cbcSMatt Macy dmu_tx_t *tx; 376eda14cbcSMatt Macy nvlist_t *nvarg, *in_nvl = NULL; 377eda14cbcSMatt Macy 378eda14cbcSMatt Macy if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) 379eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 380eda14cbcSMatt Macy 381eda14cbcSMatt Macy err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl); 382eda14cbcSMatt Macy if (err == 0) { 383eda14cbcSMatt Macy (void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS); 384eda14cbcSMatt Macy } 385eda14cbcSMatt Macy 386eda14cbcSMatt Macy tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 387eda14cbcSMatt Macy err = dmu_tx_assign(tx, TXG_WAIT); 388eda14cbcSMatt Macy if (err) { 389eda14cbcSMatt Macy dmu_tx_abort(tx); 390eda14cbcSMatt Macy return (err); 391eda14cbcSMatt Macy } 392eda14cbcSMatt Macy 393*17aab35aSMartin Matuska ASSERT3UF(tx->tx_txg, <=, spa_final_dirty_txg(spa), 394*17aab35aSMartin Matuska "Logged %s after final txg was set!", "nvlist"); 395*17aab35aSMartin Matuska 396eda14cbcSMatt Macy VERIFY0(nvlist_dup(nvl, &nvarg, KM_SLEEP)); 397eda14cbcSMatt Macy if (spa_history_zone() != NULL) { 398eda14cbcSMatt Macy fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, 399eda14cbcSMatt Macy spa_history_zone()); 400eda14cbcSMatt Macy } 401eda14cbcSMatt Macy fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); 402eda14cbcSMatt Macy 403184c1b94SMartin Matuska /* 404184c1b94SMartin Matuska * Since the history is recorded asynchronously, the effective time is 405184c1b94SMartin Matuska * now, which may be considerably before the change is made on disk. 406184c1b94SMartin Matuska */ 407184c1b94SMartin Matuska fnvlist_add_uint64(nvarg, ZPOOL_HIST_TIME, gethrestime_sec()); 408184c1b94SMartin Matuska 409eda14cbcSMatt Macy /* Kick this off asynchronously; errors are ignored. */ 4102c48331dSMatt Macy dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, nvarg, tx); 411eda14cbcSMatt Macy dmu_tx_commit(tx); 412eda14cbcSMatt Macy 413eda14cbcSMatt Macy /* spa_history_log_sync will free nvl */ 414eda14cbcSMatt Macy return (err); 415eda14cbcSMatt Macy } 416eda14cbcSMatt Macy 417eda14cbcSMatt Macy /* 418eda14cbcSMatt Macy * Read out the command history. 419eda14cbcSMatt Macy */ 420eda14cbcSMatt Macy int 421eda14cbcSMatt Macy spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) 422eda14cbcSMatt Macy { 423eda14cbcSMatt Macy objset_t *mos = spa->spa_meta_objset; 424eda14cbcSMatt Macy dmu_buf_t *dbp; 425eda14cbcSMatt Macy uint64_t read_len, phys_read_off, phys_eof; 426eda14cbcSMatt Macy uint64_t leftover = 0; 427eda14cbcSMatt Macy spa_history_phys_t *shpp; 428eda14cbcSMatt Macy int err; 429eda14cbcSMatt Macy 430eda14cbcSMatt Macy /* 431eda14cbcSMatt Macy * If the command history doesn't exist (older pool), 432eda14cbcSMatt Macy * that's ok, just return ENOENT. 433eda14cbcSMatt Macy */ 434eda14cbcSMatt Macy if (!spa->spa_history) 435eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 436eda14cbcSMatt Macy 437eda14cbcSMatt Macy /* 438eda14cbcSMatt Macy * The history is logged asynchronously, so when they request 439eda14cbcSMatt Macy * the first chunk of history, make sure everything has been 440eda14cbcSMatt Macy * synced to disk so that we get it. 441eda14cbcSMatt Macy */ 442eda14cbcSMatt Macy if (*offp == 0 && spa_writeable(spa)) 443eda14cbcSMatt Macy txg_wait_synced(spa_get_dsl(spa), 0); 444eda14cbcSMatt Macy 445eda14cbcSMatt Macy if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0) 446eda14cbcSMatt Macy return (err); 447eda14cbcSMatt Macy shpp = dbp->db_data; 448eda14cbcSMatt Macy 449eda14cbcSMatt Macy #ifdef ZFS_DEBUG 450eda14cbcSMatt Macy { 451eda14cbcSMatt Macy dmu_object_info_t doi; 452eda14cbcSMatt Macy dmu_object_info_from_db(dbp, &doi); 453eda14cbcSMatt Macy ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); 454eda14cbcSMatt Macy } 455eda14cbcSMatt Macy #endif 456eda14cbcSMatt Macy 457eda14cbcSMatt Macy mutex_enter(&spa->spa_history_lock); 458eda14cbcSMatt Macy phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); 459eda14cbcSMatt Macy 460eda14cbcSMatt Macy if (*offp < shpp->sh_pool_create_len) { 461eda14cbcSMatt Macy /* read in just the zpool create history */ 462eda14cbcSMatt Macy phys_read_off = *offp; 463eda14cbcSMatt Macy read_len = MIN(*len, shpp->sh_pool_create_len - 464eda14cbcSMatt Macy phys_read_off); 465eda14cbcSMatt Macy } else { 466eda14cbcSMatt Macy /* 467eda14cbcSMatt Macy * Need to reset passed in offset to BOF if the passed in 468eda14cbcSMatt Macy * offset has since been overwritten. 469eda14cbcSMatt Macy */ 470eda14cbcSMatt Macy *offp = MAX(*offp, shpp->sh_bof); 471eda14cbcSMatt Macy phys_read_off = spa_history_log_to_phys(*offp, shpp); 472eda14cbcSMatt Macy 473eda14cbcSMatt Macy /* 474eda14cbcSMatt Macy * Read up to the minimum of what the user passed down or 475eda14cbcSMatt Macy * the EOF (physical or logical). If we hit physical EOF, 476eda14cbcSMatt Macy * use 'leftover' to read from the physical BOF. 477eda14cbcSMatt Macy */ 478eda14cbcSMatt Macy if (phys_read_off <= phys_eof) { 479eda14cbcSMatt Macy read_len = MIN(*len, phys_eof - phys_read_off); 480eda14cbcSMatt Macy } else { 481eda14cbcSMatt Macy read_len = MIN(*len, 482eda14cbcSMatt Macy shpp->sh_phys_max_off - phys_read_off); 483eda14cbcSMatt Macy if (phys_read_off + *len > shpp->sh_phys_max_off) { 484eda14cbcSMatt Macy leftover = MIN(*len - read_len, 485eda14cbcSMatt Macy phys_eof - shpp->sh_pool_create_len); 486eda14cbcSMatt Macy } 487eda14cbcSMatt Macy } 488eda14cbcSMatt Macy } 489eda14cbcSMatt Macy 490eda14cbcSMatt Macy /* offset for consumer to use next */ 491eda14cbcSMatt Macy *offp += read_len + leftover; 492eda14cbcSMatt Macy 493eda14cbcSMatt Macy /* tell the consumer how much you actually read */ 494eda14cbcSMatt Macy *len = read_len + leftover; 495eda14cbcSMatt Macy 496eda14cbcSMatt Macy if (read_len == 0) { 497eda14cbcSMatt Macy mutex_exit(&spa->spa_history_lock); 498eda14cbcSMatt Macy dmu_buf_rele(dbp, FTAG); 499eda14cbcSMatt Macy return (0); 500eda14cbcSMatt Macy } 501eda14cbcSMatt Macy 502eda14cbcSMatt Macy err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf, 503eda14cbcSMatt Macy DMU_READ_PREFETCH); 504eda14cbcSMatt Macy if (leftover && err == 0) { 505eda14cbcSMatt Macy err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len, 506eda14cbcSMatt Macy leftover, buf + read_len, DMU_READ_PREFETCH); 507eda14cbcSMatt Macy } 508eda14cbcSMatt Macy mutex_exit(&spa->spa_history_lock); 509eda14cbcSMatt Macy 510eda14cbcSMatt Macy dmu_buf_rele(dbp, FTAG); 511eda14cbcSMatt Macy return (err); 512eda14cbcSMatt Macy } 513eda14cbcSMatt Macy 514eda14cbcSMatt Macy /* 515eda14cbcSMatt Macy * The nvlist will be consumed by this call. 516eda14cbcSMatt Macy */ 517eda14cbcSMatt Macy static void 518eda14cbcSMatt Macy log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, 519eda14cbcSMatt Macy dmu_tx_t *tx, const char *fmt, va_list adx) 520eda14cbcSMatt Macy { 521eda14cbcSMatt Macy char *msg; 522eda14cbcSMatt Macy 523eda14cbcSMatt Macy /* 524eda14cbcSMatt Macy * If this is part of creating a pool, not everything is 525eda14cbcSMatt Macy * initialized yet, so don't bother logging the internal events. 526eda14cbcSMatt Macy * Likewise if the pool is not writeable. 527eda14cbcSMatt Macy */ 528eda14cbcSMatt Macy if (spa_is_initializing(spa) || !spa_writeable(spa)) { 529eda14cbcSMatt Macy fnvlist_free(nvl); 530eda14cbcSMatt Macy return; 531eda14cbcSMatt Macy } 532eda14cbcSMatt Macy 533*17aab35aSMartin Matuska ASSERT3UF(tx->tx_txg, <=, spa_final_dirty_txg(spa), 534*17aab35aSMartin Matuska "Logged after final txg was set: %s %s", operation, fmt); 535*17aab35aSMartin Matuska 536eda14cbcSMatt Macy msg = kmem_vasprintf(fmt, adx); 537eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); 538eda14cbcSMatt Macy kmem_strfree(msg); 539eda14cbcSMatt Macy 540eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); 541eda14cbcSMatt Macy fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); 542184c1b94SMartin Matuska fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec()); 543eda14cbcSMatt Macy 544eda14cbcSMatt Macy if (dmu_tx_is_syncing(tx)) { 545eda14cbcSMatt Macy spa_history_log_sync(nvl, tx); 546eda14cbcSMatt Macy } else { 547eda14cbcSMatt Macy dsl_sync_task_nowait(spa_get_dsl(spa), 5482c48331dSMatt Macy spa_history_log_sync, nvl, tx); 549eda14cbcSMatt Macy } 550eda14cbcSMatt Macy /* spa_history_log_sync() will free nvl */ 551eda14cbcSMatt Macy } 552eda14cbcSMatt Macy 553eda14cbcSMatt Macy void 554eda14cbcSMatt Macy spa_history_log_internal(spa_t *spa, const char *operation, 555eda14cbcSMatt Macy dmu_tx_t *tx, const char *fmt, ...) 556eda14cbcSMatt Macy { 557eda14cbcSMatt Macy dmu_tx_t *htx = tx; 558eda14cbcSMatt Macy va_list adx; 559eda14cbcSMatt Macy 560eda14cbcSMatt Macy /* create a tx if we didn't get one */ 561eda14cbcSMatt Macy if (tx == NULL) { 562eda14cbcSMatt Macy htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 563eda14cbcSMatt Macy if (dmu_tx_assign(htx, TXG_WAIT) != 0) { 564eda14cbcSMatt Macy dmu_tx_abort(htx); 565eda14cbcSMatt Macy return; 566eda14cbcSMatt Macy } 567eda14cbcSMatt Macy } 568eda14cbcSMatt Macy 569eda14cbcSMatt Macy va_start(adx, fmt); 570eda14cbcSMatt Macy log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx); 571eda14cbcSMatt Macy va_end(adx); 572eda14cbcSMatt Macy 573eda14cbcSMatt Macy /* if we didn't get a tx from the caller, commit the one we made */ 574eda14cbcSMatt Macy if (tx == NULL) 575eda14cbcSMatt Macy dmu_tx_commit(htx); 576eda14cbcSMatt Macy } 577eda14cbcSMatt Macy 578eda14cbcSMatt Macy void 579eda14cbcSMatt Macy spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, 580eda14cbcSMatt Macy dmu_tx_t *tx, const char *fmt, ...) 581eda14cbcSMatt Macy { 582eda14cbcSMatt Macy va_list adx; 583eda14cbcSMatt Macy char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 584eda14cbcSMatt Macy nvlist_t *nvl = fnvlist_alloc(); 585eda14cbcSMatt Macy 586eda14cbcSMatt Macy ASSERT(tx != NULL); 587eda14cbcSMatt Macy 588eda14cbcSMatt Macy dsl_dataset_name(ds, namebuf); 589eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); 590eda14cbcSMatt Macy fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); 591eda14cbcSMatt Macy 592eda14cbcSMatt Macy va_start(adx, fmt); 593eda14cbcSMatt Macy log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); 594eda14cbcSMatt Macy va_end(adx); 595eda14cbcSMatt Macy } 596eda14cbcSMatt Macy 597eda14cbcSMatt Macy void 598eda14cbcSMatt Macy spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, 599eda14cbcSMatt Macy dmu_tx_t *tx, const char *fmt, ...) 600eda14cbcSMatt Macy { 601eda14cbcSMatt Macy va_list adx; 602eda14cbcSMatt Macy char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 603eda14cbcSMatt Macy nvlist_t *nvl = fnvlist_alloc(); 604eda14cbcSMatt Macy 605eda14cbcSMatt Macy ASSERT(tx != NULL); 606eda14cbcSMatt Macy 607eda14cbcSMatt Macy dsl_dir_name(dd, namebuf); 608eda14cbcSMatt Macy fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); 609eda14cbcSMatt Macy fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, 610eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_head_dataset_obj); 611eda14cbcSMatt Macy 612eda14cbcSMatt Macy va_start(adx, fmt); 613eda14cbcSMatt Macy log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); 614eda14cbcSMatt Macy va_end(adx); 615eda14cbcSMatt Macy } 616eda14cbcSMatt Macy 617eda14cbcSMatt Macy void 618eda14cbcSMatt Macy spa_history_log_version(spa_t *spa, const char *operation, dmu_tx_t *tx) 619eda14cbcSMatt Macy { 620eda14cbcSMatt Macy utsname_t *u = utsname(); 621eda14cbcSMatt Macy 622eda14cbcSMatt Macy spa_history_log_internal(spa, operation, tx, 623eda14cbcSMatt Macy "pool version %llu; software version %s; uts %s %s %s %s", 624eda14cbcSMatt Macy (u_longlong_t)spa_version(spa), ZFS_META_GITREV, 625eda14cbcSMatt Macy u->nodename, u->release, u->version, u->machine); 626eda14cbcSMatt Macy } 627eda14cbcSMatt Macy 628eda14cbcSMatt Macy #ifndef _KERNEL 629eda14cbcSMatt Macy const char * 630eda14cbcSMatt Macy spa_history_zone(void) 631eda14cbcSMatt Macy { 632eda14cbcSMatt Macy return (NULL); 633eda14cbcSMatt Macy } 634eda14cbcSMatt Macy #endif 635eda14cbcSMatt Macy 636eda14cbcSMatt Macy #if defined(_KERNEL) 637eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_create_obj); 638eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_get); 639eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_log); 640eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_log_internal); 641eda14cbcSMatt Macy EXPORT_SYMBOL(spa_history_log_version); 642eda14cbcSMatt Macy #endif 643