1eac7052fSMatt Macy /* 2eac7052fSMatt Macy * CDDL HEADER START 3eac7052fSMatt Macy * 4eac7052fSMatt Macy * The contents of this file are subject to the terms of the 5eac7052fSMatt Macy * Common Development and Distribution License (the "License"). 6eac7052fSMatt Macy * You may not use this file except in compliance with the License. 7eac7052fSMatt Macy * 8eac7052fSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eac7052fSMatt Macy * See the License for the specific language governing permissions 11eac7052fSMatt Macy * and limitations under the License. 12eac7052fSMatt Macy * 13eac7052fSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eac7052fSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eac7052fSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eac7052fSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eac7052fSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eac7052fSMatt Macy * 19eac7052fSMatt Macy * CDDL HEADER END 20eac7052fSMatt Macy */ 21eac7052fSMatt Macy 22eac7052fSMatt Macy #include <sys/zfs_context.h> 23eac7052fSMatt Macy #include <sys/spa_impl.h> 24eac7052fSMatt Macy #include <sys/vdev_impl.h> 25eac7052fSMatt Macy #include <sys/spa.h> 26eac7052fSMatt Macy #include <zfs_comutil.h> 27eac7052fSMatt Macy 28eac7052fSMatt Macy /* 29eac7052fSMatt Macy * Keeps stats on last N reads per spa_t, disabled by default. 30eac7052fSMatt Macy */ 31be181ee2SMartin Matuska static uint_t zfs_read_history = B_FALSE; 32eac7052fSMatt Macy 33eac7052fSMatt Macy /* 34eac7052fSMatt Macy * Include cache hits in history, disabled by default. 35eac7052fSMatt Macy */ 36e92ffd9bSMartin Matuska static int zfs_read_history_hits = B_FALSE; 37eac7052fSMatt Macy 38eac7052fSMatt Macy /* 39eac7052fSMatt Macy * Keeps stats on the last 100 txgs by default. 40eac7052fSMatt Macy */ 41be181ee2SMartin Matuska static uint_t zfs_txg_history = 100; 42eac7052fSMatt Macy 43eac7052fSMatt Macy /* 44eac7052fSMatt Macy * Keeps stats on the last N MMP updates, disabled by default. 45eac7052fSMatt Macy */ 46be181ee2SMartin Matuska static uint_t zfs_multihost_history = B_FALSE; 47eac7052fSMatt Macy 48eac7052fSMatt Macy /* 49eac7052fSMatt Macy * ========================================================================== 50eac7052fSMatt Macy * SPA Read History Routines 51eac7052fSMatt Macy * ========================================================================== 52eac7052fSMatt Macy */ 53eac7052fSMatt Macy 54eac7052fSMatt Macy /* 55eac7052fSMatt Macy * Read statistics - Information exported regarding each arc_read call 56eac7052fSMatt Macy */ 57eac7052fSMatt Macy typedef struct spa_read_history { 58eac7052fSMatt Macy hrtime_t start; /* time read completed */ 59eac7052fSMatt Macy uint64_t objset; /* read from this objset */ 60eac7052fSMatt Macy uint64_t object; /* read of this object number */ 61eac7052fSMatt Macy uint64_t level; /* block's indirection level */ 62eac7052fSMatt Macy uint64_t blkid; /* read of this block id */ 63eac7052fSMatt Macy char origin[24]; /* read originated from here */ 64eac7052fSMatt Macy uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */ 65eac7052fSMatt Macy pid_t pid; /* PID of task doing read */ 66eac7052fSMatt Macy char comm[16]; /* process name of task doing read */ 67eac7052fSMatt Macy procfs_list_node_t srh_node; 68eac7052fSMatt Macy } spa_read_history_t; 69eac7052fSMatt Macy 70eac7052fSMatt Macy static int 71eac7052fSMatt Macy spa_read_history_show_header(struct seq_file *f) 72eac7052fSMatt Macy { 73eac7052fSMatt Macy seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s " 74eac7052fSMatt Macy "%-24s %-8s %-16s\n", "UID", "start", "objset", "object", 75eac7052fSMatt Macy "level", "blkid", "aflags", "origin", "pid", "process"); 76eac7052fSMatt Macy 77eac7052fSMatt Macy return (0); 78eac7052fSMatt Macy } 79eac7052fSMatt Macy 80eac7052fSMatt Macy static int 81eac7052fSMatt Macy spa_read_history_show(struct seq_file *f, void *data) 82eac7052fSMatt Macy { 83eac7052fSMatt Macy spa_read_history_t *srh = (spa_read_history_t *)data; 84eac7052fSMatt Macy 85eac7052fSMatt Macy seq_printf(f, "%-8llu %-16llu 0x%-6llx " 86eac7052fSMatt Macy "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n", 87eac7052fSMatt Macy (u_longlong_t)srh->srh_node.pln_id, srh->start, 88eac7052fSMatt Macy (longlong_t)srh->objset, (longlong_t)srh->object, 89eac7052fSMatt Macy (longlong_t)srh->level, (longlong_t)srh->blkid, 90eac7052fSMatt Macy srh->aflags, srh->origin, srh->pid, srh->comm); 91eac7052fSMatt Macy 92eac7052fSMatt Macy return (0); 93eac7052fSMatt Macy } 94eac7052fSMatt Macy 95eac7052fSMatt Macy /* Remove oldest elements from list until there are no more than 'size' left */ 96eac7052fSMatt Macy static void 97eac7052fSMatt Macy spa_read_history_truncate(spa_history_list_t *shl, unsigned int size) 98eac7052fSMatt Macy { 99eac7052fSMatt Macy spa_read_history_t *srh; 100eac7052fSMatt Macy while (shl->size > size) { 101eac7052fSMatt Macy srh = list_remove_head(&shl->procfs_list.pl_list); 102eac7052fSMatt Macy ASSERT3P(srh, !=, NULL); 103eac7052fSMatt Macy kmem_free(srh, sizeof (spa_read_history_t)); 104eac7052fSMatt Macy shl->size--; 105eac7052fSMatt Macy } 106eac7052fSMatt Macy 107eac7052fSMatt Macy if (size == 0) 108eac7052fSMatt Macy ASSERT(list_is_empty(&shl->procfs_list.pl_list)); 109eac7052fSMatt Macy } 110eac7052fSMatt Macy 111eac7052fSMatt Macy static int 112eac7052fSMatt Macy spa_read_history_clear(procfs_list_t *procfs_list) 113eac7052fSMatt Macy { 114eac7052fSMatt Macy spa_history_list_t *shl = procfs_list->pl_private; 115eac7052fSMatt Macy mutex_enter(&procfs_list->pl_lock); 116eac7052fSMatt Macy spa_read_history_truncate(shl, 0); 117eac7052fSMatt Macy mutex_exit(&procfs_list->pl_lock); 118eac7052fSMatt Macy return (0); 119eac7052fSMatt Macy } 120eac7052fSMatt Macy 121eac7052fSMatt Macy static void 122eac7052fSMatt Macy spa_read_history_init(spa_t *spa) 123eac7052fSMatt Macy { 124eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.read_history; 125eac7052fSMatt Macy 126eac7052fSMatt Macy shl->size = 0; 127eac7052fSMatt Macy shl->procfs_list.pl_private = shl; 128c40487d4SMatt Macy procfs_list_install("zfs", 129c40487d4SMatt Macy spa_name(spa), 130eac7052fSMatt Macy "reads", 131eac7052fSMatt Macy 0600, 132eac7052fSMatt Macy &shl->procfs_list, 133eac7052fSMatt Macy spa_read_history_show, 134eac7052fSMatt Macy spa_read_history_show_header, 135eac7052fSMatt Macy spa_read_history_clear, 136eac7052fSMatt Macy offsetof(spa_read_history_t, srh_node)); 137eac7052fSMatt Macy } 138eac7052fSMatt Macy 139eac7052fSMatt Macy static void 140eac7052fSMatt Macy spa_read_history_destroy(spa_t *spa) 141eac7052fSMatt Macy { 142eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.read_history; 143eac7052fSMatt Macy procfs_list_uninstall(&shl->procfs_list); 144eac7052fSMatt Macy spa_read_history_truncate(shl, 0); 145eac7052fSMatt Macy procfs_list_destroy(&shl->procfs_list); 146eac7052fSMatt Macy } 147eac7052fSMatt Macy 148eac7052fSMatt Macy void 149eac7052fSMatt Macy spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags) 150eac7052fSMatt Macy { 151eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.read_history; 152eac7052fSMatt Macy spa_read_history_t *srh; 153eac7052fSMatt Macy 154eac7052fSMatt Macy ASSERT3P(spa, !=, NULL); 155eac7052fSMatt Macy ASSERT3P(zb, !=, NULL); 156eac7052fSMatt Macy 157eac7052fSMatt Macy if (zfs_read_history == 0 && shl->size == 0) 158eac7052fSMatt Macy return; 159eac7052fSMatt Macy 160eac7052fSMatt Macy if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED)) 161eac7052fSMatt Macy return; 162eac7052fSMatt Macy 163eac7052fSMatt Macy srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP); 164eac7052fSMatt Macy strlcpy(srh->comm, getcomm(), sizeof (srh->comm)); 165eac7052fSMatt Macy srh->start = gethrtime(); 166eac7052fSMatt Macy srh->objset = zb->zb_objset; 167eac7052fSMatt Macy srh->object = zb->zb_object; 168eac7052fSMatt Macy srh->level = zb->zb_level; 169eac7052fSMatt Macy srh->blkid = zb->zb_blkid; 170eac7052fSMatt Macy srh->aflags = aflags; 171eac7052fSMatt Macy srh->pid = getpid(); 172eac7052fSMatt Macy 173eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock); 174eac7052fSMatt Macy 175eac7052fSMatt Macy procfs_list_add(&shl->procfs_list, srh); 176eac7052fSMatt Macy shl->size++; 177eac7052fSMatt Macy 178eac7052fSMatt Macy spa_read_history_truncate(shl, zfs_read_history); 179eac7052fSMatt Macy 180eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock); 181eac7052fSMatt Macy } 182eac7052fSMatt Macy 183eac7052fSMatt Macy /* 184eac7052fSMatt Macy * ========================================================================== 185eac7052fSMatt Macy * SPA TXG History Routines 186eac7052fSMatt Macy * ========================================================================== 187eac7052fSMatt Macy */ 188eac7052fSMatt Macy 189eac7052fSMatt Macy /* 190eac7052fSMatt Macy * Txg statistics - Information exported regarding each txg sync 191eac7052fSMatt Macy */ 192eac7052fSMatt Macy 193eac7052fSMatt Macy typedef struct spa_txg_history { 194eac7052fSMatt Macy uint64_t txg; /* txg id */ 195eac7052fSMatt Macy txg_state_t state; /* active txg state */ 196eac7052fSMatt Macy uint64_t nread; /* number of bytes read */ 197eac7052fSMatt Macy uint64_t nwritten; /* number of bytes written */ 198eac7052fSMatt Macy uint64_t reads; /* number of read operations */ 199eac7052fSMatt Macy uint64_t writes; /* number of write operations */ 200eac7052fSMatt Macy uint64_t ndirty; /* number of dirty bytes */ 201eac7052fSMatt Macy hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */ 202eac7052fSMatt Macy procfs_list_node_t sth_node; 203eac7052fSMatt Macy } spa_txg_history_t; 204eac7052fSMatt Macy 205eac7052fSMatt Macy static int 206eac7052fSMatt Macy spa_txg_history_show_header(struct seq_file *f) 207eac7052fSMatt Macy { 208eac7052fSMatt Macy seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s " 209eac7052fSMatt Macy "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state", 210eac7052fSMatt Macy "ndirty", "nread", "nwritten", "reads", "writes", 211eac7052fSMatt Macy "otime", "qtime", "wtime", "stime"); 212eac7052fSMatt Macy return (0); 213eac7052fSMatt Macy } 214eac7052fSMatt Macy 215eac7052fSMatt Macy static int 216eac7052fSMatt Macy spa_txg_history_show(struct seq_file *f, void *data) 217eac7052fSMatt Macy { 218eac7052fSMatt Macy spa_txg_history_t *sth = (spa_txg_history_t *)data; 219eac7052fSMatt Macy uint64_t open = 0, quiesce = 0, wait = 0, sync = 0; 220eac7052fSMatt Macy char state; 221eac7052fSMatt Macy 222eac7052fSMatt Macy switch (sth->state) { 223eac7052fSMatt Macy case TXG_STATE_BIRTH: state = 'B'; break; 224eac7052fSMatt Macy case TXG_STATE_OPEN: state = 'O'; break; 225eac7052fSMatt Macy case TXG_STATE_QUIESCED: state = 'Q'; break; 226eac7052fSMatt Macy case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break; 227eac7052fSMatt Macy case TXG_STATE_SYNCED: state = 'S'; break; 228eac7052fSMatt Macy case TXG_STATE_COMMITTED: state = 'C'; break; 229eac7052fSMatt Macy default: state = '?'; break; 230eac7052fSMatt Macy } 231eac7052fSMatt Macy 232eac7052fSMatt Macy if (sth->times[TXG_STATE_OPEN]) 233eac7052fSMatt Macy open = sth->times[TXG_STATE_OPEN] - 234eac7052fSMatt Macy sth->times[TXG_STATE_BIRTH]; 235eac7052fSMatt Macy 236eac7052fSMatt Macy if (sth->times[TXG_STATE_QUIESCED]) 237eac7052fSMatt Macy quiesce = sth->times[TXG_STATE_QUIESCED] - 238eac7052fSMatt Macy sth->times[TXG_STATE_OPEN]; 239eac7052fSMatt Macy 240eac7052fSMatt Macy if (sth->times[TXG_STATE_WAIT_FOR_SYNC]) 241eac7052fSMatt Macy wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] - 242eac7052fSMatt Macy sth->times[TXG_STATE_QUIESCED]; 243eac7052fSMatt Macy 244eac7052fSMatt Macy if (sth->times[TXG_STATE_SYNCED]) 245eac7052fSMatt Macy sync = sth->times[TXG_STATE_SYNCED] - 246eac7052fSMatt Macy sth->times[TXG_STATE_WAIT_FOR_SYNC]; 247eac7052fSMatt Macy 248eac7052fSMatt Macy seq_printf(f, "%-8llu %-16llu %-5c %-12llu " 249eac7052fSMatt Macy "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n", 250eac7052fSMatt Macy (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state, 251eac7052fSMatt Macy (u_longlong_t)sth->ndirty, 252eac7052fSMatt Macy (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten, 253eac7052fSMatt Macy (u_longlong_t)sth->reads, (u_longlong_t)sth->writes, 254eac7052fSMatt Macy (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait, 255eac7052fSMatt Macy (u_longlong_t)sync); 256eac7052fSMatt Macy 257eac7052fSMatt Macy return (0); 258eac7052fSMatt Macy } 259eac7052fSMatt Macy 260eac7052fSMatt Macy /* Remove oldest elements from list until there are no more than 'size' left */ 261eac7052fSMatt Macy static void 262eac7052fSMatt Macy spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size) 263eac7052fSMatt Macy { 264eac7052fSMatt Macy spa_txg_history_t *sth; 265eac7052fSMatt Macy while (shl->size > size) { 266eac7052fSMatt Macy sth = list_remove_head(&shl->procfs_list.pl_list); 267eac7052fSMatt Macy ASSERT3P(sth, !=, NULL); 268eac7052fSMatt Macy kmem_free(sth, sizeof (spa_txg_history_t)); 269eac7052fSMatt Macy shl->size--; 270eac7052fSMatt Macy } 271eac7052fSMatt Macy 272eac7052fSMatt Macy if (size == 0) 273eac7052fSMatt Macy ASSERT(list_is_empty(&shl->procfs_list.pl_list)); 274eac7052fSMatt Macy 275eac7052fSMatt Macy } 276eac7052fSMatt Macy 277eac7052fSMatt Macy static int 278eac7052fSMatt Macy spa_txg_history_clear(procfs_list_t *procfs_list) 279eac7052fSMatt Macy { 280eac7052fSMatt Macy spa_history_list_t *shl = procfs_list->pl_private; 281eac7052fSMatt Macy mutex_enter(&procfs_list->pl_lock); 282eac7052fSMatt Macy spa_txg_history_truncate(shl, 0); 283eac7052fSMatt Macy mutex_exit(&procfs_list->pl_lock); 284eac7052fSMatt Macy return (0); 285eac7052fSMatt Macy } 286eac7052fSMatt Macy 287eac7052fSMatt Macy static void 288eac7052fSMatt Macy spa_txg_history_init(spa_t *spa) 289eac7052fSMatt Macy { 290eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history; 291eac7052fSMatt Macy 292eac7052fSMatt Macy shl->size = 0; 293eac7052fSMatt Macy shl->procfs_list.pl_private = shl; 294c40487d4SMatt Macy procfs_list_install("zfs", 295c40487d4SMatt Macy spa_name(spa), 296eac7052fSMatt Macy "txgs", 297eac7052fSMatt Macy 0644, 298eac7052fSMatt Macy &shl->procfs_list, 299eac7052fSMatt Macy spa_txg_history_show, 300eac7052fSMatt Macy spa_txg_history_show_header, 301eac7052fSMatt Macy spa_txg_history_clear, 302eac7052fSMatt Macy offsetof(spa_txg_history_t, sth_node)); 303eac7052fSMatt Macy } 304eac7052fSMatt Macy 305eac7052fSMatt Macy static void 306eac7052fSMatt Macy spa_txg_history_destroy(spa_t *spa) 307eac7052fSMatt Macy { 308eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history; 309eac7052fSMatt Macy procfs_list_uninstall(&shl->procfs_list); 310eac7052fSMatt Macy spa_txg_history_truncate(shl, 0); 311eac7052fSMatt Macy procfs_list_destroy(&shl->procfs_list); 312eac7052fSMatt Macy } 313eac7052fSMatt Macy 314eac7052fSMatt Macy /* 315eac7052fSMatt Macy * Add a new txg to historical record. 316eac7052fSMatt Macy */ 317eac7052fSMatt Macy void 318eac7052fSMatt Macy spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time) 319eac7052fSMatt Macy { 320eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history; 321eac7052fSMatt Macy spa_txg_history_t *sth; 322eac7052fSMatt Macy 323eac7052fSMatt Macy if (zfs_txg_history == 0 && shl->size == 0) 324eac7052fSMatt Macy return; 325eac7052fSMatt Macy 326eac7052fSMatt Macy sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP); 327eac7052fSMatt Macy sth->txg = txg; 328eac7052fSMatt Macy sth->state = TXG_STATE_OPEN; 329eac7052fSMatt Macy sth->times[TXG_STATE_BIRTH] = birth_time; 330eac7052fSMatt Macy 331eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock); 332eac7052fSMatt Macy procfs_list_add(&shl->procfs_list, sth); 333eac7052fSMatt Macy shl->size++; 334eac7052fSMatt Macy spa_txg_history_truncate(shl, zfs_txg_history); 335eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock); 336eac7052fSMatt Macy } 337eac7052fSMatt Macy 338eac7052fSMatt Macy /* 339eac7052fSMatt Macy * Set txg state completion time and increment current state. 340eac7052fSMatt Macy */ 341eac7052fSMatt Macy int 342eac7052fSMatt Macy spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state, 343eac7052fSMatt Macy hrtime_t completed_time) 344eac7052fSMatt Macy { 345eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history; 346eac7052fSMatt Macy spa_txg_history_t *sth; 347eac7052fSMatt Macy int error = ENOENT; 348eac7052fSMatt Macy 349eac7052fSMatt Macy if (zfs_txg_history == 0) 350eac7052fSMatt Macy return (0); 351eac7052fSMatt Macy 352eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock); 353eac7052fSMatt Macy for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL; 354eac7052fSMatt Macy sth = list_prev(&shl->procfs_list.pl_list, sth)) { 355eac7052fSMatt Macy if (sth->txg == txg) { 356eac7052fSMatt Macy sth->times[completed_state] = completed_time; 357eac7052fSMatt Macy sth->state++; 358eac7052fSMatt Macy error = 0; 359eac7052fSMatt Macy break; 360eac7052fSMatt Macy } 361eac7052fSMatt Macy } 362eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock); 363eac7052fSMatt Macy 364eac7052fSMatt Macy return (error); 365eac7052fSMatt Macy } 366eac7052fSMatt Macy 367eac7052fSMatt Macy /* 368eac7052fSMatt Macy * Set txg IO stats. 369eac7052fSMatt Macy */ 370eac7052fSMatt Macy static int 371eac7052fSMatt Macy spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread, 372eac7052fSMatt Macy uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty) 373eac7052fSMatt Macy { 374eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history; 375eac7052fSMatt Macy spa_txg_history_t *sth; 376eac7052fSMatt Macy int error = ENOENT; 377eac7052fSMatt Macy 378eac7052fSMatt Macy if (zfs_txg_history == 0) 379eac7052fSMatt Macy return (0); 380eac7052fSMatt Macy 381eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock); 382eac7052fSMatt Macy for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL; 383eac7052fSMatt Macy sth = list_prev(&shl->procfs_list.pl_list, sth)) { 384eac7052fSMatt Macy if (sth->txg == txg) { 385eac7052fSMatt Macy sth->nread = nread; 386eac7052fSMatt Macy sth->nwritten = nwritten; 387eac7052fSMatt Macy sth->reads = reads; 388eac7052fSMatt Macy sth->writes = writes; 389eac7052fSMatt Macy sth->ndirty = ndirty; 390eac7052fSMatt Macy error = 0; 391eac7052fSMatt Macy break; 392eac7052fSMatt Macy } 393eac7052fSMatt Macy } 394eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock); 395eac7052fSMatt Macy 396eac7052fSMatt Macy return (error); 397eac7052fSMatt Macy } 398eac7052fSMatt Macy 399eac7052fSMatt Macy txg_stat_t * 400eac7052fSMatt Macy spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp) 401eac7052fSMatt Macy { 402eac7052fSMatt Macy txg_stat_t *ts; 403eac7052fSMatt Macy 404eac7052fSMatt Macy if (zfs_txg_history == 0) 405eac7052fSMatt Macy return (NULL); 406eac7052fSMatt Macy 407eac7052fSMatt Macy ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP); 408eac7052fSMatt Macy 409eac7052fSMatt Macy spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 410eac7052fSMatt Macy vdev_get_stats(spa->spa_root_vdev, &ts->vs1); 411eac7052fSMatt Macy spa_config_exit(spa, SCL_CONFIG, FTAG); 412eac7052fSMatt Macy 413eac7052fSMatt Macy ts->txg = txg; 414eac7052fSMatt Macy ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK]; 415eac7052fSMatt Macy 416eac7052fSMatt Macy spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime()); 417eac7052fSMatt Macy 418eac7052fSMatt Macy return (ts); 419eac7052fSMatt Macy } 420eac7052fSMatt Macy 421eac7052fSMatt Macy void 422eac7052fSMatt Macy spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts) 423eac7052fSMatt Macy { 424eac7052fSMatt Macy if (ts == NULL) 425eac7052fSMatt Macy return; 426eac7052fSMatt Macy 427eac7052fSMatt Macy if (zfs_txg_history == 0) { 428eac7052fSMatt Macy kmem_free(ts, sizeof (txg_stat_t)); 429eac7052fSMatt Macy return; 430eac7052fSMatt Macy } 431eac7052fSMatt Macy 432eac7052fSMatt Macy spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 433eac7052fSMatt Macy vdev_get_stats(spa->spa_root_vdev, &ts->vs2); 434eac7052fSMatt Macy spa_config_exit(spa, SCL_CONFIG, FTAG); 435eac7052fSMatt Macy 436eac7052fSMatt Macy spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime()); 437eac7052fSMatt Macy spa_txg_history_set_io(spa, ts->txg, 438eac7052fSMatt Macy ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ], 439eac7052fSMatt Macy ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE], 440eac7052fSMatt Macy ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ], 441eac7052fSMatt Macy ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE], 442eac7052fSMatt Macy ts->ndirty); 443eac7052fSMatt Macy 444eac7052fSMatt Macy kmem_free(ts, sizeof (txg_stat_t)); 445eac7052fSMatt Macy } 446eac7052fSMatt Macy 447eac7052fSMatt Macy /* 448eac7052fSMatt Macy * ========================================================================== 449eac7052fSMatt Macy * SPA TX Assign Histogram Routines 450eac7052fSMatt Macy * ========================================================================== 451eac7052fSMatt Macy */ 452eac7052fSMatt Macy 453eac7052fSMatt Macy /* 454eac7052fSMatt Macy * Tx statistics - Information exported regarding dmu_tx_assign time. 455eac7052fSMatt Macy */ 456eac7052fSMatt Macy 457eac7052fSMatt Macy /* 458eac7052fSMatt Macy * When the kstat is written zero all buckets. When the kstat is read 459eac7052fSMatt Macy * count the number of trailing buckets set to zero and update ks_ndata 460eac7052fSMatt Macy * such that they are not output. 461eac7052fSMatt Macy */ 462eac7052fSMatt Macy static int 463eac7052fSMatt Macy spa_tx_assign_update(kstat_t *ksp, int rw) 464eac7052fSMatt Macy { 465eac7052fSMatt Macy spa_t *spa = ksp->ks_private; 466eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; 467eac7052fSMatt Macy int i; 468eac7052fSMatt Macy 469eac7052fSMatt Macy if (rw == KSTAT_WRITE) { 470eac7052fSMatt Macy for (i = 0; i < shk->count; i++) 471eac7052fSMatt Macy ((kstat_named_t *)shk->priv)[i].value.ui64 = 0; 472eac7052fSMatt Macy } 473eac7052fSMatt Macy 474eac7052fSMatt Macy for (i = shk->count; i > 0; i--) 475eac7052fSMatt Macy if (((kstat_named_t *)shk->priv)[i-1].value.ui64 != 0) 476eac7052fSMatt Macy break; 477eac7052fSMatt Macy 478eac7052fSMatt Macy ksp->ks_ndata = i; 479eac7052fSMatt Macy ksp->ks_data_size = i * sizeof (kstat_named_t); 480eac7052fSMatt Macy 481eac7052fSMatt Macy return (0); 482eac7052fSMatt Macy } 483eac7052fSMatt Macy 484eac7052fSMatt Macy static void 485eac7052fSMatt Macy spa_tx_assign_init(spa_t *spa) 486eac7052fSMatt Macy { 487eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; 488eac7052fSMatt Macy char *name; 489eac7052fSMatt Macy kstat_named_t *ks; 490eac7052fSMatt Macy kstat_t *ksp; 491eac7052fSMatt Macy int i; 492eac7052fSMatt Macy 493eac7052fSMatt Macy mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); 494eac7052fSMatt Macy 495eac7052fSMatt Macy shk->count = 42; /* power of two buckets for 1ns to 2,199s */ 496eac7052fSMatt Macy shk->size = shk->count * sizeof (kstat_named_t); 497eac7052fSMatt Macy shk->priv = kmem_alloc(shk->size, KM_SLEEP); 498eac7052fSMatt Macy 499eac7052fSMatt Macy name = kmem_asprintf("zfs/%s", spa_name(spa)); 500eac7052fSMatt Macy 501eac7052fSMatt Macy for (i = 0; i < shk->count; i++) { 502eac7052fSMatt Macy ks = &((kstat_named_t *)shk->priv)[i]; 503eac7052fSMatt Macy ks->data_type = KSTAT_DATA_UINT64; 504eac7052fSMatt Macy ks->value.ui64 = 0; 505eac7052fSMatt Macy (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns", 506eac7052fSMatt Macy (u_longlong_t)1 << i); 507eac7052fSMatt Macy } 508eac7052fSMatt Macy 509eac7052fSMatt Macy ksp = kstat_create(name, 0, "dmu_tx_assign", "misc", 510eac7052fSMatt Macy KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL); 511eac7052fSMatt Macy shk->kstat = ksp; 512eac7052fSMatt Macy 513eac7052fSMatt Macy if (ksp) { 514eac7052fSMatt Macy ksp->ks_lock = &shk->lock; 515eac7052fSMatt Macy ksp->ks_data = shk->priv; 516eac7052fSMatt Macy ksp->ks_ndata = shk->count; 517eac7052fSMatt Macy ksp->ks_data_size = shk->size; 518eac7052fSMatt Macy ksp->ks_private = spa; 519eac7052fSMatt Macy ksp->ks_update = spa_tx_assign_update; 520eac7052fSMatt Macy kstat_install(ksp); 521eac7052fSMatt Macy } 522eac7052fSMatt Macy kmem_strfree(name); 523eac7052fSMatt Macy } 524eac7052fSMatt Macy 525eac7052fSMatt Macy static void 526eac7052fSMatt Macy spa_tx_assign_destroy(spa_t *spa) 527eac7052fSMatt Macy { 528eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; 529eac7052fSMatt Macy kstat_t *ksp; 530eac7052fSMatt Macy 531eac7052fSMatt Macy ksp = shk->kstat; 532eac7052fSMatt Macy if (ksp) 533eac7052fSMatt Macy kstat_delete(ksp); 534eac7052fSMatt Macy 535eac7052fSMatt Macy kmem_free(shk->priv, shk->size); 536eac7052fSMatt Macy mutex_destroy(&shk->lock); 537eac7052fSMatt Macy } 538eac7052fSMatt Macy 539eac7052fSMatt Macy void 540eac7052fSMatt Macy spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs) 541eac7052fSMatt Macy { 542eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; 543eac7052fSMatt Macy uint64_t idx = 0; 544eac7052fSMatt Macy 545eac7052fSMatt Macy while (((1ULL << idx) < nsecs) && (idx < shk->size - 1)) 546eac7052fSMatt Macy idx++; 547eac7052fSMatt Macy 548eac7052fSMatt Macy atomic_inc_64(&((kstat_named_t *)shk->priv)[idx].value.ui64); 549eac7052fSMatt Macy } 550eac7052fSMatt Macy 551eac7052fSMatt Macy /* 552eac7052fSMatt Macy * ========================================================================== 553eac7052fSMatt Macy * SPA MMP History Routines 554eac7052fSMatt Macy * ========================================================================== 555eac7052fSMatt Macy */ 556eac7052fSMatt Macy 557eac7052fSMatt Macy /* 558eac7052fSMatt Macy * MMP statistics - Information exported regarding attempted MMP writes 559eac7052fSMatt Macy * For MMP writes issued, fields used as per comments below. 560eac7052fSMatt Macy * For MMP writes skipped, an entry represents a span of time when 561eac7052fSMatt Macy * writes were skipped for same reason (error from mmp_random_leaf). 562eac7052fSMatt Macy * Differences are: 563eac7052fSMatt Macy * timestamp time first write skipped, if >1 skipped in a row 564eac7052fSMatt Macy * mmp_delay delay value at timestamp 565eac7052fSMatt Macy * vdev_guid number of writes skipped 566eac7052fSMatt Macy * io_error one of enum mmp_error 567eac7052fSMatt Macy * duration time span (ns) of skipped writes 568eac7052fSMatt Macy */ 569eac7052fSMatt Macy 570eac7052fSMatt Macy typedef struct spa_mmp_history { 571eac7052fSMatt Macy uint64_t mmp_node_id; /* unique # for updates */ 572eac7052fSMatt Macy uint64_t txg; /* txg of last sync */ 573eac7052fSMatt Macy uint64_t timestamp; /* UTC time MMP write issued */ 574eac7052fSMatt Macy uint64_t mmp_delay; /* mmp_thread.mmp_delay at timestamp */ 575eac7052fSMatt Macy uint64_t vdev_guid; /* unique ID of leaf vdev */ 576eac7052fSMatt Macy char *vdev_path; 577eac7052fSMatt Macy int vdev_label; /* vdev label */ 578eac7052fSMatt Macy int io_error; /* error status of MMP write */ 579eac7052fSMatt Macy hrtime_t error_start; /* hrtime of start of error period */ 580eac7052fSMatt Macy hrtime_t duration; /* time from submission to completion */ 581eac7052fSMatt Macy procfs_list_node_t smh_node; 582eac7052fSMatt Macy } spa_mmp_history_t; 583eac7052fSMatt Macy 584eac7052fSMatt Macy static int 585eac7052fSMatt Macy spa_mmp_history_show_header(struct seq_file *f) 586eac7052fSMatt Macy { 587eac7052fSMatt Macy seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s " 588eac7052fSMatt Macy "%-10s %s\n", "id", "txg", "timestamp", "error", "duration", 589eac7052fSMatt Macy "mmp_delay", "vdev_guid", "vdev_label", "vdev_path"); 590eac7052fSMatt Macy return (0); 591eac7052fSMatt Macy } 592eac7052fSMatt Macy 593eac7052fSMatt Macy static int 594eac7052fSMatt Macy spa_mmp_history_show(struct seq_file *f, void *data) 595eac7052fSMatt Macy { 596eac7052fSMatt Macy spa_mmp_history_t *smh = (spa_mmp_history_t *)data; 597eac7052fSMatt Macy char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu " 598eac7052fSMatt Macy "%-10lld %s\n"; 599eac7052fSMatt Macy char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu " 600eac7052fSMatt Macy "%-10lld %s\n"; 601eac7052fSMatt Macy 602eac7052fSMatt Macy seq_printf(f, (smh->error_start ? skip_fmt : write_fmt), 603eac7052fSMatt Macy (u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg, 604eac7052fSMatt Macy (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error, 605eac7052fSMatt Macy (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay, 606eac7052fSMatt Macy (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label, 607eac7052fSMatt Macy (smh->vdev_path ? smh->vdev_path : "-")); 608eac7052fSMatt Macy 609eac7052fSMatt Macy return (0); 610eac7052fSMatt Macy } 611eac7052fSMatt Macy 612eac7052fSMatt Macy /* Remove oldest elements from list until there are no more than 'size' left */ 613eac7052fSMatt Macy static void 614eac7052fSMatt Macy spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size) 615eac7052fSMatt Macy { 616eac7052fSMatt Macy spa_mmp_history_t *smh; 617eac7052fSMatt Macy while (shl->size > size) { 618eac7052fSMatt Macy smh = list_remove_head(&shl->procfs_list.pl_list); 619eac7052fSMatt Macy if (smh->vdev_path) 620eac7052fSMatt Macy kmem_strfree(smh->vdev_path); 621eac7052fSMatt Macy kmem_free(smh, sizeof (spa_mmp_history_t)); 622eac7052fSMatt Macy shl->size--; 623eac7052fSMatt Macy } 624eac7052fSMatt Macy 625eac7052fSMatt Macy if (size == 0) 626eac7052fSMatt Macy ASSERT(list_is_empty(&shl->procfs_list.pl_list)); 627eac7052fSMatt Macy 628eac7052fSMatt Macy } 629eac7052fSMatt Macy 630eac7052fSMatt Macy static int 631eac7052fSMatt Macy spa_mmp_history_clear(procfs_list_t *procfs_list) 632eac7052fSMatt Macy { 633eac7052fSMatt Macy spa_history_list_t *shl = procfs_list->pl_private; 634eac7052fSMatt Macy mutex_enter(&procfs_list->pl_lock); 635eac7052fSMatt Macy spa_mmp_history_truncate(shl, 0); 636eac7052fSMatt Macy mutex_exit(&procfs_list->pl_lock); 637eac7052fSMatt Macy return (0); 638eac7052fSMatt Macy } 639eac7052fSMatt Macy 640eac7052fSMatt Macy static void 641eac7052fSMatt Macy spa_mmp_history_init(spa_t *spa) 642eac7052fSMatt Macy { 643eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history; 644eac7052fSMatt Macy 645eac7052fSMatt Macy shl->size = 0; 646eac7052fSMatt Macy 647eac7052fSMatt Macy shl->procfs_list.pl_private = shl; 648c40487d4SMatt Macy procfs_list_install("zfs", 649c40487d4SMatt Macy spa_name(spa), 650eac7052fSMatt Macy "multihost", 651eac7052fSMatt Macy 0644, 652eac7052fSMatt Macy &shl->procfs_list, 653eac7052fSMatt Macy spa_mmp_history_show, 654eac7052fSMatt Macy spa_mmp_history_show_header, 655eac7052fSMatt Macy spa_mmp_history_clear, 656eac7052fSMatt Macy offsetof(spa_mmp_history_t, smh_node)); 657eac7052fSMatt Macy } 658eac7052fSMatt Macy 659eac7052fSMatt Macy static void 660eac7052fSMatt Macy spa_mmp_history_destroy(spa_t *spa) 661eac7052fSMatt Macy { 662eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history; 663eac7052fSMatt Macy procfs_list_uninstall(&shl->procfs_list); 664eac7052fSMatt Macy spa_mmp_history_truncate(shl, 0); 665eac7052fSMatt Macy procfs_list_destroy(&shl->procfs_list); 666eac7052fSMatt Macy } 667eac7052fSMatt Macy 668eac7052fSMatt Macy /* 669eac7052fSMatt Macy * Set duration in existing "skip" record to how long we have waited for a leaf 670eac7052fSMatt Macy * vdev to become available. 671eac7052fSMatt Macy * 672eac7052fSMatt Macy * Important that we start search at the tail of the list where new 673eac7052fSMatt Macy * records are inserted, so this is normally an O(1) operation. 674eac7052fSMatt Macy */ 675eac7052fSMatt Macy int 676eac7052fSMatt Macy spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id) 677eac7052fSMatt Macy { 678eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history; 679eac7052fSMatt Macy spa_mmp_history_t *smh; 680eac7052fSMatt Macy int error = ENOENT; 681eac7052fSMatt Macy 682eac7052fSMatt Macy if (zfs_multihost_history == 0 && shl->size == 0) 683eac7052fSMatt Macy return (0); 684eac7052fSMatt Macy 685eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock); 686eac7052fSMatt Macy for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL; 687eac7052fSMatt Macy smh = list_prev(&shl->procfs_list.pl_list, smh)) { 688eac7052fSMatt Macy if (smh->mmp_node_id == mmp_node_id) { 689eac7052fSMatt Macy ASSERT3U(smh->io_error, !=, 0); 690eac7052fSMatt Macy smh->duration = gethrtime() - smh->error_start; 691eac7052fSMatt Macy smh->vdev_guid++; 692eac7052fSMatt Macy error = 0; 693eac7052fSMatt Macy break; 694eac7052fSMatt Macy } 695eac7052fSMatt Macy } 696eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock); 697eac7052fSMatt Macy 698eac7052fSMatt Macy return (error); 699eac7052fSMatt Macy } 700eac7052fSMatt Macy 701eac7052fSMatt Macy /* 702eac7052fSMatt Macy * Set MMP write duration and error status in existing record. 703eac7052fSMatt Macy * See comment re: search order above spa_mmp_history_set_skip(). 704eac7052fSMatt Macy */ 705eac7052fSMatt Macy int 706eac7052fSMatt Macy spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error, 707eac7052fSMatt Macy hrtime_t duration) 708eac7052fSMatt Macy { 709eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history; 710eac7052fSMatt Macy spa_mmp_history_t *smh; 711eac7052fSMatt Macy int error = ENOENT; 712eac7052fSMatt Macy 713eac7052fSMatt Macy if (zfs_multihost_history == 0 && shl->size == 0) 714eac7052fSMatt Macy return (0); 715eac7052fSMatt Macy 716eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock); 717eac7052fSMatt Macy for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL; 718eac7052fSMatt Macy smh = list_prev(&shl->procfs_list.pl_list, smh)) { 719eac7052fSMatt Macy if (smh->mmp_node_id == mmp_node_id) { 720eac7052fSMatt Macy ASSERT(smh->io_error == 0); 721eac7052fSMatt Macy smh->io_error = io_error; 722eac7052fSMatt Macy smh->duration = duration; 723eac7052fSMatt Macy error = 0; 724eac7052fSMatt Macy break; 725eac7052fSMatt Macy } 726eac7052fSMatt Macy } 727eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock); 728eac7052fSMatt Macy 729eac7052fSMatt Macy return (error); 730eac7052fSMatt Macy } 731eac7052fSMatt Macy 732eac7052fSMatt Macy /* 733eac7052fSMatt Macy * Add a new MMP historical record. 734eac7052fSMatt Macy * error == 0 : a write was issued. 735eac7052fSMatt Macy * error != 0 : a write was not issued because no leaves were found. 736eac7052fSMatt Macy */ 737eac7052fSMatt Macy void 738eac7052fSMatt Macy spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, 739eac7052fSMatt Macy uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id, 740eac7052fSMatt Macy int error) 741eac7052fSMatt Macy { 742eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history; 743eac7052fSMatt Macy spa_mmp_history_t *smh; 744eac7052fSMatt Macy 745eac7052fSMatt Macy if (zfs_multihost_history == 0 && shl->size == 0) 746eac7052fSMatt Macy return; 747eac7052fSMatt Macy 748eac7052fSMatt Macy smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP); 749eac7052fSMatt Macy smh->txg = txg; 750eac7052fSMatt Macy smh->timestamp = timestamp; 751eac7052fSMatt Macy smh->mmp_delay = mmp_delay; 752eac7052fSMatt Macy if (vd) { 753eac7052fSMatt Macy smh->vdev_guid = vd->vdev_guid; 754eac7052fSMatt Macy if (vd->vdev_path) 755eac7052fSMatt Macy smh->vdev_path = kmem_strdup(vd->vdev_path); 756eac7052fSMatt Macy } 757eac7052fSMatt Macy smh->vdev_label = label; 758eac7052fSMatt Macy smh->mmp_node_id = mmp_node_id; 759eac7052fSMatt Macy 760eac7052fSMatt Macy if (error) { 761eac7052fSMatt Macy smh->io_error = error; 762eac7052fSMatt Macy smh->error_start = gethrtime(); 763eac7052fSMatt Macy smh->vdev_guid = 1; 764eac7052fSMatt Macy } 765eac7052fSMatt Macy 766eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock); 767eac7052fSMatt Macy procfs_list_add(&shl->procfs_list, smh); 768eac7052fSMatt Macy shl->size++; 769eac7052fSMatt Macy spa_mmp_history_truncate(shl, zfs_multihost_history); 770eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock); 771eac7052fSMatt Macy } 772eac7052fSMatt Macy 773eac7052fSMatt Macy static void * 774eac7052fSMatt Macy spa_state_addr(kstat_t *ksp, loff_t n) 775eac7052fSMatt Macy { 776eac7052fSMatt Macy if (n == 0) 777eac7052fSMatt Macy return (ksp->ks_private); /* return the spa_t */ 778eac7052fSMatt Macy return (NULL); 779eac7052fSMatt Macy } 780eac7052fSMatt Macy 781eac7052fSMatt Macy static int 782eac7052fSMatt Macy spa_state_data(char *buf, size_t size, void *data) 783eac7052fSMatt Macy { 784eac7052fSMatt Macy spa_t *spa = (spa_t *)data; 785eac7052fSMatt Macy (void) snprintf(buf, size, "%s\n", spa_state_to_name(spa)); 786eac7052fSMatt Macy return (0); 787eac7052fSMatt Macy } 788eac7052fSMatt Macy 789eac7052fSMatt Macy /* 790eac7052fSMatt Macy * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state. 791eac7052fSMatt Macy * 792eac7052fSMatt Macy * This is a lock-less read of the pool's state (unlike using 'zpool', which 793eac7052fSMatt Macy * can potentially block for seconds). Because it doesn't block, it can useful 794eac7052fSMatt Macy * as a pool heartbeat value. 795eac7052fSMatt Macy */ 796eac7052fSMatt Macy static void 797eac7052fSMatt Macy spa_state_init(spa_t *spa) 798eac7052fSMatt Macy { 799eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.state; 800eac7052fSMatt Macy char *name; 801eac7052fSMatt Macy kstat_t *ksp; 802eac7052fSMatt Macy 803eac7052fSMatt Macy mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); 804eac7052fSMatt Macy 805eac7052fSMatt Macy name = kmem_asprintf("zfs/%s", spa_name(spa)); 806eac7052fSMatt Macy ksp = kstat_create(name, 0, "state", "misc", 807eac7052fSMatt Macy KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); 808eac7052fSMatt Macy 809eac7052fSMatt Macy shk->kstat = ksp; 810eac7052fSMatt Macy if (ksp) { 811eac7052fSMatt Macy ksp->ks_lock = &shk->lock; 812eac7052fSMatt Macy ksp->ks_data = NULL; 813eac7052fSMatt Macy ksp->ks_private = spa; 814eac7052fSMatt Macy ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS; 815eac7052fSMatt Macy kstat_set_raw_ops(ksp, NULL, spa_state_data, spa_state_addr); 816eac7052fSMatt Macy kstat_install(ksp); 817eac7052fSMatt Macy } 818eac7052fSMatt Macy 819eac7052fSMatt Macy kmem_strfree(name); 820eac7052fSMatt Macy } 821eac7052fSMatt Macy 822e3aa18adSMartin Matuska static int 823e3aa18adSMartin Matuska spa_guid_data(char *buf, size_t size, void *data) 824e3aa18adSMartin Matuska { 825e3aa18adSMartin Matuska spa_t *spa = (spa_t *)data; 826e3aa18adSMartin Matuska (void) snprintf(buf, size, "%llu\n", (u_longlong_t)spa_guid(spa)); 827e3aa18adSMartin Matuska return (0); 828e3aa18adSMartin Matuska } 829e3aa18adSMartin Matuska 830e3aa18adSMartin Matuska static void 831e3aa18adSMartin Matuska spa_guid_init(spa_t *spa) 832e3aa18adSMartin Matuska { 833e3aa18adSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.guid; 834e3aa18adSMartin Matuska char *name; 835e3aa18adSMartin Matuska kstat_t *ksp; 836e3aa18adSMartin Matuska 837e3aa18adSMartin Matuska mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); 838e3aa18adSMartin Matuska 839e3aa18adSMartin Matuska name = kmem_asprintf("zfs/%s", spa_name(spa)); 840e3aa18adSMartin Matuska 841e3aa18adSMartin Matuska ksp = kstat_create(name, 0, "guid", "misc", 842e3aa18adSMartin Matuska KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); 843e3aa18adSMartin Matuska 844e3aa18adSMartin Matuska shk->kstat = ksp; 845e3aa18adSMartin Matuska if (ksp) { 846e3aa18adSMartin Matuska ksp->ks_lock = &shk->lock; 847e3aa18adSMartin Matuska ksp->ks_data = NULL; 848e3aa18adSMartin Matuska ksp->ks_private = spa; 849e3aa18adSMartin Matuska ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS; 850e3aa18adSMartin Matuska kstat_set_raw_ops(ksp, NULL, spa_guid_data, spa_state_addr); 851e3aa18adSMartin Matuska kstat_install(ksp); 852e3aa18adSMartin Matuska } 853e3aa18adSMartin Matuska 854e3aa18adSMartin Matuska kmem_strfree(name); 855e3aa18adSMartin Matuska } 856e3aa18adSMartin Matuska 857eac7052fSMatt Macy static void 858eac7052fSMatt Macy spa_health_destroy(spa_t *spa) 859eac7052fSMatt Macy { 860eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.state; 861eac7052fSMatt Macy kstat_t *ksp = shk->kstat; 862eac7052fSMatt Macy if (ksp) 863eac7052fSMatt Macy kstat_delete(ksp); 864eac7052fSMatt Macy 865eac7052fSMatt Macy mutex_destroy(&shk->lock); 866eac7052fSMatt Macy } 867eac7052fSMatt Macy 868e3aa18adSMartin Matuska static void 869e3aa18adSMartin Matuska spa_guid_destroy(spa_t *spa) 870e3aa18adSMartin Matuska { 871e3aa18adSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.guid; 872e3aa18adSMartin Matuska kstat_t *ksp = shk->kstat; 873e3aa18adSMartin Matuska if (ksp) 874e3aa18adSMartin Matuska kstat_delete(ksp); 875e3aa18adSMartin Matuska 876e3aa18adSMartin Matuska mutex_destroy(&shk->lock); 877e3aa18adSMartin Matuska } 878e3aa18adSMartin Matuska 879e92ffd9bSMartin Matuska static const spa_iostats_t spa_iostats_template = { 880eac7052fSMatt Macy { "trim_extents_written", KSTAT_DATA_UINT64 }, 881eac7052fSMatt Macy { "trim_bytes_written", KSTAT_DATA_UINT64 }, 882eac7052fSMatt Macy { "trim_extents_skipped", KSTAT_DATA_UINT64 }, 883eac7052fSMatt Macy { "trim_bytes_skipped", KSTAT_DATA_UINT64 }, 884eac7052fSMatt Macy { "trim_extents_failed", KSTAT_DATA_UINT64 }, 885eac7052fSMatt Macy { "trim_bytes_failed", KSTAT_DATA_UINT64 }, 886eac7052fSMatt Macy { "autotrim_extents_written", KSTAT_DATA_UINT64 }, 887eac7052fSMatt Macy { "autotrim_bytes_written", KSTAT_DATA_UINT64 }, 888eac7052fSMatt Macy { "autotrim_extents_skipped", KSTAT_DATA_UINT64 }, 889eac7052fSMatt Macy { "autotrim_bytes_skipped", KSTAT_DATA_UINT64 }, 890eac7052fSMatt Macy { "autotrim_extents_failed", KSTAT_DATA_UINT64 }, 891eac7052fSMatt Macy { "autotrim_bytes_failed", KSTAT_DATA_UINT64 }, 892eac7052fSMatt Macy { "simple_trim_extents_written", KSTAT_DATA_UINT64 }, 893eac7052fSMatt Macy { "simple_trim_bytes_written", KSTAT_DATA_UINT64 }, 894eac7052fSMatt Macy { "simple_trim_extents_skipped", KSTAT_DATA_UINT64 }, 895eac7052fSMatt Macy { "simple_trim_bytes_skipped", KSTAT_DATA_UINT64 }, 896eac7052fSMatt Macy { "simple_trim_extents_failed", KSTAT_DATA_UINT64 }, 897eac7052fSMatt Macy { "simple_trim_bytes_failed", KSTAT_DATA_UINT64 }, 898*7a7741afSMartin Matuska { "arc_read_count", KSTAT_DATA_UINT64 }, 899*7a7741afSMartin Matuska { "arc_read_bytes", KSTAT_DATA_UINT64 }, 900*7a7741afSMartin Matuska { "arc_write_count", KSTAT_DATA_UINT64 }, 901*7a7741afSMartin Matuska { "arc_write_bytes", KSTAT_DATA_UINT64 }, 902*7a7741afSMartin Matuska { "direct_read_count", KSTAT_DATA_UINT64 }, 903*7a7741afSMartin Matuska { "direct_read_bytes", KSTAT_DATA_UINT64 }, 904*7a7741afSMartin Matuska { "direct_write_count", KSTAT_DATA_UINT64 }, 905*7a7741afSMartin Matuska { "direct_write_bytes", KSTAT_DATA_UINT64 }, 906eac7052fSMatt Macy }; 907eac7052fSMatt Macy 908eac7052fSMatt Macy #define SPA_IOSTATS_ADD(stat, val) \ 909eac7052fSMatt Macy atomic_add_64(&iostats->stat.value.ui64, (val)); 910eac7052fSMatt Macy 911eac7052fSMatt Macy void 912eac7052fSMatt Macy spa_iostats_trim_add(spa_t *spa, trim_type_t type, 913eac7052fSMatt Macy uint64_t extents_written, uint64_t bytes_written, 914eac7052fSMatt Macy uint64_t extents_skipped, uint64_t bytes_skipped, 915eac7052fSMatt Macy uint64_t extents_failed, uint64_t bytes_failed) 916eac7052fSMatt Macy { 917eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.iostats; 918eac7052fSMatt Macy kstat_t *ksp = shk->kstat; 919eac7052fSMatt Macy spa_iostats_t *iostats; 920eac7052fSMatt Macy 921eac7052fSMatt Macy if (ksp == NULL) 922eac7052fSMatt Macy return; 923eac7052fSMatt Macy 924eac7052fSMatt Macy iostats = ksp->ks_data; 925eac7052fSMatt Macy if (type == TRIM_TYPE_MANUAL) { 926eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_extents_written, extents_written); 927eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_bytes_written, bytes_written); 928eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_extents_skipped, extents_skipped); 929eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_bytes_skipped, bytes_skipped); 930eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_extents_failed, extents_failed); 931eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_bytes_failed, bytes_failed); 932eac7052fSMatt Macy } else if (type == TRIM_TYPE_AUTO) { 933eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_extents_written, extents_written); 934eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_bytes_written, bytes_written); 935eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_extents_skipped, extents_skipped); 936eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_bytes_skipped, bytes_skipped); 937eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_extents_failed, extents_failed); 938eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_bytes_failed, bytes_failed); 939eac7052fSMatt Macy } else { 940eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_extents_written, extents_written); 941eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_bytes_written, bytes_written); 942eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_extents_skipped, extents_skipped); 943eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_bytes_skipped, bytes_skipped); 944eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_extents_failed, extents_failed); 945eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_bytes_failed, bytes_failed); 946eac7052fSMatt Macy } 947eac7052fSMatt Macy } 948eac7052fSMatt Macy 949*7a7741afSMartin Matuska void 950*7a7741afSMartin Matuska spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) 951*7a7741afSMartin Matuska { 952*7a7741afSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.iostats; 953*7a7741afSMartin Matuska kstat_t *ksp = shk->kstat; 954*7a7741afSMartin Matuska 955*7a7741afSMartin Matuska if (ksp == NULL) 956*7a7741afSMartin Matuska return; 957*7a7741afSMartin Matuska 958*7a7741afSMartin Matuska spa_iostats_t *iostats = ksp->ks_data; 959*7a7741afSMartin Matuska if (flags & DMU_DIRECTIO) { 960*7a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_read_count, iops); 961*7a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_read_bytes, size); 962*7a7741afSMartin Matuska } else { 963*7a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_read_count, iops); 964*7a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_read_bytes, size); 965*7a7741afSMartin Matuska } 966*7a7741afSMartin Matuska } 967*7a7741afSMartin Matuska 968*7a7741afSMartin Matuska void 969*7a7741afSMartin Matuska spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) 970*7a7741afSMartin Matuska { 971*7a7741afSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.iostats; 972*7a7741afSMartin Matuska kstat_t *ksp = shk->kstat; 973*7a7741afSMartin Matuska 974*7a7741afSMartin Matuska if (ksp == NULL) 975*7a7741afSMartin Matuska return; 976*7a7741afSMartin Matuska 977*7a7741afSMartin Matuska spa_iostats_t *iostats = ksp->ks_data; 978*7a7741afSMartin Matuska if (flags & DMU_DIRECTIO) { 979*7a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_write_count, iops); 980*7a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_write_bytes, size); 981*7a7741afSMartin Matuska } else { 982*7a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_write_count, iops); 983*7a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_write_bytes, size); 984*7a7741afSMartin Matuska } 985*7a7741afSMartin Matuska } 986*7a7741afSMartin Matuska 987eac7052fSMatt Macy static int 988eac7052fSMatt Macy spa_iostats_update(kstat_t *ksp, int rw) 989eac7052fSMatt Macy { 990eac7052fSMatt Macy if (rw == KSTAT_WRITE) { 991eac7052fSMatt Macy memcpy(ksp->ks_data, &spa_iostats_template, 992eac7052fSMatt Macy sizeof (spa_iostats_t)); 993eac7052fSMatt Macy } 994eac7052fSMatt Macy 995eac7052fSMatt Macy return (0); 996eac7052fSMatt Macy } 997eac7052fSMatt Macy 998eac7052fSMatt Macy static void 999eac7052fSMatt Macy spa_iostats_init(spa_t *spa) 1000eac7052fSMatt Macy { 1001eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.iostats; 1002eac7052fSMatt Macy 1003eac7052fSMatt Macy mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); 1004eac7052fSMatt Macy 1005eac7052fSMatt Macy char *name = kmem_asprintf("zfs/%s", spa_name(spa)); 1006eac7052fSMatt Macy kstat_t *ksp = kstat_create(name, 0, "iostats", "misc", 1007eac7052fSMatt Macy KSTAT_TYPE_NAMED, sizeof (spa_iostats_t) / sizeof (kstat_named_t), 1008eac7052fSMatt Macy KSTAT_FLAG_VIRTUAL); 1009eac7052fSMatt Macy 1010eac7052fSMatt Macy shk->kstat = ksp; 1011eac7052fSMatt Macy if (ksp) { 1012eac7052fSMatt Macy int size = sizeof (spa_iostats_t); 1013eac7052fSMatt Macy ksp->ks_lock = &shk->lock; 1014eac7052fSMatt Macy ksp->ks_private = spa; 1015eac7052fSMatt Macy ksp->ks_update = spa_iostats_update; 1016eac7052fSMatt Macy ksp->ks_data = kmem_alloc(size, KM_SLEEP); 1017eac7052fSMatt Macy memcpy(ksp->ks_data, &spa_iostats_template, size); 1018eac7052fSMatt Macy kstat_install(ksp); 1019eac7052fSMatt Macy } 1020eac7052fSMatt Macy 1021eac7052fSMatt Macy kmem_strfree(name); 1022eac7052fSMatt Macy } 1023eac7052fSMatt Macy 1024eac7052fSMatt Macy static void 1025eac7052fSMatt Macy spa_iostats_destroy(spa_t *spa) 1026eac7052fSMatt Macy { 1027eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.iostats; 1028eac7052fSMatt Macy kstat_t *ksp = shk->kstat; 1029eac7052fSMatt Macy if (ksp) { 1030eac7052fSMatt Macy kmem_free(ksp->ks_data, sizeof (spa_iostats_t)); 1031eac7052fSMatt Macy kstat_delete(ksp); 1032eac7052fSMatt Macy } 1033eac7052fSMatt Macy 1034eac7052fSMatt Macy mutex_destroy(&shk->lock); 1035eac7052fSMatt Macy } 1036eac7052fSMatt Macy 1037eac7052fSMatt Macy void 1038eac7052fSMatt Macy spa_stats_init(spa_t *spa) 1039eac7052fSMatt Macy { 1040eac7052fSMatt Macy spa_read_history_init(spa); 1041eac7052fSMatt Macy spa_txg_history_init(spa); 1042eac7052fSMatt Macy spa_tx_assign_init(spa); 1043eac7052fSMatt Macy spa_mmp_history_init(spa); 1044eac7052fSMatt Macy spa_state_init(spa); 1045e3aa18adSMartin Matuska spa_guid_init(spa); 1046eac7052fSMatt Macy spa_iostats_init(spa); 1047eac7052fSMatt Macy } 1048eac7052fSMatt Macy 1049eac7052fSMatt Macy void 1050eac7052fSMatt Macy spa_stats_destroy(spa_t *spa) 1051eac7052fSMatt Macy { 1052eac7052fSMatt Macy spa_iostats_destroy(spa); 1053eac7052fSMatt Macy spa_health_destroy(spa); 1054eac7052fSMatt Macy spa_tx_assign_destroy(spa); 1055eac7052fSMatt Macy spa_txg_history_destroy(spa); 1056eac7052fSMatt Macy spa_read_history_destroy(spa); 1057eac7052fSMatt Macy spa_mmp_history_destroy(spa); 1058e3aa18adSMartin Matuska spa_guid_destroy(spa); 1059eac7052fSMatt Macy } 1060eac7052fSMatt Macy 1061be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs, zfs_, read_history, UINT, ZMOD_RW, 1062eac7052fSMatt Macy "Historical statistics for the last N reads"); 1063eac7052fSMatt Macy 1064eac7052fSMatt Macy ZFS_MODULE_PARAM(zfs, zfs_, read_history_hits, INT, ZMOD_RW, 1065eac7052fSMatt Macy "Include cache hits in read history"); 1066eac7052fSMatt Macy 1067be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs_txg, zfs_txg_, history, UINT, ZMOD_RW, 1068eac7052fSMatt Macy "Historical statistics for the last N txgs"); 1069eac7052fSMatt Macy 1070be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs_multihost, zfs_multihost_, history, UINT, ZMOD_RW, 1071eac7052fSMatt Macy "Historical statistics for last N multihost writes"); 1072