Lines Matching defs:msp

345 static void metaslab_passivate(metaslab_t *msp, uint64_t weight);
346 static uint64_t metaslab_weight_from_range_tree(metaslab_t *msp);
644 metaslab_t *msp = multilist_sublist_head(mls);
646 while (msp != NULL) {
647 mutex_enter(&msp->ms_lock);
655 if (!multilist_link_active(&msp->ms_class_txg_node)) {
656 mutex_exit(&msp->ms_lock);
661 metaslab_t *next_msp = multilist_sublist_next(mls, msp);
664 msp->ms_selected_txg + metaslab_unload_delay &&
665 now > msp->ms_selected_time +
667 (msp->ms_allocator == -1 ||
669 metaslab_evict(msp, txg);
676 mutex_exit(&msp->ms_lock);
679 mutex_exit(&msp->ms_lock);
680 msp = next_msp;
965 metaslab_t *msp = mga->mga_primary;
966 if (msp != NULL) {
967 mutex_enter(&msp->ms_lock);
968 metaslab_passivate(msp,
969 metaslab_weight_from_range_tree(msp));
970 mutex_exit(&msp->ms_lock);
972 msp = mga->mga_secondary;
973 if (msp != NULL) {
974 mutex_enter(&msp->ms_lock);
975 metaslab_passivate(msp,
976 metaslab_weight_from_range_tree(msp));
977 mutex_exit(&msp->ms_lock);
1038 for (metaslab_t *msp = avl_first(t);
1039 msp != NULL; msp = AVL_NEXT(t, msp)) {
1040 VERIFY3P(msp->ms_group, ==, mg);
1042 if (msp->ms_sm == NULL)
1047 msp->ms_sm->sm_phys->smp_histogram[i];
1060 metaslab_group_histogram_add(metaslab_group_t *mg, metaslab_t *msp)
1065 ASSERT(MUTEX_HELD(&msp->ms_lock));
1066 if (msp->ms_sm == NULL)
1075 msp->ms_sm->sm_phys->smp_histogram[i];
1077 msp->ms_sm->sm_phys->smp_histogram[i];
1084 metaslab_group_histogram_remove(metaslab_group_t *mg, metaslab_t *msp)
1089 ASSERT(MUTEX_HELD(&msp->ms_lock));
1090 if (msp->ms_sm == NULL)
1097 msp->ms_sm->sm_phys->smp_histogram[i]);
1099 msp->ms_sm->sm_phys->smp_histogram[i]);
1104 msp->ms_sm->sm_phys->smp_histogram[i];
1106 msp->ms_sm->sm_phys->smp_histogram[i];
1113 metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp)
1115 ASSERT(msp->ms_group == NULL);
1117 msp->ms_group = mg;
1118 msp->ms_weight = 0;
1119 avl_add(&mg->mg_metaslab_tree, msp);
1122 mutex_enter(&msp->ms_lock);
1123 metaslab_group_histogram_add(mg, msp);
1124 mutex_exit(&msp->ms_lock);
1128 metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp)
1130 mutex_enter(&msp->ms_lock);
1131 metaslab_group_histogram_remove(mg, msp);
1132 mutex_exit(&msp->ms_lock);
1135 ASSERT(msp->ms_group == mg);
1136 avl_remove(&mg->mg_metaslab_tree, msp);
1138 metaslab_class_t *mc = msp->ms_group->mg_class;
1140 multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
1141 if (multilist_link_active(&msp->ms_class_txg_node))
1142 multilist_sublist_remove(mls, msp);
1145 msp->ms_group = NULL;
1150 metaslab_group_sort_impl(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
1152 ASSERT(MUTEX_HELD(&msp->ms_lock));
1154 ASSERT(msp->ms_group == mg);
1156 avl_remove(&mg->mg_metaslab_tree, msp);
1157 msp->ms_weight = weight;
1158 avl_add(&mg->mg_metaslab_tree, msp);
1163 metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
1170 ASSERT(MUTEX_HELD(&msp->ms_lock));
1173 metaslab_group_sort_impl(mg, msp, weight);
1192 metaslab_t *msp = vd->vdev_ms[m];
1194 if (msp->ms_fragmentation == ZFS_FRAG_INVALID)
1196 if (msp->ms_group != mg)
1200 fragmentation += msp->ms_fragmentation;
1515 metaslab_largest_allocatable(metaslab_t *msp)
1517 zfs_btree_t *t = &msp->ms_allocatable_by_size;
1523 metaslab_size_tree_full_load(msp->ms_allocatable);
1529 return (rs_get_end(rs, msp->ms_allocatable) - rs_get_start(rs,
1530 msp->ms_allocatable));
1538 metaslab_largest_unflushed_free(metaslab_t *msp)
1540 ASSERT(MUTEX_HELD(&msp->ms_lock));
1542 if (msp->ms_unflushed_frees == NULL)
1545 if (zfs_btree_numnodes(&msp->ms_unflushed_frees_by_size) == 0)
1546 metaslab_size_tree_full_load(msp->ms_unflushed_frees);
1547 range_seg_t *rs = zfs_btree_last(&msp->ms_unflushed_frees_by_size,
1575 uint64_t rstart = rs_get_start(rs, msp->ms_unflushed_frees);
1576 uint64_t rsize = rs_get_end(rs, msp->ms_unflushed_frees) - rstart;
1580 boolean_t found = range_tree_find_in(msp->ms_defer[t], rstart,
1591 boolean_t found = range_tree_find_in(msp->ms_freed, rstart,
1652 static uint64_t metaslab_df_alloc(metaslab_t *msp, uint64_t size);
1653 static uint64_t metaslab_cf_alloc(metaslab_t *msp, uint64_t size);
1654 static uint64_t metaslab_ndf_alloc(metaslab_t *msp, uint64_t size);
1740 metaslab_df_alloc(metaslab_t *msp, uint64_t size)
1750 uint64_t *cursor = &msp->ms_lbas[highbit64(align) - 1];
1751 range_tree_t *rt = msp->ms_allocatable;
1752 uint_t free_pct = range_tree_space(rt) * 100 / msp->ms_size;
1755 ASSERT(MUTEX_HELD(&msp->ms_lock));
1761 if (metaslab_largest_allocatable(msp) < metaslab_df_alloc_threshold ||
1771 if (zfs_btree_numnodes(&msp->ms_allocatable_by_size) == 0)
1772 metaslab_size_tree_full_load(msp->ms_allocatable);
1776 rs = zfs_btree_last(&msp->ms_allocatable_by_size, NULL);
1780 rs = metaslab_block_find(&msp->ms_allocatable_by_size,
1781 rt, msp->ms_start, size, &where);
1803 metaslab_cf_alloc(metaslab_t *msp, uint64_t size)
1805 range_tree_t *rt = msp->ms_allocatable;
1806 zfs_btree_t *t = &msp->ms_allocatable_by_size;
1807 uint64_t *cursor = &msp->ms_lbas[0];
1808 uint64_t *cursor_end = &msp->ms_lbas[1];
1811 ASSERT(MUTEX_HELD(&msp->ms_lock));
1819 metaslab_size_tree_full_load(msp->ms_allocatable);
1851 metaslab_ndf_alloc(metaslab_t *msp, uint64_t size)
1853 zfs_btree_t *t = &msp->ms_allocatable->rt_root;
1854 range_tree_t *rt = msp->ms_allocatable;
1859 uint64_t *cursor = &msp->ms_lbas[hbit - 1];
1860 uint64_t max_size = metaslab_largest_allocatable(msp);
1862 ASSERT(MUTEX_HELD(&msp->ms_lock));
1872 t = &msp->ms_allocatable_by_size;
1901 metaslab_load_wait(metaslab_t *msp)
1903 ASSERT(MUTEX_HELD(&msp->ms_lock));
1905 while (msp->ms_loading) {
1906 ASSERT(!msp->ms_loaded);
1907 cv_wait(&msp->ms_load_cv, &msp->ms_lock);
1915 metaslab_flush_wait(metaslab_t *msp)
1917 ASSERT(MUTEX_HELD(&msp->ms_lock));
1919 while (msp->ms_flushing)
1920 cv_wait(&msp->ms_flush_cv, &msp->ms_lock);
1926 metaslab_t *msp = arg;
1932 return ((unsigned int)msp->ms_id % multilist_get_num_sublists(ml));
1936 metaslab_allocated_space(metaslab_t *msp)
1938 return (msp->ms_allocated_space);
1945 metaslab_verify_space(metaslab_t *msp, uint64_t txg)
1947 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
1951 ASSERT(MUTEX_HELD(&msp->ms_lock));
1952 ASSERT(!msp->ms_condensing);
1964 if (txg != spa_syncing_txg(spa) || msp->ms_sm == NULL ||
1965 !msp->ms_loaded)
1973 ASSERT3S(space_map_allocated(msp->ms_sm), >=, 0);
1975 ASSERT3U(space_map_allocated(msp->ms_sm), >=,
1976 range_tree_space(msp->ms_unflushed_frees));
1978 ASSERT3U(metaslab_allocated_space(msp), ==,
1979 space_map_allocated(msp->ms_sm) +
1980 range_tree_space(msp->ms_unflushed_allocs) -
1981 range_tree_space(msp->ms_unflushed_frees));
1983 sm_free_space = msp->ms_size - metaslab_allocated_space(msp);
1991 range_tree_space(msp->ms_allocating[(txg + t) & TXG_MASK]);
1993 ASSERT3U(allocating + msp->ms_allocated_this_txg, ==,
1994 msp->ms_allocating_total);
1996 ASSERT3U(msp->ms_deferspace, ==,
1997 range_tree_space(msp->ms_defer[0]) +
1998 range_tree_space(msp->ms_defer[1]));
2000 msp_free_space = range_tree_space(msp->ms_allocatable) + allocating +
2001 msp->ms_deferspace + range_tree_space(msp->ms_freed);
2007 metaslab_aux_histograms_clear(metaslab_t *msp)
2013 ASSERT(msp->ms_loaded);
2015 memset(msp->ms_synchist, 0, sizeof (msp->ms_synchist));
2017 memset(msp->ms_deferhist[t], 0, sizeof (msp->ms_deferhist[t]));
2054 metaslab_aux_histograms_update(metaslab_t *msp)
2056 space_map_t *sm = msp->ms_sm;
2065 if (msp->ms_loaded) {
2066 metaslab_aux_histograms_clear(msp);
2068 metaslab_aux_histogram_add(msp->ms_synchist,
2069 sm->sm_shift, msp->ms_freed);
2072 metaslab_aux_histogram_add(msp->ms_deferhist[t],
2073 sm->sm_shift, msp->ms_defer[t]);
2077 metaslab_aux_histogram_add(msp->ms_synchist,
2078 sm->sm_shift, msp->ms_freeing);
2087 metaslab_aux_histograms_update_done(metaslab_t *msp, boolean_t defer_allowed)
2089 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
2090 space_map_t *sm = msp->ms_sm;
2107 memcpy(msp->ms_deferhist[hist_index], msp->ms_synchist,
2108 sizeof (msp->ms_synchist));
2110 memset(msp->ms_deferhist[hist_index], 0,
2111 sizeof (msp->ms_deferhist[hist_index]));
2113 memset(msp->ms_synchist, 0, sizeof (msp->ms_synchist));
2122 metaslab_verify_weight_and_frag(metaslab_t *msp)
2124 ASSERT(MUTEX_HELD(&msp->ms_lock));
2138 if (msp->ms_group == NULL)
2146 vdev_t *vd = msp->ms_group->mg_vd;
2156 if (txg_list_member(&vd->vdev_ms_list, msp, t))
2165 if (!spa_writeable(msp->ms_group->mg_vd->vdev_spa))
2169 if (msp->ms_loaded) {
2170 range_tree_stat_verify(msp->ms_allocatable);
2171 VERIFY(space_map_histogram_verify(msp->ms_sm,
2172 msp->ms_allocatable));
2175 uint64_t weight = msp->ms_weight;
2176 uint64_t was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
2177 boolean_t space_based = WEIGHT_IS_SPACEBASED(msp->ms_weight);
2178 uint64_t frag = msp->ms_fragmentation;
2179 uint64_t max_segsize = msp->ms_max_size;
2181 msp->ms_weight = 0;
2182 msp->ms_fragmentation = 0;
2197 msp->ms_weight = metaslab_weight(msp, B_TRUE) | was_active;
2199 VERIFY3U(max_segsize, ==, msp->ms_max_size);
2205 if ((space_based && !WEIGHT_IS_SPACEBASED(msp->ms_weight)) ||
2206 (!space_based && WEIGHT_IS_SPACEBASED(msp->ms_weight))) {
2207 msp->ms_fragmentation = frag;
2208 msp->ms_weight = weight;
2212 VERIFY3U(msp->ms_fragmentation, ==, frag);
2213 VERIFY3U(msp->ms_weight, ==, weight);
2239 metaslab_t *msp = multilist_sublist_head(mls);
2241 while (msp != NULL && allmem * zfs_metaslab_mem_limit / 100 <
2246 metaslab_idx_func(&mc->mc_metaslab_txg_list, msp));
2248 if (!multilist_link_active(&msp->ms_class_txg_node)) {
2252 metaslab_t *next_msp = multilist_sublist_next(mls, msp);
2265 if (msp->ms_loading) {
2266 msp = next_msp;
2283 mutex_enter(&msp->ms_lock);
2284 if (msp->ms_allocator == -1 && msp->ms_sm != NULL &&
2285 msp->ms_allocating_total == 0) {
2286 metaslab_unload(msp);
2288 mutex_exit(&msp->ms_lock);
2289 msp = next_msp;
2299 metaslab_load_impl(metaslab_t *msp)
2303 ASSERT(MUTEX_HELD(&msp->ms_lock));
2304 ASSERT(msp->ms_loading);
2305 ASSERT(!msp->ms_condensing);
2331 uint64_t length = msp->ms_synced_length;
2332 mutex_exit(&msp->ms_lock);
2336 if (msp->ms_allocatable->rt_arg == NULL) {
2339 mrap = msp->ms_allocatable->rt_arg;
2340 msp->ms_allocatable->rt_ops = NULL;
2341 msp->ms_allocatable->rt_arg = NULL;
2343 mrap->mra_bt = &msp->ms_allocatable_by_size;
2346 if (msp->ms_sm != NULL) {
2347 error = space_map_load_length(msp->ms_sm, msp->ms_allocatable,
2351 metaslab_rt_create(msp->ms_allocatable, mrap);
2352 msp->ms_allocatable->rt_ops = &metaslab_rt_ops;
2353 msp->ms_allocatable->rt_arg = mrap;
2356 arg.rt = msp->ms_allocatable;
2358 range_tree_walk(msp->ms_allocatable, metaslab_size_sorted_add,
2365 metaslab_rt_create(msp->ms_allocatable, mrap);
2366 msp->ms_allocatable->rt_ops = &metaslab_rt_ops;
2367 msp->ms_allocatable->rt_arg = mrap;
2373 range_tree_add(msp->ms_allocatable,
2374 msp->ms_start, msp->ms_size);
2376 if (msp->ms_new) {
2384 ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
2385 ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
2396 mutex_enter(&msp->ms_sync_lock);
2397 mutex_enter(&msp->ms_lock);
2399 ASSERT(!msp->ms_condensing);
2400 ASSERT(!msp->ms_flushing);
2403 mutex_exit(&msp->ms_sync_lock);
2407 ASSERT3P(msp->ms_group, !=, NULL);
2408 msp->ms_loaded = B_TRUE;
2415 range_tree_walk(msp->ms_unflushed_allocs,
2416 range_tree_remove, msp->ms_allocatable);
2417 range_tree_walk(msp->ms_unflushed_frees,
2418 range_tree_add, msp->ms_allocatable);
2420 ASSERT3P(msp->ms_group, !=, NULL);
2421 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
2446 range_tree_walk(msp->ms_freed,
2447 range_tree_remove, msp->ms_allocatable);
2465 range_tree_walk(msp->ms_defer[t],
2466 range_tree_remove, msp->ms_allocatable);
2481 uint64_t weight = msp->ms_weight;
2482 uint64_t max_size = msp->ms_max_size;
2483 metaslab_recalculate_weight_and_sort(msp);
2485 ASSERT3U(weight, <=, msp->ms_weight);
2486 msp->ms_max_size = metaslab_largest_allocatable(msp);
2487 ASSERT3U(max_size, <=, msp->ms_max_size);
2489 msp->ms_load_time = load_end;
2498 (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
2499 (u_longlong_t)msp->ms_id,
2500 (u_longlong_t)space_map_length(msp->ms_sm),
2501 (u_longlong_t)range_tree_space(msp->ms_unflushed_allocs),
2502 (u_longlong_t)range_tree_space(msp->ms_unflushed_frees),
2503 (u_longlong_t)range_tree_space(msp->ms_freed),
2504 (u_longlong_t)range_tree_space(msp->ms_defer[0]),
2505 (u_longlong_t)range_tree_space(msp->ms_defer[1]),
2506 (longlong_t)((load_start - msp->ms_unload_time) / 1000000),
2508 (u_longlong_t)msp->ms_max_size,
2509 (u_longlong_t)msp->ms_max_size - max_size,
2510 (u_longlong_t)weight, (u_longlong_t)msp->ms_weight);
2512 metaslab_verify_space(msp, spa_syncing_txg(spa));
2513 mutex_exit(&msp->ms_sync_lock);
2518 metaslab_load(metaslab_t *msp)
2520 ASSERT(MUTEX_HELD(&msp->ms_lock));
2526 metaslab_load_wait(msp);
2527 if (msp->ms_loaded)
2529 VERIFY(!msp->ms_loading);
2530 ASSERT(!msp->ms_condensing);
2538 msp->ms_loading = B_TRUE;
2545 if (msp->ms_flushing)
2546 metaslab_flush_wait(msp);
2553 ASSERT(!msp->ms_loaded);
2560 if (spa_normal_class(msp->ms_group->mg_class->mc_spa) ==
2561 msp->ms_group->mg_class) {
2562 metaslab_potentially_evict(msp->ms_group->mg_class);
2565 int error = metaslab_load_impl(msp);
2567 ASSERT(MUTEX_HELD(&msp->ms_lock));
2568 msp->ms_loading = B_FALSE;
2569 cv_broadcast(&msp->ms_load_cv);
2575 metaslab_unload(metaslab_t *msp)
2577 ASSERT(MUTEX_HELD(&msp->ms_lock));
2584 if (!msp->ms_loaded)
2587 range_tree_vacate(msp->ms_allocatable, NULL, NULL);
2588 msp->ms_loaded = B_FALSE;
2589 msp->ms_unload_time = gethrtime();
2591 msp->ms_activation_weight = 0;
2592 msp->ms_weight &= ~METASLAB_ACTIVE_MASK;
2594 if (msp->ms_group != NULL) {
2595 metaslab_class_t *mc = msp->ms_group->mg_class;
2597 multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
2598 if (multilist_link_active(&msp->ms_class_txg_node))
2599 multilist_sublist_remove(mls, msp);
2602 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
2608 (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
2609 (u_longlong_t)msp->ms_id,
2610 (u_longlong_t)msp->ms_weight,
2611 (u_longlong_t)msp->ms_selected_txg,
2612 (u_longlong_t)(msp->ms_unload_time -
2613 msp->ms_selected_time) / 1000 / 1000,
2614 (u_longlong_t)msp->ms_alloc_txg,
2615 (u_longlong_t)(msp->ms_unload_time -
2616 msp->ms_load_time) / 1000 / 1000,
2617 (u_longlong_t)msp->ms_max_size);
2632 if (msp->ms_group != NULL)
2633 metaslab_recalculate_weight_and_sort(msp);
2644 metaslab_calculate_range_tree_type(vdev_t *vdev, metaslab_t *msp,
2650 *start = msp->ms_start;
2660 metaslab_set_selected_txg(metaslab_t *msp, uint64_t txg)
2662 ASSERT(MUTEX_HELD(&msp->ms_lock));
2663 metaslab_class_t *mc = msp->ms_group->mg_class;
2665 multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
2666 if (multilist_link_active(&msp->ms_class_txg_node))
2667 multilist_sublist_remove(mls, msp);
2668 msp->ms_selected_txg = txg;
2669 msp->ms_selected_time = gethrtime();
2670 multilist_sublist_insert_tail(mls, msp);
2689 uint64_t txg, metaslab_t **msp)
2791 *msp = ms;
2797 metaslab_fini_flush_data(metaslab_t *msp)
2799 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
2801 if (metaslab_unflushed_txg(msp) == 0) {
2802 ASSERT3P(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL),
2809 avl_remove(&spa->spa_metaslabs_by_flushed, msp);
2812 spa_log_sm_decrement_mscount(spa, metaslab_unflushed_txg(msp));
2813 spa_log_summary_decrement_mscount(spa, metaslab_unflushed_txg(msp),
2814 metaslab_unflushed_dirty(msp));
2826 metaslab_fini(metaslab_t *msp)
2828 metaslab_group_t *mg = msp->ms_group;
2832 metaslab_fini_flush_data(msp);
2834 metaslab_group_remove(mg, msp);
2836 mutex_enter(&msp->ms_lock);
2837 VERIFY(msp->ms_group == NULL);
2844 if (!msp->ms_new) {
2846 -metaslab_allocated_space(msp), 0, -msp->ms_size);
2849 space_map_close(msp->ms_sm);
2850 msp->ms_sm = NULL;
2852 metaslab_unload(msp);
2854 range_tree_destroy(msp->ms_allocatable);
2855 range_tree_destroy(msp->ms_freeing);
2856 range_tree_destroy(msp->ms_freed);
2859 metaslab_unflushed_changes_memused(msp));
2861 metaslab_unflushed_changes_memused(msp);
2862 range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
2863 range_tree_destroy(msp->ms_unflushed_allocs);
2864 range_tree_destroy(msp->ms_checkpointing);
2865 range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
2866 range_tree_destroy(msp->ms_unflushed_frees);
2869 range_tree_destroy(msp->ms_allocating[t]);
2872 range_tree_destroy(msp->ms_defer[t]);
2874 ASSERT0(msp->ms_deferspace);
2877 ASSERT(!txg_list_member(&vd->vdev_ms_list, msp, t));
2879 range_tree_vacate(msp->ms_trim, NULL, NULL);
2880 range_tree_destroy(msp->ms_trim);
2882 mutex_exit(&msp->ms_lock);
2883 cv_destroy(&msp->ms_load_cv);
2884 cv_destroy(&msp->ms_flush_cv);
2885 mutex_destroy(&msp->ms_lock);
2886 mutex_destroy(&msp->ms_sync_lock);
2887 ASSERT3U(msp->ms_allocator, ==, -1);
2889 kmem_free(msp, sizeof (metaslab_t));
2939 metaslab_set_fragmentation(metaslab_t *msp, boolean_t nodirty)
2941 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
2948 msp->ms_fragmentation = ZFS_FRAG_INVALID;
2956 if (msp->ms_sm == NULL) {
2957 msp->ms_fragmentation = 0;
2965 if (msp->ms_sm->sm_dbuf->db_size != sizeof (space_map_phys_t)) {
2967 vdev_t *vd = msp->ms_group->mg_vd;
2979 msp->ms_condense_wanted = B_TRUE;
2980 vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
2983 (u_longlong_t)msp->ms_id,
2986 msp->ms_fragmentation = ZFS_FRAG_INVALID;
2992 uint8_t shift = msp->ms_sm->sm_shift;
2997 if (msp->ms_sm->sm_phys->smp_histogram[i] == 0)
3000 space = msp->ms_sm->sm_phys->smp_histogram[i] << (i + shift);
3011 msp->ms_fragmentation = fragmentation;
3020 metaslab_space_weight(metaslab_t *msp)
3022 metaslab_group_t *mg = msp->ms_group;
3026 ASSERT(MUTEX_HELD(&msp->ms_lock));
3031 space = msp->ms_size - metaslab_allocated_space(msp);
3034 msp->ms_fragmentation != ZFS_FRAG_INVALID) {
3042 space = (space * (100 - (msp->ms_fragmentation - 1))) / 100;
3066 weight = 2 * weight - (msp->ms_id * weight) / vd->vdev_ms_count;
3076 if (msp->ms_loaded && msp->ms_fragmentation != ZFS_FRAG_INVALID &&
3077 msp->ms_fragmentation <= zfs_metaslab_fragmentation_threshold) {
3078 weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK);
3092 metaslab_weight_from_range_tree(metaslab_t *msp)
3097 ASSERT(msp->ms_loaded);
3101 uint8_t shift = msp->ms_group->mg_vd->vdev_ashift;
3105 segments += msp->ms_allocatable->rt_histogram[i];
3133 metaslab_weight_from_spacemap(metaslab_t *msp)
3135 space_map_t *sm = msp->ms_sm;
3136 ASSERT(!msp->ms_loaded);
3151 deferspace_histogram[i] += msp->ms_synchist[i];
3154 deferspace_histogram[i] += msp->ms_deferhist[t][i];
3180 metaslab_segment_weight(metaslab_t *msp)
3182 metaslab_group_t *mg = msp->ms_group;
3186 ASSERT(MUTEX_HELD(&msp->ms_lock));
3191 if (metaslab_allocated_space(msp) == 0) {
3192 int idx = highbit64(msp->ms_size) - 1;
3207 ASSERT3U(msp->ms_sm->sm_dbuf->db_size, ==, sizeof (space_map_phys_t));
3212 if (metaslab_allocated_space(msp) == msp->ms_size)
3219 if (msp->ms_loaded) {
3220 weight = metaslab_weight_from_range_tree(msp);
3222 weight = metaslab_weight_from_spacemap(msp);
3230 if (msp->ms_activation_weight != 0 && weight != 0)
3231 WEIGHT_SET_ACTIVE(weight, WEIGHT_GET_ACTIVE(msp->ms_weight));
3245 metaslab_should_allocate(metaslab_t *msp, uint64_t asize, boolean_t try_hard)
3253 if (unlikely(msp->ms_new))
3263 if (msp->ms_loaded ||
3264 (msp->ms_max_size != 0 && !try_hard && gethrtime() <
3265 msp->ms_unload_time + SEC2NSEC(zfs_metaslab_max_size_cache_sec)))
3266 return (msp->ms_max_size >= asize);
3269 if (!WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
3277 1ULL << (WEIGHT_GET_INDEX(msp->ms_weight) + 1));
3280 (msp->ms_weight & ~METASLAB_WEIGHT_TYPE));
3287 metaslab_weight(metaslab_t *msp, boolean_t nodirty)
3289 vdev_t *vd = msp->ms_group->mg_vd;
3293 ASSERT(MUTEX_HELD(&msp->ms_lock));
3295 metaslab_set_fragmentation(msp, nodirty);
3307 if (msp->ms_loaded) {
3308 msp->ms_max_size = metaslab_largest_allocatable(msp);
3310 msp->ms_max_size = MAX(msp->ms_max_size,
3311 metaslab_largest_unflushed_free(msp));
3319 (msp->ms_sm == NULL || msp->ms_sm->sm_dbuf->db_size ==
3321 weight = metaslab_segment_weight(msp);
3323 weight = metaslab_space_weight(msp);
3329 metaslab_recalculate_weight_and_sort(metaslab_t *msp)
3331 ASSERT(MUTEX_HELD(&msp->ms_lock));
3334 uint64_t was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
3335 metaslab_group_sort(msp->ms_group, msp,
3336 metaslab_weight(msp, B_FALSE) | was_active);
3340 metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp,
3344 ASSERT(MUTEX_HELD(&msp->ms_lock));
3351 ASSERT0(msp->ms_activation_weight);
3352 msp->ms_activation_weight = msp->ms_weight;
3353 metaslab_group_sort(mg, msp, msp->ms_weight |
3367 *mspp = msp;
3368 ASSERT3S(msp->ms_allocator, ==, -1);
3369 msp->ms_allocator = allocator;
3370 msp->ms_primary = (activation_weight == METASLAB_WEIGHT_PRIMARY);
3372 ASSERT0(msp->ms_activation_weight);
3373 msp->ms_activation_weight = msp->ms_weight;
3374 metaslab_group_sort_impl(mg, msp,
3375 msp->ms_weight | activation_weight);
3382 metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight)
3384 ASSERT(MUTEX_HELD(&msp->ms_lock));
3397 if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) {
3398 ASSERT(msp->ms_loaded);
3402 int error = metaslab_load(msp);
3404 metaslab_group_sort(msp->ms_group, msp, 0);
3423 if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) {
3424 if (msp->ms_allocator != allocator)
3427 if ((msp->ms_weight & activation_weight) == 0)
3431 msp->ms_primary);
3442 if (msp->ms_weight == 0) {
3443 ASSERT0(range_tree_space(msp->ms_allocatable));
3447 if ((error = metaslab_activate_allocator(msp->ms_group, msp,
3452 ASSERT(msp->ms_loaded);
3453 ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
3459 metaslab_passivate_allocator(metaslab_group_t *mg, metaslab_t *msp,
3462 ASSERT(MUTEX_HELD(&msp->ms_lock));
3463 ASSERT(msp->ms_loaded);
3465 if (msp->ms_weight & METASLAB_WEIGHT_CLAIM) {
3466 metaslab_group_sort(mg, msp, weight);
3471 ASSERT3P(msp->ms_group, ==, mg);
3472 ASSERT3S(0, <=, msp->ms_allocator);
3473 ASSERT3U(msp->ms_allocator, <, mg->mg_allocators);
3475 metaslab_group_allocator_t *mga = &mg->mg_allocator[msp->ms_allocator];
3476 if (msp->ms_primary) {
3477 ASSERT3P(mga->mga_primary, ==, msp);
3478 ASSERT(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
3481 ASSERT3P(mga->mga_secondary, ==, msp);
3482 ASSERT(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
3485 msp->ms_allocator = -1;
3486 metaslab_group_sort_impl(mg, msp, weight);
3491 metaslab_passivate(metaslab_t *msp, uint64_t weight)
3500 ASSERT(!WEIGHT_IS_SPACEBASED(msp->ms_weight) ||
3502 range_tree_space(msp->ms_allocatable) == 0);
3505 ASSERT(msp->ms_activation_weight != 0);
3506 msp->ms_activation_weight = 0;
3507 metaslab_passivate_allocator(msp->ms_group, msp, weight);
3508 ASSERT0(msp->ms_weight & METASLAB_ACTIVE_MASK);
3523 metaslab_segment_may_passivate(metaslab_t *msp)
3525 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
3527 if (WEIGHT_IS_SPACEBASED(msp->ms_weight) || spa_sync_pass(spa) > 1)
3535 uint64_t weight = metaslab_weight_from_range_tree(msp);
3536 int activation_idx = WEIGHT_GET_INDEX(msp->ms_activation_weight);
3540 metaslab_passivate(msp, weight);
3546 metaslab_t *msp = arg;
3547 metaslab_class_t *mc = msp->ms_group->mg_class;
3551 ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
3553 mutex_enter(&msp->ms_lock);
3554 (void) metaslab_load(msp);
3555 metaslab_set_selected_txg(msp, spa_syncing_txg(spa));
3556 mutex_exit(&msp->ms_lock);
3564 metaslab_t *msp;
3576 for (msp = avl_first(t); msp != NULL; msp = AVL_NEXT(t, msp)) {
3577 ASSERT3P(msp->ms_group, ==, mg);
3585 if (++m > metaslab_preload_limit && !msp->ms_condense_wanted) {
3590 msp, TQ_SLEEP | (m <= mg->mg_allocators ? TQ_FRONT : 0))
3618 metaslab_should_condense(metaslab_t *msp)
3620 space_map_t *sm = msp->ms_sm;
3621 vdev_t *vd = msp->ms_group->mg_vd;
3624 ASSERT(MUTEX_HELD(&msp->ms_lock));
3625 ASSERT(msp->ms_loaded);
3633 if (range_tree_numsegs(msp->ms_allocatable) == 0 ||
3634 msp->ms_condense_wanted)
3640 msp->ms_allocatable, SM_NO_VDEVID);
3655 metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
3658 space_map_t *sm = msp->ms_sm;
3660 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
3662 ASSERT(MUTEX_HELD(&msp->ms_lock));
3663 ASSERT(msp->ms_loaded);
3664 ASSERT(msp->ms_sm != NULL);
3709 ASSERT(range_tree_is_empty(msp->ms_freed)); /* since it is pass 1 */
3711 zfs_dbgmsg("condensing: txg %llu, msp[%llu] %px, vdev id %llu, "
3713 (u_longlong_t)txg, (u_longlong_t)msp->ms_id, msp,
3714 (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
3715 spa->spa_name, (u_longlong_t)space_map_length(msp->ms_sm),
3716 (u_longlong_t)range_tree_numsegs(msp->ms_allocatable),
3717 msp->ms_condense_wanted ? "TRUE" : "FALSE");
3719 msp->ms_condense_wanted = B_FALSE;
3723 type = metaslab_calculate_range_tree_type(msp->ms_group->mg_vd, msp,
3729 range_tree_walk(msp->ms_defer[t],
3734 range_tree_walk(msp->ms_allocating[(txg + t) & TXG_MASK],
3739 metaslab_unflushed_changes_memused(msp));
3741 metaslab_unflushed_changes_memused(msp);
3742 range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
3743 range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
3753 msp->ms_condensing = B_TRUE;
3755 mutex_exit(&msp->ms_lock);
3756 uint64_t object = space_map_object(msp->ms_sm);
3765 if (space_map_object(msp->ms_sm) != object) {
3766 object = space_map_object(msp->ms_sm);
3768 msp->ms_group->mg_vd->vdev_ms_array, sizeof (uint64_t) *
3769 msp->ms_id, sizeof (uint64_t), &object, tx);
3785 range_tree_add(tmp_tree, msp->ms_start, msp->ms_size);
3787 space_map_write(sm, msp->ms_allocatable, SM_FREE, SM_NO_VDEVID, tx);
3794 mutex_enter(&msp->ms_lock);
3796 msp->ms_condensing = B_FALSE;
3797 metaslab_flush_update(msp, tx);
3801 metaslab_unflushed_add(metaslab_t *msp, dmu_tx_t *tx)
3803 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
3805 ASSERT(msp->ms_sm != NULL);
3806 ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
3807 ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
3810 metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
3811 metaslab_set_unflushed_dirty(msp, B_TRUE);
3812 avl_add(&spa->spa_metaslabs_by_flushed, msp);
3820 metaslab_unflushed_bump(metaslab_t *msp, dmu_tx_t *tx, boolean_t dirty)
3822 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
3824 ASSERT(msp->ms_sm != NULL);
3825 ASSERT(metaslab_unflushed_txg(msp) != 0);
3826 ASSERT3P(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL), ==, msp);
3827 ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
3828 ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
3833 uint64_t ms_prev_flushed_txg = metaslab_unflushed_txg(msp);
3834 boolean_t ms_prev_flushed_dirty = metaslab_unflushed_dirty(msp);
3836 avl_remove(&spa->spa_metaslabs_by_flushed, msp);
3837 metaslab_set_unflushed_txg(msp, spa_syncing_txg(spa), tx);
3838 metaslab_set_unflushed_dirty(msp, dirty);
3839 avl_add(&spa->spa_metaslabs_by_flushed, msp);
3862 metaslab_flush_update(metaslab_t *msp, dmu_tx_t *tx)
3864 metaslab_group_t *mg = msp->ms_group;
3867 ASSERT(MUTEX_HELD(&msp->ms_lock));
3876 msp->ms_synced_length = space_map_length(msp->ms_sm);
3883 metaslab_unflushed_txg(msp) == 0)
3886 metaslab_unflushed_bump(msp, tx, B_FALSE);
3890 metaslab_flush(metaslab_t *msp, dmu_tx_t *tx)
3892 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
3894 ASSERT(MUTEX_HELD(&msp->ms_lock));
3898 ASSERT(msp->ms_sm != NULL);
3899 ASSERT(metaslab_unflushed_txg(msp) != 0);
3900 ASSERT(avl_find(&spa->spa_metaslabs_by_flushed, msp, NULL) != NULL);
3909 ASSERT3U(metaslab_unflushed_txg(msp), <, dmu_tx_get_txg(tx));
3915 if (msp->ms_loading)
3918 metaslab_verify_space(msp, dmu_tx_get_txg(tx));
3919 metaslab_verify_weight_and_frag(msp);
3932 if (msp->ms_loaded && metaslab_should_condense(msp)) {
3933 metaslab_group_t *mg = msp->ms_group;
3942 metaslab_group_histogram_remove(mg, msp);
3944 metaslab_condense(msp, tx);
3946 space_map_histogram_clear(msp->ms_sm);
3947 space_map_histogram_add(msp->ms_sm, msp->ms_allocatable, tx);
3948 ASSERT(range_tree_is_empty(msp->ms_freed));
3950 space_map_histogram_add(msp->ms_sm,
3951 msp->ms_defer[t], tx);
3953 metaslab_aux_histograms_update(msp);
3955 metaslab_group_histogram_add(mg, msp);
3959 metaslab_verify_space(msp, dmu_tx_get_txg(tx));
3968 metaslab_recalculate_weight_and_sort(msp);
3972 msp->ms_flushing = B_TRUE;
3973 uint64_t sm_len_before = space_map_length(msp->ms_sm);
3975 mutex_exit(&msp->ms_lock);
3976 space_map_write(msp->ms_sm, msp->ms_unflushed_allocs, SM_ALLOC,
3978 space_map_write(msp->ms_sm, msp->ms_unflushed_frees, SM_FREE,
3980 mutex_enter(&msp->ms_lock);
3982 uint64_t sm_len_after = space_map_length(msp->ms_sm);
3988 (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
3989 (u_longlong_t)msp->ms_id,
3990 (u_longlong_t)range_tree_space(msp->ms_unflushed_allocs),
3991 (u_longlong_t)range_tree_space(msp->ms_unflushed_frees),
3996 metaslab_unflushed_changes_memused(msp));
3998 metaslab_unflushed_changes_memused(msp);
3999 range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
4000 range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
4002 metaslab_verify_space(msp, dmu_tx_get_txg(tx));
4003 metaslab_verify_weight_and_frag(msp);
4005 metaslab_flush_update(msp, tx);
4007 metaslab_verify_space(msp, dmu_tx_get_txg(tx));
4008 metaslab_verify_weight_and_frag(msp);
4010 msp->ms_flushing = B_FALSE;
4011 cv_broadcast(&msp->ms_flush_cv);
4019 metaslab_sync(metaslab_t *msp, uint64_t txg)
4021 metaslab_group_t *mg = msp->ms_group;
4025 range_tree_t *alloctree = msp->ms_allocating[txg & TXG_MASK];
4033 if (msp->ms_new) {
4035 ASSERT0(range_tree_space(msp->ms_freeing));
4036 ASSERT0(range_tree_space(msp->ms_freed));
4037 ASSERT0(range_tree_space(msp->ms_checkpointing));
4038 ASSERT0(range_tree_space(msp->ms_trim));
4054 range_tree_is_empty(msp->ms_freeing) &&
4055 range_tree_is_empty(msp->ms_checkpointing) &&
4056 !(msp->ms_loaded && msp->ms_condense_wanted &&
4083 if (msp->ms_sm == NULL) {
4091 msp->ms_id, sizeof (uint64_t), &new_object, tx);
4093 VERIFY0(space_map_open(&msp->ms_sm, mos, new_object,
4094 msp->ms_start, msp->ms_size, vd->vdev_ashift));
4095 ASSERT(msp->ms_sm != NULL);
4097 ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
4098 ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
4099 ASSERT0(metaslab_allocated_space(msp));
4102 if (!range_tree_is_empty(msp->ms_checkpointing) &&
4124 mutex_enter(&msp->ms_sync_lock);
4125 mutex_enter(&msp->ms_lock);
4134 metaslab_group_histogram_remove(mg, msp);
4136 if (spa->spa_sync_pass == 1 && msp->ms_loaded &&
4137 metaslab_should_condense(msp))
4138 metaslab_condense(msp, tx);
4145 mutex_exit(&msp->ms_lock);
4149 if (metaslab_unflushed_txg(msp) == 0)
4150 metaslab_unflushed_add(msp, tx);
4151 else if (!metaslab_unflushed_dirty(msp))
4152 metaslab_unflushed_bump(msp, tx, B_TRUE);
4156 space_map_write(log_sm, msp->ms_freeing, SM_FREE,
4158 mutex_enter(&msp->ms_lock);
4161 metaslab_unflushed_changes_memused(msp));
4163 metaslab_unflushed_changes_memused(msp);
4165 msp->ms_unflushed_frees, msp->ms_unflushed_allocs);
4166 range_tree_remove_xor_add(msp->ms_freeing,
4167 msp->ms_unflushed_allocs, msp->ms_unflushed_frees);
4169 metaslab_unflushed_changes_memused(msp);
4173 space_map_write(msp->ms_sm, alloctree, SM_ALLOC,
4175 space_map_write(msp->ms_sm, msp->ms_freeing, SM_FREE,
4177 mutex_enter(&msp->ms_lock);
4180 msp->ms_allocated_space += range_tree_space(alloctree);
4181 ASSERT3U(msp->ms_allocated_space, >=,
4182 range_tree_space(msp->ms_freeing));
4183 msp->ms_allocated_space -= range_tree_space(msp->ms_freeing);
4185 if (!range_tree_is_empty(msp->ms_checkpointing)) {
4195 mutex_exit(&msp->ms_lock);
4197 msp->ms_checkpointing, SM_FREE, SM_NO_VDEVID, tx);
4198 mutex_enter(&msp->ms_lock);
4201 range_tree_space(msp->ms_checkpointing);
4203 range_tree_space(msp->ms_checkpointing);
4207 range_tree_vacate(msp->ms_checkpointing, NULL, NULL);
4210 if (msp->ms_loaded) {
4217 space_map_histogram_clear(msp->ms_sm);
4218 space_map_histogram_add(msp->ms_sm, msp->ms_allocatable, tx);
4227 space_map_histogram_add(msp->ms_sm, msp->ms_freed, tx);
4237 space_map_histogram_add(msp->ms_sm,
4238 msp->ms_defer[t], tx);
4249 space_map_histogram_add(msp->ms_sm, msp->ms_freeing, tx);
4250 metaslab_aux_histograms_update(msp);
4252 metaslab_group_histogram_add(mg, msp);
4267 range_tree_swap(&msp->ms_freeing, &msp->ms_freed);
4268 ASSERT0(msp->ms_allocated_this_txg);
4270 range_tree_vacate(msp->ms_freeing,
4271 range_tree_add, msp->ms_freed);
4273 msp->ms_allocated_this_txg += range_tree_space(alloctree);
4276 ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK]));
4277 ASSERT0(range_tree_space(msp->ms_allocating[TXG_CLEAN(txg)
4279 ASSERT0(range_tree_space(msp->ms_freeing));
4280 ASSERT0(range_tree_space(msp->ms_checkpointing));
4282 mutex_exit(&msp->ms_lock);
4290 msp->ms_id * sizeof (uint64_t), sizeof (uint64_t), &object, 0));
4291 VERIFY3U(object, ==, space_map_object(msp->ms_sm));
4293 mutex_exit(&msp->ms_sync_lock);
4298 metaslab_evict(metaslab_t *msp, uint64_t txg)
4300 if (!msp->ms_loaded || msp->ms_disabled != 0)
4305 msp->ms_allocating[(txg + t) & TXG_MASK]));
4307 if (msp->ms_allocator != -1)
4308 metaslab_passivate(msp, msp->ms_weight & ~METASLAB_ACTIVE_MASK);
4311 metaslab_unload(msp);
4319 metaslab_sync_done(metaslab_t *msp, uint64_t txg)
4321 metaslab_group_t *mg = msp->ms_group;
4330 mutex_enter(&msp->ms_lock);
4332 if (msp->ms_new) {
4334 metaslab_space_update(vd, mg->mg_class, 0, 0, msp->ms_size);
4337 VERIFY0(msp->ms_allocated_this_txg);
4338 VERIFY0(range_tree_space(msp->ms_freed));
4341 ASSERT0(range_tree_space(msp->ms_freeing));
4342 ASSERT0(range_tree_space(msp->ms_checkpointing));
4344 defer_tree = &msp->ms_defer[txg % TXG_DEFER_SIZE];
4354 alloc_delta = msp->ms_allocated_this_txg -
4355 range_tree_space(msp->ms_freed);
4358 defer_delta = range_tree_space(msp->ms_freed) -
4374 metaslab_load_wait(msp);
4388 range_tree_walk(*defer_tree, range_tree_add, msp->ms_trim);
4390 range_tree_walk(msp->ms_freed, range_tree_add,
4391 msp->ms_trim);
4394 range_tree_vacate(msp->ms_trim, NULL, NULL);
4404 msp->ms_loaded ? range_tree_add : NULL, msp->ms_allocatable);
4406 range_tree_swap(&msp->ms_freed, defer_tree);
4408 range_tree_vacate(msp->ms_freed,
4409 msp->ms_loaded ? range_tree_add : NULL,
4410 msp->ms_allocatable);
4413 msp->ms_synced_length = space_map_length(msp->ms_sm);
4415 msp->ms_deferspace += defer_delta;
4416 ASSERT3S(msp->ms_deferspace, >=, 0);
4417 ASSERT3S(msp->ms_deferspace, <=, msp->ms_size);
4418 if (msp->ms_deferspace != 0) {
4423 vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
4425 metaslab_aux_histograms_update_done(msp, defer_allowed);
4427 if (msp->ms_new) {
4428 msp->ms_new = B_FALSE;
4438 metaslab_recalculate_weight_and_sort(msp);
4440 ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK]));
4441 ASSERT0(range_tree_space(msp->ms_freeing));
4442 ASSERT0(range_tree_space(msp->ms_freed));
4443 ASSERT0(range_tree_space(msp->ms_checkpointing));
4444 msp->ms_allocating_total -= msp->ms_allocated_this_txg;
4445 msp->ms_allocated_this_txg = 0;
4446 mutex_exit(&msp->ms_lock);
4477 metaslab_is_unique(metaslab_t *msp, dva_t *dva)
4484 if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva))
4487 dva_ms_id = DVA_GET_OFFSET(dva) >> msp->ms_group->mg_vd->vdev_ms_shift;
4489 return (msp->ms_id != dva_ms_id);
4503 metaslab_t *msp, uint64_t psize, uint32_t dva_id, uint64_t offset,
4533 mat->mat_msp = msp;
4540 if (msp != NULL)
4541 mat->mat_weight = msp->ms_weight;
4648 metaslab_block_alloc(metaslab_t *msp, uint64_t size, uint64_t txg)
4651 range_tree_t *rt = msp->ms_allocatable;
4652 metaslab_class_t *mc = msp->ms_group->mg_class;
4654 ASSERT(MUTEX_HELD(&msp->ms_lock));
4655 VERIFY(!msp->ms_condensing);
4656 VERIFY0(msp->ms_disabled);
4657 VERIFY0(msp->ms_new);
4659 start = mc->mc_ops->msop_alloc(msp, size);
4661 metaslab_group_t *mg = msp->ms_group;
4666 VERIFY3U(range_tree_space(rt) - size, <=, msp->ms_size);
4668 range_tree_clear(msp->ms_trim, start, size);
4670 if (range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
4671 vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
4673 range_tree_add(msp->ms_allocating[txg & TXG_MASK], start, size);
4674 msp->ms_allocating_total += size;
4677 msp->ms_alloc_txg = txg;
4678 metaslab_verify_space(msp, txg);
4685 msp->ms_max_size = metaslab_largest_allocatable(msp);
4708 metaslab_t *msp = avl_find(t, search, &idx);
4709 if (msp == NULL)
4710 msp = avl_nearest(t, idx, AVL_AFTER);
4713 for (; msp != NULL; msp = AVL_NEXT(t, msp)) {
4722 if (!metaslab_should_allocate(msp, asize, try_hard)) {
4723 metaslab_trace_add(zal, mg, msp, asize, d,
4732 if (msp->ms_condensing || msp->ms_disabled > 0 || msp->ms_new)
4735 *was_active = msp->ms_allocator != -1;
4748 !metaslab_is_unique(msp, &dva[i]))
4755 if (msp != NULL) {
4756 search->ms_weight = msp->ms_weight;
4757 search->ms_start = msp->ms_start + 1;
4758 search->ms_allocator = msp->ms_allocator;
4759 search->ms_primary = msp->ms_primary;
4761 return (msp);
4765 metaslab_active_mask_verify(metaslab_t *msp)
4767 ASSERT(MUTEX_HELD(&msp->ms_lock));
4772 if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0)
4775 if (msp->ms_weight & METASLAB_WEIGHT_PRIMARY) {
4776 VERIFY0(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
4777 VERIFY0(msp->ms_weight & METASLAB_WEIGHT_CLAIM);
4778 VERIFY3S(msp->ms_allocator, !=, -1);
4779 VERIFY(msp->ms_primary);
4783 if (msp->ms_weight & METASLAB_WEIGHT_SECONDARY) {
4784 VERIFY0(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
4785 VERIFY0(msp->ms_weight & METASLAB_WEIGHT_CLAIM);
4786 VERIFY3S(msp->ms_allocator, !=, -1);
4787 VERIFY(!msp->ms_primary);
4791 if (msp->ms_weight & METASLAB_WEIGHT_CLAIM) {
4792 VERIFY0(msp->ms_weight & METASLAB_WEIGHT_PRIMARY);
4793 VERIFY0(msp->ms_weight & METASLAB_WEIGHT_SECONDARY);
4794 VERIFY3S(msp->ms_allocator, ==, -1);
4804 metaslab_t *msp = NULL;
4848 msp = mga->mga_primary;
4856 ASSERT(msp->ms_primary);
4857 ASSERT3S(msp->ms_allocator, ==, allocator);
4858 ASSERT(msp->ms_loaded);
4861 ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
4864 msp = mga->mga_secondary;
4870 ASSERT(!msp->ms_primary);
4871 ASSERT3S(msp->ms_allocator, ==, allocator);
4872 ASSERT(msp->ms_loaded);
4875 ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
4877 msp = find_valid_metaslab(mg, activation_weight, dva, d,
4883 if (msp == NULL) {
4887 mutex_enter(&msp->ms_lock);
4889 metaslab_active_mask_verify(msp);
4897 metaslab_t *, msp, uint64_t, activation_weight,
4909 if (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK)) {
4910 ASSERT3S(msp->ms_allocator, ==, -1);
4911 mutex_exit(&msp->ms_lock);
4921 if (!was_active && (msp->ms_weight & METASLAB_ACTIVE_MASK) &&
4922 (msp->ms_allocator != -1) &&
4923 (msp->ms_allocator != allocator || ((activation_weight ==
4924 METASLAB_WEIGHT_PRIMARY) != msp->ms_primary))) {
4925 ASSERT(msp->ms_loaded);
4926 ASSERT((msp->ms_weight & METASLAB_WEIGHT_CLAIM) ||
4927 msp->ms_allocator != -1);
4928 mutex_exit(&msp->ms_lock);
4939 if (msp->ms_weight & METASLAB_WEIGHT_CLAIM &&
4941 ASSERT(msp->ms_loaded);
4942 ASSERT3S(msp->ms_allocator, ==, -1);
4943 metaslab_passivate(msp, msp->ms_weight &
4945 mutex_exit(&msp->ms_lock);
4949 metaslab_set_selected_txg(msp, txg);
4952 metaslab_activate(msp, allocator, activation_weight);
4953 metaslab_active_mask_verify(msp);
4974 mutex_exit(&msp->ms_lock);
4977 ASSERT(msp->ms_loaded);
4986 if (!metaslab_should_allocate(msp, asize, try_hard)) {
4988 metaslab_trace_add(zal, mg, msp, asize, d,
5000 if (msp->ms_condensing) {
5001 metaslab_trace_add(zal, mg, msp, asize, d,
5004 metaslab_passivate(msp, msp->ms_weight &
5007 mutex_exit(&msp->ms_lock);
5009 } else if (msp->ms_disabled > 0) {
5010 metaslab_trace_add(zal, mg, msp, asize, d,
5013 metaslab_passivate(msp, msp->ms_weight &
5016 mutex_exit(&msp->ms_lock);
5020 offset = metaslab_block_alloc(msp, asize, txg);
5021 metaslab_trace_add(zal, mg, msp, asize, d, offset, allocator);
5026 metaslab_segment_may_passivate(msp);
5030 ASSERT(msp->ms_loaded);
5037 DTRACE_PROBE2(ms__alloc__failure, metaslab_t *, msp,
5063 if (WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
5064 weight = metaslab_largest_allocatable(msp);
5067 weight = metaslab_weight_from_range_tree(msp);
5071 metaslab_passivate(msp, weight);
5085 weight |= msp->ms_weight & METASLAB_ACTIVE_MASK;
5086 metaslab_group_sort(mg, msp, weight);
5088 metaslab_active_mask_verify(msp);
5096 ASSERT(!metaslab_should_allocate(msp, asize, try_hard));
5098 mutex_exit(&msp->ms_lock);
5100 mutex_exit(&msp->ms_lock);
5375 metaslab_t *msp;
5382 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
5384 VERIFY(!msp->ms_condensing);
5385 VERIFY3U(offset, >=, msp->ms_start);
5386 VERIFY3U(offset + asize, <=, msp->ms_start + msp->ms_size);
5392 mutex_enter(&msp->ms_lock);
5393 if (range_tree_is_empty(msp->ms_freeing) &&
5394 range_tree_is_empty(msp->ms_checkpointing)) {
5395 vdev_dirty(vd, VDD_METASLAB, msp, spa_syncing_txg(spa));
5400 range_tree_add(msp->ms_checkpointing, offset, asize);
5402 range_tree_add(msp->ms_freeing, offset, asize);
5404 mutex_exit(&msp->ms_lock);
5595 metaslab_t *msp;
5623 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
5625 mutex_enter(&msp->ms_lock);
5626 range_tree_remove(msp->ms_allocating[txg & TXG_MASK],
5628 msp->ms_allocating_total -= size;
5630 VERIFY(!msp->ms_condensing);
5631 VERIFY3U(offset, >=, msp->ms_start);
5632 VERIFY3U(offset + size, <=, msp->ms_start + msp->ms_size);
5633 VERIFY3U(range_tree_space(msp->ms_allocatable) + size, <=,
5634 msp->ms_size);
5637 range_tree_add(msp->ms_allocatable, offset, size);
5638 mutex_exit(&msp->ms_lock);
5711 metaslab_t *msp;
5719 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
5721 mutex_enter(&msp->ms_lock);
5723 if ((txg != 0 && spa_writeable(spa)) || !msp->ms_loaded) {
5724 error = metaslab_activate(msp, 0, METASLAB_WEIGHT_CLAIM);
5726 ASSERT(msp->ms_loaded);
5727 ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
5733 !range_tree_contains(msp->ms_allocatable, offset, size))
5737 mutex_exit(&msp->ms_lock);
5741 VERIFY(!msp->ms_condensing);
5744 VERIFY3U(range_tree_space(msp->ms_allocatable) - size, <=,
5745 msp->ms_size);
5746 range_tree_remove(msp->ms_allocatable, offset, size);
5747 range_tree_clear(msp->ms_trim, offset, size);
5750 metaslab_class_t *mc = msp->ms_group->mg_class;
5752 multilist_sublist_lock_obj(&mc->mc_metaslab_txg_list, msp);
5753 if (!multilist_link_active(&msp->ms_class_txg_node)) {
5754 msp->ms_selected_txg = txg;
5755 multilist_sublist_insert_head(mls, msp);
5759 if (range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
5760 vdev_dirty(vd, VDD_METASLAB, msp, txg);
5761 range_tree_add(msp->ms_allocating[txg & TXG_MASK],
5763 msp->ms_allocating_total += size;
5766 mutex_exit(&msp->ms_lock);
5998 metaslab_t *msp;
6014 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
6016 mutex_enter(&msp->ms_lock);
6017 if (msp->ms_loaded) {
6018 range_tree_verify_not_present(msp->ms_allocatable,
6033 range_tree_verify_not_present(msp->ms_freeing, offset, size);
6034 range_tree_verify_not_present(msp->ms_checkpointing, offset, size);
6035 range_tree_verify_not_present(msp->ms_freed, offset, size);
6037 range_tree_verify_not_present(msp->ms_defer[j], offset, size);
6038 range_tree_verify_not_present(msp->ms_trim, offset, size);
6039 mutex_exit(&msp->ms_lock);
6094 metaslab_disable(metaslab_t *msp)
6096 ASSERT(!MUTEX_HELD(&msp->ms_lock));
6097 metaslab_group_t *mg = msp->ms_group;
6112 if (msp->ms_disabled == 0) {
6115 mutex_enter(&msp->ms_lock);
6116 msp->ms_disabled++;
6117 mutex_exit(&msp->ms_lock);
6125 metaslab_enable(metaslab_t *msp, boolean_t sync, boolean_t unload)
6127 metaslab_group_t *mg = msp->ms_group;
6139 mutex_enter(&msp->ms_lock);
6140 if (--msp->ms_disabled == 0) {
6144 metaslab_unload(msp);
6146 mutex_exit(&msp->ms_lock);