14769Sdp78419 /*
24769Sdp78419 * CDDL HEADER START
34769Sdp78419 *
44769Sdp78419 * The contents of this file are subject to the terms of the
54769Sdp78419 * Common Development and Distribution License (the "License").
64769Sdp78419 * You may not use this file except in compliance with the License.
74769Sdp78419 *
84769Sdp78419 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
94769Sdp78419 * or http://www.opensolaris.org/os/licensing.
104769Sdp78419 * See the License for the specific language governing permissions
114769Sdp78419 * and limitations under the License.
124769Sdp78419 *
134769Sdp78419 * When distributing Covered Code, include this CDDL HEADER in each
144769Sdp78419 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
154769Sdp78419 * If applicable, add the following below this CDDL HEADER, with the
164769Sdp78419 * fields enclosed by brackets "[]" replaced with your own identifying
174769Sdp78419 * information: Portions Copyright [yyyy] [name of copyright owner]
184769Sdp78419 *
194769Sdp78419 * CDDL HEADER END
204769Sdp78419 */
214769Sdp78419
224769Sdp78419 /*
23*12013SHaik.Aftandilian@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
244769Sdp78419 * Use is subject to license terms.
254769Sdp78419 */
264769Sdp78419
274769Sdp78419 #include <sys/types.h>
284769Sdp78419 #include <sys/sysmacros.h>
294769Sdp78419 #include <sys/machsystm.h>
304769Sdp78419 #include <sys/machparam.h>
314769Sdp78419 #include <sys/cmn_err.h>
324769Sdp78419 #include <sys/stat.h>
334769Sdp78419 #include <sys/mach_descrip.h>
344769Sdp78419 #include <sys/memnode.h>
354769Sdp78419 #include <sys/mdesc.h>
364769Sdp78419 #include <sys/mpo.h>
3710106SJason.Beloro@Sun.COM #include <vm/page.h>
384769Sdp78419 #include <vm/vm_dep.h>
395079Sjc25722 #include <vm/hat_sfmmu.h>
405358Sjc25722 #include <sys/promif.h>
414769Sdp78419
424769Sdp78419 /*
434769Sdp78419 * MPO and the sun4v memory representation
444769Sdp78419 * ---------------------------------------
454769Sdp78419 *
 * Latency groups are defined in the sun4v architecture by memory-latency-group
474769Sdp78419 * nodes in the Machine Description, as specified in FWARC/2007/260. These
484769Sdp78419 * tie together cpu nodes and mblock nodes, and contain mask and match
494769Sdp78419 * properties that identify the portion of an mblock that belongs to the
504769Sdp78419 * lgroup. Mask and match are defined in the Physical Address (PA) space,
514769Sdp78419 * but an mblock defines Real Addresses (RA). To translate, the mblock
524769Sdp78419 * includes the property address-congruence-offset, hereafter referred to as
534769Sdp78419 * ra_to_pa. A real address ra is a member of an lgroup if
544769Sdp78419 *
554769Sdp78419 * (ra + mblock.ra_to_pa) & lgroup.mask == lgroup.match
564769Sdp78419 *
574769Sdp78419 * The MD is traversed, and information on all mblocks is kept in the array
584769Sdp78419 * mpo_mblock[]. Information on all CPUs, including which lgroup they map
594769Sdp78419 * to, is kept in the array mpo_cpu[].
604769Sdp78419 *
614769Sdp78419 * This implementation makes (and verifies) the simplifying assumption that
624769Sdp78419 * the mask bits are the same for all defined lgroups, and that all 1 bits in
634769Sdp78419 * the mask are contiguous. Thus the number of lgroups is bounded by the
644769Sdp78419 * number of possible mask values, and the lgrp_handle_t is defined as the
654769Sdp78419 * mask value, shifted right to eliminate the 0 bit positions in mask. The
664769Sdp78419 * masks and values are also referred to as "home bits" in the code.
674769Sdp78419 *
684769Sdp78419 * A mem_node is defined to be 1:1 with an lgrp_handle_t, thus each lgroup
694769Sdp78419 * has exactly 1 mem_node, and plat_pfn_to_mem_node() must find the mblock
704769Sdp78419 * containing a pfn, apply the mblock's ra_to_pa adjustment, and extract the
714769Sdp78419 * home bits. This yields the mem_node.
724769Sdp78419 *
734769Sdp78419 * Interfaces
744769Sdp78419 * ----------
754769Sdp78419 *
764769Sdp78419 * This file exports the following entry points:
774769Sdp78419 *
784769Sdp78419 * plat_lgrp_init()
794769Sdp78419 * plat_build_mem_nodes()
804769Sdp78419 * plat_lgrp_cpu_to_hand()
814769Sdp78419 * plat_lgrp_latency()
824769Sdp78419 * plat_pfn_to_mem_node()
834769Sdp78419 * These implement the usual platform lgroup interfaces.
844769Sdp78419 *
854769Sdp78419 * plat_rapfn_to_papfn()
864769Sdp78419 * Recover the PA page coloring bits from an RA.
874769Sdp78419 *
884769Sdp78419 * plat_mem_node_iterator_init()
894769Sdp78419 * Initialize an iterator to efficiently step through pages in a mem_node.
904769Sdp78419 *
914769Sdp78419 * plat_mem_node_intersect_range()
924769Sdp78419 * Find the intersection with a mem_node.
9310106SJason.Beloro@Sun.COM *
9410106SJason.Beloro@Sun.COM * plat_slice_add()
9510106SJason.Beloro@Sun.COM * plat_slice_del()
9610106SJason.Beloro@Sun.COM * Platform hooks to add/delete a pfn range.
9710106SJason.Beloro@Sun.COM *
9810106SJason.Beloro@Sun.COM * Internal Organization
9910106SJason.Beloro@Sun.COM * ---------------------
10010106SJason.Beloro@Sun.COM *
 * A number of routines are used by both the boot and DR code paths to
 * (re)build the appropriate MPO structures.
10310106SJason.Beloro@Sun.COM *
10410106SJason.Beloro@Sun.COM * mblock_alloc()
10510106SJason.Beloro@Sun.COM * Allocate memory for mblocks and stripes as
10610106SJason.Beloro@Sun.COM * appropriate for boot or memory DR.
10710106SJason.Beloro@Sun.COM *
10810106SJason.Beloro@Sun.COM * mblock_free()
10910106SJason.Beloro@Sun.COM * Free memory allocated by mblock_alloc.
11010106SJason.Beloro@Sun.COM *
11110106SJason.Beloro@Sun.COM * mblock_update()
11210106SJason.Beloro@Sun.COM * Build mblocks based on mblock nodes read from the MD.
11310106SJason.Beloro@Sun.COM *
11410106SJason.Beloro@Sun.COM * mblock_update_add()
11510106SJason.Beloro@Sun.COM * Rebuild mblocks after a memory DR add operation.
11610106SJason.Beloro@Sun.COM *
11710106SJason.Beloro@Sun.COM * mblock_update_del()
11810106SJason.Beloro@Sun.COM * Rebuild mblocks after a memory DR delete operation.
11910106SJason.Beloro@Sun.COM *
12010106SJason.Beloro@Sun.COM * mblock_install()
12110106SJason.Beloro@Sun.COM * Install mblocks as the new configuration.
12210106SJason.Beloro@Sun.COM *
12310106SJason.Beloro@Sun.COM * mstripe_update()
12410106SJason.Beloro@Sun.COM * Build stripes based on mblocks.
12510106SJason.Beloro@Sun.COM *
12610106SJason.Beloro@Sun.COM * mnode_update()
12710106SJason.Beloro@Sun.COM * Call memnode layer to add/del a pfn range, based on stripes.
12810106SJason.Beloro@Sun.COM *
12910106SJason.Beloro@Sun.COM * The platform interfaces allocate all memory required for the
 * particular update first, block access to the MPO structures
13110106SJason.Beloro@Sun.COM * while they are updated, and free old structures after the update.
1324769Sdp78419 */
1334769Sdp78419
int	sun4v_mpo_enable = 1;		/* set to 0 to disable MPO entirely */
int	sun4v_mpo_debug = 0;		/* enables MPO_DEBUG printf tracing */
char	sun4v_mpo_status[256] = "";	/* last MPO_STATUS message, for mdb */

/* Save CPU info from the MD and associate CPUs with lgroups */
static	struct cpu_md mpo_cpu[NCPU];

/* Save lgroup info from the MD */
#define	MAX_MD_LGROUPS 32
static	struct	lgrp_md mpo_lgroup[MAX_MD_LGROUPS];
static	int	n_lgrpnodes = 0;	/* number of valid entries in mpo_lgroup[] */
static	int	n_locality_groups = 0;	/* number of unique "home" lgroups found */
static	int	max_locality_groups = 0; /* number of values the home bits can take */
/* page size-class mask; not referenced in this chunk — used elsewhere in file */
static	int	szc_mask0 = 0;

/* Save mblocks from the MD */
#define	SMALL_MBLOCKS_COUNT	8
static	struct	mblock_md *mpo_mblock;	/* points at small_mpo_mblocks or kmem */
static	struct	mblock_md small_mpo_mblocks[SMALL_MBLOCKS_COUNT];
static	int	n_mblocks = 0;

/* Save mem_node stripes calculated from mblocks and lgroups. */
static	mem_stripe_t *mem_stripes;	/* points at small_mem_stripes or kmem */
static	mem_stripe_t small_mem_stripes[SMALL_MBLOCKS_COUNT * MAX_MEM_NODES];
static	int	n_mem_stripes = 0;
static	pfn_t	mnode_stride;	/* distance between stripes, start to start */
static	int	stripe_shift;	/* stride/stripes expressed as a shift */
static	pfn_t	mnode_pages;	/* mem_node stripe width */

/* Save home mask and shift used to calculate lgrp_handle_t values */
static	uint64_t home_mask = 0;		/* home bits, in PA byte space */
static	pfn_t	home_mask_pfn = 0;	/* home bits, in pfn space */
static	int	home_mask_shift = 0;	/* bit position of lowest home bit */
static	uint_t	home_mask_pfn_shift = 0; /* same, relative to pfn space */

/* Save lowest and highest latencies found across all lgroups */
static	int	lower_latency = 0;	/* local (lowest) latency, ns/10 */
static	int	higher_latency = 0;	/* remote (highest) latency, ns/10 */

static	pfn_t	base_ra_to_pa_pfn = 0;	/* ra_to_pa for single mblock memory */
static	int	mpo_genid;		/* config gen; updated by mem DR */
static	mpo_config_t mpo_config;	/* current mblocks and stripes */

/* Kind of mnode/mblock update being performed (see mnode_update()). */
typedef enum { U_ADD, U_ADD_ALL, U_DEL } update_t;
1784769Sdp78419
1794769Sdp78419 static int valid_pages(md_t *md, mde_cookie_t cpu0);
1804769Sdp78419 static int unique_home_mem_lg_count(uint64_t mem_lg_homeset);
1814769Sdp78419 static int fix_interleave(void);
1824769Sdp78419
18310106SJason.Beloro@Sun.COM static int mblock_alloc(mpo_config_t *, update_t, int nmblocks);
18410106SJason.Beloro@Sun.COM static void mblock_install(mpo_config_t *);
18510106SJason.Beloro@Sun.COM static void mblock_free(mpo_config_t *);
18610106SJason.Beloro@Sun.COM static void mblock_update(mpo_config_t *, md_t, mde_cookie_t *mblocknodes);
18710106SJason.Beloro@Sun.COM static void mblock_update_add(mpo_config_t *);
18810106SJason.Beloro@Sun.COM static void mblock_update_del(mpo_config_t *, mpo_config_t *, pfn_t, pfn_t);
18910106SJason.Beloro@Sun.COM static void mstripe_update(mpo_config_t *);
19010106SJason.Beloro@Sun.COM static void mnode_update(mpo_config_t *, pfn_t, pfn_t, update_t);
19110106SJason.Beloro@Sun.COM
/* Debug support */
#if defined(DEBUG) && !defined(lint)
/* Assert that [base, end] (pfns) is aligned to the largest (256M) page size. */
#define	VALIDATE_SLICE(base, end) { \
	ASSERT(IS_P2ALIGNED(ptob(base), TTEBYTES(TTE256M))); \
	ASSERT(IS_P2ALIGNED(ptob(end - base + 1), TTEBYTES(TTE256M))); \
}
#define	MPO_DEBUG(args...) if (sun4v_mpo_debug) printf(args)
#else
#define	VALIDATE_SLICE(base, end)
#define	MPO_DEBUG(...)
#endif	/* DEBUG */

/* Record status message, viewable from mdb */
/*
 * NOTE(review): sun4v_mpo_status is itself passed to MPO_DEBUG as the
 * format string; a '%' produced by args would be re-interpreted — confirm
 * all callers pass literal formats without embedded '%' in expanded text.
 */
#define	MPO_STATUS(args...) { \
	(void) snprintf(sun4v_mpo_status, sizeof (sun4v_mpo_status), args); \
	MPO_DEBUG(sun4v_mpo_status); \
}

/*
 * The MPO locks are to protect the MPO metadata while that
 * information is updated as a result of a memory DR operation.
 * The read lock must be acquired to read the metadata and the
 * write locks must be acquired to update it.
 */
#define	mpo_rd_lock	kpreempt_disable
#define	mpo_rd_unlock	kpreempt_enable
21810106SJason.Beloro@Sun.COM
/*
 * Acquire the MPO write "lock" by pausing all other CPUs.  Readers run
 * with preemption disabled (see mpo_rd_lock above), so once every other
 * CPU is paused no reader can be mid-access to the MPO metadata.
 * cpu_lock is held only around the pause itself, not for the duration
 * of the update; mpo_wr_unlock() resumes the CPUs.
 */
static void
mpo_wr_lock()
{
	mutex_enter(&cpu_lock);
	pause_cpus(NULL);
	mutex_exit(&cpu_lock);
}
22610106SJason.Beloro@Sun.COM
/*
 * Release the MPO write "lock": resume the CPUs paused by mpo_wr_lock(),
 * allowing readers (which run with preemption disabled) to proceed.
 */
static void
mpo_wr_unlock()
{
	mutex_enter(&cpu_lock);
	start_cpus();
	mutex_exit(&cpu_lock);
}
23410106SJason.Beloro@Sun.COM
23510106SJason.Beloro@Sun.COM /*
2364769Sdp78419 * Routine to read a uint64_t from a given md
2374769Sdp78419 */
2384769Sdp78419 static int64_t
get_int(md_t md,mde_cookie_t node,char * propname,uint64_t * val)2394769Sdp78419 get_int(md_t md, mde_cookie_t node, char *propname, uint64_t *val)
2404769Sdp78419 {
2414769Sdp78419 int err = md_get_prop_val(md, node, propname, val);
2424769Sdp78419 return (err);
2434769Sdp78419 }
2444769Sdp78419
2454769Sdp78419 static int
mblock_cmp(const void * a,const void * b)2464769Sdp78419 mblock_cmp(const void *a, const void *b)
2474769Sdp78419 {
2484769Sdp78419 struct mblock_md *m1 = (struct mblock_md *)a;
2494769Sdp78419 struct mblock_md *m2 = (struct mblock_md *)b;
2504769Sdp78419
2514769Sdp78419 if (m1->base < m2->base)
2524769Sdp78419 return (-1);
2534769Sdp78419 else if (m1->base == m2->base)
2544769Sdp78419 return (0);
2554769Sdp78419 else
2564769Sdp78419 return (1);
2574769Sdp78419 }
2584769Sdp78419
2594769Sdp78419 static void
mblock_sort(struct mblock_md * mblocks,int n)2604769Sdp78419 mblock_sort(struct mblock_md *mblocks, int n)
2614769Sdp78419 {
2624769Sdp78419 extern void qsort(void *, size_t, size_t,
2634769Sdp78419 int (*)(const void *, const void *));
2644769Sdp78419
2654769Sdp78419 qsort(mblocks, n, sizeof (mblocks[0]), mblock_cmp);
2664769Sdp78419 }
2674769Sdp78419
2685468Sjc25722 static void
mpo_update_tunables(void)2695468Sjc25722 mpo_update_tunables(void)
2705468Sjc25722 {
2715468Sjc25722 int i, ncpu_min;
2725468Sjc25722
2735468Sjc25722 /*
2745468Sjc25722 * lgrp_expand_proc_thresh is the minimum load on the lgroups
2755468Sjc25722 * this process is currently running on before considering
2765468Sjc25722 * expanding threads to another lgroup.
2775468Sjc25722 *
2785468Sjc25722 * lgrp_expand_proc_diff determines how much less the remote lgroup
2795468Sjc25722 * must be loaded before expanding to it.
2805468Sjc25722 *
2815468Sjc25722 * On sun4v CMT processors, threads share a core pipeline, and
2825468Sjc25722 * at less than 100% utilization, best throughput is obtained by
2835468Sjc25722 * spreading threads across more cores, even if some are in a
2845468Sjc25722 * different lgroup. Spread threads to a new lgroup if the
2855468Sjc25722 * current group is more than 50% loaded. Because of virtualization,
2865468Sjc25722 * lgroups may have different numbers of CPUs, but the tunables
2875468Sjc25722 * apply to all lgroups, so find the smallest lgroup and compute
2885468Sjc25722 * 50% loading.
2895468Sjc25722 */
2905468Sjc25722
2915468Sjc25722 ncpu_min = NCPU;
2925468Sjc25722 for (i = 0; i < n_lgrpnodes; i++) {
2935468Sjc25722 int ncpu = mpo_lgroup[i].ncpu;
2945468Sjc25722 if (ncpu != 0 && ncpu < ncpu_min)
2955468Sjc25722 ncpu_min = ncpu;
2965468Sjc25722 }
2975468Sjc25722 lgrp_expand_proc_thresh = ncpu_min * lgrp_loadavg_max_effect / 2;
2985468Sjc25722
2995468Sjc25722 /* new home may only be half as loaded as the existing home to use it */
3005468Sjc25722 lgrp_expand_proc_diff = lgrp_expand_proc_thresh / 2;
3015468Sjc25722
3025468Sjc25722 lgrp_loadavg_tolerance = lgrp_loadavg_max_effect;
3035468Sjc25722 }
3045468Sjc25722
3055468Sjc25722 static mde_cookie_t
cpuid_to_cpunode(md_t * md,int cpuid)3065468Sjc25722 cpuid_to_cpunode(md_t *md, int cpuid)
3075468Sjc25722 {
3085468Sjc25722 mde_cookie_t rootnode, foundnode, *cpunodes;
3095468Sjc25722 uint64_t cpuid_prop;
3105468Sjc25722 int n_cpunodes, i;
3115468Sjc25722
3125468Sjc25722 if (md == NULL)
3135468Sjc25722 return (MDE_INVAL_ELEM_COOKIE);
3145468Sjc25722
3155468Sjc25722 rootnode = md_root_node(md);
3165468Sjc25722 if (rootnode == MDE_INVAL_ELEM_COOKIE)
3175468Sjc25722 return (MDE_INVAL_ELEM_COOKIE);
3185468Sjc25722
3195468Sjc25722 n_cpunodes = md_alloc_scan_dag(md, rootnode, PROP_LG_CPU,
3205468Sjc25722 "fwd", &cpunodes);
3215468Sjc25722 if (n_cpunodes <= 0 || n_cpunodes > NCPU)
3225468Sjc25722 goto cpuid_fail;
3235468Sjc25722
3245468Sjc25722 for (i = 0; i < n_cpunodes; i++) {
3255468Sjc25722 if (md_get_prop_val(md, cpunodes[i], PROP_LG_CPU_ID,
3265468Sjc25722 &cpuid_prop))
3275468Sjc25722 break;
3285468Sjc25722 if (cpuid_prop == (uint64_t)cpuid) {
3295468Sjc25722 foundnode = cpunodes[i];
3305468Sjc25722 md_free_scan_dag(md, &cpunodes);
3315468Sjc25722 return (foundnode);
3325468Sjc25722 }
3335468Sjc25722 }
3345468Sjc25722 cpuid_fail:
3355468Sjc25722 if (n_cpunodes > 0)
3365468Sjc25722 md_free_scan_dag(md, &cpunodes);
3375468Sjc25722 return (MDE_INVAL_ELEM_COOKIE);
3385468Sjc25722 }
3395468Sjc25722
3405468Sjc25722 static int
mpo_cpu_to_lgroup(md_t * md,mde_cookie_t cpunode)3415468Sjc25722 mpo_cpu_to_lgroup(md_t *md, mde_cookie_t cpunode)
3425468Sjc25722 {
3435468Sjc25722 mde_cookie_t *nodes;
3445468Sjc25722 uint64_t latency, lowest_latency;
3455468Sjc25722 uint64_t address_match, lowest_address_match;
3465468Sjc25722 int n_lgroups, j, result = 0;
3475468Sjc25722
3485468Sjc25722 /* Find lgroup nodes reachable from this cpu */
3495468Sjc25722 n_lgroups = md_alloc_scan_dag(md, cpunode, PROP_LG_MEM_LG,
3505468Sjc25722 "fwd", &nodes);
3515468Sjc25722
3525468Sjc25722 lowest_latency = ~(0UL);
3535468Sjc25722
3545468Sjc25722 /* Find the lgroup node with the smallest latency */
3555468Sjc25722 for (j = 0; j < n_lgroups; j++) {
3565468Sjc25722 result = get_int(md, nodes[j], PROP_LG_LATENCY,
3575468Sjc25722 &latency);
3585468Sjc25722 result |= get_int(md, nodes[j], PROP_LG_MATCH,
3595468Sjc25722 &address_match);
3605468Sjc25722 if (result != 0) {
3615468Sjc25722 j = -1;
3625468Sjc25722 goto to_lgrp_done;
3635468Sjc25722 }
3645468Sjc25722 if (latency < lowest_latency) {
3655468Sjc25722 lowest_latency = latency;
3665468Sjc25722 lowest_address_match = address_match;
3675468Sjc25722 }
3685468Sjc25722 }
3695468Sjc25722 for (j = 0; j < n_lgrpnodes; j++) {
3705468Sjc25722 if ((mpo_lgroup[j].latency == lowest_latency) &&
3715468Sjc25722 (mpo_lgroup[j].addr_match == lowest_address_match))
3725468Sjc25722 break;
3735468Sjc25722 }
3745468Sjc25722 if (j == n_lgrpnodes)
3755468Sjc25722 j = -1;
3765468Sjc25722
3775468Sjc25722 to_lgrp_done:
3785468Sjc25722 if (n_lgroups > 0)
3795468Sjc25722 md_free_scan_dag(md, &nodes);
3805468Sjc25722 return (j);
3815468Sjc25722 }
3825468Sjc25722
3835468Sjc25722 /* Called when DR'ing in a CPU */
3845468Sjc25722 void
mpo_cpu_add(md_t * md,int cpuid)385*12013SHaik.Aftandilian@Sun.COM mpo_cpu_add(md_t *md, int cpuid)
3865468Sjc25722 {
3875468Sjc25722 mde_cookie_t cpunode;
3885468Sjc25722
3895468Sjc25722 int i;
3905468Sjc25722
3915468Sjc25722 if (n_lgrpnodes <= 0)
3925468Sjc25722 return;
3935468Sjc25722
3945468Sjc25722 if (md == NULL)
3955468Sjc25722 goto add_fail;
3965468Sjc25722
3975468Sjc25722 cpunode = cpuid_to_cpunode(md, cpuid);
3985468Sjc25722 if (cpunode == MDE_INVAL_ELEM_COOKIE)
3995468Sjc25722 goto add_fail;
4005468Sjc25722
4015468Sjc25722 i = mpo_cpu_to_lgroup(md, cpunode);
4025468Sjc25722 if (i == -1)
4035468Sjc25722 goto add_fail;
4045468Sjc25722
4055468Sjc25722 mpo_cpu[cpuid].lgrp_index = i;
4065468Sjc25722 mpo_cpu[cpuid].home = mpo_lgroup[i].addr_match >> home_mask_shift;
4075468Sjc25722 mpo_lgroup[i].ncpu++;
4085468Sjc25722 mpo_update_tunables();
4095468Sjc25722 return;
4105468Sjc25722 add_fail:
4115468Sjc25722 panic("mpo_cpu_add: Cannot read MD");
4125468Sjc25722 }
4135468Sjc25722
4145468Sjc25722 /* Called when DR'ing out a CPU */
4155468Sjc25722 void
mpo_cpu_remove(int cpuid)4165468Sjc25722 mpo_cpu_remove(int cpuid)
4175468Sjc25722 {
4185468Sjc25722 int i;
4195468Sjc25722
4205468Sjc25722 if (n_lgrpnodes <= 0)
4215468Sjc25722 return;
4225468Sjc25722
4235468Sjc25722 i = mpo_cpu[cpuid].lgrp_index;
4245468Sjc25722 mpo_lgroup[i].ncpu--;
4255468Sjc25722 mpo_cpu[cpuid].home = 0;
4265468Sjc25722 mpo_cpu[cpuid].lgrp_index = -1;
4275468Sjc25722 mpo_update_tunables();
4285468Sjc25722 }
4295468Sjc25722
43010106SJason.Beloro@Sun.COM static mde_cookie_t
md_get_root(md_t * md)43110106SJason.Beloro@Sun.COM md_get_root(md_t *md)
4324769Sdp78419 {
43310106SJason.Beloro@Sun.COM mde_cookie_t root = MDE_INVAL_ELEM_COOKIE;
43410106SJason.Beloro@Sun.COM int n_nodes;
4354769Sdp78419
4364769Sdp78419 n_nodes = md_node_count(md);
4374769Sdp78419
4384769Sdp78419 if (n_nodes <= 0) {
43910106SJason.Beloro@Sun.COM MPO_STATUS("md_get_root: No nodes in node count\n");
44010106SJason.Beloro@Sun.COM return (root);
4414769Sdp78419 }
4424769Sdp78419
4434769Sdp78419 root = md_root_node(md);
4444769Sdp78419
4454769Sdp78419 if (root == MDE_INVAL_ELEM_COOKIE) {
44610106SJason.Beloro@Sun.COM MPO_STATUS("md_get_root: Root node is missing\n");
44710106SJason.Beloro@Sun.COM return (root);
4484769Sdp78419 }
4494769Sdp78419
45010106SJason.Beloro@Sun.COM MPO_DEBUG("md_get_root: Node Count: %d\n", n_nodes);
45110106SJason.Beloro@Sun.COM MPO_DEBUG("md_get_root: md: %p\n", md);
45210106SJason.Beloro@Sun.COM MPO_DEBUG("md_get_root: root: %lx\n", root);
45310106SJason.Beloro@Sun.COM done:
45410106SJason.Beloro@Sun.COM return (root);
45510106SJason.Beloro@Sun.COM }
4564769Sdp78419
/*
 * Scan all PROP_LG_MEM_LG (lgroup) nodes reachable from root and
 * populate mpo_lgroup[] / n_lgrpnodes from each node's mask, match
 * and latency properties.  Also rejects configurations this code
 * cannot represent: too many lgroups, mismatched address masks, or
 * unfixable sub-page interleave.
 *
 * Returns 0 on success, -1 on failure.  Note fix_interleave() may
 * shrink n_lgrpnodes as a side effect.  The "fail" label is the
 * common exit, reached on success as well as on error.
 */
static int
lgrp_update(md_t *md, mde_cookie_t root)
{
	int i, j, result;
	int ret_val = 0;
	int sub_page_fix;
	mde_cookie_t *nodes, *lgrpnodes;

	n_lgrpnodes = md_alloc_scan_dag(md, root, PROP_LG_MEM_LG,
	    "fwd", &lgrpnodes);

	if (n_lgrpnodes <= 0 || n_lgrpnodes >= MAX_MD_LGROUPS) {
		MPO_STATUS("lgrp_update: No Lgroups\n");
		ret_val = -1;
		goto fail;
	}

	MPO_DEBUG("lgrp_update: mem_lgs: %d\n", n_lgrpnodes);

	/* Copy each lgroup node's properties into mpo_lgroup[] */
	for (i = 0; i < n_lgrpnodes; i++) {
		mpo_lgroup[i].node = lgrpnodes[i];
		mpo_lgroup[i].id = i;
		mpo_lgroup[i].ncpu = 0;
		result = get_int(md, lgrpnodes[i], PROP_LG_MASK,
		    &mpo_lgroup[i].addr_mask);
		result |= get_int(md, lgrpnodes[i], PROP_LG_MATCH,
		    &mpo_lgroup[i].addr_match);

		/*
		 * If either the mask or match properties are missing, set to 0
		 */
		if (result < 0) {
			mpo_lgroup[i].addr_mask = 0;
			mpo_lgroup[i].addr_match = 0;
		}

		/* Set latency to 0 if property not present */

		result = get_int(md, lgrpnodes[i], PROP_LG_LATENCY,
		    &mpo_lgroup[i].latency);
		if (result < 0)
			mpo_lgroup[i].latency = 0;
	}

	/*
	 * Sub-page level interleave is not yet supported.  Check for it,
	 * and remove sub-page interleaved lgroups from mpo_lgroup and
	 * n_lgrpnodes.  If no lgroups are left, return.
	 */

	sub_page_fix = fix_interleave();
	if (n_lgrpnodes == 0) {
		ret_val = -1;
		goto fail;
	}

	/* Ensure that all of the addr_mask values are the same */

	for (i = 0; i < n_lgrpnodes; i++) {
		if (mpo_lgroup[0].addr_mask != mpo_lgroup[i].addr_mask) {
			MPO_STATUS("lgrp_update: "
			    "addr_mask values are not the same\n");
			ret_val = -1;
			goto fail;
		}
	}

	/*
	 * Ensure that all lgrp nodes see all the mblocks. However, if
	 * sub-page interleave is being fixed, they do not, so skip
	 * the check.
	 */

	if (sub_page_fix == 0) {
		for (i = 0; i < n_lgrpnodes; i++) {
			j = md_alloc_scan_dag(md, mpo_lgroup[i].node,
			    PROP_LG_MBLOCK, "fwd", &nodes);
			md_free_scan_dag(md, &nodes);
			if (j != n_mblocks) {
				MPO_STATUS("lgrp_update: "
				    "sub-page interleave is being fixed\n");
				ret_val = -1;
				goto fail;
			}
		}
	}
fail:
	/* Common exit; node cookies are invalidated once lgrpnodes is freed */
	if (n_lgrpnodes > 0) {
		md_free_scan_dag(md, &lgrpnodes);
		for (i = 0; i < n_lgrpnodes; i++)
			mpo_lgroup[i].node = MDE_INVAL_ELEM_COOKIE;
	}

	return (ret_val);
}
55210106SJason.Beloro@Sun.COM
55310106SJason.Beloro@Sun.COM /*
55410106SJason.Beloro@Sun.COM *
55510106SJason.Beloro@Sun.COM * Traverse the MD to determine:
55610106SJason.Beloro@Sun.COM *
55710106SJason.Beloro@Sun.COM * Number of CPU nodes, lgrp_nodes, and mblocks
55810106SJason.Beloro@Sun.COM * Then for each lgrp_node, obtain the appropriate data.
55910106SJason.Beloro@Sun.COM * For each CPU, determine its home locality and store it.
56010106SJason.Beloro@Sun.COM * For each mblock, retrieve its data and store it.
56110106SJason.Beloro@Sun.COM */
56210106SJason.Beloro@Sun.COM static int
lgrp_traverse(md_t * md)56310106SJason.Beloro@Sun.COM lgrp_traverse(md_t *md)
56410106SJason.Beloro@Sun.COM {
56510106SJason.Beloro@Sun.COM mde_cookie_t root, *cpunodes, *mblocknodes;
56610106SJason.Beloro@Sun.COM int o;
56710106SJason.Beloro@Sun.COM uint64_t i, k, stripe, stride;
56810106SJason.Beloro@Sun.COM uint64_t mem_lg_homeset = 0;
56910106SJason.Beloro@Sun.COM int ret_val = 0;
57010106SJason.Beloro@Sun.COM int result = 0;
57110106SJason.Beloro@Sun.COM int n_cpunodes = 0;
57210106SJason.Beloro@Sun.COM mpo_config_t new_config;
57310106SJason.Beloro@Sun.COM
57410106SJason.Beloro@Sun.COM if ((root = md_get_root(md)) == MDE_INVAL_ELEM_COOKIE) {
57510106SJason.Beloro@Sun.COM ret_val = -1;
57610106SJason.Beloro@Sun.COM goto fail;
57710106SJason.Beloro@Sun.COM }
57810106SJason.Beloro@Sun.COM
57910106SJason.Beloro@Sun.COM n_mblocks = md_alloc_scan_dag(md, root, PROP_LG_MBLOCK, "fwd",
58010106SJason.Beloro@Sun.COM &mblocknodes);
58110106SJason.Beloro@Sun.COM if (n_mblocks <= 0) {
58210106SJason.Beloro@Sun.COM MPO_STATUS("lgrp_traverse: No mblock nodes detected in Machine "
58310106SJason.Beloro@Sun.COM "Descriptor\n");
58410106SJason.Beloro@Sun.COM ret_val = -1;
58510106SJason.Beloro@Sun.COM goto fail;
58610106SJason.Beloro@Sun.COM }
58710106SJason.Beloro@Sun.COM
58810106SJason.Beloro@Sun.COM /*
58910106SJason.Beloro@Sun.COM * Build the Memory Nodes. Do this before any possibility of
59010106SJason.Beloro@Sun.COM * bailing from this routine so we obtain ra_to_pa (needed for page
59110106SJason.Beloro@Sun.COM * coloring) even when there are no lgroups defined.
59210106SJason.Beloro@Sun.COM */
59310106SJason.Beloro@Sun.COM if (mblock_alloc(&new_config, U_ADD_ALL, n_mblocks) < 0) {
59410106SJason.Beloro@Sun.COM ret_val = -1;
59510106SJason.Beloro@Sun.COM goto fail;
59610106SJason.Beloro@Sun.COM }
59710106SJason.Beloro@Sun.COM
59810106SJason.Beloro@Sun.COM mblock_update(&new_config, md, mblocknodes);
59910106SJason.Beloro@Sun.COM mblock_install(&new_config);
60010106SJason.Beloro@Sun.COM
60110106SJason.Beloro@Sun.COM /* Page coloring hook is required so we can iterate through mnodes */
60210106SJason.Beloro@Sun.COM if (&page_next_pfn_for_color_cpu == NULL) {
60310106SJason.Beloro@Sun.COM MPO_STATUS("lgrp_traverse: No page coloring support\n");
60410106SJason.Beloro@Sun.COM ret_val = -1;
60510106SJason.Beloro@Sun.COM goto fail;
60610106SJason.Beloro@Sun.COM }
60710106SJason.Beloro@Sun.COM
60810106SJason.Beloro@Sun.COM /* Global enable for mpo */
60910106SJason.Beloro@Sun.COM if (sun4v_mpo_enable == 0) {
61010106SJason.Beloro@Sun.COM MPO_STATUS("lgrp_traverse: MPO feature is not enabled\n");
61110106SJason.Beloro@Sun.COM ret_val = -1;
61210106SJason.Beloro@Sun.COM goto fail;
61310106SJason.Beloro@Sun.COM }
61410106SJason.Beloro@Sun.COM
61510106SJason.Beloro@Sun.COM n_cpunodes = md_alloc_scan_dag(md, root, PROP_LG_CPU, "fwd", &cpunodes);
61610106SJason.Beloro@Sun.COM
61710106SJason.Beloro@Sun.COM if (n_cpunodes <= 0 || n_cpunodes > NCPU) {
61810106SJason.Beloro@Sun.COM MPO_STATUS("lgrp_traverse: No CPU nodes detected "
61910106SJason.Beloro@Sun.COM "in MD\n");
62010106SJason.Beloro@Sun.COM ret_val = -1;
62110106SJason.Beloro@Sun.COM goto fail;
62210106SJason.Beloro@Sun.COM }
62310106SJason.Beloro@Sun.COM
62410106SJason.Beloro@Sun.COM MPO_DEBUG("lgrp_traverse: cpus: %d\n", n_cpunodes);
62510106SJason.Beloro@Sun.COM
62610106SJason.Beloro@Sun.COM if ((ret_val = lgrp_update(md, root)) == -1)
62710106SJason.Beloro@Sun.COM goto fail;
6284769Sdp78419
6294769Sdp78419 /*
6304769Sdp78419 * Use the address mask from the first lgroup node
6314769Sdp78419 * to establish our home_mask.
6324769Sdp78419 */
6334769Sdp78419 home_mask = mpo_lgroup[0].addr_mask;
6344769Sdp78419 home_mask_pfn = btop(home_mask);
6354769Sdp78419 home_mask_shift = lowbit(home_mask) - 1;
6364769Sdp78419 home_mask_pfn_shift = home_mask_shift - PAGESHIFT;
6374769Sdp78419 mnode_pages = btop(1ULL << home_mask_shift);
6384769Sdp78419
6394769Sdp78419 /*
6404769Sdp78419 * How many values are possible in home mask? Assume the mask
6414769Sdp78419 * bits are contiguous.
6424769Sdp78419 */
6434769Sdp78419 max_locality_groups =
6444769Sdp78419 1 << highbit(home_mask_pfn >> home_mask_pfn_shift);
6454769Sdp78419
64610106SJason.Beloro@Sun.COM stripe_shift = highbit(max_locality_groups) - 1;
64710106SJason.Beloro@Sun.COM stripe = ptob(mnode_pages);
64810106SJason.Beloro@Sun.COM stride = max_locality_groups * stripe;
64910106SJason.Beloro@Sun.COM mnode_stride = btop(stride);
65010106SJason.Beloro@Sun.COM
6514769Sdp78419 /* Now verify the home mask bits are contiguous */
6524769Sdp78419
6534769Sdp78419 if (max_locality_groups - 1 != home_mask_pfn >> home_mask_pfn_shift) {
6544769Sdp78419 MPO_STATUS("lgrp_traverse: "
6554769Sdp78419 "home mask bits are not contiguous\n");
6564769Sdp78419 ret_val = -1;
6574769Sdp78419 goto fail;
6584769Sdp78419 }
6594769Sdp78419
6604769Sdp78419 /* Record all of the home bits */
6614769Sdp78419
6624769Sdp78419 for (i = 0; i < n_lgrpnodes; i++) {
6634769Sdp78419 HOMESET_ADD(mem_lg_homeset,
6644769Sdp78419 mpo_lgroup[i].addr_match >> home_mask_shift);
6654769Sdp78419 }
6664769Sdp78419
6674769Sdp78419 /* Count the number different "home" mem_lg's we've discovered */
6684769Sdp78419
6694769Sdp78419 n_locality_groups = unique_home_mem_lg_count(mem_lg_homeset);
6704769Sdp78419
6714769Sdp78419 /* If we have only 1 locality group then we can exit */
6724769Sdp78419 if (n_locality_groups == 1) {
6734769Sdp78419 MPO_STATUS("lgrp_traverse: n_locality_groups == 1\n");
6744769Sdp78419 ret_val = -1;
6754769Sdp78419 goto fail;
6764769Sdp78419 }
6774769Sdp78419
6784769Sdp78419 /*
6794769Sdp78419 * Set the latencies. A CPU's lgroup is defined by the lowest
6804769Sdp78419 * latency found. All other memory is considered remote, and the
6814769Sdp78419 * remote latency is represented by the highest latency found.
6824769Sdp78419 * Thus hierarchical lgroups, if any, are approximated by a
6834769Sdp78419 * two level scheme.
6844769Sdp78419 *
6854769Sdp78419 * The Solaris MPO framework by convention wants to see latencies
6864769Sdp78419 * in units of nano-sec/10. In the MD, the units are defined to be
6874769Sdp78419 * pico-seconds.
6884769Sdp78419 */
6894769Sdp78419
6904769Sdp78419 lower_latency = mpo_lgroup[0].latency;
6914769Sdp78419 higher_latency = mpo_lgroup[0].latency;
6924769Sdp78419
6934769Sdp78419 for (i = 1; i < n_lgrpnodes; i++) {
6944769Sdp78419 if (mpo_lgroup[i].latency < lower_latency) {
6954769Sdp78419 lower_latency = mpo_lgroup[i].latency;
6964769Sdp78419 }
6974769Sdp78419 if (mpo_lgroup[i].latency > higher_latency) {
6984769Sdp78419 higher_latency = mpo_lgroup[i].latency;
6994769Sdp78419 }
7004769Sdp78419 }
7014769Sdp78419 lower_latency /= 10000;
7024769Sdp78419 higher_latency /= 10000;
7034769Sdp78419
7044769Sdp78419 /* Clear our CPU data */
7054769Sdp78419
7064769Sdp78419 for (i = 0; i < NCPU; i++) {
7074769Sdp78419 mpo_cpu[i].home = 0;
7085468Sjc25722 mpo_cpu[i].lgrp_index = -1;
7094769Sdp78419 }
7104769Sdp78419
7114769Sdp78419 /* Build the CPU nodes */
7124769Sdp78419 for (i = 0; i < n_cpunodes; i++) {
7134769Sdp78419
7144769Sdp78419 /* Read in the lgroup nodes */
7154769Sdp78419 result = get_int(md, cpunodes[i], PROP_LG_CPU_ID, &k);
7164769Sdp78419 if (result < 0) {
7174769Sdp78419 MPO_STATUS("lgrp_traverse: PROP_LG_CPU_ID missing\n");
7184769Sdp78419 ret_val = -1;
7194769Sdp78419 goto fail;
7204769Sdp78419 }
7214769Sdp78419
7225468Sjc25722 o = mpo_cpu_to_lgroup(md, cpunodes[i]);
7235468Sjc25722 if (o == -1) {
7244769Sdp78419 ret_val = -1;
7254769Sdp78419 goto fail;
7264769Sdp78419 }
7275468Sjc25722 mpo_cpu[k].lgrp_index = o;
7285468Sjc25722 mpo_cpu[k].home = mpo_lgroup[o].addr_match >> home_mask_shift;
7295468Sjc25722 mpo_lgroup[o].ncpu++;
7304769Sdp78419 }
7314769Sdp78419 /* Validate that no large pages cross mnode boundaries. */
7324769Sdp78419 if (valid_pages(md, cpunodes[0]) == 0) {
7334769Sdp78419 ret_val = -1;
7344769Sdp78419 goto fail;
7354769Sdp78419 }
7364769Sdp78419
7374769Sdp78419 fail:
7384769Sdp78419 if (n_cpunodes > 0)
7394769Sdp78419 md_free_scan_dag(md, &cpunodes);
7404769Sdp78419 if (n_mblocks > 0)
7414769Sdp78419 md_free_scan_dag(md, &mblocknodes);
7424769Sdp78419 else
7434769Sdp78419 panic("lgrp_traverse: No memory blocks found");
7444769Sdp78419
74510106SJason.Beloro@Sun.COM if (ret_val == 0) {
7464769Sdp78419 MPO_STATUS("MPO feature is enabled.\n");
74710106SJason.Beloro@Sun.COM } else
74810106SJason.Beloro@Sun.COM sun4v_mpo_enable = 0; /* set this for DR */
7494769Sdp78419
7504769Sdp78419 return (ret_val);
7514769Sdp78419 }
7524769Sdp78419
7534769Sdp78419 /*
7544769Sdp78419 * Determine the number of unique mem_lg's present in our system
7554769Sdp78419 */
7564769Sdp78419 static int
unique_home_mem_lg_count(uint64_t mem_lg_homeset)7574769Sdp78419 unique_home_mem_lg_count(uint64_t mem_lg_homeset)
7584769Sdp78419 {
7594769Sdp78419 int homeid;
7604769Sdp78419 int count = 0;
7614769Sdp78419
7624769Sdp78419 /*
7634769Sdp78419 * Scan the "home" bits of the mem_lgs, count
7644769Sdp78419 * the number that are unique.
7654769Sdp78419 */
7664769Sdp78419
7674769Sdp78419 for (homeid = 0; homeid < NLGRPS_MAX; homeid++) {
7684769Sdp78419 if (MEM_LG_ISMEMBER(mem_lg_homeset, homeid)) {
7694769Sdp78419 count++;
7704769Sdp78419 }
7714769Sdp78419 }
7724769Sdp78419
7734769Sdp78419 MPO_DEBUG("unique_home_mem_lg_count: homeset %lx\n",
7744769Sdp78419 mem_lg_homeset);
7754769Sdp78419 MPO_DEBUG("unique_home_mem_lg_count: count: %d\n", count);
7764769Sdp78419
7774769Sdp78419 /* Default must be at least one */
7784769Sdp78419 if (count == 0)
7794769Sdp78419 count = 1;
7804769Sdp78419
7814769Sdp78419 return (count);
7824769Sdp78419 }
7834769Sdp78419
7844769Sdp78419 /*
7854769Sdp78419 * Platform specific lgroup initialization
7864769Sdp78419 */
7874769Sdp78419 void
plat_lgrp_init(void)7884769Sdp78419 plat_lgrp_init(void)
7894769Sdp78419 {
7904769Sdp78419 md_t *md;
7915468Sjc25722 int rc;
7924769Sdp78419
7934769Sdp78419 /* Get the Machine Descriptor handle */
7944769Sdp78419
7954769Sdp78419 md = md_get_handle();
7964769Sdp78419
7974769Sdp78419 /* If not, we cannot continue */
7984769Sdp78419
7994769Sdp78419 if (md == NULL) {
8004769Sdp78419 panic("cannot access machine descriptor\n");
8014769Sdp78419 } else {
8024769Sdp78419 rc = lgrp_traverse(md);
8034769Sdp78419 (void) md_fini_handle(md);
8044769Sdp78419 }
8054769Sdp78419
8064769Sdp78419 /*
8074769Sdp78419 * If we can't process the MD for lgroups then at least let the
8084769Sdp78419 * system try to boot. Assume we have one lgroup so that
8094769Sdp78419 * when plat_build_mem_nodes is called, it will attempt to init
8104769Sdp78419 * an mnode based on the supplied memory segment.
8114769Sdp78419 */
8124769Sdp78419
8134769Sdp78419 if (rc == -1) {
8144769Sdp78419 home_mask_pfn = 0;
8154769Sdp78419 max_locality_groups = 1;
8164769Sdp78419 n_locality_groups = 1;
8174769Sdp78419 return;
8184769Sdp78419 }
8194769Sdp78419
8204769Sdp78419 mem_node_pfn_shift = 0;
8214769Sdp78419 mem_node_physalign = 0;
8224769Sdp78419
8234769Sdp78419 /* Use lgroup-aware TSB allocations */
8244769Sdp78419 tsb_lgrp_affinity = 1;
8254769Sdp78419
8264769Sdp78419 /* Require that a home lgroup have some memory to be chosen */
8274769Sdp78419 lgrp_mem_free_thresh = 1;
8284769Sdp78419
8294769Sdp78419 /* Standard home-on-next-touch policy */
8304769Sdp78419 lgrp_mem_policy_root = LGRP_MEM_POLICY_NEXT;
8314769Sdp78419
8324769Sdp78419 /* Disable option to choose root lgroup if all leaf lgroups are busy */
8334769Sdp78419 lgrp_load_thresh = UINT32_MAX;
8345468Sjc25722
8355468Sjc25722 mpo_update_tunables();
8364769Sdp78419 }
8374769Sdp78419
8384769Sdp78419 /*
8394769Sdp78419 * Helper routine for debugging calls to mem_node_add_slice()
8404769Sdp78419 */
8414769Sdp78419 static void
mpo_mem_node_add_slice(pfn_t basepfn,pfn_t endpfn)8424769Sdp78419 mpo_mem_node_add_slice(pfn_t basepfn, pfn_t endpfn)
8434769Sdp78419 {
8444769Sdp78419 #if defined(DEBUG) && !defined(lint)
8454769Sdp78419 static int slice_count = 0;
8464769Sdp78419
8474769Sdp78419 slice_count++;
8484769Sdp78419 MPO_DEBUG("mem_add_slice(%d): basepfn: %lx endpfn: %lx\n",
8494769Sdp78419 slice_count, basepfn, endpfn);
8504769Sdp78419 #endif
8514769Sdp78419 mem_node_add_slice(basepfn, endpfn);
8524769Sdp78419 }
8534769Sdp78419
85410106SJason.Beloro@Sun.COM static void
mpo_mem_node_del_slice(pfn_t basepfn,pfn_t endpfn)85510106SJason.Beloro@Sun.COM mpo_mem_node_del_slice(pfn_t basepfn, pfn_t endpfn)
85610106SJason.Beloro@Sun.COM {
85710106SJason.Beloro@Sun.COM #if defined(DEBUG) && !defined(lint)
85810106SJason.Beloro@Sun.COM static int slice_count = 0;
85910106SJason.Beloro@Sun.COM
86010106SJason.Beloro@Sun.COM slice_count++;
86110106SJason.Beloro@Sun.COM MPO_DEBUG("mem_del_slice(%d): basepfn: %lx endpfn: %lx\n",
86210106SJason.Beloro@Sun.COM slice_count, basepfn, endpfn);
86310106SJason.Beloro@Sun.COM #endif
86410106SJason.Beloro@Sun.COM mem_node_del_slice(basepfn, endpfn);
86510106SJason.Beloro@Sun.COM }
86610106SJason.Beloro@Sun.COM
/*
 * Helper routine for debugging calls to plat_assign_lgrphand_to_mem_node()
 */
static void
mpo_plat_assign_lgrphand_to_mem_node(lgrp_handle_t plathand, int mnode)
{
	/*
	 * Trace the lgroup-handle to mem_node binding before making the
	 * real assignment.  (The "plat_assign_to_mem_nodes" tag in the
	 * debug string does not match the function name; it appears to
	 * be historical.)
	 */
	MPO_DEBUG("plat_assign_to_mem_nodes: lgroup home %ld, "
	    "mnode index: %d\n", plathand, mnode);
	plat_assign_lgrphand_to_mem_node(plathand, mnode);
}
8774769Sdp78419
8784769Sdp78419 /*
8794769Sdp78419 * plat_build_mem_nodes()
8804769Sdp78419 *
8814769Sdp78419 * Define the mem_nodes based on the modified boot memory list,
8824769Sdp78419 * or based on info read from the MD in plat_lgrp_init().
8834769Sdp78419 *
8844769Sdp78419 * When the home mask lies in the middle of the address bits (as it does on
8854769Sdp78419 * Victoria Falls), then the memory in one mem_node is no longer contiguous;
8864769Sdp78419 * it is striped across an mblock in a repeating pattern of contiguous memory
8874769Sdp78419 * followed by a gap. The stripe width is the size of the contiguous piece.
8884769Sdp78419 * The stride is the distance from the start of one contiguous piece to the
8894769Sdp78419 * start of the next. The gap is thus stride - stripe_width.
8904769Sdp78419 *
8914769Sdp78419 * The stripe of an mnode that falls within an mblock is described by the type
8924769Sdp78419 * mem_stripe_t, and there is one mem_stripe_t per mnode per mblock. The
8934769Sdp78419 * mem_stripe_t's are kept in a global array mem_stripes[]. The index into
8944769Sdp78419 * this array is predetermined. The mem_stripe_t that describes mnode m
8954769Sdp78419 * within mpo_mblock[i] is stored at
8964769Sdp78419 * mem_stripes[ m + i * max_locality_groups ]
8974769Sdp78419 *
8984769Sdp78419 * max_locality_groups is the total number of possible locality groups,
8994769Sdp78419 * as defined by the size of the home mask, even if the memory assigned
9004769Sdp78419 * to the domain is small and does not cover all the lgroups. Thus some
9014769Sdp78419 * mem_stripe_t's may be empty.
9024769Sdp78419 *
9034769Sdp78419 * The members of mem_stripe_t are:
9044769Sdp78419 * physbase: First valid page in mem_node in the corresponding mblock
9054769Sdp78419 * physmax: Last valid page in mem_node in mblock
9064769Sdp78419 * offset: The full stripe width starts at physbase - offset.
9074769Sdp78419 * Thus if offset is non-zero, this mem_node starts in the middle
9084769Sdp78419 * of a stripe width, and the second full stripe starts at
9094769Sdp78419 * physbase - offset + stride. (even though physmax may fall in the
9104769Sdp78419 * middle of a stripe width, we do not save the ending fragment size
9114769Sdp78419 * in this data structure.)
9124769Sdp78419 * exists: Set to 1 if the mblock has memory in this mem_node stripe.
9134769Sdp78419 *
9144769Sdp78419 * The stripe width is kept in the global mnode_pages.
9154769Sdp78419 * The stride is kept in the global mnode_stride.
9164769Sdp78419 * All the above use pfn's as the unit.
9174769Sdp78419 *
9184769Sdp78419 * As an example, the memory layout for a domain with 2 mblocks and 4
9194769Sdp78419 * mem_nodes 0,1,2,3 could look like this:
9204769Sdp78419 *
9214769Sdp78419 * 123012301230 ... 012301230123 ...
9224769Sdp78419 * mblock 0 mblock 1
9234769Sdp78419 */
9244769Sdp78419
92510106SJason.Beloro@Sun.COM /*ARGSUSED*/
9264769Sdp78419 void
plat_build_mem_nodes(prom_memlist_t * list,size_t nelems)9275648Ssetje plat_build_mem_nodes(prom_memlist_t *list, size_t nelems)
9284769Sdp78419 {
92910106SJason.Beloro@Sun.COM int elem;
93010106SJason.Beloro@Sun.COM uint64_t base, len;
9314769Sdp78419
9325079Sjc25722 /* Pre-reserve space for plat_assign_lgrphand_to_mem_node */
9335079Sjc25722 max_mem_nodes = max_locality_groups;
9344769Sdp78419
93510106SJason.Beloro@Sun.COM mstripe_update(&mpo_config);
93610106SJason.Beloro@Sun.COM
9375079Sjc25722 /* Check for non-MPO sun4v platforms */
9384769Sdp78419 if (n_locality_groups <= 1) {
9395079Sjc25722 mpo_plat_assign_lgrphand_to_mem_node(LGRP_DEFAULT_HANDLE, 0);
9405648Ssetje for (elem = 0; elem < nelems; list++, elem++) {
9415648Ssetje base = list->addr;
9425648Ssetje len = list->size;
9434769Sdp78419
9444769Sdp78419 mpo_mem_node_add_slice(btop(base),
9454769Sdp78419 btop(base + len - 1));
9464769Sdp78419 }
9474769Sdp78419 mem_node_pfn_shift = 0;
9484769Sdp78419 mem_node_physalign = 0;
94910106SJason.Beloro@Sun.COM } else
95010106SJason.Beloro@Sun.COM mnode_update(&mpo_config, 0, 0, U_ADD_ALL);
9514769Sdp78419
9524769Sdp78419 /*
9534769Sdp78419 * Indicate to vm_pagelist that the hpm_counters array
9544769Sdp78419 * should be shared because the ranges overlap.
9554769Sdp78419 */
9564769Sdp78419 if (max_mem_nodes > 1) {
9574769Sdp78419 interleaved_mnodes = 1;
9584769Sdp78419 }
9594769Sdp78419 }
9604769Sdp78419
9614769Sdp78419 /*
9624769Sdp78419 * Return the locality group value for the supplied processor
9634769Sdp78419 */
9644769Sdp78419 lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)9654769Sdp78419 plat_lgrp_cpu_to_hand(processorid_t id)
9664769Sdp78419 {
96710106SJason.Beloro@Sun.COM lgrp_handle_t lgrphand;
96810106SJason.Beloro@Sun.COM
96910106SJason.Beloro@Sun.COM mpo_rd_lock();
9704769Sdp78419 if (n_locality_groups > 1) {
97110106SJason.Beloro@Sun.COM lgrphand = (lgrp_handle_t)mpo_cpu[(int)id].home;
9724769Sdp78419 } else {
97310106SJason.Beloro@Sun.COM lgrphand = (lgrp_handle_t)LGRP_DEFAULT_HANDLE; /* Default */
9744769Sdp78419 }
97510106SJason.Beloro@Sun.COM mpo_rd_unlock();
97610106SJason.Beloro@Sun.COM
97710106SJason.Beloro@Sun.COM return (lgrphand);
9784769Sdp78419 }
9794769Sdp78419
9804769Sdp78419 int
plat_lgrp_latency(lgrp_handle_t from,lgrp_handle_t to)9814769Sdp78419 plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
9824769Sdp78419 {
9834769Sdp78419 /*
9844769Sdp78419 * Return min remote latency when there are more than two lgroups
9854769Sdp78419 * (root and child) and getting latency between two different lgroups
9864769Sdp78419 * or root is involved.
9874769Sdp78419 */
9884769Sdp78419 if (lgrp_optimizations() && (from != to ||
9894769Sdp78419 from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE)) {
9904769Sdp78419 return ((int)higher_latency);
9914769Sdp78419 } else {
9924769Sdp78419 return ((int)lower_latency);
9934769Sdp78419 }
9944769Sdp78419 }
9954769Sdp78419
9964769Sdp78419 int
plat_pfn_to_mem_node(pfn_t pfn)9974769Sdp78419 plat_pfn_to_mem_node(pfn_t pfn)
9984769Sdp78419 {
9994769Sdp78419 int i, mnode;
10004769Sdp78419 pfn_t ra_to_pa_pfn;
10014769Sdp78419 struct mblock_md *mb;
10024769Sdp78419
10034769Sdp78419 if (n_locality_groups <= 1)
10044769Sdp78419 return (0);
10054769Sdp78419
10064769Sdp78419 /*
10074769Sdp78419 * The mnode is defined to be 1:1 with the lgroup handle, which
10084769Sdp78419 * is taken from from the home bits. Find the mblock in which
10094769Sdp78419 * the pfn falls to get the ra_to_pa adjustment, and extract
10104769Sdp78419 * the home bits.
10114769Sdp78419 */
101210106SJason.Beloro@Sun.COM mpo_rd_lock();
10134769Sdp78419 mb = &mpo_mblock[0];
10144769Sdp78419 for (i = 0; i < n_mblocks; i++) {
10154769Sdp78419 if (pfn >= mb->base_pfn && pfn <= mb->end_pfn) {
10164769Sdp78419 ra_to_pa_pfn = btop(mb->ra_to_pa);
10174769Sdp78419 mnode = (((pfn + ra_to_pa_pfn) & home_mask_pfn) >>
10184769Sdp78419 home_mask_pfn_shift);
10194769Sdp78419 ASSERT(mnode < max_mem_nodes);
102010106SJason.Beloro@Sun.COM mpo_rd_unlock();
10214769Sdp78419 return (mnode);
10224769Sdp78419 }
10234769Sdp78419 mb++;
10244769Sdp78419 }
10254769Sdp78419
10264769Sdp78419 panic("plat_pfn_to_mem_node() failed to find mblock: pfn=%lx\n", pfn);
10274769Sdp78419 return (pfn);
10284769Sdp78419 }
10294769Sdp78419
10304769Sdp78419 /*
10314769Sdp78419 * plat_rapfn_to_papfn
10324769Sdp78419 *
10334769Sdp78419 * Convert a pfn in RA space to a pfn in PA space, in which the page coloring
10344769Sdp78419 * and home mask bits are correct. The upper bits do not necessarily
10354769Sdp78419 * match the actual PA, however.
10364769Sdp78419 */
10374769Sdp78419 pfn_t
plat_rapfn_to_papfn(pfn_t pfn)10384769Sdp78419 plat_rapfn_to_papfn(pfn_t pfn)
10394769Sdp78419 {
10404769Sdp78419 int i;
10414769Sdp78419 pfn_t ra_to_pa_pfn;
10424769Sdp78419 struct mblock_md *mb;
10434769Sdp78419
10444769Sdp78419 ASSERT(n_mblocks > 0);
10454769Sdp78419 if (n_mblocks == 1)
10464769Sdp78419 return (pfn + base_ra_to_pa_pfn);
10474769Sdp78419
10484769Sdp78419 /*
10494769Sdp78419 * Find the mblock in which the pfn falls
10504769Sdp78419 * in order to get the ra_to_pa adjustment.
10514769Sdp78419 */
105210106SJason.Beloro@Sun.COM mpo_rd_lock();
10534769Sdp78419 for (mb = &mpo_mblock[0], i = 0; i < n_mblocks; i++, mb++) {
10544769Sdp78419 if (pfn <= mb->end_pfn && pfn >= mb->base_pfn) {
10554769Sdp78419 ra_to_pa_pfn = btop(mb->ra_to_pa);
105610106SJason.Beloro@Sun.COM mpo_rd_unlock();
10574769Sdp78419 return (pfn + ra_to_pa_pfn);
10584769Sdp78419 }
10594769Sdp78419 }
10604769Sdp78419
10614769Sdp78419 panic("plat_rapfn_to_papfn() failed to find mblock: pfn=%lx\n", pfn);
10624769Sdp78419 return (pfn);
10634769Sdp78419 }
10644769Sdp78419
10654769Sdp78419 /*
10664769Sdp78419 * plat_mem_node_iterator_init()
10676041Sdp78419 * Initialize cookie "it" to iterate over pfn's in an mnode. There is
10686041Sdp78419 * no additional iterator function. The caller uses the info from
10696041Sdp78419 * the iterator structure directly.
10704769Sdp78419 *
10716041Sdp78419 * pfn: starting pfn.
10726041Sdp78419 * mnode: desired mnode.
10736041Sdp78419 * szc: desired page size.
10746041Sdp78419 * init:
10756041Sdp78419 * if 1, start a new traversal, initialize "it", find first
10766041Sdp78419 * mblock containing pfn, and return its starting pfn
10776041Sdp78419 * within the mnode.
10786041Sdp78419 * if 0, continue the previous traversal using passed-in data
10796041Sdp78419 * from "it", advance to the next mblock, and return its
10806041Sdp78419 * starting pfn within the mnode.
10816041Sdp78419 * it: returns readonly data to the caller; see below.
10824769Sdp78419 *
10836041Sdp78419 * The input pfn must be aligned for the page size szc.
10846041Sdp78419 *
10856041Sdp78419 * Returns: starting pfn for the iteration for the mnode/mblock,
10866041Sdp78419 * which is aligned according to the page size,
10876041Sdp78419 * or returns (pfn_t)(-1) if the input pfn lies past the last
10886041Sdp78419 * valid pfn of the mnode.
10896041Sdp78419 * Returns misc values in the "it" struct that allows the caller
10906041Sdp78419 * to advance the pfn within an mblock using address arithmetic;
10916041Sdp78419 * see definition of mem_node_iterator_t in vm_dep.h.
10926041Sdp78419 * When the caller calculates a pfn that is greater than the
10936041Sdp78419 * returned value it->mi_mblock_end, the caller should again
10946041Sdp78419 * call plat_mem_node_iterator_init, passing init=0.
109510106SJason.Beloro@Sun.COM *
109610106SJason.Beloro@Sun.COM * The last mblock in continuation case may be invalid because
109710106SJason.Beloro@Sun.COM * of memory DR. To detect this situation mi_genid is checked
109810106SJason.Beloro@Sun.COM * against mpo_genid which is incremented after a memory DR
109910106SJason.Beloro@Sun.COM * operation. See also plat_slice_add()/plat_slice_del().
11004769Sdp78419 */
pfn_t
plat_mem_node_iterator_init(pfn_t pfn, int mnode, uchar_t szc,
    mem_node_iterator_t *it, int init)
{
	int i;
	pgcnt_t szcpgcnt = PNUM_SIZE(szc);	/* pages per szc page */
	struct mblock_md *mblock;
	pfn_t base, end;
	mem_stripe_t *ms;
	uint64_t szcpagesize;

	ASSERT(it != NULL);
	ASSERT(mnode >= 0 && mnode < max_mem_nodes);
	ASSERT(n_mblocks > 0);
	ASSERT(P2PHASE(pfn, szcpgcnt) == 0);

	mpo_rd_lock();

	/*
	 * Start a fresh traversal when requested, or when a memory DR
	 * operation has bumped mpo_genid and invalidated the cached
	 * mblock position (see block comment above).
	 */
	if (init || (it->mi_genid != mpo_genid)) {
		it->mi_genid = mpo_genid;
		it->mi_last_mblock = 0;
		it->mi_init = 1;
	}

	/* Check if mpo is not enabled and we only have one mblock */
	if (n_locality_groups == 1 && n_mblocks == 1) {
		/*
		 * The RA-to-PA offset must preserve szc alignment or no
		 * szc-sized page in this config is usable.
		 */
		if (P2PHASE(base_ra_to_pa_pfn, szcpgcnt)) {
			pfn = (pfn_t)-1;
			goto done;
		}
		it->mi_mnode = mnode;
		it->mi_ra_to_pa = base_ra_to_pa_pfn;
		it->mi_mnode_pfn_mask = 0;
		it->mi_mnode_pfn_shift = 0;
		it->mi_mnode_mask = 0;
		it->mi_mblock_base = mem_node_config[mnode].physbase;
		it->mi_mblock_end = mem_node_config[mnode].physmax;
		if (pfn < it->mi_mblock_base)
			pfn = P2ROUNDUP(it->mi_mblock_base, szcpgcnt);
		/* Fail if an szc page starting at pfn would overrun the end. */
		if ((pfn + szcpgcnt - 1) > it->mi_mblock_end)
			pfn = (pfn_t)-1;
		goto done;
	}

	/* init=1 means begin iterator, init=0 means continue */
	if (init == 1) {
		i = 0;
	} else {
		ASSERT(it->mi_last_mblock < n_mblocks);
		i = it->mi_last_mblock;
		/* Caller must have exhausted the previous mblock's stripe. */
		ASSERT(pfn >
		    mem_stripes[i * max_locality_groups + mnode].physmax);
		if (++i == n_mblocks) {
			pfn = (pfn_t)-1;
			goto done;
		}
	}

	/*
	 * Find mblock that contains pfn for mnode's stripe, or first such an
	 * mblock after pfn, else pfn is out of bound and we'll return -1.
	 * mblocks and stripes are sorted in ascending address order.
	 */
	szcpagesize = szcpgcnt << PAGESHIFT;
	for (; i < n_mblocks; i++) {
		/* Skip mblocks whose RA-to-PA offset breaks szc alignment. */
		if (P2PHASE(mpo_mblock[i].ra_to_pa, szcpagesize))
			continue;
		ms = &mem_stripes[i * max_locality_groups + mnode];
		/*
		 * Accept the stripe only if it can hold a whole aligned
		 * szc page at or after pfn.
		 */
		if (ms->exists && (pfn + szcpgcnt - 1) <= ms->physmax &&
		    (P2ROUNDUP(ms->physbase, szcpgcnt) + szcpgcnt - 1) <=
		    ms->physmax)
			break;
	}
	if (i == n_mblocks) {
		/* pfn lies past the last usable stripe for this mnode. */
		it->mi_last_mblock = i - 1;
		pfn = (pfn_t)-1;
		goto done;
	}

	it->mi_last_mblock = i;

	mblock = &mpo_mblock[i];
	base = ms->physbase;
	end = ms->physmax;

	/* Publish the readonly iteration state described in vm_dep.h. */
	it->mi_mnode = mnode;
	it->mi_ra_to_pa = btop(mblock->ra_to_pa);
	it->mi_mblock_base = base;
	it->mi_mblock_end = end;
	it->mi_mnode_pfn_mask = home_mask_pfn;	/* is 0 for non-MPO case */
	it->mi_mnode_pfn_shift = home_mask_pfn_shift;
	it->mi_mnode_mask = max_locality_groups - 1;
	if (pfn < base) {
		pfn = P2ROUNDUP(base, szcpgcnt);
		ASSERT(pfn + szcpgcnt - 1 <= end);
	}
	ASSERT((pfn + szcpgcnt - 1) <= mpo_mblock[i].end_pfn);
done:
	mpo_rd_unlock();
	return (pfn);
}
12024769Sdp78419
/*
 * plat_mem_node_intersect_range()
 *
 * Find the intersection between a memnode and a range of pfn's.
 * The page count of the intersection is returned in *npages_out;
 * it is 0 when the mnode does not exist or the ranges are disjoint.
 */
void
plat_mem_node_intersect_range(pfn_t test_base, pgcnt_t test_len,
    int mnode, pgcnt_t *npages_out)
{
	pfn_t offset, len, hole, base, end, test_end, frag;
	pfn_t nearest;
	mem_stripe_t *ms;
	int i, npages;

	*npages_out = 0;

	if (!mem_node_config[mnode].exists || test_len == 0)
		return;

	base = mem_node_config[mnode].physbase;
	end = mem_node_config[mnode].physmax;

	/* Quick rejection when the test range misses the mnode entirely. */
	test_end = test_base + test_len - 1;
	if (end < test_base || base > test_end)
		return;

	/* Non-striped case: a simple interval intersection suffices. */
	if (n_locality_groups == 1) {
		*npages_out = MIN(test_end, end) - MAX(test_base, base) + 1;
		return;
	}

	/* hole = pages between consecutive stripes of the same mnode */
	hole = mnode_stride - mnode_pages;
	npages = 0;

	/*
	 * Iterate over all the stripes for this mnode (one per mblock),
	 * find the intersection with each, and accumulate the intersections.
	 *
	 * Determining the intersection with a stripe is tricky.  If base or
	 * end fall outside the mem_node bounds, round them to physbase/physmax
	 * of mem_node.  If base or end fall in a gap, round them to start of
	 * nearest stripe.  If they fall within a stripe, keep base or end,
	 * but calculate the fragment size that should be excluded from the
	 * stripe.  Calculate how many strides fall in the adjusted range,
	 * multiply by stripe width, and add the start and end fragments.
	 */

	mpo_rd_lock();
	for (i = mnode; i < n_mem_stripes; i += max_locality_groups) {
		ms = &mem_stripes[i];
		if (ms->exists &&
		    test_base <= (end = ms->physmax) &&
		    test_end >= (base = ms->physbase)) {

			offset = ms->offset;

			if (test_base > base) {
				/* Round test_base to next multiple of stride */
				len = P2ROUNDUP(test_base - (base - offset),
				    mnode_stride);
				nearest = base - offset + len;
				/*
				 * Compute distance from test_base to the
				 * stride boundary to see if test_base falls
				 * in the stripe or in the hole.
				 */
				if (nearest - test_base > hole) {
					/*
					 * test_base lies in stripe,
					 * and offset should be excluded.
					 */
					offset = test_base -
					    (nearest - mnode_stride);
					base = test_base;
				} else {
					/* round up to next stripe start */
					offset = 0;
					base = nearest;
					if (base > end)
						continue;
				}

			}

			if (test_end < end)
				end = test_end;
			end++;		/* adjust to an exclusive bound */

			/* Round end to next multiple of stride */
			len = P2ROUNDUP(end - (base - offset), mnode_stride);
			nearest = (base - offset) + len;
			if (nearest - end <= hole) {
				/* end falls in hole, use entire last stripe */
				frag = 0;
			} else {
				/* end falls in stripe, compute fragment */
				frag = nearest - hole - end;
			}

			/*
			 * Strides-in-range times stripe width, minus the
			 * leading (offset) and trailing (frag) exclusions.
			 */
			len = (len >> stripe_shift) - offset - frag;
			npages += len;
		}
	}

	*npages_out = npages;
	mpo_rd_unlock();
}
13104769Sdp78419
/*
 * valid_pages()
 *
 * Return 1 if pages are valid and do not cross mnode boundaries
 * (which would break page free list assumptions), and 0 otherwise.
 */

/* Extract the mnode (home) bits from a physical address. */
#define	MNODE(pa)	\
	((btop(pa) & home_mask_pfn) >> home_mask_pfn_shift)

static int
valid_pages(md_t *md, mde_cookie_t cpu0)
{
	int i, max_szc;
	uint64_t last_page_base, szc_mask;
	uint64_t max_page_len, max_coalesce_len;
	struct mblock_md *mb = mpo_mblock;

	/*
	 * Find the smaller of the largest page possible and supported.
	 * mmu_exported_pagesize_mask is not yet initialized, so read
	 * it from the MD. Apply minimal fixups in case of broken MDs
	 * to get a sane mask.
	 */

	/*
	 * With no CPU node supplied, reuse the page-size mask cached in
	 * szc_mask0 by an earlier call.
	 * NOTE(review): cpu0 is an mde_cookie_t compared against NULL —
	 * this relies on the invalid cookie being 0; confirm against
	 * MDE_INVAL_ELEM_COOKIE.
	 */
	if (cpu0 == NULL)
		szc_mask = szc_mask0;
	else {
		if (md_get_prop_val(md, cpu0, "mmu-page-size-list", &szc_mask))
			szc_mask = 0;
		/* largest in sun4v default support */
		szc_mask |=  (1 << TTE4M);
		szc_mask0 = szc_mask;	/* cache for the cpu0 == NULL case */
	}
	max_szc = highbit(szc_mask) - 1;
	if (max_szc > TTE256M)
		max_szc = TTE256M;
	max_page_len = TTEBYTES(max_szc);

	/*
	 * Page coalescing code coalesces all sizes up to 256M on sun4v, even
	 * if mmu-page-size-list does not contain it, so 256M pages must fall
	 * within one mnode to use MPO.
	 */
	max_coalesce_len = TTEBYTES(TTE256M);
	ASSERT(max_coalesce_len >= max_page_len);

	/* An mnode slice must hold at least one maximally coalesced page. */
	if (ptob(mnode_pages) < max_coalesce_len) {
		MPO_STATUS("Page too large; MPO disabled: page = %lx, "
		    "mnode slice = %lx\n", max_coalesce_len, ptob(mnode_pages));
		return (0);
	}

	for (i = 0; i < n_mblocks; i++) {
		uint64_t base = mb->base;
		uint64_t end = mb->base + mb->size - 1;
		uint64_t ra_to_pa = mb->ra_to_pa;

		/*
		 * If mblock is smaller than the max page size, then
		 * RA = PA mod MAXPAGE is not guaranteed, but it must
		 * not span mnodes.
		 */
		if (mb->size < max_page_len) {
			if (MNODE(base + ra_to_pa) != MNODE(end + ra_to_pa)) {
				MPO_STATUS("Small mblock spans mnodes; "
				    "MPO disabled: base = %lx, end = %lx, "
				    "ra2pa = %lx\n", base, end, ra_to_pa);
				return (0);
			}
		} else {
			/* Verify RA = PA mod MAXPAGE, using coalesce size */
			uint64_t pa_base = base + ra_to_pa;
			if ((base & (max_coalesce_len - 1)) !=
			    (pa_base & (max_coalesce_len - 1))) {
				MPO_STATUS("bad page alignment; MPO disabled: "
				    "ra = %lx, pa = %lx, pagelen = %lx\n",
				    base, pa_base, max_coalesce_len);
				return (0);
			}
		}

		/*
		 * Find start of last large page in mblock in RA space.
		 * If page extends into the next mblock, verify the
		 * mnode does not change.
		 */
		last_page_base = P2ALIGN(end, max_coalesce_len);
		if (i + 1 < n_mblocks &&
		    last_page_base + max_coalesce_len > mb[1].base &&
		    MNODE(last_page_base + ra_to_pa) !=
		    MNODE(mb[1].base + mb[1].ra_to_pa)) {
			MPO_STATUS("Large page spans mblocks; MPO disabled: "
			    "end = %lx, ra2pa = %lx, base = %lx, ra2pa = %lx, "
			    "pagelen = %lx\n", end, ra_to_pa, mb[1].base,
			    mb[1].ra_to_pa, max_coalesce_len);
			return (0);
		}

		mb++;
	}
	return (1);
}
14144769Sdp78419
14154769Sdp78419
14164769Sdp78419 /*
14174769Sdp78419 * fix_interleave() - Find lgroups with sub-page sized memory interleave,
14184769Sdp78419 * if any, and remove them. This yields a config where the "coarse
14194769Sdp78419 * grained" lgroups cover all of memory, even though part of that memory
14204769Sdp78419 * is fine grain interleaved and does not deliver a purely local memory
14214769Sdp78419 * latency.
14224769Sdp78419 *
14234769Sdp78419 * This function reads and modifies the globals:
14244769Sdp78419 * mpo_lgroup[], n_lgrpnodes
14254769Sdp78419 *
14264769Sdp78419 * Returns 1 if lgroup nodes were removed, 0 otherwise.
14274769Sdp78419 */
14284769Sdp78419
14294769Sdp78419 static int
fix_interleave(void)14304769Sdp78419 fix_interleave(void)
14314769Sdp78419 {
14324769Sdp78419 int i, j;
14334769Sdp78419 uint64_t mask = 0;
14344769Sdp78419
14354769Sdp78419 j = 0;
14364769Sdp78419 for (i = 0; i < n_lgrpnodes; i++) {
14374769Sdp78419 if ((mpo_lgroup[i].addr_mask & PAGEOFFSET) != 0) {
14384769Sdp78419 /* remove this lgroup */
14394769Sdp78419 mask = mpo_lgroup[i].addr_mask;
14404769Sdp78419 } else {
14414769Sdp78419 mpo_lgroup[j++] = mpo_lgroup[i];
14424769Sdp78419 }
14434769Sdp78419 }
14444769Sdp78419 n_lgrpnodes = j;
14454769Sdp78419
14464769Sdp78419 if (mask != 0)
14474769Sdp78419 MPO_STATUS("sub-page interleave %lx found; "
14484769Sdp78419 "removing lgroup.\n", mask);
14494769Sdp78419
14504769Sdp78419 return (mask != 0);
14514769Sdp78419 }
145210106SJason.Beloro@Sun.COM
145310106SJason.Beloro@Sun.COM /*
145410106SJason.Beloro@Sun.COM * mblock_alloc
145510106SJason.Beloro@Sun.COM *
145610106SJason.Beloro@Sun.COM * Allocate memory for mblock an stripe arrays from either static or
145710106SJason.Beloro@Sun.COM * dynamic space depending on utype, and return the result in mc.
145810106SJason.Beloro@Sun.COM * Returns 0 on success and -1 on error.
145910106SJason.Beloro@Sun.COM */
146010106SJason.Beloro@Sun.COM
146110106SJason.Beloro@Sun.COM static int
mblock_alloc(mpo_config_t * mc,update_t utype,int nmblocks)146210106SJason.Beloro@Sun.COM mblock_alloc(mpo_config_t *mc, update_t utype, int nmblocks)
146310106SJason.Beloro@Sun.COM {
146410106SJason.Beloro@Sun.COM mblock_md_t *mb = NULL;
146510106SJason.Beloro@Sun.COM mem_stripe_t *ms = NULL;
146610106SJason.Beloro@Sun.COM int nstripes = MAX_MEM_NODES * nmblocks;
146710106SJason.Beloro@Sun.COM size_t mblocksz = nmblocks * sizeof (struct mblock_md);
146810106SJason.Beloro@Sun.COM size_t mstripesz = nstripes * sizeof (mem_stripe_t);
146910106SJason.Beloro@Sun.COM size_t allocsz = mmu_ptob(mmu_btopr(mblocksz + mstripesz));
147010106SJason.Beloro@Sun.COM
147110106SJason.Beloro@Sun.COM /*
147210106SJason.Beloro@Sun.COM * Allocate space for mblocks and mstripes.
147310106SJason.Beloro@Sun.COM *
147410106SJason.Beloro@Sun.COM * For DR allocations, just use kmem_alloc(), and set
147510106SJason.Beloro@Sun.COM * mc_alloc_sz to indicate it was used.
147610106SJason.Beloro@Sun.COM *
147710106SJason.Beloro@Sun.COM * For boot allocation:
147810106SJason.Beloro@Sun.COM * If we have a small number of mblocks we will use the space
147910106SJason.Beloro@Sun.COM * that we preallocated. Otherwise, we will dynamically
148010106SJason.Beloro@Sun.COM * allocate the space from the prom and map it to the
148110106SJason.Beloro@Sun.COM * reserved VA at MPOBUF_BASE.
148210106SJason.Beloro@Sun.COM */
148310106SJason.Beloro@Sun.COM
148410106SJason.Beloro@Sun.COM if (utype == U_ADD || utype == U_DEL) {
148510106SJason.Beloro@Sun.COM mb = (struct mblock_md *)kmem_zalloc(allocsz, KM_SLEEP);
148610106SJason.Beloro@Sun.COM ms = (mem_stripe_t *)(mb + nmblocks);
148710106SJason.Beloro@Sun.COM mc->mc_alloc_sz = allocsz;
148810106SJason.Beloro@Sun.COM } else if (nmblocks <= SMALL_MBLOCKS_COUNT) {
148910106SJason.Beloro@Sun.COM mb = &small_mpo_mblocks[0];
149010106SJason.Beloro@Sun.COM ms = &small_mem_stripes[0];
149110106SJason.Beloro@Sun.COM mc->mc_alloc_sz = 0;
149210106SJason.Beloro@Sun.COM } else {
149310106SJason.Beloro@Sun.COM /* Ensure that we dont request more space than reserved */
149410106SJason.Beloro@Sun.COM if (allocsz > MPOBUF_SIZE) {
149510106SJason.Beloro@Sun.COM MPO_STATUS("mblock_alloc: Insufficient space "
149610106SJason.Beloro@Sun.COM "for mblock structures \n");
149710106SJason.Beloro@Sun.COM return (-1);
149810106SJason.Beloro@Sun.COM }
149910106SJason.Beloro@Sun.COM mb = (struct mblock_md *)
150010106SJason.Beloro@Sun.COM prom_alloc((caddr_t)MPOBUF_BASE, allocsz, PAGESIZE);
150110106SJason.Beloro@Sun.COM if (mb != (struct mblock_md *)MPOBUF_BASE) {
150210106SJason.Beloro@Sun.COM MPO_STATUS("mblock_alloc: Cannot allocate space "
150310106SJason.Beloro@Sun.COM "for mblocks \n");
150410106SJason.Beloro@Sun.COM return (-1);
150510106SJason.Beloro@Sun.COM }
150610106SJason.Beloro@Sun.COM mpo_heap32_buf = (caddr_t)MPOBUF_BASE;
150710106SJason.Beloro@Sun.COM mpo_heap32_bufsz = MPOBUF_SIZE;
150810106SJason.Beloro@Sun.COM ms = (mem_stripe_t *)(mb + nmblocks);
150910106SJason.Beloro@Sun.COM mc->mc_alloc_sz = 0;
151010106SJason.Beloro@Sun.COM }
151110106SJason.Beloro@Sun.COM mc->mc_mblocks = mb;
151210106SJason.Beloro@Sun.COM mc->mc_stripes = ms;
151310106SJason.Beloro@Sun.COM mc->mc_nmblocks = nmblocks;
151410106SJason.Beloro@Sun.COM mc->mc_nstripes = nstripes;
151510106SJason.Beloro@Sun.COM MPO_DEBUG("mblock_alloc: mblocks: %d\n", nmblocks);
151610106SJason.Beloro@Sun.COM return (0);
151710106SJason.Beloro@Sun.COM }
151810106SJason.Beloro@Sun.COM
151910106SJason.Beloro@Sun.COM /*
152010106SJason.Beloro@Sun.COM * mblock_free
152110106SJason.Beloro@Sun.COM *
152210106SJason.Beloro@Sun.COM * Free memory in mc that was allocated by mblock_alloc.
152310106SJason.Beloro@Sun.COM */
152410106SJason.Beloro@Sun.COM
152510106SJason.Beloro@Sun.COM static void
mblock_free(mpo_config_t * mc)152610106SJason.Beloro@Sun.COM mblock_free(mpo_config_t *mc)
152710106SJason.Beloro@Sun.COM {
152810106SJason.Beloro@Sun.COM if (mc->mc_alloc_sz > 0) {
152910106SJason.Beloro@Sun.COM ASSERT(mc->mc_mblocks != mpo_mblock);
153010106SJason.Beloro@Sun.COM kmem_free((caddr_t)mc->mc_mblocks, mc->mc_alloc_sz);
153110106SJason.Beloro@Sun.COM }
153210106SJason.Beloro@Sun.COM bzero(mc, sizeof (*mc));
153310106SJason.Beloro@Sun.COM }
153410106SJason.Beloro@Sun.COM
153510106SJason.Beloro@Sun.COM /*
153610106SJason.Beloro@Sun.COM * mblock_install
153710106SJason.Beloro@Sun.COM *
153810106SJason.Beloro@Sun.COM * Install mblock config passed in mc as the global configuration.
153910106SJason.Beloro@Sun.COM * May only be called at boot or while holding mpo_wr_lock.
154010106SJason.Beloro@Sun.COM */
154110106SJason.Beloro@Sun.COM
154210106SJason.Beloro@Sun.COM static void
mblock_install(mpo_config_t * mc)154310106SJason.Beloro@Sun.COM mblock_install(mpo_config_t *mc)
154410106SJason.Beloro@Sun.COM {
154510106SJason.Beloro@Sun.COM mpo_mblock = mc->mc_mblocks;
154610106SJason.Beloro@Sun.COM n_mblocks = mc->mc_nmblocks;
154710106SJason.Beloro@Sun.COM mem_stripes = mc->mc_stripes;
154810106SJason.Beloro@Sun.COM n_mem_stripes = mc->mc_nstripes;
154910106SJason.Beloro@Sun.COM base_ra_to_pa_pfn = btop(mc->mc_mblocks[0].ra_to_pa);
155010106SJason.Beloro@Sun.COM mpo_config = *mc;
155110106SJason.Beloro@Sun.COM }
155210106SJason.Beloro@Sun.COM
/*
 * mblock_update
 *
 * Traverse mblocknodes, read the mblock properties (base, size,
 * ra-to-pa offset) from the MD, and save the mblocks in mc->mc_mblocks.
 * Zero-sized mblocks are dropped and mc->mc_nmblocks is trimmed to the
 * number of entries actually kept.  On any fatal inconsistency (missing
 * base/size property, base+size wraps around, or no non-empty mblocks),
 * mc->mc_nmblocks is set to 0 and the function returns early.
 *
 * NOTE(review): the md parameter is declared "md_t md" (by value), but
 * the only visible caller, mblock_update_add(), declares "md_t *md"
 * and passes that handle here — confirm against sys/mdesc.h whether
 * this parameter should be "md_t *md".
 */

static void
mblock_update(mpo_config_t *mc, md_t md, mde_cookie_t *mblocknodes)
{
	uint64_t i, j;	/* i: output (kept) index, j: MD node index */
	int result = 0;
	mblock_md_t *mblock = mc->mc_mblocks;

	for (i = 0, j = 0; j < mc->mc_nmblocks; j++) {

		/* Without a base or size value we will fail */
		result = get_int(md, mblocknodes[j], PROP_LG_BASE,
		    &mblock[i].base);
		if (result < 0) {
			MPO_STATUS("mblock_update: "
			    "PROP_LG_BASE is missing\n");
			mc->mc_nmblocks = 0;
			return;
		}

		result = get_int(md, mblocknodes[j], PROP_LG_SIZE,
		    &mblock[i].size);
		if (result < 0) {
			MPO_STATUS("mblock_update: "
			    "PROP_LG_SIZE is missing\n");
			mc->mc_nmblocks = 0;
			return;
		}

		result = get_int(md, mblocknodes[j],
		    PROP_LG_RA_PA_OFFSET, &mblock[i].ra_to_pa);

		/* If we don't have an ra_pa_offset, just set it to 0 */
		if (result < 0)
			mblock[i].ra_to_pa = 0;

		MPO_DEBUG("mblock[%ld]: base = %lx, size = %lx, "
		    "ra_to_pa = %lx\n", i,
		    mblock[i].base,
		    mblock[i].size,
		    mblock[i].ra_to_pa);

		/*
		 * check for unsupportable values of base and size:
		 * base + size wrapping past the top of the address
		 * space (unsigned overflow) is rejected here.
		 */
		if (mblock[i].base > mblock[i].base + mblock[i].size) {
			MPO_STATUS("mblock_update: "
			    "PROP_LG_BASE+PROP_LG_SIZE is invalid: "
			    "base = %lx, size = %lx\n",
			    mblock[i].base, mblock[i].size);
			mc->mc_nmblocks = 0;
			return;
		}

		/* eliminate size==0 blocks */
		if (mblock[i].size != 0) {
			uint64_t base = mblock[i].base;
			uint64_t end = base + mblock[i].size;
			ASSERT(end > base);
			/* end_pfn is inclusive, hence the (end - 1) */
			mblock[i].base_pfn = btop(base);
			mblock[i].end_pfn = btop(end - 1);
			i++;	/* keep this entry; advance output index */
		}
	}

	if (i == 0) {
		MPO_STATUS("mblock_update: "
		    "No non-empty mblock nodes were found "
		    "in the Machine Descriptor\n");
		mc->mc_nmblocks = 0;
		return;
	}
	ASSERT(i <= mc->mc_nmblocks);
	mc->mc_nmblocks = i;

	/* Must sort mblocks by address for mem_node_iterator_init() */
	mblock_sort(mblock, mc->mc_nmblocks);
}
163510106SJason.Beloro@Sun.COM
163610106SJason.Beloro@Sun.COM /*
163710106SJason.Beloro@Sun.COM * mblock_update_add
163810106SJason.Beloro@Sun.COM *
163910106SJason.Beloro@Sun.COM * Update mblock config after a memory DR add. The added range is not
164010106SJason.Beloro@Sun.COM * needed, as we read *all* mblock nodes from the MD. Save the mblocks
164110106SJason.Beloro@Sun.COM * in mc.
164210106SJason.Beloro@Sun.COM */
164310106SJason.Beloro@Sun.COM
164410106SJason.Beloro@Sun.COM static void
mblock_update_add(mpo_config_t * mc)164510106SJason.Beloro@Sun.COM mblock_update_add(mpo_config_t *mc)
164610106SJason.Beloro@Sun.COM {
164710106SJason.Beloro@Sun.COM md_t *md;
164810106SJason.Beloro@Sun.COM mde_cookie_t root, *mblocknodes;
164910106SJason.Beloro@Sun.COM int nmblocks = 0;
165010106SJason.Beloro@Sun.COM
165110106SJason.Beloro@Sun.COM if ((md = md_get_handle()) == NULL) {
165210106SJason.Beloro@Sun.COM MPO_STATUS("Cannot access Machine Descriptor\n");
165310106SJason.Beloro@Sun.COM goto error;
165410106SJason.Beloro@Sun.COM }
165510106SJason.Beloro@Sun.COM
165610106SJason.Beloro@Sun.COM if ((root = md_get_root(md)) == MDE_INVAL_ELEM_COOKIE)
165710106SJason.Beloro@Sun.COM goto error;
165810106SJason.Beloro@Sun.COM
165910106SJason.Beloro@Sun.COM nmblocks = md_alloc_scan_dag(md, root, PROP_LG_MBLOCK, "fwd",
166010106SJason.Beloro@Sun.COM &mblocknodes);
166110106SJason.Beloro@Sun.COM if (nmblocks <= 0) {
166210106SJason.Beloro@Sun.COM MPO_STATUS("No mblock nodes detected in Machine Descriptor\n");
166310106SJason.Beloro@Sun.COM goto error;
166410106SJason.Beloro@Sun.COM }
166510106SJason.Beloro@Sun.COM
166610106SJason.Beloro@Sun.COM if (mblock_alloc(mc, U_ADD, nmblocks) < 0)
166710106SJason.Beloro@Sun.COM goto error;
166810106SJason.Beloro@Sun.COM
166910106SJason.Beloro@Sun.COM mblock_update(mc, md, mblocknodes);
167010106SJason.Beloro@Sun.COM md_free_scan_dag(md, &mblocknodes);
167110106SJason.Beloro@Sun.COM (void) md_fini_handle(md);
167210106SJason.Beloro@Sun.COM return;
167310106SJason.Beloro@Sun.COM error:
167410106SJason.Beloro@Sun.COM panic("mblock_update_add: cannot process mblocks from MD.\n");
167510106SJason.Beloro@Sun.COM }
167610106SJason.Beloro@Sun.COM
/*
 * mblock_update_del
 *
 * Update mblocks after a memory DR deletion of the range (ubase, uend).
 * Allocate a new mblock config, copy old config to the new, modify the new
 * mblocks to reflect the deletion.   The new mblocks are returned in
 * mc_new and are not yet installed as the active config.
 */

static void
mblock_update_del(mpo_config_t *mc_new, mpo_config_t *mc_old, pfn_t ubase,
    pfn_t uend)
{
	int i, j;
	pfn_t base, end;	/* pfn bounds of the mblock being examined */
	mblock_md_t *mblock;
	int nmblocks = mc_old->mc_nmblocks;

	MPO_DEBUG("mblock_update_del(0x%lx, 0x%lx)\n", ubase, uend);

	/*
	 * Allocate mblocks in mc_new and copy the old to the new.
	 * Allocate one extra in case the deletion splits an mblock.
	 */
	if (mblock_alloc(mc_new, U_DEL, nmblocks + 1) < 0)
		return;
	mblock = mc_new->mc_mblocks;
	bcopy(mc_old->mc_mblocks, mblock, nmblocks * sizeof (mblock_md_t));

	/*
	 * Find the mblock containing the deleted range and adjust it in
	 * the new config.
	 */
	for (i = 0; i < nmblocks; i++) {

		base = btop(mblock[i].base);
		end = base + btop(mblock[i].size) - 1;	/* inclusive */

		/*
		 * Adjust the mblock based on the subset that was deleted.
		 *
		 * If the entire mblk was deleted, compact the table.
		 *
		 * If the middle of the mblk was deleted, extend
		 * the table.  Space for the new slot was already
		 * allocated.
		 *
		 * The memory to be deleted is an mblock or a subset of
		 * one, and does not span multiple mblocks.
		 */
		if (base == ubase && end == uend) {
			/* whole mblock deleted: close the gap in the table */
			for (j = i; j < nmblocks - 1; j++)
				mblock[j] = mblock[j + 1];
			nmblocks--;
			bzero(&mblock[nmblocks], sizeof (*mblock));
			break;
		} else if (base < ubase && end > uend) {
			/* middle deleted: split into two mblocks */
			for (j = nmblocks - 1; j >= i; j--)
				mblock[j + 1] = mblock[j];
			mblock[i].size = ptob(ubase - base);
			mblock[i].end_pfn = ubase - 1;
			mblock[i + 1].base = ptob(uend + 1);
			mblock[i + 1].size = ptob(end - uend);
			mblock[i + 1].base_pfn = uend + 1;
			nmblocks++;
			break;
		} else if (base == ubase) {
			/* head of the mblock deleted: advance the base */
			MPO_DEBUG("mblock_update_del: shrink>"
			    " i=%d base=0x%lx end=0x%lx", i, base, end);
			mblock[i].base = ptob(uend + 1);
			mblock[i].size -= ptob(uend - ubase + 1);
			base = uend + 1;
			mblock[i].base_pfn = base;
			mblock[i].end_pfn = end;
			MPO_DEBUG(" nbase=0x%lx nend=0x%lx\n", base, end);
			break;
		} else if (end == uend) {
			/* tail of the mblock deleted: shrink the size */
			MPO_DEBUG("mblock_update_del: shrink<"
			    " i=%d base=0x%lx end=0x%lx", i, base, end);
			mblock[i].size -= ptob(uend - ubase + 1);
			end = ubase - 1;
			mblock[i].base_pfn = base;
			mblock[i].end_pfn = end;
			MPO_DEBUG(" nbase=0x%lx nend=0x%lx\n", base, end);
			break;
		}
	}
	mc_new->mc_nmblocks = nmblocks;
	/*
	 * NOTE(review): base/end here hold values from the matched loop
	 * iteration; this ASSERT presumes the deleted range matched one
	 * of the cases above (i.e. the caller validated the slice) —
	 * confirm against VALIDATE_SLICE in the callers.
	 */
	ASSERT(end > base);
}
176710106SJason.Beloro@Sun.COM
/*
 * mstripe_update
 *
 * Read mblocks from mc and update mstripes in mc.  Each mblock yields
 * up to one stripe per locality group: memory is interleaved across
 * lgroups in chunks of ptob(mnode_pages) bytes, so each mnode owns
 * every max_locality_groups-th chunk of an mblock.
 */

static void
mstripe_update(mpo_config_t *mc)
{
	lgrp_handle_t lgrphand, lgrp_start;
	int i, mnode;
	uint64_t offset, stripe_end, base, end, ra_to_pa, stride;
	uint64_t stripe, frag, remove;
	mem_stripe_t *ms;
	mblock_md_t *mblock = mc->mc_mblocks;
	int nmblocks = mc->mc_nmblocks;
	int mstripesz = MAX_MEM_NODES * nmblocks * sizeof (mem_stripe_t);

	/* Check for non-MPO sun4v platforms or memory DR removal */
	if (n_locality_groups <= 1) {
		ASSERT(n_locality_groups == 1);
		ASSERT(max_locality_groups == 1 && max_mem_nodes == 1);

		/*
		 * With a single mnode, stripes are only needed to
		 * describe multiple discontiguous mblocks.
		 */
		if (nmblocks == 1) {
			mc->mc_nstripes = 0;
		} else {
			mc->mc_nstripes = nmblocks;
			bzero(mc->mc_stripes, mstripesz);
			for (i = 0; i < nmblocks; i++) {
				mc->mc_stripes[i].exists = 1;
				mc->mc_stripes[i].physbase = mblock[i].base_pfn;
				mc->mc_stripes[i].physmax = mblock[i].end_pfn;
			}
		}
		return;
	}

	bzero(mc->mc_stripes, mstripesz);
	mc->mc_nstripes = max_locality_groups * nmblocks;
	/* stripe: bytes per interleave chunk; stride: one full rotation */
	stripe = ptob(mnode_pages);
	stride = max_locality_groups * stripe;

	for (i = 0; i < nmblocks; i++) {
		base = mblock[i].base;
		end = base + mblock[i].size;
		ra_to_pa = mblock[i].ra_to_pa;

		/* Find the offset from the prev stripe boundary in PA space. */
		offset = (base + ra_to_pa) & (stripe - 1);

		/* Set the next stripe boundary. */
		stripe_end = base - offset + stripe;

		/* Home lgroup of the first chunk, from the PA home bits. */
		lgrp_start = (((base + ra_to_pa) & home_mask) >>
		    home_mask_shift);
		lgrphand = lgrp_start;

		/*
		 * Loop over all lgroups covered by the mblock, creating a
		 * stripe for each.  Stop when lgrp_start is visited again.
		 */
		do {
			/* mblock may not span all lgroups */
			if (base >= end)
				break;

			mnode = lgrphand;
			ASSERT(mnode < max_mem_nodes);

			/*
			 * Calculate the size of the fragment that does not
			 * belong to the mnode in the last partial stride.
			 */
			frag = (end - (base - offset)) & (stride - 1);
			if (frag == 0) {
				/* remove the gap */
				remove = stride - stripe;
			} else if (frag < stripe) {
				/* fragment fits in stripe; keep it all */
				remove = 0;
			} else {
				/* fragment is large; trim after whole stripe */
				remove = frag - stripe;
			}

			/* stripes are stored mblock-major */
			ms = &mc->mc_stripes[i * max_locality_groups + mnode];
			ms->physbase = btop(base);
			ms->physmax = btop(end - 1 - remove);
			ms->offset = btop(offset);
			ms->exists = 1;

			/*
			 * Advance to the next chunk; chunks after the
			 * first are stripe-aligned, so offset becomes 0.
			 */
			base = stripe_end;
			stripe_end += stripe;
			offset = 0;
			lgrphand = (((base + ra_to_pa) & home_mask) >>
			    home_mask_shift);
		} while (lgrphand != lgrp_start);
	}
}
186710106SJason.Beloro@Sun.COM
/*
 * INTERSECT(a, b, c, d)
 *
 * Intersect the inclusive range [a, b] with [c, d], storing the result
 * back into (c, d).  If the ranges are disjoint, execute "continue"
 * instead, skipping the remainder of the enclosing loop body.
 *
 * NOTE: deliberately unhygienic — the embedded "continue" means this
 * macro may only be used directly inside a loop, and the arguments are
 * evaluated multiple times, so they must be free of side effects.
 */
#define	INTERSECT(a, b, c, d) \
	if (((a) >= (c) && (a) <= (d)) || \
	    ((c) >= (a) && (c) <= (b))) { \
		(c) = MAX((a), (c)); \
		(d) = MIN((b), (d)); \
	} else { \
		ASSERT((a) >= (d) || (b) <= (c)); \
		continue; \
	} \

187810106SJason.Beloro@Sun.COM /*
187910106SJason.Beloro@Sun.COM * mnode_update
188010106SJason.Beloro@Sun.COM *
188110106SJason.Beloro@Sun.COM * Read stripes from mc and update mnode extents. The mnode extents are
188210106SJason.Beloro@Sun.COM * part of the live configuration, so this can only be done at boot time
188310106SJason.Beloro@Sun.COM * or while holding the mpo_wr_lock.
188410106SJason.Beloro@Sun.COM */
188510106SJason.Beloro@Sun.COM
188610106SJason.Beloro@Sun.COM static void
mnode_update(mpo_config_t * mc,pfn_t ubase,pfn_t uend,update_t utype)188710106SJason.Beloro@Sun.COM mnode_update(mpo_config_t *mc, pfn_t ubase, pfn_t uend, update_t utype)
188810106SJason.Beloro@Sun.COM {
188910106SJason.Beloro@Sun.COM int i, j, mnode, found;
189010106SJason.Beloro@Sun.COM pfn_t base, end;
189110106SJason.Beloro@Sun.COM mem_stripe_t *ms;
189210106SJason.Beloro@Sun.COM
189310106SJason.Beloro@Sun.COM MPO_DEBUG("mnode_udpate: basepfn: %lx endpfn: %lx\n", ubase, uend);
189410106SJason.Beloro@Sun.COM
189510106SJason.Beloro@Sun.COM if (n_locality_groups <= 1 && mc->mc_nmblocks == 1) {
189610106SJason.Beloro@Sun.COM if (utype == U_ADD)
189710106SJason.Beloro@Sun.COM mpo_mem_node_add_slice(ubase, uend);
189810106SJason.Beloro@Sun.COM else if (utype == U_DEL)
189910106SJason.Beloro@Sun.COM mpo_mem_node_del_slice(ubase, uend);
190010106SJason.Beloro@Sun.COM else
190110106SJason.Beloro@Sun.COM panic("mnode update: %d: invalid\n", utype);
190210106SJason.Beloro@Sun.COM return;
190310106SJason.Beloro@Sun.COM }
190410106SJason.Beloro@Sun.COM
190510106SJason.Beloro@Sun.COM found = 0;
190610106SJason.Beloro@Sun.COM for (i = 0; i < mc->mc_nmblocks; i++) {
190710106SJason.Beloro@Sun.COM for (mnode = 0; mnode < max_locality_groups; mnode++) {
190810106SJason.Beloro@Sun.COM
190910106SJason.Beloro@Sun.COM j = i * max_locality_groups + mnode;
191010106SJason.Beloro@Sun.COM ms = &mc->mc_stripes[j];
191110106SJason.Beloro@Sun.COM if (!ms->exists)
191210106SJason.Beloro@Sun.COM continue;
191310106SJason.Beloro@Sun.COM
191410106SJason.Beloro@Sun.COM base = ms->physbase;
191510106SJason.Beloro@Sun.COM end = ms->physmax;
191610106SJason.Beloro@Sun.COM
191710106SJason.Beloro@Sun.COM /*
191810106SJason.Beloro@Sun.COM * Look for the mstripes intersecting this slice.
191910106SJason.Beloro@Sun.COM *
192010106SJason.Beloro@Sun.COM * The mstripe and slice pairs may not be equal
192110106SJason.Beloro@Sun.COM * if a subset of a mblock is added/deleted.
192210106SJason.Beloro@Sun.COM */
192310106SJason.Beloro@Sun.COM switch (utype) {
192410106SJason.Beloro@Sun.COM case U_ADD:
192510106SJason.Beloro@Sun.COM INTERSECT(ubase, uend, base, end);
192610106SJason.Beloro@Sun.COM /*FALLTHROUGH*/
192710106SJason.Beloro@Sun.COM case U_ADD_ALL:
192810106SJason.Beloro@Sun.COM if (n_locality_groups > 1)
192910106SJason.Beloro@Sun.COM mpo_plat_assign_lgrphand_to_mem_node(
193010106SJason.Beloro@Sun.COM mnode, mnode);
193110106SJason.Beloro@Sun.COM mpo_mem_node_add_slice(base, end);
193210106SJason.Beloro@Sun.COM break;
193310106SJason.Beloro@Sun.COM case U_DEL:
193410106SJason.Beloro@Sun.COM INTERSECT(ubase, uend, base, end);
193510106SJason.Beloro@Sun.COM mpo_mem_node_del_slice(base, end);
193610106SJason.Beloro@Sun.COM break;
193710106SJason.Beloro@Sun.COM default:
193810106SJason.Beloro@Sun.COM panic("mnode_update: %d: invalid\n", utype);
193910106SJason.Beloro@Sun.COM break;
194010106SJason.Beloro@Sun.COM }
194110106SJason.Beloro@Sun.COM
194210106SJason.Beloro@Sun.COM found++;
194310106SJason.Beloro@Sun.COM }
194410106SJason.Beloro@Sun.COM }
194510106SJason.Beloro@Sun.COM
194610106SJason.Beloro@Sun.COM if (!found)
194710106SJason.Beloro@Sun.COM panic("mnode_update: mstripe not found");
194810106SJason.Beloro@Sun.COM
194910106SJason.Beloro@Sun.COM #ifdef DEBUG
195010106SJason.Beloro@Sun.COM if (utype == U_ADD_ALL || utype == U_DEL)
195110106SJason.Beloro@Sun.COM return;
195210106SJason.Beloro@Sun.COM found = 0;
195310106SJason.Beloro@Sun.COM for (i = 0; i < max_mem_nodes; i++) {
195410106SJason.Beloro@Sun.COM if (!mem_node_config[i].exists)
195510106SJason.Beloro@Sun.COM continue;
195610106SJason.Beloro@Sun.COM if (ubase >= mem_node_config[i].physbase &&
195710106SJason.Beloro@Sun.COM ubase <= mem_node_config[i].physmax)
195810106SJason.Beloro@Sun.COM found |= 1;
195910106SJason.Beloro@Sun.COM if (uend >= mem_node_config[i].physbase &&
196010106SJason.Beloro@Sun.COM uend <= mem_node_config[i].physmax)
196110106SJason.Beloro@Sun.COM found |= 2;
196210106SJason.Beloro@Sun.COM }
196310106SJason.Beloro@Sun.COM ASSERT(found == 3);
196410106SJason.Beloro@Sun.COM {
196510106SJason.Beloro@Sun.COM pfn_t minpfn, maxpfn;
196610106SJason.Beloro@Sun.COM
196710106SJason.Beloro@Sun.COM mem_node_max_range(&minpfn, &maxpfn);
196810106SJason.Beloro@Sun.COM ASSERT(minpfn <= ubase);
196910106SJason.Beloro@Sun.COM ASSERT(maxpfn >= uend);
197010106SJason.Beloro@Sun.COM }
197110106SJason.Beloro@Sun.COM #endif
197210106SJason.Beloro@Sun.COM }
197310106SJason.Beloro@Sun.COM
197410106SJason.Beloro@Sun.COM /*
197510106SJason.Beloro@Sun.COM * Plat_slice_add()/plat_slice_del() are the platform hooks
197610106SJason.Beloro@Sun.COM * for adding/deleting a pfn range to/from the system.
197710106SJason.Beloro@Sun.COM *
197810106SJason.Beloro@Sun.COM  * plat_slice_add() is used for both boot/DR cases.
197910106SJason.Beloro@Sun.COM *
198010106SJason.Beloro@Sun.COM * - Zeus has already added the mblocks to the MD, so read the updated
198110106SJason.Beloro@Sun.COM * MD and allocate all data structures required to manage the new memory
198210106SJason.Beloro@Sun.COM * configuration.
198310106SJason.Beloro@Sun.COM *
198410106SJason.Beloro@Sun.COM * - Recompute the stripes which are derived from the mblocks.
198510106SJason.Beloro@Sun.COM *
198610106SJason.Beloro@Sun.COM * - Update (expand) the mnode extents and install the modified mblocks as
198710106SJason.Beloro@Sun.COM * the new mpo config. This must be done while holding the mpo_wr_lock
198810106SJason.Beloro@Sun.COM * to guarantee that no other threads access the mpo meta-data.
198910106SJason.Beloro@Sun.COM *
199010106SJason.Beloro@Sun.COM * - Unlock MPO data structures; the new config is live. Free the old config.
199110106SJason.Beloro@Sun.COM *
199210106SJason.Beloro@Sun.COM * Plat_slice_del() is used for DR only.
199310106SJason.Beloro@Sun.COM *
199410106SJason.Beloro@Sun.COM * - Zeus has not yet modified the MD to reflect the deletion, so copy
199510106SJason.Beloro@Sun.COM * the old mpo mblocks and delete the range from the copy.
199610106SJason.Beloro@Sun.COM *
199710106SJason.Beloro@Sun.COM * - Recompute the stripes which are derived from the mblocks.
199810106SJason.Beloro@Sun.COM *
199910106SJason.Beloro@Sun.COM * - Update (shrink) the mnode extents and install the modified mblocks as
200010106SJason.Beloro@Sun.COM * the new mpo config. This must be done while holding the mpo_wr_lock
200110106SJason.Beloro@Sun.COM * to guarantee that no other threads access the mpo meta-data.
200210106SJason.Beloro@Sun.COM *
200310106SJason.Beloro@Sun.COM * - Unlock MPO data structures; the new config is live. Free the old config.
200410106SJason.Beloro@Sun.COM */
200510106SJason.Beloro@Sun.COM
200610106SJason.Beloro@Sun.COM void
plat_slice_add(pfn_t base,pfn_t end)200710106SJason.Beloro@Sun.COM plat_slice_add(pfn_t base, pfn_t end)
200810106SJason.Beloro@Sun.COM {
200910106SJason.Beloro@Sun.COM mpo_config_t old_config = mpo_config;
201010106SJason.Beloro@Sun.COM mpo_config_t new_config;
201110106SJason.Beloro@Sun.COM
201210106SJason.Beloro@Sun.COM VALIDATE_SLICE(base, end);
201310106SJason.Beloro@Sun.COM mblock_update_add(&new_config);
201410106SJason.Beloro@Sun.COM mstripe_update(&new_config);
201510106SJason.Beloro@Sun.COM mpo_wr_lock();
201610106SJason.Beloro@Sun.COM mblock_install(&new_config);
201710106SJason.Beloro@Sun.COM /* Use new config to add all ranges for mnode_update */
201810106SJason.Beloro@Sun.COM mnode_update(&new_config, base, end, U_ADD);
201910106SJason.Beloro@Sun.COM mpo_genid++;
202010106SJason.Beloro@Sun.COM mpo_wr_unlock();
202110106SJason.Beloro@Sun.COM mblock_free(&old_config);
202210106SJason.Beloro@Sun.COM }
202310106SJason.Beloro@Sun.COM
202410106SJason.Beloro@Sun.COM void
plat_slice_del(pfn_t base,pfn_t end)202510106SJason.Beloro@Sun.COM plat_slice_del(pfn_t base, pfn_t end)
202610106SJason.Beloro@Sun.COM {
202710106SJason.Beloro@Sun.COM mpo_config_t old_config = mpo_config;
202810106SJason.Beloro@Sun.COM mpo_config_t new_config;
202910106SJason.Beloro@Sun.COM
203010106SJason.Beloro@Sun.COM VALIDATE_SLICE(base, end);
203110106SJason.Beloro@Sun.COM mblock_update_del(&new_config, &old_config, base, end);
203210106SJason.Beloro@Sun.COM mstripe_update(&new_config);
203310106SJason.Beloro@Sun.COM mpo_wr_lock();
203410106SJason.Beloro@Sun.COM /* Use old config to find deleted range for mnode_update */
203510106SJason.Beloro@Sun.COM mnode_update(&old_config, base, end, U_DEL);
203610106SJason.Beloro@Sun.COM mblock_install(&new_config);
203710106SJason.Beloro@Sun.COM mpo_genid++;
203810106SJason.Beloro@Sun.COM mpo_wr_unlock();
203910106SJason.Beloro@Sun.COM mblock_free(&old_config);
204010106SJason.Beloro@Sun.COM }
2041