/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate
260Sstevel@tonic-gate #include <sys/systm.h>
270Sstevel@tonic-gate #include <sys/sysmacros.h>
280Sstevel@tonic-gate #include <sys/bootconf.h>
290Sstevel@tonic-gate #include <sys/atomic.h>
300Sstevel@tonic-gate #include <sys/lgrp.h>
310Sstevel@tonic-gate #include <sys/memlist.h>
320Sstevel@tonic-gate #include <sys/memnode.h>
330Sstevel@tonic-gate #include <sys/platform_module.h>
34414Skchow #include <vm/vm_dep.h>
350Sstevel@tonic-gate
360Sstevel@tonic-gate int max_mem_nodes = 1;
370Sstevel@tonic-gate
380Sstevel@tonic-gate struct mem_node_conf mem_node_config[MAX_MEM_NODES];
390Sstevel@tonic-gate int mem_node_pfn_shift;
400Sstevel@tonic-gate /*
410Sstevel@tonic-gate * num_memnodes should be updated atomically and always >=
420Sstevel@tonic-gate * the number of bits in memnodes_mask or the algorithm may fail.
430Sstevel@tonic-gate */
440Sstevel@tonic-gate uint16_t num_memnodes;
450Sstevel@tonic-gate mnodeset_t memnodes_mask; /* assumes 8*(sizeof(mnodeset_t)) >= MAX_MEM_NODES */
460Sstevel@tonic-gate
470Sstevel@tonic-gate /*
480Sstevel@tonic-gate * If set, mem_node_physalign should be a power of two, and
490Sstevel@tonic-gate * should reflect the minimum address alignment of each node.
500Sstevel@tonic-gate */
510Sstevel@tonic-gate uint64_t mem_node_physalign;
520Sstevel@tonic-gate
530Sstevel@tonic-gate /*
540Sstevel@tonic-gate * Platform hooks we will need.
550Sstevel@tonic-gate */
560Sstevel@tonic-gate
570Sstevel@tonic-gate #pragma weak plat_build_mem_nodes
580Sstevel@tonic-gate #pragma weak plat_slice_add
590Sstevel@tonic-gate #pragma weak plat_slice_del
600Sstevel@tonic-gate
610Sstevel@tonic-gate /*
620Sstevel@tonic-gate * Adjust the memnode config after a DR operation.
630Sstevel@tonic-gate *
640Sstevel@tonic-gate * It is rather tricky to do these updates since we can't
650Sstevel@tonic-gate * protect the memnode structures with locks, so we must
660Sstevel@tonic-gate * be mindful of the order in which updates and reads to
670Sstevel@tonic-gate * these values can occur.
680Sstevel@tonic-gate */
690Sstevel@tonic-gate
700Sstevel@tonic-gate void
mem_node_add_slice(pfn_t start,pfn_t end)710Sstevel@tonic-gate mem_node_add_slice(pfn_t start, pfn_t end)
720Sstevel@tonic-gate {
730Sstevel@tonic-gate int mnode;
740Sstevel@tonic-gate mnodeset_t newmask, oldmask;
750Sstevel@tonic-gate
760Sstevel@tonic-gate /*
770Sstevel@tonic-gate * DR will pass us the first pfn that is allocatable.
780Sstevel@tonic-gate * We need to round down to get the real start of
790Sstevel@tonic-gate * the slice.
800Sstevel@tonic-gate */
810Sstevel@tonic-gate if (mem_node_physalign) {
820Sstevel@tonic-gate start &= ~(btop(mem_node_physalign) - 1);
830Sstevel@tonic-gate end = roundup(end, btop(mem_node_physalign)) - 1;
840Sstevel@tonic-gate }
850Sstevel@tonic-gate
860Sstevel@tonic-gate mnode = PFN_2_MEM_NODE(start);
87*12004Sjiang.liu@intel.com ASSERT(mnode >= 0 && mnode < max_mem_nodes);
880Sstevel@tonic-gate
890Sstevel@tonic-gate if (cas32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
900Sstevel@tonic-gate /*
910Sstevel@tonic-gate * Add slice to existing node.
920Sstevel@tonic-gate */
930Sstevel@tonic-gate if (start < mem_node_config[mnode].physbase)
940Sstevel@tonic-gate mem_node_config[mnode].physbase = start;
950Sstevel@tonic-gate if (end > mem_node_config[mnode].physmax)
960Sstevel@tonic-gate mem_node_config[mnode].physmax = end;
970Sstevel@tonic-gate } else {
980Sstevel@tonic-gate mem_node_config[mnode].physbase = start;
990Sstevel@tonic-gate mem_node_config[mnode].physmax = end;
1000Sstevel@tonic-gate atomic_add_16(&num_memnodes, 1);
1010Sstevel@tonic-gate do {
1020Sstevel@tonic-gate oldmask = memnodes_mask;
1030Sstevel@tonic-gate newmask = memnodes_mask | (1ull << mnode);
1040Sstevel@tonic-gate } while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
1050Sstevel@tonic-gate }
1060Sstevel@tonic-gate
1070Sstevel@tonic-gate /*
1080Sstevel@tonic-gate * Inform the common lgrp framework about the new memory
1090Sstevel@tonic-gate */
1100Sstevel@tonic-gate lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, MEM_NODE_2_LGRPHAND(mnode));
1110Sstevel@tonic-gate }
1120Sstevel@tonic-gate
1130Sstevel@tonic-gate /*
1140Sstevel@tonic-gate * Remove a PFN range from a memnode. On some platforms,
1150Sstevel@tonic-gate * the memnode will be created with physbase at the first
1160Sstevel@tonic-gate * allocatable PFN, but later deleted with the MC slice
1170Sstevel@tonic-gate * base address converted to a PFN, in which case we need
1180Sstevel@tonic-gate * to assume physbase and up.
1190Sstevel@tonic-gate */
1200Sstevel@tonic-gate void
mem_node_del_slice(pfn_t start,pfn_t end)12110106SJason.Beloro@Sun.COM mem_node_del_slice(pfn_t start, pfn_t end)
1220Sstevel@tonic-gate {
1230Sstevel@tonic-gate int mnode;
1240Sstevel@tonic-gate pgcnt_t delta_pgcnt, node_size;
1250Sstevel@tonic-gate mnodeset_t omask, nmask;
1260Sstevel@tonic-gate
1270Sstevel@tonic-gate if (mem_node_physalign) {
1280Sstevel@tonic-gate start &= ~(btop(mem_node_physalign) - 1);
1290Sstevel@tonic-gate end = roundup(end, btop(mem_node_physalign)) - 1;
1300Sstevel@tonic-gate }
1310Sstevel@tonic-gate mnode = PFN_2_MEM_NODE(start);
1320Sstevel@tonic-gate
133*12004Sjiang.liu@intel.com ASSERT(mnode >= 0 && mnode < max_mem_nodes);
1340Sstevel@tonic-gate ASSERT(mem_node_config[mnode].exists == 1);
1350Sstevel@tonic-gate
13610106SJason.Beloro@Sun.COM delta_pgcnt = end - start;
13710106SJason.Beloro@Sun.COM node_size = mem_node_config[mnode].physmax -
13810106SJason.Beloro@Sun.COM mem_node_config[mnode].physbase;
13910106SJason.Beloro@Sun.COM
14010106SJason.Beloro@Sun.COM if (node_size > delta_pgcnt) {
14110106SJason.Beloro@Sun.COM /*
14210106SJason.Beloro@Sun.COM * Subtract the slice from the memnode.
14310106SJason.Beloro@Sun.COM */
14410106SJason.Beloro@Sun.COM if (start <= mem_node_config[mnode].physbase)
14510106SJason.Beloro@Sun.COM mem_node_config[mnode].physbase = end + 1;
14610106SJason.Beloro@Sun.COM ASSERT(end <= mem_node_config[mnode].physmax);
14710106SJason.Beloro@Sun.COM if (end == mem_node_config[mnode].physmax)
14810106SJason.Beloro@Sun.COM mem_node_config[mnode].physmax = start - 1;
14910106SJason.Beloro@Sun.COM } else {
15010106SJason.Beloro@Sun.COM /*
15110106SJason.Beloro@Sun.COM * Let the common lgrp framework know this mnode is
15210106SJason.Beloro@Sun.COM * leaving
15310106SJason.Beloro@Sun.COM */
15410106SJason.Beloro@Sun.COM lgrp_config(LGRP_CONFIG_MEM_DEL,
15510106SJason.Beloro@Sun.COM mnode, MEM_NODE_2_LGRPHAND(mnode));
1560Sstevel@tonic-gate
15710106SJason.Beloro@Sun.COM /*
15810106SJason.Beloro@Sun.COM * Delete the whole node.
15910106SJason.Beloro@Sun.COM */
16010106SJason.Beloro@Sun.COM ASSERT(MNODE_PGCNT(mnode) == 0);
16110106SJason.Beloro@Sun.COM do {
16210106SJason.Beloro@Sun.COM omask = memnodes_mask;
16310106SJason.Beloro@Sun.COM nmask = omask & ~(1ull << mnode);
16410106SJason.Beloro@Sun.COM } while (cas64(&memnodes_mask, omask, nmask) != omask);
16510106SJason.Beloro@Sun.COM atomic_add_16(&num_memnodes, -1);
16610106SJason.Beloro@Sun.COM mem_node_config[mnode].exists = 0;
16710106SJason.Beloro@Sun.COM }
16810106SJason.Beloro@Sun.COM }
1690Sstevel@tonic-gate
17010106SJason.Beloro@Sun.COM void
mem_node_add_range(pfn_t start,pfn_t end)17110106SJason.Beloro@Sun.COM mem_node_add_range(pfn_t start, pfn_t end)
17210106SJason.Beloro@Sun.COM {
17310106SJason.Beloro@Sun.COM if (&plat_slice_add)
17410106SJason.Beloro@Sun.COM plat_slice_add(start, end);
17510106SJason.Beloro@Sun.COM else
17610106SJason.Beloro@Sun.COM mem_node_add_slice(start, end);
17710106SJason.Beloro@Sun.COM }
1780Sstevel@tonic-gate
17910106SJason.Beloro@Sun.COM void
mem_node_del_range(pfn_t start,pfn_t end)18010106SJason.Beloro@Sun.COM mem_node_del_range(pfn_t start, pfn_t end)
18110106SJason.Beloro@Sun.COM {
18210106SJason.Beloro@Sun.COM if (&plat_slice_del)
18310106SJason.Beloro@Sun.COM plat_slice_del(start, end);
18410106SJason.Beloro@Sun.COM else
18510106SJason.Beloro@Sun.COM mem_node_del_slice(start, end);
1860Sstevel@tonic-gate }
1870Sstevel@tonic-gate
1880Sstevel@tonic-gate void
startup_build_mem_nodes(struct memlist * list)1890Sstevel@tonic-gate startup_build_mem_nodes(struct memlist *list)
1900Sstevel@tonic-gate {
1910Sstevel@tonic-gate pfn_t start, end;
1920Sstevel@tonic-gate
1930Sstevel@tonic-gate /* LINTED: ASSERT will always true or false */
1940Sstevel@tonic-gate ASSERT(NBBY * sizeof (mnodeset_t) >= max_mem_nodes);
1950Sstevel@tonic-gate
1960Sstevel@tonic-gate if (&plat_build_mem_nodes) {
1970Sstevel@tonic-gate plat_build_mem_nodes(list);
1980Sstevel@tonic-gate } else {
1990Sstevel@tonic-gate /*
2000Sstevel@tonic-gate * Boot install lists are arranged <addr, len>, ...
2010Sstevel@tonic-gate */
2020Sstevel@tonic-gate while (list) {
20311474SJonathan.Adams@Sun.COM start = list->ml_address >> PAGESHIFT;
2040Sstevel@tonic-gate if (start > physmax)
2050Sstevel@tonic-gate continue;
20611474SJonathan.Adams@Sun.COM end =
20711474SJonathan.Adams@Sun.COM (list->ml_address + list->ml_size - 1) >> PAGESHIFT;
2080Sstevel@tonic-gate if (end > physmax)
2090Sstevel@tonic-gate end = physmax;
21010106SJason.Beloro@Sun.COM mem_node_add_range(start, end);
21111474SJonathan.Adams@Sun.COM list = list->ml_next;
2120Sstevel@tonic-gate }
2130Sstevel@tonic-gate mem_node_physalign = 0;
2140Sstevel@tonic-gate mem_node_pfn_shift = 0;
2150Sstevel@tonic-gate }
2160Sstevel@tonic-gate }
2170Sstevel@tonic-gate
2180Sstevel@tonic-gate /*
2190Sstevel@tonic-gate * Allocate an unassigned memnode.
2200Sstevel@tonic-gate */
2210Sstevel@tonic-gate int
mem_node_alloc()2220Sstevel@tonic-gate mem_node_alloc()
2230Sstevel@tonic-gate {
2240Sstevel@tonic-gate int mnode;
2250Sstevel@tonic-gate mnodeset_t newmask, oldmask;
2260Sstevel@tonic-gate
2270Sstevel@tonic-gate /*
2280Sstevel@tonic-gate * Find an unused memnode. Update it atomically to prevent
2290Sstevel@tonic-gate * a first time memnode creation race.
2300Sstevel@tonic-gate */
2310Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++)
2320Sstevel@tonic-gate if (cas32((uint32_t *)&mem_node_config[mnode].exists,
2334769Sdp78419 0, 1) == 0)
2340Sstevel@tonic-gate break;
2350Sstevel@tonic-gate
2360Sstevel@tonic-gate if (mnode >= max_mem_nodes)
2370Sstevel@tonic-gate panic("Out of free memnodes\n");
2380Sstevel@tonic-gate
2390Sstevel@tonic-gate mem_node_config[mnode].physbase = (pfn_t)-1l;
2400Sstevel@tonic-gate mem_node_config[mnode].physmax = 0;
2410Sstevel@tonic-gate atomic_add_16(&num_memnodes, 1);
2420Sstevel@tonic-gate do {
2430Sstevel@tonic-gate oldmask = memnodes_mask;
2440Sstevel@tonic-gate newmask = memnodes_mask | (1ull << mnode);
2450Sstevel@tonic-gate } while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
2460Sstevel@tonic-gate
2470Sstevel@tonic-gate return (mnode);
2480Sstevel@tonic-gate }
2490Sstevel@tonic-gate
2500Sstevel@tonic-gate /*
2510Sstevel@tonic-gate * Find the intersection between a memnode and a memlist
2520Sstevel@tonic-gate * and returns the number of pages that overlap.
2530Sstevel@tonic-gate *
2540Sstevel@tonic-gate * Assumes the list is protected from DR operations by
2550Sstevel@tonic-gate * the memlist lock.
2560Sstevel@tonic-gate */
2570Sstevel@tonic-gate pgcnt_t
mem_node_memlist_pages(int mnode,struct memlist * mlist)2580Sstevel@tonic-gate mem_node_memlist_pages(int mnode, struct memlist *mlist)
2590Sstevel@tonic-gate {
2600Sstevel@tonic-gate pfn_t base, end;
2610Sstevel@tonic-gate pfn_t cur_base, cur_end;
2620Sstevel@tonic-gate pgcnt_t npgs;
2630Sstevel@tonic-gate struct memlist *pmem;
2640Sstevel@tonic-gate
2650Sstevel@tonic-gate base = mem_node_config[mnode].physbase;
2660Sstevel@tonic-gate end = mem_node_config[mnode].physmax;
2670Sstevel@tonic-gate npgs = 0;
2680Sstevel@tonic-gate
2690Sstevel@tonic-gate memlist_read_lock();
2700Sstevel@tonic-gate
27111474SJonathan.Adams@Sun.COM for (pmem = mlist; pmem; pmem = pmem->ml_next) {
27211474SJonathan.Adams@Sun.COM cur_base = btop(pmem->ml_address);
27311474SJonathan.Adams@Sun.COM cur_end = cur_base + btop(pmem->ml_size) - 1;
2744769Sdp78419 if (end < cur_base || base > cur_end)
2750Sstevel@tonic-gate continue;
2760Sstevel@tonic-gate npgs = npgs + (MIN(cur_end, end) -
2770Sstevel@tonic-gate MAX(cur_base, base)) + 1;
2780Sstevel@tonic-gate }
2790Sstevel@tonic-gate
2800Sstevel@tonic-gate memlist_read_unlock();
2810Sstevel@tonic-gate
2820Sstevel@tonic-gate return (npgs);
2830Sstevel@tonic-gate }
284