xref: /onnv-gate/usr/src/uts/sun4u/opl/io/dr_mem.c (revision 11474:857f9db4ef05)
11772Sjl139090 /*
21772Sjl139090  * CDDL HEADER START
31772Sjl139090  *
41772Sjl139090  * The contents of this file are subject to the terms of the
51772Sjl139090  * Common Development and Distribution License (the "License").
61772Sjl139090  * You may not use this file except in compliance with the License.
71772Sjl139090  *
81772Sjl139090  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91772Sjl139090  * or http://www.opensolaris.org/os/licensing.
101772Sjl139090  * See the License for the specific language governing permissions
111772Sjl139090  * and limitations under the License.
121772Sjl139090  *
131772Sjl139090  * When distributing Covered Code, include this CDDL HEADER in each
141772Sjl139090  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151772Sjl139090  * If applicable, add the following below this CDDL HEADER, with the
161772Sjl139090  * fields enclosed by brackets "[]" replaced with your own identifying
171772Sjl139090  * information: Portions Copyright [yyyy] [name of copyright owner]
181772Sjl139090  *
191772Sjl139090  * CDDL HEADER END
201772Sjl139090  */
211772Sjl139090 /*
22*11474SJonathan.Adams@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
231772Sjl139090  * Use is subject to license terms.
241772Sjl139090  */
251772Sjl139090 
261772Sjl139090 /*
271772Sjl139090  * DR memory support routines.
281772Sjl139090  */
291772Sjl139090 
301772Sjl139090 #include <sys/note.h>
311772Sjl139090 #include <sys/debug.h>
321772Sjl139090 #include <sys/types.h>
331772Sjl139090 #include <sys/errno.h>
341772Sjl139090 #include <sys/param.h>
351772Sjl139090 #include <sys/dditypes.h>
361772Sjl139090 #include <sys/kmem.h>
371772Sjl139090 #include <sys/conf.h>
381772Sjl139090 #include <sys/ddi.h>
391772Sjl139090 #include <sys/sunddi.h>
401772Sjl139090 #include <sys/sunndi.h>
411772Sjl139090 #include <sys/ddi_impldefs.h>
421772Sjl139090 #include <sys/ndi_impldefs.h>
431772Sjl139090 #include <sys/sysmacros.h>
441772Sjl139090 #include <sys/machsystm.h>
451772Sjl139090 #include <sys/spitregs.h>
461772Sjl139090 #include <sys/cpuvar.h>
471772Sjl139090 #include <sys/promif.h>
481772Sjl139090 #include <vm/seg_kmem.h>
491772Sjl139090 #include <sys/lgrp.h>
501772Sjl139090 #include <sys/platform_module.h>
511772Sjl139090 
521772Sjl139090 #include <vm/page.h>
531772Sjl139090 
541772Sjl139090 #include <sys/dr.h>
551772Sjl139090 #include <sys/dr_util.h>
561772Sjl139090 #include <sys/drmach.h>
573354Sjl139090 #include <sys/kobj.h>
581772Sjl139090 
591772Sjl139090 extern struct memlist	*phys_install;
603354Sjl139090 extern vnode_t		*retired_pages;
611772Sjl139090 
621772Sjl139090 /* TODO: push this reference below drmach line */
631772Sjl139090 extern int		kcage_on;
641772Sjl139090 
651772Sjl139090 /* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
663354Sjl139090 static char *dr_ie_fmt = "dr_mem.c %d";
671772Sjl139090 
/*
 * Result type of dr_get_target_preference().
 * The numbering is spelled out; it matches the implicit numbering
 * that follows DR_TP_INVALID = -1.
 */
typedef enum {
	DR_TP_INVALID	= -1,
	DR_TP_SAME	= 0,
	DR_TP_LARGE	= 1,
	DR_TP_NONRELOC	= 2,
	DR_TP_FLOATING	= 3
} dr_target_pref_t;
751772Sjl139090 
761772Sjl139090 static int		dr_post_detach_mem_unit(dr_mem_unit_t *mp);
771772Sjl139090 static int		dr_reserve_mem_spans(memhandle_t *mhp,
781772Sjl139090 				struct memlist *mlist);
791772Sjl139090 static int		dr_select_mem_target(dr_handle_t *hp,
801772Sjl139090 				dr_mem_unit_t *mp, struct memlist *ml);
811772Sjl139090 static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);
821772Sjl139090 static struct memlist	*dr_memlist_del_retired_pages(struct memlist *ml);
831772Sjl139090 static dr_target_pref_t	dr_get_target_preference(dr_handle_t *hp,
841772Sjl139090 				dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
851772Sjl139090 				struct memlist *s_ml, struct memlist *x_ml,
861772Sjl139090 				struct memlist *b_ml);
871772Sjl139090 
881772Sjl139090 static int		memlist_canfit(struct memlist *s_mlist,
891772Sjl139090 				struct memlist *t_mlist);
901772Sjl139090 static int		dr_del_mlist_query(struct memlist *mlist,
911772Sjl139090 				memquery_t *mp);
921772Sjl139090 static struct memlist	*dr_get_copy_mlist(struct memlist *s_ml,
931772Sjl139090 				struct memlist *t_ml, dr_mem_unit_t *s_mp,
941772Sjl139090 				dr_mem_unit_t *t_mp);
951772Sjl139090 static struct memlist	*dr_get_nonreloc_mlist(struct memlist *s_ml,
961772Sjl139090 				dr_mem_unit_t *s_mp);
971772Sjl139090 static int		dr_memlist_canfit(struct memlist *s_mlist,
981772Sjl139090 				struct memlist *t_mlist, dr_mem_unit_t *s_mp,
991772Sjl139090 				dr_mem_unit_t *t_mp);
1001772Sjl139090 
/*
 * Bits kept in dr_mem_unit_t.sbm_flags.
 */
#define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
#define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
#define	DR_MFLAG_TARGET		0x04	/* target brd of copy/rename op */
#define	DR_MFLAG_RELOWNER	0x20	/* memory release (delete) owner */
#define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) done */
1091772Sjl139090 
/*
 * Page <-> byte conversion helpers.
 * _ptob64 widens to 64 bits before shifting left by PAGESHIFT;
 * _b64top shifts right by PAGESHIFT and narrows to pgcnt_t.
 */
#define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
#define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
1131772Sjl139090 
1141772Sjl139090 static struct memlist *
dr_get_memlist(dr_mem_unit_t * mp)1151772Sjl139090 dr_get_memlist(dr_mem_unit_t *mp)
1161772Sjl139090 {
1171772Sjl139090 	struct memlist	*mlist = NULL;
1181772Sjl139090 	sbd_error_t	*err;
1191772Sjl139090 	static fn_t	f = "dr_get_memlist";
1201772Sjl139090 
1211772Sjl139090 	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
1221772Sjl139090 
1231772Sjl139090 	/*
1241772Sjl139090 	 * Return cached memlist, if present.
1251772Sjl139090 	 * This memlist will be present following an
1261772Sjl139090 	 * unconfigure (a.k.a: detach) of this memunit.
1271772Sjl139090 	 * It should only be used in the case were a configure
1281772Sjl139090 	 * is bringing this memunit back in without going
1291772Sjl139090 	 * through the disconnect and connect states.
1301772Sjl139090 	 */
1311772Sjl139090 	if (mp->sbm_mlist) {
1321772Sjl139090 		PR_MEM("%s: found cached memlist\n", f);
1331772Sjl139090 
1341772Sjl139090 		mlist = memlist_dup(mp->sbm_mlist);
1351772Sjl139090 	} else {
1361772Sjl139090 		uint64_t basepa = _ptob64(mp->sbm_basepfn);
1371772Sjl139090 
1381772Sjl139090 		/* attempt to construct a memlist using phys_install */
1391772Sjl139090 
1401772Sjl139090 		/* round down to slice base address */
1411772Sjl139090 		basepa &= ~(mp->sbm_slice_size - 1);
1421772Sjl139090 
1431772Sjl139090 		/* get a copy of phys_install to edit */
1441772Sjl139090 		memlist_read_lock();
1451772Sjl139090 		mlist = memlist_dup(phys_install);
1461772Sjl139090 		memlist_read_unlock();
1471772Sjl139090 
1481772Sjl139090 		/* trim lower irrelevant span */
1491772Sjl139090 		if (mlist)
1501772Sjl139090 			mlist = memlist_del_span(mlist, 0ull, basepa);
1511772Sjl139090 
1521772Sjl139090 		/* trim upper irrelevant span */
1531772Sjl139090 		if (mlist) {
1541772Sjl139090 			uint64_t endpa;
1551772Sjl139090 
1561772Sjl139090 			basepa += mp->sbm_slice_size;
1571772Sjl139090 			endpa = _ptob64(physmax + 1);
1581772Sjl139090 			if (endpa > basepa)
1591772Sjl139090 				mlist = memlist_del_span(
1605579Sjesusm 				    mlist, basepa,
1615579Sjesusm 				    endpa - basepa);
1621772Sjl139090 		}
1631772Sjl139090 
1641772Sjl139090 		if (mlist) {
1651772Sjl139090 			/* successfully built a memlist */
1661772Sjl139090 			PR_MEM("%s: derived memlist from phys_install\n", f);
1671772Sjl139090 		}
1681772Sjl139090 
1691772Sjl139090 		/* if no mlist yet, try platform layer */
1701772Sjl139090 		if (!mlist) {
1711772Sjl139090 			err = drmach_mem_get_memlist(
1725579Sjesusm 			    mp->sbm_cm.sbdev_id, &mlist);
1731772Sjl139090 			if (err) {
1741772Sjl139090 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1751772Sjl139090 				mlist = NULL; /* paranoia */
1761772Sjl139090 			}
1771772Sjl139090 		}
1781772Sjl139090 	}
1791772Sjl139090 
1801772Sjl139090 	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
1811772Sjl139090 	PR_MEMLIST_DUMP(mlist);
1821772Sjl139090 
1831772Sjl139090 	return (mlist);
1841772Sjl139090 }
1851772Sjl139090 
1861772Sjl139090 typedef struct {
1871772Sjl139090 	kcondvar_t cond;
1881772Sjl139090 	kmutex_t lock;
1891772Sjl139090 	int error;
1901772Sjl139090 	int done;
1911772Sjl139090 } dr_release_mem_sync_t;
1921772Sjl139090 
1931772Sjl139090 /*
1941772Sjl139090  * Memory has been logically removed by the time this routine is called.
1951772Sjl139090  */
1961772Sjl139090 static void
dr_mem_del_done(void * arg,int error)1971772Sjl139090 dr_mem_del_done(void *arg, int error)
1981772Sjl139090 {
1991772Sjl139090 	dr_release_mem_sync_t *ds = arg;
2001772Sjl139090 
2011772Sjl139090 	mutex_enter(&ds->lock);
2021772Sjl139090 	ds->error = error;
2031772Sjl139090 	ds->done = 1;
2041772Sjl139090 	cv_signal(&ds->cond);
2051772Sjl139090 	mutex_exit(&ds->lock);
2061772Sjl139090 }
2071772Sjl139090 
2081772Sjl139090 /*
2091772Sjl139090  * When we reach here the memory being drained should have
2101772Sjl139090  * already been reserved in dr_pre_release_mem().
2111772Sjl139090  * Our only task here is to kick off the "drain" and wait
2121772Sjl139090  * for it to finish.
2131772Sjl139090  */
2141772Sjl139090 void
dr_release_mem(dr_common_unit_t * cp)2151772Sjl139090 dr_release_mem(dr_common_unit_t *cp)
2161772Sjl139090 {
2171772Sjl139090 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
2181772Sjl139090 	int		err;
2191772Sjl139090 	dr_release_mem_sync_t rms;
2201772Sjl139090 	static fn_t	f = "dr_release_mem";
2211772Sjl139090 
2221772Sjl139090 	/* check that this memory unit has been reserved */
2231772Sjl139090 	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
2241772Sjl139090 		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
2251772Sjl139090 		return;
2261772Sjl139090 	}
2271772Sjl139090 
2281772Sjl139090 	bzero((void *) &rms, sizeof (rms));
2291772Sjl139090 
2301772Sjl139090 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
2311772Sjl139090 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
2321772Sjl139090 
2331772Sjl139090 	mutex_enter(&rms.lock);
2341772Sjl139090 	err = kphysm_del_start(mp->sbm_memhandle,
2355579Sjesusm 	    dr_mem_del_done, (void *) &rms);
2361772Sjl139090 	if (err == KPHYSM_OK) {
2371772Sjl139090 		/* wait for completion or interrupt */
2381772Sjl139090 		while (!rms.done) {
2391772Sjl139090 			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
2401772Sjl139090 				/* then there is a pending UNIX signal */
2411772Sjl139090 				(void) kphysm_del_cancel(mp->sbm_memhandle);
2421772Sjl139090 
2431772Sjl139090 				/* wait for completion */
2441772Sjl139090 				while (!rms.done)
2451772Sjl139090 					cv_wait(&rms.cond, &rms.lock);
2461772Sjl139090 			}
2471772Sjl139090 		}
2481772Sjl139090 		/* get the result of the memory delete operation */
2491772Sjl139090 		err = rms.error;
2501772Sjl139090 	}
2511772Sjl139090 	mutex_exit(&rms.lock);
2521772Sjl139090 
2531772Sjl139090 	cv_destroy(&rms.cond);
2541772Sjl139090 	mutex_destroy(&rms.lock);
2551772Sjl139090 
2561772Sjl139090 	if (err != KPHYSM_OK) {
2571772Sjl139090 		int e_code;
2581772Sjl139090 
2591772Sjl139090 		switch (err) {
2601772Sjl139090 			case KPHYSM_ENOWORK:
2611772Sjl139090 				e_code = ESBD_NOERROR;
2621772Sjl139090 				break;
2631772Sjl139090 
2641772Sjl139090 			case KPHYSM_EHANDLE:
2651772Sjl139090 			case KPHYSM_ESEQUENCE:
2661772Sjl139090 				e_code = ESBD_INTERNAL;
2671772Sjl139090 				break;
2681772Sjl139090 
2691772Sjl139090 			case KPHYSM_ENOTVIABLE:
2701772Sjl139090 				e_code = ESBD_MEM_NOTVIABLE;
2711772Sjl139090 				break;
2721772Sjl139090 
2731772Sjl139090 			case KPHYSM_EREFUSED:
2741772Sjl139090 				e_code = ESBD_MEM_REFUSED;
2751772Sjl139090 				break;
2761772Sjl139090 
2771772Sjl139090 			case KPHYSM_ENONRELOC:
2781772Sjl139090 				e_code = ESBD_MEM_NONRELOC;
2791772Sjl139090 				break;
2801772Sjl139090 
2811772Sjl139090 			case KPHYSM_ECANCELLED:
2821772Sjl139090 				e_code = ESBD_MEM_CANCELLED;
2831772Sjl139090 				break;
2841772Sjl139090 
2851772Sjl139090 			case KPHYSM_ERESOURCE:
2861772Sjl139090 				e_code = ESBD_MEMFAIL;
2871772Sjl139090 				break;
2881772Sjl139090 
2891772Sjl139090 			default:
2901772Sjl139090 				cmn_err(CE_WARN,
2915579Sjesusm 				    "%s: unexpected kphysm error code %d,"
2925579Sjesusm 				    " id 0x%p",
2935579Sjesusm 				    f, err, mp->sbm_cm.sbdev_id);
2941772Sjl139090 
2951772Sjl139090 				e_code = ESBD_IO;
2961772Sjl139090 				break;
2971772Sjl139090 		}
2981772Sjl139090 
2991772Sjl139090 		if (e_code != ESBD_NOERROR) {
3003712Sbm42561 			dr_dev_err(CE_WARN, &mp->sbm_cm, e_code);
3011772Sjl139090 		}
3021772Sjl139090 	}
3031772Sjl139090 }
3041772Sjl139090 
3051772Sjl139090 void
dr_attach_mem(dr_handle_t * hp,dr_common_unit_t * cp)3061772Sjl139090 dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
3071772Sjl139090 {
3081772Sjl139090 	_NOTE(ARGUNUSED(hp))
3091772Sjl139090 
3101772Sjl139090 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
3111772Sjl139090 	struct memlist	*ml, *mc;
3121772Sjl139090 	sbd_error_t	*err;
3131772Sjl139090 	static fn_t	f = "dr_attach_mem";
3141772Sjl139090 
3151772Sjl139090 	PR_MEM("%s...\n", f);
3161772Sjl139090 
3171772Sjl139090 	dr_lock_status(hp->h_bd);
3181772Sjl139090 	err = drmach_configure(cp->sbdev_id, 0);
3191772Sjl139090 	dr_unlock_status(hp->h_bd);
3201772Sjl139090 	if (err) {
3211772Sjl139090 		DRERR_SET_C(&cp->sbdev_error, &err);
3221772Sjl139090 		return;
3231772Sjl139090 	}
3241772Sjl139090 
3251772Sjl139090 	ml = dr_get_memlist(mp);
326*11474SJonathan.Adams@Sun.COM 	for (mc = ml; mc; mc = mc->ml_next) {
3271772Sjl139090 		int		 rv;
3281772Sjl139090 		sbd_error_t	*err;
3291772Sjl139090 
3301772Sjl139090 		rv = kphysm_add_memory_dynamic(
331*11474SJonathan.Adams@Sun.COM 		    (pfn_t)(mc->ml_address >> PAGESHIFT),
332*11474SJonathan.Adams@Sun.COM 		    (pgcnt_t)(mc->ml_size >> PAGESHIFT));
3331772Sjl139090 		if (rv != KPHYSM_OK) {
3341772Sjl139090 			/*
3351772Sjl139090 			 * translate kphysm error and
3361772Sjl139090 			 * store in devlist error
3371772Sjl139090 			 */
3381772Sjl139090 			switch (rv) {
3391772Sjl139090 			case KPHYSM_ERESOURCE:
3401772Sjl139090 				rv = ESBD_NOMEM;
3411772Sjl139090 				break;
3421772Sjl139090 
3431772Sjl139090 			case KPHYSM_EFAULT:
3441772Sjl139090 				rv = ESBD_FAULT;
3451772Sjl139090 				break;
3461772Sjl139090 
3471772Sjl139090 			default:
3481772Sjl139090 				rv = ESBD_INTERNAL;
3491772Sjl139090 				break;
3501772Sjl139090 			}
3511772Sjl139090 
3521772Sjl139090 			if (rv == ESBD_INTERNAL) {
3531772Sjl139090 				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
3541772Sjl139090 			} else
3551772Sjl139090 				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
3561772Sjl139090 			break;
3571772Sjl139090 		}
3581772Sjl139090 
3591772Sjl139090 		err = drmach_mem_add_span(
360*11474SJonathan.Adams@Sun.COM 		    mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size);
3611772Sjl139090 		if (err) {
3621772Sjl139090 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
3631772Sjl139090 			break;
3641772Sjl139090 		}
3651772Sjl139090 	}
3661772Sjl139090 
3671772Sjl139090 	memlist_delete(ml);
3681772Sjl139090 
3691772Sjl139090 	/* back out if configure failed */
3701772Sjl139090 	if (mp->sbm_cm.sbdev_error != NULL) {
3711772Sjl139090 		dr_lock_status(hp->h_bd);
3721772Sjl139090 		err = drmach_unconfigure(cp->sbdev_id, 0);
3731772Sjl139090 		if (err)
3741772Sjl139090 			sbd_err_clear(&err);
3751772Sjl139090 		dr_unlock_status(hp->h_bd);
3761772Sjl139090 	}
3771772Sjl139090 }
3781772Sjl139090 
3791772Sjl139090 static struct memlist *
dr_memlist_del_retired_pages(struct memlist * mlist)3801772Sjl139090 dr_memlist_del_retired_pages(struct memlist *mlist)
3811772Sjl139090 {
3821772Sjl139090 	page_t		*pp;
3831772Sjl139090 	pfn_t		pfn;
3841772Sjl139090 	kmutex_t	*vphm;
3853354Sjl139090 	vnode_t		*vp = retired_pages;
3861772Sjl139090 	static fn_t	f = "dr_memlist_del_retired_pages";
3871772Sjl139090 
3881772Sjl139090 	vphm = page_vnode_mutex(vp);
3891772Sjl139090 	mutex_enter(vphm);
3901772Sjl139090 
3911772Sjl139090 	PR_MEM("%s\n", f);
3921772Sjl139090 
3931772Sjl139090 	if ((pp = vp->v_pages) == NULL) {
3941772Sjl139090 		mutex_exit(vphm);
3951772Sjl139090 		return (mlist);
3961772Sjl139090 	}
3971772Sjl139090 
3981772Sjl139090 	do {
3991772Sjl139090 		ASSERT(pp != NULL);
4003354Sjl139090 		ASSERT(pp->p_vnode == retired_pages);
4011772Sjl139090 
4023658Sbm42561 		if (!page_try_reclaim_lock(pp, SE_SHARED, SE_RETIRED))
4031772Sjl139090 			continue;
4041772Sjl139090 
4051772Sjl139090 		pfn = page_pptonum(pp);
4061772Sjl139090 
4071772Sjl139090 		/*
4081772Sjl139090 		 * Page retirement currently breaks large pages into PAGESIZE
4091772Sjl139090 		 * pages. If this changes, need to remove the assert and deal
4101772Sjl139090 		 * with different page sizes.
4111772Sjl139090 		 */
4121772Sjl139090 		ASSERT(pp->p_szc == 0);
4131772Sjl139090 
4141772Sjl139090 		if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) {
4151772Sjl139090 			mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE);
4161772Sjl139090 			PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) "
4171772Sjl139090 			    "from memlist\n", ptob(pfn), pfn);
4181772Sjl139090 		}
4191772Sjl139090 
4201772Sjl139090 		page_unlock(pp);
4211772Sjl139090 	} while ((pp = pp->p_vpnext) != vp->v_pages);
4221772Sjl139090 
4231772Sjl139090 	mutex_exit(vphm);
4241772Sjl139090 
4251772Sjl139090 	return (mlist);
4261772Sjl139090 }
4271772Sjl139090 
4281772Sjl139090 static int
dr_move_memory(dr_handle_t * hp,dr_mem_unit_t * s_mp,dr_mem_unit_t * t_mp)4291772Sjl139090 dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
4301772Sjl139090 {
4311772Sjl139090 	int		rv = -1;
4321772Sjl139090 	time_t		 copytime;
4331772Sjl139090 	drmachid_t	 cr_id;
4341772Sjl139090 	dr_sr_handle_t	*srhp = NULL;
4351772Sjl139090 	dr_board_t	*t_bp, *s_bp;
4361772Sjl139090 	struct memlist	*c_ml, *d_ml;
4371772Sjl139090 	sbd_error_t	*err;
4381772Sjl139090 	static fn_t	 f = "dr_move_memory";
4391772Sjl139090 
4401772Sjl139090 	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
4415579Sjesusm 	    f,
4425579Sjesusm 	    s_mp->sbm_cm.sbdev_path,
4435579Sjesusm 	    t_mp->sbm_cm.sbdev_path);
4441772Sjl139090 
4451772Sjl139090 	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
4461772Sjl139090 	ASSERT(s_mp->sbm_peer == t_mp);
4471772Sjl139090 	ASSERT(s_mp->sbm_mlist);
4481772Sjl139090 
4491772Sjl139090 	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
4501772Sjl139090 	ASSERT(t_mp->sbm_peer == s_mp);
4511772Sjl139090 
4521772Sjl139090 	/*
4531772Sjl139090 	 * create a memlist of spans to copy by removing
4541772Sjl139090 	 * the spans that have been deleted, if any, from
4551772Sjl139090 	 * the full source board memlist.  s_mp->sbm_del_mlist
4561772Sjl139090 	 * will be NULL if there were no spans deleted from
4571772Sjl139090 	 * the source board.
4581772Sjl139090 	 */
4591772Sjl139090 	c_ml = memlist_dup(s_mp->sbm_mlist);
4601772Sjl139090 	d_ml = s_mp->sbm_del_mlist;
4611772Sjl139090 	while (d_ml != NULL) {
462*11474SJonathan.Adams@Sun.COM 		c_ml = memlist_del_span(c_ml, d_ml->ml_address, d_ml->ml_size);
463*11474SJonathan.Adams@Sun.COM 		d_ml = d_ml->ml_next;
4641772Sjl139090 	}
4651772Sjl139090 
4661772Sjl139090 	/*
4671772Sjl139090 	 * Remove retired pages from the copy list. The page content
4681772Sjl139090 	 * need not be copied since the pages are no longer in use.
4691772Sjl139090 	 */
4701772Sjl139090 	PR_MEM("%s: copy list before removing retired pages (if any):\n", f);
4711772Sjl139090 	PR_MEMLIST_DUMP(c_ml);
4721772Sjl139090 
4731772Sjl139090 	c_ml = dr_memlist_del_retired_pages(c_ml);
4741772Sjl139090 
4751772Sjl139090 	PR_MEM("%s: copy list after removing retired pages:\n", f);
4761772Sjl139090 	PR_MEMLIST_DUMP(c_ml);
4771772Sjl139090 
4781772Sjl139090 	/*
4791772Sjl139090 	 * With parallel copy, it shouldn't make a difference which
4801772Sjl139090 	 * CPU is the actual master during copy-rename since all
4811772Sjl139090 	 * CPUs participate in the parallel copy anyway.
4821772Sjl139090 	 */
4831772Sjl139090 	affinity_set(CPU_CURRENT);
4841772Sjl139090 
4851772Sjl139090 	err = drmach_copy_rename_init(
4865579Sjesusm 	    t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
4871772Sjl139090 	if (err) {
4881772Sjl139090 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
4891772Sjl139090 		affinity_clear();
4901772Sjl139090 		memlist_delete(c_ml);
4911772Sjl139090 		return (-1);
4921772Sjl139090 	}
4931772Sjl139090 
4941772Sjl139090 	srhp = dr_get_sr_handle(hp);
4951772Sjl139090 	ASSERT(srhp);
4961772Sjl139090 
49711066Srafael.vanoni@sun.com 	copytime = ddi_get_lbolt();
4981772Sjl139090 
4991772Sjl139090 	/* Quiesce the OS.  */
5001772Sjl139090 	if (dr_suspend(srhp)) {
5011772Sjl139090 		cmn_err(CE_WARN, "%s: failed to quiesce OS"
5025579Sjesusm 		    " for copy-rename", f);
5031772Sjl139090 
5041772Sjl139090 		err = drmach_copy_rename_fini(cr_id);
5051772Sjl139090 		if (err) {
5061772Sjl139090 			/*
5071772Sjl139090 			 * no error is expected since the program has
5081772Sjl139090 			 * not yet run.
5091772Sjl139090 			 */
5101772Sjl139090 
5111772Sjl139090 			/* catch this in debug kernels */
5121772Sjl139090 			ASSERT(0);
5131772Sjl139090 
5141772Sjl139090 			sbd_err_clear(&err);
5151772Sjl139090 		}
5161772Sjl139090 
5171772Sjl139090 		/* suspend error reached via hp */
5181772Sjl139090 		s_mp->sbm_cm.sbdev_error = hp->h_err;
5191772Sjl139090 		hp->h_err = NULL;
5201772Sjl139090 		goto done;
5211772Sjl139090 	}
5221772Sjl139090 
5231772Sjl139090 	drmach_copy_rename(cr_id);
5241772Sjl139090 
5251772Sjl139090 	/* Resume the OS.  */
5261772Sjl139090 	dr_resume(srhp);
5271772Sjl139090 
52811066Srafael.vanoni@sun.com 	copytime = ddi_get_lbolt() - copytime;
5291772Sjl139090 
5301772Sjl139090 	if (err = drmach_copy_rename_fini(cr_id))
5311772Sjl139090 		goto done;
5321772Sjl139090 
5331772Sjl139090 	/*
5341772Sjl139090 	 * Rename memory for lgroup.
5351772Sjl139090 	 * Source and target board numbers are packaged in arg.
5361772Sjl139090 	 */
5371772Sjl139090 	s_bp = s_mp->sbm_cm.sbdev_bp;
5381772Sjl139090 	t_bp = t_mp->sbm_cm.sbdev_bp;
5391772Sjl139090 
5401772Sjl139090 	lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
5415579Sjesusm 	    (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
5421772Sjl139090 
5431772Sjl139090 
5441772Sjl139090 	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
5455579Sjesusm 	    f, copytime, copytime / hz);
5461772Sjl139090 
5471772Sjl139090 	rv = 0;
5481772Sjl139090 done:
5491772Sjl139090 	if (srhp)
5501772Sjl139090 		dr_release_sr_handle(srhp);
5511772Sjl139090 	if (err)
5521772Sjl139090 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
5531772Sjl139090 	affinity_clear();
5541772Sjl139090 
5551772Sjl139090 	return (rv);
5561772Sjl139090 }
5571772Sjl139090 
5581772Sjl139090 /*
5591772Sjl139090  * If detaching node contains memory that is "non-permanent"
5601772Sjl139090  * then the memory adr's are simply cleared.  If the memory
5611772Sjl139090  * is non-relocatable, then do a copy-rename.
5621772Sjl139090  */
5631772Sjl139090 void
dr_detach_mem(dr_handle_t * hp,dr_common_unit_t * cp)5641772Sjl139090 dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
5651772Sjl139090 {
5661772Sjl139090 	int			rv = 0;
5671772Sjl139090 	dr_mem_unit_t		*s_mp = (dr_mem_unit_t *)cp;
5681772Sjl139090 	dr_mem_unit_t		*t_mp;
5691772Sjl139090 	dr_state_t		state;
5701772Sjl139090 	static fn_t		f = "dr_detach_mem";
5711772Sjl139090 
5721772Sjl139090 	PR_MEM("%s...\n", f);
5731772Sjl139090 
5741772Sjl139090 	/* lookup target mem unit and target board structure, if any */
5751772Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
5761772Sjl139090 		t_mp = s_mp->sbm_peer;
5771772Sjl139090 		ASSERT(t_mp != NULL);
5781772Sjl139090 		ASSERT(t_mp->sbm_peer == s_mp);
5791772Sjl139090 	} else {
5801772Sjl139090 		t_mp = NULL;
5811772Sjl139090 	}
5821772Sjl139090 
5831772Sjl139090 	/* verify mem unit's state is UNREFERENCED */
5841772Sjl139090 	state = s_mp->sbm_cm.sbdev_state;
5851772Sjl139090 	if (state != DR_STATE_UNREFERENCED) {
5861772Sjl139090 		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
5871772Sjl139090 		return;
5881772Sjl139090 	}
5891772Sjl139090 
5901772Sjl139090 	/* verify target mem unit's state is UNREFERENCED, if any */
5911772Sjl139090 	if (t_mp != NULL) {
5921772Sjl139090 		state = t_mp->sbm_cm.sbdev_state;
5931772Sjl139090 		if (state != DR_STATE_UNREFERENCED) {
5941772Sjl139090 			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
5951772Sjl139090 			return;
5961772Sjl139090 		}
5971772Sjl139090 	}
5981772Sjl139090 
5991772Sjl139090 	/*
6001772Sjl139090 	 * If there is no target board (no copy/rename was needed), then
6011772Sjl139090 	 * we're done!
6021772Sjl139090 	 */
6031772Sjl139090 	if (t_mp == NULL) {
6041772Sjl139090 		sbd_error_t *err;
6051772Sjl139090 		/*
6061772Sjl139090 		 * Reprogram interconnect hardware and disable
6071772Sjl139090 		 * memory controllers for memory node that's going away.
6081772Sjl139090 		 */
6091772Sjl139090 
6101772Sjl139090 		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
6111772Sjl139090 		if (err) {
6121772Sjl139090 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
6131772Sjl139090 			rv = -1;
6141772Sjl139090 		}
6151772Sjl139090 	} else {
6161772Sjl139090 		rv = dr_move_memory(hp, s_mp, t_mp);
6171772Sjl139090 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
6185579Sjesusm 		    f,
6195579Sjesusm 		    rv ? "FAILED" : "COMPLETED",
6205579Sjesusm 		    s_mp->sbm_cm.sbdev_bp->b_num,
6215579Sjesusm 		    t_mp->sbm_cm.sbdev_bp->b_num);
6221772Sjl139090 
6231772Sjl139090 		if (rv != 0)
6241772Sjl139090 			(void) dr_cancel_mem(s_mp);
6251772Sjl139090 	}
6261772Sjl139090 
6271772Sjl139090 	if (rv == 0) {
6281772Sjl139090 		sbd_error_t *err;
6291772Sjl139090 
6301772Sjl139090 		dr_lock_status(hp->h_bd);
6311772Sjl139090 		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0);
6321772Sjl139090 		dr_unlock_status(hp->h_bd);
6331772Sjl139090 		if (err)
6341772Sjl139090 			sbd_err_clear(&err);
6351772Sjl139090 	}
6361772Sjl139090 }
6371772Sjl139090 
6381772Sjl139090 /*
6391772Sjl139090  * This routine acts as a wrapper for kphysm_del_span_query in order to
6401772Sjl139090  * support potential memory holes in a board's physical address space.
6411772Sjl139090  * It calls kphysm_del_span_query for each node in a memlist and accumulates
6421772Sjl139090  * the results in *mp.
6431772Sjl139090  */
6441772Sjl139090 static int
dr_del_mlist_query(struct memlist * mlist,memquery_t * mp)6451772Sjl139090 dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
6461772Sjl139090 {
6471772Sjl139090 	struct memlist	*ml;
6481772Sjl139090 	int		 rv = 0;
6491772Sjl139090 
6501772Sjl139090 
6511772Sjl139090 	if (mlist == NULL)
6521772Sjl139090 		cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
6531772Sjl139090 
6541772Sjl139090 	mp->phys_pages = 0;
6551772Sjl139090 	mp->managed = 0;
6561772Sjl139090 	mp->nonrelocatable = 0;
6571772Sjl139090 	mp->first_nonrelocatable = (pfn_t)-1;	/* XXX */
6581772Sjl139090 	mp->last_nonrelocatable = 0;
6591772Sjl139090 
660*11474SJonathan.Adams@Sun.COM 	for (ml = mlist; ml; ml = ml->ml_next) {
6611772Sjl139090 		memquery_t mq;
6621772Sjl139090 
6631772Sjl139090 		rv = kphysm_del_span_query(
664*11474SJonathan.Adams@Sun.COM 		    _b64top(ml->ml_address), _b64top(ml->ml_size), &mq);
6651772Sjl139090 		if (rv)
6661772Sjl139090 			break;
6671772Sjl139090 
6681772Sjl139090 		mp->phys_pages += mq.phys_pages;
6691772Sjl139090 		mp->managed += mq.managed;
6701772Sjl139090 		mp->nonrelocatable += mq.nonrelocatable;
6711772Sjl139090 
6721772Sjl139090 		if (mq.nonrelocatable != 0) {
6731772Sjl139090 			if (mq.first_nonrelocatable < mp->first_nonrelocatable)
6741772Sjl139090 				mp->first_nonrelocatable =
6755579Sjesusm 				    mq.first_nonrelocatable;
6761772Sjl139090 			if (mq.last_nonrelocatable > mp->last_nonrelocatable)
6771772Sjl139090 				mp->last_nonrelocatable =
6785579Sjesusm 				    mq.last_nonrelocatable;
6791772Sjl139090 		}
6801772Sjl139090 	}
6811772Sjl139090 
6821772Sjl139090 	if (mp->nonrelocatable == 0)
6831772Sjl139090 		mp->first_nonrelocatable = 0;	/* XXX */
6841772Sjl139090 
6851772Sjl139090 	return (rv);
6861772Sjl139090 }
6871772Sjl139090 
6881772Sjl139090 /*
6891772Sjl139090  * NOTE: This routine is only partially smart about multiple
6901772Sjl139090  *	 mem-units.  Need to make mem-status structure smart
6911772Sjl139090  *	 about them also.
6921772Sjl139090  */
6931772Sjl139090 int
dr_mem_status(dr_handle_t * hp,dr_devset_t devset,sbd_dev_stat_t * dsp)6941772Sjl139090 dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
6951772Sjl139090 {
6961772Sjl139090 	int		m, mix;
6971772Sjl139090 	memdelstat_t	mdst;
6981772Sjl139090 	memquery_t	mq;
6991772Sjl139090 	dr_board_t	*bp;
7001772Sjl139090 	dr_mem_unit_t	*mp;
7011772Sjl139090 	sbd_mem_stat_t	*msp;
7021772Sjl139090 	static fn_t	f = "dr_mem_status";
7031772Sjl139090 
7041772Sjl139090 	bp = hp->h_bd;
7051772Sjl139090 	devset &= DR_DEVS_PRESENT(bp);
7061772Sjl139090 
7071772Sjl139090 	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
7081772Sjl139090 		int		rv;
7091772Sjl139090 		sbd_error_t	*err;
7101772Sjl139090 		drmach_status_t	 pstat;
7111772Sjl139090 		dr_mem_unit_t	*p_mp;
7121772Sjl139090 
7131772Sjl139090 		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
7141772Sjl139090 			continue;
7151772Sjl139090 
7161772Sjl139090 		mp = dr_get_mem_unit(bp, m);
7171772Sjl139090 
7181772Sjl139090 		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
7191772Sjl139090 			/* present, but not fully initialized */
7201772Sjl139090 			continue;
7211772Sjl139090 		}
7221772Sjl139090 
7231772Sjl139090 		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
7241772Sjl139090 			continue;
7251772Sjl139090 
7261772Sjl139090 		/* fetch platform status */
7271772Sjl139090 		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
7281772Sjl139090 		if (err) {
7291772Sjl139090 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
7301772Sjl139090 			continue;
7311772Sjl139090 		}
7321772Sjl139090 
7331772Sjl139090 		msp = &dsp->d_mem;
7341772Sjl139090 		bzero((caddr_t)msp, sizeof (*msp));
7351772Sjl139090 
73611311SSurya.Prakki@Sun.COM 		(void) strncpy(msp->ms_cm.c_id.c_name, pstat.type,
7375579Sjesusm 		    sizeof (msp->ms_cm.c_id.c_name));
7381772Sjl139090 		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
7391772Sjl139090 		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
7401772Sjl139090 		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
7411772Sjl139090 		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
7421772Sjl139090 		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
7431772Sjl139090 		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
7441772Sjl139090 
7451772Sjl139090 		msp->ms_totpages = mp->sbm_npages;
7461772Sjl139090 		msp->ms_basepfn = mp->sbm_basepfn;
7471772Sjl139090 		msp->ms_pageslost = mp->sbm_pageslost;
7481772Sjl139090 		msp->ms_cage_enabled = kcage_on;
7491772Sjl139090 
7501772Sjl139090 		if (mp->sbm_flags & DR_MFLAG_RESERVED)
7511772Sjl139090 			p_mp = mp->sbm_peer;
7521772Sjl139090 		else
7531772Sjl139090 			p_mp = NULL;
7541772Sjl139090 
7551772Sjl139090 		if (p_mp == NULL) {
7561772Sjl139090 			msp->ms_peer_is_target = 0;
7571772Sjl139090 			msp->ms_peer_ap_id[0] = '\0';
7581772Sjl139090 		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
7591772Sjl139090 			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7601772Sjl139090 			char *minor;
7611772Sjl139090 
7621772Sjl139090 			/*
7631772Sjl139090 			 * b_dip doesn't have to be held for ddi_pathname()
7641772Sjl139090 			 * because the board struct (dr_board_t) will be
7651772Sjl139090 			 * destroyed before b_dip detaches.
7661772Sjl139090 			 */
7671772Sjl139090 			(void) ddi_pathname(bp->b_dip, path);
7681772Sjl139090 			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
7691772Sjl139090 
77011311SSurya.Prakki@Sun.COM 			(void) snprintf(msp->ms_peer_ap_id,
7711772Sjl139090 			    sizeof (msp->ms_peer_ap_id), "%s%s",
7721772Sjl139090 			    path, (minor == NULL) ? "" : minor);
7731772Sjl139090 
7741772Sjl139090 			kmem_free(path, MAXPATHLEN);
7751772Sjl139090 
7761772Sjl139090 			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
7771772Sjl139090 				msp->ms_peer_is_target = 1;
7781772Sjl139090 		}
7791772Sjl139090 
7801772Sjl139090 		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
7811772Sjl139090 			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
7821772Sjl139090 		else
7831772Sjl139090 			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */
7841772Sjl139090 
7851772Sjl139090 		if (rv == KPHYSM_OK) {
7861772Sjl139090 			/*
7871772Sjl139090 			 * Any pages above managed is "free",
7881772Sjl139090 			 * i.e. it's collected.
7891772Sjl139090 			 */
7901772Sjl139090 			msp->ms_detpages += (uint_t)(mdst.collected +
7911772Sjl139090 			    mdst.phys_pages - mdst.managed);
7921772Sjl139090 		} else {
7931772Sjl139090 			/*
7941772Sjl139090 			 * If we're UNREFERENCED or UNCONFIGURED,
7951772Sjl139090 			 * then the number of detached pages is
7961772Sjl139090 			 * however many pages are on the board.
7971772Sjl139090 			 * I.e. detached = not in use by OS.
7981772Sjl139090 			 */
7991772Sjl139090 			switch (msp->ms_cm.c_ostate) {
8001772Sjl139090 			/*
8011772Sjl139090 			 * changed to use cfgadm states
8021772Sjl139090 			 *
8031772Sjl139090 			 * was:
8041772Sjl139090 			 *	case DR_STATE_UNREFERENCED:
8051772Sjl139090 			 *	case DR_STATE_UNCONFIGURED:
8061772Sjl139090 			 */
8071772Sjl139090 			case SBD_STAT_UNCONFIGURED:
8081772Sjl139090 				msp->ms_detpages = msp->ms_totpages;
8091772Sjl139090 				break;
8101772Sjl139090 
8111772Sjl139090 			default:
8121772Sjl139090 				break;
8131772Sjl139090 			}
8141772Sjl139090 		}
8151772Sjl139090 
8161772Sjl139090 		/*
8171772Sjl139090 		 * kphysm_del_span_query can report non-reloc pages = total
8181772Sjl139090 		 * pages for memory that is not yet configured
8191772Sjl139090 		 */
8201772Sjl139090 		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
8211772Sjl139090 			struct memlist *ml;
8221772Sjl139090 
8231772Sjl139090 			ml = dr_get_memlist(mp);
8241772Sjl139090 			rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
8251772Sjl139090 			memlist_delete(ml);
8261772Sjl139090 
8271772Sjl139090 			if (rv == KPHYSM_OK) {
8281772Sjl139090 				msp->ms_managed_pages = mq.managed;
8291772Sjl139090 				msp->ms_noreloc_pages = mq.nonrelocatable;
8301772Sjl139090 				msp->ms_noreloc_first =
8311772Sjl139090 				    mq.first_nonrelocatable;
8321772Sjl139090 				msp->ms_noreloc_last =
8331772Sjl139090 				    mq.last_nonrelocatable;
8341772Sjl139090 				msp->ms_cm.c_sflags = 0;
8351772Sjl139090 				if (mq.nonrelocatable) {
8361772Sjl139090 					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
8371772Sjl139090 					    msp->ms_cm.c_sflags);
8381772Sjl139090 				}
8391772Sjl139090 			} else {
8401772Sjl139090 				PR_MEM("%s: kphysm_del_span_query() = %d\n",
8411772Sjl139090 				    f, rv);
8421772Sjl139090 			}
8431772Sjl139090 		}
8441772Sjl139090 
8451772Sjl139090 		/*
8461772Sjl139090 		 * Check source unit state during copy-rename
8471772Sjl139090 		 */
8481772Sjl139090 		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
8491772Sjl139090 		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
8501772Sjl139090 		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
8511772Sjl139090 			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
8521772Sjl139090 
8531772Sjl139090 		mix++;
8541772Sjl139090 		dsp++;
8551772Sjl139090 	}
8561772Sjl139090 
8571772Sjl139090 	return (mix);
8581772Sjl139090 }
8591772Sjl139090 
8601772Sjl139090 int
dr_pre_attach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)8611772Sjl139090 dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
8621772Sjl139090 {
8631772Sjl139090 	_NOTE(ARGUNUSED(hp))
8641772Sjl139090 
8651772Sjl139090 	int		err_flag = 0;
8661772Sjl139090 	int		d;
8671772Sjl139090 	sbd_error_t	*err;
8681772Sjl139090 	static fn_t	f = "dr_pre_attach_mem";
8691772Sjl139090 
8701772Sjl139090 	PR_MEM("%s...\n", f);
8711772Sjl139090 
8721772Sjl139090 	for (d = 0; d < devnum; d++) {
8731772Sjl139090 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
8741772Sjl139090 		dr_state_t	state;
8751772Sjl139090 
8761772Sjl139090 		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
8771772Sjl139090 
8781772Sjl139090 		state = mp->sbm_cm.sbdev_state;
8791772Sjl139090 		switch (state) {
8801772Sjl139090 		case DR_STATE_UNCONFIGURED:
8811772Sjl139090 			PR_MEM("%s: recovering from UNCONFIG for %s\n",
8825579Sjesusm 			    f,
8835579Sjesusm 			    mp->sbm_cm.sbdev_path);
8841772Sjl139090 
8851772Sjl139090 			/* use memlist cached by dr_post_detach_mem_unit */
8861772Sjl139090 			ASSERT(mp->sbm_mlist != NULL);
8871772Sjl139090 			PR_MEM("%s: re-configuring cached memlist for %s:\n",
8885579Sjesusm 			    f, mp->sbm_cm.sbdev_path);
8891772Sjl139090 			PR_MEMLIST_DUMP(mp->sbm_mlist);
8901772Sjl139090 
8911772Sjl139090 			/* kphysm del handle should be have been freed */
8921772Sjl139090 			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
8931772Sjl139090 
8941772Sjl139090 			/*FALLTHROUGH*/
8951772Sjl139090 
8961772Sjl139090 		case DR_STATE_CONNECTED:
8971772Sjl139090 			PR_MEM("%s: reprogramming mem hardware on %s\n",
8985579Sjesusm 			    f, mp->sbm_cm.sbdev_bp->b_path);
8991772Sjl139090 
9001772Sjl139090 			PR_MEM("%s: enabling %s\n",
9015579Sjesusm 			    f, mp->sbm_cm.sbdev_path);
9021772Sjl139090 
9031772Sjl139090 			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
9041772Sjl139090 			if (err) {
9051772Sjl139090 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
9061772Sjl139090 				err_flag = 1;
9071772Sjl139090 			}
9081772Sjl139090 			break;
9091772Sjl139090 
9101772Sjl139090 		default:
9111772Sjl139090 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
9121772Sjl139090 			err_flag = 1;
9131772Sjl139090 			break;
9141772Sjl139090 		}
9151772Sjl139090 
9161772Sjl139090 		/* exit for loop if error encountered */
9171772Sjl139090 		if (err_flag)
9181772Sjl139090 			break;
9191772Sjl139090 	}
9201772Sjl139090 
9211772Sjl139090 	return (err_flag ? -1 : 0);
9221772Sjl139090 }
9231772Sjl139090 
9243354Sjl139090 static void
dr_update_mc_memory()9253354Sjl139090 dr_update_mc_memory()
9263354Sjl139090 {
9273354Sjl139090 	void		(*mc_update_mlist)(void);
9283354Sjl139090 
9293354Sjl139090 	/*
9303354Sjl139090 	 * mc-opl is configured during drmach_mem_new but the memory
9313354Sjl139090 	 * has not been added to phys_install at that time.
9323354Sjl139090 	 * we must inform mc-opl to update the mlist after we
9333354Sjl139090 	 * attach or detach a system board.
9343354Sjl139090 	 */
9353354Sjl139090 
9363354Sjl139090 	mc_update_mlist = (void (*)(void))
9373354Sjl139090 	    modgetsymvalue("opl_mc_update_mlist", 0);
9383354Sjl139090 
9393354Sjl139090 	if (mc_update_mlist != NULL) {
9403354Sjl139090 		(*mc_update_mlist)();
9413354Sjl139090 	}
9423354Sjl139090 }
9433354Sjl139090 
9441772Sjl139090 int
dr_post_attach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)9451772Sjl139090 dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
9461772Sjl139090 {
9471772Sjl139090 	_NOTE(ARGUNUSED(hp))
9481772Sjl139090 
9491772Sjl139090 	int		d;
9501772Sjl139090 	static fn_t	f = "dr_post_attach_mem";
9511772Sjl139090 
9521772Sjl139090 	PR_MEM("%s...\n", f);
9531772Sjl139090 
9541772Sjl139090 	for (d = 0; d < devnum; d++) {
9551772Sjl139090 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
9561772Sjl139090 		struct memlist	*mlist, *ml;
9571772Sjl139090 
9581772Sjl139090 		mlist = dr_get_memlist(mp);
9591772Sjl139090 		if (mlist == NULL) {
9603354Sjl139090 			/* OPL supports memoryless board */
9611772Sjl139090 			continue;
9621772Sjl139090 		}
9631772Sjl139090 
9641772Sjl139090 		/*
9651772Sjl139090 		 * Verify the memory really did successfully attach
9661772Sjl139090 		 * by checking for its existence in phys_install.
9671772Sjl139090 		 */
9681772Sjl139090 		memlist_read_lock();
9691772Sjl139090 		if (memlist_intersect(phys_install, mlist) == 0) {
9701772Sjl139090 			memlist_read_unlock();
9711772Sjl139090 
9721772Sjl139090 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
9731772Sjl139090 
9741772Sjl139090 			PR_MEM("%s: %s memlist not in phys_install",
9755579Sjesusm 			    f, mp->sbm_cm.sbdev_path);
9761772Sjl139090 
9771772Sjl139090 			memlist_delete(mlist);
9781772Sjl139090 			continue;
9791772Sjl139090 		}
9801772Sjl139090 		memlist_read_unlock();
9811772Sjl139090 
982*11474SJonathan.Adams@Sun.COM 		for (ml = mlist; ml != NULL; ml = ml->ml_next) {
9831772Sjl139090 			sbd_error_t *err;
9841772Sjl139090 
9851772Sjl139090 			err = drmach_mem_add_span(
9865579Sjesusm 			    mp->sbm_cm.sbdev_id,
987*11474SJonathan.Adams@Sun.COM 			    ml->ml_address,
988*11474SJonathan.Adams@Sun.COM 			    ml->ml_size);
9891772Sjl139090 			if (err)
9901772Sjl139090 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
9911772Sjl139090 		}
9921772Sjl139090 
9931772Sjl139090 		memlist_delete(mlist);
9941772Sjl139090 
9951772Sjl139090 		/*
9961772Sjl139090 		 * Destroy cached memlist, if any.
9971772Sjl139090 		 * There will be a cached memlist in sbm_mlist if
9981772Sjl139090 		 * this board is being configured directly after
9991772Sjl139090 		 * an unconfigure.
10001772Sjl139090 		 * To support this transition, dr_post_detach_mem
10011772Sjl139090 		 * left a copy of the last known memlist in sbm_mlist.
10021772Sjl139090 		 * This memlist could differ from any derived from
10031772Sjl139090 		 * hardware if while this memunit was last configured
10041772Sjl139090 		 * the system detected and deleted bad pages from
10051772Sjl139090 		 * phys_install.  The location of those bad pages
10061772Sjl139090 		 * will be reflected in the cached memlist.
10071772Sjl139090 		 */
10081772Sjl139090 		if (mp->sbm_mlist) {
10091772Sjl139090 			memlist_delete(mp->sbm_mlist);
10101772Sjl139090 			mp->sbm_mlist = NULL;
10111772Sjl139090 		}
10121772Sjl139090 	}
10131772Sjl139090 
10143354Sjl139090 	dr_update_mc_memory();
10153354Sjl139090 
10161772Sjl139090 	return (0);
10171772Sjl139090 }
10181772Sjl139090 
10191772Sjl139090 int
dr_pre_detach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)10201772Sjl139090 dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
10211772Sjl139090 {
10221772Sjl139090 	_NOTE(ARGUNUSED(hp))
10231772Sjl139090 
10241772Sjl139090 	int d;
10251772Sjl139090 
10261772Sjl139090 	for (d = 0; d < devnum; d++) {
10271772Sjl139090 		dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
10281772Sjl139090 
10291772Sjl139090 		cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
10301772Sjl139090 	}
10311772Sjl139090 
10321772Sjl139090 	return (0);
10331772Sjl139090 }
10341772Sjl139090 
10351772Sjl139090 int
dr_post_detach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)10361772Sjl139090 dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
10371772Sjl139090 {
10381772Sjl139090 	_NOTE(ARGUNUSED(hp))
10391772Sjl139090 
10401772Sjl139090 	int		d, rv;
10411772Sjl139090 	static fn_t	f = "dr_post_detach_mem";
10421772Sjl139090 
10431772Sjl139090 	PR_MEM("%s...\n", f);
10441772Sjl139090 
10451772Sjl139090 	rv = 0;
10461772Sjl139090 	for (d = 0; d < devnum; d++) {
10471772Sjl139090 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
10481772Sjl139090 
10491772Sjl139090 		ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
10501772Sjl139090 
10511772Sjl139090 		if (dr_post_detach_mem_unit(mp))
10521772Sjl139090 			rv = -1;
10531772Sjl139090 	}
10543354Sjl139090 	dr_update_mc_memory();
10551772Sjl139090 
10561772Sjl139090 	return (rv);
10571772Sjl139090 }
10581772Sjl139090 
10591772Sjl139090 static void
dr_add_memory_spans(dr_mem_unit_t * mp,struct memlist * ml)10601772Sjl139090 dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
10611772Sjl139090 {
10621772Sjl139090 	static fn_t	f = "dr_add_memory_spans";
10631772Sjl139090 
10641772Sjl139090 	PR_MEM("%s...", f);
10651772Sjl139090 	PR_MEMLIST_DUMP(ml);
10661772Sjl139090 
10671772Sjl139090 #ifdef DEBUG
10681772Sjl139090 	memlist_read_lock();
10691772Sjl139090 	if (memlist_intersect(phys_install, ml)) {
10701772Sjl139090 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
10711772Sjl139090 	}
10721772Sjl139090 	memlist_read_unlock();
10731772Sjl139090 #endif
10741772Sjl139090 
1075*11474SJonathan.Adams@Sun.COM 	for (; ml; ml = ml->ml_next) {
10761772Sjl139090 		pfn_t		 base;
10771772Sjl139090 		pgcnt_t		 npgs;
10781772Sjl139090 		int		 rv;
10791772Sjl139090 		sbd_error_t	*err;
10801772Sjl139090 
1081*11474SJonathan.Adams@Sun.COM 		base = _b64top(ml->ml_address);
1082*11474SJonathan.Adams@Sun.COM 		npgs = _b64top(ml->ml_size);
10831772Sjl139090 
10841772Sjl139090 		rv = kphysm_add_memory_dynamic(base, npgs);
10851772Sjl139090 
10861772Sjl139090 		err = drmach_mem_add_span(
10875579Sjesusm 		    mp->sbm_cm.sbdev_id,
1088*11474SJonathan.Adams@Sun.COM 		    ml->ml_address,
1089*11474SJonathan.Adams@Sun.COM 		    ml->ml_size);
10901772Sjl139090 
10911772Sjl139090 		if (err)
10921772Sjl139090 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
10931772Sjl139090 
10941772Sjl139090 		if (rv != KPHYSM_OK) {
10951772Sjl139090 			cmn_err(CE_WARN, "%s:"
10965579Sjesusm 			    " unexpected kphysm_add_memory_dynamic"
10975579Sjesusm 			    " return value %d;"
10985579Sjesusm 			    " basepfn=0x%lx, npages=%ld\n",
10995579Sjesusm 			    f, rv, base, npgs);
11001772Sjl139090 
11011772Sjl139090 			continue;
11021772Sjl139090 		}
11031772Sjl139090 	}
11041772Sjl139090 }
11051772Sjl139090 
11061772Sjl139090 static int
memlist_touch(struct memlist * ml,uint64_t add)11073354Sjl139090 memlist_touch(struct memlist *ml, uint64_t add)
11083354Sjl139090 {
11093354Sjl139090 	while (ml != NULL) {
1110*11474SJonathan.Adams@Sun.COM 		if ((add == ml->ml_address) ||
1111*11474SJonathan.Adams@Sun.COM 		    (add == (ml->ml_address + ml->ml_size)))
11123354Sjl139090 			return (1);
1113*11474SJonathan.Adams@Sun.COM 		ml = ml->ml_next;
11143354Sjl139090 	}
11153354Sjl139090 	return (0);
11163354Sjl139090 }
11173354Sjl139090 
11183354Sjl139090 static sbd_error_t *
dr_process_excess_mlist(dr_mem_unit_t * s_mp,dr_mem_unit_t * t_mp,struct memlist * t_excess_mlist)11193354Sjl139090 dr_process_excess_mlist(dr_mem_unit_t *s_mp,
11203354Sjl139090 	dr_mem_unit_t *t_mp, struct memlist *t_excess_mlist)
11213354Sjl139090 {
11223354Sjl139090 	struct memlist	*ml;
11233354Sjl139090 	sbd_error_t	*err;
11243354Sjl139090 	static fn_t	f = "dr_process_excess_mlist";
11253354Sjl139090 	uint64_t	new_pa, nbytes;
11263354Sjl139090 	int rv;
11273354Sjl139090 
11283354Sjl139090 	err = NULL;
11293354Sjl139090 
11303354Sjl139090 	/*
11313354Sjl139090 	 * After the small <-> big copy-rename,
11323354Sjl139090 	 * the original address space for the
11333354Sjl139090 	 * source board may have excess to be
11343354Sjl139090 	 * deleted. This is a case different
11353354Sjl139090 	 * from the big->small excess source
11363354Sjl139090 	 * memory case listed below.
11373354Sjl139090 	 * Remove s_mp->sbm_del_mlist from
11383354Sjl139090 	 * the kernel cage glist.
11393354Sjl139090 	 */
11403354Sjl139090 	for (ml = s_mp->sbm_del_mlist; ml;
1141*11474SJonathan.Adams@Sun.COM 	    ml = ml->ml_next) {
11423354Sjl139090 		PR_MEM("%s: delete small<->big copy-"
11433354Sjl139090 		    "rename source excess memory", f);
11443354Sjl139090 		PR_MEMLIST_DUMP(ml);
11453354Sjl139090 
11463354Sjl139090 		err = drmach_mem_del_span(
11475579Sjesusm 		    s_mp->sbm_cm.sbdev_id,
1148*11474SJonathan.Adams@Sun.COM 		    ml->ml_address, ml->ml_size);
11493354Sjl139090 		if (err)
11503354Sjl139090 			DRERR_SET_C(&s_mp->
11513354Sjl139090 			    sbm_cm.sbdev_error, &err);
11523354Sjl139090 		ASSERT(err == NULL);
11533354Sjl139090 	}
11543354Sjl139090 
11553354Sjl139090 	PR_MEM("%s: adding back remaining portion"
11565579Sjesusm 	    " of %s, memlist:\n",
11575579Sjesusm 	    f, t_mp->sbm_cm.sbdev_path);
11583354Sjl139090 	PR_MEMLIST_DUMP(t_excess_mlist);
11593354Sjl139090 
1160*11474SJonathan.Adams@Sun.COM 	for (ml = t_excess_mlist; ml; ml = ml->ml_next) {
11615579Sjesusm 		struct memlist ml0;
11625579Sjesusm 
1163*11474SJonathan.Adams@Sun.COM 		ml0.ml_address = ml->ml_address;
1164*11474SJonathan.Adams@Sun.COM 		ml0.ml_size = ml->ml_size;
1165*11474SJonathan.Adams@Sun.COM 		ml0.ml_next = ml0.ml_prev = NULL;
11665579Sjesusm 
11675579Sjesusm 		/*
11685579Sjesusm 		 * If the memory object is 256 MB aligned (max page size
11695579Sjesusm 		 * on OPL, it will not be coalesced to the adjacent memory
11705579Sjesusm 		 * chunks.  The coalesce logic assumes contiguous page
11715579Sjesusm 		 * structures for contiguous memory and we hit panic.
11725579Sjesusm 		 * For anything less than 256 MB alignment, we have
11735579Sjesusm 		 * to make sure that it is not adjacent to anything.
11745579Sjesusm 		 * If the new chunk is adjacent to phys_install, we
11755579Sjesusm 		 * truncate it to 4MB boundary.  4 MB is somewhat
11765579Sjesusm 		 * arbitrary.  However we do not want to create
11775579Sjesusm 		 * very small segments because they can cause problem.
11785579Sjesusm 		 * The extreme case of 8K segment will fail
11795579Sjesusm 		 * kphysm_add_memory_dynamic(), e.g.
11805579Sjesusm 		 */
1181*11474SJonathan.Adams@Sun.COM 		if ((ml->ml_address & (MH_MPSS_ALIGNMENT - 1)) ||
1182*11474SJonathan.Adams@Sun.COM 		    (ml->ml_size & (MH_MPSS_ALIGNMENT - 1))) {
11833354Sjl139090 
11843354Sjl139090 		memlist_read_lock();
1185*11474SJonathan.Adams@Sun.COM 		rv = memlist_touch(phys_install, ml0.ml_address);
11863354Sjl139090 		memlist_read_unlock();
11873354Sjl139090 
11883354Sjl139090 		if (rv) {
1189*11474SJonathan.Adams@Sun.COM 			new_pa = roundup(ml0.ml_address + 1, MH_MIN_ALIGNMENT);
1190*11474SJonathan.Adams@Sun.COM 			nbytes = (new_pa -  ml0.ml_address);
1191*11474SJonathan.Adams@Sun.COM 			if (nbytes >= ml0.ml_size) {
11925579Sjesusm 				t_mp->sbm_dyn_segs =
11935579Sjesusm 				    memlist_del_span(t_mp->sbm_dyn_segs,
1194*11474SJonathan.Adams@Sun.COM 				    ml0.ml_address, ml0.ml_size);
11955579Sjesusm 				continue;
11965579Sjesusm 			}
11973354Sjl139090 			t_mp->sbm_dyn_segs =
11983354Sjl139090 			    memlist_del_span(t_mp->sbm_dyn_segs,
1199*11474SJonathan.Adams@Sun.COM 			    ml0.ml_address, nbytes);
1200*11474SJonathan.Adams@Sun.COM 			ml0.ml_size -= nbytes;
1201*11474SJonathan.Adams@Sun.COM 			ml0.ml_address = new_pa;
12023354Sjl139090 		}
12033354Sjl139090 
1204*11474SJonathan.Adams@Sun.COM 		if (ml0.ml_size == 0) {
12055579Sjesusm 			continue;
12063354Sjl139090 		}
12073354Sjl139090 
12083354Sjl139090 		memlist_read_lock();
1209*11474SJonathan.Adams@Sun.COM 		rv = memlist_touch(phys_install, ml0.ml_address + ml0.ml_size);
12103354Sjl139090 		memlist_read_unlock();
12113354Sjl139090 
12123354Sjl139090 		if (rv) {
1213*11474SJonathan.Adams@Sun.COM 			new_pa = rounddown(ml0.ml_address + ml0.ml_size - 1,
12145579Sjesusm 			    MH_MIN_ALIGNMENT);
1215*11474SJonathan.Adams@Sun.COM 			nbytes = (ml0.ml_address + ml0.ml_size - new_pa);
1216*11474SJonathan.Adams@Sun.COM 			if (nbytes >= ml0.ml_size) {
12175579Sjesusm 				t_mp->sbm_dyn_segs =
12185579Sjesusm 				    memlist_del_span(t_mp->sbm_dyn_segs,
1219*11474SJonathan.Adams@Sun.COM 				    ml0.ml_address, ml0.ml_size);
12205579Sjesusm 				continue;
12215579Sjesusm 			}
12223354Sjl139090 			t_mp->sbm_dyn_segs =
12233354Sjl139090 			    memlist_del_span(t_mp->sbm_dyn_segs,
12243354Sjl139090 			    new_pa, nbytes);
1225*11474SJonathan.Adams@Sun.COM 			ml0.ml_size -= nbytes;
12263354Sjl139090 		}
12273354Sjl139090 
1228*11474SJonathan.Adams@Sun.COM 		if (ml0.ml_size > 0) {
12295579Sjesusm 			dr_add_memory_spans(s_mp, &ml0);
12303354Sjl139090 		}
1231*11474SJonathan.Adams@Sun.COM 		} else if (ml0.ml_size > 0) {
12325579Sjesusm 			dr_add_memory_spans(s_mp, &ml0);
12335579Sjesusm 		}
12343354Sjl139090 	}
12353354Sjl139090 	memlist_delete(t_excess_mlist);
12363354Sjl139090 	return (err);
12373354Sjl139090 }
12383354Sjl139090 
12393354Sjl139090 static int
dr_post_detach_mem_unit(dr_mem_unit_t * s_mp)12401772Sjl139090 dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
12411772Sjl139090 {
12421772Sjl139090 	uint64_t	sz = s_mp->sbm_slice_size;
12431772Sjl139090 	uint64_t	sm = sz - 1;
12441772Sjl139090 	/* old and new below refer to PAs before and after copy-rename */
12451772Sjl139090 	uint64_t	s_old_basepa, s_new_basepa;
12461772Sjl139090 	uint64_t	t_old_basepa, t_new_basepa;
12471772Sjl139090 	dr_mem_unit_t	*t_mp, *x_mp;
12481772Sjl139090 	drmach_mem_info_t	minfo;
12491772Sjl139090 	struct memlist	*ml;
12501772Sjl139090 	struct memlist	*t_excess_mlist;
12511772Sjl139090 	int		rv;
12521772Sjl139090 	int		s_excess_mem_deleted = 0;
12531772Sjl139090 	sbd_error_t	*err;
12541772Sjl139090 	static fn_t	f = "dr_post_detach_mem_unit";
12551772Sjl139090 
12561772Sjl139090 	PR_MEM("%s...\n", f);
12571772Sjl139090 
12581772Sjl139090 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
12591772Sjl139090 	PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
12605579Sjesusm 	    f, s_mp->sbm_cm.sbdev_path);
12611772Sjl139090 	PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
12621772Sjl139090 
12631772Sjl139090 	/* sanity check */
12641772Sjl139090 	ASSERT(s_mp->sbm_del_mlist == NULL ||
12655579Sjesusm 	    (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
12661772Sjl139090 
12671772Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
12681772Sjl139090 		t_mp = s_mp->sbm_peer;
12691772Sjl139090 		ASSERT(t_mp != NULL);
12701772Sjl139090 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
12711772Sjl139090 		ASSERT(t_mp->sbm_peer == s_mp);
12721772Sjl139090 
12731772Sjl139090 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
12741772Sjl139090 		ASSERT(t_mp->sbm_del_mlist);
12751772Sjl139090 
12761772Sjl139090 		PR_MEM("%s: target %s: deleted memlist:\n",
12775579Sjesusm 		    f, t_mp->sbm_cm.sbdev_path);
12781772Sjl139090 		PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
12791772Sjl139090 	} else {
12801772Sjl139090 		/* this is no target unit */
12811772Sjl139090 		t_mp = NULL;
12821772Sjl139090 	}
12831772Sjl139090 
12841772Sjl139090 	/*
12851772Sjl139090 	 * Verify the memory really did successfully detach
12861772Sjl139090 	 * by checking for its non-existence in phys_install.
12871772Sjl139090 	 */
12881772Sjl139090 	rv = 0;
12891772Sjl139090 	memlist_read_lock();
12901772Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
12911772Sjl139090 		x_mp = s_mp;
12921772Sjl139090 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
12931772Sjl139090 	}
12941772Sjl139090 	if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
12951772Sjl139090 		x_mp = t_mp;
12961772Sjl139090 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
12971772Sjl139090 	}
12981772Sjl139090 	memlist_read_unlock();
12991772Sjl139090 
13001772Sjl139090 	if (rv) {
13011772Sjl139090 		/* error: memlist still in phys_install */
13021772Sjl139090 		DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
13031772Sjl139090 	}
13041772Sjl139090 
13051772Sjl139090 	/*
13061772Sjl139090 	 * clean mem unit state and bail out if an error has been recorded.
13071772Sjl139090 	 */
13081772Sjl139090 	rv = 0;
13091772Sjl139090 	if (s_mp->sbm_cm.sbdev_error) {
13101772Sjl139090 		PR_MEM("%s: %s flags=%x", f,
13115579Sjesusm 		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
13121772Sjl139090 		DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
13131772Sjl139090 		DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
13141772Sjl139090 		dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
13151772Sjl139090 		rv = -1;
13161772Sjl139090 	}
13171772Sjl139090 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
13181772Sjl139090 		PR_MEM("%s: %s flags=%x", f,
13195579Sjesusm 		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
13201772Sjl139090 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
13211772Sjl139090 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
13221772Sjl139090 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
13231772Sjl139090 		rv = -1;
13241772Sjl139090 	}
13251772Sjl139090 	if (rv)
13261772Sjl139090 		goto cleanup;
13271772Sjl139090 
13281772Sjl139090 	s_old_basepa = _ptob64(s_mp->sbm_basepfn);
13291772Sjl139090 	err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo);
13301772Sjl139090 	ASSERT(err == NULL);
13311772Sjl139090 	s_new_basepa = minfo.mi_basepa;
13321772Sjl139090 
13331772Sjl139090 	PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
13341772Sjl139090 	PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
13351772Sjl139090 
13361772Sjl139090 	if (t_mp != NULL) {
13371772Sjl139090 		struct memlist *s_copy_mlist;
13381772Sjl139090 
13391772Sjl139090 		t_old_basepa = _ptob64(t_mp->sbm_basepfn);
13401772Sjl139090 		err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo);
13411772Sjl139090 		ASSERT(err == NULL);
13421772Sjl139090 		t_new_basepa = minfo.mi_basepa;
13431772Sjl139090 
13441772Sjl139090 		PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
13451772Sjl139090 		PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
13461772Sjl139090 
13471772Sjl139090 		/*
13481772Sjl139090 		 * Construct copy list with original source addresses.
13491772Sjl139090 		 * Used to add back excess target mem.
13501772Sjl139090 		 */
13511772Sjl139090 		s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1352*11474SJonathan.Adams@Sun.COM 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
13531772Sjl139090 			s_copy_mlist = memlist_del_span(s_copy_mlist,
1354*11474SJonathan.Adams@Sun.COM 			    ml->ml_address, ml->ml_size);
13551772Sjl139090 		}
13561772Sjl139090 
13571772Sjl139090 		PR_MEM("%s: source copy list:\n:", f);
13581772Sjl139090 		PR_MEMLIST_DUMP(s_copy_mlist);
13591772Sjl139090 
13601772Sjl139090 		/*
13611772Sjl139090 		 * We had to swap mem-units, so update
13621772Sjl139090 		 * memlists accordingly with new base
13631772Sjl139090 		 * addresses.
13641772Sjl139090 		 */
1365*11474SJonathan.Adams@Sun.COM 		for (ml = t_mp->sbm_mlist; ml; ml = ml->ml_next) {
1366*11474SJonathan.Adams@Sun.COM 			ml->ml_address -= t_old_basepa;
1367*11474SJonathan.Adams@Sun.COM 			ml->ml_address += t_new_basepa;
13681772Sjl139090 		}
13691772Sjl139090 
13701772Sjl139090 		/*
13711772Sjl139090 		 * There is no need to explicitly rename the target delete
13721772Sjl139090 		 * memlist, because sbm_del_mlist and sbm_mlist always
13731772Sjl139090 		 * point to the same memlist for a copy/rename operation.
13741772Sjl139090 		 */
13751772Sjl139090 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
13761772Sjl139090 
13771772Sjl139090 		PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
13781772Sjl139090 		PR_MEMLIST_DUMP(t_mp->sbm_mlist);
13791772Sjl139090 
1380*11474SJonathan.Adams@Sun.COM 		for (ml = s_mp->sbm_mlist; ml; ml = ml->ml_next) {
1381*11474SJonathan.Adams@Sun.COM 			ml->ml_address -= s_old_basepa;
1382*11474SJonathan.Adams@Sun.COM 			ml->ml_address += s_new_basepa;
13831772Sjl139090 		}
13841772Sjl139090 
13851772Sjl139090 		PR_MEM("%s: renamed source memlist:\n", f);
13861772Sjl139090 		PR_MEMLIST_DUMP(s_mp->sbm_mlist);
13873354Sjl139090 		PR_MEM("%s: source dyn seg memlist:\n", f);
13883354Sjl139090 		PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
13891772Sjl139090 
13901772Sjl139090 		/*
13911772Sjl139090 		 * Keep track of dynamically added segments
13921772Sjl139090 		 * since they cannot be split if we need to delete
13931772Sjl139090 		 * excess source memory later for this board.
13941772Sjl139090 		 */
13951772Sjl139090 		if (t_mp->sbm_dyn_segs)
13961772Sjl139090 			memlist_delete(t_mp->sbm_dyn_segs);
13971772Sjl139090 		t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
13981772Sjl139090 		s_mp->sbm_dyn_segs = NULL;
13991772Sjl139090 
14001772Sjl139090 		/*
14011772Sjl139090 		 * Add back excess target memory.
14021772Sjl139090 		 * Subtract out the portion of the target memory
14031772Sjl139090 		 * node that was taken over by the source memory
14041772Sjl139090 		 * node.
14051772Sjl139090 		 */
14061772Sjl139090 		t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1407*11474SJonathan.Adams@Sun.COM 		for (ml = s_copy_mlist; ml; ml = ml->ml_next) {
14081772Sjl139090 			t_excess_mlist =
14091772Sjl139090 			    memlist_del_span(t_excess_mlist,
1410*11474SJonathan.Adams@Sun.COM 			    ml->ml_address, ml->ml_size);
14111772Sjl139090 		}
14123354Sjl139090 		PR_MEM("%s: excess memlist:\n", f);
14133354Sjl139090 		PR_MEMLIST_DUMP(t_excess_mlist);
14141772Sjl139090 
14151772Sjl139090 		/*
14161772Sjl139090 		 * Update dynamically added segs
14171772Sjl139090 		 */
1418*11474SJonathan.Adams@Sun.COM 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
14191772Sjl139090 			t_mp->sbm_dyn_segs =
14201772Sjl139090 			    memlist_del_span(t_mp->sbm_dyn_segs,
1421*11474SJonathan.Adams@Sun.COM 			    ml->ml_address, ml->ml_size);
14221772Sjl139090 		}
1423*11474SJonathan.Adams@Sun.COM 		for (ml = t_excess_mlist; ml; ml = ml->ml_next) {
14241772Sjl139090 			t_mp->sbm_dyn_segs =
14251772Sjl139090 			    memlist_cat_span(t_mp->sbm_dyn_segs,
1426*11474SJonathan.Adams@Sun.COM 			    ml->ml_address, ml->ml_size);
14271772Sjl139090 		}
14281772Sjl139090 		PR_MEM("%s: %s: updated dynamic seg list:\n",
14291772Sjl139090 		    f, t_mp->sbm_cm.sbdev_path);
14301772Sjl139090 		PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
14311772Sjl139090 
14321772Sjl139090 		if (t_excess_mlist != NULL) {
14333354Sjl139090 			err = dr_process_excess_mlist(s_mp, t_mp,
14345579Sjesusm 			    t_excess_mlist);
14351772Sjl139090 			s_excess_mem_deleted = 1;
14361772Sjl139090 		}
14373354Sjl139090 
14381772Sjl139090 		memlist_delete(s_copy_mlist);
14391772Sjl139090 
14401772Sjl139090 #ifdef DEBUG
14411772Sjl139090 		/*
14421772Sjl139090 		 * s_mp->sbm_del_mlist may still needed
14431772Sjl139090 		 */
14441772Sjl139090 		PR_MEM("%s: source delete memeory flag %d",
14451772Sjl139090 		    f, s_excess_mem_deleted);
14461772Sjl139090 		PR_MEM("%s: source delete memlist", f);
14471772Sjl139090 		PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
14481772Sjl139090 #endif
14491772Sjl139090 
14501772Sjl139090 	}
14511772Sjl139090 
14521772Sjl139090 	if (t_mp != NULL) {
14531772Sjl139090 		/* delete target's entire address space */
14541772Sjl139090 		err = drmach_mem_del_span(
14555579Sjesusm 		    t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
14561772Sjl139090 		if (err)
14571772Sjl139090 			DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
14581772Sjl139090 		ASSERT(err == NULL);
14591772Sjl139090 
14601772Sjl139090 		/*
14611772Sjl139090 		 * After the copy/rename, the original address space
14621772Sjl139090 		 * for the source board (which is now located on the
14631772Sjl139090 		 * target board) may now have some excess to be deleted.
14641772Sjl139090 		 * Those excess memory on the source board are kept in
14651772Sjl139090 		 * source board's sbm_del_mlist
14661772Sjl139090 		 */
14671772Sjl139090 		for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml;
1468*11474SJonathan.Adams@Sun.COM 		    ml = ml->ml_next) {
14691772Sjl139090 			PR_MEM("%s: delete source excess memory", f);
14701772Sjl139090 			PR_MEMLIST_DUMP(ml);
14711772Sjl139090 
14721772Sjl139090 			err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1473*11474SJonathan.Adams@Sun.COM 			    ml->ml_address, ml->ml_size);
14741772Sjl139090 			if (err)
14751772Sjl139090 				DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
14761772Sjl139090 			ASSERT(err == NULL);
14771772Sjl139090 		}
14781772Sjl139090 
14791772Sjl139090 	} else {
14801772Sjl139090 		/* delete board's entire address space */
14811772Sjl139090 		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
14825579Sjesusm 		    s_old_basepa & ~ sm, sz);
14831772Sjl139090 		if (err)
14841772Sjl139090 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
14851772Sjl139090 		ASSERT(err == NULL);
14861772Sjl139090 	}
14871772Sjl139090 
14881772Sjl139090 cleanup:
14891772Sjl139090 	/* clean up target mem unit */
14901772Sjl139090 	if (t_mp != NULL) {
14911772Sjl139090 		memlist_delete(t_mp->sbm_del_mlist);
14921772Sjl139090 		/* no need to delete sbm_mlist, it shares sbm_del_mlist */
14931772Sjl139090 
14941772Sjl139090 		t_mp->sbm_del_mlist = NULL;
14951772Sjl139090 		t_mp->sbm_mlist = NULL;
14961772Sjl139090 		t_mp->sbm_peer = NULL;
14971772Sjl139090 		t_mp->sbm_flags = 0;
14981772Sjl139090 		t_mp->sbm_cm.sbdev_busy = 0;
14991772Sjl139090 		dr_init_mem_unit_data(t_mp);
15001772Sjl139090 
15011772Sjl139090 	}
15021772Sjl139090 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
15031772Sjl139090 		/*
15041772Sjl139090 		 * now that copy/rename has completed, undo this
15051772Sjl139090 		 * work that was done in dr_release_mem_done.
15061772Sjl139090 		 */
15071772Sjl139090 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
15081772Sjl139090 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
15091772Sjl139090 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
15101772Sjl139090 	}
15111772Sjl139090 
15121772Sjl139090 	/*
15131772Sjl139090 	 * clean up (source) board's mem unit structure.
15141772Sjl139090 	 * NOTE: sbm_mlist is retained if no error has been recorded (in other
15151772Sjl139090 	 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
15161772Sjl139090 	 * referred to elsewhere as the cached memlist.  The cached memlist
15171772Sjl139090 	 * is used to re-attach (configure back in) this memunit from the
15181772Sjl139090 	 * unconfigured state.  The memlist is retained because it may
15191772Sjl139090 	 * represent bad pages that were detected while the memory was
15201772Sjl139090 	 * configured into the OS.  The OS deletes bad pages from phys_install.
15211772Sjl139090 	 * Those deletes, if any, will be represented in the cached mlist.
15221772Sjl139090 	 */
15231772Sjl139090 	if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
15241772Sjl139090 		memlist_delete(s_mp->sbm_del_mlist);
15251772Sjl139090 
15261772Sjl139090 	if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
15271772Sjl139090 		memlist_delete(s_mp->sbm_mlist);
15281772Sjl139090 		s_mp->sbm_mlist = NULL;
15291772Sjl139090 	}
15301772Sjl139090 
15311772Sjl139090 	if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
15321772Sjl139090 		memlist_delete(s_mp->sbm_dyn_segs);
15331772Sjl139090 		s_mp->sbm_dyn_segs = NULL;
15341772Sjl139090 	}
15351772Sjl139090 
15361772Sjl139090 	s_mp->sbm_del_mlist = NULL;
15371772Sjl139090 	s_mp->sbm_peer = NULL;
15381772Sjl139090 	s_mp->sbm_flags = 0;
15391772Sjl139090 	s_mp->sbm_cm.sbdev_busy = 0;
15401772Sjl139090 	dr_init_mem_unit_data(s_mp);
15411772Sjl139090 
15421772Sjl139090 	PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
15431772Sjl139090 	PR_MEMLIST_DUMP(s_mp->sbm_mlist);
15441772Sjl139090 
15451772Sjl139090 	return (0);
15461772Sjl139090 }
15471772Sjl139090 
15481772Sjl139090 /*
15491772Sjl139090  * Successful return from this function will have the memory
15501772Sjl139090  * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
15511772Sjl139090  * and waiting.  This routine's job is to select the memory that
15521772Sjl139090  * actually has to be released (detached) which may not necessarily
15531772Sjl139090  * be the same memory node that came in in devlist[],
15541772Sjl139090  * i.e. a copy-rename is needed.
15551772Sjl139090  */
15561772Sjl139090 int
dr_pre_release_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)15571772Sjl139090 dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
15581772Sjl139090 {
15591772Sjl139090 	int		d;
15601772Sjl139090 	int		err_flag = 0;
15611772Sjl139090 	static fn_t	f = "dr_pre_release_mem";
15621772Sjl139090 
15631772Sjl139090 	PR_MEM("%s...\n", f);
15641772Sjl139090 
15651772Sjl139090 	for (d = 0; d < devnum; d++) {
15661772Sjl139090 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
15671772Sjl139090 		int		rv;
15681772Sjl139090 		memquery_t	mq;
15691772Sjl139090 		struct memlist	*ml;
15701772Sjl139090 
15711772Sjl139090 		if (mp->sbm_cm.sbdev_error) {
15721772Sjl139090 			err_flag = 1;
15731772Sjl139090 			continue;
15741772Sjl139090 		} else if (!kcage_on) {
15751772Sjl139090 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
15761772Sjl139090 			err_flag = 1;
15771772Sjl139090 			continue;
15781772Sjl139090 		}
15791772Sjl139090 
15801772Sjl139090 		if (mp->sbm_flags & DR_MFLAG_RESERVED) {
15811772Sjl139090 			/*
15821772Sjl139090 			 * Board is currently involved in a delete
15831772Sjl139090 			 * memory operation. Can't detach this guy until
15841772Sjl139090 			 * that operation completes.
15851772Sjl139090 			 */
15861772Sjl139090 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
15871772Sjl139090 			err_flag = 1;
15881772Sjl139090 			break;
15891772Sjl139090 		}
15901772Sjl139090 
15911772Sjl139090 		/* flags should be clean at this time */
15921772Sjl139090 		ASSERT(mp->sbm_flags == 0);
15931772Sjl139090 
15941772Sjl139090 		ASSERT(mp->sbm_mlist == NULL);
15951772Sjl139090 		ASSERT(mp->sbm_del_mlist == NULL);
15961772Sjl139090 		if (mp->sbm_mlist != NULL) {
15971772Sjl139090 			memlist_delete(mp->sbm_mlist);
15981772Sjl139090 			mp->sbm_mlist = NULL;
15991772Sjl139090 		}
16001772Sjl139090 
16011772Sjl139090 		ml = dr_get_memlist(mp);
16021772Sjl139090 		if (ml == NULL) {
16031772Sjl139090 			err_flag = 1;
16041772Sjl139090 			PR_MEM("%s: no memlist found for %s\n",
16051772Sjl139090 			    f, mp->sbm_cm.sbdev_path);
16061772Sjl139090 			continue;
16071772Sjl139090 		}
16081772Sjl139090 
16091772Sjl139090 		/*
16101772Sjl139090 		 * Check whether the detaching memory requires a
16111772Sjl139090 		 * copy-rename.
16121772Sjl139090 		 */
16131772Sjl139090 		ASSERT(mp->sbm_npages != 0);
16143354Sjl139090 
16151772Sjl139090 		rv = dr_del_mlist_query(ml, &mq);
16161772Sjl139090 		if (rv != KPHYSM_OK) {
16171772Sjl139090 			memlist_delete(ml);
16181772Sjl139090 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
16191772Sjl139090 			err_flag = 1;
16201772Sjl139090 			break;
16211772Sjl139090 		}
16221772Sjl139090 
16231772Sjl139090 		if (mq.nonrelocatable != 0) {
16241772Sjl139090 			if (!(dr_cmd_flags(hp) &
16255579Sjesusm 			    (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
16261772Sjl139090 				memlist_delete(ml);
16271772Sjl139090 				/* caller wasn't prompted for a suspend */
16281772Sjl139090 				dr_dev_err(CE_WARN, &mp->sbm_cm,
16295579Sjesusm 				    ESBD_QUIESCE_REQD);
16301772Sjl139090 				err_flag = 1;
16311772Sjl139090 				break;
16321772Sjl139090 			}
16331772Sjl139090 		}
16341772Sjl139090 
16351772Sjl139090 		/* allocate a kphysm handle */
16361772Sjl139090 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
16371772Sjl139090 		if (rv != KPHYSM_OK) {
16381772Sjl139090 			memlist_delete(ml);
16391772Sjl139090 
16401772Sjl139090 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
16411772Sjl139090 			err_flag = 1;
16421772Sjl139090 			break;
16431772Sjl139090 		}
16441772Sjl139090 		mp->sbm_flags |= DR_MFLAG_RELOWNER;
16451772Sjl139090 
16461772Sjl139090 		if ((mq.nonrelocatable != 0) ||
16475579Sjesusm 		    dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
16481772Sjl139090 			/*
16491772Sjl139090 			 * Either the detaching memory node contains
16501772Sjl139090 			 * non-reloc memory or we failed to reserve the
16511772Sjl139090 			 * detaching memory node (which did _not_ have
16521772Sjl139090 			 * any non-reloc memory, i.e. some non-reloc mem
16531772Sjl139090 			 * got onboard).
16541772Sjl139090 			 */
16551772Sjl139090 
16561772Sjl139090 			if (dr_select_mem_target(hp, mp, ml)) {
16571772Sjl139090 				int rv;
16581772Sjl139090 
16591772Sjl139090 				/*
16601772Sjl139090 				 * We had no luck locating a target
16611772Sjl139090 				 * memory node to be the recipient of
16621772Sjl139090 				 * the non-reloc memory on the node
16631772Sjl139090 				 * we're trying to detach.
16641772Sjl139090 				 * Clean up be disposing the mem handle
16651772Sjl139090 				 * and the mem list.
16661772Sjl139090 				 */
16671772Sjl139090 				rv = kphysm_del_release(mp->sbm_memhandle);
16681772Sjl139090 				if (rv != KPHYSM_OK) {
16691772Sjl139090 					/*
16701772Sjl139090 					 * can do nothing but complain
16711772Sjl139090 					 * and hope helpful for debug
16721772Sjl139090 					 */
16731772Sjl139090 					cmn_err(CE_WARN, "%s: unexpected"
16745579Sjesusm 					    " kphysm_del_release return"
16755579Sjesusm 					    " value %d",
16765579Sjesusm 					    f, rv);
16771772Sjl139090 				}
16781772Sjl139090 				mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
16791772Sjl139090 
16801772Sjl139090 				memlist_delete(ml);
16811772Sjl139090 
16821772Sjl139090 				/* make sure sbm_flags is clean */
16831772Sjl139090 				ASSERT(mp->sbm_flags == 0);
16841772Sjl139090 
16851772Sjl139090 				dr_dev_err(CE_WARN,
16865579Sjesusm 				    &mp->sbm_cm, ESBD_NO_TARGET);
16871772Sjl139090 
16881772Sjl139090 				err_flag = 1;
16891772Sjl139090 				break;
16901772Sjl139090 			}
16911772Sjl139090 
16921772Sjl139090 			/*
16931772Sjl139090 			 * ml is not memlist_delete'd here because
16941772Sjl139090 			 * it has been assigned to mp->sbm_mlist
16951772Sjl139090 			 * by dr_select_mem_target.
16961772Sjl139090 			 */
16971772Sjl139090 		} else {
16981772Sjl139090 			/* no target needed to detach this board */
16991772Sjl139090 			mp->sbm_flags |= DR_MFLAG_RESERVED;
17001772Sjl139090 			mp->sbm_peer = NULL;
17011772Sjl139090 			mp->sbm_del_mlist = ml;
17021772Sjl139090 			mp->sbm_mlist = ml;
17031772Sjl139090 			mp->sbm_cm.sbdev_busy = 1;
17041772Sjl139090 		}
17051772Sjl139090 #ifdef DEBUG
17061772Sjl139090 		ASSERT(mp->sbm_mlist != NULL);
17071772Sjl139090 
17081772Sjl139090 		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
17091772Sjl139090 			PR_MEM("%s: release of %s requires copy/rename;"
17105579Sjesusm 			    " selected target board %s\n",
17115579Sjesusm 			    f,
17125579Sjesusm 			    mp->sbm_cm.sbdev_path,
17135579Sjesusm 			    mp->sbm_peer->sbm_cm.sbdev_path);
17141772Sjl139090 		} else {
17151772Sjl139090 			PR_MEM("%s: copy/rename not required to release %s\n",
17165579Sjesusm 			    f, mp->sbm_cm.sbdev_path);
17171772Sjl139090 		}
17181772Sjl139090 
17191772Sjl139090 		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
17201772Sjl139090 		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
17211772Sjl139090 #endif
17221772Sjl139090 	}
17231772Sjl139090 
17241772Sjl139090 	return (err_flag ? -1 : 0);
17251772Sjl139090 }
17261772Sjl139090 
17271772Sjl139090 void
dr_release_mem_done(dr_common_unit_t * cp)17281772Sjl139090 dr_release_mem_done(dr_common_unit_t *cp)
17291772Sjl139090 {
17301772Sjl139090 	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
17311772Sjl139090 	dr_mem_unit_t *t_mp, *mp;
17321772Sjl139090 	int		rv;
17331772Sjl139090 	static fn_t	f = "dr_release_mem_done";
17341772Sjl139090 
17351772Sjl139090 	/*
17361772Sjl139090 	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
17371772Sjl139090 	 * has a target unit.
17381772Sjl139090 	 */
17391772Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
17401772Sjl139090 		t_mp = s_mp->sbm_peer;
17411772Sjl139090 		ASSERT(t_mp != NULL);
17421772Sjl139090 		ASSERT(t_mp->sbm_peer == s_mp);
17431772Sjl139090 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
17441772Sjl139090 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
17451772Sjl139090 	} else {
17461772Sjl139090 		/* this is no target unit */
17471772Sjl139090 		t_mp = NULL;
17481772Sjl139090 	}
17491772Sjl139090 
17501772Sjl139090 	/* free delete handle */
17511772Sjl139090 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
17521772Sjl139090 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
17531772Sjl139090 	rv = kphysm_del_release(s_mp->sbm_memhandle);
17541772Sjl139090 	if (rv != KPHYSM_OK) {
17551772Sjl139090 		/*
17561772Sjl139090 		 * can do nothing but complain
17571772Sjl139090 		 * and hope helpful for debug
17581772Sjl139090 		 */
17591772Sjl139090 		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
17605579Sjesusm 		    " return value %d", f, rv);
17611772Sjl139090 	}
17621772Sjl139090 	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
17631772Sjl139090 
17641772Sjl139090 	/*
17651772Sjl139090 	 * If an error was encountered during release, clean up
17661772Sjl139090 	 * the source (and target, if present) unit data.
17671772Sjl139090 	 */
17681772Sjl139090 /* XXX Can we know that sbdev_error was encountered during release? */
17691772Sjl139090 	if (s_mp->sbm_cm.sbdev_error != NULL) {
17701772Sjl139090 
17711772Sjl139090 		if (t_mp != NULL) {
17721772Sjl139090 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
17731772Sjl139090 			t_mp->sbm_del_mlist = NULL;
17741772Sjl139090 
17751772Sjl139090 			if (t_mp->sbm_mlist != NULL) {
17761772Sjl139090 				memlist_delete(t_mp->sbm_mlist);
17771772Sjl139090 				t_mp->sbm_mlist = NULL;
17781772Sjl139090 			}
17791772Sjl139090 
17801772Sjl139090 			t_mp->sbm_peer = NULL;
17811772Sjl139090 			t_mp->sbm_flags = 0;
17821772Sjl139090 			t_mp->sbm_cm.sbdev_busy = 0;
17831772Sjl139090 		}
17841772Sjl139090 
17851772Sjl139090 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
17861772Sjl139090 			memlist_delete(s_mp->sbm_del_mlist);
17871772Sjl139090 		s_mp->sbm_del_mlist = NULL;
17881772Sjl139090 
17891772Sjl139090 		if (s_mp->sbm_mlist != NULL) {
17901772Sjl139090 			memlist_delete(s_mp->sbm_mlist);
17911772Sjl139090 			s_mp->sbm_mlist = NULL;
17921772Sjl139090 		}
17931772Sjl139090 
17941772Sjl139090 		s_mp->sbm_peer = NULL;
17951772Sjl139090 		s_mp->sbm_flags = 0;
17961772Sjl139090 		s_mp->sbm_cm.sbdev_busy = 0;
17971772Sjl139090 
17981772Sjl139090 		/* bail out */
17991772Sjl139090 		return;
18001772Sjl139090 	}
18011772Sjl139090 
18021772Sjl139090 	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
18031772Sjl139090 	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
18041772Sjl139090 
18051772Sjl139090 	if (t_mp != NULL) {
18061772Sjl139090 		/*
18071772Sjl139090 		 * the kphysm delete operation that drained the source
18081772Sjl139090 		 * board also drained this target board.  Since the source
18091772Sjl139090 		 * board drain is now known to have succeeded, we know this
18101772Sjl139090 		 * target board is drained too.
18111772Sjl139090 		 *
18121772Sjl139090 		 * because DR_DEV_SET_RELEASED and dr_device_transition
18131772Sjl139090 		 * is done here, the dr_release_dev_done should not
18141772Sjl139090 		 * fail.
18151772Sjl139090 		 */
18161772Sjl139090 		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
18171772Sjl139090 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
18181772Sjl139090 
18191772Sjl139090 		/*
18201772Sjl139090 		 * NOTE: do not transition target's board state,
18211772Sjl139090 		 * even if the mem-unit was the last configure
18221772Sjl139090 		 * unit of the board.  When copy/rename completes
18231772Sjl139090 		 * this mem-unit will transitioned back to
18241772Sjl139090 		 * the configured state.  In the meantime, the
18251772Sjl139090 		 * board's must remain as is.
18261772Sjl139090 		 */
18271772Sjl139090 	}
18281772Sjl139090 
18291772Sjl139090 	/* if board(s) had deleted memory, verify it is gone */
18301772Sjl139090 	rv = 0;
18311772Sjl139090 	memlist_read_lock();
18321772Sjl139090 	if (s_mp->sbm_del_mlist != NULL) {
18331772Sjl139090 		mp = s_mp;
18341772Sjl139090 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
18351772Sjl139090 	}
18361772Sjl139090 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
18371772Sjl139090 		mp = t_mp;
18381772Sjl139090 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
18391772Sjl139090 	}
18401772Sjl139090 	memlist_read_unlock();
18411772Sjl139090 	if (rv) {
18421772Sjl139090 		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
18435579Sjesusm 		    "deleted memory still found in phys_install",
18445579Sjesusm 		    f,
18455579Sjesusm 		    (mp == t_mp ? "target " : ""),
18465579Sjesusm 		    mp->sbm_cm.sbdev_bp->b_num,
18475579Sjesusm 		    mp->sbm_cm.sbdev_unum);
18481772Sjl139090 
18491772Sjl139090 		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
18501772Sjl139090 		return;
18511772Sjl139090 	}
18521772Sjl139090 
18531772Sjl139090 	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
18541772Sjl139090 	if (t_mp != NULL)
18551772Sjl139090 		t_mp->sbm_flags |= DR_MFLAG_RELDONE;
18561772Sjl139090 
18571772Sjl139090 	/* this should not fail */
18581772Sjl139090 	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
18591772Sjl139090 		/* catch this in debug kernels */
18601772Sjl139090 		ASSERT(0);
18611772Sjl139090 		return;
18621772Sjl139090 	}
18631772Sjl139090 
18641772Sjl139090 	PR_MEM("%s: marking %s release DONE\n",
18655579Sjesusm 	    f, s_mp->sbm_cm.sbdev_path);
18661772Sjl139090 
18671772Sjl139090 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
18681772Sjl139090 
18691772Sjl139090 	if (t_mp != NULL) {
18701772Sjl139090 		/* should not fail */
18711772Sjl139090 		rv = dr_release_dev_done(&t_mp->sbm_cm);
18721772Sjl139090 		if (rv != 0) {
18731772Sjl139090 			/* catch this in debug kernels */
18741772Sjl139090 			ASSERT(0);
18751772Sjl139090 			return;
18761772Sjl139090 		}
18771772Sjl139090 
18781772Sjl139090 		PR_MEM("%s: marking %s release DONE\n",
18795579Sjesusm 		    f, t_mp->sbm_cm.sbdev_path);
18801772Sjl139090 
18811772Sjl139090 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
18821772Sjl139090 	}
18831772Sjl139090 }
18841772Sjl139090 
18851772Sjl139090 /*ARGSUSED*/
18861772Sjl139090 int
dr_disconnect_mem(dr_mem_unit_t * mp)18871772Sjl139090 dr_disconnect_mem(dr_mem_unit_t *mp)
18881772Sjl139090 {
18891772Sjl139090 	static fn_t	f = "dr_disconnect_mem";
18901772Sjl139090 	update_membounds_t umb;
18911772Sjl139090 
18921772Sjl139090 #ifdef DEBUG
18931772Sjl139090 	int state = mp->sbm_cm.sbdev_state;
18941772Sjl139090 	ASSERT(state == DR_STATE_CONNECTED ||
18955579Sjesusm 	    state == DR_STATE_UNCONFIGURED);
18961772Sjl139090 #endif
18971772Sjl139090 
18981772Sjl139090 	PR_MEM("%s...\n", f);
18991772Sjl139090 
19001772Sjl139090 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
19011772Sjl139090 		memlist_delete(mp->sbm_del_mlist);
19021772Sjl139090 	mp->sbm_del_mlist = NULL;
19031772Sjl139090 
19041772Sjl139090 	if (mp->sbm_mlist) {
19051772Sjl139090 		memlist_delete(mp->sbm_mlist);
19061772Sjl139090 		mp->sbm_mlist = NULL;
19071772Sjl139090 	}
19081772Sjl139090 
19091772Sjl139090 	/*
19101772Sjl139090 	 * Remove memory from lgroup
19111772Sjl139090 	 * For now, only board info is required.
19121772Sjl139090 	 */
19131772Sjl139090 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
19141772Sjl139090 	umb.u_base = (uint64_t)-1;
19151772Sjl139090 	umb.u_len = (uint64_t)-1;
19161772Sjl139090 
19171772Sjl139090 	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
19181772Sjl139090 
19191772Sjl139090 	return (0);
19201772Sjl139090 }
19211772Sjl139090 
19221772Sjl139090 int
dr_cancel_mem(dr_mem_unit_t * s_mp)19231772Sjl139090 dr_cancel_mem(dr_mem_unit_t *s_mp)
19241772Sjl139090 {
19251772Sjl139090 	dr_mem_unit_t	*t_mp;
19261772Sjl139090 	dr_state_t	state;
19271772Sjl139090 	static fn_t	f = "dr_cancel_mem";
19281772Sjl139090 
19291772Sjl139090 	state = s_mp->sbm_cm.sbdev_state;
19301772Sjl139090 
19311772Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
19321772Sjl139090 		/* must cancel source board, not target board */
19331772Sjl139090 		/* TODO: set error */
19341772Sjl139090 		return (-1);
19351772Sjl139090 	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
19361772Sjl139090 		t_mp = s_mp->sbm_peer;
19371772Sjl139090 		ASSERT(t_mp != NULL);
19381772Sjl139090 		ASSERT(t_mp->sbm_peer == s_mp);
19391772Sjl139090 
19401772Sjl139090 		/* must always match the source board's state */
19411772Sjl139090 		/* TODO: is this assertion correct? */
19421772Sjl139090 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
19431772Sjl139090 	} else {
19441772Sjl139090 		/* this is no target unit */
19451772Sjl139090 		t_mp = NULL;
19461772Sjl139090 	}
19471772Sjl139090 
19481772Sjl139090 	switch (state) {
19491772Sjl139090 	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
19501772Sjl139090 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
19511772Sjl139090 
19521772Sjl139090 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
19531772Sjl139090 			PR_MEM("%s: undoing target %s memory delete\n",
19545579Sjesusm 			    f, t_mp->sbm_cm.sbdev_path);
19551772Sjl139090 			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
19561772Sjl139090 
19571772Sjl139090 			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
19581772Sjl139090 		}
19591772Sjl139090 
19601772Sjl139090 		if (s_mp->sbm_del_mlist != NULL) {
19611772Sjl139090 			PR_MEM("%s: undoing %s memory delete\n",
19625579Sjesusm 			    f, s_mp->sbm_cm.sbdev_path);
19631772Sjl139090 
19641772Sjl139090 			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
19651772Sjl139090 		}
19661772Sjl139090 
19671772Sjl139090 		/*FALLTHROUGH*/
19681772Sjl139090 
19691772Sjl139090 /* TODO: should no longer be possible to see the release state here */
19701772Sjl139090 	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */
19711772Sjl139090 
19721772Sjl139090 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
19731772Sjl139090 
19741772Sjl139090 		if (t_mp != NULL) {
19751772Sjl139090 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
19761772Sjl139090 			t_mp->sbm_del_mlist = NULL;
19771772Sjl139090 
19781772Sjl139090 			if (t_mp->sbm_mlist != NULL) {
19791772Sjl139090 				memlist_delete(t_mp->sbm_mlist);
19801772Sjl139090 				t_mp->sbm_mlist = NULL;
19811772Sjl139090 			}
19821772Sjl139090 
19831772Sjl139090 			t_mp->sbm_peer = NULL;
19841772Sjl139090 			t_mp->sbm_flags = 0;
19851772Sjl139090 			t_mp->sbm_cm.sbdev_busy = 0;
19861772Sjl139090 			dr_init_mem_unit_data(t_mp);
19871772Sjl139090 
19881772Sjl139090 			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
19891772Sjl139090 
19901772Sjl139090 			dr_device_transition(
19915579Sjesusm 			    &t_mp->sbm_cm, DR_STATE_CONFIGURED);
19921772Sjl139090 		}
19931772Sjl139090 
19941772Sjl139090 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
19951772Sjl139090 			memlist_delete(s_mp->sbm_del_mlist);
19961772Sjl139090 		s_mp->sbm_del_mlist = NULL;
19971772Sjl139090 
19981772Sjl139090 		if (s_mp->sbm_mlist != NULL) {
19991772Sjl139090 			memlist_delete(s_mp->sbm_mlist);
20001772Sjl139090 			s_mp->sbm_mlist = NULL;
20011772Sjl139090 		}
20021772Sjl139090 
20031772Sjl139090 		s_mp->sbm_peer = NULL;
20041772Sjl139090 		s_mp->sbm_flags = 0;
20051772Sjl139090 		s_mp->sbm_cm.sbdev_busy = 0;
20061772Sjl139090 		dr_init_mem_unit_data(s_mp);
20071772Sjl139090 
20081772Sjl139090 		return (0);
20091772Sjl139090 
20101772Sjl139090 	default:
20111772Sjl139090 		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
20125579Sjesusm 		    f, (int)state, s_mp->sbm_cm.sbdev_path);
20131772Sjl139090 
20141772Sjl139090 		return (-1);
20151772Sjl139090 	}
20161772Sjl139090 	/*NOTREACHED*/
20171772Sjl139090 }
20181772Sjl139090 
20191772Sjl139090 void
dr_init_mem_unit(dr_mem_unit_t * mp)20201772Sjl139090 dr_init_mem_unit(dr_mem_unit_t *mp)
20211772Sjl139090 {
20221772Sjl139090 	dr_state_t	new_state;
20231772Sjl139090 
20241772Sjl139090 
20251772Sjl139090 	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
20261772Sjl139090 		new_state = DR_STATE_CONFIGURED;
20271772Sjl139090 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
20281772Sjl139090 	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
20291772Sjl139090 		new_state = DR_STATE_CONNECTED;
20301772Sjl139090 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
20311772Sjl139090 	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
20321772Sjl139090 		new_state = DR_STATE_OCCUPIED;
20331772Sjl139090 	} else {
20341772Sjl139090 		new_state = DR_STATE_EMPTY;
20351772Sjl139090 	}
20361772Sjl139090 
20371772Sjl139090 	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
20381772Sjl139090 		dr_init_mem_unit_data(mp);
20391772Sjl139090 
20401772Sjl139090 	/* delay transition until fully initialized */
20411772Sjl139090 	dr_device_transition(&mp->sbm_cm, new_state);
20421772Sjl139090 }
20431772Sjl139090 
20441772Sjl139090 static void
dr_init_mem_unit_data(dr_mem_unit_t * mp)20451772Sjl139090 dr_init_mem_unit_data(dr_mem_unit_t *mp)
20461772Sjl139090 {
20471772Sjl139090 	drmachid_t	id = mp->sbm_cm.sbdev_id;
20481772Sjl139090 	drmach_mem_info_t	minfo;
20491772Sjl139090 	sbd_error_t	*err;
20501772Sjl139090 	static fn_t	f = "dr_init_mem_unit_data";
20511772Sjl139090 	update_membounds_t umb;
20521772Sjl139090 
20531772Sjl139090 	PR_MEM("%s...\n", f);
20541772Sjl139090 
20551772Sjl139090 	/* a little sanity checking */
20561772Sjl139090 	ASSERT(mp->sbm_peer == NULL);
20571772Sjl139090 	ASSERT(mp->sbm_flags == 0);
20581772Sjl139090 
20591772Sjl139090 	if (err = drmach_mem_get_info(id, &minfo)) {
20601772Sjl139090 		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
20611772Sjl139090 		return;
20621772Sjl139090 	}
20631772Sjl139090 	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
20641772Sjl139090 	mp->sbm_npages = _b64top(minfo.mi_size);
20651772Sjl139090 	mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
20661772Sjl139090 	mp->sbm_slice_size = minfo.mi_slice_size;
20671772Sjl139090 
20681772Sjl139090 	/*
20691772Sjl139090 	 * Add memory to lgroup
20701772Sjl139090 	 */
20711772Sjl139090 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
20721772Sjl139090 	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
20731772Sjl139090 	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
20741772Sjl139090 
20751772Sjl139090 	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
20761772Sjl139090 
20771772Sjl139090 	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
20785579Sjesusm 	    f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
20791772Sjl139090 }
20801772Sjl139090 
20811772Sjl139090 static int
dr_reserve_mem_spans(memhandle_t * mhp,struct memlist * ml)20821772Sjl139090 dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
20831772Sjl139090 {
20841772Sjl139090 	int		err;
20851772Sjl139090 	pfn_t		base;
20861772Sjl139090 	pgcnt_t		npgs;
20871772Sjl139090 	struct memlist	*mc;
20881772Sjl139090 	static fn_t	f = "dr_reserve_mem_spans";
20891772Sjl139090 
20901772Sjl139090 	PR_MEM("%s...\n", f);
20911772Sjl139090 
20921772Sjl139090 	/*
20931772Sjl139090 	 * Walk the supplied memlist scheduling each span for removal
20941772Sjl139090 	 * with kphysm_del_span.  It is possible that a span may intersect
20951772Sjl139090 	 * an area occupied by the cage.
20961772Sjl139090 	 */
2097*11474SJonathan.Adams@Sun.COM 	for (mc = ml; mc != NULL; mc = mc->ml_next) {
2098*11474SJonathan.Adams@Sun.COM 		base = _b64top(mc->ml_address);
2099*11474SJonathan.Adams@Sun.COM 		npgs = _b64top(mc->ml_size);
21001772Sjl139090 
21011772Sjl139090 		err = kphysm_del_span(*mhp, base, npgs);
21021772Sjl139090 		if (err != KPHYSM_OK) {
21031772Sjl139090 			cmn_err(CE_WARN, "%s memory reserve failed."
21045579Sjesusm 			    " unexpected kphysm_del_span return value %d;"
21055579Sjesusm 			    " basepfn=0x%lx npages=%ld",
21065579Sjesusm 			    f, err, base, npgs);
21071772Sjl139090 
21081772Sjl139090 			return (-1);
21091772Sjl139090 		}
21101772Sjl139090 	}
21111772Sjl139090 
21121772Sjl139090 	return (0);
21131772Sjl139090 }
21141772Sjl139090 
/* number of preference sets kept by dr_select_mem_target() */
#define	DR_SMT_NPREF_SETS	6
/*
 * One slot per memory unit in the system.  The product is parenthesized
 * so that the macro keeps its value when used next to operators such as
 * '/' or '%', or as the operand of a cast.
 */
#define	DR_SMT_NUNITS_PER_SET	(MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD)

/* debug counters */
int dr_smt_realigned;
/* incremented once per candidate unit, indexed by its preference */
int dr_smt_preference[DR_SMT_NPREF_SETS];

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
21251772Sjl139090 
21261772Sjl139090 /*
21271772Sjl139090  * Find and reserve a copy/rename target board suitable for the
21281772Sjl139090  * given source board.
21291772Sjl139090  * All boards in the system are examined and categorized in relation to
21301772Sjl139090  * their memory size versus the source board's memory size.  Order of
21311772Sjl139090  * preference is:
21321772Sjl139090  *	1st copy all source, source/target same size
21331772Sjl139090  *	2nd copy all source, larger target
21341772Sjl139090  * 	3rd copy nonrelocatable source span
21351772Sjl139090  */
21361772Sjl139090 static int
dr_select_mem_target(dr_handle_t * hp,dr_mem_unit_t * s_mp,struct memlist * s_ml)21371772Sjl139090 dr_select_mem_target(dr_handle_t *hp,
21381772Sjl139090 	dr_mem_unit_t *s_mp, struct memlist *s_ml)
21391772Sjl139090 {
21401772Sjl139090 	dr_target_pref_t preference; /* lower value is higher preference */
21411772Sjl139090 	int		idx;
21421772Sjl139090 	dr_mem_unit_t	**sets;
21431772Sjl139090 
21441772Sjl139090 	int		t_bd;
21451772Sjl139090 	int		t_unit;
21461772Sjl139090 	int		rv;
21471772Sjl139090 	dr_board_t	*s_bp, *t_bp;
21481772Sjl139090 	dr_mem_unit_t	*t_mp, *c_mp;
21491772Sjl139090 	struct memlist	*d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL;
21501772Sjl139090 	memquery_t	s_mq = {0};
21511772Sjl139090 	static fn_t	f = "dr_select_mem_target";
21521772Sjl139090 
21531772Sjl139090 	PR_MEM("%s...\n", f);
21541772Sjl139090 
21551772Sjl139090 	ASSERT(s_ml != NULL);
21561772Sjl139090 
21571772Sjl139090 	sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
21581772Sjl139090 	    DR_SMT_NPREF_SETS);
21591772Sjl139090 
21601772Sjl139090 	s_bp = hp->h_bd;
21611772Sjl139090 	/* calculate the offset into the slice of the last source board pfn */
21621772Sjl139090 	ASSERT(s_mp->sbm_npages != 0);
21631772Sjl139090 
21641772Sjl139090 	/*
21651772Sjl139090 	 * Find non-relocatable span on source board.
21661772Sjl139090 	 */
21671772Sjl139090 	rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq);
21681772Sjl139090 	if (rv != KPHYSM_OK) {
21691772Sjl139090 		PR_MEM("%s: %s: unexpected kphysm_del_span_query"
21701772Sjl139090 		    " return value %d; basepfn 0x%lx, npages %ld\n",
21711772Sjl139090 		    f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn,
21721772Sjl139090 		    s_mp->sbm_npages);
21731772Sjl139090 		return (-1);
21741772Sjl139090 	}
21751772Sjl139090 
21761772Sjl139090 	ASSERT(s_mq.phys_pages != 0);
21771772Sjl139090 	ASSERT(s_mq.nonrelocatable != 0);
21781772Sjl139090 
21791772Sjl139090 	PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f,
21801772Sjl139090 	    s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable,
21811772Sjl139090 	    s_mq.last_nonrelocatable);
21821772Sjl139090 
21831772Sjl139090 	/* break down s_ml if it contains dynamic segments */
21841772Sjl139090 	b_ml = memlist_dup(s_ml);
21851772Sjl139090 
2186*11474SJonathan.Adams@Sun.COM 	for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->ml_next) {
2187*11474SJonathan.Adams@Sun.COM 		b_ml = memlist_del_span(b_ml, ml->ml_address, ml->ml_size);
2188*11474SJonathan.Adams@Sun.COM 		b_ml = memlist_cat_span(b_ml, ml->ml_address, ml->ml_size);
21891772Sjl139090 	}
21901772Sjl139090 
21911772Sjl139090 
21921772Sjl139090 	/*
21931772Sjl139090 	 * Make one pass through all memory units on all boards
21941772Sjl139090 	 * and categorize them with respect to the source board.
21951772Sjl139090 	 */
21961772Sjl139090 	for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
21971772Sjl139090 		/*
21981772Sjl139090 		 * The board structs are a contiguous array
21991772Sjl139090 		 * so we take advantage of that to find the
22001772Sjl139090 		 * correct board struct pointer for a given
22011772Sjl139090 		 * board number.
22021772Sjl139090 		 */
22031772Sjl139090 		t_bp = dr_lookup_board(t_bd);
22041772Sjl139090 
22051772Sjl139090 		/* source board can not be its own target */
22061772Sjl139090 		if (s_bp->b_num == t_bp->b_num)
22071772Sjl139090 			continue;
22081772Sjl139090 
22091772Sjl139090 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
22101772Sjl139090 
22111772Sjl139090 			t_mp = dr_get_mem_unit(t_bp, t_unit);
22121772Sjl139090 
22131772Sjl139090 			/* this memory node must be attached */
22141772Sjl139090 			if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
22151772Sjl139090 				continue;
22161772Sjl139090 
22171772Sjl139090 			/* source unit can not be its own target */
22181772Sjl139090 			if (s_mp == t_mp) {
22191772Sjl139090 				/* catch this in debug kernels */
22201772Sjl139090 				ASSERT(0);
22211772Sjl139090 				continue;
22221772Sjl139090 			}
22231772Sjl139090 
22241772Sjl139090 			/*
22251772Sjl139090 			 * this memory node must not already be reserved
22261772Sjl139090 			 * by some other memory delete operation.
22271772Sjl139090 			 */
22281772Sjl139090 			if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
22291772Sjl139090 				continue;
22301772Sjl139090 
22311772Sjl139090 			/* get target board memlist */
22321772Sjl139090 			t_ml = dr_get_memlist(t_mp);
22331772Sjl139090 			if (t_ml == NULL) {
22341772Sjl139090 				cmn_err(CE_WARN, "%s: no memlist for"
22351772Sjl139090 				    " mem-unit %d, board %d", f,
22361772Sjl139090 				    t_mp->sbm_cm.sbdev_bp->b_num,
22371772Sjl139090 				    t_mp->sbm_cm.sbdev_unum);
22381772Sjl139090 				continue;
22391772Sjl139090 			}
22401772Sjl139090 
22411772Sjl139090 			preference = dr_get_target_preference(hp, t_mp, s_mp,
22421772Sjl139090 			    t_ml, s_ml, b_ml);
22431772Sjl139090 
22445579Sjesusm 			memlist_delete(t_ml);
22455579Sjesusm 
22461772Sjl139090 			if (preference == DR_TP_INVALID)
22471772Sjl139090 				continue;
22481772Sjl139090 
22491772Sjl139090 			dr_smt_preference[preference]++;
22501772Sjl139090 
22511772Sjl139090 			/* calculate index to start of preference set */
22521772Sjl139090 			idx  = DR_SMT_NUNITS_PER_SET * preference;
22531772Sjl139090 			/* calculate offset to respective element */
22541772Sjl139090 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
22551772Sjl139090 
22561772Sjl139090 			ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS);
22571772Sjl139090 			sets[idx] = t_mp;
22581772Sjl139090 		}
22591772Sjl139090 	}
22601772Sjl139090 
22611772Sjl139090 	if (b_ml != NULL)
22621772Sjl139090 		memlist_delete(b_ml);
22631772Sjl139090 
22641772Sjl139090 	/*
22651772Sjl139090 	 * NOTE: this would be a good place to sort each candidate
22661772Sjl139090 	 * set in to some desired order, e.g. memory size in ascending
22671772Sjl139090 	 * order.  Without an additional sorting step here, the order
22681772Sjl139090 	 * within a set is ascending board number order.
22691772Sjl139090 	 */
22701772Sjl139090 
22711772Sjl139090 	c_mp = NULL;
22721772Sjl139090 	x_ml = NULL;
22731772Sjl139090 	t_ml = NULL;
22741772Sjl139090 	for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) {
22751772Sjl139090 		memquery_t mq;
22761772Sjl139090 
22771772Sjl139090 		preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET);
22781772Sjl139090 
22791772Sjl139090 		ASSERT(preference != DR_TP_INVALID);
22801772Sjl139090 
22811772Sjl139090 		/* cleanup t_ml after previous pass */
22821772Sjl139090 		if (t_ml != NULL) {
22831772Sjl139090 			memlist_delete(t_ml);
22841772Sjl139090 			t_ml = NULL;
22851772Sjl139090 		}
22861772Sjl139090 
22871772Sjl139090 		/* get candidate target board mem unit */
22881772Sjl139090 		t_mp = sets[idx];
22891772Sjl139090 		if (t_mp == NULL)
22901772Sjl139090 			continue;
22911772Sjl139090 
22921772Sjl139090 		/* get target board memlist */
22931772Sjl139090 		t_ml = dr_get_memlist(t_mp);
22941772Sjl139090 		if (t_ml == NULL) {
22951772Sjl139090 			cmn_err(CE_WARN, "%s: no memlist for"
22965579Sjesusm 			    " mem-unit %d, board %d",
22975579Sjesusm 			    f,
22985579Sjesusm 			    t_mp->sbm_cm.sbdev_bp->b_num,
22995579Sjesusm 			    t_mp->sbm_cm.sbdev_unum);
23001772Sjl139090 
23011772Sjl139090 			continue;
23021772Sjl139090 		}
23031772Sjl139090 
23041772Sjl139090 		PR_MEM("%s: checking for no-reloc in %s, "
23055579Sjesusm 		    " basepfn=0x%lx, npages=%ld\n",
23065579Sjesusm 		    f,
23075579Sjesusm 		    t_mp->sbm_cm.sbdev_path,
23085579Sjesusm 		    t_mp->sbm_basepfn,
23095579Sjesusm 		    t_mp->sbm_npages);
23101772Sjl139090 
23111772Sjl139090 		rv = dr_del_mlist_query(t_ml, &mq);
23121772Sjl139090 		if (rv != KPHYSM_OK) {
23131772Sjl139090 			PR_MEM("%s: kphysm_del_span_query:"
23145579Sjesusm 			    " unexpected return value %d\n", f, rv);
23151772Sjl139090 
23161772Sjl139090 			continue;
23171772Sjl139090 		}
23181772Sjl139090 
23191772Sjl139090 		if (mq.nonrelocatable != 0) {
23201772Sjl139090 			PR_MEM("%s: candidate %s has"
23215579Sjesusm 			    " nonrelocatable span [0x%lx..0x%lx]\n",
23225579Sjesusm 			    f,
23235579Sjesusm 			    t_mp->sbm_cm.sbdev_path,
23245579Sjesusm 			    mq.first_nonrelocatable,
23255579Sjesusm 			    mq.last_nonrelocatable);
23261772Sjl139090 
23271772Sjl139090 			continue;
23281772Sjl139090 		}
23291772Sjl139090 
23301772Sjl139090 #ifdef DEBUG
23311772Sjl139090 		/*
23321772Sjl139090 		 * This is a debug tool for excluding certain boards
23331772Sjl139090 		 * from being selected as a target board candidate.
23341772Sjl139090 		 * dr_ignore_board is only tested by this driver.
23351772Sjl139090 		 * It must be set with adb, obp, /etc/system or your
23361772Sjl139090 		 * favorite debugger.
23371772Sjl139090 		 */
23381772Sjl139090 		if (dr_ignore_board &
23395579Sjesusm 		    (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
23401772Sjl139090 			PR_MEM("%s: dr_ignore_board flag set,"
23415579Sjesusm 			    " ignoring %s as candidate\n",
23425579Sjesusm 			    f, t_mp->sbm_cm.sbdev_path);
23431772Sjl139090 			continue;
23441772Sjl139090 		}
23451772Sjl139090 #endif
23461772Sjl139090 
23471772Sjl139090 		/*
23481772Sjl139090 		 * Reserve excess source board memory, if any.
23491772Sjl139090 		 *
23501772Sjl139090 		 * Only the nonrelocatable source span will be copied
23511772Sjl139090 		 * so schedule the rest of the source mem to be deleted.
23521772Sjl139090 		 */
23531772Sjl139090 		switch (preference) {
23541772Sjl139090 		case DR_TP_NONRELOC:
23551772Sjl139090 			/*
23561772Sjl139090 			 * Get source copy memlist and use it to construct
23571772Sjl139090 			 * delete memlist.
23581772Sjl139090 			 */
23591772Sjl139090 			d_ml = memlist_dup(s_ml);
23601772Sjl139090 			x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp);
23611772Sjl139090 
23621772Sjl139090 			/* XXX */
23631772Sjl139090 			ASSERT(d_ml != NULL);
23641772Sjl139090 			ASSERT(x_ml != NULL);
23651772Sjl139090 
2366*11474SJonathan.Adams@Sun.COM 			for (ml = x_ml; ml != NULL; ml = ml->ml_next) {
2367*11474SJonathan.Adams@Sun.COM 				d_ml = memlist_del_span(d_ml, ml->ml_address,
2368*11474SJonathan.Adams@Sun.COM 				    ml->ml_size);
23691772Sjl139090 			}
23701772Sjl139090 
23711772Sjl139090 			PR_MEM("%s: %s: reserving src brd memlist:\n", f,
23721772Sjl139090 			    s_mp->sbm_cm.sbdev_path);
23731772Sjl139090 			PR_MEMLIST_DUMP(d_ml);
23741772Sjl139090 
23751772Sjl139090 			/* reserve excess spans */
23761772Sjl139090 			if (dr_reserve_mem_spans(&s_mp->sbm_memhandle,
23771772Sjl139090 			    d_ml) != 0) {
23781772Sjl139090 				/* likely more non-reloc pages appeared */
23791772Sjl139090 				/* TODO: restart from top? */
23801772Sjl139090 				continue;
23811772Sjl139090 			}
23821772Sjl139090 			break;
23831772Sjl139090 		default:
23841772Sjl139090 			d_ml = NULL;
23851772Sjl139090 			break;
23861772Sjl139090 		}
23871772Sjl139090 
23881772Sjl139090 		s_mp->sbm_flags |= DR_MFLAG_RESERVED;
23891772Sjl139090 
23901772Sjl139090 		/*
23911772Sjl139090 		 * reserve all memory on target board.
23921772Sjl139090 		 * NOTE: source board's memhandle is used.
23931772Sjl139090 		 *
23941772Sjl139090 		 * If this succeeds (eq 0), then target selection is
23951772Sjl139090 		 * complete and all unwanted memory spans, both source and
23961772Sjl139090 		 * target, have been reserved.  Loop is terminated.
23971772Sjl139090 		 */
23981772Sjl139090 		if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
23991772Sjl139090 			PR_MEM("%s: %s: target board memory reserved\n",
24005579Sjesusm 			    f, t_mp->sbm_cm.sbdev_path);
24011772Sjl139090 
24021772Sjl139090 			/* a candidate target board is now reserved */
24031772Sjl139090 			t_mp->sbm_flags |= DR_MFLAG_RESERVED;
24041772Sjl139090 			c_mp = t_mp;
24051772Sjl139090 
24061772Sjl139090 			/* *** EXITING LOOP *** */
24071772Sjl139090 			break;
24081772Sjl139090 		}
24091772Sjl139090 
24101772Sjl139090 		/* did not successfully reserve the target board. */
24111772Sjl139090 		PR_MEM("%s: could not reserve target %s\n",
24125579Sjesusm 		    f, t_mp->sbm_cm.sbdev_path);
24131772Sjl139090 
24141772Sjl139090 		/*
24151772Sjl139090 		 * NOTE: an undo of the dr_reserve_mem_span work
24161772Sjl139090 		 * will happen automatically when the memhandle
24171772Sjl139090 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
24181772Sjl139090 		 */
24191772Sjl139090 
24201772Sjl139090 		s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
24211772Sjl139090 	}
24221772Sjl139090 
24231772Sjl139090 	/* clean up after memlist editing logic */
24241772Sjl139090 	if (x_ml != NULL)
24251772Sjl139090 		memlist_delete(x_ml);
24261772Sjl139090 
24271772Sjl139090 	FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
24281772Sjl139090 	    DR_SMT_NPREF_SETS);
24291772Sjl139090 
24301772Sjl139090 	/*
24311772Sjl139090 	 * c_mp will be NULL when the entire sets[] array
24321772Sjl139090 	 * has been searched without reserving a target board.
24331772Sjl139090 	 */
24341772Sjl139090 	if (c_mp == NULL) {
24351772Sjl139090 		PR_MEM("%s: %s: target selection failed.\n",
24365579Sjesusm 		    f, s_mp->sbm_cm.sbdev_path);
24371772Sjl139090 
24381772Sjl139090 		if (t_ml != NULL)
24391772Sjl139090 			memlist_delete(t_ml);
24401772Sjl139090 
24411772Sjl139090 		return (-1);
24421772Sjl139090 	}
24431772Sjl139090 
24441772Sjl139090 	PR_MEM("%s: found target %s for source %s\n",
24455579Sjesusm 	    f,
24465579Sjesusm 	    c_mp->sbm_cm.sbdev_path,
24475579Sjesusm 	    s_mp->sbm_cm.sbdev_path);
24481772Sjl139090 
24491772Sjl139090 	s_mp->sbm_peer = c_mp;
24501772Sjl139090 	s_mp->sbm_flags |= DR_MFLAG_SOURCE;
24511772Sjl139090 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
24521772Sjl139090 	s_mp->sbm_mlist = s_ml;
24531772Sjl139090 	s_mp->sbm_cm.sbdev_busy = 1;
24541772Sjl139090 
24551772Sjl139090 	c_mp->sbm_peer = s_mp;
24561772Sjl139090 	c_mp->sbm_flags |= DR_MFLAG_TARGET;
24571772Sjl139090 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
24581772Sjl139090 	c_mp->sbm_mlist = t_ml;
24591772Sjl139090 	c_mp->sbm_cm.sbdev_busy = 1;
24601772Sjl139090 
24611772Sjl139090 	return (0);
24621772Sjl139090 }
24631772Sjl139090 
24641772Sjl139090 /*
24651772Sjl139090  * Returns target preference rank:
24661772Sjl139090  *     -1 not a valid copy-rename target board
24671772Sjl139090  *	0 copy all source, source/target same size
24681772Sjl139090  *	1 copy all source, larger target
24691772Sjl139090  * 	2 copy nonrelocatable source span
24701772Sjl139090  */
24711772Sjl139090 static dr_target_pref_t
dr_get_target_preference(dr_handle_t * hp,dr_mem_unit_t * t_mp,dr_mem_unit_t * s_mp,struct memlist * t_ml,struct memlist * s_ml,struct memlist * b_ml)24721772Sjl139090 dr_get_target_preference(dr_handle_t *hp,
24731772Sjl139090     dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
24741772Sjl139090     struct memlist *t_ml, struct memlist *s_ml,
24751772Sjl139090     struct memlist *b_ml)
24761772Sjl139090 {
24771772Sjl139090 	dr_target_pref_t preference;
24781772Sjl139090 	struct memlist *s_nonreloc_ml = NULL;
24791772Sjl139090 	drmachid_t t_id;
24801772Sjl139090 	static fn_t	f = "dr_get_target_preference";
24811772Sjl139090 
24821772Sjl139090 	t_id = t_mp->sbm_cm.sbdev_bp->b_id;
24831772Sjl139090 
24841772Sjl139090 	/*
24851772Sjl139090 	 * Can the entire source board be copied?
24861772Sjl139090 	 */
24871772Sjl139090 	if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) {
24881772Sjl139090 		if (s_mp->sbm_npages == t_mp->sbm_npages)
24891772Sjl139090 			preference = DR_TP_SAME;	/* same size */
24901772Sjl139090 		else
24911772Sjl139090 			preference = DR_TP_LARGE;	/* larger target */
24921772Sjl139090 	} else {
24931772Sjl139090 		/*
24941772Sjl139090 		 * Entire source won't fit so try non-relocatable memory only
24951772Sjl139090 		 * (target aligned).
24961772Sjl139090 		 */
24971772Sjl139090 		s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp);
24981772Sjl139090 		if (s_nonreloc_ml == NULL) {
24991772Sjl139090 			PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f);
25001772Sjl139090 			preference = DR_TP_INVALID;
25011772Sjl139090 		}
25021772Sjl139090 		if (dr_memlist_canfit(s_nonreloc_ml, t_ml, s_mp, t_mp))
25031772Sjl139090 			preference = DR_TP_NONRELOC;
25041772Sjl139090 		else
25051772Sjl139090 			preference = DR_TP_INVALID;
25061772Sjl139090 	}
25071772Sjl139090 
25081772Sjl139090 	if (s_nonreloc_ml != NULL)
25091772Sjl139090 		memlist_delete(s_nonreloc_ml);
25101772Sjl139090 
25111772Sjl139090 	/*
25121772Sjl139090 	 * Force floating board preference lower than all other boards
25131772Sjl139090 	 * if the force flag is present; otherwise disallow the board.
25141772Sjl139090 	 */
25151772Sjl139090 	if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) {
25161772Sjl139090 		if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
25171772Sjl139090 			preference += DR_TP_FLOATING;
25181772Sjl139090 		else
25191772Sjl139090 			preference = DR_TP_INVALID;
25201772Sjl139090 	}
25211772Sjl139090 
25221772Sjl139090 	PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path,
25231772Sjl139090 	    preference);
25241772Sjl139090 
25251772Sjl139090 	return (preference);
25261772Sjl139090 }
25271772Sjl139090 
25281772Sjl139090 /*
25291772Sjl139090  * Create a memlist representing the source memory that will be copied to
25301772Sjl139090  * the target board.  The memory to be copied is the maximum amount that
25311772Sjl139090  * will fit on the target board.
25321772Sjl139090  */
25331772Sjl139090 static struct memlist *
dr_get_copy_mlist(struct memlist * s_mlist,struct memlist * t_mlist,dr_mem_unit_t * s_mp,dr_mem_unit_t * t_mp)25341772Sjl139090 dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
25351772Sjl139090     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
25361772Sjl139090 {
25371772Sjl139090 	struct memlist	*t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
25381772Sjl139090 	uint64_t	s_slice_mask, s_slice_base;
25391772Sjl139090 	uint64_t	t_slice_mask, t_slice_base;
25401772Sjl139090 	static fn_t	f = "dr_get_copy_mlist";
25411772Sjl139090 
25421772Sjl139090 	ASSERT(s_mlist != NULL);
25431772Sjl139090 	ASSERT(t_mlist != NULL);
25441772Sjl139090 	ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);
25451772Sjl139090 
25461772Sjl139090 	s_slice_mask = s_mp->sbm_slice_size - 1;
2547*11474SJonathan.Adams@Sun.COM 	s_slice_base = s_mlist->ml_address & ~s_slice_mask;
25481772Sjl139090 
25491772Sjl139090 	t_slice_mask = t_mp->sbm_slice_size - 1;
2550*11474SJonathan.Adams@Sun.COM 	t_slice_base = t_mlist->ml_address & ~t_slice_mask;
25511772Sjl139090 
25521772Sjl139090 	t_ml = memlist_dup(t_mlist);
25531772Sjl139090 	s_del_ml = memlist_dup(s_mlist);
25541772Sjl139090 	s_copy_ml = memlist_dup(s_mlist);
25551772Sjl139090 
25561772Sjl139090 	/* XXX */
25571772Sjl139090 	ASSERT(t_ml != NULL);
25581772Sjl139090 	ASSERT(s_del_ml != NULL);
25591772Sjl139090 	ASSERT(s_copy_ml != NULL);
25601772Sjl139090 
25611772Sjl139090 	/*
25621772Sjl139090 	 * To construct the source copy memlist:
25631772Sjl139090 	 *
25641772Sjl139090 	 * The target memlist is converted to the post-rename
25651772Sjl139090 	 * source addresses.  This is the physical address range
25661772Sjl139090 	 * the target will have after the copy-rename.  Overlaying
25671772Sjl139090 	 * and deleting this from the current source memlist will
25681772Sjl139090 	 * give the source delete memlist.  The copy memlist is
25691772Sjl139090 	 * the reciprocal of the source delete memlist.
25701772Sjl139090 	 */
2571*11474SJonathan.Adams@Sun.COM 	for (ml = t_ml; ml != NULL; ml = ml->ml_next) {
25721772Sjl139090 		/*
25731772Sjl139090 		 * Normalize relative to target slice base PA
25741772Sjl139090 		 * in order to preseve slice offsets.
25751772Sjl139090 		 */
2576*11474SJonathan.Adams@Sun.COM 		ml->ml_address -= t_slice_base;
25771772Sjl139090 		/*
25781772Sjl139090 		 * Convert to source slice PA address.
25791772Sjl139090 		 */
2580*11474SJonathan.Adams@Sun.COM 		ml->ml_address += s_slice_base;
25811772Sjl139090 	}
25821772Sjl139090 
2583*11474SJonathan.Adams@Sun.COM 	for (ml = t_ml; ml != NULL; ml = ml->ml_next) {
2584*11474SJonathan.Adams@Sun.COM 		s_del_ml = memlist_del_span(s_del_ml,
2585*11474SJonathan.Adams@Sun.COM 		    ml->ml_address, ml->ml_size);
25861772Sjl139090 	}
25871772Sjl139090 
25881772Sjl139090 	/*
25891772Sjl139090 	 * Expand the delete mlist to fully include any dynamic segments
25901772Sjl139090 	 * it intersects with.
25911772Sjl139090 	 */
2592*11474SJonathan.Adams@Sun.COM 	for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->ml_next) {
2593*11474SJonathan.Adams@Sun.COM 		uint64_t del_base = ml->ml_address;
2594*11474SJonathan.Adams@Sun.COM 		uint64_t del_end = ml->ml_address + ml->ml_size;
25951772Sjl139090 		struct memlist *dyn;
25961772Sjl139090 
2597*11474SJonathan.Adams@Sun.COM 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL;
2598*11474SJonathan.Adams@Sun.COM 		    dyn = dyn->ml_next) {
2599*11474SJonathan.Adams@Sun.COM 			uint64_t dyn_base = dyn->ml_address;
2600*11474SJonathan.Adams@Sun.COM 			uint64_t dyn_end = dyn->ml_address + dyn->ml_size;
26011772Sjl139090 
26021772Sjl139090 			if (del_base > dyn_base && del_base < dyn_end)
26031772Sjl139090 				del_base = dyn_base;
26041772Sjl139090 
26051772Sjl139090 			if (del_end > dyn_base && del_end < dyn_end)
26061772Sjl139090 				del_end = dyn_end;
26071772Sjl139090 		}
26081772Sjl139090 
26091772Sjl139090 		x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
26101772Sjl139090 	}
26111772Sjl139090 
26121772Sjl139090 	memlist_delete(s_del_ml);
26131772Sjl139090 	s_del_ml = x_ml;
26141772Sjl139090 
2615*11474SJonathan.Adams@Sun.COM 	for (ml = s_del_ml; ml != NULL; ml = ml->ml_next) {
2616*11474SJonathan.Adams@Sun.COM 		s_copy_ml = memlist_del_span(s_copy_ml,
2617*11474SJonathan.Adams@Sun.COM 		    ml->ml_address, ml->ml_size);
26181772Sjl139090 	}
26191772Sjl139090 
26201772Sjl139090 	PR_MEM("%s: source delete mlist\n", f);
26211772Sjl139090 	PR_MEMLIST_DUMP(s_del_ml);
26221772Sjl139090 
26231772Sjl139090 	PR_MEM("%s: source copy mlist\n", f);
26241772Sjl139090 	PR_MEMLIST_DUMP(s_copy_ml);
26251772Sjl139090 
26261772Sjl139090 	memlist_delete(t_ml);
26271772Sjl139090 	memlist_delete(s_del_ml);
26281772Sjl139090 
26291772Sjl139090 	return (s_copy_ml);
26301772Sjl139090 }
26311772Sjl139090 
26321772Sjl139090 /*
26331772Sjl139090  * Scan the non-relocatable spans on the source memory
26341772Sjl139090  * and construct a minimum mlist that includes all non-reloc
26351772Sjl139090  * memory subject to target alignment, and dynamic segment
26361772Sjl139090  * constraints where only whole dynamic segments may be deleted.
26371772Sjl139090  */
26381772Sjl139090 static struct memlist *
dr_get_nonreloc_mlist(struct memlist * s_ml,dr_mem_unit_t * s_mp)26391772Sjl139090 dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
26401772Sjl139090 {
26411772Sjl139090 	struct memlist	*x_ml = NULL;
26421772Sjl139090 	struct memlist	*ml;
26431772Sjl139090 	static fn_t	f = "dr_get_nonreloc_mlist";
26441772Sjl139090 
26451772Sjl139090 	PR_MEM("%s: checking for split of dyn seg list:\n", f);
26461772Sjl139090 	PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
26471772Sjl139090 
2648*11474SJonathan.Adams@Sun.COM 	for (ml = s_ml; ml; ml = ml->ml_next) {
26491772Sjl139090 		int rv;
26501772Sjl139090 		uint64_t nr_base, nr_end;
26511772Sjl139090 		memquery_t mq;
26521772Sjl139090 		struct memlist *dyn;
26531772Sjl139090 
26541772Sjl139090 		rv = kphysm_del_span_query(
2655*11474SJonathan.Adams@Sun.COM 		    _b64top(ml->ml_address), _b64top(ml->ml_size), &mq);
26561772Sjl139090 		if (rv) {
26571772Sjl139090 			memlist_delete(x_ml);
26581772Sjl139090 			return (NULL);
26591772Sjl139090 		}
26601772Sjl139090 
26611772Sjl139090 		if (mq.nonrelocatable == 0)
26621772Sjl139090 			continue;
26631772Sjl139090 
26641772Sjl139090 		PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
26655579Sjesusm 		    _ptob64(mq.first_nonrelocatable),
26665579Sjesusm 		    _ptob64(mq.last_nonrelocatable),
26675579Sjesusm 		    mq.first_nonrelocatable,
26685579Sjesusm 		    mq.last_nonrelocatable);
26691772Sjl139090 
26701772Sjl139090 		/*
26711772Sjl139090 		 * Align the span at both ends to allow for possible
26721772Sjl139090 		 * cage expansion.
26731772Sjl139090 		 */
26741772Sjl139090 		nr_base = _ptob64(mq.first_nonrelocatable);
26751772Sjl139090 		nr_end = _ptob64(mq.last_nonrelocatable + 1);
26761772Sjl139090 
26771772Sjl139090 		PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n",
26785579Sjesusm 		    f, nr_base, nr_end);
26791772Sjl139090 
26801772Sjl139090 		/*
26811772Sjl139090 		 * Expand the non-reloc span to fully include any
26821772Sjl139090 		 * dynamic segments it intersects with.
26831772Sjl139090 		 */
2684*11474SJonathan.Adams@Sun.COM 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL;
2685*11474SJonathan.Adams@Sun.COM 		    dyn = dyn->ml_next) {
2686*11474SJonathan.Adams@Sun.COM 			uint64_t dyn_base = dyn->ml_address;
2687*11474SJonathan.Adams@Sun.COM 			uint64_t dyn_end = dyn->ml_address + dyn->ml_size;
26881772Sjl139090 
26891772Sjl139090 			if (nr_base > dyn_base && nr_base < dyn_end)
26901772Sjl139090 				nr_base = dyn_base;
26911772Sjl139090 
26921772Sjl139090 			if (nr_end > dyn_base && nr_end < dyn_end)
26931772Sjl139090 				nr_end = dyn_end;
26941772Sjl139090 		}
26951772Sjl139090 
26961772Sjl139090 		x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base);
26971772Sjl139090 	}
26981772Sjl139090 
26991772Sjl139090 	if (x_ml == NULL) {
27001772Sjl139090 		PR_MEM("%s: source didn't have any non-reloc pages!\n", f);
27011772Sjl139090 		return (NULL);
27021772Sjl139090 	}
27031772Sjl139090 
27041772Sjl139090 	PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path);
27051772Sjl139090 	PR_MEMLIST_DUMP(x_ml);
27061772Sjl139090 
27071772Sjl139090 	return (x_ml);
27081772Sjl139090 }
27091772Sjl139090 
27101772Sjl139090 /*
27111772Sjl139090  * Check if source memlist can fit in target memlist while maintaining
27121772Sjl139090  * relative offsets within board.
27131772Sjl139090  */
27141772Sjl139090 static int
dr_memlist_canfit(struct memlist * s_mlist,struct memlist * t_mlist,dr_mem_unit_t * s_mp,dr_mem_unit_t * t_mp)27151772Sjl139090 dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist,
27161772Sjl139090     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
27171772Sjl139090 {
27181772Sjl139090 	int		canfit = 0;
27191772Sjl139090 	struct memlist	*s_ml, *t_ml, *ml;
27201772Sjl139090 	uint64_t	s_slice_mask, t_slice_mask;
27211772Sjl139090 	static fn_t	f = "dr_mlist_canfit";
27221772Sjl139090 
27231772Sjl139090 	s_ml = memlist_dup(s_mlist);
27241772Sjl139090 	t_ml = memlist_dup(t_mlist);
27251772Sjl139090 
27261772Sjl139090 	if (s_ml == NULL || t_ml == NULL) {
27271772Sjl139090 		cmn_err(CE_WARN, "%s: memlist_dup failed\n", f);
27281772Sjl139090 		goto done;
27291772Sjl139090 	}
27301772Sjl139090 
27311772Sjl139090 	s_slice_mask = s_mp->sbm_slice_size - 1;
27321772Sjl139090 	t_slice_mask = t_mp->sbm_slice_size - 1;
27331772Sjl139090 
27341772Sjl139090 	/*
27351772Sjl139090 	 * Normalize to slice relative offsets.
27361772Sjl139090 	 */
2737*11474SJonathan.Adams@Sun.COM 	for (ml = s_ml; ml; ml = ml->ml_next)
2738*11474SJonathan.Adams@Sun.COM 		ml->ml_address &= s_slice_mask;
2739*11474SJonathan.Adams@Sun.COM 
2740*11474SJonathan.Adams@Sun.COM 	for (ml = t_ml; ml; ml = ml->ml_next)
2741*11474SJonathan.Adams@Sun.COM 		ml->ml_address &= t_slice_mask;
27421772Sjl139090 
27431772Sjl139090 	canfit = memlist_canfit(s_ml, t_ml);
27441772Sjl139090 done:
27451772Sjl139090 	memlist_delete(s_ml);
27461772Sjl139090 	memlist_delete(t_ml);
27471772Sjl139090 
27481772Sjl139090 	return (canfit);
27491772Sjl139090 }
27501772Sjl139090 
27511772Sjl139090 /*
27521772Sjl139090  * Memlist support.
27531772Sjl139090  */
27541772Sjl139090 
27551772Sjl139090 /*
27561772Sjl139090  * Determine whether the source memlist (s_mlist) will
27571772Sjl139090  * fit into the target memlist (t_mlist) in terms of
27581772Sjl139090  * size and holes.  Assumes the caller has normalized the
27591772Sjl139090  * memlist physical addresses for comparison.
27601772Sjl139090  */
27611772Sjl139090 static int
memlist_canfit(struct memlist * s_mlist,struct memlist * t_mlist)27621772Sjl139090 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
27631772Sjl139090 {
27641772Sjl139090 	int		rv = 0;
27651772Sjl139090 	struct memlist	*s_ml, *t_ml;
27661772Sjl139090 
27671772Sjl139090 	if ((s_mlist == NULL) || (t_mlist == NULL))
27681772Sjl139090 		return (0);
27691772Sjl139090 
27701772Sjl139090 	s_ml = s_mlist;
2771*11474SJonathan.Adams@Sun.COM 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->ml_next) {
27721772Sjl139090 		uint64_t	s_start, s_end;
27731772Sjl139090 		uint64_t	t_start, t_end;
27741772Sjl139090 
2775*11474SJonathan.Adams@Sun.COM 		t_start = t_ml->ml_address;
2776*11474SJonathan.Adams@Sun.COM 		t_end = t_start + t_ml->ml_size;
2777*11474SJonathan.Adams@Sun.COM 
2778*11474SJonathan.Adams@Sun.COM 		for (; s_ml; s_ml = s_ml->ml_next) {
2779*11474SJonathan.Adams@Sun.COM 			s_start = s_ml->ml_address;
2780*11474SJonathan.Adams@Sun.COM 			s_end = s_start + s_ml->ml_size;
27811772Sjl139090 
27821772Sjl139090 			if ((s_start < t_start) || (s_end > t_end))
27831772Sjl139090 				break;
27841772Sjl139090 		}
27851772Sjl139090 	}
27861772Sjl139090 
27871772Sjl139090 	/*
27881772Sjl139090 	 * If we ran out of source memlist chunks that mean
27891772Sjl139090 	 * we found a home for all of them.
27901772Sjl139090 	 */
27911772Sjl139090 	if (s_ml == NULL)
27921772Sjl139090 		rv = 1;
27931772Sjl139090 
27941772Sjl139090 	return (rv);
27951772Sjl139090 }
2796