xref: /illumos-gate/usr/src/lib/libumem/amd64/umem_genasm.c (revision b1e2e3fb17324e9ddf43db264a0c64da7756d9e6)
14f364e7cSRobert Mustacchi /*
24f364e7cSRobert Mustacchi  * CDDL HEADER START
34f364e7cSRobert Mustacchi  *
44f364e7cSRobert Mustacchi  * The contents of this file are subject to the terms of the
54f364e7cSRobert Mustacchi  * Common Development and Distribution License (the "License").
64f364e7cSRobert Mustacchi  * You may not use this file except in compliance with the License.
74f364e7cSRobert Mustacchi  *
84f364e7cSRobert Mustacchi  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
94f364e7cSRobert Mustacchi  * or http://www.opensolaris.org/os/licensing.
104f364e7cSRobert Mustacchi  * See the License for the specific language governing permissions
114f364e7cSRobert Mustacchi  * and limitations under the License.
124f364e7cSRobert Mustacchi  *
134f364e7cSRobert Mustacchi  * When distributing Covered Code, include this CDDL HEADER in each
144f364e7cSRobert Mustacchi  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
154f364e7cSRobert Mustacchi  * If applicable, add the following below this CDDL HEADER, with the
164f364e7cSRobert Mustacchi  * fields enclosed by brackets "[]" replaced with your own identifying
174f364e7cSRobert Mustacchi  * information: Portions Copyright [yyyy] [name of copyright owner]
184f364e7cSRobert Mustacchi  *
194f364e7cSRobert Mustacchi  * CDDL HEADER END
204f364e7cSRobert Mustacchi  */
214f364e7cSRobert Mustacchi /*
22*b1e2e3fbSRobert Mustacchi  * Copyright (c) 2019 Joyent, Inc.  All rights reserved.
234f364e7cSRobert Mustacchi  */
244f364e7cSRobert Mustacchi 
254f364e7cSRobert Mustacchi /*
264f364e7cSRobert Mustacchi  * Don't Panic! If you find the blocks of assembly that follow confusing and
274f364e7cSRobert Mustacchi  * you're questioning why they exist, please go read section 8 of the umem.c big
284f364e7cSRobert Mustacchi  * theory statement. Next familiarize yourself with the malloc and free
294f364e7cSRobert Mustacchi  * implementations in libumem's malloc.c.
304f364e7cSRobert Mustacchi  *
314f364e7cSRobert Mustacchi  * What follows is the amd64 implementation of the thread caching automatic
324f364e7cSRobert Mustacchi  * assembly generation. The amd64 calling conventions are documented in the
334f364e7cSRobert Mustacchi  * 64-bit System V ABI. For our purposes what matters is that our first argument
344f364e7cSRobert Mustacchi  * will come in rdi. Our functions have to preserve rbp, rbx, and r12->r15. We
354f364e7cSRobert Mustacchi  * are free to do whatever we want with rax, rcx, rdx, rsi, rdi, and r8->r11.
364f364e7cSRobert Mustacchi  *
374f364e7cSRobert Mustacchi  * For both our implementation of malloc and free we only use the registers we
384f364e7cSRobert Mustacchi  * don't have to preserve.
394f364e7cSRobert Mustacchi  *
404f364e7cSRobert Mustacchi  * Malloc register usage:
414f364e7cSRobert Mustacchi  * 	o. rdi: Original size to malloc. This never changes and is preserved.
424f364e7cSRobert Mustacchi  * 	o. rsi: Adjusted malloc size for malloc_data_tag(s).
434f364e7cSRobert Mustacchi  * 	o. rcx: Pointer to the tmem_t in the ulwp_t.
444f364e7cSRobert Mustacchi  * 	o. rdx: Pointer to the tmem_t array of roots
454f364e7cSRobert Mustacchi  * 	o. r8:  Size of the cache
464f364e7cSRobert Mustacchi  * 	o. r9:  Scratch register
474f364e7cSRobert Mustacchi  *
484f364e7cSRobert Mustacchi  * Free register usage:
494f364e7cSRobert Mustacchi  *	o. rdi: Original buffer to free. This never changes and is preserved.
504f364e7cSRobert Mustacchi  *	o. rax: The actual buffer, adjusted for the hidden malloc_data_t(s).
514f364e7cSRobert Mustacchi  * 	o. rcx: Pointer to the tmem_t in the ulwp_t.
524f364e7cSRobert Mustacchi  * 	o. rdx: Pointer to the tmem_t array of roots
534f364e7cSRobert Mustacchi  * 	o. r8:  Size of the cache
544f364e7cSRobert Mustacchi  * 	o. r9:  Scratch register
554f364e7cSRobert Mustacchi  *
564f364e7cSRobert Mustacchi  * Once we determine what cache we are using, we increment %rdx to the
574f364e7cSRobert Mustacchi  * appropriate offset and set %r8 with the size of the cache. This means that
584f364e7cSRobert Mustacchi  * when we break out to the normal buffer allocation point %rdx contains the
594f364e7cSRobert Mustacchi  * head of the linked list and %r8 is the amount that we have to adjust the
604f364e7cSRobert Mustacchi  * thread's cached amount by.
614f364e7cSRobert Mustacchi  *
624f364e7cSRobert Mustacchi  * Each block of assembly has pseudocode that describes its purpose.
634f364e7cSRobert Mustacchi  */
644f364e7cSRobert Mustacchi 
65*b1e2e3fbSRobert Mustacchi /*
66*b1e2e3fbSRobert Mustacchi  * umem_base must be first.
67*b1e2e3fbSRobert Mustacchi  */
68*b1e2e3fbSRobert Mustacchi #include "umem_base.h"
69*b1e2e3fbSRobert Mustacchi 
704f364e7cSRobert Mustacchi #include <inttypes.h>
714f364e7cSRobert Mustacchi #include <strings.h>
724f364e7cSRobert Mustacchi #include <umem_impl.h>
73*b1e2e3fbSRobert Mustacchi #include <atomic.h>
74*b1e2e3fbSRobert Mustacchi #include <sys/mman.h>
75*b1e2e3fbSRobert Mustacchi #include <errno.h>
76*b1e2e3fbSRobert Mustacchi 
774f364e7cSRobert Mustacchi 
784f364e7cSRobert Mustacchi #include <stdio.h>
794f364e7cSRobert Mustacchi 
/*
 * File-scope state for the amd64 code generator.
 *
 * umem_genasm_mptr/umem_genasm_fptr hold the addresses of _malloc and _free;
 * umem_genasm() starts working 5 bytes past each of them.
 * NOTE(review): umem_genasm_msize/umem_genasm_fsize are presumably the number
 * of bytes available at those addresses for generated code -- confirm against
 * the definitions of _malloc and _free.
 *
 * umem_genasm_omptr/umem_genasm_ofptr are the fallback functions: their
 * addresses are the targets of the trailing "jmp" that genasm_malfini() and
 * genasm_frfini() patch into the generated epilogues.
 */
804f364e7cSRobert Mustacchi const int umem_genasm_supported = 1;
814f364e7cSRobert Mustacchi static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
824f364e7cSRobert Mustacchi static size_t umem_genasm_msize = 576;
834f364e7cSRobert Mustacchi static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
844f364e7cSRobert Mustacchi static size_t umem_genasm_fsize = 576;
854f364e7cSRobert Mustacchi static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
864f364e7cSRobert Mustacchi static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;
874f364e7cSRobert Mustacchi 
/*
 * UMEM_GENASM_MAX64	Number of pointer-sized slots whose total byte size
 *			still fits in a 32-bit quantity.
 * PTC_JMPADDR		rel32 displacement for a jump whose 4-byte
 *			displacement field lives at src and whose target is
 *			dest. x86 measures the displacement from the end of
 *			the instruction, i.e. from src + 4. Arguments are
 *			fully parenthesized so that expression arguments
 *			(e.g. "a << b") are evaluated as a unit.
 * PTC_ROOT_SIZE	Size in bytes of one entry in the per-thread array of
 *			cache roots.
 * MULTINOP		Little-endian image of the 5-byte NOP 0f 1f 44 00 00;
 *			not referenced by the code visible in this part of
 *			the file.
 */
#define	UMEM_GENASM_MAX64	(UINT32_MAX / sizeof (uintptr_t))
#define	PTC_JMPADDR(dest, src)	((dest) - ((src) + 4))
#define	PTC_ROOT_SIZE	(sizeof (uintptr_t))
#define	MULTINOP	0x0000441f0f
924f364e7cSRobert Mustacchi 
934f364e7cSRobert Mustacchi /*
944f364e7cSRobert Mustacchi  * void *ptcmalloc(size_t orig_size);
954f364e7cSRobert Mustacchi  *
964f364e7cSRobert Mustacchi  * size_t size = orig_size + 8;
974f364e7cSRobert Mustacchi  * if (size > UMEM_SECOND_ALIGN)
984f364e7cSRobert Mustacchi  * 	size += 8;
994f364e7cSRobert Mustacchi  *
1004f364e7cSRobert Mustacchi  * if (size < orig_size)
1014f364e7cSRobert Mustacchi  * 	goto tomalloc;		! This is overflow
1024f364e7cSRobert Mustacchi  *
1034f364e7cSRobert Mustacchi  * if (size > cache_max)
1044f364e7cSRobert Mustacchi  * 	goto tomalloc
1054f364e7cSRobert Mustacchi  *
1064f364e7cSRobert Mustacchi  * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
1074f364e7cSRobert Mustacchi  * void **roots = t->tm_roots;
1084f364e7cSRobert Mustacchi  */
/*
 * Byte offsets into malinit[] of the 32-bit fields that genasm_malinit()
 * fills in after copying the template:
 *	PTC_MALINIT_JOUT	rel32 of the "jb" taken when size overflowed
 *	PTC_MALINIT_MCS		imm32 of the compare against the largest
 *				cached size
 *	PTC_MALINIT_JOV		rel32 of the "ja" taken when size exceeds
 *				that maximum
 *	PTC_MALINIT_SOFF	imm32 added to the value loaded from %fs:0
 *				to reach the tmem_t
 */
1094f364e7cSRobert Mustacchi #define	PTC_MALINIT_JOUT	0x13
1104f364e7cSRobert Mustacchi #define	PTC_MALINIT_MCS	0x1a
1114f364e7cSRobert Mustacchi #define	PTC_MALINIT_JOV	0x20
1124f364e7cSRobert Mustacchi #define	PTC_MALINIT_SOFF	0x30
1134f364e7cSRobert Mustacchi static const uint8_t malinit[] =  {
1144f364e7cSRobert Mustacchi 	0x48, 0x8d, 0x77, 0x08,		/* leaq 0x8(%rdi),%rsi */
1154f364e7cSRobert Mustacchi 	0x48, 0x83, 0xfe, 0x10,		/* cmpq $0x10, %rsi */
1164f364e7cSRobert Mustacchi 	0x76, 0x04,			/* jbe +0x4 (skip the second leaq) */
1174f364e7cSRobert Mustacchi 	0x48, 0x8d, 0x77, 0x10,		/* leaq 0x10(%rdi),%rsi */
1184f364e7cSRobert Mustacchi 	0x48, 0x39, 0xfe,		/* cmpq %rdi,%rsi */
1194f364e7cSRobert Mustacchi 	0x0f, 0x82, 0x00, 0x00, 0x00, 0x00,	/* jb +errout (overflow) */
1204f364e7cSRobert Mustacchi 	0x48, 0x81, 0xfe,
1214f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* cmpq $cache_max, %rsi */
1224f364e7cSRobert Mustacchi 	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +errout */
1234f364e7cSRobert Mustacchi 	0x64, 0x48, 0x8b, 0x0c, 0x25,
1244f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* movq %fs:0x0,%rcx */
1254f364e7cSRobert Mustacchi 	0x48, 0x81, 0xc1,
1264f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* addq $SOFF, %rcx */
1274f364e7cSRobert Mustacchi 	0x48, 0x8d, 0x51, 0x08,		/* leaq 0x8(%rcx),%rdx */
1284f364e7cSRobert Mustacchi };
1294f364e7cSRobert Mustacchi 
1304f364e7cSRobert Mustacchi /*
1314f364e7cSRobert Mustacchi  * void ptcfree(void *buf);
1324f364e7cSRobert Mustacchi  *
1334f364e7cSRobert Mustacchi  * if (buf == NULL)
1344f364e7cSRobert Mustacchi  * 	return;
1354f364e7cSRobert Mustacchi  *
1364f364e7cSRobert Mustacchi  * malloc_data_t *tag = buf;
1374f364e7cSRobert Mustacchi  * tag--;
1384f364e7cSRobert Mustacchi  * int size = tag->malloc_size;
1394f364e7cSRobert Mustacchi  * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
1404f364e7cSRobert Mustacchi  * if (tagval == MALLOC_SECOND_MAGIC) {
1414f364e7cSRobert Mustacchi  * 	tag--;
1424f364e7cSRobert Mustacchi  * } else if (tagval != MALLOC_MAGIC) {
1434f364e7cSRobert Mustacchi  * 	goto tofree;
1444f364e7cSRobert Mustacchi  * }
1454f364e7cSRobert Mustacchi  *
1464f364e7cSRobert Mustacchi  * if (size > cache_max)
1474f364e7cSRobert Mustacchi  * 	goto tofree;
1484f364e7cSRobert Mustacchi  *
1494f364e7cSRobert Mustacchi  * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
1504f364e7cSRobert Mustacchi  * void **roots = t->tm_roots;
1514f364e7cSRobert Mustacchi  */
/*
 * Byte offsets into freeinit[] of the 32-bit fields that genasm_frinit()
 * fills in after copying the template:
 *	PTC_FRINI_JDONE	rel32 of the "je" taken when buf is NULL
 *	PTC_FRINI_JFREE	rel32 of the "jne" taken when the tag is not a
 *			known magic value
 *	PTC_FRINI_MCS	imm32 of the compare against the largest cached size
 *	PTC_FRINI_JOV	rel32 of the "ja" taken when size exceeds that
 *			maximum
 *	PTC_FRINI_SOFF	imm32 added to the value loaded from %fs:0 to reach
 *			the tmem_t
 */
1524f364e7cSRobert Mustacchi #define	PTC_FRINI_JDONE	0x05
1534f364e7cSRobert Mustacchi #define	PTC_FRINI_JFREE	0x25
1544f364e7cSRobert Mustacchi #define	PTC_FRINI_MCS	0x30
1554f364e7cSRobert Mustacchi #define	PTC_FRINI_JOV	0x36
1564f364e7cSRobert Mustacchi #define	PTC_FRINI_SOFF	0x46
1574f364e7cSRobert Mustacchi static const uint8_t freeinit[] = {
1584f364e7cSRobert Mustacchi 	0x48, 0x85, 0xff,		/* testq %rdi,%rdi */
1594f364e7cSRobert Mustacchi 	0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,	/* je $JDONE (done) */
1604f364e7cSRobert Mustacchi 	0x8b, 0x77, 0xf8,		/* movl -0x8(%rdi),%esi */
1614f364e7cSRobert Mustacchi 	0x8b, 0x47, 0xfc,		/* movl -0x4(%rdi),%eax */
1624f364e7cSRobert Mustacchi 	0x01, 0xf0,			/* addl %esi,%eax */
1634f364e7cSRobert Mustacchi 	0x3d, 0x00, 0x70, 0xba, 0x16,	/* cmpl $MALLOC_2_MAGIC, %eax */
1644f364e7cSRobert Mustacchi 	0x75, 0x06,			/* jne +0x6 (checkover) */
1654f364e7cSRobert Mustacchi 	0x48, 0x8d, 0x47, 0xf0,		/* leaq -0x10(%rdi),%rax */
1664f364e7cSRobert Mustacchi 	0xeb, 0x0f,			/* jmp +0xf (to the size check) */
1674f364e7cSRobert Mustacchi 	0x3d, 0x00, 0xc0, 0x10, 0x3a,	/* cmpl $MALLOC_MAGIC, %eax */
1684f364e7cSRobert Mustacchi 	0x0f, 0x85, 0x00, 0x00, 0x00, 0x00,	/* jne +JFREE (goto tofree) */
1694f364e7cSRobert Mustacchi 	0x48, 0x8d, 0x47, 0xf8,		/* leaq -0x8(%rdi),%rax */
1704f364e7cSRobert Mustacchi 	0x48, 0x81, 0xfe,
1714f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* cmpq $cache_max, %rsi */
1724f364e7cSRobert Mustacchi 	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +errout */
1734f364e7cSRobert Mustacchi 	0x64, 0x48, 0x8b, 0x0c, 0x25,
1744f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* movq %fs:0x0,%rcx */
1754f364e7cSRobert Mustacchi 	0x48, 0x81, 0xc1,
1764f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* addq $SOFF, %rcx */
1774f364e7cSRobert Mustacchi 	0x48, 0x8d, 0x51, 0x08,		/* leaq 0x8(%rcx),%rdx */
1784f364e7cSRobert Mustacchi };
1794f364e7cSRobert Mustacchi 
1804f364e7cSRobert Mustacchi /*
1814f364e7cSRobert Mustacchi  * if (size <= $CACHE_SIZE) {
1824f364e7cSRobert Mustacchi  *	csize = $CACHE_SIZE;
1834f364e7cSRobert Mustacchi  * } else ...				! goto next cache
1844f364e7cSRobert Mustacchi  */
/*
 * Byte offsets into inicache[] of the 32-bit fields that genasm_firstcache()
 * fills in: the compare immediate, the immediate loaded into %r8, and the
 * rel32 of the jump to the buffer alloc/free label. No adjustment of %rdx is
 * emitted for this first cache.
 */
1854f364e7cSRobert Mustacchi #define	PTC_INICACHE_CMP	0x03
1864f364e7cSRobert Mustacchi #define	PTC_INICACHE_SIZE	0x0c
1874f364e7cSRobert Mustacchi #define	PTC_INICACHE_JMP	0x11
1884f364e7cSRobert Mustacchi static const uint8_t inicache[] = {
1894f364e7cSRobert Mustacchi 	0x48, 0x81, 0xfe,
1904f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
1914f364e7cSRobert Mustacchi 	0x77, 0x0c,			/* ja +0xc (next cache) */
1924f364e7cSRobert Mustacchi 	0x49, 0xc7, 0xc0,
1934f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* movq sizeof ($CACHE), %r8 */
1944f364e7cSRobert Mustacchi 	0xe9, 0x00, 0x00, 0x00, 0x00,	/* jmp $JMP (allocbuf) */
1954f364e7cSRobert Mustacchi };
1964f364e7cSRobert Mustacchi 
/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else ...				! goto next cache
 *
 * Template for every cache between the first and the last. The PTC_GENCACHE_*
 * values are byte offsets of the 32-bit fields that genasm_gencache() fills
 * in: the compare immediate, the immediate loaded into %r8, the immediate
 * added to %rdx, and the rel32 of the jump to the buffer alloc/free label.
 *
 * The short "ja" ends at offset 9 of this 0x1c-byte block, so it must skip
 * exactly 0x13 bytes to land on the first byte of the block that follows.
 * A displacement of 0x14 would land one byte into the following block's first
 * instruction.
 */
#define	PTC_GENCACHE_CMP	0x03
#define	PTC_GENCACHE_SIZE	0x0c
#define	PTC_GENCACHE_NUM	0x13
#define	PTC_GENCACHE_JMP	0x18
static const uint8_t gencache[] = {
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x77, 0x13,			/* ja +0x13 (next cache) */
	0x49, 0xc7, 0xc0,
	0x00, 0x00, 0x00, 0x00,		/* movq sizeof ($CACHE), %r8 */
	0x48, 0x81, 0xc2,
	0x00, 0x00, 0x00, 0x00,		/* addq $8*ii, %rdx */
	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp +$JMP (allocbuf) */
};
2174f364e7cSRobert Mustacchi 
2184f364e7cSRobert Mustacchi /*
2194f364e7cSRobert Mustacchi  * else if (size <= $CACHE_SIZE) {
2204f364e7cSRobert Mustacchi  *	csize = $CACHE_SIZE;
2214f364e7cSRobert Mustacchi  *	roots += $CACHE_NUM;
2224f364e7cSRobert Mustacchi  * } else {
2234f364e7cSRobert Mustacchi  *	goto tofunc; 			! goto tomalloc if ptcmalloc.
2244f364e7cSRobert Mustacchi  * }					! goto tofree if ptcfree.
2254f364e7cSRobert Mustacchi  */
/*
 * Byte offsets into fincache[] of the fields that genasm_lastcache() fills
 * in. PTC_FINCACHE_JMP is a single-byte (rel8) displacement; the other three
 * are 32-bit immediates. There is no trailing jump: execution falls through
 * into whatever block is emitted next.
 */
2264f364e7cSRobert Mustacchi #define	PTC_FINCACHE_CMP	0x03
2274f364e7cSRobert Mustacchi #define	PTC_FINCACHE_JMP	0x08
2284f364e7cSRobert Mustacchi #define	PTC_FINCACHE_SIZE	0x0c
2294f364e7cSRobert Mustacchi #define	PTC_FINCACHE_NUM	0x13
2304f364e7cSRobert Mustacchi static const uint8_t fincache[] = {
2314f364e7cSRobert Mustacchi 	0x48, 0x81, 0xfe,
2324f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
2334f364e7cSRobert Mustacchi 	0x77, 0x00,			/* ja +JMP (to real malloc or free) */
2344f364e7cSRobert Mustacchi 	0x49, 0xc7, 0xc0,
2354f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* movq sizeof ($CACHE), %r8 */
2364f364e7cSRobert Mustacchi 	0x48, 0x81, 0xc2,
2374f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* addq $8*ii, %rdx */
2384f364e7cSRobert Mustacchi 
2394f364e7cSRobert Mustacchi };
2404f364e7cSRobert Mustacchi 
2414f364e7cSRobert Mustacchi /*
2424f364e7cSRobert Mustacchi  * if (*root == NULL)
2434f364e7cSRobert Mustacchi  * 	goto tomalloc;
2444f364e7cSRobert Mustacchi  *
2454f364e7cSRobert Mustacchi  * malloc_data_t *ret = *root;
2464f364e7cSRobert Mustacchi  * *root = *(void **)ret;
2474f364e7cSRobert Mustacchi  * t->tm_size -= csize;
2484f364e7cSRobert Mustacchi  * ret->malloc_size = size;
2494f364e7cSRobert Mustacchi  *
2504f364e7cSRobert Mustacchi  * if (size > UMEM_SECOND_ALIGN) {
2514f364e7cSRobert Mustacchi  *	ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size);
2524f364e7cSRobert Mustacchi  *	ret += 2;
2534f364e7cSRobert Mustacchi  * } else {
2544f364e7cSRobert Mustacchi  *	ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_MAGIC, size);
2554f364e7cSRobert Mustacchi  *	ret += 1;
2564f364e7cSRobert Mustacchi  * }
2574f364e7cSRobert Mustacchi  *
2584f364e7cSRobert Mustacchi  * return ((void *)ret);
2594f364e7cSRobert Mustacchi  * tomalloc:
2604f364e7cSRobert Mustacchi  * 	return (malloc(orig_size));
2614f364e7cSRobert Mustacchi  */
/*
 * Offsets into malfini[]:
 *	PTC_MALFINI_ALLABEL	start of the block; the "allocate from this
 *				cache" label the cache checks jump to
 *	PTC_MALFINI_JMLABEL	the trailing "jmp"; the label used to fall
 *				back to the real malloc
 *	PTC_MALFINI_JMADDR	rel32 of that jmp, patched by genasm_malfini()
 */
2624f364e7cSRobert Mustacchi #define	PTC_MALFINI_ALLABEL	0x00
2634f364e7cSRobert Mustacchi #define	PTC_MALFINI_JMLABEL	0x40
2644f364e7cSRobert Mustacchi #define	PTC_MALFINI_JMADDR	0x41
2654f364e7cSRobert Mustacchi static const uint8_t malfini[] = {
2664f364e7cSRobert Mustacchi 	0x48, 0x8b, 0x02,		/* movq (%rdx),%rax */
2674f364e7cSRobert Mustacchi 	0x48, 0x85, 0xc0,		/* testq %rax,%rax */
2684f364e7cSRobert Mustacchi 	0x74, 0x38,			/* je +0x38 (errout) */
2694f364e7cSRobert Mustacchi 	0x4c, 0x8b, 0x08,		/* movq (%rax),%r9 */
2704f364e7cSRobert Mustacchi 	0x4c, 0x89, 0x0a,		/* movq %r9,(%rdx) */
2714f364e7cSRobert Mustacchi 	0x4c, 0x29, 0x01,		/* subq %r8,(%rcx) */
2724f364e7cSRobert Mustacchi 	0x48, 0x83, 0xfe, 0x10,		/* cmpq $0x10,%rsi */
2734f364e7cSRobert Mustacchi 	0x76, 0x15,			/* jbe +0x15 */
2744f364e7cSRobert Mustacchi 	0x41, 0xb9, 0x00, 0x70, 0xba, 0x16, /* movl $MALLOC_MAGIC_2, %r9d */
2754f364e7cSRobert Mustacchi 	0x89, 0x70, 0x08,		/* movl %esi,0x8(%rax) */
2764f364e7cSRobert Mustacchi 	0x41, 0x29, 0xf1,		/* subl %esi, %r9d */
2774f364e7cSRobert Mustacchi 	0x44, 0x89, 0x48, 0x0c,		/* movl %r9d, 0xc(%rax) */
2784f364e7cSRobert Mustacchi 	0x48, 0x83, 0xc0, 0x10,		/* addq $0x10, %rax */
2794f364e7cSRobert Mustacchi 	0xc3,				/* ret */
2804f364e7cSRobert Mustacchi 	0x41, 0xb9, 0x00, 0xc0, 0x10, 0x3a,	/* movl $MALLOC_MAGIC, %r9d */
2814f364e7cSRobert Mustacchi 	0x89, 0x30,			/* movl %esi,(%rax) */
2824f364e7cSRobert Mustacchi 	0x41, 0x29, 0xf1,		/* subl %esi,%r9d */
2834f364e7cSRobert Mustacchi 	0x44, 0x89, 0x48, 0x04,		/* movl %r9d,0x4(%rax) */
2844f364e7cSRobert Mustacchi 	0x48, 0x83, 0xc0, 0x08,		/* addq $0x8,%rax */
2854f364e7cSRobert Mustacchi 	0xc3,				/* ret */
2864f364e7cSRobert Mustacchi 	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp $MALLOC */
2874f364e7cSRobert Mustacchi };
2884f364e7cSRobert Mustacchi 
2894f364e7cSRobert Mustacchi /*
2904f364e7cSRobert Mustacchi  * if (t->tm_size + csize > umem_ptc_size)
2914f364e7cSRobert Mustacchi  * 	goto tofree;
2924f364e7cSRobert Mustacchi  *
2934f364e7cSRobert Mustacchi  * t->tm_size += csize
2944f364e7cSRobert Mustacchi  * *(void **)tag = *root;
2954f364e7cSRobert Mustacchi  * *root = tag;
2964f364e7cSRobert Mustacchi  * return;
2974f364e7cSRobert Mustacchi  * tofree:
2984f364e7cSRobert Mustacchi  * 	free(buf);
2994f364e7cSRobert Mustacchi  * 	return;
3004f364e7cSRobert Mustacchi  */
/*
 * Offsets into freefini[]:
 *	PTC_FRFINI_RBUFLABEL	start of the block; the "return buffer to
 *				this cache" label the cache checks jump to
 *	PTC_FRFINI_CACHEMAX	imm32 of the compare against the per-thread
 *				cache limit, patched by genasm_frfini()
 *	PTC_FRFINI_DONELABEL	the "ret"; target of the NULL-buffer early out
 *	PTC_FRFINI_JFLABEL	the trailing "jmp"; the label used to fall
 *				back to the real free
 *	PTC_FRFINI_JFADDR	rel32 of that jmp, patched by genasm_frfini()
 */
3014f364e7cSRobert Mustacchi #define	PTC_FRFINI_RBUFLABEL	0x00
3024f364e7cSRobert Mustacchi #define	PTC_FRFINI_CACHEMAX	0x09
3034f364e7cSRobert Mustacchi #define	PTC_FRFINI_DONELABEL	0x1b
3044f364e7cSRobert Mustacchi #define	PTC_FRFINI_JFLABEL	0x1c
3054f364e7cSRobert Mustacchi #define	PTC_FRFINI_JFADDR	0x1d
3064f364e7cSRobert Mustacchi static const uint8_t freefini[] = {
3074f364e7cSRobert Mustacchi 	0x4c, 0x8b, 0x09,		/* movq (%rcx),%r9 */
3084f364e7cSRobert Mustacchi 	0x4d, 0x01, 0xc1,		/* addq %r8, %r9 */
3094f364e7cSRobert Mustacchi 	0x49, 0x81, 0xf9,
3104f364e7cSRobert Mustacchi 	0x00, 0x00, 0x00, 0x00,		/* cmpq $THR_CACHE_MAX, %r9 */
3114f364e7cSRobert Mustacchi 	0x77, 0x0d,			/* ja +0xd (tofree) */
3124f364e7cSRobert Mustacchi 	0x4c, 0x01, 0x01,		/* addq %r8,(%rcx) */
3134f364e7cSRobert Mustacchi 	0x4c, 0x8b, 0x0a,		/* movq (%rdx),%r9 */
3144f364e7cSRobert Mustacchi 	0x4c, 0x89, 0x08,		/* movq %r9,(%rax) */
3154f364e7cSRobert Mustacchi 	0x48, 0x89, 0x02,		/* movq %rax,(%rdx) */
3164f364e7cSRobert Mustacchi 	0xc3,				/* ret */
3174f364e7cSRobert Mustacchi 	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp free */
3184f364e7cSRobert Mustacchi };
3194f364e7cSRobert Mustacchi 
3204f364e7cSRobert Mustacchi /*
3214f364e7cSRobert Mustacchi  * Construct the initial part of malloc. off contains the offset from curthread
3224f364e7cSRobert Mustacchi  * to the root of the tmem structure. ep is the address of the label to error
3234f364e7cSRobert Mustacchi  * and jump to free. csize is the size of the largest umem_cache in ptcumem.
3244f364e7cSRobert Mustacchi  */
3254f364e7cSRobert Mustacchi static int
genasm_malinit(uint8_t * bp,uint32_t off,uint32_t ep,uint32_t csize)3264f364e7cSRobert Mustacchi genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize)
3274f364e7cSRobert Mustacchi {
3284f364e7cSRobert Mustacchi 	uint32_t addr;
3294f364e7cSRobert Mustacchi 
3304f364e7cSRobert Mustacchi 	bcopy(malinit, bp, sizeof (malinit));
3314f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT);
3324f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr));
3334f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize));
3344f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV);
3354f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr));
3364f364e7cSRobert Mustacchi 	bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off));
3374f364e7cSRobert Mustacchi 
3384f364e7cSRobert Mustacchi 	return (sizeof (malinit));
3394f364e7cSRobert Mustacchi }
3404f364e7cSRobert Mustacchi 
3414f364e7cSRobert Mustacchi static int
genasm_frinit(uint8_t * bp,uint32_t off,uint32_t dp,uint32_t ep,uint32_t mcs)3424f364e7cSRobert Mustacchi genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mcs)
3434f364e7cSRobert Mustacchi {
3444f364e7cSRobert Mustacchi 	uint32_t addr;
3454f364e7cSRobert Mustacchi 
3464f364e7cSRobert Mustacchi 	bcopy(freeinit, bp, sizeof (freeinit));
3474f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE);
3484f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr));
3494f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE);
3504f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr));
3514f364e7cSRobert Mustacchi 	bcopy(&mcs, bp + PTC_FRINI_MCS, sizeof (mcs));
3524f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ep, PTC_FRINI_JOV);
3534f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr));
3544f364e7cSRobert Mustacchi 	bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off));
3554f364e7cSRobert Mustacchi 	return (sizeof (freeinit));
3564f364e7cSRobert Mustacchi }
3574f364e7cSRobert Mustacchi 
3584f364e7cSRobert Mustacchi 
3594f364e7cSRobert Mustacchi /*
3604f364e7cSRobert Mustacchi  * Create the initial cache entry of the specified size. The value of ap tells
3614f364e7cSRobert Mustacchi  * us what the address of the label to try and allocate a buffer. This value is
3624f364e7cSRobert Mustacchi  * an offset from the current base to that value.
3634f364e7cSRobert Mustacchi  */
3644f364e7cSRobert Mustacchi static int
genasm_firstcache(uint8_t * bp,uint32_t csize,uint32_t ap)3654f364e7cSRobert Mustacchi genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
3664f364e7cSRobert Mustacchi {
3674f364e7cSRobert Mustacchi 	uint32_t addr;
3684f364e7cSRobert Mustacchi 
3694f364e7cSRobert Mustacchi 	bcopy(inicache, bp, sizeof (inicache));
3704f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
3714f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));
3724f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP);
3734f364e7cSRobert Mustacchi 	ASSERT(addr != 0);
3744f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr));
3754f364e7cSRobert Mustacchi 
3764f364e7cSRobert Mustacchi 	return (sizeof (inicache));
3774f364e7cSRobert Mustacchi }
3784f364e7cSRobert Mustacchi 
3794f364e7cSRobert Mustacchi static int
genasm_gencache(uint8_t * bp,int num,uint32_t csize,uint32_t ap)3804f364e7cSRobert Mustacchi genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
3814f364e7cSRobert Mustacchi {
3824f364e7cSRobert Mustacchi 	uint32_t addr;
3834f364e7cSRobert Mustacchi 	uint32_t coff;
3844f364e7cSRobert Mustacchi 
3854f364e7cSRobert Mustacchi 	ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);
3864f364e7cSRobert Mustacchi 	ASSERT(num != 0);
3874f364e7cSRobert Mustacchi 	bcopy(gencache, bp, sizeof (gencache));
3884f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
3894f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));
3904f364e7cSRobert Mustacchi 	coff = num * PTC_ROOT_SIZE;
3914f364e7cSRobert Mustacchi 	bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff));
3924f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
3934f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr));
3944f364e7cSRobert Mustacchi 
3954f364e7cSRobert Mustacchi 	return (sizeof (gencache));
3964f364e7cSRobert Mustacchi }
3974f364e7cSRobert Mustacchi 
3984f364e7cSRobert Mustacchi static int
genasm_lastcache(uint8_t * bp,int num,uint32_t csize,uint32_t ep)3994f364e7cSRobert Mustacchi genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
4004f364e7cSRobert Mustacchi {
4014f364e7cSRobert Mustacchi 	uint8_t eap;
4024f364e7cSRobert Mustacchi 	uint32_t coff;
4034f364e7cSRobert Mustacchi 
4044f364e7cSRobert Mustacchi 	ASSERT(ep <= 0xff && ep > 7);
4054f364e7cSRobert Mustacchi 	ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);
4064f364e7cSRobert Mustacchi 	bcopy(fincache, bp, sizeof (fincache));
4074f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
4084f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));
4094f364e7cSRobert Mustacchi 	coff = num * PTC_ROOT_SIZE;
4104f364e7cSRobert Mustacchi 	bcopy(&coff, bp + PTC_FINCACHE_NUM, sizeof (coff));
4114f364e7cSRobert Mustacchi 	eap = ep - PTC_FINCACHE_JMP - 1;
4124f364e7cSRobert Mustacchi 	bcopy(&eap, bp + PTC_FINCACHE_JMP, sizeof (eap));
4134f364e7cSRobert Mustacchi 
4144f364e7cSRobert Mustacchi 	return (sizeof (fincache));
4154f364e7cSRobert Mustacchi }
4164f364e7cSRobert Mustacchi 
4174f364e7cSRobert Mustacchi static int
genasm_malfini(uint8_t * bp,uintptr_t mptr)4184f364e7cSRobert Mustacchi genasm_malfini(uint8_t *bp, uintptr_t mptr)
4194f364e7cSRobert Mustacchi {
4204f364e7cSRobert Mustacchi 	uint32_t addr;
4214f364e7cSRobert Mustacchi 
4224f364e7cSRobert Mustacchi 	bcopy(malfini, bp, sizeof (malfini));
4234f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR));
4244f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr));
4254f364e7cSRobert Mustacchi 
4264f364e7cSRobert Mustacchi 	return (sizeof (malfini));
4274f364e7cSRobert Mustacchi }
4284f364e7cSRobert Mustacchi 
4294f364e7cSRobert Mustacchi static int
genasm_frfini(uint8_t * bp,uint32_t maxthr,uintptr_t fptr)4304f364e7cSRobert Mustacchi genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr)
4314f364e7cSRobert Mustacchi {
4324f364e7cSRobert Mustacchi 	uint32_t addr;
4334f364e7cSRobert Mustacchi 
4344f364e7cSRobert Mustacchi 	bcopy(freefini, bp, sizeof (freefini));
4354f364e7cSRobert Mustacchi 	bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));
4364f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR));
4374f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr));
4384f364e7cSRobert Mustacchi 
4394f364e7cSRobert Mustacchi 	return (sizeof (freefini));
4404f364e7cSRobert Mustacchi }
4414f364e7cSRobert Mustacchi 
4424f364e7cSRobert Mustacchi /*
4434f364e7cSRobert Mustacchi  * The malloc inline assembly is constructed as follows:
4444f364e7cSRobert Mustacchi  *
4454f364e7cSRobert Mustacchi  * o Malloc prologue assembly
4464f364e7cSRobert Mustacchi  * o Generic first-cache check
4474f364e7cSRobert Mustacchi  * o n Generic cache checks (where n = _tmem_get_entries() - 2)
4484f364e7cSRobert Mustacchi  * o Generic last-cache check
4494f364e7cSRobert Mustacchi  * o Malloc epilogue assembly
4504f364e7cSRobert Mustacchi  *
4514f364e7cSRobert Mustacchi  * Generally there are at least three caches. When there is only one cache we
4524f364e7cSRobert Mustacchi  * only use the generic last-cache. In the case where there are two caches, we
4534f364e7cSRobert Mustacchi  * just leave out the middle ones.
4544f364e7cSRobert Mustacchi  */
4554f364e7cSRobert Mustacchi static int
genasm_malloc(void * base,size_t len,int nents,int * umem_alloc_sizes)4564f364e7cSRobert Mustacchi genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
4574f364e7cSRobert Mustacchi {
4584f364e7cSRobert Mustacchi 	int ii, off;
4594f364e7cSRobert Mustacchi 	uint8_t *bp;
4604f364e7cSRobert Mustacchi 	size_t total;
4614f364e7cSRobert Mustacchi 	uint32_t allocoff, erroff;
4624f364e7cSRobert Mustacchi 
4634f364e7cSRobert Mustacchi 	total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);
4644f364e7cSRobert Mustacchi 
4654f364e7cSRobert Mustacchi 	if (nents >= 2)
4664f364e7cSRobert Mustacchi 		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
4674f364e7cSRobert Mustacchi 
4684f364e7cSRobert Mustacchi 	if (total > len)
4694f364e7cSRobert Mustacchi 		return (1);
4704f364e7cSRobert Mustacchi 
4714f364e7cSRobert Mustacchi 	erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL;
4724f364e7cSRobert Mustacchi 	allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL;
4734f364e7cSRobert Mustacchi 
4744f364e7cSRobert Mustacchi 	bp = base;
4754f364e7cSRobert Mustacchi 
4764f364e7cSRobert Mustacchi 	off = genasm_malinit(bp, umem_tmem_off, erroff,
4774f364e7cSRobert Mustacchi 	    umem_alloc_sizes[nents-1]);
4784f364e7cSRobert Mustacchi 	bp += off;
4794f364e7cSRobert Mustacchi 	allocoff -= off;
4804f364e7cSRobert Mustacchi 	erroff -= off;
4814f364e7cSRobert Mustacchi 
4824f364e7cSRobert Mustacchi 	if (nents > 1) {
4834f364e7cSRobert Mustacchi 		off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff);
4844f364e7cSRobert Mustacchi 		bp += off;
4854f364e7cSRobert Mustacchi 		allocoff -= off;
4864f364e7cSRobert Mustacchi 		erroff -= off;
4874f364e7cSRobert Mustacchi 	}
4884f364e7cSRobert Mustacchi 
4894f364e7cSRobert Mustacchi 	for (ii = 1; ii < nents - 1; ii++) {
4904f364e7cSRobert Mustacchi 		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff);
4914f364e7cSRobert Mustacchi 		bp += off;
4924f364e7cSRobert Mustacchi 		allocoff -= off;
4934f364e7cSRobert Mustacchi 		erroff -= off;
4944f364e7cSRobert Mustacchi 	}
4954f364e7cSRobert Mustacchi 
4964f364e7cSRobert Mustacchi 	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
4974f364e7cSRobert Mustacchi 	    erroff);
4984f364e7cSRobert Mustacchi 	bp += genasm_malfini(bp, umem_genasm_omptr);
4994f364e7cSRobert Mustacchi 	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
5004f364e7cSRobert Mustacchi 
5014f364e7cSRobert Mustacchi 	return (0);
5024f364e7cSRobert Mustacchi }
5034f364e7cSRobert Mustacchi 
5044f364e7cSRobert Mustacchi static int
genasm_free(void * base,size_t len,int nents,int * umem_alloc_sizes)5054f364e7cSRobert Mustacchi genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes)
5064f364e7cSRobert Mustacchi {
5074f364e7cSRobert Mustacchi 	uint8_t *bp;
5084f364e7cSRobert Mustacchi 	int ii, off;
5094f364e7cSRobert Mustacchi 	size_t total;
5104f364e7cSRobert Mustacchi 	uint32_t rbufoff, retoff, erroff;
5114f364e7cSRobert Mustacchi 
5124f364e7cSRobert Mustacchi 	/* Assume that nents has already been audited for us */
5134f364e7cSRobert Mustacchi 	total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache);
5144f364e7cSRobert Mustacchi 	if (nents >= 2)
5154f364e7cSRobert Mustacchi 		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
5164f364e7cSRobert Mustacchi 
5174f364e7cSRobert Mustacchi 	if (total > len)
5184f364e7cSRobert Mustacchi 		return (1);
5194f364e7cSRobert Mustacchi 
5204f364e7cSRobert Mustacchi 	erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL);
5214f364e7cSRobert Mustacchi 	rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL);
5224f364e7cSRobert Mustacchi 	retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL);
5234f364e7cSRobert Mustacchi 
5244f364e7cSRobert Mustacchi 	bp = base;
5254f364e7cSRobert Mustacchi 
5264f364e7cSRobert Mustacchi 	off = genasm_frinit(bp, umem_tmem_off, retoff, erroff,
5274f364e7cSRobert Mustacchi 	    umem_alloc_sizes[nents - 1]);
5284f364e7cSRobert Mustacchi 	bp += off;
5294f364e7cSRobert Mustacchi 	erroff -= off;
5304f364e7cSRobert Mustacchi 	rbufoff -= off;
5314f364e7cSRobert Mustacchi 
5324f364e7cSRobert Mustacchi 	if (nents > 1) {
5334f364e7cSRobert Mustacchi 		off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff);
5344f364e7cSRobert Mustacchi 		bp += off;
5354f364e7cSRobert Mustacchi 		erroff -= off;
5364f364e7cSRobert Mustacchi 		rbufoff -= off;
5374f364e7cSRobert Mustacchi 	}
5384f364e7cSRobert Mustacchi 
5394f364e7cSRobert Mustacchi 	for (ii = 1; ii < nents - 1; ii++) {
5404f364e7cSRobert Mustacchi 		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff);
5414f364e7cSRobert Mustacchi 		bp += off;
5424f364e7cSRobert Mustacchi 		rbufoff -= off;
5434f364e7cSRobert Mustacchi 		erroff -= off;
5444f364e7cSRobert Mustacchi 	}
5454f364e7cSRobert Mustacchi 
5464f364e7cSRobert Mustacchi 	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
5474f364e7cSRobert Mustacchi 	    erroff);
5484f364e7cSRobert Mustacchi 	bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr);
5494f364e7cSRobert Mustacchi 	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
5504f364e7cSRobert Mustacchi 
5514f364e7cSRobert Mustacchi 	return (0);
5524f364e7cSRobert Mustacchi }
5534f364e7cSRobert Mustacchi 
554*b1e2e3fbSRobert Mustacchi boolean_t
umem_genasm(int * cp,umem_cache_t ** caches,int nc)5554f364e7cSRobert Mustacchi umem_genasm(int *cp, umem_cache_t **caches, int nc)
5564f364e7cSRobert Mustacchi {
5574f364e7cSRobert Mustacchi 	int nents, i;
5584f364e7cSRobert Mustacchi 	uint8_t *mptr;
5594f364e7cSRobert Mustacchi 	uint8_t *fptr;
5604f364e7cSRobert Mustacchi 	uint64_t v, *vptr;
561*b1e2e3fbSRobert Mustacchi 	size_t mplen, fplen;
562*b1e2e3fbSRobert Mustacchi 	uintptr_t mpbase, fpbase;
563*b1e2e3fbSRobert Mustacchi 	boolean_t ret = B_FALSE;
5644f364e7cSRobert Mustacchi 
5654f364e7cSRobert Mustacchi 	mptr = (void *)((uintptr_t)umem_genasm_mptr + 5);
5664f364e7cSRobert Mustacchi 	fptr = (void *)((uintptr_t)umem_genasm_fptr + 5);
5674f364e7cSRobert Mustacchi 	if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 ||
568*b1e2e3fbSRobert Mustacchi 	    umem_genasm_fptr == 0 || umem_genasm_fsize == 0) {
569*b1e2e3fbSRobert Mustacchi 		return (B_FALSE);
570*b1e2e3fbSRobert Mustacchi 	}
571*b1e2e3fbSRobert Mustacchi 
572*b1e2e3fbSRobert Mustacchi 	mplen = P2ROUNDUP(umem_genasm_msize, pagesize);
573*b1e2e3fbSRobert Mustacchi 	mpbase = P2ALIGN((uintptr_t)umem_genasm_mptr, pagesize);
574*b1e2e3fbSRobert Mustacchi 	fplen = P2ROUNDUP(umem_genasm_fsize, pagesize);
575*b1e2e3fbSRobert Mustacchi 	fpbase = P2ALIGN((uintptr_t)umem_genasm_mptr, pagesize);
576*b1e2e3fbSRobert Mustacchi 
577*b1e2e3fbSRobert Mustacchi 	/*
578*b1e2e3fbSRobert Mustacchi 	 * If the values straddle a page boundary, then we might need to
579*b1e2e3fbSRobert Mustacchi 	 * actually remap two pages.
580*b1e2e3fbSRobert Mustacchi 	 */
581*b1e2e3fbSRobert Mustacchi 	if (P2ALIGN(umem_genasm_msize + (uintptr_t)umem_genasm_mptr,
582*b1e2e3fbSRobert Mustacchi 	    pagesize) != mpbase) {
583*b1e2e3fbSRobert Mustacchi 		mplen += pagesize;
584*b1e2e3fbSRobert Mustacchi 	}
585*b1e2e3fbSRobert Mustacchi 
586*b1e2e3fbSRobert Mustacchi 	if (P2ALIGN(umem_genasm_fsize + (uintptr_t)umem_genasm_fptr,
587*b1e2e3fbSRobert Mustacchi 	    pagesize) != fpbase) {
588*b1e2e3fbSRobert Mustacchi 		fplen += pagesize;
589*b1e2e3fbSRobert Mustacchi 	}
590*b1e2e3fbSRobert Mustacchi 
591*b1e2e3fbSRobert Mustacchi 	if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_WRITE |
592*b1e2e3fbSRobert Mustacchi 	    PROT_EXEC) != 0) {
593*b1e2e3fbSRobert Mustacchi 		return (B_FALSE);
594*b1e2e3fbSRobert Mustacchi 	}
595*b1e2e3fbSRobert Mustacchi 
596*b1e2e3fbSRobert Mustacchi 	if (mprotect((void *)fpbase, fplen, PROT_READ | PROT_WRITE |
597*b1e2e3fbSRobert Mustacchi 	    PROT_EXEC) != 0) {
598*b1e2e3fbSRobert Mustacchi 		if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_EXEC) !=
599*b1e2e3fbSRobert Mustacchi 		    0) {
600*b1e2e3fbSRobert Mustacchi 			umem_panic("genasm failed to restore memory "
601*b1e2e3fbSRobert Mustacchi 			    "protection: %d", errno);
602*b1e2e3fbSRobert Mustacchi 		}
603*b1e2e3fbSRobert Mustacchi 		return (B_FALSE);
604*b1e2e3fbSRobert Mustacchi 	}
6054f364e7cSRobert Mustacchi 
6064f364e7cSRobert Mustacchi 	/*
6074f364e7cSRobert Mustacchi 	 * The total number of caches that we can service is the minimum of:
6084f364e7cSRobert Mustacchi 	 *  o the amount supported by libc
6094f364e7cSRobert Mustacchi 	 *  o the total number of umem caches
6104f364e7cSRobert Mustacchi 	 *  o we use a single byte addl, so it's MAX_UINT32 / sizeof (uintptr_t)
6114f364e7cSRobert Mustacchi 	 *    For 64-bit, this is MAX_UINT32 >> 3, a lot.
6124f364e7cSRobert Mustacchi 	 */
6134f364e7cSRobert Mustacchi 	nents = _tmem_get_nentries();
6144f364e7cSRobert Mustacchi 
6154f364e7cSRobert Mustacchi 	if (UMEM_GENASM_MAX64 < nents)
6164f364e7cSRobert Mustacchi 		nents = UMEM_GENASM_MAX64;
6174f364e7cSRobert Mustacchi 
6184f364e7cSRobert Mustacchi 	if (nc < nents)
6194f364e7cSRobert Mustacchi 		nents = nc;
6204f364e7cSRobert Mustacchi 
621*b1e2e3fbSRobert Mustacchi 	/*
622*b1e2e3fbSRobert Mustacchi 	 * If the number of per-thread caches has been set to zero or the
623*b1e2e3fbSRobert Mustacchi 	 * per-thread cache size has been set to zero, don't bother trying to
624*b1e2e3fbSRobert Mustacchi 	 * write any assembly and just use the default malloc and free. When we
625*b1e2e3fbSRobert Mustacchi 	 * return, indicate that there is no PTC support.
626*b1e2e3fbSRobert Mustacchi 	 */
627*b1e2e3fbSRobert Mustacchi 	if (nents == 0 || umem_ptc_size == 0) {
628*b1e2e3fbSRobert Mustacchi 		goto out;
629*b1e2e3fbSRobert Mustacchi 	}
6304f364e7cSRobert Mustacchi 
6314f364e7cSRobert Mustacchi 	/* Take into account the jump */
632*b1e2e3fbSRobert Mustacchi 	if (genasm_malloc(mptr, umem_genasm_msize, nents, cp) != 0) {
633*b1e2e3fbSRobert Mustacchi 		goto out;
634*b1e2e3fbSRobert Mustacchi 	}
6354f364e7cSRobert Mustacchi 
636*b1e2e3fbSRobert Mustacchi 	if (genasm_free(fptr, umem_genasm_fsize, nents, cp) != 0) {
637*b1e2e3fbSRobert Mustacchi 		goto out;
638*b1e2e3fbSRobert Mustacchi 	}
6394f364e7cSRobert Mustacchi 
6404f364e7cSRobert Mustacchi 	/* nop out the jump with a multibyte jump */
6414f364e7cSRobert Mustacchi 	vptr = (void *)umem_genasm_mptr;
6424f364e7cSRobert Mustacchi 	v = MULTINOP;
6434f364e7cSRobert Mustacchi 	v |= *vptr & (0xffffffULL << 40);
6444f364e7cSRobert Mustacchi 	(void) atomic_swap_64(vptr, v);
6454f364e7cSRobert Mustacchi 	vptr = (void *)umem_genasm_fptr;
6464f364e7cSRobert Mustacchi 	v = MULTINOP;
6474f364e7cSRobert Mustacchi 	v |= *vptr & (0xffffffULL << 40);
6484f364e7cSRobert Mustacchi 	(void) atomic_swap_64(vptr, v);
6494f364e7cSRobert Mustacchi 
6504f364e7cSRobert Mustacchi 	for (i = 0; i < nents; i++)
6514f364e7cSRobert Mustacchi 		caches[i]->cache_flags |= UMF_PTC;
6524f364e7cSRobert Mustacchi 
653*b1e2e3fbSRobert Mustacchi 	ret = B_TRUE;
654*b1e2e3fbSRobert Mustacchi out:
655*b1e2e3fbSRobert Mustacchi 	if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_EXEC) != 0) {
656*b1e2e3fbSRobert Mustacchi 		umem_panic("genasm failed to restore memory protection: %d",
657*b1e2e3fbSRobert Mustacchi 		    errno);
658*b1e2e3fbSRobert Mustacchi 	}
659*b1e2e3fbSRobert Mustacchi 
660*b1e2e3fbSRobert Mustacchi 	if (mprotect((void *)fpbase, fplen, PROT_READ | PROT_EXEC) != 0) {
661*b1e2e3fbSRobert Mustacchi 		umem_panic("genasm failed to restore memory protection: %d",
662*b1e2e3fbSRobert Mustacchi 		    errno);
663*b1e2e3fbSRobert Mustacchi 	}
664*b1e2e3fbSRobert Mustacchi 
665*b1e2e3fbSRobert Mustacchi 	return (ret);
6664f364e7cSRobert Mustacchi }
667