/*	$NetBSD: patch.c,v 1.34 2018/03/13 16:52:42 maxv Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Patch kernel code at boot time, depending on available CPU features.
 */

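/*
 * Two techniques are used below: x86_hotpatch() overwrites a few bytes
 * in place at every site registered under an HP_NAME_* tag, while
 * patchfunc() replaces an entire routine with an alternative
 * implementation of the same size, fixing up one pc-relative branch in
 * the copy if needed.
 */
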
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.34 2018/03/13 16:52:42 maxv Exp $");

#include "opt_lockdebug.h"
#ifdef i386
#include "opt_spldebug.h"
#endif

#include <sys/types.h>
#include <sys/systm.h>

#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <machine/frameasm.h>

#include <x86/cpuvar.h>
#include <x86/cputypes.h>

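/*
 * A hotpatch record describes one patchable site in the kernel text.
 * The records are gathered between __rodata_hotpatch_start and
 * __rodata_hotpatch_end; each one carries the HP_NAME_* tag of the
 * patch group it belongs to (see <machine/frameasm.h>), the number of
 * bytes reserved at the site, and the address of the first byte to
 * overwrite.  x86_hotpatch() walks these records.
 */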
struct hotpatch {
	uint8_t name;
	uint8_t size;
	void *addr;
} __packed;

void	spllower(int);
void	spllower_end(void);
void	cx8_spllower(int);
void	cx8_spllower_end(void);
void	cx8_spllower_patch(void);

void	mutex_spin_exit_end(void);
void	i686_mutex_spin_exit(int);
void	i686_mutex_spin_exit_end(void);
void	i686_mutex_spin_exit_patch(void);

void	membar_consumer(void);
void	membar_consumer_end(void);
void	membar_sync(void);
void	membar_sync_end(void);
void	sse2_lfence(void);
void	sse2_lfence_end(void);
void	sse2_mfence(void);
void	sse2_mfence_end(void);

void	_atomic_cas_64(void);
void	_atomic_cas_64_end(void);
void	_atomic_cas_cx8(void);
void	_atomic_cas_cx8_end(void);

extern void	*atomic_lockpatch[];

#define	X86_NOP		0x90
#define	X86_REP		0xf3
#define	X86_RET		0xc3
#define	X86_CS		0x2e
#define	X86_DS		0x3e
#define	X86_GROUP_0F	0x0f

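/*
 * Fix up the 32-bit pc-relative displacement of a jump or call that was
 * copied along with its routine.  'ptr' points at the instruction inside
 * the source routine starting at 'from_s'; the corresponding location in
 * the copy at 'to_s' is found by relocating the pointer, and the stored
 * displacement is biased by (from_s - to_s) so that it still reaches the
 * original absolute target.  Branch-hint prefixes (CS/DS) and the 0x0f
 * opcode byte of two-byte conditional jumps are skipped first.
 */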
static void
adjust_jumpoff(uint8_t *ptr, void *from_s, void *to_s)
{

	/* Branch hints */
	if (ptr[0] == X86_CS || ptr[0] == X86_DS)
		ptr++;
	/* Conditional jumps */
	if (ptr[0] == X86_GROUP_0F)
		ptr++;
	/* 4-byte relative jump or call */
	*(uint32_t *)(ptr + 1 - (uintptr_t)from_s + (uintptr_t)to_s) +=
	    ((uint32_t)(uintptr_t)from_s - (uint32_t)(uintptr_t)to_s);
}

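/*
 * Overwrite the routine at [to_s, to_e) with the replacement routine at
 * [from_s, from_e); the two must be exactly the same size.  If 'pcrel'
 * is non-NULL it names a pc-relative jump or call inside the replacement
 * whose displacement must be adjusted after the copy.  Under GPROF the
 * mcount call emitted near the start of the routine is fixed up as well.
 */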
static void __unused
patchfunc(void *from_s, void *from_e, void *to_s, void *to_e,
	  void *pcrel)
{

	if ((uintptr_t)from_e - (uintptr_t)from_s !=
	    (uintptr_t)to_e - (uintptr_t)to_s)
		panic("patchfunc: sizes do not match (from=%p)", from_s);

	memcpy(to_s, from_s, (uintptr_t)to_e - (uintptr_t)to_s);
	if (pcrel != NULL)
		adjust_jumpoff(pcrel, from_s, to_s);

#ifdef GPROF
#ifdef i386
#define	MCOUNT_CALL_OFFSET	3
#endif
#ifdef __x86_64__
#define	MCOUNT_CALL_OFFSET	5
#endif
	/* Patch mcount call offset */
	adjust_jumpoff((uint8_t *)from_s + MCOUNT_CALL_OFFSET, from_s, to_s);
#endif
}

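/*
 * Copy 'size' bytes over the instruction bytes at 'addr', one byte at a
 * time.  The caller must have opened a patch window first (interrupts
 * off, CR0_WP cleared) so that the kernel text is writable.
 */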
static inline void __unused
patchbytes(void *addr, const uint8_t *bytes, size_t size)
{
	uint8_t *ptr = (uint8_t *)addr;
	size_t i;

	for (i = 0; i < size; i++) {
		ptr[i] = bytes[i];
	}
}

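/*
 * Apply a byte sequence to every registered hotpatch site tagged with
 * 'name'.  Walks the hotpatch records gathered between
 * __rodata_hotpatch_start and __rodata_hotpatch_end, and panics if a
 * matching record does not reserve exactly 'size' bytes.
 */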
void
x86_hotpatch(uint32_t name, const uint8_t *bytes, size_t size)
{
	extern char __rodata_hotpatch_start;
	extern char __rodata_hotpatch_end;
	struct hotpatch *hps, *hpe, *hp;

	hps = (struct hotpatch *)&__rodata_hotpatch_start;
	hpe = (struct hotpatch *)&__rodata_hotpatch_end;

	for (hp = hps; hp < hpe; hp++) {
		if (hp->name != name) {
			continue;
		}
		if (hp->size != size) {
			panic("x86_hotpatch: incorrect size");
		}
		patchbytes(hp->addr, bytes, size);
	}
}

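/*
 * x86_patch_window_open() and x86_patch_window_close() bracket a code
 * patching operation.  Opening the window saves the current PSL and CR0,
 * disables interrupts and clears CR0_WP so that read-only kernel text
 * can be written; closing it writes back and invalidates the caches,
 * flushes the pipeline, and restores CR0 and the PSL.
 */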
void
x86_patch_window_open(u_long *psl, u_long *cr0)
{
	/* Disable interrupts. */
	*psl = x86_read_psl();
	x86_disable_intr();

	/* Disable write protection in supervisor mode. */
	*cr0 = rcr0();
	lcr0(*cr0 & ~CR0_WP);
}

void
x86_patch_window_close(u_long psl, u_long cr0)
{
	/* Write back and invalidate cache, flush pipelines. */
	wbinvd();
	x86_flush();

	/* Re-enable write protection. */
	lcr0(cr0);

	/* Restore the PSL, potentially re-enabling interrupts. */
	x86_write_psl(psl);
}

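/*
 * Main entry point.  Designed to be run twice, once early and once late
 * in boot; the static booleans make each pass execute at most once.  The
 * !early checks below are for patches that depend on ncpu or on CPU
 * feature bits that are only known by the time of the late pass.
 */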
void
x86_patch(bool early)
{
	static bool first, second;
	u_long psl;
	u_long cr0;

	if (early) {
		if (first)
			return;
		first = true;
	} else {
		if (second)
			return;
		second = true;
	}

	x86_patch_window_open(&psl, &cr0);

#if !defined(GPROF)
	if (!early && ncpu == 1) {
#ifndef LOCKDEBUG
		/*
		 * Uniprocessor: kill LOCK prefixes.
		 */
		const uint8_t bytes[] = {
			X86_NOP
		};

		/* lock -> nop */
		x86_hotpatch(HP_NAME_NOLOCK, bytes, sizeof(bytes));
		for (int i = 0; atomic_lockpatch[i] != 0; i++)
			patchbytes(atomic_lockpatch[i], bytes, sizeof(bytes));
#endif
	}

	if (!early && (cpu_feature[0] & CPUID_SSE2) != 0) {
		/*
		 * Faster memory barriers.  We do not need to patch
		 * membar_producer to use SFENCE because ordinary x86
		 * stores (those without a non-temporal hint) already
		 * become visible to other CPUs in program order.
		 */
		patchfunc(
		    sse2_lfence, sse2_lfence_end,
		    membar_consumer, membar_consumer_end,
		    NULL
		);
		patchfunc(
		    sse2_mfence, sse2_mfence_end,
		    membar_sync, membar_sync_end,
		    NULL
		);
	}
#endif	/* GPROF */

#ifdef i386
	/*
	 * Patch early and late.  Second time around the 'lock' prefix
	 * may be gone.
	 */
	if ((cpu_feature[0] & CPUID_CX8) != 0) {
		patchfunc(
		    _atomic_cas_cx8, _atomic_cas_cx8_end,
		    _atomic_cas_64, _atomic_cas_64_end,
		    NULL
		);
	}
#endif	/* i386 */

#if !defined(SPLDEBUG)
	if (!early && (cpu_feature[0] & CPUID_CX8) != 0) {
		/* Faster splx(), mutex_spin_exit(). */
		patchfunc(
		    cx8_spllower, cx8_spllower_end,
		    spllower, spllower_end,
		    cx8_spllower_patch
		);
#if defined(i386) && !defined(LOCKDEBUG)
		patchfunc(
		    i686_mutex_spin_exit, i686_mutex_spin_exit_end,
		    mutex_spin_exit, mutex_spin_exit_end,
		    i686_mutex_spin_exit_patch
		);
#endif	/* i386 && !LOCKDEBUG */
	}
#endif /* !SPLDEBUG */

	/*
	 * On some Opteron revisions, locked operations erroneously
	 * allow memory references to `bleed' outside of critical
	 * sections.  Apply the workaround.
	 */
	if (cpu_vendor == CPUVENDOR_AMD &&
	    (CPUID_TO_FAMILY(cpu_info_primary.ci_signature) == 0xe ||
	    (CPUID_TO_FAMILY(cpu_info_primary.ci_signature) == 0xf &&
	    CPUID_TO_EXTMODEL(cpu_info_primary.ci_signature) < 0x4))) {
		const uint8_t bytes[] = {
			0x0F, 0xAE, 0xE8 /* lfence */
		};

		/* ret,nop,nop -> lfence */
		x86_hotpatch(HP_NAME_RETFENCE, bytes, sizeof(bytes));
	}

	/*
	 * If SMAP is present then patch the prepared holes with clac/stac
	 * instructions.
	 *
	 * clac = 0x0f, 0x01, 0xca
	 * stac = 0x0f, 0x01, 0xcb
	 */
	if (!early && cpu_feature[5] & CPUID_SEF_SMAP) {
		KASSERT(rcr4() & CR4_SMAP);
		const uint8_t clac_bytes[] = {
			0x0F, 0x01, 0xCA /* clac */
		};
		const uint8_t stac_bytes[] = {
			0x0F, 0x01, 0xCB /* stac */
		};

		/* nop,nop,nop -> clac */
		x86_hotpatch(HP_NAME_CLAC, clac_bytes, sizeof(clac_bytes));

		/* nop,nop,nop -> stac */
		x86_hotpatch(HP_NAME_STAC, stac_bytes, sizeof(stac_bytes));
	}

	x86_patch_window_close(psl, cr0);
}