/*	$NetBSD: patch.c,v 1.24 2017/10/27 23:22:01 riastradh Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Patch kernel code at boot time, depending on available CPU features.
 */
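
/*
 * Two mechanisms are used:
 *
 *  - patchbytes() overwrites up to three individual bytes in place,
 *    for example to turn a LOCK prefix into a NOP on uniprocessor
 *    kernels.
 *
 *  - patchfunc() copies a replacement routine of identical size over
 *    the original and, when asked, fixes up one PC-relative
 *    displacement inside the copied code.
 *
 * x86_patch() drives both and is called twice during boot; the guards
 * at the top make the early and the late pass each run at most once,
 * and several patches are applied only in the late pass.
 */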

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.24 2017/10/27 23:22:01 riastradh Exp $");

#include "opt_lockdebug.h"
#ifdef i386
#include "opt_spldebug.h"
#endif

#include <sys/types.h>
#include <sys/systm.h>

#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>

#include <x86/cpuvar.h>
#include <x86/cputypes.h>

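/*
 * Original routines, their replacements, and end markers, defined
 * elsewhere in the kernel.  Each <name>_end symbol marks the first
 * byte past the corresponding routine, so that patchfunc() can check
 * that a replacement is exactly the same size as the code it
 * overwrites.  The *_patch symbols label a jump or call inside a
 * replacement whose PC-relative displacement has to be adjusted after
 * the copy (the 'pcrel' argument to patchfunc()).
 */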
void	spllower(int);
void	spllower_end(void);
void	cx8_spllower(int);
void	cx8_spllower_end(void);
void	cx8_spllower_patch(void);

void	mutex_spin_exit_end(void);
void	i686_mutex_spin_exit(int);
void	i686_mutex_spin_exit_end(void);
void	i686_mutex_spin_exit_patch(void);

void	membar_consumer(void);
void	membar_consumer_end(void);
void	membar_sync(void);
void	membar_sync_end(void);
void	sse2_lfence(void);
void	sse2_lfence_end(void);
void	sse2_mfence(void);
void	sse2_mfence_end(void);

void	_atomic_cas_64(void);
void	_atomic_cas_64_end(void);
void	_atomic_cas_cx8(void);
void	_atomic_cas_cx8_end(void);

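/*
 * NULL-terminated tables holding the addresses of the individual
 * patchable instruction sites; they are defined elsewhere in the
 * kernel and walked by the patchbytes() loops below.
 */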
extern void	*x86_lockpatch[];
extern void	*x86_clacpatch[];
extern void	*x86_stacpatch[];
extern void	*x86_retpatch[];
extern void	*atomic_lockpatch[];

#define	X86_NOP		0x90
#define	X86_REP		0xf3
#define	X86_RET		0xc3
#define	X86_CS		0x2e
#define	X86_DS		0x3e
#define	X86_GROUP_0F	0x0f

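/*
 * adjust_jumpoff: rebase the 32-bit PC-relative displacement of a
 * jump or call after its routine has been copied from 'from_s' to
 * 'to_s'.  'ptr' points at the instruction inside the source routine;
 * an optional CS/DS branch-hint prefix and the 0x0f byte of a
 * two-byte conditional-jump opcode are stepped over, then the
 * displacement stored at the corresponding location inside the copy
 * is increased by (from_s - to_s).  For example, a 5-byte call at
 * source address A with displacement D targets A + 5 + D; after the
 * copy the instruction sits at A - from_s + to_s, so adding
 * (from_s - to_s) to D keeps the absolute target unchanged.
 */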
static void
adjust_jumpoff(uint8_t *ptr, void *from_s, void *to_s)
{

	/* Branch hints */
	if (ptr[0] == X86_CS || ptr[0] == X86_DS)
		ptr++;
	/* Conditional jumps */
	if (ptr[0] == X86_GROUP_0F)
		ptr++;
	/* 4-byte relative jump or call */
	*(uint32_t *)(ptr + 1 - (uintptr_t)from_s + (uintptr_t)to_s) +=
	    ((uint32_t)(uintptr_t)from_s - (uint32_t)(uintptr_t)to_s);
}

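/*
 * patchfunc: copy the replacement routine [from_s, from_e) over the
 * routine at [to_s, to_e).  The two regions must be exactly the same
 * size or the kernel panics.  If 'pcrel' is not NULL it points at a
 * jump or call inside the replacement whose PC-relative displacement
 * is rebased for the new location; under GPROF the embedded mcount
 * call is rebased as well.
 */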
static void __unused
patchfunc(void *from_s, void *from_e, void *to_s, void *to_e,
	  void *pcrel)
{

	if ((uintptr_t)from_e - (uintptr_t)from_s !=
	    (uintptr_t)to_e - (uintptr_t)to_s)
		panic("patchfunc: sizes do not match (from=%p)", from_s);

	memcpy(to_s, from_s, (uintptr_t)to_e - (uintptr_t)to_s);
	if (pcrel != NULL)
		adjust_jumpoff(pcrel, from_s, to_s);

#ifdef GPROF
#ifdef i386
#define	MCOUNT_CALL_OFFSET	3
#endif
#ifdef __x86_64__
#define	MCOUNT_CALL_OFFSET	5
#endif
	/* Patch mcount call offset */
	adjust_jumpoff((uint8_t *)from_s + MCOUNT_CALL_OFFSET, from_s, to_s);
#endif
}

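/*
 * patchbytes: overwrite up to three consecutive bytes at 'addr'.  The
 * first byte is always written; a byte argument of -1 leaves the
 * corresponding byte untouched.
 */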
static inline void __unused
patchbytes(void *addr, const int byte1, const int byte2, const int byte3)
{

	((uint8_t *)addr)[0] = (uint8_t)byte1;
	if (byte2 != -1)
		((uint8_t *)addr)[1] = (uint8_t)byte2;
	if (byte3 != -1)
		((uint8_t *)addr)[2] = (uint8_t)byte3;
}

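/*
 * x86_patch: apply the boot-time code patches.  Called twice, first
 * with early=true and later with early=false; the static flags make
 * each pass run at most once.  Interrupts are disabled and CR0.WP is
 * cleared while patching so that the write-protected kernel text can
 * be modified, and the caches and pipelines are flushed afterwards so
 * that the CPU fetches the new instructions.
 */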
void
x86_patch(bool early)
{
	static bool first, second;
	u_long psl;
	u_long cr0;
	int i;

	if (early) {
		if (first)
			return;
		first = true;
	} else {
		if (second)
			return;
		second = true;
	}

	/* Disable interrupts. */
	psl = x86_read_psl();
	x86_disable_intr();

	/* Disable write protection in supervisor mode. */
	cr0 = rcr0();
	lcr0(cr0 & ~CR0_WP);

#if !defined(GPROF)
	if (!early && ncpu == 1) {
#ifndef LOCKDEBUG
		/* Uniprocessor: kill LOCK prefixes. */
		for (i = 0; x86_lockpatch[i] != 0; i++)
			patchbytes(x86_lockpatch[i], X86_NOP, -1, -1);
		for (i = 0; atomic_lockpatch[i] != 0; i++)
			patchbytes(atomic_lockpatch[i], X86_NOP, -1, -1);
#endif	/* !LOCKDEBUG */
	}
	if (!early && (cpu_feature[0] & CPUID_SSE2) != 0) {
		/*
		 * Faster memory barriers.  We do not need to patch
		 * membar_producer to use SFENCE because on x86
		 * ordinary stores are always issued in program order
		 * to main memory and to other CPUs.
		 */
		patchfunc(
		    sse2_lfence, sse2_lfence_end,
		    membar_consumer, membar_consumer_end,
		    NULL
		);
		patchfunc(
		    sse2_mfence, sse2_mfence_end,
		    membar_sync, membar_sync_end,
		    NULL
		);
	}
#endif	/* GPROF */

#ifdef i386
	/*
	 * Patch early and late.  Second time around the 'lock' prefix
	 * may be gone.
	 */
	if ((cpu_feature[0] & CPUID_CX8) != 0) {
		patchfunc(
		    _atomic_cas_cx8, _atomic_cas_cx8_end,
		    _atomic_cas_64, _atomic_cas_64_end,
		    NULL
		);
	}
#endif	/* i386 */

#if !defined(SPLDEBUG)
	if (!early && (cpu_feature[0] & CPUID_CX8) != 0) {
		/* Faster splx(), mutex_spin_exit(). */
		patchfunc(
		    cx8_spllower, cx8_spllower_end,
		    spllower, spllower_end,
		    cx8_spllower_patch
		);
#if defined(i386) && !defined(LOCKDEBUG)
		patchfunc(
		    i686_mutex_spin_exit, i686_mutex_spin_exit_end,
		    mutex_spin_exit, mutex_spin_exit_end,
		    i686_mutex_spin_exit_patch
		);
#endif	/* i386 && !LOCKDEBUG */
	}
#endif /* !SPLDEBUG */

	/*
	 * On some Opteron revisions, locked operations erroneously
	 * allow memory references to be `bled' outside of critical
	 * sections.  Apply workaround.
	 */
	if (cpu_vendor == CPUVENDOR_AMD &&
	    (CPUID_TO_FAMILY(cpu_info_primary.ci_signature) == 0xe ||
	    (CPUID_TO_FAMILY(cpu_info_primary.ci_signature) == 0xf &&
	    CPUID_TO_EXTMODEL(cpu_info_primary.ci_signature) < 0x4))) {
		for (i = 0; x86_retpatch[i] != 0; i++) {
			/* ret,nop,nop,ret -> lfence,ret */
			patchbytes(x86_retpatch[i], 0x0f, 0xae, 0xe8);
		}
	}

#ifdef amd64
	/*
	 * If SMAP is present then patch the prepared holes with clac/stac
	 * instructions.
	 *
	 * clac = 0x0f, 0x01, 0xca
	 * stac = 0x0f, 0x01, 0xcb
	 */
	if (!early && cpu_feature[5] & CPUID_SEF_SMAP) {
		KASSERT(rcr4() & CR4_SMAP);
		for (i = 0; x86_clacpatch[i] != NULL; i++) {
			/* ret,int3,int3 -> clac */
			patchbytes(x86_clacpatch[i],
			    0x0f, 0x01, 0xca);
		}
		for (i = 0; x86_stacpatch[i] != NULL; i++) {
			/* ret,int3,int3 -> stac */
			patchbytes(x86_stacpatch[i],
			    0x0f, 0x01, 0xcb);
		}
	}
#endif

	/* Write back and invalidate cache, flush pipelines. */
	wbinvd();
	x86_flush();
	x86_write_psl(psl);

	/* Re-enable write protection. */
	lcr0(cr0);
}
277