/* $NetBSD: patch.c,v 1.34 2018/03/13 16:52:42 maxv Exp $ */

/*-
 * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Patch kernel code at boot time, depending on available CPU features.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.34 2018/03/13 16:52:42 maxv Exp $");

#include "opt_lockdebug.h"
#ifdef i386
#include "opt_spldebug.h"
#endif

#include <sys/types.h>
#include <sys/systm.h>

#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <machine/frameasm.h>

#include <x86/cpuvar.h>
#include <x86/cputypes.h>

struct hotpatch {
	uint8_t name;
	uint8_t size;
	void *addr;
} __packed;

void spllower(int);
void spllower_end(void);
void cx8_spllower(int);
void cx8_spllower_end(void);
void cx8_spllower_patch(void);

void mutex_spin_exit_end(void);
void i686_mutex_spin_exit(int);
void i686_mutex_spin_exit_end(void);
void i686_mutex_spin_exit_patch(void);

void membar_consumer(void);
void membar_consumer_end(void);
void membar_sync(void);
void membar_sync_end(void);
void sse2_lfence(void);
void sse2_lfence_end(void);
void sse2_mfence(void);
void sse2_mfence_end(void);

void _atomic_cas_64(void);
void _atomic_cas_64_end(void);
void _atomic_cas_cx8(void);
void _atomic_cas_cx8_end(void);

extern void *atomic_lockpatch[];

#define X86_NOP		0x90
#define X86_REP		0xf3
#define X86_RET		0xc3
#define X86_CS		0x2e
#define X86_DS		0x3e
#define X86_GROUP_0F	0x0f

/*
 * Fix up the 32-bit displacement of a pc-relative jump or call that was
 * copied from 'from_s' to 'to_s', so that it keeps its original absolute
 * target.  'ptr' points at the instruction in the source routine; any
 * branch-hint prefix and the 0x0f byte of two-byte conditional jumps are
 * skipped first.
 */
static void
adjust_jumpoff(uint8_t *ptr, void *from_s, void *to_s)
{

	/* Branch hints */
	if (ptr[0] == X86_CS || ptr[0] == X86_DS)
		ptr++;
	/* Conditional jumps */
	if (ptr[0] == X86_GROUP_0F)
		ptr++;
	/* 4-byte relative jump or call */
	*(uint32_t *)(ptr + 1 - (uintptr_t)from_s + (uintptr_t)to_s) +=
	    ((uint32_t)(uintptr_t)from_s - (uint32_t)(uintptr_t)to_s);
}

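/*
 * Copy a replacement routine (from_s..from_e) over an existing routine
 * (to_s..to_e) of identical size.  If 'pcrel' is non-NULL, it points at a
 * pc-relative jump or call inside the source routine whose displacement
 * must be re-adjusted for the copy's new location.
 */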
static void __unused
patchfunc(void *from_s, void *from_e, void *to_s, void *to_e,
    void *pcrel)
{

	if ((uintptr_t)from_e - (uintptr_t)from_s !=
	    (uintptr_t)to_e - (uintptr_t)to_s)
		panic("patchfunc: sizes do not match (from=%p)", from_s);

	memcpy(to_s, from_s, (uintptr_t)to_e - (uintptr_t)to_s);
	if (pcrel != NULL)
		adjust_jumpoff(pcrel, from_s, to_s);

#ifdef GPROF
#ifdef i386
#define MCOUNT_CALL_OFFSET	3
#endif
#ifdef __x86_64__
#define MCOUNT_CALL_OFFSET	5
#endif
	/* Patch mcount call offset */
	adjust_jumpoff((uint8_t *)from_s + MCOUNT_CALL_OFFSET, from_s, to_s);
#endif
}

static inline void __unused
patchbytes(void *addr, const uint8_t *bytes, size_t size)
{
	uint8_t *ptr = (uint8_t *)addr;
	size_t i;

	for (i = 0; i < size; i++) {
		ptr[i] = bytes[i];
	}
}

void
x86_hotpatch(uint32_t name, const uint8_t *bytes, size_t size)
{
	extern char __rodata_hotpatch_start;
	extern char __rodata_hotpatch_end;
	struct hotpatch *hps, *hpe, *hp;

	hps = (struct hotpatch *)&__rodata_hotpatch_start;
	hpe = (struct hotpatch *)&__rodata_hotpatch_end;

	for (hp = hps; hp < hpe; hp++) {
		if (hp->name != name) {
			continue;
		}
		if (hp->size != size) {
			panic("x86_hotpatch: incorrect size");
		}
		patchbytes(hp->addr, bytes, size);
	}
}

void
x86_patch_window_open(u_long *psl, u_long *cr0)
{
	/* Disable interrupts. */
	*psl = x86_read_psl();
	x86_disable_intr();

	/* Disable write protection in supervisor mode. */
	*cr0 = rcr0();
	lcr0(*cr0 & ~CR0_WP);
}

void
x86_patch_window_close(u_long psl, u_long cr0)
{
	/* Write back and invalidate cache, flush pipelines. */
	wbinvd();
	x86_flush();

	/* Re-enable write protection. */
	lcr0(cr0);

	/* Restore the PSL, potentially re-enabling interrupts. */
	x86_write_psl(psl);
}

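/*
 * Apply the boot-time code patches.  Called twice during boot, first with
 * early=true and later with early=false; each pass runs at most once, and
 * most feature-dependent patches are only applied on the late pass.
 */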
void
x86_patch(bool early)
{
	static bool first, second;
	u_long psl;
	u_long cr0;

	if (early) {
		if (first)
			return;
		first = true;
	} else {
		if (second)
			return;
		second = true;
	}

	x86_patch_window_open(&psl, &cr0);

#if !defined(GPROF)
	if (!early && ncpu == 1) {
#ifndef LOCKDEBUG
		/*
		 * Uniprocessor: kill LOCK prefixes.
		 */
		const uint8_t bytes[] = {
			X86_NOP
		};

		/* lock -> nop */
		x86_hotpatch(HP_NAME_NOLOCK, bytes, sizeof(bytes));
		for (int i = 0; atomic_lockpatch[i] != 0; i++)
			patchbytes(atomic_lockpatch[i], bytes, sizeof(bytes));
#endif
	}

	if (!early && (cpu_feature[0] & CPUID_SSE2) != 0) {
		/*
		 * Faster memory barriers.  We do not need to patch
		 * membar_producer to use SFENCE because on x86 ordinary
		 * stores (as opposed to non-temporal stores) are always
		 * issued in program order to main memory and to other CPUs.
		 */
		patchfunc(
		    sse2_lfence, sse2_lfence_end,
		    membar_consumer, membar_consumer_end,
		    NULL
		);
		patchfunc(
		    sse2_mfence, sse2_mfence_end,
		    membar_sync, membar_sync_end,
		    NULL
		);
	}
#endif /* GPROF */

#ifdef i386
	/*
	 * Patch early and late.  Second time around the 'lock' prefix
	 * may be gone.
	 */
	if ((cpu_feature[0] & CPUID_CX8) != 0) {
		patchfunc(
		    _atomic_cas_cx8, _atomic_cas_cx8_end,
		    _atomic_cas_64, _atomic_cas_64_end,
		    NULL
		);
	}
#endif /* i386 */

#if !defined(SPLDEBUG)
	if (!early && (cpu_feature[0] & CPUID_CX8) != 0) {
		/* Faster splx(), mutex_spin_exit(). */
		patchfunc(
		    cx8_spllower, cx8_spllower_end,
		    spllower, spllower_end,
		    cx8_spllower_patch
		);
#if defined(i386) && !defined(LOCKDEBUG)
		patchfunc(
		    i686_mutex_spin_exit, i686_mutex_spin_exit_end,
		    mutex_spin_exit, mutex_spin_exit_end,
		    i686_mutex_spin_exit_patch
		);
#endif /* i386 && !LOCKDEBUG */
	}
#endif /* !SPLDEBUG */

	/*
	 * On some Opteron revisions, locked operations erroneously
	 * allow memory references to be `bled' outside of critical
	 * sections.  Apply workaround.
	 */
	if (cpu_vendor == CPUVENDOR_AMD &&
	    (CPUID_TO_FAMILY(cpu_info_primary.ci_signature) == 0xe ||
	    (CPUID_TO_FAMILY(cpu_info_primary.ci_signature) == 0xf &&
	    CPUID_TO_EXTMODEL(cpu_info_primary.ci_signature) < 0x4))) {
		const uint8_t bytes[] = {
			0x0F, 0xAE, 0xE8 /* lfence */
		};

		/* ret,nop,nop -> lfence */
		x86_hotpatch(HP_NAME_RETFENCE, bytes, sizeof(bytes));
	}

	/*
	 * If SMAP is present then patch the prepared holes with clac/stac
	 * instructions.
	 *
	 * clac = 0x0f, 0x01, 0xca
	 * stac = 0x0f, 0x01, 0xcb
	 */
	if (!early && cpu_feature[5] & CPUID_SEF_SMAP) {
		KASSERT(rcr4() & CR4_SMAP);
		const uint8_t clac_bytes[] = {
			0x0F, 0x01, 0xCA /* clac */
		};
		const uint8_t stac_bytes[] = {
			0x0F, 0x01, 0xCB /* stac */
		};

		/* nop,nop,nop -> clac */
		x86_hotpatch(HP_NAME_CLAC, clac_bytes, sizeof(clac_bytes));

		/* nop,nop,nop -> stac */
		x86_hotpatch(HP_NAME_STAC, stac_bytes, sizeof(stac_bytes));
	}

	x86_patch_window_close(psl, cr0);
}