/*	$NetBSD: patch.c,v 1.24 2017/10/27 23:22:01 riastradh Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Patch kernel code at boot time, depending on available CPU features.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.24 2017/10/27 23:22:01 riastradh Exp $");

#include "opt_lockdebug.h"
#ifdef i386
#include "opt_spldebug.h"
#endif

#include <sys/types.h>
#include <sys/systm.h>

#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>

#include <x86/cpuvar.h>
#include <x86/cputypes.h>

void spllower(int);
void spllower_end(void);
void cx8_spllower(int);
void cx8_spllower_end(void);
void cx8_spllower_patch(void);

void mutex_spin_exit_end(void);
void i686_mutex_spin_exit(int);
void i686_mutex_spin_exit_end(void);
void i686_mutex_spin_exit_patch(void);

void membar_consumer(void);
void membar_consumer_end(void);
void membar_sync(void);
void membar_sync_end(void);
void sse2_lfence(void);
void sse2_lfence_end(void);
void sse2_mfence(void);
void sse2_mfence_end(void);

void _atomic_cas_64(void);
void _atomic_cas_64_end(void);
void _atomic_cas_cx8(void);
void _atomic_cas_cx8_end(void);

extern void *x86_lockpatch[];
extern void *x86_clacpatch[];
extern void *x86_stacpatch[];
extern void *x86_retpatch[];
extern void *atomic_lockpatch[];

#define X86_NOP		0x90
#define X86_REP		0xf3
#define X86_RET		0xc3
#define X86_CS		0x2e
#define X86_DS		0x3e
#define X86_GROUP_0F	0x0f

/*
 * 'ptr' points at a pc-relative jump or call inside the replacement
 * routine that starts at from_s.  Once that routine has been copied to
 * to_s, locate the corresponding 32-bit displacement in the copy and
 * re-bias it by (from_s - to_s) so that it still reaches the same
 * absolute target.  Optional branch-hint prefixes and the 0x0f byte of
 * two-byte conditional jumps are skipped to find the displacement.
 */
static void
adjust_jumpoff(uint8_t *ptr, void *from_s, void *to_s)
{

	/* Branch hints */
	if (ptr[0] == X86_CS || ptr[0] == X86_DS)
		ptr++;
	/* Conditional jumps */
	if (ptr[0] == X86_GROUP_0F)
		ptr++;
	/* 4-byte relative jump or call */
	*(uint32_t *)(ptr + 1 - (uintptr_t)from_s + (uintptr_t)to_s) +=
	    ((uint32_t)(uintptr_t)from_s - (uint32_t)(uintptr_t)to_s);
}
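/*
 * Worked example of the fix-up above, with made-up addresses: suppose
 * the replacement routine at from_s = 0x1000 contains a 5-byte
 * "call rel32" at offset 0x10 whose displacement is 0x100, i.e. it
 * targets 0x1000 + 0x15 + 0x100 = 0x1115.  After the routine has been
 * copied to to_s = 0x3000 the unmodified displacement would resolve to
 * 0x3115, so adjust_jumpoff() adds (from_s - to_s) = -0x2000 to it and
 * the copied call keeps targeting 0x1115.
 */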
/*
 * Overwrite the routine at [to_s, to_e) with the same-sized replacement
 * found at [from_s, from_e).  If 'pcrel' is non-NULL it points at a
 * pc-relative jump or call within the replacement whose displacement
 * must be fixed up after the copy.
 */
static void __unused
patchfunc(void *from_s, void *from_e, void *to_s, void *to_e,
    void *pcrel)
{

	if ((uintptr_t)from_e - (uintptr_t)from_s !=
	    (uintptr_t)to_e - (uintptr_t)to_s)
		panic("patchfunc: sizes do not match (from=%p)", from_s);

	memcpy(to_s, from_s, (uintptr_t)to_e - (uintptr_t)to_s);
	if (pcrel != NULL)
		adjust_jumpoff(pcrel, from_s, to_s);

#ifdef GPROF
#ifdef i386
#define MCOUNT_CALL_OFFSET 3
#endif
#ifdef __x86_64__
#define MCOUNT_CALL_OFFSET 5
#endif
	/* Patch mcount call offset */
	adjust_jumpoff((uint8_t *)from_s + MCOUNT_CALL_OFFSET, from_s, to_s);
#endif
}

/*
 * Overwrite up to three consecutive bytes at 'addr'.  A byte value of
 * -1 means the corresponding byte is left untouched.
 */
static inline void __unused
patchbytes(void *addr, const int byte1, const int byte2, const int byte3)
{

	((uint8_t *)addr)[0] = (uint8_t)byte1;
	if (byte2 != -1)
		((uint8_t *)addr)[1] = (uint8_t)byte2;
	if (byte3 != -1)
		((uint8_t *)addr)[2] = (uint8_t)byte3;
}
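/*
 * The calls below follow two patterns: patchbytes(p, X86_NOP, -1, -1)
 * rewrites a single byte (turning a one-byte LOCK prefix into a NOP on
 * uniprocessor kernels), while patchbytes(p, 0x0f, 0xae, 0xe8) and
 * friends rewrite a full three-byte instruction (LFENCE, CLAC, STAC)
 * over placeholder bytes reserved at the patch sites.
 */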
/*
 * Apply the run-time code patches.  Intended to be called once with
 * early == true and once with early == false; repeated calls with the
 * same argument are ignored.
 */
void
x86_patch(bool early)
{
	static bool first, second;
	u_long psl;
	u_long cr0;
	int i;

	if (early) {
		if (first)
			return;
		first = true;
	} else {
		if (second)
			return;
		second = true;
	}

	/* Disable interrupts. */
	psl = x86_read_psl();
	x86_disable_intr();

	/* Disable write protection in supervisor mode. */
	cr0 = rcr0();
	lcr0(cr0 & ~CR0_WP);

#if !defined(GPROF)
	if (!early && ncpu == 1) {
#ifndef LOCKDEBUG
		/* Uniprocessor: kill LOCK prefixes. */
		for (i = 0; x86_lockpatch[i] != 0; i++)
			patchbytes(x86_lockpatch[i], X86_NOP, -1, -1);
		for (i = 0; atomic_lockpatch[i] != 0; i++)
			patchbytes(atomic_lockpatch[i], X86_NOP, -1, -1);
#endif	/* !LOCKDEBUG */
	}
	if (!early && (cpu_feature[0] & CPUID_SSE2) != 0) {
		/*
		 * Faster memory barriers.  We do not need to patch
		 * membar_producer to use SFENCE because on x86
		 * ordinary non-temporal stores are always issued in
		 * program order to main memory and to other CPUs.
		 */
		patchfunc(
		    sse2_lfence, sse2_lfence_end,
		    membar_consumer, membar_consumer_end,
		    NULL
		);
		patchfunc(
		    sse2_mfence, sse2_mfence_end,
		    membar_sync, membar_sync_end,
		    NULL
		);
	}
#endif	/* GPROF */

#ifdef i386
	/*
	 * Patch early and late.  Second time around the 'lock' prefix
	 * may be gone.
	 */
	if ((cpu_feature[0] & CPUID_CX8) != 0) {
		patchfunc(
		    _atomic_cas_cx8, _atomic_cas_cx8_end,
		    _atomic_cas_64, _atomic_cas_64_end,
		    NULL
		);
	}
#endif	/* i386 */

#if !defined(SPLDEBUG)
	if (!early && (cpu_feature[0] & CPUID_CX8) != 0) {
		/* Faster splx(), mutex_spin_exit(). */
		patchfunc(
		    cx8_spllower, cx8_spllower_end,
		    spllower, spllower_end,
		    cx8_spllower_patch
		);
#if defined(i386) && !defined(LOCKDEBUG)
		patchfunc(
		    i686_mutex_spin_exit, i686_mutex_spin_exit_end,
		    mutex_spin_exit, mutex_spin_exit_end,
		    i686_mutex_spin_exit_patch
		);
#endif	/* i386 && !LOCKDEBUG */
	}
#endif	/* !SPLDEBUG */

	/*
	 * On some Opteron revisions, locked operations erroneously
	 * allow memory references to be `bled' outside of critical
	 * sections.  Apply workaround.
	 */
	if (cpu_vendor == CPUVENDOR_AMD &&
	    (CPUID_TO_FAMILY(cpu_info_primary.ci_signature) == 0xe ||
	    (CPUID_TO_FAMILY(cpu_info_primary.ci_signature) == 0xf &&
	    CPUID_TO_EXTMODEL(cpu_info_primary.ci_signature) < 0x4))) {
		for (i = 0; x86_retpatch[i] != 0; i++) {
			/* ret,nop,nop,ret -> lfence,ret */
			patchbytes(x86_retpatch[i], 0x0f, 0xae, 0xe8);
		}
	}

#ifdef amd64
	/*
	 * If SMAP is present then patch the prepared holes with clac/stac
	 * instructions.
	 *
	 * clac = 0x0f, 0x01, 0xca
	 * stac = 0x0f, 0x01, 0xcb
	 */
	if (!early && cpu_feature[5] & CPUID_SEF_SMAP) {
		KASSERT(rcr4() & CR4_SMAP);
		for (i = 0; x86_clacpatch[i] != NULL; i++) {
			/* ret,int3,int3 -> clac */
			patchbytes(x86_clacpatch[i],
			    0x0f, 0x01, 0xca);
		}
		for (i = 0; x86_stacpatch[i] != NULL; i++) {
			/* ret,int3,int3 -> stac */
			patchbytes(x86_stacpatch[i],
			    0x0f, 0x01, 0xcb);
		}
	}
#endif

	/* Write back and invalidate cache, flush pipelines. */
	wbinvd();
	x86_flush();
	x86_write_psl(psl);

	/* Re-enable write protection. */
	lcr0(cr0);
}
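#if 0
/*
 * Illustrative sketch only, kept under #if 0 so it is never built as
 * part of the kernel: a standalone user-space program that mimics the
 * patchfunc()/adjust_jumpoff() steps above on an ordinary byte buffer,
 * so the size check and the pc-relative fix-up can be verified in
 * isolation.  All names and addresses in it are made up.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

int
main(void)
{
	/*
	 * Pretend replacement routine: a 5-byte "call rel32"
	 * (0xe8 + little-endian displacement 0x100) followed by a ret,
	 * assumed to live at address 0x1000 and to be copied to 0x3000.
	 */
	uint8_t from[6] = { 0xe8, 0x00, 0x01, 0x00, 0x00, 0xc3 };
	uint8_t to[sizeof(from)];
	const uint32_t from_addr = 0x1000, to_addr = 0x3000;
	uint32_t disp, old_target, new_target;

	/* patchfunc(): sizes must match, then the code is copied. */
	assert(sizeof(from) == sizeof(to));
	memcpy(to, from, sizeof(to));

	/* adjust_jumpoff(): re-bias the rel32 by (from_s - to_s). */
	memcpy(&disp, &to[1], sizeof(disp));
	old_target = from_addr + 5 + disp;
	disp += from_addr - to_addr;
	memcpy(&to[1], &disp, sizeof(disp));
	new_target = to_addr + 5 + disp;

	/* The copied call still reaches the original absolute target. */
	assert(new_target == old_target);
	return 0;
}
#endif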