1 /* Copyright (C) 2021 Free Software Foundation, Inc.
2 Contributed by Oracle.
3
4 This file is part of GNU Binutils.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
20
21 #include "config.h"
22 #include <alloca.h>
23 #include <dlfcn.h>
24 #include <stdlib.h>
25 #include <signal.h>
26 #include <unistd.h>
27 #include <pthread.h>
28
29 #include "gp-defs.h"
30 #include "collector.h"
31 #include "gp-experiment.h"
32 #include "memmgr.h"
33 #include "tsd.h"
34
35 /* Get dynamic module interface*/
36 #include "collector_module.h"
37
38 /* Get definitions for SP_LEAF_CHECK_MARKER, SP_TRUNC_STACK_MARKER */
39 #include "data_pckts.h"
40
#if ARCH(SPARC)
/* Layout of the register-save area of a SPARC stack frame, per the
   SPARC ABI: 8 locals, 6 input args, saved FP and return PC, plus the
   argument dump/extension areas.  Used to walk saved frames.  */
struct frame
{
  long fr_local[8];		/* saved locals */
  long fr_arg[6];		/* saved arguments [0 - 5] */
  struct frame *fr_savfp;	/* saved frame pointer */
  long fr_savpc;		/* saved program counter */
#if WSIZE(32)
  char *fr_stret;		/* struct return addr */
#endif
  long fr_argd[6];		/* arg dump area */
  long fr_argx[1];		/* array of args past the sixth */
};

#elif ARCH(Intel)
/* Minimal x86/x86_64 frame as laid down by the conventional prologue
   (push %Xbp; mov %Xsp,%Xbp): saved frame pointer, then return PC.  */
struct frame
{
  unsigned long fr_savfp;	/* caller's frame pointer */
  unsigned long fr_savpc;	/* return address */
};
#endif
62
/* Set the debug trace level */
#define DBG_LT0 0
#define DBG_LT1 1
#define DBG_LT2 2
#define DBG_LT3 3

/* Optional JVM hook, resolved via dlsym("Async_VM_ReadByteInstruction")
   in __collector_ext_unwind_init(); getByteInstruction() uses it to read
   instruction bytes from VM-managed memory.  Return codes below.  */
int (*__collector_VM_ReadByteInstruction)(unsigned char *) = NULL;
#define VM_NO_ACCESS (-1)
#define VM_NOT_VM_MEMORY (-2)
#define VM_NOT_X_SEGMENT (-3)

/* True when P lies in the half-open interval [BGN, END).  */
#define isInside(p, bgn, end) ((p) >= (bgn) && (p) < (end))
75
76 /*
77 * Weed through all the arch dependent stuff to get the right definition
 * for 'pc' in the ucontext structure. The system header files are a mess
79 * dealing with all the arch (just look for PC, R_PC, REG_PC).
80 *
81 */
82
#if ARCH(SPARC)

/* True when pc X lies inside the end-of-task barrier code
   (bounds resolved by dlsym in __collector_ext_unwind_init;
   see the barrier comment later in this file).  */
#define IN_BARRIER(x) \
	( barrier_hdl && \
	  (unsigned long)x >= barrier_hdl && \
	  (unsigned long)x < barrier_hdlx )
static unsigned long barrier_hdl = 0;
static unsigned long barrier_hdlx = 0;

#if WSIZE(64)
/* SPARC V9 ABI stack bias: saved frames live at %sp + 2047.  */
#define STACK_BIAS 2047
/* True when pc X lies inside __misalign_trap_handler (see the
   trap-handler comment later in this file).  */
#define IN_TRAP_HANDLER(x) \
	( misalign_hdl && \
	  (unsigned long)x >= misalign_hdl && \
	  (unsigned long)x < misalign_hdlx )
static unsigned long misalign_hdl = 0;
static unsigned long misalign_hdlx = 0;
#elif WSIZE(32)
#define STACK_BIAS 0
#endif

/* Accessors for the saved registers in a ucontext.  */
#if WSIZE(64)
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_PC])
#else
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_PC])
#endif
113
114 #elif ARCH(Intel)
115 #include "opcodes/disassemble.h"
116
/* fprintf-style sink for the opcodes disassembler; output is not
   needed, so everything is discarded.  */
static int
fprintf_func (void *arg ATTRIBUTE_UNUSED, const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}
122
/* Styled variant of fprintf_func; likewise discards all output.  */
static int
fprintf_styled_func (void *arg ATTRIBUTE_UNUSED,
		     enum disassembler_style st ATTRIBUTE_UNUSED,
		     const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}
130
131 /* Get LENGTH bytes from info's buffer, at target address memaddr.
132 Transfer them to myaddr. */
133 static int
read_memory_func(bfd_vma memaddr,bfd_byte * myaddr,unsigned int length,disassemble_info * info)134 read_memory_func (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
135 disassemble_info *info)
136 {
137 unsigned int opb = info->octets_per_byte;
138 size_t end_addr_offset = length / opb;
139 size_t max_addr_offset = info->buffer_length / opb;
140 size_t octets = (memaddr - info->buffer_vma) * opb;
141 if (memaddr < info->buffer_vma
142 || memaddr - info->buffer_vma > max_addr_offset
143 || memaddr - info->buffer_vma + end_addr_offset > max_addr_offset
144 || (info->stop_vma && (memaddr >= info->stop_vma
145 || memaddr + end_addr_offset > info->stop_vma)))
146 return -1;
147 memcpy (myaddr, info->buffer + octets, length);
148 return 0;
149 }
150
/* Disassembler callback for printing a target address; the text is not
   needed, so this is a no-op.  */
static void
print_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
		    disassemble_info *info ATTRIBUTE_UNUSED) { }
154
/* Disassembler callback: no symbol table is available here, so there is
   never a symbol at any address.  */
static asymbol *
symbol_at_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
			disassemble_info *info ATTRIBUTE_UNUSED)
{
  return NULL;
}
161
/* Disassembler callback: treat every symbol as valid (none are ever
   supplied, see symbol_at_address_func).  */
static bfd_boolean
symbol_is_valid (asymbol *sym ATTRIBUTE_UNUSED,
		 disassemble_info *info ATTRIBUTE_UNUSED)
{
  return TRUE;
}
168
/* Disassembler callback for memory read errors; failure is already
   reported through read_memory_func's return value, so do nothing.  */
static void
memory_error_func (int status ATTRIBUTE_UNUSED, bfd_vma addr ATTRIBUTE_UNUSED,
		   disassemble_info *info ATTRIBUTE_UNUSED) { }
172
173
174 #if WSIZE(32)
175 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EIP])
176 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP])
177 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EBP])
178
179 #elif WSIZE(64)
180 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP])
181 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP])
182 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RBP])
183 #endif /* WSIZE() */
184
185 #elif ARCH(Aarch64)
186 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[15])
187 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[13])
188 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[14])
189 #endif /* ARCH() */
190
/*
 * FILL_CONTEXT() for all platforms
 * Could use getcontext() except:
 * - it's not guaranteed to be async signal safe
 * - it's a system call and not that lightweight
 * - it's not portable as of POSIX.1-2008
 * So we just use low-level mechanisms to fill in the few fields we need.
 */
#if ARCH(SPARC)
#if WSIZE(32)
/* "ta 3" is the window-flush trap: spills the register windows to the
   stack so the walker can read saved frames.  PC is left 0; only SP is
   needed to start the walk.  */
#define FILL_CONTEXT(context) \
    { \
      greg_t fp; \
      __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
      __asm__ __volatile__( "ta 3" ); \
      GET_SP(context) = fp; \
      GET_PC(context) = (greg_t)0; \
    }

#elif WSIZE(64)
/* Same as the 32-bit variant, but V9 uses the "flushw" instruction
   instead of the "ta 3" trap.  */
#define FILL_CONTEXT(context) \
    { \
	greg_t fp; \
	__asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
	__asm__ __volatile__( "flushw" ); \
	GET_SP(context) = fp; \
	GET_PC(context) = (greg_t)0; \
    }
#endif /* WSIZE() */

#elif ARCH(Intel)
/* Capture sp/fp/pc via the collector's tiny asm helpers.
   NOTE(review): ss_size is set to a fixed 0x100000 -- confirm how the
   walker consumes uc_stack before relying on it as a real bound.  */
#define FILL_CONTEXT(context) \
    { \
      context->uc_link = NULL; \
      void *sp = __collector_getsp(); \
      GET_SP(context) = (intptr_t)sp; \
      GET_FP(context) = (intptr_t)__collector_getfp(); \
      GET_PC(context) = (intptr_t)__collector_getpc(); \
      context->uc_stack.ss_sp = sp; \
      context->uc_stack.ss_size = 0x100000; \
    }

#elif ARCH(Aarch64)
/* NOTE(review): this calls getcontext() despite the async-signal-safety
   caveat in the comment above -- confirm this is acceptable here.
   SP is then overridden with the current frame address.  */
#define FILL_CONTEXT(context) \
    { getcontext(context);  \
      context->uc_mcontext.sp = (__u64) __builtin_frame_address(0); \
    }

#endif /* ARCH() */
240
241 static int
getByteInstruction(unsigned char * p)242 getByteInstruction (unsigned char *p)
243 {
244 if (__collector_VM_ReadByteInstruction)
245 {
246 int v = __collector_VM_ReadByteInstruction (p);
247 if (v != VM_NOT_VM_MEMORY)
248 return v;
249 }
250 return *p;
251 }
252
/* Output handle for frame-info packets (created in unwind_init).  */
struct DataHandle *dhndl = NULL;

/* TSD key whose per-thread slot stores the thread's end-of-stack
   address (set in __collector_ext_unwind_key_init).  */
static unsigned unwind_key = COLLECTOR_TSD_INVALID_KEY;

/* To support two OpenMP API's we use a pointer
 * to the actual function.
 */
int (*__collector_omp_stack_trace)(char*, int, hrtime_t, void*) = NULL;
int (*__collector_mpi_stack_trace)(char*, int, hrtime_t) = NULL;

/* Frame-count limits; overridable via GPROFNG_MAX_CALL_STACK_DEPTH and
   GPROFNG_JAVA_MAX_CALL_STACK_DEPTH (see unwind_init).  */
#define DEFAULT_MAX_NFRAMES 256
static int max_native_nframes = DEFAULT_MAX_NFRAMES;
static int max_java_nframes = DEFAULT_MAX_NFRAMES;

/* Worst-case buffer sizes: one long per native frame, two per Java
   frame, plus packet/info headers.  */
#define NATIVE_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) )
#define JAVA_FRAME_BYTES(nframes)   ( ((nframes)+1) * sizeof(long) * 2 + 16 )
#define OVERHEAD_BYTES ( 2 * sizeof(long) + 2 * sizeof(Stack_info) )

/* Multipliers for the rolling hash over stack words (see compute_uid).
   The *_INV constants are presumably the multiplicative inverses of
   ROOT_UID/ROOT_IDX modulo 2^64, used to peel words back off the hash
   during compression -- TODO(review): confirm.  */
#define ROOT_UID 801425552975190205ULL
#define ROOT_UID_INV 92251691606677ULL
#define ROOT_IDX 13907816567264074199ULL
#define ROOT_IDX_INV 2075111ULL
#define UIDTableSize 1048576
/* Hash table of uids already written to the output; racy by design
   (see the "benign data races" comments in compute_uid/getUID).  */
static volatile uint64_t *UIDTable = NULL;
static volatile int seen_omp = 0;

static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode);
static FrameInfo compute_uid (Frame_packet *frp);
static int omp_no_walk = 0;

#if ARCH(Intel)
#define ValTableSize 1048576
#define OmpValTableSize 65536
static unsigned long *AddrTable_RA_FROMFP = NULL; // Cache for RA_FROMFP pcs
static unsigned long *AddrTable_RA_EOSTCK = NULL; // Cache for RA_EOSTCK pcs
static struct WalkContext *OmpCurCtxs = NULL;
static struct WalkContext *OmpCtxs = NULL;
static uint32_t *OmpVals = NULL;
static unsigned long *OmpRAs = NULL;
static unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend);
static int parse_x86_AVX_instruction (unsigned char *pc);

/* Machine state carried through one step of the x86 stack walk.  */
struct WalkContext
{
  unsigned long pc;
  unsigned long sp;
  unsigned long fp;
  unsigned long ln;
  unsigned long sbase;		/* stack boundary */
  unsigned long tbgn;		/* current memory segment start */
  unsigned long tend;		/* current memory segment end */
};
#endif
306
307 #if defined(DEBUG) && ARCH(Intel)
308 #include <execinfo.h>
309
310 static void
dump_stack(int nline)311 dump_stack (int nline)
312 {
313 if ((__collector_tracelevel & SP_DUMP_STACK) == 0)
314 return;
315
316 enum Constexpr { MAX_SIZE = 1024 };
317 void *array[MAX_SIZE];
318 size_t sz = backtrace (array, MAX_SIZE);
319 char **strings = backtrace_symbols (array, sz);
320 DprintfT (SP_DUMP_STACK, "\ndump_stack: %d size=%d\n", nline, (int) sz);
321 for (int i = 0; i < sz; i++)
322 DprintfT (SP_DUMP_STACK, " %3d: %p %s\n", i, array[i],
323 strings[i] ? strings[i] : "???");
324 }
325
326 #define dump_targets(nline, ntrg, targets) \
327 if ((__collector_tracelevel & SP_DUMP_UNWIND) != 0) \
328 for(int i = 0; i < ntrg; i++) \
329 DprintfT (SP_DUMP_UNWIND, " %2d: 0x%lx\n", i, (long) targets[i])
330 #else
331 #define dump_stack(x)
332 #define dump_targets(nline, ntrg, targets)
333 #endif
334
335 void
__collector_ext_unwind_key_init(int isPthread,void * stack)336 __collector_ext_unwind_key_init (int isPthread, void * stack)
337 {
338 void * ptr = __collector_tsd_get_by_key (unwind_key);
339 if (ptr == NULL)
340 {
341 TprintfT (DBG_LT2, "__collector_ext_unwind_key_init: cannot get tsd\n");
342 return;
343 }
344 if (isPthread)
345 {
346 size_t stack_size = 0;
347 void *stack_addr = 0;
348 pthread_t pthread = pthread_self ();
349 pthread_attr_t attr;
350 int err = pthread_getattr_np (pthread, &attr);
351 TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: pthread: 0x%lx err: %d\n", pthread, err);
352 if (err == 0)
353 {
354 err = pthread_attr_getstack (&attr, &stack_addr, &stack_size);
355 if (err == 0)
356 stack_addr = (char*) stack_addr + stack_size;
357 TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: stack_size=0x%lx eos=%p err=%d\n",
358 (long) stack_size, stack_addr, err);
359 err = pthread_attr_destroy (&attr);
360 TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: destroy: %d\n", err);
361 }
362 *(void**) ptr = stack_addr;
363 }
364 else
365 *(void**) ptr = stack; // cloned thread
366 }
367
/* Initialize the unwind subsystem: allocate the UID hash table, read
   stack-depth limits from the environment, resolve optional JVM / trap
   / barrier symbols, allocate the Intel caches for the OpenMP walk,
   open the frame-info output (when RECORD is nonzero) and create the
   TSD key that holds each thread's end-of-stack address.  */
void
__collector_ext_unwind_init (int record)
{
  int sz = UIDTableSize * sizeof (*UIDTable);
  UIDTable = (uint64_t*) __collector_allocCSize (__collector_heap, sz, 1);
  if (UIDTable == NULL)
    {
      /* Without the table no call stacks can be recorded.  */
      __collector_terminate_expt ();
      return;
    }
  CALL_UTIL (memset)((void*) UIDTable, 0, sz);

  /* Java frame limit, clamped to [5, MAX_STACKDEPTH].  */
  char *str = CALL_UTIL (getenv)("GPROFNG_JAVA_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_java_nframes = n;
	}
    }

  /* Native frame limit, clamped the same way.  */
  str = CALL_UTIL (getenv)("GPROFNG_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr = str;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_native_nframes = n;
	}
    }

  TprintfT (DBG_LT0, "GPROFNG_MAX_CALL_STACK_DEPTH=%d GPROFNG_JAVA_MAX_CALL_STACK_DEPTH=%d\n",
	    max_native_nframes, max_java_nframes);
  omp_no_walk = 1;

  /* Optional hook exported by the JVM for reading JIT-compiled code.  */
  if (__collector_VM_ReadByteInstruction == NULL)
    __collector_VM_ReadByteInstruction = (int(*)()) dlsym (RTLD_DEFAULT, "Async_VM_ReadByteInstruction");

#if ARCH(SPARC)
#if WSIZE(64)
  /* Bounds of the misaligned-access trap handler (see the trap-handler
     comment later in this file); if the end symbol is missing, assume
     a fixed 292-byte handler.  */
  misalign_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler");
  misalign_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler_end");
  if (misalign_hdlx == 0)
    misalign_hdlx = misalign_hdl + 292;
  /* End-of-task barrier bounds; both symbols are needed or the
     special-case unwind (see IN_BARRIER) is disabled.  */
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#else
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#endif /* WSIZE() */

#elif ARCH(Intel)
  /* pc-classification caches for the heuristic walker; allocation
     failure is tolerated here (the tables simply stay NULL).  */
  sz = ValTableSize * sizeof (*AddrTable_RA_FROMFP);
  AddrTable_RA_FROMFP = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  sz = ValTableSize * sizeof (*AddrTable_RA_EOSTCK);
  AddrTable_RA_EOSTCK = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  if (omp_no_walk && (__collector_omp_stack_trace != NULL || __collector_mpi_stack_trace != NULL))
    {
      /* Per-context caches used when OpenMP/MPI tracing skips the full
	 walk; these are required, so failure ends the experiment.  */
      sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
      if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
	{
	  TprintfT (0, "unwind_init() ERROR: failed; terminating experiment\n");
	  __collector_terminate_expt ();
	  return;
	}
    }
#endif /* ARCH() */

  if (record)
    {
      dhndl = __collector_create_handle (SP_FRINFO_FILE);
      __collector_log_write ("<%s name=\"%s\" format=\"binary\"/>\n", SP_TAG_DATAPTR, SP_FRINFO_FILE);
    }

  unwind_key = __collector_tsd_create_key (sizeof (void*), NULL, NULL);
  if (unwind_key == COLLECTOR_TSD_INVALID_KEY)
    {
      TprintfT (0, "unwind_init: ERROR: TSD key create failed.\n");
      __collector_log_write ("<%s kind=\"%s\" id=\"%d\">TSD key not created</%s>\n",
			     SP_TAG_EVENT, SP_JCMD_CERROR, COL_ERROR_GENERAL, SP_TAG_EVENT);
      return;
    }
  TprintfT (0, "unwind_init() completed normally\n");
  return;
}
475
/* Release the frame-info output handle at experiment shutdown.  */
void
__collector_ext_unwind_close ()
{
  __collector_delete_handle (dhndl);
  dhndl = NULL;
}
482
/* Return the return address LEVEL frames above our caller, or NULL if
   unwinding is not initialized or the captured stack is too shallow.
   The "+ 4" / "+ 2" offsets skip this function's and its caller's own
   frames.  */
void*
__collector_ext_return_address (unsigned level)
{
  if (NULL == UIDTable) //unwind not initialized yet
    return NULL;
  unsigned size = (level + 4) * sizeof (long); // need to strip __collector_get_return_address and its caller
  ucontext_t context;
  FILL_CONTEXT ((&context));
  char* buf = (char*) alloca (size);
  /* NOTE(review): alloca() has no failure return; this check is dead
     defensive code and can never fire.  */
  if (buf == NULL)
    {
      TprintfT (DBG_LT0, "__collector_get_return_address: ERROR: alloca(%d) fails\n", size);
      return NULL;
    }
  /* Need at least level+3 longs unwound so lbuf[level+2] is valid.  */
  int sz = stack_unwind (buf, size, NULL, NULL, &context, 0);
  if (sz < (level + 3) * sizeof (long))
    {
      TprintfT (DBG_LT0, "__collector_get_return_address: size=%d, but stack_unwind returns %d\n", size, sz);
      return NULL;
    }
  long *lbuf = (long*) buf;
  TprintfT (DBG_LT2, "__collector_get_return_address: return %lx\n", lbuf[level + 2]);
  return (void *) (lbuf[level + 2]);
}
507 /*
508 * Collector interface method getFrameInfo
509 */
/* Build a Frame_packet describing the call stack for one event and
   return its FrameInfo id (via compute_uid).  The low 16 bits of MODE
   select the stack source:
     FRINFO_FROM_STACK_ARG / FRINFO_FROM_STACK - unwind the current
       context; ARG is an optional stack boundary pointer;
     FRINFO_FROM_UC    - ARG is a ucontext_t* to unwind from;
     FRINFO_FROM_ARRAY - ARG is a CM_Array with a prebuilt stack image.
   The FRINFO_NO_WALK flag disables the instruction-level walk.
   Returns 0 on invalid arguments.  */
FrameInfo
__collector_get_frame_info (hrtime_t ts, int mode, void *arg)
{
  ucontext_t *context = NULL;
  void *bptr = NULL;
  CM_Array *array = NULL;

  int unwind_mode = 0;
  int do_walk = 1;

  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  int bmode = mode & 0xffff;
  int pseudo_context = 0;
  if (bmode == FRINFO_FROM_STACK_ARG || bmode == FRINFO_FROM_STACK)
    {
      bptr = arg;
      context = (ucontext_t*) alloca (sizeof (ucontext_t));
      FILL_CONTEXT (context);
      unwind_mode |= bmode;
    }
  else if (bmode == FRINFO_FROM_UC)
    {
      context = (ucontext_t*) arg;
      if (context == NULL)
	return (FrameInfo) 0;
      /* SP of 0 marks a context with no real stack; skip Java unwind.  */
      if (GET_SP (context) == 0)
	pseudo_context = 1;
    }
  else if (bmode == FRINFO_FROM_ARRAY)
    {
      array = (CM_Array*) arg;
      if (array == NULL || array->length <= 0)
	return (FrameInfo) 0;
    }
  else
    return (FrameInfo) 0;

  /* Worst-case packet size: headers + native frames, plus Java frames
     when the JVM agent is active and a real context is available.  */
  int max_frame_size = OVERHEAD_BYTES + NATIVE_FRAME_BYTES (max_native_nframes);
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    max_frame_size += JAVA_FRAME_BYTES (max_java_nframes);

  Frame_packet *frpckt = alloca (sizeof (Frame_packet) + max_frame_size);
  frpckt->type = FRAME_PCKT;
  frpckt->hsize = sizeof (Frame_packet);

  char *d = (char*) (frpckt + 1);	/* write cursor */
  int size = max_frame_size;		/* bytes remaining in buffer */

#define MIN(a,b) ((a)<(b)?(a):(b))
  /* get Java info */
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    {
      /* use only 2/3 of the buffer and leave the rest for the native stack */
      int tmpsz = MIN (size, JAVA_FRAME_BYTES (max_java_nframes));
      if (tmpsz > 0)
	{
	  int sz = __collector_ext_jstack_unwind (d, tmpsz, context);
	  d += sz;
	  size -= sz;
	}
    }

  /* get native stack */
  if (context)
    {
      /* Reserve the Stack_info header now; fill in hsize after the
	 frames are appended.  */
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
#if ARCH(Intel)
      if (omp_no_walk == 0)
	do_walk = 1;
#endif
      if (do_walk == 0)
	unwind_mode |= FRINFO_NO_WALK;

      int tmpsz = MIN (size, NATIVE_FRAME_BYTES (max_native_nframes));
      if (tmpsz > 0)
	{
	  sz = stack_unwind (d, tmpsz, bptr, NULL, context, unwind_mode);
	  d += sz;
	  size -= sz;
	}
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* create a stack image from user data */
  if (array && array->length > 0)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
      sz = array->length;
      if (sz > size)
	sz = size; // YXXX should we mark this with truncation frame?
      __collector_memcpy (d, array->bytes, sz);
      d += sz;
      size -= sz;
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* Compute the total size */
  frpckt->tsize = d - (char*) frpckt;
  FrameInfo uid = compute_uid (frpckt);
  return uid;
}
620
/* Hash the info records in FRP into a 64-bit frame id (uid), write the
   packet the first time that uid is seen, and return the uid.  Two
   rolling hashes are kept: 'uid' is the id itself, 'idx' drives the
   three probes into UIDTable.  A compression pass then replaces any
   stack suffix whose hash is already in the table with its 8-byte uid
   (marking the record COMPRESSED_INFO), shrinking the packet.  */
FrameInfo
compute_uid (Frame_packet *frp)
{
  uint64_t idxs[LAST_INFO];
  uint64_t uid = ROOT_UID;
  uint64_t idx = ROOT_IDX;

  Common_info *cinfo = (Common_info*) ((char*) frp + frp->hsize);
  char *end = (char*) frp + frp->tsize;
  for (;;)
    {
      /* Stop at the end of the packet or on a malformed record.  */
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;

      /* Start with a different value to avoid matching with uid */
      uint64_t uidt = 1;
      uint64_t idxt = 1;
      long *ptr = (long*) ((char*) cinfo + cinfo->hsize);
      long *bnd = (long*) ((char*) cinfo + sizeof (Common_info));
      TprintfT (DBG_LT2, "compute_uid: Cnt=%ld: ", (long) cinfo->hsize);
      /* Fold the record's words in, last word first, into both the
	 per-record and the packet-wide hashes.  */
      while (ptr > bnd)
	{
	  long val = *(--ptr);
	  tprintf (DBG_LT2, "0x%8.8llx ", (unsigned long long) val);
	  uidt = (uidt + val) * ROOT_UID;
	  idxt = (idxt + val) * ROOT_IDX;
	  uid = (uid + val) * ROOT_UID;
	  idx = (idx + val) * ROOT_IDX;
	}
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  /* Remember each record's own hash for the compression pass.  */
	  cinfo->uid = uidt;
	  idxs[cinfo->kind] = idxt;
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  tprintf (DBG_LT2, "\n");

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;
  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;

  /* Any hit means the packet was already written; just return the id.  */
  if (found1 || found2 || found3)
    return (FrameInfo) uid;
  frp->uid = uid;

  /* Compress info's */
  cinfo = (Common_info*) ((char*) frp + frp->hsize);
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  long *ptr = (long*) ((char*) cinfo + sizeof (Common_info));
	  long *bnd = (long*) ((char*) cinfo + cinfo->hsize);
	  /* Walk forward, peeling one word at a time off the hashes,
	     probing for a suffix that was recorded earlier.  */
	  uint64_t uidt = cinfo->uid;
	  uint64_t idxt = idxs[cinfo->kind];
	  int found = 0;
	  int first = 1;
	  while (ptr < bnd - 1)
	    {
	      int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	      if (UIDTable[idx1] == uidt)
		{
		  found = 1;
		  break;
		}
	      else if (first)
		{
		  /* Record the full record's uid so later packets can
		     link to it.  */
		  first = 0;
		  UIDTable[idx1] = uidt;
		}
	      long val = *ptr++;
	      uidt = uidt * ROOT_UID_INV - val;
	      idxt = idxt * ROOT_IDX_INV - val;
	    }
	  if (found)
	    {
	      /* Replace the known suffix with its 8-byte uid, stored
		 lowest byte first (unless the whole record matched on
		 the first probe), then close the gap.  */
	      char *d = (char*) ptr;
	      char *s = (char*) bnd;
	      if (!first)
		{
		  int i;
		  for (i = 0; i<sizeof (uidt); i++)
		    {
		      *d++ = (char) uidt;
		      uidt = uidt >> 8;
		    }
		}
	      int delta = s - d;
	      while (s < end)
		*d++ = *s++;
	      cinfo->kind |= COMPRESSED_INFO;
	      cinfo->hsize -= delta;
	      frp->tsize -= delta;
	      end -= delta;
	    }
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  __collector_write_packet (dhndl, (CM_Packet*) frp);
  return (FrameInfo) uid;
}
747
/* Hash the long-words in ARG, chained onto the parent id SUID (if any),
   into a frame id; write a Uid_packet the first time the id is seen and
   return the id.  Uses the same rolling hash, three-probe table and
   tail compression as compute_uid().  Returns -1 on a misaligned or
   odd-sized payload, SUID on an empty one.  */
FrameInfo
__collector_getUID (CM_Array *arg, FrameInfo suid)
{
  /* Payload must be a whole number of longs, long-aligned.  */
  if (arg->length % sizeof (long) != 0 ||
      (long) arg->bytes % sizeof (long) != 0)
    return (FrameInfo) - 1;
  if (arg->length == 0)
    return suid;

  uint64_t uid = suid ? suid : 1;
  uint64_t idx = suid ? suid : 1;
  long *ptr = (long*) ((char*) arg->bytes + arg->length);
  long *bnd = (long*) (arg->bytes);
  /* Fold the words in, last word first, as compute_uid() does.  */
  while (ptr > bnd)
    {
      long val = *(--ptr);
      uid = (uid + val) * ROOT_UID;
      idx = (idx + val) * ROOT_IDX;
    }

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;

  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;
  if (found1 || found2 || found3)
    return (FrameInfo) uid;

  /* Not seen before: build and write a Uid_packet.  */
  int sz = sizeof (Uid_packet) + arg->length;
  if (suid)
    sz += sizeof (suid);
  Uid_packet *uidp = alloca (sz);
  uidp->tsize = sz;
  uidp->type = UID_PCKT;
  uidp->flags = 0;
  uidp->uid = uid;

  /* Compress */
  ptr = (long*) (arg->bytes);
  bnd = (long*) ((char*) arg->bytes + arg->length);
  long *dst = (long*) (uidp + 1);
  uint64_t uidt = uid;
  uint64_t idxt = idx;
  uint64_t luid = suid; /* link uid */

  while (ptr < bnd)
    {

      long val = *ptr++;
      *dst++ = val;

      /* Peel VAL off the hashes and probe for an already-known tail.
	 NOTE(review): (bnd - ptr) counts longs while sizeof (uidt) is
	 in bytes -- confirm this margin is intentional.  */
      if ((bnd - ptr) > sizeof (uidt))
	{
	  uidt = uidt * ROOT_UID_INV - val;
	  idxt = idxt * ROOT_IDX_INV - val;
	  int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	  if (UIDTable[idx1] == uidt)
	    {
	      luid = uidt;
	      break;
	    }
	}
    }
  if (luid)
    {
      /* Terminate the packet with the 8-byte link uid, stored lowest
	 byte first.  */
      char *d = (char*) dst;
      for (int i = 0; i<sizeof (luid); i++)
	{
	  *d++ = (char) luid;
	  luid = luid >> 8;
	}
      uidp->flags |= COMPRESSED_INFO;
      uidp->tsize = d - (char*) uidp;
    }
  __collector_write_packet (dhndl, (CM_Packet*) uidp);

  return (FrameInfo) uid;
}
845
846 int
__collector_getStackTrace(void * buf,int size,void * bptr,void * eptr,void * arg)847 __collector_getStackTrace (void *buf, int size, void *bptr, void *eptr, void *arg)
848 {
849 if (arg == (void*) __collector_omp_stack_trace)
850 seen_omp = 1;
851 int do_walk = 1;
852 if (arg == NULL || arg == (void*) __collector_omp_stack_trace)
853 {
854 do_walk = (arg == (void*) __collector_omp_stack_trace && omp_no_walk) ? 0 : 1;
855 ucontext_t *context = (ucontext_t*) alloca (sizeof (ucontext_t));
856 FILL_CONTEXT (context);
857 arg = context;
858 }
859 int unwind_mode = 0;
860 if (do_walk == 0)
861 unwind_mode |= FRINFO_NO_WALK;
862 return stack_unwind (buf, size, bptr, eptr, arg, unwind_mode);
863 }
864
865 #if ARCH(SPARC)
866 /*
867 * These are important data structures taken from the header files reg.h and
868 * ucontext.h. They are used for the stack trace algorithm explained below.
869 *
870 * typedef struct ucontext {
871 * u_long uc_flags;
872 * struct ucontext *uc_link;
873 * usigset_t uc_sigmask;
874 * stack_t uc_stack;
875 * mcontext_t uc_mcontext;
876 * long uc_filler[23];
877 * } ucontext_t;
878 *
879 * #define SPARC_MAXREGWINDOW 31
880 *
881 * struct rwindow {
882 * greg_t rw_local[8];
883 * greg_t rw_in[8];
884 * };
885 *
886 * #define rw_fp rw_in[6]
887 * #define rw_rtn rw_in[7]
888 *
889 * struct gwindows {
890 * int wbcnt;
891 * int *spbuf[SPARC_MAXREGWINDOW];
892 * struct rwindow wbuf[SPARC_MAXREGWINDOW];
893 * };
894 *
895 * typedef struct gwindows gwindows_t;
896 *
897 * typedef struct {
898 * gregset_t gregs;
899 * gwindows_t *gwins;
900 * fpregset_t fpregs;
901 * long filler[21];
902 * } mcontext_t;
903 *
 * The stack would look like this when SIGPROF occurs.
905 *
906 * ------------------------- <- high memory
907 * | |
908 * | |
909 * -------------------------
910 * | |
911 * ------------------------- <- fp' <-|
912 * | | |
913 * : : |
914 * | | |
915 * ------------------------- |
916 * | fp |----------|
917 * | |
918 * ------------------------- <- sp'
919 * | | | |
920 * | gwins | <- saved stack pointers & | |
921 * | | register windows | |- mcontext
922 * ------------------------- | |
923 * | gregs | <- saved registers | |
924 * ------------------------- |
925 * | | |- ucontext
926 * ------------------------- <- ucp (ucontext pointer) |
927 * | | |
928 * | | |- siginfo
929 * ------------------------- <- sip (siginfo pointer) |
930 * | |
931 * ------------------------- <- sp
932 *
933 * Then the signal handler is called with:
934 * handler( signo, sip, uip );
935 * When gwins is null, all the stack frames are saved in the user stack.
936 * In that case we can find sp' from gregs and walk the stack for a backtrace.
937 * However, if gwins is not null we will have a more complicated case.
938 * Wbcnt(in gwins) tells you how many saved register windows are valid.
939 * This is important because the kernel does not allocate the entire array.
940 * And the top most frame is saved in the lowest index element. The next
941 * paragraph explains the possible causes.
942 *
943 * There are two routines in the kernel to flush out user register windows.
944 * flush_user_windows and flush_user_windows_to_stack
945 * The first routine will not cause a page fault. Therefore if the user
946 * stack is not in memory, the register windows will be saved to the pcb.
947 * This can happen when the kernel is trying to deliver a signal and
 * the user stack got swapped out. The kernel will then build a new context for
949 * the signal handler and the saved register windows will
950 * be copied to the ucontext as show above. On the other hand,
951 * flush_user_windows_to_stack can cause a page fault, and if it failed
952 * then there is something wrong (stack overflow, misalign).
 * The first saved register window does not necessarily correspond to the
 * first stack frame. So the current stack pointer must be compared with
955 * the stack pointers in spbuf to find a match.
956 *
957 * We will also follow the uc_link field in ucontext to trace also nested
958 * signal stack frames.
959 *
960 */
961
962 /* Dealing with trap handlers.
963 * When a user defined trap handler is invoked the return address
964 * (or actually the address of an instruction that raised the trap)
965 * is passed to the trap handler in %l6, whereas saved %o7 contains
966 * garbage. First, we need to find out if a particular pc belongs
967 * to the trap handler, and if so, take the %l6 value from the stack rather
968 * than %o7 from either the stack or the register.
969 * There are three possible situations represented
970 * by the following stacks:
971 *
972 * MARKER MARKER MARKER
973 * trap handler pc __func pc before 'save' __func pc after 'save'
974 * %l6 %o7 from reg %o7 (garbage)
975 * ... %l6 trap handler pc
976 * ... %l6
977 * ...
978 * where __func is a function called from the trap handler.
979 *
980 * Currently this is implemented to only deal with __misalign_trap_handler
981 * set for v9 FORTRAN applications. Implementation of IN_TRAP_HANDLER
982 * macro shows it. A general solution is postponed.
983 */
984
985 /* Special handling of unwind through the parallel loop barrier code:
986 *
987 * The library defines two symbols, __mt_EndOfTask_Barrier_ and
988 * __mt_EndOfTask_Barrier_Dummy_ representing the first word of
 * the barrier synchronization code, and the first word following
990 * it. Whenever the leaf PC is between these two symbols,
991 * the unwind code is special-cased as follows:
992 * The __mt_EndOfTask_Barrier_ function is guaranteed to be a leaf
993 * function, so its return address is in a register, not saved on
994 * the stack.
995 *
996 * MARKER
997 * __mt_EndOfTask_Barrier_ PC -- the leaf PC
998 * loop body function address for the task -- implied caller of __mt_EndOfTask_Barrier_
999 * this address is taken from the %O0 register
1000 * {mt_master or mt_slave} -- real caller of __mt_EndOfTask_Barrier_
1001 * ...
1002 *
1003 * With this trick, the analyzer will show the time in the barrier
1004 * attributed to the loop at the end of which the barrier synchronization
1005 * is taking place. That loop body routine, will be shown as called
1006 * from the function from which it was extracted, which will be shown
1007 * as called from the real caller, either the slave or master library routine.
1008 */
1009
1010 /*
1011 * These no-fault-load (0x82) assembly functions are courtesy of Rob Gardner.
1012 *
1013 * Note that 0x82 is ASI_PNF. See
1014 * http://lxr.free-electrons.com/source/arch/sparc/include/uapi/asm/asi.h#L134
1015 * ASI address space identifier; PNF primary no fault
1016 */
1017
/* Load an int through ASI_PNF (no-fault ASI 0x82).
   A load from an unmapped address faults silently and yields 0 instead
   of delivering a signal, so this is safe on arbitrary pointers.  */
static int
SPARC_no_fault_load_int (void *addr)
{
  int result;
  __asm__ __volatile__(
		       "lda [%1] 0x82, %0\n\t"
		       : "=r" (result)
		       : "r" (addr)
		       );
  return result;
}
1033
1034 /* check if an address is invalid
1035 *
1036 * A no-fault load of an illegal address still faults, but it does so silently to the calling process.
1037 * It returns a 0, but so could a load of a legal address.
1038 * So, we time the load. A "fast" load must be a successful load.
1039 * A "slow" load is probably a fault.
1040 * Since it could also be a cache/TLB miss or other abnormality,
1041 * it's safest to retry a slow load.
1042 * The cost of trying a valid address should be some nanosecs.
1043 * The cost of trying an invalid address up to 10 times could be some microsecs.
1044 */
#if 0
/* Probe whether ADDR is readable by timing a no-fault load.
   A no-fault load of an illegal address still faults, but silently, and
   it returns 0 -- which a load of a legal address could also return.  So
   the load is bracketed with %tick reads: a fast load must have
   succeeded, while a slow one is probably a fault.  Since slowness can
   also be a cache/TLB miss, a slow load is retried up to 10 times.
   Returns 0 if the address looks valid, 1 otherwise.
   NOTE: currently compiled out; kept for reference.  */
static
int invalid_SPARC_addr(void *addr)
{
    long t1, t2;
    int i;

    for (i=0; i<10; i++) {
	__asm__ __volatile__(
		"rd %%tick, %0\n\t"
		"lduba [%2] 0x82, %%g0\n\t"
		"rd %%tick, %1\n\t"
		: "=r" (t1), "=r" (t2)
		: "r" (addr) );
	if ( (t2 - t1) < 100 )
	    return 0;
    }
    return 1;
}
#endif
1065
1066 /*
1067 * The standard SPARC procedure-calling convention is that the
1068 * calling PC (for determining the return address when the procedure
1069 * is finished) is placed in register %o7. A called procedure
1070 * typically executes a "save" instruction that shifts the register
1071 * window, and %o7 becomes %i7.
1072 *
1073 * Optimized leaf procedures do not shift the register window.
1074 * They assume the return address will remain %o7. So when
1075 * we process a leaf PC, we walk instructions to see if there
1076 * is a call, restore, or other instruction that would indicate
1077 * we can IGNORE %o7 because this is NOT a leaf procedure.
1078 *
1079 * If a limited instruction walk uncovers no such hint, we save
1080 * not only the PC but the %o7 value as well... just to be safe.
1081 * Later, in DBE post-processing of the call stacks, we decide
1082 * whether any recorded %o7 value should be used as a caller
1083 * frame or should be discarded.
1084 */
1085
1086 #define IS_ILLTRAP(x) (((x) & 0xc1c00000) == 0)
1087 #define IS_SAVE(x) (((x) & 0xc1f80000) == 0x81e00000)
1088 #define IS_MOVO7R(x) (((x) & 0xc1f8201f) == 0x8160000f)
1089 #define IS_MOVRO7(x) (((x) & 0xfff82000) == 0x9f600000)
1090 #define IS_ORRG0O7(x) (((x) & 0xff78201f) == 0x9e100000)
1091 #define IS_ORG0RO7(x) (((x) & 0xff7fe000) == 0x9e100000)
1092 #define IS_ORG0O7R(x) (((x) & 0xc17fe01f) == 0x8010000f)
1093 #define IS_ORO7G0R(x) (((x) & 0xc17fe01f) == 0x8013c000)
1094 #define IS_RESTORE(x) (((x) & 0xc1f80000) == 0x81e80000)
1095 #define IS_RET(x) ((x) == 0x81c7e008)
1096 #define IS_RETL(x) ((x) == 0x81c3e008)
1097 #define IS_RETURN(x) (((x) & 0xc1f80000) == 0x81c80000)
1098 #define IS_BRANCH(x) ((((x) & 0xc0000000) == 0) && (((x) & 0x01c00000) != 0x01000000))
1099 #define IS_CALL(x) (((x) & 0xc0000000) == 0x40000000)
1100 #define IS_LDO7(x) (((x) & 0xfff80000) == 0xde000000)
1101
/* Cached result of sysconf(_SC_PAGESIZE); 0 until first use.  */
static long pagesize = 0;

/* Record the leaf frame for the interrupted context into lbuf.
   An optimized leaf routine keeps its return address in %o7 and never
   executes a 'save', so %o7 may or may not be a live return address.
   We walk up to 20 instructions forward from pc looking for evidence
   either way: a 'save' proves %o7 is live; a call/restore/ret/retl
   proves it is not.  If the walk is inconclusive, both pc and %o7 are
   recorded behind SP_LEAF_CHECK_MARKER and DBE post-processing decides
   which to keep (see the comment block above).
   lbuf[0..lsize-1] is the output array, ind the current index,
   context the ucontext to read registers from.
   Returns the new index into lbuf.  */
static int
process_leaf (long *lbuf, int ind, int lsize, void *context)
{
  greg_t pc = GET_PC (context);
  greg_t o7 = GET_GREG (context, REG_O7);

  /* omazur: TBR START -- not used */
  if (IN_BARRIER (pc))
    {
      /* Parallel-loop barrier: the implied caller (the loop body
	 routine) is taken from %o0 -- see the barrier comment above.  */
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = GET_GREG (context, REG_O0);
      return ind;
    }
  /* omazur: TBR END */
#if WSIZE(64)
  if (IN_TRAP_HANDLER (pc))
    {
      /* In a trap handler %o7 contains garbage; record only the pc.  */
      if (ind < lsize)
	lbuf[ind++] = pc;
      return ind;
    }
#endif
  unsigned *instrp = (unsigned *) pc;
  unsigned *end_addr = instrp + 20;	/* bounded forward walk */
  while (instrp < end_addr)
    {
      unsigned instr = *instrp++;
      if (IS_ILLTRAP (instr))
	break;
      else if (IS_SAVE (instr))
	{
	  /* A 'save' lies ahead: %o7 still holds the return address.  */
	  if (ind < lsize)
	    lbuf[ind++] = pc;
	  if (o7 && ind < lsize)
	    lbuf[ind++] = o7;
	  return ind;
	}
      else if (IS_MOVO7R (instr) || IS_ORG0O7R (instr) || IS_ORO7G0R (instr))
	break;			/* %o7 copied away: inconclusive */
      else if (IS_MOVRO7 (instr) || IS_ORG0RO7 (instr))
	{
	  /* %o7 is about to be overwritten from register rs2 (rs2
	     field in bits 4:0); take that register's current value.  */
	  int rs2 = (instr & 0x1f) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_ORRG0O7 (instr))
	{
	  /* Same, but the source register is in the rs1 field
	     (bits 18:14).  */
	  int rs2 = ((instr & 0x7c000) >> 14) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_RESTORE (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETURN (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RET (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETL (instr))
	{
	  /* process delay slot */
	  instr = *instrp++;
	  if (IS_RESTORE (instr))
	    o7 = 0;
	  break;
	}
      else if (IS_BRANCH (instr))
	{
	  /* A branch ends the forward walk; scan a bounded window
	     backwards from pc instead, looking for instructions that
	     prove or disprove leaf-ness.  */
	  unsigned *backbegin = ((unsigned *) pc - 1);
	  unsigned *backend = backbegin - 12 + (instrp - (unsigned *) pc);
	  while (backbegin > backend)
	    {
	      // 21920143 stack unwind: SPARC process_leaf backtracks too far
	      /*
	       * We've already dereferenced backbegin+1.
	       * So if backbegin is on the same page, we're fine.
	       * If we've gone to a different page, possibly things are not fine.
	       * We don't really know how to test that.
	       * Let's just assume the worst: that dereferencing backbegin would segv.
	       * We won't know if we're in a leaf function or not.
	       */
	      if (pagesize == 0)
		pagesize = CALL_UTIL (sysconf)(_SC_PAGESIZE);
	      if ((((long) (backbegin + 1)) & (pagesize - 1)) < sizeof (unsigned*))
		break;
	      unsigned backinstr = *backbegin--;
	      if (IS_LDO7 (backinstr))
		{
		  /* %o7 was loaded from memory: not a return address.  */
		  o7 = 0;
		  break;
		}
	      else if (IS_ILLTRAP (backinstr))
		break;
	      else if (IS_RETURN (backinstr))
		break;
	      else if (IS_RET (backinstr))
		break;
	      else if (IS_RETL (backinstr))
		break;
	      else if (IS_CALL (backinstr))
		break;
	      else if (IS_SAVE (backinstr))
		{
		  /* Register window already shifted: %o7 not valid.  */
		  o7 = 0;
		  break;
		}
	    }
	  break;
	}
      else if (IS_CALL (instr))
	o7 = 0;			/* a call clobbers %o7: not a leaf */
    }

#if WSIZE(64)
  if (o7 != 0 && ((long) o7) < 32 && ((long) o7) > -32)
    {
      /* 20924821 SEGV in unwind code on SPARC/Linux
       * We've seen this condition in some SPARC-Linux runs.
       * o7 is non-zero but not a valid address.
       * Values like 4 or -7 have been seen.
       * Let's check if o7 is unreasonably small.
       * If so, set to 0 so that it won't be recorded.
       * Otherwise, there is risk of it being dereferenced in process_sigreturn().
       */
      // __collector_log_write("<event kind=\"%s\" id=\"%d\">time %lld, internal debug unwind at leaf; o7 = %ld, pc = %x</event>\n",
      //       SP_JCMD_COMMENT, COL_COMMENT_NONE, __collector_gethrtime() - __collector_start_time, (long) o7, pc );
      o7 = 0;
    }
#endif

  if (o7)
    {
      /* Inconclusive: record both candidates behind a marker and let
	 DBE post-processing decide whether %o7 was a real caller.  */
      if (ind < lsize)
	lbuf[ind++] = SP_LEAF_CHECK_MARKER;
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = o7;
    }
  else if (ind < lsize)
    lbuf[ind++] = pc;
  return ind;
}
1257
1258 #if WSIZE(64)
// detect signal handler
/* Recognize a Linux __rt_sigreturn_stub trampoline at tpc and, if
   found, pull the interrupted pc/fp values out of the kernel-laid-out
   rt_signal_frame that *pfp points at, recording them into lbuf and
   advancing *pfp past the signal frame.
   Returns the (possibly advanced) index into lbuf; ind unchanged means
   no trampoline was recognized.  */
static int
process_sigreturn (long *lbuf, int ind, int lsize, unsigned char * tpc,
		   struct frame **pfp, void * bptr, int extra_frame)
{
  // cheap checks whether tpc is obviously not an instruction address
  if ((4096 > (unsigned long) tpc) // the first page is off limits
      || (3 & (unsigned long) tpc))
    return ind; // the address is not aligned

  // get the instruction at tpc, skipping over as many as 7 nop's (0x01000000)
  int insn, i;
  for (i = 0; i < 7; i++)
    {
      insn = SPARC_no_fault_load_int ((void *) tpc);
      if (insn != 0x01000000)
	break;
      tpc += 4;
    }

  // we're not expecting 0 (and it could mean an illegal address)
  if (insn == 0)
    return ind;

  // We are looking for __rt_sigreturn_stub with the instruction
  // 0x82102065 : mov 0x65 /* __NR_rt_sigreturn */, %g1
  if (insn == 0x82102065)
    {
      /*
       * according to linux kernel source code,
       * syscall(_NR_rt_sigreturn) uses the following data in stack:
       * struct rt_signal_frame {
       *     struct sparc_stackf ss;
       *     siginfo_t info;
       *     struct pt_regs regs;
       *     ....};
       * sizeof(struct sparc_stackf) is 192;
       * sizeof(siginfo_t) is 128;
       * we need to get the register values from regs, which is defined as:
       * struct pt_regs {
       *     unsigned long u_regs[16];
       *     unsigned long tstate;
       *     unsigned long tpc;
       *     unsigned long tnpc;
       *     ....};
       * pc and fp register has offset of 120 and 112;
       * the pc of kill() is stored in tnpc, whose offset is 136.
       */
      greg_t pc = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 136));
      greg_t pc1 = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 120));
      (*pfp) = *((struct frame**) ((char*) ((*pfp)) + 192 + 128 + 112));
      if (pc && pc1)
	{
	  /* when tracking an extra caller frame, seed slot 0 with the
	     saved pc from the signal frame */
	  if (bptr != NULL && extra_frame && ((char*) (*pfp) + STACK_BIAS) < (char*) bptr && ind < 2)
	    {
	      lbuf[0] = pc1;
	      if (ind == 0)
		ind++;
	    }
	  if (bptr == NULL || ((char*) (*pfp) + STACK_BIAS) >= (char*) bptr)
	    {
	      if (ind < lsize)
		lbuf[ind++] = (unsigned long) tpc;
	      if (ind < lsize)
		lbuf[ind++] = pc;
	      if (ind < lsize)
		lbuf[ind++] = pc1;
	    }
	}
      DprintfT (SP_DUMP_UNWIND, "unwind.c: resolved sigreturn pc=0x%lx, pc1=0x%lx, fp=0x%lx\n", pc, pc1, *(pfp));
    }
  return ind;
}
1332 #endif
1333
1334 /*
1335 * int stack_unwind( char *buf, int size, ucontext_t *context )
1336 * This routine looks into the mcontext and
1337 * trace stack frames to record return addresses.
1338 */
1339 int
stack_unwind(char * buf,int size,void * bptr,void * eptr,ucontext_t * context,int mode)1340 stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
1341 {
1342 /*
1343 * trace the stack frames from user stack.
1344 * We are assuming that the frame pointer and return address
1345 * are null when we are at the top level.
1346 */
1347 long *lbuf = (long*) buf;
1348 int lsize = size / sizeof (long);
1349 struct frame *fp = (struct frame *) GET_SP (context); /* frame pointer */
1350 greg_t pc; /* program counter */
1351 int extra_frame = 0;
1352 if ((mode & 0xffff) == FRINFO_FROM_STACK)
1353 extra_frame = 1;
1354
1355 int ind = 0;
1356 if (bptr == NULL)
1357 ind = process_leaf (lbuf, ind, lsize, context);
1358
1359 int extra_frame = 0;
1360 if ((mode & 0xffff) == FRINFO_FROM_STACK)
1361 extra_frame = 1;
1362 int ind = 0;
1363 if (bptr == NULL)
1364 ind = process_leaf (lbuf, ind, lsize, context);
1365
1366 while (fp)
1367 {
1368 if (ind >= lsize)
1369 break;
1370 fp = (struct frame *) ((char *) fp + STACK_BIAS);
1371 if (eptr && fp >= (struct frame *) eptr)
1372 {
1373 ind = ind >= 2 ? ind - 2 : 0;
1374 break;
1375 }
1376 #if WSIZE(64) // detect signal handler
1377 unsigned char * tpc = ((unsigned char*) (fp->fr_savpc));
1378 struct frame * tfp = (struct frame*) ((char*) (fp->fr_savfp) + STACK_BIAS);
1379 int old_ind = ind;
1380 ind = process_sigreturn (lbuf, old_ind, lsize, tpc, &tfp, bptr, extra_frame);
1381 if (ind != old_ind)
1382 {
1383 pc = (greg_t) tpc;
1384 fp = tfp;
1385 }
1386 else
1387 #endif
1388 {
1389 #if WSIZE(64)
1390 if (IN_TRAP_HANDLER (lbuf[ind - 1]))
1391 pc = fp->fr_local[6];
1392 else
1393 pc = fp->fr_savpc;
1394 #else
1395 pc = fp->fr_savpc;
1396 #endif
1397 fp = fp->fr_savfp;
1398 if (pc)
1399 {
1400 if (bptr != NULL && extra_frame && ((char*) fp + STACK_BIAS) < (char*) bptr && ind < 2)
1401 {
1402 lbuf[0] = pc;
1403 if (ind == 0)
1404 ind++;
1405 }
1406 if (bptr == NULL || ((char*) fp + STACK_BIAS) >= (char*) bptr)
1407 lbuf[ind++] = pc;
1408 }
1409 }
1410
1411 /* 4616238: _door_return may have a frame that has non-zero
1412 * saved stack pointer and zero pc
1413 */
1414 if (pc == (greg_t) NULL)
1415 break;
1416 }
1417
1418 if (ind >= lsize)
1419 { /* truncated stack handling */
1420 ind = lsize - 1;
1421 lbuf[ind++] = SP_TRUNC_STACK_MARKER;
1422 }
1423 return ind * sizeof (long);
1424 }
1425
1426 #elif ARCH(Intel)
1427
1428 /* get __NR_<syscall_name> constants */
1429 #include <syscall.h>
1430
1431 /*
1432 * From uts/intel/ia32/os/sendsig.c:
1433 *
1434 * An amd64 signal frame looks like this on the stack:
1435 *
1436 * old %rsp:
1437 * <128 bytes of untouched stack space>
1438 * <a siginfo_t [optional]>
1439 * <a ucontext_t>
1440 * <siginfo_t *>
1441 * <signal number>
1442 * new %rsp: <return address (deliberately invalid)>
1443 *
1444 * The signal number and siginfo_t pointer are only pushed onto the stack in
1445 * order to allow stack backtraces. The actual signal handling code expects the
1446 * arguments in registers.
1447 *
1448 * An i386 SVR4/ABI signal frame looks like this on the stack:
1449 *
1450 * old %esp:
1451 * <a siginfo32_t [optional]>
1452 * <a ucontext32_t>
1453 * <pointer to that ucontext32_t>
1454 * <pointer to that siginfo32_t>
1455 * <signo>
1456 * new %esp: <return address (deliberately invalid)>
1457 */
1458
1459 #if WSIZE(32)
1460 #define OPC_REG(x) ((x)&0x7)
1461 #define MRM_REGD(x) (((x)>>3)&0x7)
1462 #define MRM_REGS(x) ((x)&0x7)
1463 #define RED_ZONE 0
1464 #elif WSIZE(64)
1465 #define OPC_REG(x) (B|((x)&0x7))
1466 #define MRM_REGD(x) (R|(((x)>>3)&0x7))
1467 #define MRM_REGS(x) (B|((x)&0x7))
1468 #define RED_ZONE 16
1469 #endif
1470 #define MRM_EXT(x) (((x)>>3)&0x7)
1471 #define MRM_MOD(x) ((x)&0xc0)
1472
1473 #define RAX 0
1474 #define RDX 2
1475 #define RSP 4
1476 #define RBP 5
1477
/* Working state for one speculative x86 instruction-walk context.
   The walker forks contexts at branches and prunes them as walks fail
   (see DELETE_CURCTX and the buf[MAXCTX] array in find_i386_ret_addr).  */
struct AdvWalkContext
{
  unsigned char *pc;		/* current instruction pointer of this walk */
  unsigned long *sp;		/* simulated stack pointer */
  unsigned long *sp_safe;	/* lowest stack address safe to read */
  unsigned long *fp;		/* frame pointer */
  unsigned long *fp_sav;	/* presumably a saved fp value -- confirm against walk code */
  unsigned long *fp_loc;	/* presumably the stack slot fp was saved to -- confirm */
  unsigned long rax;		/* tracked %rax value */
  unsigned long rdx;		/* tracked %rdx value */
  unsigned long ra_sav;		/* saved return-address value (see process_return_real) */
  unsigned long *ra_loc;	/* stack slot holding the return address */
  unsigned long regs[16];	/* tracked general register values (0 = unknown) */
  int tidx; /* targets table index */
  uint32_t cval; /* cache value */
};
1494
1495 static unsigned long
getRegVal(struct AdvWalkContext * cur,int r,int * undefRez)1496 getRegVal (struct AdvWalkContext *cur, int r, int *undefRez)
1497 {
1498 if (cur->regs[r] == 0)
1499 {
1500 if (r == RBP)
1501 {
1502 tprintf (DBG_LT3, "getRegVal: returns cur->regs[RBP]=0x%lx cur->pc=0x%lx\n",
1503 (unsigned long) cur->fp, (unsigned long) cur->pc);
1504 return (unsigned long) cur->fp;
1505 }
1506 *undefRez = 1;
1507 }
1508 tprintf (DBG_LT3, "getRegVal: cur->regs[%d]=0x%lx cur->pc=0x%lx\n",
1509 r, (unsigned long) cur->regs[r], (unsigned long) cur->pc);
1510 return cur->regs[r];
1511 }
1512
/* Given pc pointing at a ModR/M byte, return the address of the first
   byte after the ModR/M byte, any SIB byte, and any displacement --
   i.e. skip the memory-operand encoding of the instruction.  */
static unsigned char *
check_modrm (unsigned char *pc)
{
  unsigned char modrm = *pc++;
  unsigned char mod = modrm & 0xc0;	/* addressing mode bits */
  unsigned char rm = modrm & 0x07;	/* r/m field */

  if (mod == 0xc0)
    return pc;			/* register operand: no SIB, no displacement */

  if (rm == 4)			/* r/m == RSP: a SIB byte follows */
    {
      switch (mod)
	{
	case 0x40:
	  return pc + 2;	/* SIB + disp8 */
	case 0x80:
	  return pc + 5;	/* SIB + disp32 */
	default:
	  return pc + 1;	/* SIB only */
	}
    }

  switch (mod)
    {
    case 0x00:
      /* r/m == RBP with mod 00 means RIP/absolute + disp32 */
      return (rm == 5) ? pc + 4 : pc;
    case 0x40:
      return pc + 1;		/* disp8 */
    default:
      return pc + 4;		/* mod == 0x80: disp32 */
    }
}
1540
/* Read a w-byte signed immediate at pc (w = 1, 2, or anything else
   for 4) and sign-extend it to int.  */
static int
read_int (unsigned char *pc, int w)
{
  switch (w)
    {
    case 1:
      return *((char *) pc);	/* byte, sign-extended */
    case 2:
      return *(short*) pc;	/* word, sign-extended */
    default:
      return *(int*) pc;	/* doubleword */
    }
}
1550
/* Return codes for the x86 return-address resolvers */
enum
{
  RA_FAILURE = 0,	/* could not determine a return address */
  RA_SUCCESS,		/* walk context advanced to the caller frame */
  RA_END_OF_STACK,	/* reached the outermost frame (null RA) */
  RA_SIGRETURN,		/* presumably: hit a sigreturn trampoline -- confirm against callers */
  RA_RT_SIGRETURN	/* presumably: hit an rt_sigreturn trampoline -- confirm */
};

/* Cache value encodings */
static const uint32_t RA_FROMFP = (uint32_t) - 1; /* get the RA from the frame pointer */
static const uint32_t RA_EOSTCK = (uint32_t) - 2; /* end-of-stack */
1564
1565
1566 #define MAXCTX 16
1567 #define MAXTRGTS 64
1568 #define MAXJMPREG 2
1569 #define MAXJMPREGCTX 3
1570
1571 #define DELETE_CURCTX() __collector_memcpy (cur, buf + (--nctx), sizeof (*cur))
1572
/**
 * Look for pc in AddrTable_RA_FROMFP and in AddrTable_RA_EOSTCK.
 * On a RA_FROMFP hit, recover the caller frame through the frame
 * pointer and advance wctx; on a RA_EOSTCK hit report end-of-stack.
 * @param wctx current walk context; updated in place on success
 * @return RA_SUCCESS, RA_END_OF_STACK, or RA_FAILURE (no usable cache entry)
 */
static int
cache_get (struct WalkContext *wctx)
{
  unsigned long addr;
  if (AddrTable_RA_FROMFP != NULL)
    {
      uint64_t idx = wctx->pc % ValTableSize;
      addr = AddrTable_RA_FROMFP[ idx ];
      if (addr == wctx->pc)
	{ // Found in AddrTable_RA_FROMFP
	  unsigned long *sp = NULL;
	  unsigned long fp = wctx->fp;
	  /* validate fp before use */
	  if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
	    return RA_FAILURE;
	  sp = (unsigned long *) fp;
	  fp = *sp++;			/* saved frame pointer */
	  unsigned long ra = *sp++;	/* return address above it */
	  unsigned long tbgn = wctx->tbgn;
	  unsigned long tend = wctx->tend;
	  /* the RA must fall inside a known text segment */
	  if (ra < tbgn || ra >= tend)
	    if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	      return RA_FAILURE;
	  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
	  if (npc == 0)
	    return RA_FAILURE;
	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached pc=0x%lX\n", __LINE__, npc);
	  wctx->pc = npc;
	  wctx->sp = (unsigned long) sp;
	  wctx->fp = fp;
	  wctx->tbgn = tbgn;
	  wctx->tend = tend;
	  return RA_SUCCESS;
	}
    }
  if (NULL == AddrTable_RA_EOSTCK)
    return RA_FAILURE;
  uint64_t idx = wctx->pc % ValTableSize;
  addr = AddrTable_RA_EOSTCK[ idx ];
  if (addr != wctx->pc)
    return RA_FAILURE;
  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached RA_END_OF_STACK\n", __LINE__);
  return RA_END_OF_STACK;
}
1622 /**
1623 * Save pc in RA_FROMFP or RA_EOSTCK cache depending on val
1624 * @param wctx
1625 */
1626 static void
cache_put(struct WalkContext * wctx,const uint32_t val)1627 cache_put (struct WalkContext *wctx, const uint32_t val)
1628 {
1629 if (RA_FROMFP == val)
1630 {
1631 // save pc in RA_FROMFP cache
1632 if (NULL != AddrTable_RA_FROMFP)
1633 {
1634 uint64_t idx = wctx->pc % ValTableSize;
1635 AddrTable_RA_FROMFP[ idx ] = wctx->pc;
1636 if (NULL != AddrTable_RA_EOSTCK)
1637 if (AddrTable_RA_EOSTCK[ idx ] == wctx->pc)
1638 // invalidate pc in RA_EOSTCK cache
1639 AddrTable_RA_EOSTCK[ idx ] = 0;
1640 }
1641 return;
1642 }
1643 if (RA_EOSTCK == val)
1644 {
1645 // save pc in RA_EOSTCK cache
1646 if (NULL != AddrTable_RA_EOSTCK)
1647 {
1648 uint64_t idx = wctx->pc % ValTableSize;
1649 AddrTable_RA_EOSTCK[ idx ] = wctx->pc;
1650 if (NULL != AddrTable_RA_FROMFP)
1651 {
1652 if (AddrTable_RA_FROMFP[ idx ] == wctx->pc)
1653 // invalidate pc in RA_FROMFP cache
1654 AddrTable_RA_FROMFP[ idx ] = 0;
1655 }
1656 }
1657 return;
1658 }
1659 }
1660
/* Pop a return address for walk context cur and advance the real walk
   state wctx accordingly.  The RA comes either from the spot where the
   walk previously saved it (cur->ra_loc/ra_sav) or from the top of the
   simulated stack.  With cache_on != 0, confirmed results are recorded
   via cache_put to speed up later unwinds of the same pc.
   Returns RA_SUCCESS, RA_END_OF_STACK, or RA_FAILURE.  */
static int
process_return_real (struct WalkContext *wctx, struct AdvWalkContext *cur, int cache_on)
{
  /* the simulated stack pointer must lie within the real stack */
  if ((unsigned long) cur->sp >= wctx->sbase ||
      (unsigned long) cur->sp < wctx->sp)
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not in stack: %p [0x%lX-0x%lX]\n",
		cur->sp, wctx->sp, wctx->sbase);
      return RA_FAILURE;
    }

  unsigned long ra;
  if (cur->sp == cur->ra_loc)
    {
      /* the walk tracked where the RA was stored; use the saved copy */
      ra = cur->ra_sav;
      cur->sp++;
    }
  else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
    ra = *cur->sp++;		/* pop the RA off the simulated stack */
  else
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not safe: %p >= %p\n", cur->sp, cur->sp_safe);
      return RA_FAILURE;
    }
  if (ra == 0)
    {
      /* a null RA marks the outermost frame */
      if (cache_on)
	cache_put (wctx, RA_EOSTCK);
      wctx->pc = ra;
      wctx->sp = (unsigned long) cur->sp;
      wctx->fp = (unsigned long) cur->fp;
      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d RA_END_OF_STACK\n", __LINE__);
      return RA_END_OF_STACK;
    }

  /* the RA must fall inside a known text segment */
  unsigned long tbgn = wctx->tbgn;
  unsigned long tend = wctx->tend;
  if (ra < tbgn || ra >= tend)
    {
      if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: not in segment: 0x%lX [0x%lX-0x%lX]\n",
		    ra, wctx->tbgn, wctx->tend);
	  return RA_FAILURE;
	}
    }

  if (cur->cval == RA_FROMFP)
    {
      /* only trust/cache the frame-pointer result if fp actually
	 pointed at the slot just below the popped RA */
      if (wctx->fp == (unsigned long) (cur->sp - 2))
	{
	  if (cache_on)
	    cache_put (wctx, RA_FROMFP);
	}
      else
	cur->cval = 0;
    }

  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
  if (npc == 0)
    {
      if (cur->cval == RA_FROMFP)
	{
	  /* We have another evidence that we can trust this RA */
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: trusted fp, pc = 0x%lX\n", wctx->pc);
	  wctx->pc = ra;
	}
      else
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: 0 after adjustment\n");
	  return RA_FAILURE;
	}
    }
  else
    wctx->pc = npc;
  wctx->sp = (unsigned long) cur->sp;
  wctx->fp = (unsigned long) cur->fp;
  wctx->tbgn = tbgn;
  wctx->tend = tend;
  return RA_SUCCESS;
}
1742
/* Convenience wrapper: process_return_real with result caching on.  */
static int
process_return (struct WalkContext *wctx, struct AdvWalkContext *cur)
{
  return process_return_real (wctx, cur, /* cache_on = */ 1);
}
1748
/* Record an unwind step in the OpenMP unwind cache: key is the caller
   context wctx_pc_save, value is the resulting context wctx plus the
   return code val.  Tables are allocated lazily when omp_no_walk is
   set.  The return address stored in OmpRAs doubles as a validity
   check for the whole entry (0 = invalid / being written); see the
   USE_18434988_OMP_CACHE_WORKAROUND write ordering below.
   cur_sp_safe is the lowest stack address safe to read.  */
static void
omp_cache_put (unsigned long *cur_sp_safe, struct WalkContext * wctx_pc_save,
	       struct WalkContext *wctx, uint32_t val)
{
  /* lazily allocate the four parallel cache tables */
  if (omp_no_walk && (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL))
    {
      size_t sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
    }
  if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
    return;			/* allocation failed: caching disabled */

#define USE_18434988_OMP_CACHE_WORKAROUND
#ifndef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx = wctx_pc_save->pc * ROOT_IDX;
  OmpVals[ idx % OmpValTableSize ] = val;
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCurCtxs[ idx % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCtxs[ idx % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
#endif
  /* locate the stack slot whose contents (the return address) will
     later validate this cache entry */
  unsigned long *sp = NULL;
  unsigned long fp = wctx_pc_save->fp;
  int from_fp = 0;
  if (val == RA_END_OF_STACK)
    {
      sp = (unsigned long *) (wctx->sp);
      sp--;
      TprintfT (DBG_LT1, "omp_cache_put: get sp from EOS, sp=%p\n", sp);
    }
  else
    {
      if (fp < wctx_pc_save->sp || fp >= wctx_pc_save->sbase - sizeof (*sp))
	{
	  /* fp outside the stack range: fall back to the new sp */
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from sp, sp=%p\n", sp);
	}
      else
	{
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from fp=0x%lx\n", fp);
	  sp = (unsigned long *) fp;
	  from_fp = 1;
	}
    }

  if (sp < cur_sp_safe || ((unsigned long) sp >= wctx->sbase))
    return;			/* slot not safely readable */

  unsigned long ra = *sp++;
  if (from_fp)
    {
      /* if the RA found via fp is not in the caller's text segment,
	 retry with the slot just below the new sp */
      unsigned long tbgn = wctx_pc_save->tbgn;
      unsigned long tend = wctx_pc_save->tend;
      if (ra < tbgn || ra >= tend)
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  ra = *sp++;
	}
    }
#ifdef USE_18434988_OMP_CACHE_WORKAROUND
  /* write order matters: clear the RA first (acts as a lock), fill in
     the entry, then store the real RA last so readers never see a
     half-written entry as valid */
  uint64_t idx1 = wctx_pc_save->pc * ROOT_IDX;
  uint64_t idx2 = (idx1 + val) * ROOT_IDX;
  uint64_t idx3 = (idx2 + val) * ROOT_IDX;
  uint64_t idx4 = (idx3 + val) * ROOT_IDX;
  OmpRAs [ idx4 % OmpValTableSize ] = 0; // lock
  OmpVals[ idx1 % OmpValTableSize ] = val;
  __collector_memcpy (&(OmpCurCtxs[ idx2 % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  __collector_memcpy (&(OmpCtxs [ idx3 % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
  OmpRAs [ idx4 % OmpValTableSize ] = ra;
#else
  idx = (idx + val) * ROOT_IDX;
  OmpRAs[ idx % OmpValTableSize ] = ra;
#endif
  TprintfT (DBG_LT1, "omp_cache_put: pc=0x%lx\n", wctx_pc_save->pc);
}
1832
1833 /*
1834 * See bug 17166877 - malloc_internal unwind failure.
1835 * Sometimes there are several calls right after ret, like:
1836 * leave
1837 * ret
1838 * call xxx
1839 * call xxxx
1840 * call xxxxx
1841 * If they are also jump targets, we should better not
1842 * create new jump context for those, since they may
1843 * end up into some other function.
1844 */
/* Return 1 if npc points at a call (0xe8) that belongs to a run of at
   least three adjacent 5-byte calls immediately preceded by
   leave (0xc9); ret (0xc3) -- see bug 17166877 comment above.  */
static int
is_after_ret (unsigned char * npc)
{
  const int max_steps = 10;	/* scan bound in each direction */
  const int min_calls = 3;	/* required run length */
  if (*npc != 0xe8)		/* not a call instruction */
    return 0;
  unsigned char *first = npc;
  int calls = 1;
  int i;
  /* scan backwards over adjacent 5-byte call instructions */
  for (i = 0; i < max_steps && *(first - 5) == 0xe8; i++)
    {
      first -= 5;
      calls++;
    }
  /* the run must start right after a "leave; ret" pair */
  if (*(first - 1) != 0xc3 || *(first - 2) != 0xc9)
    return 0;
  /* scan forwards over adjacent call instructions */
  unsigned char *last = npc;
  for (i = 0; i < max_steps && *(last + 5) == 0xe8; i++)
    {
      last += 5;
      calls++;
    }
  return calls >= min_calls;
}
1874
1875 static int
find_i386_ret_addr(struct WalkContext * wctx,int do_walk)1876 find_i386_ret_addr (struct WalkContext *wctx, int do_walk)
1877 {
1878 if (wctx->sp == 0)
1879 // Some artificial contexts may have %sp set to 0. See SETFUNCTIONCONTEXT()
1880 return RA_FAILURE;
1881
1882 /* Check cached values */
1883 int retc = cache_get (wctx);
1884 if (retc != RA_FAILURE)
1885 return retc;
1886
1887 /* An attempt to perform code analysis for call stack tracing */
1888 unsigned char opcode;
1889 unsigned char extop;
1890 unsigned char extop2;
1891 unsigned char modrm;
1892 int imm8; /* immediate operand, byte */
1893 int immv; /* immediate operand, word(2) or doubleword(4) */
1894 int reg; /* register code */
1895
1896 /* Buffer for branch targets (analysis stoppers) */
1897 unsigned char *targets[MAXTRGTS];
1898 int ntrg = 0; /* number of entries in the table */
1899 targets[ntrg++] = (unsigned char*) wctx->pc;
1900 targets[ntrg++] = (unsigned char*) - 1;
1901
1902 struct AdvWalkContext buf[MAXCTX];
1903 struct AdvWalkContext *cur = buf;
1904 CALL_UTIL (memset)((void*) cur, 0, sizeof (*cur));
1905
1906 cur->pc = (unsigned char*) wctx->pc;
1907 cur->sp = (unsigned long*) wctx->sp;
1908 cur->sp_safe = cur->sp - RED_ZONE; /* allow for the 128-byte red zone on amd64 */
1909 cur->fp = (unsigned long*) wctx->fp;
1910 cur->tidx = 1;
1911 DprintfT (SP_DUMP_UNWIND, "\nstack_unwind (x86 walk):%d %p start\n", __LINE__, cur->pc);
1912
1913 int nctx = 1; /* number of contexts being processed */
1914 int cnt = 8192; /* number of instructions to analyse */
1915
1916 /*
1917 * The basic idea of our x86 stack unwind is that we don't know
1918 * if we can trust the frame-pointer register. So we walk
1919 * instructions to find a return instruction, at which point
1920 * we know the return address is on the top of the stack, etc.
1921 *
1922 * A severe challenge to walking x86 instructions is when we
1923 * encounter "jmp *(reg)" instructions, where we are expected
1924 * to jump to the (unknown-to-us) contents of a register.
1925 *
1926 * The "jmp_reg" code here attempts to keep track of the
1927 * context for such a jump, deferring any handling of such
1928 * a difficult case. We continue with other contexts, hoping
1929 * that some other walk will take us to a return instruction.
1930 *
1931 * If no other walk helps, we return to "jmp_reg" contexts.
1932 * While we don't know the jump target, it is possible that the
1933 * bytes immediately following the jmp_reg instruction represent
1934 * one possible target, as might be the case when a "switch"
1935 * statement is compiled.
1936 *
1937 * Unfortunately, the bytes following a "jmp_reg" instruction might
1938 * instead be a jump target from somewhere else -- execution might
1939 * never "fall through" from the preceding "jmp_reg". Those bytes
1940 * might not even be instructions at all. There are many uses of
1941 * jmp_reg instructions beyond just compiling switch statements.
1942 *
1943 * So walking the bytes after a "jmp_reg" instruction can lead
1944 * to bugs and undefined behavior, including SEGV and core dump.
1945 *
1946 * We currently do not really understand the "jmp_reg" code below.
1947 */
1948 int jmp_reg_switch_mode = 0;
1949 int num_jmp_reg = 0; // number of jmp *reg met when switch mode is off or when in current switch case
1950 int total_num_jmp_reg = 0; // number of total jmp *reg met
1951 struct AdvWalkContext * jmp_reg_ctx[MAXJMPREG]; // context of jmp *reg met when switch mode is off or when in current switch case
1952 struct AdvWalkContext * jmp_reg_switch_ctx[MAXJMPREG]; // context of jmp *reg used in switch cases
1953 struct AdvWalkContext * jmp_reg_switch_backup_ctx = NULL; // context of the first jmp *reg used in switch cases
1954
1955 int cur_jmp_reg_switch = 0; // current switch table
1956 int num_jmp_reg_switch = 0; // number of switch table
1957 int jmp_reg_switch_case = 0; // case number in current switch table
1958 unsigned char * jmp_reg_switch_pc = NULL; // the start pc of current switch case
1959 unsigned char * jmp_reg_switch_pc_old = NULL; // backup for deleteing context of jump target
1960 unsigned char * jmp_reg_switch_base = NULL; // start pc for checking offsets
1961 int max_jmp_reg_switch_case = 2;
1962 #if WSIZE(32)
1963 int max_switch_pc_offset = 512;
1964 #else // WSIZE(64)
1965 int max_switch_pc_offset = 1024;
1966 #endif
1967 int expected_num_jmp_reg = 1; // should be smaller than MAXJMPREG
1968 int max_num_jmp_reg_seen = 4; // try to resolve return if there are so many such instructions
1969
1970
1971 int save_ctx = 0; // flag to save walk context in the cache to speed up unwind
1972 struct WalkContext wctx_pc_save;
1973 if (do_walk == 0)
1974 // do_walk is the flag indicating not walking through the instructions, resolving the RA from the stack fp first
1975 __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
1976
1977 startWalk:
1978 if (do_walk == 0)
1979 { // try to resolve RA from stack frame pointer
1980 if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
1981 {
1982 do_walk = 1;
1983 goto startWalk;
1984 }
1985 // before goto checkFP, try the RA from cache (key: WalkContext -> value: caller's WalkContext))
1986 uint64_t idx = wctx->pc * ROOT_IDX;
1987 uint32_t val = OmpVals[idx % OmpValTableSize];
1988 idx = (idx + val) * ROOT_IDX;
1989 #ifdef USE_18434988_OMP_CACHE_WORKAROUND
1990 // Check ra: if it is 0 - then cache is invalid
1991 uint64_t idx4;
1992 idx4 = (idx + val) * ROOT_IDX;
1993 idx4 = (idx4 + val) * ROOT_IDX;
1994 if (0 == OmpRAs[ idx4 % OmpValTableSize ]) // Invalid cache
1995 goto checkFP;
1996 #endif
1997 struct WalkContext saved_ctx;
1998 __collector_memcpy (&saved_ctx, &OmpCurCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
1999 if (wctx->pc == saved_ctx.pc
2000 && wctx->sp == saved_ctx.sp
2001 && wctx->fp == saved_ctx.fp
2002 && wctx->tbgn == saved_ctx.tbgn
2003 && wctx->tend == saved_ctx.tend)
2004 { // key match, RA may be valid
2005 idx = (idx + val) * ROOT_IDX;
2006 unsigned long *sp = NULL;
2007 unsigned long fp = wctx->fp;
2008 int from_fp = 0;
2009 if (val == RA_END_OF_STACK)
2010 {
2011 DprintfT (SP_DUMP_UNWIND, "find_i386_ret_addr:%d -- RA_END_OF_STACK: pc=0x%lx\n", __LINE__, wctx->pc);
2012 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2013 return val;
2014 }
2015 else
2016 {
2017 if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
2018 {
2019 TprintfT (DBG_LT1, "omp_cache_get -- wrong fp: pc=0x%lx\n", wctx->pc);
2020 sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2021 sp--;
2022 if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2023 {
2024 goto checkFP;
2025 }
2026 unsigned long ra = *sp;
2027 uint64_t idx2 = (idx + val) * ROOT_IDX;
2028 if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2029 {
2030 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2031 TprintfT (DBG_LT1, "omp_cache_get -- ra match with target sp: pc=0x%lx, ra=0x%lx, val=%d\n", wctx->pc, ra, val);
2032 return val;
2033 }
2034 TprintfT (DBG_LT1, "omp_cache_get -- ra mismatch: ra=0x%lx, expected ra=0x%lx, val=%d\n", ra, OmpRAs[ idx2 % OmpValTableSize ], val);
2035 goto checkFP;
2036 }
2037 sp = (unsigned long *) fp;
2038 from_fp = 1;
2039 }
2040
2041 uint64_t idx2 = (idx + val) * ROOT_IDX;
2042 unsigned long ra = *sp++;
2043 if (from_fp)
2044 {
2045 unsigned long tbgn = wctx->tbgn;
2046 unsigned long tend = wctx->tend;
2047 if (ra < tbgn || ra >= tend)
2048 {
2049 sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2050 sp--;
2051 //if (sp < cur->sp_safe - 16 || (unsigned long)sp >= wctx->sbase - sizeof(*sp)) {
2052 // The check above was replaced with the check below,
2053 // because we do not know why "- 16" and "- sizeof(*sp)" was used.
2054 if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2055 goto checkFP;
2056 else
2057 ra = *sp;
2058 }
2059 }
2060 if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2061 {
2062 TprintfT (DBG_LT1, "omp_cache_get -- ra match: pc=0x%lx\n", wctx->pc);
2063 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2064 return val;
2065 }
2066 }
2067 goto checkFP;
2068 }
2069 else
2070 {
2071 CALL_UTIL (memset)(jmp_reg_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2072 CALL_UTIL (memset)(jmp_reg_switch_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2073 }
2074 while (cnt--)
2075 {
2076 if (nctx == 0 && (num_jmp_reg == expected_num_jmp_reg || jmp_reg_switch_mode == 1))
2077 { // no context available, try jmp switch mode
2078 int i = 0;
2079 if (num_jmp_reg == expected_num_jmp_reg)
2080 jmp_reg_switch_mode = 0; // first jmp reg expected, restart switch mode
2081 DprintfT (SP_DUMP_UNWIND, "unwind.c: begin switch mode, num_jmp_reg = %d, jmp_reg_switch_backup_ctx=%p, jmp_reg_switch_case=%d, jmp_reg_switch_mode=%d.\n",
2082 num_jmp_reg, jmp_reg_switch_backup_ctx, jmp_reg_switch_case, jmp_reg_switch_mode);
2083 // the ideal asm of switch is
2084 // jmp reg
2085 // ...//case 1
2086 // ret
2087 // ...//case 2
2088 // ret
2089 // ...//etc
2090 if (jmp_reg_switch_mode == 0)
2091 {
2092 num_jmp_reg_switch = num_jmp_reg; // backup num_jmp_reg
2093 jmp_reg_switch_mode = 1; // begin switch mode
2094 for (i = 0; i < num_jmp_reg_switch; i++)
2095 {
2096 if (jmp_reg_switch_ctx[i] == NULL)
2097 jmp_reg_switch_ctx[i] = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_ctx[i]));
2098 if (jmp_reg_switch_ctx[i] != NULL)
2099 { // backup jmp_reg_ctx
2100 __collector_memcpy (jmp_reg_switch_ctx[i], jmp_reg_ctx[i], sizeof (*jmp_reg_switch_ctx[i]));
2101 cur_jmp_reg_switch = 0; // reset the current switch table
2102 jmp_reg_switch_case = 0; // reset the case number in current switch table
2103 }
2104 }
2105 if (jmp_reg_switch_backup_ctx == NULL)
2106 { // only backup when the first jmp *reg is met for restoring later, if switch mode fails to resolve RA
2107 jmp_reg_switch_backup_ctx = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_backup_ctx));
2108 if (jmp_reg_switch_backup_ctx != NULL)
2109 __collector_memcpy (jmp_reg_switch_backup_ctx, cur, sizeof (*cur));
2110 DprintfT (SP_DUMP_UNWIND, "unwind.c: back up context for switch mode.\n");
2111 }
2112 }
2113 if (jmp_reg_switch_mode == 1)
2114 { // in the process of trying switch cases
2115 if (cur_jmp_reg_switch == num_jmp_reg_switch)
2116 {
2117 DprintfT (SP_DUMP_UNWIND, "unwind.c: have tried all switch with max_jmp_reg_switch_case for each\n");
2118 if (jmp_reg_switch_backup_ctx != NULL)
2119 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2120 int rc = process_return_real (wctx, cur, 0);
2121 if (rc == RA_SUCCESS)
2122 {
2123 if (save_ctx)
2124 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2125 return rc;
2126 }
2127 break; // have tried all switch with max_jmp_reg_switch_case for each, goto checkFP
2128 }
2129 unsigned char *npc = jmp_reg_switch_ctx[cur_jmp_reg_switch]->pc;
2130 if (jmp_reg_switch_case == 0)
2131 // first switch case
2132 npc = check_modrm (npc); // pc next to "jmp reg" instruction
2133 else if (jmp_reg_switch_pc != NULL)
2134 npc = jmp_reg_switch_pc; // pc next to "ret" instruction of previous case
2135 else
2136 {
2137 DprintfT (SP_DUMP_UNWIND, "unwind.c: unexpected jum switch mode situation, jmp_reg_switch_case=%d, jmp_reg_switch_pc=%p\n",
2138 jmp_reg_switch_case, jmp_reg_switch_pc);
2139 break; //goto checkFP
2140 }
2141 jmp_reg_switch_base = npc;
2142 struct AdvWalkContext *new = buf + nctx;
2143 nctx += 1;
2144 __collector_memcpy (new, jmp_reg_switch_ctx[cur_jmp_reg_switch], sizeof (*new));
2145 new->pc = npc;
2146 cur = new; /* advance the new context first */
2147 jmp_reg_switch_pc = NULL;
2148 jmp_reg_switch_case++;
2149 if (jmp_reg_switch_case == max_jmp_reg_switch_case)
2150 { // done many cases, change to another switch table
2151 cur_jmp_reg_switch++;
2152 jmp_reg_switch_case = 0;
2153 }
2154 }
2155 num_jmp_reg = 0;
2156 }
2157 if (jmp_reg_switch_mode == 1)
2158 { // when processing switch cases, check pc each time
2159 unsigned long tbgn = wctx->tbgn;
2160 unsigned long tend = wctx->tend;
2161 if ((unsigned long) (cur->pc) < tbgn || (unsigned long) (cur->pc) >= tend)
2162 {
2163 DprintfT (SP_DUMP_UNWIND, "unwind.c: pc out of range, pc=0x%lx\n", (unsigned long) (cur->pc));
2164 break;
2165 }
2166 if (jmp_reg_switch_base != NULL && cur->pc > jmp_reg_switch_base + max_switch_pc_offset)
2167 {
2168 DprintfT (SP_DUMP_UNWIND, "unwind.c: limit the walk offset after jmp reg instruction\n");
2169 if (jmp_reg_switch_backup_ctx != NULL)
2170 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2171 int rc = process_return_real (wctx, cur, 0);
2172 if (rc == RA_SUCCESS)
2173 {
2174 if (save_ctx)
2175 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2176 return rc;
2177 }
2178 break; // limit the walk offset after jmp reg instruction, goto checkFP
2179 }
2180 }
2181
2182 if (nctx == 0)
2183 break;
2184 // dump_targets (__LINE__, ntrg, targets);
2185 while (cur->pc > targets[cur->tidx])
2186 cur->tidx += 1;
2187 if (cur->pc == targets[cur->tidx])
2188 {
2189 /* Stop analysis. Delete context. */
2190 if (jmp_reg_switch_mode == 0 || cur->pc != jmp_reg_switch_pc_old)
2191 {
2192 if (jmp_reg_switch_mode == 1 && nctx == 1 && jmp_reg_switch_pc == NULL)
2193 {
2194 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d old target, cur->pc=%p, jmp_reg_switch_pc=%p, nctx=%d\n",
2195 __LINE__, cur->pc, jmp_reg_switch_pc, nctx);
2196 jmp_reg_switch_pc = cur->pc; // save cp before delete context, may be used as a start of switch case
2197 jmp_reg_switch_pc_old = jmp_reg_switch_pc;
2198 }
2199 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, old target.\n", __LINE__);
2200 DELETE_CURCTX ();
2201 if (cur >= buf + nctx)
2202 cur = buf;
2203 continue;
2204 }
2205 if (jmp_reg_switch_mode == 1 && cur->pc == jmp_reg_switch_pc_old)
2206 jmp_reg_switch_pc_old = NULL; // reset jmp_reg_switch_pc_old to delete the context later when cur->pc != jmp_reg_switch_pc_old
2207 }
2208
2209 /* let's walk the next x86 instruction */
2210 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cur:%ld pc=0x%lx %02x %02x %02x %02x %02x %02x %02x sp=0x%lx\n",
2211 __LINE__, (long) (cur - buf), (unsigned long) cur->pc,
2212 (int) cur->pc[0], (int) cur->pc[1], (int) cur->pc[2],
2213 (int) cur->pc[3], (int) cur->pc[4], (int) cur->pc[5],
2214 (int) cur->pc[6], (unsigned long) cur->sp);
2215 int v = 4; /* Operand size */
2216 int a = 4; /* Address size */
2217 /* int W = 0; REX.W bit */
2218 #if WSIZE(64)
2219 int R = 0; /* REX.R bit */
2220 #endif
2221 int X = 0; /* REX.X bit */
2222 int B = 0; /* REX.B bit */
2223 /* Check prefixes */
2224 int done = 0;
2225 while (!done)
2226 {
2227 opcode = *cur->pc++;
2228 switch (opcode)
2229 {
2230 case 0x66: /* opd size override */
2231 v = 2;
2232 break;
2233 case 0x67: /*addr size override */
2234 a = 2;
2235 break;
2236 #if WSIZE(64)
2237 case 0x40: /* REX */
2238 case 0x41:
2239 case 0x42:
2240 case 0x43:
2241 case 0x44:
2242 case 0x45:
2243 case 0x46:
2244 case 0x47:
2245 case 0x48:
2246 case 0x49:
2247 case 0x4a:
2248 case 0x4b:
2249 case 0x4c:
2250 case 0x4d:
2251 case 0x4e:
2252 case 0x4f:
2253 B = (opcode & 0x1) ? 8 : 0;
2254 X = (opcode & 0x2) ? 8 : 0;
2255 R = (opcode & 0x4) ? 8 : 0;
2256 if (opcode & 0x8) /* 64 bit operand size */
2257 v = 8;
2258 opcode = *cur->pc++;
2259 done = 1;
2260 break;
2261 #endif
2262 default:
2263 done = 1;
2264 break;
2265 }
2266 }
2267 int z = (v == 8) ? 4 : v;
2268 switch (opcode)
2269 {
2270 case 0x0: /* add Eb,Gb */
2271 case 0x01: /* add Ev,Gv */
2272 case 0x02: /* add Gb,Eb */
2273 case 0x03: /* add Gv,Ev */
2274 cur->pc = check_modrm (cur->pc);
2275 break;
2276 case 0x04: /* add %al,Ib */
2277 cur->pc += 1;
2278 break;
2279 case 0x05: /* add %eax,Iz */
2280 cur->pc += z;
2281 break;
2282 case 0x06: /* push es */
2283 cur->sp -= 1;
2284 break;
2285 case 0x07: /* pop es */
2286 cur->sp += 1;
2287 if (cur->sp - RED_ZONE > cur->sp_safe)
2288 cur->sp_safe = cur->sp - RED_ZONE;
2289 break;
2290 case 0x08: /* or Eb,Gb */
2291 case 0x09: /* or Ev,Gv */
2292 case 0x0a: /* or Gb,Eb */
2293 case 0x0b: /* or Gv,Ev */
2294 cur->pc = check_modrm (cur->pc);
2295 break;
2296 case 0x0c: /* or %al,Ib */
2297 cur->pc += 1;
2298 break;
2299 case 0x0d: /* or %eax,Iz */
2300 cur->pc += z;
2301 break;
2302 case 0x0e: /* push cs */
2303 cur->sp -= 1;
2304 break;
2305 case 0x0f: /* two-byte opcodes */
2306 extop = *cur->pc++;
2307 switch (extop)
2308 { /* RTM or HLE */
2309 case 0x01:
2310 extop2 = *cur->pc;
2311 switch (extop2)
2312 {
2313 case 0xd5: /* xend */
2314 case 0xd6: /* xtest */
2315 cur->pc++;
2316 break;
2317 default:
2318 break;
2319 }
2320 break;
2321 case 0x03:
2322 cur->pc = check_modrm (cur->pc);
2323 break;
2324 case 0x0b:
2325 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, undefined instruction. opcode=0x%02x\n",
2326 __LINE__, (int) opcode);
2327 DELETE_CURCTX ();
2328 break;
2329 case 0x05: /* syscall */
2330 case 0x34: /* sysenter */
2331 if (cur->rax == __NR_exit)
2332 {
2333 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2334 __LINE__, (int) opcode);
2335 DELETE_CURCTX ();
2336 break;
2337 }
2338 else if (cur->rax == __NR_rt_sigreturn)
2339 {
2340 if (jmp_reg_switch_mode == 1)
2341 {
2342 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0x%02x\n",
2343 __LINE__, (int) opcode);
2344 goto checkFP;
2345 }
2346 wctx->sp = (unsigned long) cur->sp;
2347 if (save_ctx)
2348 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
2349 return RA_RT_SIGRETURN;
2350 }
2351 #if WSIZE(32)
2352 else if (cur->rax == __NR_sigreturn)
2353 {
2354 if (jmp_reg_switch_mode == 1)
2355 {
2356 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0x34\n");
2357 goto checkFP;
2358 }
2359 wctx->sp = (unsigned long) cur->sp;
2360 if (save_ctx)
2361 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
2362 return RA_SIGRETURN;
2363 }
2364 #endif
2365 /* Check for Linus' trick in the vsyscall page */
2366 while (*cur->pc == 0x90) /* nop */
2367 cur->pc++;
2368 if (*cur->pc == 0xeb) /* jmp imm8 */
2369 cur->pc += 2;
2370 break;
2371 case 0x0d: /* nop Ev */
2372 cur->pc = check_modrm (cur->pc);
2373 break;
2374 case 0x10: /* xmm Vq,Wq */
2375 case 0x11:
2376 case 0x12:
2377 case 0x13:
2378 case 0x14:
2379 case 0x15:
2380 case 0x16:
2381 case 0x17:
2382 cur->pc = check_modrm (cur->pc);
2383 break;
2384 case 0x18: /* prefetch */
2385 cur->pc = check_modrm (cur->pc);
2386 break;
2387 case 0x1E: /* endbr64/endbr32 (f3 0f 1e .. ) is parsing as repz nop edx */
2388 cur->pc += 2;
2389 break;
2390 case 0x1f: /* nop Ev */
2391 cur->pc = check_modrm (cur->pc);
2392 break;
2393 case 0x28: /* xmm Vq,Wq */
2394 case 0x29:
2395 case 0x2a:
2396 case 0x2b:
2397 case 0x2c:
2398 case 0x2d:
2399 case 0x2e:
2400 case 0x2f:
2401 cur->pc = check_modrm (cur->pc);
2402 break;
2403 case 0x30: /* wrmsr */
2404 case 0x31: /* rdtsc */
2405 case 0x32: /* rdmsr */
2406 case 0x33: /* rdpmc */
2407 break;
2408 /* case 0x34: sysenter (see above) */
2409 case 0x38: case 0x3a:
2410 extop2 = *cur->pc++;
2411 cur->pc = check_modrm (cur->pc);
2412 // 21275311 Unwind failure in native stack for java application running on jdk8
2413 // Three-byte opcodes "66 0f 3a ??" should consume an additional "immediate" byte.
2414 if (extop == 0x3a)
2415 cur->pc++;
2416 break;
2417 case 0x40: case 0x41: case 0x42: case 0x43: /* CMOVcc Gv,Ev */
2418 case 0x44: case 0x45: case 0x46: case 0x47:
2419 case 0x48: case 0x49: case 0x4a: case 0x4b:
2420 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
2421 cur->pc = check_modrm (cur->pc);
2422 break;
2423 case 0x50: case 0x51: case 0x52: case 0x53:
2424 case 0x54: case 0x55: case 0x56: case 0x57:
2425 case 0x58: case 0x59: case 0x5a: case 0x5b:
2426 case 0x5c: case 0x5d: case 0x5e: case 0x5f:
2427 case 0x60: case 0x61: case 0x62: case 0x63:
2428 case 0x64: case 0x65: case 0x66: case 0x67:
2429 case 0x68: case 0x69: case 0x6a: case 0x6b:
2430 case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2431 cur->pc = check_modrm (cur->pc);
2432 break;
2433 case 0x70: case 0x71: case 0x72: case 0x73:
2434 cur->pc = check_modrm (cur->pc) + 1;
2435 break;
2436 case 0x74: case 0x75: case 0x76:
2437 cur->pc = check_modrm (cur->pc);
2438 break;
2439 case 0x77:
2440 break;
2441 case 0x7c: case 0x7d: case 0x7e: case 0x7f:
2442 cur->pc = check_modrm (cur->pc);
2443 break;
2444 case 0x80: case 0x81: case 0x82: case 0x83: /* Jcc Jz */
2445 case 0x84: case 0x85: case 0x86: case 0x87:
2446 case 0x88: case 0x89: case 0x8a: case 0x8b:
2447 case 0x8c: case 0x8d: case 0x8e: case 0x8f:
2448 immv = read_int (cur->pc, z);
2449 cur->pc += z;
2450 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2451 {
2452 int tidx = 0;
2453 unsigned char *npc = cur->pc + immv;
2454 if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
2455 {
2456 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2457 __LINE__, (int) opcode);
2458 DELETE_CURCTX ();
2459 break;
2460 }
2461 if (is_after_ret (npc))
2462 break;
2463 while (npc > targets[tidx])
2464 tidx += 1;
2465 if (npc != targets[tidx])
2466 {
2467 if (ntrg < MAXTRGTS)
2468 {
2469 for (int i = 0; i < nctx; i++)
2470 if (buf[i].tidx >= tidx)
2471 buf[i].tidx++;
2472
2473 /* insert a new target */
2474 for (int i = ntrg; i > tidx; i--)
2475 targets[i] = targets[i - 1];
2476 ntrg += 1;
2477 targets[tidx++] = npc;
2478 }
2479 else
2480 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg=max(%d)\n",
2481 __LINE__, ntrg);
2482 struct AdvWalkContext *new = buf + nctx;
2483 nctx += 1;
2484 __collector_memcpy (new, cur, sizeof (*new));
2485 new->pc = npc;
2486 new->tidx = tidx;
2487 cur = new; /* advance the new context first */
2488 continue;
2489 }
2490 }
2491 else
2492 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx=max(%d)\n",
2493 __LINE__, ntrg);
2494 break;
2495 case 0x90: case 0x91: case 0x92: case 0x93: /* setcc Eb */
2496 case 0x94: case 0x95: case 0x96: case 0x97:
2497 case 0x98: case 0x99: case 0x9a: case 0x9b:
2498 case 0x9c: case 0x9d: case 0x9e: case 0x9f:
2499 cur->pc = check_modrm (cur->pc);
2500 break;
2501 case 0xa0: /* push fs */
2502 cur->sp -= 1;
2503 break;
2504 case 0xa1: /* pop fs */
2505 cur->sp += 1;
2506 if (cur->sp - RED_ZONE > cur->sp_safe)
2507 cur->sp_safe = cur->sp - RED_ZONE;
2508 break;
2509 case 0xa2: /* cpuid */
2510 break;
2511 case 0xa3: /* bt Ev,Gv */
2512 cur->pc = check_modrm (cur->pc);
2513 break;
2514 case 0xa4: /* shld Ev,Gv,Ib */
2515 cur->pc = check_modrm (cur->pc);
2516 cur->pc += 1;
2517 break;
2518 case 0xa5: /* shld Ev,Gv,%cl */
2519 cur->pc = check_modrm (cur->pc);
2520 break;
2521 case 0xa8: /* push gs */
2522 cur->sp -= 1;
2523 break;
2524 case 0xa9: /* pop gs */
2525 cur->sp += 1;
2526 if (cur->sp - RED_ZONE > cur->sp_safe)
2527 cur->sp_safe = cur->sp - RED_ZONE;
2528 break;
2529 case 0xaa: /* rsm */
2530 break;
2531 case 0xab: /* bts Ev,Gv */
2532 cur->pc = check_modrm (cur->pc);
2533 break;
2534 case 0xac: /* shrd Ev,Gv,Ib */
2535 cur->pc = check_modrm (cur->pc);
2536 cur->pc += 1;
2537 break;
2538 case 0xad: /* shrd Ev,Gv,%cl */
2539 cur->pc = check_modrm (cur->pc);
2540 break;
2541 case 0xae: /* group15 */
2542 cur->pc = check_modrm (cur->pc);
2543 break;
2544 case 0xaf: /* imul Gv,Ev */
2545 cur->pc = check_modrm (cur->pc);
2546 break;
2547 case 0xb1: /* cmpxchg Ev,Gv */
2548 cur->pc = check_modrm (cur->pc);
2549 break;
2550 case 0xb3:
2551 case 0xb6: /* movzx Gv,Eb */
2552 case 0xb7: /* movzx Gv,Ew */
2553 cur->pc = check_modrm (cur->pc);
2554 break;
2555 case 0xba: /* group8 Ev,Ib */
2556 cur->pc = check_modrm (cur->pc);
2557 cur->pc += 1;
2558 break;
2559 case 0xbb: /* btc Ev,Gv */
2560 case 0xbc: /* bsf Gv,Ev */
2561 case 0xbd: /* bsr Gv,Ev */
2562 cur->pc = check_modrm (cur->pc);
2563 break;
2564 case 0xbe: /* movsx Gv,Eb */
2565 case 0xbf: /* movsx Gv,Ew */
2566 cur->pc = check_modrm (cur->pc);
2567 break;
2568 case 0xc0: /* xadd Eb,Gb */
2569 case 0xc1: /* xadd Ev,Gv */
2570 cur->pc = check_modrm (cur->pc);
2571 break;
2572 case 0xc2: /* cmpps V,W,Ib */
2573 cur->pc = check_modrm (cur->pc);
2574 cur->pc += 1;
2575 break;
2576 case 0xc3: /* movnti M,G */
2577 cur->pc = check_modrm (cur->pc);
2578 break;
2579 case 0xc6: /* shufps V,W,Ib */
2580 cur->pc = check_modrm (cur->pc);
2581 cur->pc += 1;
2582 break;
2583 case 0xc7: /* RDRAND */
2584 cur->pc = check_modrm (cur->pc);
2585 break;
2586 case 0xc8: case 0xc9: case 0xca: case 0xcb: /* bswap */
2587 case 0xcc: case 0xcd: case 0xce: case 0xcf:
2588 break;
2589 case 0xd0: case 0xd1: case 0xd2: case 0xd3:
2590 case 0xd4: case 0xd5: case 0xd6: case 0xd7:
2591 case 0xd8: case 0xd9: case 0xda: case 0xdb:
2592 case 0xdc: case 0xdd: case 0xde: case 0xdf:
2593 case 0xe0: case 0xe1: case 0xe2: case 0xe3:
2594 case 0xe4: case 0xe5: case 0xe6: case 0xe7:
2595 case 0xe8: case 0xe9: case 0xea: case 0xeb:
2596 case 0xec: case 0xed: case 0xee: case 0xef:
2597 case 0xf0: case 0xf1: case 0xf2: case 0xf3:
2598 case 0xf4: case 0xf5: case 0xf6: case 0xf7:
2599 case 0xf8: case 0xf9: case 0xfa: case 0xfb:
2600 case 0xfc: case 0xfd: case 0xfe: case 0xff:
2601 cur->pc = check_modrm (cur->pc);
2602 break;
2603 default:
2604 if (jmp_reg_switch_mode == 1 && extop == 0x0b)
2605 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d invalid opcode ub2: 0x0f %x jmp_reg_switch_mode=%d\n",
2606 __LINE__, (int) extop, jmp_reg_switch_mode);
2607 else
2608 {
2609 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x0f %x jmp_reg_switch_mode=%d\n",
2610 __LINE__, (int) extop, jmp_reg_switch_mode);
2611 DELETE_CURCTX ();
2612 }
2613 break;
2614 }
2615 break;
2616 case 0x10: /* adc Eb,Gb */
2617 case 0x11: /* adc Ev,Gv */
2618 case 0x12: /* adc Gb,Eb */
2619 case 0x13: /* adc Gv,Ev */
2620 cur->pc = check_modrm (cur->pc);
2621 break;
2622 case 0x14: /* adc %al,Ib */
2623 cur->pc += 1;
2624 break;
2625 case 0x15: /* adc %eax,Iz */
2626 cur->pc += z;
2627 break;
2628 case 0x16: /* push ss */
2629 cur->sp -= 1;
2630 break;
2631 case 0x17: /* pop ss */
2632 cur->sp += 1;
2633 if (cur->sp - RED_ZONE > cur->sp_safe)
2634 cur->sp_safe = cur->sp - RED_ZONE;
2635 break;
2636 case 0x18: /* sbb Eb,Gb */
2637 case 0x19: /* sbb Ev,Gv */
2638 case 0x1a: /* sbb Gb,Eb */
2639 case 0x1b: /* sbb Gv,Ev */
2640 cur->pc = check_modrm (cur->pc);
2641 break;
2642 case 0x1c: /* sbb %al,Ib */
2643 cur->pc += 1;
2644 break;
2645 case 0x1d: /* sbb %eax,Iz */
2646 cur->pc += z;
2647 break;
2648 case 0x1e: /* push ds */
2649 cur->sp -= 1;
2650 break;
2651 case 0x1f: /* pop ds */
2652 cur->sp += 1;
2653 if (cur->sp - RED_ZONE > cur->sp_safe)
2654 cur->sp_safe = cur->sp - RED_ZONE;
2655 break;
2656 case 0x20: /* and Eb,Gb */
2657 case 0x21: /* and Ev,Gv */
2658 case 0x22: /* and Gb,Eb */
2659 case 0x23: /* and Gv,Ev */
2660 cur->pc = check_modrm (cur->pc);
2661 break;
2662 case 0x24: /* and %al,Ib */
2663 cur->pc += 1;
2664 break;
2665 case 0x25: /* and %eax,Iz */
2666 cur->pc += z;
2667 break;
2668 case 0x26: /* seg=es prefix */
2669 break;
2670 case 0x27: /* daa */
2671 break;
2672 case 0x28: /* sub Eb,Gb */
2673 case 0x29: /* sub Ev,Gv */
2674 case 0x2a: /* sub Gb,Eb */
2675 case 0x2b: /* sub Gv,Ev */
2676 cur->pc = check_modrm (cur->pc);
2677 break;
2678 case 0x2c: /* sub %al,Ib */
2679 cur->pc += 1;
2680 break;
2681 case 0x2d: /* sub %eax,Iz */
2682 cur->pc += z;
2683 break;
2684 case 0x2e: /* seg=cs prefix */
2685 break;
2686 case 0x2f: /* das */
2687 break;
2688 case 0x30: /* xor Eb,Gb */
2689 case 0x31: /* xor Ev,Gv */
2690 case 0x32: /* xor Gb,Eb */
2691 case 0x33: /* xor Gv,Ev */
2692 cur->pc = check_modrm (cur->pc);
2693 break;
2694 case 0x34: /* xor %al,Ib */
2695 cur->pc += 1;
2696 break;
2697 case 0x35: /* xor %eax,Iz */
2698 cur->pc += z;
2699 break;
2700 case 0x36: /* seg=ss prefix */
2701 break;
2702 case 0x37: /* aaa */
2703 break;
2704 case 0x38: /* cmp Eb,Gb */
2705 case 0x39: /* cmp Ev,Gv */
2706 case 0x3a: /* cmp Gb,Eb */
2707 case 0x3b: /* cmp Gv,Ev */
2708 cur->pc = check_modrm (cur->pc);
2709 break;
2710 case 0x3c: /* cmp %al,Ib */
2711 cur->pc += 1;
2712 break;
2713 case 0x3d: /* cmp %eax,Iz */
2714 cur->pc += z;
2715 break;
2716 case 0x3e: /* seg=ds prefix */
2717 break;
2718 case 0x3f: /* aas */
2719 break;
2720 #if WSIZE(32)
2721 case 0x40: /* inc %eax */
2722 case 0x41: /* inc %ecx */
2723 case 0x42: /* inc %edx */
2724 case 0x43: /* inc %ebx */
2725 break;
2726 case 0x44: /* inc %esp */
2727 /* Can't be a valid stack pointer - delete context */
2728 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x44.\n", __LINE__);
2729 DELETE_CURCTX ();
2730 break;
2731 case 0x45: /* inc %ebp */
2732 case 0x46: /* inc %esi */
2733 case 0x47: /* inc %edi */
2734 case 0x48: /* dec %eax */
2735 case 0x49: /* dec %ecx */
2736 case 0x4a: /* dec %edx */
2737 case 0x4b: /* dec %ebx */
2738 break;
2739 case 0x4c: /* dec %esp */
2740 /* Can't be a valid stack pointer - delete context */
2741 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x4c.\n", __LINE__);
2742 DELETE_CURCTX ();
2743 break;
2744 case 0x4d: /* dec %ebp */
2745 case 0x4e: /* dec %esi */
2746 case 0x4f: /* dec %edi */
2747 break;
2748 #endif
2749 case 0x50: /* push %eax */
2750 case 0x51: /* push %ecx */
2751 case 0x52: /* push %edx */
2752 case 0x53: /* push %ebx */
2753 case 0x54: /* push %esp */
2754 case 0x55: /* push %ebp */
2755 case 0x56: /* push %esi */
2756 case 0x57: /* push %edi */
2757 cur->sp -= 1;
2758 reg = OPC_REG (opcode);
2759 if (reg == RBP)
2760 {
2761 #if 0
2762 /* Don't do this check yet. Affects tail calls. */
2763 /* avoid other function's prologue */
2764 if ((cur->pc[0] == 0x89 && cur->pc[1] == 0xe5) ||
2765 (cur->pc[0] == 0x8b && cur->pc[1] == 0xec))
2766 {
2767 /* mov %esp,%ebp */
2768 DELETE_CURCTX ();
2769 break;
2770 }
2771 #endif
2772 if (cur->fp_loc == NULL)
2773 {
2774 cur->fp_loc = cur->sp;
2775 cur->fp_sav = cur->fp;
2776 }
2777 }
2778 break;
2779 case 0x58: /* pop %eax */
2780 case 0x59: /* pop %ecx */
2781 case 0x5a: /* pop %edx */
2782 case 0x5b: /* pop %ebx */
2783 case 0x5c: /* pop %esp */
2784 case 0x5d: /* pop %ebp */
2785 case 0x5e: /* pop %esi */
2786 case 0x5f: /* pop %edi */
2787 reg = OPC_REG (opcode);
2788 cur->regs[reg] = 0;
2789 if (isInside ((unsigned long) cur->sp, (unsigned long) cur->sp_safe, wctx->sbase))
2790 cur->regs[reg] = *cur->sp;
2791 DprintfT (SP_DUMP_UNWIND, "stack_unwind:%d cur->regs[%d]=0x%lx\n",
2792 __LINE__, reg, (unsigned long) cur->regs[reg]);
2793 if (reg == RDX)
2794 {
2795 if (cur->sp >= cur->sp_safe &&
2796 (unsigned long) cur->sp < wctx->sbase)
2797 cur->rdx = *cur->sp;
2798 }
2799 else if (reg == RBP)
2800 {
2801 if (cur->fp_loc == cur->sp)
2802 {
2803 cur->fp = cur->fp_sav;
2804 cur->fp_loc = NULL;
2805 }
2806 else if (cur->sp >= cur->sp_safe &&
2807 (unsigned long) cur->sp < wctx->sbase)
2808 cur->fp = (unsigned long*) (*cur->sp);
2809 }
2810 else if (reg == RSP)
2811 {
2812 /* f.e. JVM I2CAdapter */
2813 if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
2814 {
2815 unsigned long *nsp = (unsigned long*) (*cur->sp);
2816 if (nsp >= cur->sp && nsp <= cur->fp)
2817 {
2818 cur->sp = nsp;
2819 }
2820 else
2821 {
2822 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address, opcode=0x%02x\n",
2823 __LINE__, opcode);
2824 goto checkFP;
2825 }
2826 }
2827 else
2828 {
2829 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode=0x%02x\n",
2830 __LINE__, opcode);
2831 goto checkFP;
2832 }
2833 break;
2834 }
2835 cur->sp += 1;
2836 if (cur->sp - RED_ZONE > cur->sp_safe)
2837 {
2838 cur->sp_safe = cur->sp - RED_ZONE;
2839 }
2840 break;
2841 case 0x60: /* pusha(d) */
2842 cur->sp -= 8;
2843 break;
2844 case 0x61: /* popa(d) */
2845 cur->sp += 8;
2846 if (cur->sp - RED_ZONE > cur->sp_safe)
2847 cur->sp_safe = cur->sp - RED_ZONE;
2848 break;
2849 case 0x62: /* group AVX, 4-bytes EVEX prefix */
2850 {
2851 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
2852 int len = parse_x86_AVX_instruction (pc);
2853 if (len < 4)
2854 {
2855 DELETE_CURCTX ();
2856 }
2857 else
2858 {
2859 pc += len;
2860 cur->pc = pc;
2861 }
2862 }
2863 break;
2864 case 0x63: /* arpl Ew,Gw (32) movsxd Gv,Ev (64)*/
2865 cur->pc = check_modrm (cur->pc);
2866 break;
2867 case 0x64: /* seg=fs prefix */
2868 case 0x65: /* seg=gs prefix */
2869 break;
2870 case 0x66: /* opd size override */
2871 case 0x67: /* addr size override */
2872 break;
2873 case 0x68: /* push Iz */
2874 cur->sp = (unsigned long*) ((long) cur->sp - z);
2875 cur->pc += z;
2876 break;
2877 case 0x69: /* imul Gv,Ev,Iz */
2878 cur->pc = check_modrm (cur->pc);
2879 cur->pc += z;
2880 break;
2881 case 0x6a: /* push Ib */
2882 cur->sp = (unsigned long*) ((long) cur->sp - v);
2883 cur->pc += 1;
2884 break;
2885 case 0x6b: /* imul Gv,Ev,Ib */
2886 cur->pc = check_modrm (cur->pc);
2887 cur->pc += 1;
2888 break;
2889 case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2890 cur->pc = check_modrm (cur->pc);
2891 break;
2892 case 0x70: /* jo Jb */
2893 case 0x71: /* jno Jb */
2894 case 0x72: /* jb Jb */
2895 case 0x73: /* jnb Jb */
2896 case 0x74: /* jz Jb */
2897 case 0x75: /* jnz Jb */
2898 case 0x76: /* jna Jb */
2899 case 0x77: /* ja Jb */
2900 case 0x78: /* js Jb */
2901 case 0x79: /* jns Jb */
2902 case 0x7a: /* jp Jb */
2903 case 0x7b: /* jnp Jb */
2904 case 0x7c: /* jl Jb */
2905 case 0x7d: /* jge Jb */
2906 case 0x7e: /* jle Jb */
2907 case 0x7f: /* jg Jb */
2908 imm8 = *(char*) cur->pc++;
2909 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2910 {
2911 int tidx = 0;
2912 unsigned char *npc = cur->pc + imm8;
2913 if (is_after_ret (npc))
2914 break;
2915 while (npc > targets[tidx])
2916 tidx += 1;
2917 if (npc != targets[tidx])
2918 {
2919 if (ntrg < MAXTRGTS)
2920 {
2921 for (int i = 0; i < nctx; i++)
2922 if (buf[i].tidx >= tidx)
2923 buf[i].tidx++;
2924
2925 /* insert a new target */
2926 for (int i = ntrg; i > tidx; i--)
2927 targets[i] = targets[i - 1];
2928 ntrg += 1;
2929 targets[tidx++] = npc;
2930 }
2931 else
2932 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg(%d)=max\n", __LINE__, ntrg);
2933 struct AdvWalkContext *new = buf + nctx;
2934 nctx += 1;
2935 __collector_memcpy (new, cur, sizeof (*new));
2936 new->pc = npc;
2937 new->tidx = tidx;
2938 cur = new; /* advance the new context first */
2939 continue;
2940 }
2941 }
2942 else
2943 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx(%d)=max\n", __LINE__, nctx);
2944 break;
2945 case 0x80: /* group1 Eb,Ib */
2946 cur->pc = check_modrm (cur->pc);
2947 cur->pc += 1;
2948 break;
2949 case 0x81: /* group1 Ev,Iz */
2950 modrm = *cur->pc;
2951 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2952 {
2953 int immz = read_int (cur->pc + 1, z);
2954 extop = MRM_EXT (modrm);
2955 if (extop == 0) /* add imm32,%esp */
2956 cur->sp = (unsigned long*) ((long) cur->sp + immz);
2957 else if (extop == 4) /* and imm32,%esp */
2958 cur->sp = (unsigned long*) ((long) cur->sp & immz);
2959 else if (extop == 5) /* sub imm32,%esp */
2960 cur->sp = (unsigned long*) ((long) cur->sp - immz);
2961 if (cur->sp - RED_ZONE > cur->sp_safe)
2962 cur->sp_safe = cur->sp - RED_ZONE;
2963 }
2964 cur->pc = check_modrm (cur->pc);
2965 cur->pc += z;
2966 break;
2967 case 0x82: /* group1 Eb,Ib */
2968 cur->pc = check_modrm (cur->pc);
2969 cur->pc += 1;
2970 break;
2971 case 0x83: /* group1 Ev,Ib */
2972 modrm = *cur->pc;
2973 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2974 {
2975 imm8 = (char) cur->pc[1]; /* sign extension */
2976 extop = MRM_EXT (modrm);
2977 if (extop == 0) /* add imm8,%esp */
2978 cur->sp = (unsigned long*) ((long) cur->sp + imm8);
2979 else if (extop == 4) /* and imm8,%esp */
2980 cur->sp = (unsigned long*) ((long) cur->sp & imm8);
2981 else if (extop == 5) /* sub imm8,%esp */
2982 cur->sp = (unsigned long*) ((long) cur->sp - imm8);
2983 if (cur->sp - RED_ZONE > cur->sp_safe)
2984 cur->sp_safe = cur->sp - RED_ZONE;
2985 }
2986 cur->pc = check_modrm (cur->pc);
2987 cur->pc += 1;
2988 break;
2989 case 0x84: /* test Eb,Gb */
2990 case 0x85: /* test Ev,Gv */
2991 case 0x86: /* xchg Eb,Gb */
2992 case 0x87: /* xchg Ev,Gv */
2993 cur->pc = check_modrm (cur->pc);
2994 break;
2995 case 0x88: /* mov Eb,Gb */
2996 cur->pc = check_modrm (cur->pc);
2997 break;
2998 case 0x89: /* mov Ev,Gv */
2999 modrm = *cur->pc;
3000 if (MRM_MOD (modrm) == 0xc0)
3001 {
3002 if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3003 /* movl %esp,%ebp */
3004 cur->fp = cur->sp;
3005 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3006 { /* mov %ebp,%esp */
3007 cur->sp = cur->fp;
3008 if (cur->sp - RED_ZONE > cur->sp_safe)
3009 cur->sp_safe = cur->sp - RED_ZONE;
3010 if (wctx->fp == (unsigned long) cur->sp)
3011 cur->cval = RA_FROMFP;
3012 }
3013 }
3014 else if (MRM_MOD (modrm) == 0x80)
3015 {
3016 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3017 {
3018 if (cur->pc[1] == 0x24)
3019 { /* mov %ebp,disp32(%esp) - JVM */
3020 immv = read_int (cur->pc + 2, 4);
3021 cur->fp_loc = (unsigned long*) ((char*) cur->sp + immv);
3022 cur->fp_sav = cur->fp;
3023 }
3024 }
3025 }
3026 else if (MRM_MOD (modrm) == 0x40)
3027 {
3028 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3029 {
3030 if (cur->pc[1] == 0x24 && cur->pc[2] == 0x0)
3031 { /* movl %edx,0(%esp) */
3032 cur->ra_loc = cur->sp;
3033 cur->ra_sav = cur->rdx;
3034 }
3035 }
3036 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3037 {
3038 if (cur->pc[1] == 0x24)
3039 { /* mov %ebp,disp8(%esp) - JVM */
3040 imm8 = ((char*) (cur->pc))[2];
3041 cur->fp_loc = (unsigned long*) ((char*) cur->sp + imm8);
3042 cur->fp_sav = cur->fp;
3043 }
3044 }
3045 }
3046 else if (MRM_MOD (modrm) == 0x0)
3047 {
3048 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3049 {
3050 if (cur->pc[1] == 0x24)
3051 { /* mov %ebp,(%esp) */
3052 cur->fp_loc = cur->sp;
3053 cur->fp_sav = cur->fp;
3054 }
3055 }
3056 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3057 {
3058 if (cur->pc[1] == 0x24)
3059 { /* movl %edx,(%esp) */
3060 cur->ra_loc = cur->sp;
3061 cur->ra_sav = cur->rdx;
3062 }
3063 }
3064 }
3065 cur->pc = check_modrm (cur->pc);
3066 break;
3067 case 0x8a: /* mov Gb,Eb */
3068 cur->pc = check_modrm (cur->pc);
3069 break;
3070 case 0x8b: /* mov Gv,Ev */
3071 modrm = *cur->pc;
3072 if (MRM_MOD (modrm) == 0xc0)
3073 {
3074 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3075 /* mov %esp,%ebp */
3076 cur->fp = cur->sp;
3077 else if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3078 { /* mov %ebp,%esp */
3079 cur->sp = cur->fp;
3080 if (cur->sp - RED_ZONE > cur->sp_safe)
3081 cur->sp_safe = cur->sp - RED_ZONE;
3082 if (wctx->fp == (unsigned long) cur->sp)
3083 cur->cval = RA_FROMFP;
3084 }
3085 }
3086 else if (MRM_MOD (modrm) == 0x80)
3087 {
3088 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3089 {
3090 if (cur->pc[1] == 0x24)
3091 { /* mov disp32(%esp),%ebp */
3092 immv = read_int (cur->pc + 2, 4);
3093 unsigned long *ptr = (unsigned long*) ((char*) cur->sp + immv);
3094 if (cur->fp_loc == ptr)
3095 {
3096 cur->fp = cur->fp_sav;
3097 cur->fp_loc = NULL;
3098 }
3099 else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3100 cur->fp = (unsigned long*) (*ptr);
3101 }
3102 }
3103 }
3104 else if (MRM_MOD (modrm) == 0x40)
3105 {
3106 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3107 {
3108 if (cur->pc[1] == 0x24)
3109 { /* mov disp8(%esp),%ebp - JVM */
3110 imm8 = ((char*) (cur->pc))[2];
3111 unsigned long *ptr = (unsigned long*) ((char*) cur->sp + imm8);
3112 if (cur->fp_loc == ptr)
3113 {
3114 cur->fp = cur->fp_sav;
3115 cur->fp_loc = NULL;
3116 }
3117 else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3118 cur->fp = (unsigned long*) (*ptr);
3119 }
3120 }
3121 }
3122 else if (MRM_MOD (modrm) == 0x0)
3123 {
3124 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3125 {
3126 if (cur->pc[1] == 0x24)
3127 { /* mov (%esp),%ebp */
3128 if (cur->fp_loc == cur->sp)
3129 {
3130 cur->fp = cur->fp_sav;
3131 cur->fp_loc = NULL;
3132 }
3133 else if (cur->sp >= cur->sp_safe &&
3134 (unsigned long) cur->sp < wctx->sbase)
3135 cur->fp = (unsigned long*) *cur->sp;
3136 }
3137 }
3138 }
3139 cur->pc = check_modrm (cur->pc);
3140 break;
3141 case 0x8c: /* mov Mw,Sw */
3142 cur->pc = check_modrm (cur->pc);
3143 break;
3144 case 0x8d: /* lea Gv,M */
3145 modrm = *cur->pc;
3146 if (MRM_REGD (modrm) == RSP)
3147 {
3148 unsigned char *pc = cur->pc;
3149 // Mez: need to use always regs[RSP/RBP] instead cur->sp(or fp):
3150 cur->regs[RSP] = (unsigned long) cur->sp;
3151 cur->regs[RBP] = (unsigned long) cur->fp;
3152 cur->pc++;
3153 int mod = (modrm >> 6) & 3;
3154 int r_m = modrm & 7;
3155 long val = 0;
3156 int undefRez = 0;
3157 if (mod == 0x3)
3158 val = getRegVal (cur, MRM_REGS (modrm), &undefRez);
3159 else if (r_m == 4)
3160 { // SP or R12. Decode SIB-byte.
3161 int sib = *cur->pc++;
3162 int scale = 1 << (sib >> 6);
3163 int index = X | ((sib >> 3) & 7);
3164 int base = B | (sib & 7);
3165 if (mod == 0)
3166 {
3167 if ((base & 7) == 5)
3168 { // BP or R13
3169 if (index != 4) // SP
3170 val += getRegVal (cur, index, &undefRez) * scale;
3171 val += read_int (cur->pc, 4);
3172 cur->pc += 4;
3173 }
3174 else
3175 {
3176 val += getRegVal (cur, base, &undefRez);
3177 if (index != 4) // SP
3178 val += getRegVal (cur, index, &undefRez) * scale;
3179 }
3180 }
3181 else
3182 {
3183 val += getRegVal (cur, base, &undefRez);
3184 if (index != 4) // SP
3185 val += getRegVal (cur, index, &undefRez) * scale;
3186 if (mod == 1)
3187 {
3188 val += read_int (cur->pc, 1);
3189 cur->pc++;
3190 }
3191 else
3192 { // mod == 2
3193 val += read_int (cur->pc, 4);
3194 cur->pc += 4;
3195 }
3196 }
3197 }
3198 else if (mod == 0)
3199 {
3200 if (r_m == 5)
3201 { // BP or R13
3202 val += read_int (cur->pc, 4);
3203 cur->pc += 4;
3204 }
3205 else
3206 val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3207 }
3208 else
3209 { // mod == 1 || mod == 2
3210 val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3211 if (mod == 1)
3212 {
3213 val += read_int (cur->pc, 1);
3214 cur->pc++;
3215 }
3216 else
3217 { // mod == 2
3218 val += read_int (cur->pc, 4);
3219 cur->pc += 4;
3220 }
3221 }
3222 if (undefRez)
3223 {
3224 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx val=0x%lx\n",
3225 __LINE__, (unsigned long) cur->pc, (unsigned long) val);
3226 goto checkFP;
3227 }
3228 cur->regs[MRM_REGD (modrm)] = val;
3229 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cur->pc=0x%lx val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3230 __LINE__, (unsigned long) cur->pc, (unsigned long) val,
3231 (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3232 if (cur->pc != check_modrm (pc))
3233 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d ERROR: cur->pc=0x%lx != check_modrm(0x%lx)=0x%lx\n",
3234 __LINE__, (unsigned long) cur->pc, (unsigned long) pc,
3235 (unsigned long) check_modrm (pc));
3236 if (MRM_REGD (modrm) == RSP)
3237 {
3238 if (!isInside ((unsigned long) val, wctx->sp, wctx->sbase))
3239 {
3240 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx opcode=0x%02x val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3241 __LINE__, (unsigned long) cur->pc, opcode, (unsigned long) val,
3242 (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3243 goto checkFP;
3244 }
3245 cur->sp = (unsigned long *) val;
3246 if (cur->sp - RED_ZONE > cur->sp_safe)
3247 cur->sp_safe = cur->sp - RED_ZONE;
3248 }
3249 }
3250 else
3251 cur->pc = check_modrm (cur->pc);
3252 break;
3253 case 0x8e: /* mov Sw,Ew */
3254 cur->pc = check_modrm (cur->pc);
3255 break;
3256 case 0x8f: /* pop Ev */
3257 cur->pc = check_modrm (cur->pc);
3258 cur->sp += 1;
3259 if (cur->sp - RED_ZONE > cur->sp_safe)
3260 cur->sp_safe = cur->sp - RED_ZONE;
3261 break;
3262 case 0x90: /* nop */
3263 break;
3264 case 0x91: /* xchg %eax,%ecx */
3265 case 0x92: /* xchg %eax,%edx */
3266 case 0x93: /* xchg %eax,%ebx */
3267 case 0x94: /* xchg %eax,%esp XXXX */
3268 case 0x95: /* xchg %eax,%ebp XXXX */
3269 case 0x96: /* xchg %eax,%esi */
3270 case 0x97: /* xchg %eax,%edi */
3271 break;
3272 case 0x98: /* cbw/cwde */
3273 case 0x99: /* cwd/cwq */
3274 break;
3275 case 0x9a: /* callf Ap */
3276 if (jmp_reg_switch_mode == 1)
3277 {
3278 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3279 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3280 int rc = process_return (wctx, tmpctx);
3281 if (rc != RA_FAILURE)
3282 {
3283 if (save_ctx)
3284 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3285 return rc;
3286 }
3287 }
3288 cur->pc += 2 + a;
3289 break;
3290 case 0x9b: /* fwait */
3291 case 0x9c: /* pushf Fv */
3292 case 0x9d: /* popf Fv */
3293 case 0x9e: /* sahf */
3294 case 0x9f: /* lahf */
3295 break;
3296 case 0xa0: /* mov al,Ob */
3297 case 0xa1: /* mov eax,Ov */
3298 case 0xa2: /* mov Ob,al */
3299 case 0xa3: /* mov Ov,eax */
3300 cur->pc += a;
3301 break;
3302 case 0xa4: /* movsb Yb,Xb */
3303 case 0xa5: /* movsd Yv,Xv */
3304 case 0xa6: /* cmpsb Yb,Xb */
3305 case 0xa7: /* cmpsd Xv,Yv */
3306 break;
3307 case 0xa8: /* test al,Ib */
3308 cur->pc += 1;
3309 break;
3310 case 0xa9: /* test eax,Iz */
3311 cur->pc += z;
3312 break;
3313 case 0xaa: /* stosb Yb,%al */
3314 case 0xab: /* stosd Yv,%eax */
3315 case 0xac: /* lodsb %al,Xb */
3316 case 0xad: /* lodsd %eax,Xv */
3317 case 0xae: /* scasb %al,Yb */
3318 case 0xaf: /* scasd %eax,Yv */
3319 break;
3320 case 0xb0: /* mov %al,Ib */
3321 case 0xb1: /* mov %cl,Ib */
3322 case 0xb2: /* mov %dl,Ib */
3323 case 0xb3: /* mov %bl,Ib */
3324 case 0xb4: /* mov %ah,Ib */
3325 case 0xb5: /* mov %ch,Ib */
3326 case 0xb6: /* mov %dh,Ib */
3327 case 0xb7: /* mov %bh,Ib */
3328 cur->pc += 1;
3329 break;
3330 case 0xb8: /* mov Iv,%eax */
3331 case 0xb9: /* mov Iv,%ecx */
3332 case 0xba: /* mov Iv,%edx */
3333 case 0xbb: /* mov Iv,%ebx */
3334 case 0xbc: /* mov Iv,%esp */
3335 case 0xbd: /* mov Iv,%rbp */
3336 case 0xbe: /* mov Iv,%esi */
3337 case 0xbf: /* mov Iv,%edi */
3338 reg = OPC_REG (opcode);
3339 if (reg == RAX)
3340 cur->rax = read_int (cur->pc, v);
3341 cur->pc += v;
3342 break;
3343 case 0xc0: /* group2 Eb,Ib */
3344 case 0xc1: /* group2 Ev,Ib */
3345 cur->pc = check_modrm (cur->pc) + 1;
3346 break;
3347 case 0xc2: /* ret Iw */
3348 /* In the dynamic linker we may see that
3349 * the actual return address is at sp+immv,
3350 * while sp points to the resolved address.
3351 */
3352 {
3353 immv = read_int (cur->pc, 2);
3354 int rc = process_return (wctx, cur);
3355 if (rc != RA_FAILURE)
3356 {
3357 if (jmp_reg_switch_mode == 1)
3358 {
3359 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address under jmp switch mode, opcode = 0xc2\n", __LINE__);
3360 goto checkFP;
3361 }
3362 wctx->sp += immv;
3363 if (save_ctx)
3364 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3365 return rc;
3366 }
3367 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc2.\n", __LINE__);
3368 DELETE_CURCTX ();
3369 }
3370 break;
3371 case 0xc3: /* ret */
3372 {
3373 int rc = process_return (wctx, cur);
3374 if (rc != RA_FAILURE)
3375 {
3376 if (save_ctx)
3377 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3378 return rc;
3379 }
3380 if (jmp_reg_switch_mode == 1)
3381 jmp_reg_switch_pc = cur->pc;
3382 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc3.\n", __LINE__);
3383 DELETE_CURCTX ();
3384 }
3385 break;
3386 case 0xc4: /* group AVX, 3-bytes VEX prefix */
3387 {
3388 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3389 int len = parse_x86_AVX_instruction (pc);
3390 if (len < 3)
3391 DELETE_CURCTX ();
3392 else
3393 {
3394 pc += len;
3395 cur->pc = pc;
3396 }
3397 }
3398 break;
3399 case 0xc5: /* group AVX, 2-bytes VEX prefix */
3400 {
3401 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3402 int len = parse_x86_AVX_instruction (pc);
3403 if (len < 2)
3404 DELETE_CURCTX ();
3405 else
3406 {
3407 pc += len;
3408 cur->pc = pc;
3409 }
3410 }
3411 break;
3412 case 0xc6:
3413 modrm = *cur->pc;
3414 if (modrm == 0xf8) /* xabort */
3415 cur->pc += 2;
3416 else /* mov Eb,Ib */
3417 cur->pc = check_modrm (cur->pc) + 1;
3418 break;
3419 case 0xc7:
3420 modrm = *cur->pc;
3421 if (modrm == 0xf8) /* xbegin */
3422 cur->pc += v + 1;
3423 else
3424 { /* mov Ev,Iz */
3425 extop = MRM_EXT (modrm);
3426 if (extop != 0)
3427 {
3428 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xc7\n", __LINE__);
3429 goto checkFP;
3430 }
3431 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RAX)
3432 cur->rax = read_int (cur->pc + 1, z);
3433 cur->pc = check_modrm (cur->pc) + z;
3434 }
3435 break;
3436 case 0xc8: /* enter Iw,Ib */
3437 cur->pc += 3;
3438 break;
3439 case 0xc9: /* leave */
3440 /* mov %ebp,%esp */
3441 cur->sp = cur->fp;
3442 /* pop %ebp */
3443 if (cur->fp_loc == cur->sp)
3444 {
3445 cur->fp = cur->fp_sav;
3446 cur->fp_loc = NULL;
3447 }
3448 else if (cur->sp >= cur->sp_safe &&
3449 (unsigned long) cur->sp < wctx->sbase)
3450 {
3451 cur->fp = (unsigned long*) (*cur->sp);
3452 if (wctx->fp == (unsigned long) cur->sp)
3453 cur->cval = RA_FROMFP;
3454 }
3455 cur->sp += 1;
3456 if (cur->sp - RED_ZONE > cur->sp_safe)
3457 cur->sp_safe = cur->sp - RED_ZONE;
3458 break;
3459 case 0xca: /* retf Iw */
3460 cur->pc += 2; /* XXXX process return */
3461 break;
3462 case 0xcb: /* retf */
3463 break; /* XXXX process return */
3464 case 0xcc: /* int 3 */
3465 break;
3466 case 0xcd: /* int Ib */
3467 if (*cur->pc == 0x80)
3468 {
3469 if (cur->rax == __NR_exit)
3470 {
3471 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xcd.\n", __LINE__);
3472 DELETE_CURCTX ();
3473 break;
3474 }
3475 else if (cur->rax == __NR_rt_sigreturn)
3476 {
3477 if (jmp_reg_switch_mode == 1)
3478 {
3479 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0xcd\n",
3480 __LINE__);
3481 goto checkFP;
3482 }
3483 wctx->sp = (unsigned long) cur->sp;
3484 if (save_ctx)
3485 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
3486 return RA_RT_SIGRETURN;
3487 }
3488 #if WSIZE(32)
3489 else if (cur->rax == __NR_sigreturn)
3490 {
3491 if (jmp_reg_switch_mode == 1)
3492 {
3493 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode = 0xc2\n",
3494 __LINE__);
3495 goto checkFP;
3496 }
3497 wctx->sp = (unsigned long) cur->sp;
3498 if (save_ctx)
3499 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
3500 return RA_SIGRETURN;
3501 }
3502 #endif
3503 }
3504 cur->pc += 1;
3505 break;
3506 case 0xce: /* into */
3507 case 0xcf: /* iret */
3508 break;
3509 case 0xd0: /* shift group2 Eb,1 */
3510 case 0xd1: /* shift group2 Ev,1 */
3511 case 0xd2: /* shift group2 Eb,%cl */
3512 case 0xd3: /* shift group2 Ev,%cl */
3513 cur->pc = check_modrm (cur->pc);
3514 break;
3515 case 0xd4: /* aam Ib */
3516 cur->pc += 1;
3517 break;
3518 case 0xd5: /* aad Ib */
3519 cur->pc += 1;
3520 break;
3521 case 0xd6: /* falc? */
3522 break;
3523 case 0xd7:
3524 cur->pc = check_modrm (cur->pc);
3525 cur->pc++;
3526 break;
3527 case 0xd8: /* esc instructions */
3528 case 0xd9:
3529 case 0xda:
3530 case 0xdb:
3531 case 0xdc:
3532 case 0xdd:
3533 case 0xde:
3534 case 0xdf:
3535 cur->pc = check_modrm (cur->pc);
3536 break;
3537 case 0xe0: /* loopne Jb */
3538 case 0xe1: /* loope Jb */
3539 case 0xe2: /* loop Jb */
3540 case 0xe3: /* jcxz Jb */
3541 imm8 = *(char*) cur->pc++;
3542 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
3543 {
3544 int tidx = 0;
3545 unsigned char *npc = cur->pc + imm8;
3546 if (is_after_ret (npc))
3547 break;
3548 while (npc > targets[tidx])
3549 tidx += 1;
3550 if (npc != targets[tidx])
3551 {
3552 if (ntrg < MAXTRGTS)
3553 {
3554 for (int i = 0; i < nctx; i++)
3555 if (buf[i].tidx >= tidx)
3556 buf[i].tidx++;
3557 /* insert a new target */
3558 for (int i = ntrg; i > tidx; i--)
3559 targets[i] = targets[i - 1];
3560 ntrg += 1;
3561 targets[tidx++] = npc;
3562 }
3563 else
3564 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3565 struct AdvWalkContext *new = buf + nctx;
3566 nctx += 1;
3567 __collector_memcpy (new, cur, sizeof (*new));
3568 new->pc = npc;
3569 new->tidx = tidx;
3570 cur = new; /* advance the new context first */
3571 continue;
3572 }
3573 }
3574 else
3575 DprintfT (SP_DUMP_UNWIND, "unwind.c: nctx = max\n");
3576 break;
3577 case 0xe4: case 0xe5:
3578 cur->pc = check_modrm (cur->pc);
3579 cur->pc++;
3580 break;
3581 case 0xe6: case 0xe7:
3582 cur->pc++;
3583 cur->pc = check_modrm (cur->pc);
3584 break;
3585 case 0xec: case 0xed: case 0xee: case 0xef:
3586 cur->pc = check_modrm (cur->pc);
3587 break;
3588 case 0xe8: /* call Jz (f64) */
3589 {
3590 if (jmp_reg_switch_mode == 1)
3591 {
3592 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3593 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3594 int rc = process_return (wctx, tmpctx);
3595 if (rc != RA_FAILURE)
3596 {
3597 if (save_ctx)
3598 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3599 return rc;
3600 }
3601 }
3602 int immz = read_int (cur->pc, z);
3603 if (immz == 0)
3604 /* special case in PIC code */
3605 cur->sp -= 1;
3606 cur->pc += z;
3607 }
3608 break;
3609 case 0xe9: /* jump Jz */
3610 {
3611 int immz = read_int (cur->pc, z);
3612 unsigned char *npc = cur->pc + z + immz;
3613 if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
3614 {
3615 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3616 DELETE_CURCTX ();
3617 break;
3618 }
3619 int tidx = 0;
3620 while (npc > targets[tidx])
3621 tidx += 1;
3622 if (npc != targets[tidx])
3623 {
3624 if (ntrg < MAXTRGTS)
3625 {
3626 for (int i = 0; i < nctx; i++)
3627 if (buf[i].tidx >= tidx)
3628 buf[i].tidx++;
3629 /* insert a new target */
3630 for (int i = ntrg; i > tidx; i--)
3631 targets[i] = targets[i - 1];
3632 ntrg += 1;
3633 targets[tidx++] = npc;
3634 }
3635 else
3636 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3637 cur->pc = npc;
3638 cur->tidx = tidx;
3639 continue; /* advance this context first */
3640 }
3641 else
3642 {
3643 /* Delete context */
3644 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3645 DELETE_CURCTX ();
3646 }
3647 }
3648 break;
3649 case 0xeb: /* jump imm8 */
3650 {
3651 imm8 = *(char*) cur->pc++;
3652 int tidx = 0;
3653 unsigned char *npc = cur->pc + imm8;
3654 while (npc > targets[tidx])
3655 tidx += 1;
3656 if (npc != targets[tidx])
3657 {
3658 if (ntrg < MAXTRGTS)
3659 {
3660 for (int i = 0; i < nctx; i++)
3661 if (buf[i].tidx >= tidx)
3662 buf[i].tidx++;
3663 /* insert a new target */
3664 for (int i = ntrg; i > tidx; i--)
3665 targets[i] = targets[i - 1];
3666 ntrg += 1;
3667 targets[tidx++] = npc;
3668 }
3669 else
3670 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3671 cur->pc = npc;
3672 cur->tidx = tidx;
3673 continue; /* advance this context first */
3674 }
3675 else
3676 {
3677 /* Delete context */
3678 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xeb.\n", __LINE__);
3679 DELETE_CURCTX ();
3680 }
3681 }
3682 break;
3683 case 0xf0: /* lock prefix */
3684 case 0xf2: /* repne prefix */
3685 case 0xf3: /* repz prefix */
3686 break;
3687 case 0xf4: /* hlt */
3688 extop2 = *(cur->pc - 3);
3689 if (extop2 == 0x90)
3690 {
3691 // 17851712 occasional SEGV in find_i386_ret_addr in unwind.c during attach
3692 if (save_ctx)
3693 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3694 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3695 return RA_END_OF_STACK;
3696 }
3697 /* We see 'hlt' in _start. Stop analysis, revert to FP */
3698 /* A workaround for the Linux main stack */
3699 if (nctx > 1)
3700 {
3701 DELETE_CURCTX ();
3702 break;
3703 }
3704 if (cur->fp == 0)
3705 {
3706 if (jmp_reg_switch_mode == 1)
3707 {
3708 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xf4\n");
3709 goto checkFP;
3710 }
3711 cache_put (wctx, RA_EOSTCK);
3712 wctx->pc = 0;
3713 wctx->sp = 0;
3714 wctx->fp = 0;
3715 if (save_ctx)
3716 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3717 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3718 return RA_END_OF_STACK;
3719 }
3720 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xf4\n", __LINE__);
3721 goto checkFP;
3722 case 0xf5: /* cmc */
3723 break;
3724 case 0xf6: /* group3 Eb */
3725 modrm = *cur->pc;
3726 extop = MRM_EXT (modrm);
3727 cur->pc = check_modrm (cur->pc);
3728 if (extop == 0x0) /* test Ib */
3729 cur->pc += 1;
3730 break;
3731 case 0xf7: /* group3 Ev */
3732 modrm = *cur->pc;
3733 extop = MRM_EXT (modrm);
3734 cur->pc = check_modrm (cur->pc);
3735 if (extop == 0x0) /* test Iz */
3736 cur->pc += z;
3737 break;
3738 case 0xf8: /* clc */
3739 case 0xf9: /* stc */
3740 case 0xfa: /* cli */
3741 case 0xfb: /* sti */
3742 case 0xfc: /* cld */
3743 case 0xfd: /* std */
3744 break;
3745 case 0xfe: /* group4 */
3746 modrm = *cur->pc;
3747 extop = MRM_EXT (modrm);
3748 switch (extop)
3749 {
3750 case 0x0: /* inc Eb */
3751 case 0x1: /* dec Eb */
3752 cur->pc = check_modrm (cur->pc);
3753 break;
3754 case 0x7:
3755 cur->pc = check_modrm (cur->pc);
3756 break;
3757 default:
3758 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xfe %x\n",
3759 __LINE__, extop);
3760 DELETE_CURCTX ();
3761 break;
3762 }
3763 break;
3764 case 0xff: /* group5 */
3765 modrm = *cur->pc;
3766 extop = MRM_EXT (modrm);
3767 switch (extop)
3768 {
3769 case 0x0: /* inc Ev */
3770 case 0x1: /* dec Ev */
3771 cur->pc = check_modrm (cur->pc);
3772 break;
3773 case 0x2: /* calln Ev */
3774 if (jmp_reg_switch_mode == 1)
3775 {
3776 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3777 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3778 int rc = process_return (wctx, tmpctx);
3779 if (rc != RA_FAILURE)
3780 {
3781 if (save_ctx)
3782 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3783 return rc;
3784 }
3785 }
3786 cur->pc = check_modrm (cur->pc);
3787 break;
3788 case 0x3: /* callf Ep */
3789 if (jmp_reg_switch_mode == 1)
3790 {
3791 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3792 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3793 int rc = process_return (wctx, tmpctx);
3794 if (rc != RA_FAILURE)
3795 {
3796 if (save_ctx)
3797 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3798 return rc;
3799 }
3800 }
3801 cur->pc = check_modrm (cur->pc); /* XXXX */
3802 break;
3803 case 0x4: /* jumpn Ev */
3804 /* This instruction appears in PLT or
3805 * in tail call optimization.
3806 * In both cases treat it as return.
3807 * Save jump *(reg) - switch, etc, for later use when no ctx left
3808 */
3809 if (modrm == 0x25 || /* jumpn *disp32 */
3810 MRM_MOD (modrm) == 0x40 || /* jumpn byte(reg) */
3811 MRM_MOD (modrm) == 0x80) /* jumpn word(reg) */
3812 {
3813 DprintfT (SP_DUMP_UNWIND, "unwind.c: PLT or tail call: %p\n", cur->pc - 1);
3814 int rc = process_return (wctx, cur);
3815 if (rc != RA_FAILURE)
3816 {
3817 if (jmp_reg_switch_mode == 1 && total_num_jmp_reg < max_num_jmp_reg_seen)
3818 {
3819 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xff\n");
3820 goto checkFP;
3821 }
3822 if (save_ctx)
3823 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3824 return rc;
3825 }
3826 }
3827 else if (modrm != 0x24 /*ignore SIB*/) /* jumpn *(reg) or jumpn reg */
3828 {
3829 // 22846120 stack unwind does not find caller of __memcpy_ssse3_back with B64 intel-Linux
3830 /*
3831 * For now, let's deal rather narrowly with this scenario. If:
3832 * - we are in the middle of an "ff e2" instruction, and
3833 * - the next instruction is undefined ( 0f 0b == ud2 )
3834 * then test return. (Might eventually have to broaden the scope
3835 * of this fix to other registers/etc.)
3836 */
3837 if (cur->pc[0] == 0xe2 && cur->pc[1] == 0x0f && cur->pc[2] == 0x0b)
3838 {
3839 int rc = process_return_real (wctx, cur, 0);
3840 if (rc == RA_SUCCESS)
3841 {
3842 if (save_ctx)
3843 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3844 return rc;
3845 }
3846 }
3847
3848 // 22691241 shjsynprog, jsynprog core dump from find_i386_ret_addr
3849 /*
3850 * Here is another oddity. Java 9 seems to emit dynamically generated
3851 * code where a code block ends with a "jmp *reg" and then padding to a
3852 * multiple-of-16 boundary and then a bunch of 0s. In this case, let's
3853 * not continue to walk bytes since we would be walking off the end of
3854 * the instructions into ... something. Treating them as instructions
3855 * can lead to unexpected results, including SEGV.
3856 */
3857 /*
3858 * While the general problem deserves a better solution, let's look
3859 * here only for one particular case:
3860 * 0xff 0xe7 jmp *reg
3861 * nop to bring us to a multiple-of-16 boundary
3862 * 0x0000000000000a00 something that does not look like an instruction
3863 *
3864 * A different nop might be used depending on how much padding is needed
3865 * to reach that multiple-of-16 boundary. We've seen two:
3866 * 0x90 one byte
3867 * 0x0f 0x1f 0x40 0x00 four bytes
3868 */
3869 // confirm the instruction is 0xff 0xe7
3870 if (cur->pc[0] == 0xe7)
3871 {
3872 // check for correct-length nop and find next 16-byte boundary
3873 int found_nop = 0;
3874 unsigned long long *boundary = 0;
3875 switch ((((unsigned long) (cur->pc)) & 0xf))
3876 {
3877 case 0xb: // look for 4-byte nop
3878 if (*((unsigned *) (cur->pc + 1)) == 0x00401f0f)
3879 found_nop = 1;
3880 boundary = (unsigned long long *) (cur->pc + 5);
3881 break;
3882 case 0xe: // look for 1-byte nop
3883 if (cur->pc[1] == 0x90)
3884 found_nop = 1;
3885 boundary = (unsigned long long *) (cur->pc + 2);
3886 break;
3887 default:
3888 break;
3889 }
3890
3891 // if nop is found, check what's at the boundary
3892 if (found_nop && *boundary == 0x000000000a00)
3893 {
3894 DELETE_CURCTX ();
3895 break;
3896 }
3897 }
3898
3899 DprintfT (SP_DUMP_UNWIND, "unwind.c: probably PLT or tail call or switch table: %p\n",
3900 cur->pc - 1);
3901 if (num_jmp_reg < expected_num_jmp_reg)
3902 {
3903 if (jmp_reg_ctx[num_jmp_reg] == NULL)
3904 jmp_reg_ctx[num_jmp_reg] = (struct AdvWalkContext *) alloca (sizeof (*cur));
3905 if (jmp_reg_ctx[num_jmp_reg] != NULL)
3906 __collector_memcpy (jmp_reg_ctx[num_jmp_reg], cur, sizeof (*cur));
3907 }
3908 if (num_jmp_reg < expected_num_jmp_reg ||
3909 (num_jmp_reg >= expected_num_jmp_reg &&
3910 jmp_reg_ctx[expected_num_jmp_reg - 1] != NULL &&
3911 cur->pc != jmp_reg_ctx[expected_num_jmp_reg - 1]->pc))
3912 {
3913 num_jmp_reg++;
3914 total_num_jmp_reg++;
3915 }
3916 if (jmp_reg_switch_mode == 1 && total_num_jmp_reg >= max_num_jmp_reg_seen)
3917 {
3918 int rc = process_return_real (wctx, cur, 0);
3919 if (rc == RA_SUCCESS)
3920 {
3921 if (save_ctx)
3922 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3923 return rc;
3924 }
3925 }
3926 }
3927 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xff.\n", __LINE__);
3928 DELETE_CURCTX ();
3929 break;
3930 case 0x5: /* jmpf Ep */
3931 cur->pc = check_modrm (cur->pc); /* XXXX */
3932 break;
3933 case 0x6: /* push Ev */
3934 cur->pc = check_modrm (cur->pc);
3935 cur->sp -= 1;
3936 break;
3937 case 0x7:
3938 cur->pc = check_modrm (cur->pc); /* XXXX */
3939 if (jmp_reg_switch_mode == 1)
3940 {
3941 int rc = process_return_real (wctx, cur, 0);
3942 if (rc == RA_SUCCESS)
3943 {
3944 if (save_ctx)
3945 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3946 return rc;
3947 }
3948 }
3949 break;
3950 default:
3951 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xff %x\n",
3952 __LINE__, (int) extop);
3953 DELETE_CURCTX ();
3954 break;
3955 }
3956 break;
3957 default:
3958 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x%x\n",
3959 __LINE__, (int) opcode);
3960 DELETE_CURCTX ();
3961 break;
3962 }
3963
3964 /* switch to next context */
3965 if (++cur >= buf + nctx)
3966 cur = buf;
3967 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d switch context: cur=0x%lx(%ld) nctx=%d cnt=%d\n",
3968 __LINE__, (unsigned long) cur, (long) (cur - buf), (int) nctx, (int) cnt);
3969 }
3970
3971 checkFP:
3972 Tprintf (DBG_LT3, "find_i386_ret_addr:%d checkFP: wctx=0x%lx fp=0x%lx ln=0x%lx pc=0x%lx sbase=0x%lx sp=0x%lx tbgn=0x%lx tend=0x%lx\n",
3973 __LINE__, (unsigned long) wctx, (unsigned long) wctx->fp,
3974 (unsigned long) wctx->ln, (unsigned long) wctx->pc, (unsigned long) wctx->sbase,
3975 (unsigned long) wctx->sp, (unsigned long) wctx->tbgn, (unsigned long) wctx->tend);
3976
3977 if (jmp_reg_switch_mode == 1)
3978 { // not deal with switch cases not ending with ret
3979 if (jmp_reg_switch_backup_ctx != NULL)
3980 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
3981 DprintfT (SP_DUMP_UNWIND, "stack_unwind jmp reg mode on: pc = 0x%lx cnt = %d, nctx = %d\n", wctx->pc, cnt, nctx);
3982 }
3983
3984 unsigned long *cur_fp = cur->fp;
3985 unsigned long *cur_sp = cur->sp;
3986 if (do_walk == 0)
3987 __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
3988
3989 /* Resort to the frame pointer */
3990 if (cur->fp_loc)
3991 cur->fp = cur->fp_sav;
3992 cur->sp = cur->fp;
3993 if ((unsigned long) cur->sp >= wctx->sbase ||
3994 (unsigned long) cur->sp < wctx->sp)
3995 {
3996 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d do_walk=%d cur->sp=0x%p out of range. wctx->sbase=0x%lx wctx->sp=0x%lx wctx->pc=0x%lx\n",
3997 __LINE__, (int) do_walk, cur->sp, (unsigned long) wctx->sbase,
3998 (unsigned long) wctx->sp, (unsigned long) wctx->pc);
3999 if (do_walk == 0)
4000 {
4001 cur->sp = cur_sp;
4002 cur->fp = cur_fp;
4003 do_walk = 1;
4004 save_ctx = 1;
4005 goto startWalk;
4006 }
4007 if (save_ctx)
4008 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4009 return RA_FAILURE;
4010 }
4011
4012 unsigned long fp = *cur->sp++;
4013 if (fp <= (unsigned long) cur->sp || fp >= wctx->sbase)
4014 {
4015 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d fp=0x%016llx out of range. cur->sp=%p wctx->sbase=0x%lx wctx->pc=0x%lx\n",
4016 __LINE__, (unsigned long long) fp, cur->sp,
4017 (unsigned long) wctx->sbase, (unsigned long) wctx->pc);
4018 if (do_walk == 0)
4019 {
4020 cur->sp = cur_sp;
4021 cur->fp = cur_fp;
4022 do_walk = 1;
4023 save_ctx = 1;
4024 goto startWalk;
4025 }
4026 if (save_ctx)
4027 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4028 return RA_FAILURE;
4029 }
4030
4031 unsigned long ra = *cur->sp++;
4032 if (ra == 0)
4033 {
4034 cache_put (wctx, RA_EOSTCK);
4035 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK wctx->pc = 0x%lx\n", __LINE__, wctx->pc);
4036 if (save_ctx)
4037 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
4038 return RA_END_OF_STACK;
4039 }
4040
4041 unsigned long tbgn = wctx->tbgn;
4042 unsigned long tend = wctx->tend;
4043 if (ra < tbgn || ra >= tend)
4044 {
4045 // We do not know yet if update_map_segments is really needed
4046 if (!__collector_check_segment (ra, &tbgn, &tend, 0))
4047 {
4048 DprintfT (SP_DUMP_UNWIND, "unwind.c: __collector_check_segment fail. wctx->pc = 0x%lx\n", wctx->pc);
4049 if (do_walk == 0)
4050 {
4051 cur->sp = cur_sp;
4052 cur->fp = cur_fp;
4053 do_walk = 1;
4054 save_ctx = 1;
4055 goto startWalk;
4056 }
4057 if (save_ctx)
4058 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4059 return RA_FAILURE;
4060 }
4061 }
4062
4063 unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
4064 if (npc == 0)
4065 {
4066 DprintfT (SP_DUMP_UNWIND, "unwind.c: adjust_ret_addr fail. wctx->pc = 0x%lx\n", wctx->pc);
4067 if (do_walk == 0)
4068 {
4069 cur->sp = cur_sp;
4070 cur->fp = cur_fp;
4071 do_walk = 1;
4072 save_ctx = 1;
4073 goto startWalk;
4074 }
4075 if (save_ctx)
4076 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4077 return RA_FAILURE;
4078 }
4079 wctx->pc = npc;
4080 wctx->sp = (unsigned long) cur->sp;
4081 wctx->fp = fp;
4082 wctx->tbgn = tbgn;
4083 wctx->tend = tend;
4084
4085 if (save_ctx)
4086 {
4087 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SUCCESS);
4088 DprintfT (SP_DUMP_UNWIND, "unwind.c: cache walk context. wctx_pc_save->pc = 0x%lx\n", wctx_pc_save.pc);
4089 }
4090 return RA_SUCCESS;
4091 }
4092
4093 /*
4094  * We have the return address, but we would like to report to the user
4095  * the calling PC, which is the instruction immediately preceding the
4096  * return address.  Unfortunately, x86 instructions can have variable
4097  * length.  So we back up 8 bytes and try to figure out where the
4098  * calling PC starts.  (FWIW, call instructions are often 5 bytes long.)
4099  */
/*
 * Given the return address RA, its offset SEGOFF from the start of the
 * text segment, and the segment end TEND, return the address of the
 * call instruction that produced RA (the calling PC), or 0 on failure.
 */
unsigned long
adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend)
{
  unsigned long npc = 0;
  /* A call instruction is 2..8 bytes long including prefixes.  Try each
     candidate start RA-i and accept it if decoding from there ends
     exactly at RA.  Never read before the segment start (segoff).  */
  int i = segoff < 8 ? segoff : 8;
  for (; i > 1; i--)
    {
      unsigned char *ptr = (unsigned char*) ra - i;
      int z = 4;        /* size in bytes of the call's immediate operand */
      int a = 4;        /* address size used by the far-call form (0x9a) */
      int done = 0;
      int bVal;
      /* Consume any legacy prefix bytes in front of the opcode.  */
      while (!done)
        {
          bVal = getByteInstruction (ptr);
          if (bVal < 0)         /* byte unreadable (e.g. VM-protected) */
            return 0;
          switch (bVal)
            {
            case 0x26:  /* %es segment override */
            case 0x36:  /* %ss segment override */
#if WSIZE(64)
              /* In 64-bit mode these are ignored one-byte prefixes.  */
              ptr += 1;
              break;
#endif
              /* In 32-bit mode fall through to the %fs/%gs handling.  */
            case 0x64:  /* %fs segment override */
            case 0x65:  /* %gs segment override */
              bVal = getByteInstruction (ptr + 1);
              if (bVal < 0)
                return 0;
              if (bVal == 0xe8)
                // a workaround for bug 16193041, assuming "call Jz" has no segment override prefix
                done = 1;
              else
                ptr += 1;
              break;
            case 0x66:  /* operand-size override: immediate shrinks to 2 */
              z = 2;
              ptr += 1;
              break;
            case 0x67:  /* address-size override */
              a = 2;
              ptr += 1;
              break;
            default:    /* not a prefix: this byte starts the opcode */
              done = 1;
              break;
            }
        }
#if WSIZE(64)
      /* Skip a REX prefix (0x40..0x4f); REX.W restores z to 4.  */
      bVal = getByteInstruction (ptr);
      if (bVal < 0)
        return 0;
      if (bVal >= 0x40 && bVal <= 0x4f)
        { /* XXXX not all REX codes applicable */
          if (bVal & 0x8)
            z = 4;
          ptr += 1;
        }
#endif
      int opcode = getByteInstruction (ptr);
      if (opcode < 0)
        return 0;
      ptr++;
      switch (opcode)
        {
        case 0xe8: /* call Jz (f64) */
          ptr += z;
          break;
        case 0x9a: /* callf Ap */
          ptr += 2 + a;
          break;
        case 0xff: /* calln Ev , callf Ep */
          {
            int extop = MRM_EXT (*ptr);
            if (extop == 2 || extop == 3)
              ptr = check_modrm (ptr);
          }
          break;
        default:
          /* Not a call opcode; try the next candidate offset.  */
          continue;
        }
      if ((unsigned long) ptr == ra)
        {
          /* Decoding ended exactly at RA: RA-i is the calling PC.  */
          npc = ra - i;
          break;
        }
    }
  if (npc == 0)
    {
      /* No call found.  Check whether RA points at a known kernel
         signal-trampoline entry; if so, report RA itself.  */
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // test __kernel_sigreturn or __kernel_rt_sigreturn
      if ((ra + 7 < tend && getByteInstruction (ptr) == 0x58
           && getByteInstruction (ptr + 1) == 0xb8
           && getByteInstruction (ptr + 6) == 0xcd
           && getByteInstruction (ptr + 7) == 0x80) /* pop %eax; mov $NNNN, %eax; int */
          || (ra + 7 < tend && getByteInstruction (ptr) == 0x58
              && getByteInstruction (ptr + 1) == 0xb8
              && getByteInstruction (ptr + 6) == 0x0f
              && getByteInstruction (ptr + 7) == 0x05) /* pop %eax; mov $NNNN, %eax; syscall */
          || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
              && getByteInstruction (ptr + 5) == 0xcd
              && getByteInstruction (ptr + 6) == 0x80) /* mov $NNNN, %eax; int */
          || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
              && getByteInstruction (ptr + 5) == 0x0f
              && getByteInstruction (ptr + 6) == 0x05)) /* mov $NNNN, %eax; syscall */
#else //WSIZE(64)
      // test __restore_rt
      if (ra + 8 < tend && getByteInstruction (ptr) == 0x48
          && getByteInstruction (ptr + 7) == 0x0f
          && getByteInstruction (ptr + 8) == 0x05) /* mov $NNNNNNNN, %rax; syscall */
#endif
        {
          npc = ra;
        }
    }
  if (npc == 0 && __collector_java_mode
      && __collector_java_asyncgetcalltrace_loaded)
    { // detect jvm interpreter code for java user threads
      /* Match byte patterns of the template-interpreter dispatch
         sequences for specific JVM releases; comments below show the
         expected disassembly.  */
      unsigned char * ptr = (unsigned char *) ra;
#if WSIZE(32)
      // up to J170
      /*
       * ff 24 9d e0 64 02 f5 jmp *-0xafd9b20(,%ebx,4)
       * 8b 4e 01 movl 1(%esi),%ecx
       * f7 d1 notl %ecx
       * 8b 5d ec movl -0x14(%ebp),%ebx
       * c1 e1 02 shll $2,%ecx
       * eb d8 jmp .-0x26 [ 0x92a ]
       * 83 ec 08 subl $8,%esp || 8b 65 f8 movl -8(%ebp),%esp
       * */
      if (ra - 20 >= (ra - segoff) && ((*ptr == 0x83 && *(ptr + 1) == 0xec) || (*ptr == 0x8b && *(ptr + 1) == 0x65))
          && *(ptr - 2) == 0xeb
          && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
          && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
          && *(ptr - 10) == 0xf7 && *(ptr - 9) == 0xd1
          && *(ptr - 13) == 0x8b && *(ptr - 12) == 0x4e
          && *(ptr - 20) == 0xff && *(ptr - 19) == 0x24 && *(ptr - 18) == 0x9d)
        {
          npc = ra - 20;
        }
      // J180 J190
      // ff 24 9d ** ** ** ** jmp *-0x*******(,%ebx,4)
      if (npc == 0
          && ra - 7 >= (ra - segoff)
          && *(ptr - 7) == 0xff
          && *(ptr - 6) == 0x24
          && *(ptr - 5) == 0x9d)
        {
          npc = ra - 7;
        }
#else //WSIZE(64)
      // up to J170
      /*
       * 41 ff 24 da jmp *(%r10,%rbx,8)
       * 41 8b 4d 01 movl 1(%r13),%ecx
       * f7 d1 notl %ecx
       * 48 8b 5d d8 movq -0x28(%rbp),%rbx
       * c1 e1 02 shll $2,%ecx
       * eb cc jmp .-0x32 [ 0xd23 ]
       * 48 8b 65 f0 movq -0x10(%rbp),%rsp
       */
      if (ra - 19 >= (ra - segoff) && *ptr == 0x48 && ((*(ptr + 1) == 0x8b && *(ptr + 2) == 0x65) || (*(ptr + 1) == 0x83 && *(ptr + 2) == 0xec))
          && *(ptr - 2) == 0xeb
          && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
          && *(ptr - 9) == 0x48 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
          && *(ptr - 11) == 0xf7 && *(ptr - 10) == 0xd1
          && *(ptr - 15) == 0x41 && *(ptr - 14) == 0x8b && *(ptr - 13) == 0x4d
          && *(ptr - 19) == 0x41 && *(ptr - 18) == 0xff)
        npc = ra - 19;
      // J180 J190
      // 41 ff 24 da jmp *(%r10,%rbx,8)
      if (npc == 0
          && ra - 4 >= (ra - segoff)
          && *(ptr - 4) == 0x41
          && *(ptr - 3) == 0xff
          && *(ptr - 2) == 0x24
          && *(ptr - 1) == 0xda)
        npc = ra - 4;
#endif
    }

  return npc;
}
4285
4286 /*
4287 * Parses AVX instruction and returns its length.
4288 * Returns 0 if parsing failed.
4289 * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
4290 */
4291 static int
parse_x86_AVX_instruction(unsigned char * pc)4292 parse_x86_AVX_instruction (unsigned char *pc)
4293 {
4294 /*
4295 * VEX prefix has a two-byte form (0xc5) and a three byte form (0xc4).
4296 * If an instruction syntax can be encoded using the two-byte form,
4297 * it can also be encoded using the three byte form of VEX.
4298 * The latter increases the length of the instruction by one byte.
4299 * This may be helpful in some situations for code alignment.
4300 *
4301 Byte 0 Byte 1 Byte 2 Byte 3
4302 (Bit Position) 7 0 7 6 5 4 0 7 6 3 2 10
4303 3-byte VEX [ 11000100 ] [ R X B | m-mmmm ] [ W | vvvv | L | pp ]
4304 7 0 7 6 3 2 10
4305 2-byte VEX [ 11000101 ] [ R | vvvv | L | pp ]
4306 7 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
4307 4-byte EVEX [ 01100010 ] [ R X B R1 0 0 m m ] [ W v v v v 1 p p ] [ z L1 L B1 V1 a a a ]
4308
4309 R: REX.R in 1's complement (inverted) form
4310 0: Same as REX.R=1 (64-bit mode only)
4311 1: Same as REX.R=0 (must be 1 in 32-bit mode)
4312
4313 X: REX.X in 1's complement (inverted) form
4314 0: Same as REX.X=1 (64-bit mode only)
4315 1: Same as REX.X=0 (must be 1 in 32-bit mode)
4316
4317 B: REX.B in 1's complement (inverted) form
4318 0: Same as REX.B=1 (64-bit mode only)
4319 1: Same as REX.B=0 (Ignored in 32-bit mode).
4320
4321 W: opcode specific (use like REX.W, or used for opcode
4322 extension, or ignored, depending on the opcode byte)
4323
4324 m-mmmm:
4325 00000: Reserved for future use (will #UD)
4326 00001: implied 0F leading opcode byte
4327 00010: implied 0F 38 leading opcode bytes
4328 00011: implied 0F 3A leading opcode bytes
4329 00100-11111: Reserved for future use (will #UD)
4330
4331 vvvv: a register specifier (in 1's complement form) or 1111 if unused.
4332
4333 L: Vector Length
4334 0: scalar or 128-bit vector
4335 1: 256-bit vector
4336
4337 pp: opcode extension providing equivalent functionality of a SIMD prefix
4338 00: None
4339 01: 66
4340 10: F3
4341 11: F2
4342 *
4343 * Example: 0xc5f877L vzeroupper
4344 * VEX prefix: 0xc5 0x77
4345 * Opcode: 0xf8
4346 *
4347 */
4348 int len = 0;
4349 disassemble_info dis_info;
4350 dis_info.arch = bfd_arch_i386;
4351 dis_info.mach = bfd_mach_x86_64;
4352 dis_info.flavour = bfd_target_unknown_flavour;
4353 dis_info.endian = BFD_ENDIAN_UNKNOWN;
4354 dis_info.endian_code = dis_info.endian;
4355 dis_info.octets_per_byte = 1;
4356 dis_info.disassembler_needs_relocs = FALSE;
4357 dis_info.fprintf_func = fprintf_func;
4358 dis_info.fprintf_styled_func = fprintf_styled_func;
4359 dis_info.stream = NULL;
4360 dis_info.disassembler_options = NULL;
4361 dis_info.read_memory_func = read_memory_func;
4362 dis_info.memory_error_func = memory_error_func;
4363 dis_info.print_address_func = print_address_func;
4364 dis_info.symbol_at_address_func = symbol_at_address_func;
4365 dis_info.symbol_is_valid = symbol_is_valid;
4366 dis_info.display_endian = BFD_ENDIAN_UNKNOWN;
4367 dis_info.symtab = NULL;
4368 dis_info.symtab_size = 0;
4369 dis_info.buffer_vma = 0;
4370 dis_info.buffer = pc;
4371 dis_info.buffer_length = 8;
4372
4373 disassembler_ftype disassemble = print_insn_i386;
4374 if (disassemble == NULL)
4375 {
4376 DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction ERROR: unsupported disassemble\n");
4377 return 0;
4378 }
4379 len = disassemble (0, &dis_info);
4380 DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction: returned %d pc: %p\n", len, pc);
4381 return len;
4382 }
4383
4384 /*
4385 * In the Intel world, a stack frame looks like this:
4386 *
4387 * %fp0->| |
4388 * |-------------------------------|
4389 * | Args to next subroutine |
4390 * |-------------------------------|-\
4391 * %sp0->| One word struct-ret address | |
4392 * |-------------------------------| > minimum stack frame (8 bytes)
4393 * | Previous frame pointer (%fp0)| |
4394 * %fp1->|-------------------------------|-/
4395 * | Local variables |
4396 * %sp1->|-------------------------------|
4397 *
4398 */
4399
/*
 * Walk the stack described by CONTEXT, storing frame PCs as longs into
 * BUF (SIZE bytes).  BPTR and EPTR, when non-NULL, bound which frames
 * are recorded by comparing against the frame SP; MODE carries FRINFO_*
 * flags.  Returns the number of bytes stored into BUF.
 */
int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  long *lbuf = (long*) buf;
  int lsize = size / sizeof (long);  /* capacity of lbuf in entries */
  int ind = 0;                       /* next free entry in lbuf */
  int do_walk = 1;                   /* forwarded to find_i386_ret_addr */
  int extra_frame = 0;               /* record an extra leading frame? */
  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  if ((mode & 0xffff) == FRINFO_FROM_STACK)
    extra_frame = 1;

  /*
   * trace the stack frames from user stack.
   * We are assuming that the frame pointer and return address
   * are null when we are at the top level.
   */
  struct WalkContext wctx;
  wctx.pc = GET_PC (context);
  wctx.sp = GET_SP (context);
  wctx.fp = GET_FP (context);
  wctx.ln = (unsigned long) context->uc_link;
  /* Stack upper bound: the thread-specific value if it is above SP,
     otherwise assume at most 1MB (0x100000) above the current SP.  */
  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
  if (sbase && *sbase > wctx.sp)
    wctx.sbase = *sbase;
  else
    {
      wctx.sbase = wctx.sp + 0x100000;
      if (wctx.sbase < wctx.sp) /* overflow */
        wctx.sbase = (unsigned long) - 1;
    }
  // We do not know yet if update_map_segments is really needed
  __collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0);

  for (;;)
    {
      if (ind >= lsize || wctx.pc == 0)
        break;
      /* While SP is still at or below BPTR, keep only the latest PC in
         slot 0 (each iteration here overwrites lbuf[0]).  */
      if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
        {
          lbuf[0] = wctx.pc;
          if (ind == 0)
            {
              ind++;
              if (ind >= lsize)
                break;
            }
        }
      /* Past BPTR (or unbounded): append this frame's PC normally.  */
      if (bptr == NULL || wctx.sp > (unsigned long) bptr)
        {
          lbuf[ind++] = wctx.pc;
          if (ind >= lsize)
            break;
        }

      /* Inner loop: advance wctx by one frame, transparently stepping
         across signal trampolines.  */
      for (;;)
        {
          if (eptr != NULL && wctx.sp >= (unsigned long) eptr)
            {
              /* Reached the EPTR bound: drop the last two entries.  */
              ind = ind >= 2 ? ind - 2 : 0;
              goto exit;
            }
          int ret = find_i386_ret_addr (&wctx, do_walk);
          DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d find_i386_ret_addr returns %d\n", __LINE__, ret);
          if (ret == RA_FAILURE)
            {
              /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
              goto exit;
            }

          if (ret == RA_END_OF_STACK)
            goto exit;
#if WSIZE(32)
          if (ret == RA_RT_SIGRETURN)
            {
              /* rt_sigreturn frame: the third argument on the stack is
                 the address of the saved ucontext.  */
              struct SigFrame
              {
                unsigned long arg0;
                unsigned long arg1;
                unsigned long arg2;
              } *sframe = (struct SigFrame*) wctx.sp;
              ucontext_t *ncontext = (ucontext_t*) sframe->arg2;
              wctx.pc = GET_PC (ncontext);
              if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
                {
                  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
                  goto exit;
                }
              unsigned long nsp = GET_SP (ncontext);
              /* Check the new stack pointer */
              if (nsp <= sframe->arg2 || nsp > sframe->arg2 + sizeof (ucontext_t) + 1024)
                {
                  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
                  goto exit;
                }
              wctx.sp = nsp;
              wctx.fp = GET_FP (ncontext);
              break;
            }
          else if (ret == RA_SIGRETURN)
            {
              /* Old-style sigreturn: a struct sigcontext sits at SP.  */
              struct sigcontext *sctx = (struct sigcontext*) wctx.sp;
              wctx.pc = sctx->eip;
              if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
                {
                  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
                  goto exit;
                }
              wctx.sp = sctx->esp;
              wctx.fp = sctx->ebp;
              break;
            }
#elif WSIZE(64)
          if (ret == RA_RT_SIGRETURN)
            {
              /* On x86-64 the saved ucontext itself sits at SP.  */
              ucontext_t *ncontext = (ucontext_t*) wctx.sp;
              wctx.pc = GET_PC (ncontext);
              if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
                {
                  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
                  goto exit;
                }
              unsigned long nsp = GET_SP (ncontext);
              /* Check the new stack pointer */
              if (nsp <= wctx.sp || nsp > wctx.sp + sizeof (ucontext_t) + 1024)
                {
                  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
                  goto exit;
                }
              wctx.sp = nsp;
              wctx.fp = GET_FP (ncontext);
              break;
            }
#endif /* WSIZE() */
          /* Same BPTR/EPTR bookkeeping as in the outer loop, applied to
             the frame we just stepped to.  */
          if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
            {
              lbuf[0] = wctx.pc;
              if (ind == 0)
                {
                  ind++;
                  if (ind >= lsize)
                    break;
                }
            }
          if (bptr == NULL || wctx.sp > (unsigned long) bptr)
            {
              lbuf[ind++] = wctx.pc;
              if (ind >= lsize)
                goto exit;
            }
        }
    }

exit:
#if defined(DEBUG)
  if ((SP_DUMP_UNWIND & __collector_tracelevel) != 0)
    {
      DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d found %d frames\n\n", __LINE__, ind);
      for (int i = 0; i < ind; i++)
        DprintfT (SP_DUMP_UNWIND, "  %3d: 0x%lx\n", i, (unsigned long) lbuf[i]);
    }
#endif
  dump_stack (__LINE__);
  /* On overflow, replace the last entry with a truncation marker.  */
  if (ind >= lsize)
    {
      ind = lsize - 1;
      lbuf[ind++] = (unsigned long) SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (long);
}
4571
4572 #elif ARCH(Aarch64)
4573
4574 static int
stack_unwind(char * buf,int size,void * bptr,void * eptr,ucontext_t * context,int mode)4575 stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
4576 {
4577 if (buf && bptr && eptr && context && size + mode > 0)
4578 getByteInstruction ((unsigned char *) eptr);
4579 int ind = 0;
4580 __u64 *lbuf = (void *) buf;
4581 int lsize = size / sizeof (__u64);
4582 __u64 pc = context->uc_mcontext.pc;
4583 __u64 sp = context->uc_mcontext.sp;
4584 __u64 stack_base;
4585 unsigned long tbgn = 0;
4586 unsigned long tend = 0;
4587
4588 unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
4589 if (sbase && *sbase > sp)
4590 stack_base = *sbase;
4591 else
4592 {
4593 stack_base = sp + 0x100000;
4594 if (stack_base < sp) // overflow
4595 stack_base = (__u64) -1;
4596 }
4597 DprintfT (SP_DUMP_UNWIND,
4598 "unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx stack_base=0x%llx\n",
4599 __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp,
4600 (unsigned long long) stack_base);
4601
4602 while (sp && pc)
4603 {
4604 DprintfT (SP_DUMP_UNWIND,
4605 "unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx\n",
4606 __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp);
4607 // Dl_info dlinfo;
4608 // if (!dladdr ((void *) pc, &dlinfo))
4609 // break;
4610 // DprintfT (SP_DUMP_UNWIND, "%2d: %llx <%s+%llu> (%s)\n",
4611 // ind, (unsigned long long) pc,
4612 // dlinfo.dli_sname ? dlinfo.dli_sname : "(?)",
4613 // (unsigned long long) pc - (unsigned long long) dlinfo.dli_saddr,
4614 // dlinfo.dli_fname);
4615 lbuf[ind++] = pc;
4616 if (ind >= lsize || sp >= stack_base || (sp & 15) != 0)
4617 break;
4618 if (pc < tbgn || pc >= tend)
4619 if (!__collector_check_segment ((unsigned long) pc, &tbgn, &tend, 0))
4620 {
4621 DprintfT (SP_DUMP_UNWIND,
4622 "unwind.c:%d __collector_check_segment failed. sp=0x%lx\n",
4623 __LINE__, (unsigned long) sp);
4624 break;
4625 }
4626 pc = ((__u64 *) sp)[1];
4627 __u64 old_sp = sp;
4628 sp = ((__u64 *) sp)[0];
4629 if (sp < old_sp)
4630 break;
4631 }
4632 if (ind >= lsize)
4633 {
4634 ind = lsize - 1;
4635 lbuf[ind++] = (__u64) SP_TRUNC_STACK_MARKER;
4636 }
4637 return ind * sizeof (__u64);
4638 }
4639 #endif /* ARCH() */
4640