1 /* Copyright (C) 2021-2024 Free Software Foundation, Inc.
2 Contributed by Oracle.
3
4 This file is part of GNU Binutils.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
20
21 #include "config.h"
22 #include <alloca.h>
23 #include <dlfcn.h>
24 #include <stdlib.h>
25 #include <signal.h>
26 #include <unistd.h>
27 #include <pthread.h>
28
29 #include "gp-defs.h"
30 #include "collector.h"
31 #include "gp-experiment.h"
32 #include "memmgr.h"
33 #include "tsd.h"
34
35 /* Get dynamic module interface*/
36 #include "collector_module.h"
37
38 /* Get definitions for SP_LEAF_CHECK_MARKER, SP_TRUNC_STACK_MARKER */
39 #include "data_pckts.h"
40
41 #if ARCH(SPARC)
42 struct frame
43 {
44 long fr_local[8]; /* saved locals */
45 long fr_arg[6]; /* saved arguments [0 - 5] */
46 struct frame *fr_savfp; /* saved frame pointer */
47 long fr_savpc; /* saved program counter */
48 #if WSIZE(32)
49 char *fr_stret; /* struct return addr */
50 #endif
51 long fr_argd[6]; /* arg dump area */
52 long fr_argx[1]; /* array of args past the sixth */
53 };
54
55 #elif ARCH(Intel)
struct frame
{
  unsigned long fr_savfp;	/* saved frame pointer */
  unsigned long fr_savpc;	/* saved program counter (return address) */
};
61 #endif
62
63 /* Set the debug trace level */
64 #define DBG_LT0 0
65 #define DBG_LT1 1
66 #define DBG_LT2 2
67 #define DBG_LT3 3
68
69 int (*__collector_VM_ReadByteInstruction)(unsigned char *) = NULL;
70 #define VM_NO_ACCESS (-1)
71 #define VM_NOT_VM_MEMORY (-2)
72 #define VM_NOT_X_SEGMENT (-3)
73
74 #define isInside(p, bgn, end) ((p) >= (bgn) && (p) < (end))
75
/*
 * Weed through all the arch-dependent stuff to get the right definition
 * of 'pc' in the ucontext structure.  The system header files are a mess
 * when dealing with all the architectures (just look for PC, R_PC, REG_PC).
 *
 */
82
83 #if ARCH(SPARC)
84
85 #define IN_BARRIER(x) \
86 ( barrier_hdl && \
87 (unsigned long)x >= barrier_hdl && \
88 (unsigned long)x < barrier_hdlx )
89 static unsigned long barrier_hdl = 0;
90 static unsigned long barrier_hdlx = 0;
91
92 #if WSIZE(64)
93 #define STACK_BIAS 2047
94 #define IN_TRAP_HANDLER(x) \
95 ( misalign_hdl && \
96 (unsigned long)x >= misalign_hdl && \
97 (unsigned long)x < misalign_hdlx )
98 static unsigned long misalign_hdl = 0;
99 static unsigned long misalign_hdlx = 0;
100 #elif WSIZE(32)
101 #define STACK_BIAS 0
102 #endif
103
104 #if WSIZE(64)
105 #define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[(reg)])
106 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_O6])
107 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_PC])
108 #else
109 #define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.gregs[(reg)])
110 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_O6])
111 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_PC])
112 #endif
113
114 #elif ARCH(Intel)
115 #include "opcodes/disassemble.h"
116
117 static int
fprintf_func(void * arg ATTRIBUTE_UNUSED,const char * fmt ATTRIBUTE_UNUSED,...)118 fprintf_func (void *arg ATTRIBUTE_UNUSED, const char *fmt ATTRIBUTE_UNUSED, ...)
119 {
120 return 0;
121 }
122
123 static int
fprintf_styled_func(void * arg ATTRIBUTE_UNUSED,enum disassembler_style st ATTRIBUTE_UNUSED,const char * fmt ATTRIBUTE_UNUSED,...)124 fprintf_styled_func (void *arg ATTRIBUTE_UNUSED,
125 enum disassembler_style st ATTRIBUTE_UNUSED,
126 const char *fmt ATTRIBUTE_UNUSED, ...)
127 {
128 return 0;
129 }
130
131 /* Get LENGTH bytes from info's buffer, at target address memaddr.
132 Transfer them to myaddr. */
133 static int
read_memory_func(bfd_vma memaddr,bfd_byte * myaddr,unsigned int length,disassemble_info * info)134 read_memory_func (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
135 disassemble_info *info)
136 {
137 unsigned int opb = info->octets_per_byte;
138 size_t end_addr_offset = length / opb;
139 size_t max_addr_offset = info->buffer_length / opb;
140 size_t octets = (memaddr - info->buffer_vma) * opb;
141 if (memaddr < info->buffer_vma
142 || memaddr - info->buffer_vma > max_addr_offset
143 || memaddr - info->buffer_vma + end_addr_offset > max_addr_offset
144 || (info->stop_vma && (memaddr >= info->stop_vma
145 || memaddr + end_addr_offset > info->stop_vma)))
146 return -1;
147 memcpy (myaddr, info->buffer + octets, length);
148 return 0;
149 }
150
151 static void
print_address_func(bfd_vma addr ATTRIBUTE_UNUSED,disassemble_info * info ATTRIBUTE_UNUSED)152 print_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
153 disassemble_info *info ATTRIBUTE_UNUSED) { }
154
155 static asymbol *
symbol_at_address_func(bfd_vma addr ATTRIBUTE_UNUSED,disassemble_info * info ATTRIBUTE_UNUSED)156 symbol_at_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
157 disassemble_info *info ATTRIBUTE_UNUSED)
158 {
159 return NULL;
160 }
161
162 static bfd_boolean
symbol_is_valid(asymbol * sym ATTRIBUTE_UNUSED,disassemble_info * info ATTRIBUTE_UNUSED)163 symbol_is_valid (asymbol *sym ATTRIBUTE_UNUSED,
164 disassemble_info *info ATTRIBUTE_UNUSED)
165 {
166 return TRUE;
167 }
168
169 static void
memory_error_func(int status ATTRIBUTE_UNUSED,bfd_vma addr ATTRIBUTE_UNUSED,disassemble_info * info ATTRIBUTE_UNUSED)170 memory_error_func (int status ATTRIBUTE_UNUSED, bfd_vma addr ATTRIBUTE_UNUSED,
171 disassemble_info *info ATTRIBUTE_UNUSED) { }
172
173
174 #if WSIZE(32)
175 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EIP])
176 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP])
177 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EBP])
178
179 #elif WSIZE(64)
180 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP])
181 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP])
182 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RBP])
183 #endif /* WSIZE() */
184
185 #elif ARCH(Aarch64)
186 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[15])
187 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[13])
188 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[14])
189 #endif /* ARCH() */
190
191 /*
192 * FILL_CONTEXT() for all platforms
193 * Could use getcontext() except:
194 * - it's not guaranteed to be async signal safe
195 * - it's a system call and not that lightweight
196 * - it's not portable as of POSIX.1-2008
197 * So we just use low-level mechanisms to fill in the few fields we need.
198 */
199 #if ARCH(SPARC)
200 #if WSIZE(32)
201 #define FILL_CONTEXT(context) \
202 { \
203 greg_t fp; \
204 __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
205 __asm__ __volatile__( "ta 3" ); \
206 GET_SP(context) = fp; \
207 GET_PC(context) = (greg_t)0; \
208 }
209
210 #elif WSIZE(64)
211 #define FILL_CONTEXT(context) \
212 { \
213 greg_t fp; \
214 __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
215 __asm__ __volatile__( "flushw" ); \
216 GET_SP(context) = fp; \
217 GET_PC(context) = (greg_t)0; \
218 }
219 #endif /* WSIZE() */
220
221 #elif ARCH(Intel)
222 #define FILL_CONTEXT(context) \
223 { \
224 context->uc_link = NULL; \
225 void *sp = __collector_getsp(); \
226 GET_SP(context) = (intptr_t)sp; \
227 GET_FP(context) = (intptr_t)__collector_getfp(); \
228 GET_PC(context) = (intptr_t)__collector_getpc(); \
229 context->uc_stack.ss_sp = sp; \
230 context->uc_stack.ss_size = 0x100000; \
231 }
232
233 #elif ARCH(Aarch64)
234 #if defined(__MUSL_LIBC)
235 typedef uint64_t __u64;
236 #endif
237
238 #define FILL_CONTEXT(context) \
239 { CALL_UTIL (getcontext) (context); \
240 context->uc_mcontext.sp = (__u64) __builtin_frame_address(0); \
241 }
242
243 #endif /* ARCH() */
244
245 static int
getByteInstruction(unsigned char * p)246 getByteInstruction (unsigned char *p)
247 {
248 if (__collector_VM_ReadByteInstruction)
249 {
250 int v = __collector_VM_ReadByteInstruction (p);
251 if (v != VM_NOT_VM_MEMORY)
252 return v;
253 }
254 return *p;
255 }
256
257 struct DataHandle *dhndl = NULL;
258
259 static unsigned unwind_key = COLLECTOR_TSD_INVALID_KEY;
260
261 /* To support two OpenMP API's we use a pointer
262 * to the actual function.
263 */
264 int (*__collector_omp_stack_trace)(char*, int, hrtime_t, void*) = NULL;
265 int (*__collector_mpi_stack_trace)(char*, int, hrtime_t) = NULL;
266
267 #define DEFAULT_MAX_NFRAMES 256
268 static int max_native_nframes = DEFAULT_MAX_NFRAMES;
269 static int max_java_nframes = DEFAULT_MAX_NFRAMES;
270
271 #define NATIVE_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) )
272 #define JAVA_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) * 2 + 16 )
273 #define OVERHEAD_BYTES ( 2 * sizeof(long) + 2 * sizeof(Stack_info) )
274
275 #define ROOT_UID 801425552975190205ULL
276 #define ROOT_UID_INV 92251691606677ULL
277 #define ROOT_IDX 13907816567264074199ULL
278 #define ROOT_IDX_INV 2075111ULL
279 #define UIDTableSize 1048576
280 static volatile uint64_t *UIDTable = NULL;
281 static volatile int seen_omp = 0;
282
283 static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode);
284 static FrameInfo compute_uid (Frame_packet *frp);
285 static int omp_no_walk = 0;
286
287 #if ARCH(Intel)
288 #define ValTableSize 1048576
289 #define OmpValTableSize 65536
290 static unsigned long *AddrTable_RA_FROMFP = NULL; // Cache for RA_FROMFP pcs
291 static unsigned long *AddrTable_RA_EOSTCK = NULL; // Cache for RA_EOSTCK pcs
292 static struct WalkContext *OmpCurCtxs = NULL;
293 static struct WalkContext *OmpCtxs = NULL;
294 static uint32_t *OmpVals = NULL;
295 static unsigned long *OmpRAs = NULL;
296 static unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend);
297 static int parse_x86_AVX_instruction (unsigned char *pc);
298
299 struct WalkContext
300 {
301 unsigned long pc;
302 unsigned long sp;
303 unsigned long fp;
304 unsigned long ln;
305 unsigned long sbase; /* stack boundary */
306 unsigned long tbgn; /* current memory segment start */
307 unsigned long tend; /* current memory segment end */
308 };
309 #endif
310
311 #if defined(DEBUG) && ARCH(Intel)
312 #include <execinfo.h>
313
314 static void
dump_stack(int nline)315 dump_stack (int nline)
316 {
317 if ((__collector_tracelevel & SP_DUMP_STACK) == 0)
318 return;
319
320 enum Constexpr { MAX_SIZE = 1024 };
321 void *array[MAX_SIZE];
322 size_t sz = backtrace (array, MAX_SIZE);
323 char **strings = backtrace_symbols (array, sz);
324 DprintfT (SP_DUMP_STACK, "\ndump_stack: %d size=%d\n", nline, (int) sz);
325 for (int i = 0; i < sz; i++)
326 DprintfT (SP_DUMP_STACK, " %3d: %p %s\n", i, array[i],
327 strings[i] ? strings[i] : "???");
328 }
329
330 #define dump_targets(nline, ntrg, targets) \
331 if ((__collector_tracelevel & SP_DUMP_UNWIND) != 0) \
332 for(int i = 0; i < ntrg; i++) \
333 DprintfT (SP_DUMP_UNWIND, " %2d: 0x%lx\n", i, (long) targets[i])
334 #else
335 #define dump_stack(x)
336 #define dump_targets(nline, ntrg, targets)
337 #endif
338
339 void
__collector_ext_unwind_key_init(int isPthread,void * stack)340 __collector_ext_unwind_key_init (int isPthread, void * stack)
341 {
342 void * ptr = __collector_tsd_get_by_key (unwind_key);
343 if (ptr == NULL)
344 {
345 TprintfT (DBG_LT2, "__collector_ext_unwind_key_init: cannot get tsd\n");
346 return;
347 }
348 if (isPthread)
349 {
350 size_t stack_size = 0;
351 void *stack_addr = 0;
352 pthread_t pthread = pthread_self ();
353 pthread_attr_t attr;
354 int err = pthread_getattr_np (pthread, &attr);
355 TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: pthread: 0x%lx err: %d\n", pthread, err);
356 if (err == 0)
357 {
358 err = pthread_attr_getstack (&attr, &stack_addr, &stack_size);
359 if (err == 0)
360 stack_addr = (char*) stack_addr + stack_size;
361 TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: stack_size=0x%lx eos=%p err=%d\n",
362 (long) stack_size, stack_addr, err);
363 err = pthread_attr_destroy (&attr);
364 TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: destroy: %d\n", err);
365 }
366 *(void**) ptr = stack_addr;
367 }
368 else
369 *(void**) ptr = stack; // cloned thread
370 }
371
372 void
__collector_ext_unwind_init(int record)373 __collector_ext_unwind_init (int record)
374 {
375 int sz = UIDTableSize * sizeof (*UIDTable);
376 UIDTable = (uint64_t*) __collector_allocCSize (__collector_heap, sz, 1);
377 if (UIDTable == NULL)
378 {
379 __collector_terminate_expt ();
380 return;
381 }
382 CALL_UTIL (memset)((void*) UIDTable, 0, sz);
383
384 char *str = CALL_UTIL (getenv)("GPROFNG_JAVA_MAX_CALL_STACK_DEPTH");
385 if (str != NULL && *str != 0)
386 {
387 char *endptr;
388 int n = CALL_UTIL (strtol)(str, &endptr, 0);
389 if (endptr != str && n >= 0)
390 {
391 if (n < 5)
392 n = 5;
393 if (n > MAX_STACKDEPTH)
394 n = MAX_STACKDEPTH;
395 max_java_nframes = n;
396 }
397 }
398
399 str = CALL_UTIL (getenv)("GPROFNG_MAX_CALL_STACK_DEPTH");
400 if (str != NULL && *str != 0)
401 {
402 char *endptr = str;
403 int n = CALL_UTIL (strtol)(str, &endptr, 0);
404 if (endptr != str && n >= 0)
405 {
406 if (n < 5)
407 n = 5;
408 if (n > MAX_STACKDEPTH)
409 n = MAX_STACKDEPTH;
410 max_native_nframes = n;
411 }
412 }
413
414 TprintfT (DBG_LT0, "GPROFNG_MAX_CALL_STACK_DEPTH=%d GPROFNG_JAVA_MAX_CALL_STACK_DEPTH=%d\n",
415 max_native_nframes, max_java_nframes);
416 omp_no_walk = 1;
417
418 if (__collector_VM_ReadByteInstruction == NULL)
419 __collector_VM_ReadByteInstruction = (int(*)()) dlsym (RTLD_DEFAULT, "Async_VM_ReadByteInstruction");
420
421 #if ARCH(SPARC)
422 #if WSIZE(64)
423 misalign_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler");
424 misalign_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler_end");
425 if (misalign_hdlx == 0)
426 misalign_hdlx = misalign_hdl + 292;
427 barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
428 barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
429 if (barrier_hdlx == 0)
430 barrier_hdl = 0;
431 #else
432 barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
433 barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
434 if (barrier_hdlx == 0)
435 barrier_hdl = 0;
436 #endif /* WSIZE() */
437
438 #elif ARCH(Intel)
439 sz = ValTableSize * sizeof (*AddrTable_RA_FROMFP);
440 AddrTable_RA_FROMFP = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
441 sz = ValTableSize * sizeof (*AddrTable_RA_EOSTCK);
442 AddrTable_RA_EOSTCK = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
443 if (omp_no_walk && (__collector_omp_stack_trace != NULL || __collector_mpi_stack_trace != NULL))
444 {
445 sz = OmpValTableSize * sizeof (*OmpCurCtxs);
446 OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
447 sz = OmpValTableSize * sizeof (*OmpCtxs);
448 OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
449 sz = OmpValTableSize * sizeof (*OmpVals);
450 OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
451 sz = OmpValTableSize * sizeof (*OmpRAs);
452 OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
453 if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
454 {
455 TprintfT (0, "unwind_init() ERROR: failed; terminating experiment\n");
456 __collector_terminate_expt ();
457 return;
458 }
459 }
460 #endif /* ARCH() */
461
462 if (record)
463 {
464 dhndl = __collector_create_handle (SP_FRINFO_FILE);
465 __collector_log_write ("<%s name=\"%s\" format=\"binary\"/>\n", SP_TAG_DATAPTR, SP_FRINFO_FILE);
466 }
467
468 unwind_key = __collector_tsd_create_key (sizeof (void*), NULL, NULL);
469 if (unwind_key == COLLECTOR_TSD_INVALID_KEY)
470 {
471 TprintfT (0, "unwind_init: ERROR: TSD key create failed.\n");
472 __collector_log_write ("<%s kind=\"%s\" id=\"%d\">TSD key not created</%s>\n",
473 SP_TAG_EVENT, SP_JCMD_CERROR, COL_ERROR_GENERAL, SP_TAG_EVENT);
474 return;
475 }
476 TprintfT (0, "unwind_init() completed normally\n");
477 return;
478 }
479
480 void
__collector_ext_unwind_close()481 __collector_ext_unwind_close ()
482 {
483 __collector_delete_handle (dhndl);
484 dhndl = NULL;
485 }
486
487 void*
__collector_ext_return_address(unsigned level)488 __collector_ext_return_address (unsigned level)
489 {
490 if (NULL == UIDTable) //unwind not initialized yet
491 return NULL;
492 unsigned size = (level + 4) * sizeof (long); // need to strip __collector_get_return_address and its caller
493 ucontext_t context;
494 FILL_CONTEXT ((&context));
495 char* buf = (char*) alloca (size);
496 if (buf == NULL)
497 {
498 TprintfT (DBG_LT0, "__collector_get_return_address: ERROR: alloca(%d) fails\n", size);
499 return NULL;
500 }
501 int sz = stack_unwind (buf, size, NULL, NULL, &context, 0);
502 if (sz < (level + 3) * sizeof (long))
503 {
504 TprintfT (DBG_LT0, "__collector_get_return_address: size=%d, but stack_unwind returns %d\n", size, sz);
505 return NULL;
506 }
507 long *lbuf = (long*) buf;
508 TprintfT (DBG_LT2, "__collector_get_return_address: return %lx\n", lbuf[level + 2]);
509 return (void *) (lbuf[level + 2]);
510 }
511 /*
512 * Collector interface method getFrameInfo
513 */
514 FrameInfo
__collector_get_frame_info(hrtime_t ts,int mode,void * arg)515 __collector_get_frame_info (hrtime_t ts, int mode, void *arg)
516 {
517 ucontext_t *context = NULL;
518 void *bptr = NULL;
519 CM_Array *array = NULL;
520
521 int unwind_mode = 0;
522 int do_walk = 1;
523
524 if (mode & FRINFO_NO_WALK)
525 do_walk = 0;
526 int bmode = mode & 0xffff;
527 int pseudo_context = 0;
528 if (bmode == FRINFO_FROM_STACK_ARG || bmode == FRINFO_FROM_STACK)
529 {
530 bptr = arg;
531 context = (ucontext_t*) alloca (sizeof (ucontext_t));
532 FILL_CONTEXT (context);
533 unwind_mode |= bmode;
534 }
535 else if (bmode == FRINFO_FROM_UC)
536 {
537 context = (ucontext_t*) arg;
538 if (context == NULL)
539 return (FrameInfo) 0;
540 if (GET_SP (context) == 0)
541 pseudo_context = 1;
542 }
543 else if (bmode == FRINFO_FROM_ARRAY)
544 {
545 array = (CM_Array*) arg;
546 if (array == NULL || array->length <= 0)
547 return (FrameInfo) 0;
548 }
549 else
550 return (FrameInfo) 0;
551
552 int max_frame_size = OVERHEAD_BYTES + NATIVE_FRAME_BYTES (max_native_nframes);
553 if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
554 max_frame_size += JAVA_FRAME_BYTES (max_java_nframes);
555
556 Frame_packet *frpckt = alloca (sizeof (Frame_packet) + max_frame_size);
557 frpckt->type = FRAME_PCKT;
558 frpckt->hsize = sizeof (Frame_packet);
559
560 char *d = (char*) (frpckt + 1);
561 int size = max_frame_size;
562
563 #define MIN(a,b) ((a)<(b)?(a):(b))
564 #if defined(GPROFNG_JAVA_PROFILING)
565 /* get Java info */
566 if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
567 {
568 /* use only 2/3 of the buffer and leave the rest for the native stack */
569 int tmpsz = MIN (size, JAVA_FRAME_BYTES (max_java_nframes));
570 if (tmpsz > 0)
571 {
572 int sz = __collector_ext_jstack_unwind (d, tmpsz, context);
573 d += sz;
574 size -= sz;
575 }
576 }
577 #endif
578
579 /* get native stack */
580 if (context)
581 {
582 Stack_info *sinfo = (Stack_info*) d;
583 int sz = sizeof (Stack_info);
584 d += sz;
585 size -= sz;
586 #if ARCH(Intel)
587 if (omp_no_walk == 0)
588 do_walk = 1;
589 #endif
590 if (do_walk == 0)
591 unwind_mode |= FRINFO_NO_WALK;
592
593 int tmpsz = MIN (size, NATIVE_FRAME_BYTES (max_native_nframes));
594 if (tmpsz > 0)
595 {
596 sz = stack_unwind (d, tmpsz, bptr, NULL, context, unwind_mode);
597 d += sz;
598 size -= sz;
599 }
600 sinfo->kind = STACK_INFO;
601 sinfo->hsize = (d - (char*) sinfo);
602 }
603
604 /* create a stack image from user data */
605 if (array && array->length > 0)
606 {
607 Stack_info *sinfo = (Stack_info*) d;
608 int sz = sizeof (Stack_info);
609 d += sz;
610 size -= sz;
611 sz = array->length;
612 if (sz > size)
613 sz = size; // YXXX should we mark this with truncation frame?
614 __collector_memcpy (d, array->bytes, sz);
615 d += sz;
616 size -= sz;
617 sinfo->kind = STACK_INFO;
618 sinfo->hsize = (d - (char*) sinfo);
619 }
620
621 /* Compute the total size */
622 frpckt->tsize = d - (char*) frpckt;
623 FrameInfo uid = compute_uid (frpckt);
624 return uid;
625 }
626
627 FrameInfo
compute_uid(Frame_packet * frp)628 compute_uid (Frame_packet *frp)
629 {
630 uint64_t idxs[LAST_INFO];
631 uint64_t uid = ROOT_UID;
632 uint64_t idx = ROOT_IDX;
633
634 Common_info *cinfo = (Common_info*) ((char*) frp + frp->hsize);
635 char *end = (char*) frp + frp->tsize;
636 for (;;)
637 {
638 if ((char*) cinfo >= end || cinfo->hsize == 0 ||
639 (char*) cinfo + cinfo->hsize > end)
640 break;
641
642 /* Start with a different value to avoid matching with uid */
643 uint64_t uidt = 1;
644 uint64_t idxt = 1;
645 long *ptr = (long*) ((char*) cinfo + cinfo->hsize);
646 long *bnd = (long*) ((char*) cinfo + sizeof (Common_info));
647 TprintfT (DBG_LT2, "compute_uid: Cnt=%ld: ", (long) cinfo->hsize);
648 while (ptr > bnd)
649 {
650 long val = *(--ptr);
651 tprintf (DBG_LT2, "0x%8.8llx ", (unsigned long long) val);
652 uidt = (uidt + val) * ROOT_UID;
653 idxt = (idxt + val) * ROOT_IDX;
654 uid = (uid + val) * ROOT_UID;
655 idx = (idx + val) * ROOT_IDX;
656 }
657 if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
658 {
659 cinfo->uid = uidt;
660 idxs[cinfo->kind] = idxt;
661 }
662 cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
663 }
664 tprintf (DBG_LT2, "\n");
665
666 /* Check if we have already recorded that uid.
667 * The following fragment contains benign data races.
668 * It's important, though, that all reads from UIDTable
669 * happen before writes.
670 */
671 int found1 = 0;
672 int idx1 = (int) ((idx >> 44) % UIDTableSize);
673 if (UIDTable[idx1] == uid)
674 found1 = 1;
675 int found2 = 0;
676 int idx2 = (int) ((idx >> 24) % UIDTableSize);
677 if (UIDTable[idx2] == uid)
678 found2 = 1;
679 int found3 = 0;
680 int idx3 = (int) ((idx >> 4) % UIDTableSize);
681 if (UIDTable[idx3] == uid)
682 found3 = 1;
683 if (!found1)
684 UIDTable[idx1] = uid;
685 if (!found2)
686 UIDTable[idx2] = uid;
687 if (!found3)
688 UIDTable[idx3] = uid;
689
690 if (found1 || found2 || found3)
691 return (FrameInfo) uid;
692 frp->uid = uid;
693
694 /* Compress info's */
695 cinfo = (Common_info*) ((char*) frp + frp->hsize);
696 for (;;)
697 {
698 if ((char*) cinfo >= end || cinfo->hsize == 0 ||
699 (char*) cinfo + cinfo->hsize > end)
700 break;
701 if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
702 {
703 long *ptr = (long*) ((char*) cinfo + sizeof (Common_info));
704 long *bnd = (long*) ((char*) cinfo + cinfo->hsize);
705 uint64_t uidt = cinfo->uid;
706 uint64_t idxt = idxs[cinfo->kind];
707 int found = 0;
708 int first = 1;
709 while (ptr < bnd - 1)
710 {
711 int idx1 = (int) ((idxt >> 44) % UIDTableSize);
712 if (UIDTable[idx1] == uidt)
713 {
714 found = 1;
715 break;
716 }
717 else if (first)
718 {
719 first = 0;
720 UIDTable[idx1] = uidt;
721 }
722 long val = *ptr++;
723 uidt = uidt * ROOT_UID_INV - val;
724 idxt = idxt * ROOT_IDX_INV - val;
725 }
726 if (found)
727 {
728 char *d = (char*) ptr;
729 char *s = (char*) bnd;
730 if (!first)
731 {
732 int i;
733 for (i = 0; i<sizeof (uidt); i++)
734 {
735 *d++ = (char) uidt;
736 uidt = uidt >> 8;
737 }
738 }
739 int delta = s - d;
740 while (s < end)
741 *d++ = *s++;
742 cinfo->kind |= COMPRESSED_INFO;
743 cinfo->hsize -= delta;
744 frp->tsize -= delta;
745 end -= delta;
746 }
747 }
748 cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
749 }
750 __collector_write_packet (dhndl, (CM_Packet*) frp);
751 return (FrameInfo) uid;
752 }
753
754 FrameInfo
__collector_getUID(CM_Array * arg,FrameInfo suid)755 __collector_getUID (CM_Array *arg, FrameInfo suid)
756 {
757 if (arg->length % sizeof (long) != 0 ||
758 (long) arg->bytes % sizeof (long) != 0)
759 return (FrameInfo) - 1;
760 if (arg->length == 0)
761 return suid;
762
763 uint64_t uid = suid ? suid : 1;
764 uint64_t idx = suid ? suid : 1;
765 long *ptr = (long*) ((char*) arg->bytes + arg->length);
766 long *bnd = (long*) (arg->bytes);
767 while (ptr > bnd)
768 {
769 long val = *(--ptr);
770 uid = (uid + val) * ROOT_UID;
771 idx = (idx + val) * ROOT_IDX;
772 }
773
774 /* Check if we have already recorded that uid.
775 * The following fragment contains benign data races.
776 * It's important, though, that all reads from UIDTable
777 * happen before writes.
778 */
779 int found1 = 0;
780 int idx1 = (int) ((idx >> 44) % UIDTableSize);
781 if (UIDTable[idx1] == uid)
782 found1 = 1;
783 int found2 = 0;
784 int idx2 = (int) ((idx >> 24) % UIDTableSize);
785 if (UIDTable[idx2] == uid)
786 found2 = 1;
787 int found3 = 0;
788 int idx3 = (int) ((idx >> 4) % UIDTableSize);
789 if (UIDTable[idx3] == uid)
790 found3 = 1;
791
792 if (!found1)
793 UIDTable[idx1] = uid;
794 if (!found2)
795 UIDTable[idx2] = uid;
796 if (!found3)
797 UIDTable[idx3] = uid;
798 if (found1 || found2 || found3)
799 return (FrameInfo) uid;
800
801 int sz = sizeof (Uid_packet) + arg->length;
802 if (suid)
803 sz += sizeof (suid);
804 Uid_packet *uidp = alloca (sz);
805 uidp->tsize = sz;
806 uidp->type = UID_PCKT;
807 uidp->flags = 0;
808 uidp->uid = uid;
809
810 /* Compress */
811 ptr = (long*) (arg->bytes);
812 bnd = (long*) ((char*) arg->bytes + arg->length);
813 long *dst = (long*) (uidp + 1);
814 uint64_t uidt = uid;
815 uint64_t idxt = idx;
816 uint64_t luid = suid; /* link uid */
817
818 while (ptr < bnd)
819 {
820
821 long val = *ptr++;
822 *dst++ = val;
823
824 if ((bnd - ptr) > sizeof (uidt))
825 {
826 uidt = uidt * ROOT_UID_INV - val;
827 idxt = idxt * ROOT_IDX_INV - val;
828 int idx1 = (int) ((idxt >> 44) % UIDTableSize);
829 if (UIDTable[idx1] == uidt)
830 {
831 luid = uidt;
832 break;
833 }
834 }
835 }
836 if (luid)
837 {
838 char *d = (char*) dst;
839 for (int i = 0; i<sizeof (luid); i++)
840 {
841 *d++ = (char) luid;
842 luid = luid >> 8;
843 }
844 uidp->flags |= COMPRESSED_INFO;
845 uidp->tsize = d - (char*) uidp;
846 }
847 __collector_write_packet (dhndl, (CM_Packet*) uidp);
848
849 return (FrameInfo) uid;
850 }
851
852 int
__collector_getStackTrace(void * buf,int size,void * bptr,void * eptr,void * arg)853 __collector_getStackTrace (void *buf, int size, void *bptr, void *eptr, void *arg)
854 {
855 if (arg == (void*) __collector_omp_stack_trace)
856 seen_omp = 1;
857 int do_walk = 1;
858 if (arg == NULL || arg == (void*) __collector_omp_stack_trace)
859 {
860 do_walk = (arg == (void*) __collector_omp_stack_trace && omp_no_walk) ? 0 : 1;
861 ucontext_t *context = (ucontext_t*) alloca (sizeof (ucontext_t));
862 FILL_CONTEXT (context);
863 arg = context;
864 }
865 int unwind_mode = 0;
866 if (do_walk == 0)
867 unwind_mode |= FRINFO_NO_WALK;
868 return stack_unwind (buf, size, bptr, eptr, arg, unwind_mode);
869 }
870
871 #if ARCH(SPARC)
872 /*
873 * These are important data structures taken from the header files reg.h and
874 * ucontext.h. They are used for the stack trace algorithm explained below.
875 *
876 * typedef struct ucontext {
877 * u_long uc_flags;
878 * struct ucontext *uc_link;
879 * usigset_t uc_sigmask;
880 * stack_t uc_stack;
881 * mcontext_t uc_mcontext;
882 * long uc_filler[23];
883 * } ucontext_t;
884 *
885 * #define SPARC_MAXREGWINDOW 31
886 *
887 * struct rwindow {
888 * greg_t rw_local[8];
889 * greg_t rw_in[8];
890 * };
891 *
892 * #define rw_fp rw_in[6]
893 * #define rw_rtn rw_in[7]
894 *
895 * struct gwindows {
896 * int wbcnt;
897 * int *spbuf[SPARC_MAXREGWINDOW];
898 * struct rwindow wbuf[SPARC_MAXREGWINDOW];
899 * };
900 *
901 * typedef struct gwindows gwindows_t;
902 *
903 * typedef struct {
904 * gregset_t gregs;
905 * gwindows_t *gwins;
906 * fpregset_t fpregs;
907 * long filler[21];
908 * } mcontext_t;
909 *
910 * The stack would look like this when SIGPROF occurrs.
911 *
912 * ------------------------- <- high memory
913 * | |
914 * | |
915 * -------------------------
916 * | |
917 * ------------------------- <- fp' <-|
918 * | | |
919 * : : |
920 * | | |
921 * ------------------------- |
922 * | fp |----------|
923 * | |
924 * ------------------------- <- sp'
925 * | | | |
926 * | gwins | <- saved stack pointers & | |
927 * | | register windows | |- mcontext
928 * ------------------------- | |
929 * | gregs | <- saved registers | |
930 * ------------------------- |
931 * | | |- ucontext
932 * ------------------------- <- ucp (ucontext pointer) |
933 * | | |
934 * | | |- siginfo
935 * ------------------------- <- sip (siginfo pointer) |
936 * | |
937 * ------------------------- <- sp
938 *
939 * Then the signal handler is called with:
940 * handler( signo, sip, uip );
941 * When gwins is null, all the stack frames are saved in the user stack.
942 * In that case we can find sp' from gregs and walk the stack for a backtrace.
943 * However, if gwins is not null we will have a more complicated case.
944 * Wbcnt(in gwins) tells you how many saved register windows are valid.
945 * This is important because the kernel does not allocate the entire array.
946 * And the top most frame is saved in the lowest index element. The next
947 * paragraph explains the possible causes.
948 *
949 * There are two routines in the kernel to flush out user register windows.
950 * flush_user_windows and flush_user_windows_to_stack
951 * The first routine will not cause a page fault. Therefore if the user
952 * stack is not in memory, the register windows will be saved to the pcb.
953 * This can happen when the kernel is trying to deliver a signal and
954 * the user stack got swap out. The kernel will then build a new context for
955 * the signal handler and the saved register windows will
956 * be copied to the ucontext as show above. On the other hand,
957 * flush_user_windows_to_stack can cause a page fault, and if it failed
958 * then there is something wrong (stack overflow, misalign).
959 * The first saved register window does not necessary correspond to the
960 * first stack frame. So the current stack pointer must be compare with
961 * the stack pointers in spbuf to find a match.
962 *
963 * We will also follow the uc_link field in ucontext to trace also nested
964 * signal stack frames.
965 *
966 */
967
968 /* Dealing with trap handlers.
969 * When a user defined trap handler is invoked the return address
970 * (or actually the address of an instruction that raised the trap)
971 * is passed to the trap handler in %l6, whereas saved %o7 contains
972 * garbage. First, we need to find out if a particular pc belongs
973 * to the trap handler, and if so, take the %l6 value from the stack rather
974 * than %o7 from either the stack or the register.
975 * There are three possible situations represented
976 * by the following stacks:
977 *
978 * MARKER MARKER MARKER
979 * trap handler pc __func pc before 'save' __func pc after 'save'
980 * %l6 %o7 from reg %o7 (garbage)
981 * ... %l6 trap handler pc
982 * ... %l6
983 * ...
984 * where __func is a function called from the trap handler.
985 *
986 * Currently this is implemented to only deal with __misalign_trap_handler
987 * set for v9 FORTRAN applications. Implementation of IN_TRAP_HANDLER
988 * macro shows it. A general solution is postponed.
989 */
990
991 /* Special handling of unwind through the parallel loop barrier code:
992 *
993 * The library defines two symbols, __mt_EndOfTask_Barrier_ and
994 * __mt_EndOfTask_Barrier_Dummy_ representing the first word of
995 * the barrier sychronization code, and the first word following
996 * it. Whenever the leaf PC is between these two symbols,
997 * the unwind code is special-cased as follows:
998 * The __mt_EndOfTask_Barrier_ function is guaranteed to be a leaf
999 * function, so its return address is in a register, not saved on
1000 * the stack.
1001 *
1002 * MARKER
1003 * __mt_EndOfTask_Barrier_ PC -- the leaf PC
1004 * loop body function address for the task -- implied caller of __mt_EndOfTask_Barrier_
1005 * this address is taken from the %O0 register
1006 * {mt_master or mt_slave} -- real caller of __mt_EndOfTask_Barrier_
1007 * ...
1008 *
1009 * With this trick, the analyzer will show the time in the barrier
1010 * attributed to the loop at the end of which the barrier synchronization
1011 * is taking place. That loop body routine, will be shown as called
1012 * from the function from which it was extracted, which will be shown
1013 * as called from the real caller, either the slave or master library routine.
1014 */
1015
1016 /*
1017 * These no-fault-load (0x82) assembly functions are courtesy of Rob Gardner.
1018 *
1019 * Note that 0x82 is ASI_PNF. See
1020 * http://lxr.free-electrons.com/source/arch/sparc/include/uapi/asm/asi.h#L134
1021 * ASI address space identifier; PNF primary no fault
1022 */
1023
1024 /* load an int from an address */
1025
1026 /* if the address is illegal, return a 0 */
static int
SPARC_no_fault_load_int (void *addr)
{
  int val;
  /* "lda [addr] 0x82" loads with ASI_PNF (primary no-fault): if ADDR is
     unmapped the access faults silently and the load yields 0 instead of
     delivering SIGSEGV, so an arbitrary pointer can be probed safely.  */
  __asm__ __volatile__(
		       "lda [%1] 0x82, %0\n\t"
		       : "=r" (val)
		       : "r" (addr)
		       );

  return val;
}
1039
1040 /* check if an address is invalid
1041 *
1042 * A no-fault load of an illegal address still faults, but it does so silently to the calling process.
1043 * It returns a 0, but so could a load of a legal address.
1044 * So, we time the load. A "fast" load must be a successful load.
1045 * A "slow" load is probably a fault.
1046 * Since it could also be a cache/TLB miss or other abnormality,
1047 * it's safest to retry a slow load.
1048 * The cost of trying a valid address should be some nanosecs.
1049 * The cost of trying an invalid address up to 10 times could be some microsecs.
1050 */
#if 0
/* Check whether ADDR is an invalid address by timing a no-fault load
   (see the block comment above).  Returns 1 if the load is consistently
   slow (probably a fault), 0 on the first fast load.
   Currently disabled; kept for reference.  */
static
int invalid_SPARC_addr(void *addr)
{
    long t1, t2;
    int i;

    /* Retry up to 10 times: one slow load may be a cache/TLB miss rather
       than a fault, so only consistently slow loads count as invalid.  */
    for (i=0; i<10; i++) {
	__asm__ __volatile__(
	    "rd %%tick, %0\n\t"
	    "lduba [%2] 0x82, %%g0\n\t"
	    "rd %%tick, %1\n\t"
	    : "=r" (t1), "=r" (t2)
	    : "r" (addr) );
	if ( (t2 - t1) < 100 )
	    return 0;
    }
    return 1;
}
#endif
1071
1072 /*
1073 * The standard SPARC procedure-calling convention is that the
1074 * calling PC (for determining the return address when the procedure
1075 * is finished) is placed in register %o7. A called procedure
1076 * typically executes a "save" instruction that shifts the register
1077 * window, and %o7 becomes %i7.
1078 *
1079 * Optimized leaf procedures do not shift the register window.
1080 * They assume the return address will remain %o7. So when
1081 * we process a leaf PC, we walk instructions to see if there
1082 * is a call, restore, or other instruction that would indicate
1083 * we can IGNORE %o7 because this is NOT a leaf procedure.
1084 *
1085 * If a limited instruction walk uncovers no such hint, we save
1086 * not only the PC but the %o7 value as well... just to be safe.
1087 * Later, in DBE post-processing of the call stacks, we decide
1088 * whether any recorded %o7 value should be used as a caller
1089 * frame or should be discarded.
1090 */
1091
/* Predicates over raw 32-bit SPARC instruction encodings, used by
   process_leaf to classify the instructions it walks (each matches one
   opcode form, e.g. IS_SAVE matches 'save', IS_RET matches 'ret').  */
#define IS_ILLTRAP(x) (((x) & 0xc1c00000) == 0)
#define IS_SAVE(x) (((x) & 0xc1f80000) == 0x81e00000)
#define IS_MOVO7R(x) (((x) & 0xc1f8201f) == 0x8160000f)
#define IS_MOVRO7(x) (((x) & 0xfff82000) == 0x9f600000)
#define IS_ORRG0O7(x) (((x) & 0xff78201f) == 0x9e100000)
#define IS_ORG0RO7(x) (((x) & 0xff7fe000) == 0x9e100000)
#define IS_ORG0O7R(x) (((x) & 0xc17fe01f) == 0x8010000f)
#define IS_ORO7G0R(x) (((x) & 0xc17fe01f) == 0x8013c000)
#define IS_RESTORE(x) (((x) & 0xc1f80000) == 0x81e80000)
#define IS_RET(x) ((x) == 0x81c7e008)
#define IS_RETL(x) ((x) == 0x81c3e008)
#define IS_RETURN(x) (((x) & 0xc1f80000) == 0x81c80000)
#define IS_BRANCH(x) ((((x) & 0xc0000000) == 0) && (((x) & 0x01c00000) != 0x01000000))
#define IS_CALL(x) (((x) & 0xc0000000) == 0x40000000)
#define IS_LDO7(x) (((x) & 0xfff80000) == 0xde000000)

/* Page size, looked up lazily in process_leaf; 0 means "not yet known".  */
static long pagesize = 0;
1109
/* Handle the interrupt (leaf) PC: decide whether it belongs to a leaf
   routine (one that has not shifted the register window), and record the
   PC -- and, when it may still hold the caller's address, %o7 -- in lbuf.
     lbuf    - output buffer of return addresses
     ind     - current index into lbuf
     lsize   - capacity of lbuf in elements
     context - ucontext from which PC and registers are read
   Returns the updated index.  */
static int
process_leaf (long *lbuf, int ind, int lsize, void *context)
{
  greg_t pc = GET_PC (context);
  greg_t o7 = GET_GREG (context, REG_O7);

  /* omazur: TBR START -- not used */
  if (IN_BARRIER (pc))
    {
      /* parallel-loop barrier: record the leaf PC, then the loop body
	 address from %o0 as its implied caller (see comment above)  */
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = GET_GREG (context, REG_O0);
      return ind;
    }
  /* omazur: TBR END */
#if WSIZE(64)
  if (IN_TRAP_HANDLER (pc))
    {
      /* saved %o7 is garbage in a trap handler; record only the pc --
	 the caller is recovered from %l6 later in stack_unwind  */
      if (ind < lsize)
	lbuf[ind++] = pc;
      return ind;
    }
#endif
  /* Walk forward up to 20 instructions looking for evidence that this
     is, or is not, a leaf routine.  */
  unsigned *instrp = (unsigned *) pc;
  unsigned *end_addr = instrp + 20;
  while (instrp < end_addr)
    {
      unsigned instr = *instrp++;
      if (IS_ILLTRAP (instr))
	break;
      else if (IS_SAVE (instr))
	{
	  /* a 'save' lies ahead: the window has not shifted yet, so %o7
	     still holds the caller's address -- record pc and o7  */
	  if (ind < lsize)
	    lbuf[ind++] = pc;
	  if (o7 && ind < lsize)
	    lbuf[ind++] = o7;
	  return ind;
	}
      else if (IS_MOVO7R (instr) || IS_ORG0O7R (instr) || IS_ORO7G0R (instr))
	break;
      else if (IS_MOVRO7 (instr) || IS_ORG0RO7 (instr))
	{
	  /* %o7 is (re)loaded from register rs2; take the caller address
	     from that register instead, if it is one of %g1..%o7  */
	  int rs2 = (instr & 0x1f) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_ORRG0O7 (instr))
	{
	  /* same, with rs1 encoding the source register  */
	  int rs2 = ((instr & 0x7c000) >> 14) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_RESTORE (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETURN (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RET (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETL (instr))
	{
	  /* process delay slot */
	  instr = *instrp++;
	  if (IS_RESTORE (instr))
	    o7 = 0;
	  break;
	}
      else if (IS_BRANCH (instr))
	{
	  /* scan a few instructions backward: a preceding load into %o7,
	     save, call, or return tells us whether %o7 can be trusted  */
	  unsigned *backbegin = ((unsigned *) pc - 1);
	  unsigned *backend = backbegin - 12 + (instrp - (unsigned *) pc);
	  while (backbegin > backend)
	    {
	      // 21920143 stack unwind: SPARC process_leaf backtracks too far
	      /*
	       * We've already dereferenced backbegin+1.
	       * So if backbegin is on the same page, we're fine.
	       * If we've gone to a different page, possibly things are not fine.
	       * We don't really know how to test that.
	       * Let's just assume the worst: that dereferencing backbegin would segv.
	       * We won't know if we're in a leaf function or not.
	       */
	      if (pagesize == 0)
		pagesize = CALL_UTIL (sysconf)(_SC_PAGESIZE);
	      if ((((long) (backbegin + 1)) & (pagesize - 1)) < sizeof (unsigned*))
		break;
	      unsigned backinstr = *backbegin--;
	      if (IS_LDO7 (backinstr))
		{
		  o7 = 0;
		  break;
		}
	      else if (IS_ILLTRAP (backinstr))
		break;
	      else if (IS_RETURN (backinstr))
		break;
	      else if (IS_RET (backinstr))
		break;
	      else if (IS_RETL (backinstr))
		break;
	      else if (IS_CALL (backinstr))
		break;
	      else if (IS_SAVE (backinstr))
		{
		  o7 = 0;
		  break;
		}
	    }
	  break;
	}
      else if (IS_CALL (instr))
	o7 = 0;
    }

#if WSIZE(64)
  if (o7 != 0 && ((long) o7) < 32 && ((long) o7) > -32)
    {
      /* 20924821 SEGV in unwind code on SPARC/Linux
       * We've seen this condition in some SPARC-Linux runs.
       * o7 is non-zero but not a valid address.
       * Values like 4 or -7 have been seen.
       * Let's check if o7 is unreasonably small.
       * If so, set to 0 so that it won't be recorded.
       * Otherwise, there is risk of it being dereferenced in process_sigreturn().
       */
      // __collector_log_write("<event kind=\"%s\" id=\"%d\">time %lld, internal debug unwind at leaf; o7 = %ld, pc = %x</event>\n",
      //	 SP_JCMD_COMMENT, COL_COMMENT_NONE, __collector_gethrtime() - __collector_start_time, (long) o7, pc );
      o7 = 0;
    }
#endif

  if (o7)
    {
      /* could not prove this is a non-leaf routine: record a marker plus
	 both pc and o7; the DBE later decides whether o7 is a real
	 caller frame or should be discarded  */
      if (ind < lsize)
	lbuf[ind++] = SP_LEAF_CHECK_MARKER;
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = o7;
    }
  else if (ind < lsize)
    lbuf[ind++] = pc;
  return ind;
}
1263
#if WSIZE(64)
// detect signal handler
/* Check whether TPC is the __rt_sigreturn_stub trampoline; if so, pull
   the interrupted pc, the caller pc and the pre-signal frame pointer out
   of the kernel-built rt_signal_frame and record them in lbuf.
     tpc	 - candidate trampoline address (the saved return pc)
     pfp	 - in/out frame pointer; replaced with the pre-signal fp on a hit
     bptr	 - if non-NULL, frames below this stack address are filtered
     extra_frame - FRINFO_FROM_STACK mode flag (see stack_unwind)
   Returns the updated index into lbuf (unchanged when no trampoline).  */
static int
process_sigreturn (long *lbuf, int ind, int lsize, unsigned char * tpc,
		   struct frame **pfp, void * bptr, int extra_frame)
{
  // cheap checks whether tpc is obviously not an instruction address
  if ((4096 > (unsigned long) tpc) // the first page is off limits
      || (3 & (unsigned long) tpc))
    return ind; // the address is not aligned

  // get the instruction at tpc, skipping over as many as 7 nop's (0x01000000)
  int insn, i;
  for (i = 0; i < 7; i++)
    {
      insn = SPARC_no_fault_load_int ((void *) tpc);
      if (insn != 0x01000000)
	break;
      tpc += 4;
    }

  // we're not expecting 0 (and it could mean an illegal address)
  if (insn == 0)
    return ind;

  // We are looking for __rt_sigreturn_stub with the instruction
  // 0x82102065 : mov 0x65 /* __NR_rt_sigreturn */, %g1
  if (insn == 0x82102065)
    {
      /*
       * according to linux kernel source code,
       * syscall(_NR_rt_sigreturn) uses the following data in stack:
       * struct rt_signal_frame {
       *	struct sparc_stackf	ss;
       *	siginfo_t		info;
       *	struct pt_regs		regs;
       * ....};
       * sizeof(struct sparc_stackf) is 192;
       * sizeof(siginfo_t) is 128;
       * we need to get the register values from regs, which is defined as:
       * struct pt_regs {
       *	unsigned long u_regs[16];
       *	unsigned long tstate;
       *	unsigned long tpc;
       *	unsigned long tnpc;
       * ....};
       * pc and fp register has offset of 120 and 112;
       * the pc of kill() is stored in tnpc, whose offset is 136.
       */
      greg_t pc = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 136));
      greg_t pc1 = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 120));
      (*pfp) = *((struct frame**) ((char*) ((*pfp)) + 192 + 128 + 112));
      if (pc && pc1)
	{
	  /* in FRINFO_FROM_STACK mode a frame below bptr only seeds lbuf[0] */
	  if (bptr != NULL && extra_frame && ((char*) (*pfp) + STACK_BIAS) < (char*) bptr && ind < 2)
	    {
	      lbuf[0] = pc1;
	      if (ind == 0)
		ind++;
	    }
	  if (bptr == NULL || ((char*) (*pfp) + STACK_BIAS) >= (char*) bptr)
	    {
	      if (ind < lsize)
		lbuf[ind++] = (unsigned long) tpc;
	      if (ind < lsize)
		lbuf[ind++] = pc;
	      if (ind < lsize)
		lbuf[ind++] = pc1;
	    }
	}
      DprintfT (SP_DUMP_UNWIND, "unwind.c: resolved sigreturn pc=0x%lx, pc1=0x%lx, fp=0x%lx\n", pc, pc1, *(pfp));
    }
  return ind;
}
#endif
1339
1340 /*
1341 * int stack_unwind( char *buf, int size, ucontext_t *context )
1342 * This routine looks into the mcontext and
1343 * trace stack frames to record return addresses.
1344 */
1345 int
stack_unwind(char * buf,int size,void * bptr,void * eptr,ucontext_t * context,int mode)1346 stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
1347 {
1348 /*
1349 * trace the stack frames from user stack.
1350 * We are assuming that the frame pointer and return address
1351 * are null when we are at the top level.
1352 */
1353 long *lbuf = (long*) buf;
1354 int lsize = size / sizeof (long);
1355 struct frame *fp = (struct frame *) GET_SP (context); /* frame pointer */
1356 greg_t pc; /* program counter */
1357 int extra_frame = 0;
1358 if ((mode & 0xffff) == FRINFO_FROM_STACK)
1359 extra_frame = 1;
1360
1361 int ind = 0;
1362 if (bptr == NULL)
1363 ind = process_leaf (lbuf, ind, lsize, context);
1364
1365 int extra_frame = 0;
1366 if ((mode & 0xffff) == FRINFO_FROM_STACK)
1367 extra_frame = 1;
1368 int ind = 0;
1369 if (bptr == NULL)
1370 ind = process_leaf (lbuf, ind, lsize, context);
1371
1372 while (fp)
1373 {
1374 if (ind >= lsize)
1375 break;
1376 fp = (struct frame *) ((char *) fp + STACK_BIAS);
1377 if (eptr && fp >= (struct frame *) eptr)
1378 {
1379 ind = ind >= 2 ? ind - 2 : 0;
1380 break;
1381 }
1382 #if WSIZE(64) // detect signal handler
1383 unsigned char * tpc = ((unsigned char*) (fp->fr_savpc));
1384 struct frame * tfp = (struct frame*) ((char*) (fp->fr_savfp) + STACK_BIAS);
1385 int old_ind = ind;
1386 ind = process_sigreturn (lbuf, old_ind, lsize, tpc, &tfp, bptr, extra_frame);
1387 if (ind != old_ind)
1388 {
1389 pc = (greg_t) tpc;
1390 fp = tfp;
1391 }
1392 else
1393 #endif
1394 {
1395 #if WSIZE(64)
1396 if (IN_TRAP_HANDLER (lbuf[ind - 1]))
1397 pc = fp->fr_local[6];
1398 else
1399 pc = fp->fr_savpc;
1400 #else
1401 pc = fp->fr_savpc;
1402 #endif
1403 fp = fp->fr_savfp;
1404 if (pc)
1405 {
1406 if (bptr != NULL && extra_frame && ((char*) fp + STACK_BIAS) < (char*) bptr && ind < 2)
1407 {
1408 lbuf[0] = pc;
1409 if (ind == 0)
1410 ind++;
1411 }
1412 if (bptr == NULL || ((char*) fp + STACK_BIAS) >= (char*) bptr)
1413 lbuf[ind++] = pc;
1414 }
1415 }
1416
1417 /* 4616238: _door_return may have a frame that has non-zero
1418 * saved stack pointer and zero pc
1419 */
1420 if (pc == (greg_t) NULL)
1421 break;
1422 }
1423
1424 if (ind >= lsize)
1425 { /* truncated stack handling */
1426 ind = lsize - 1;
1427 lbuf[ind++] = SP_TRUNC_STACK_MARKER;
1428 }
1429 return ind * sizeof (long);
1430 }
1431
1432 #elif ARCH(Intel)
1433
1434 /* get __NR_<syscall_name> constants */
1435 #include <syscall.h>
1436
1437 /*
1438 * From uts/intel/ia32/os/sendsig.c:
1439 *
1440 * An amd64 signal frame looks like this on the stack:
1441 *
1442 * old %rsp:
1443 * <128 bytes of untouched stack space>
1444 * <a siginfo_t [optional]>
1445 * <a ucontext_t>
1446 * <siginfo_t *>
1447 * <signal number>
1448 * new %rsp: <return address (deliberately invalid)>
1449 *
1450 * The signal number and siginfo_t pointer are only pushed onto the stack in
1451 * order to allow stack backtraces. The actual signal handling code expects the
1452 * arguments in registers.
1453 *
1454 * An i386 SVR4/ABI signal frame looks like this on the stack:
1455 *
1456 * old %esp:
1457 * <a siginfo32_t [optional]>
1458 * <a ucontext32_t>
1459 * <pointer to that ucontext32_t>
1460 * <pointer to that siginfo32_t>
1461 * <signo>
1462 * new %esp: <return address (deliberately invalid)>
1463 */
1464
/* Field extractors for x86 instruction bytes.  On amd64 the B and R bits
   (REX-prefix state tracked by the walk code outside this chunk) widen
   the 3-bit register fields to 4 bits.  */
#if WSIZE(32)
#define OPC_REG(x) ((x)&0x7)
#define MRM_REGD(x) (((x)>>3)&0x7)
#define MRM_REGS(x) ((x)&0x7)
#define RED_ZONE 0
#elif WSIZE(64)
#define OPC_REG(x) (B|((x)&0x7))
#define MRM_REGD(x) (R|(((x)>>3)&0x7))
#define MRM_REGS(x) (B|((x)&0x7))
#define RED_ZONE 16	/* 16 longs = the 128-byte amd64 red zone (see sp_safe) */
#endif
#define MRM_EXT(x) (((x)>>3)&0x7)
#define MRM_MOD(x) ((x)&0xc0)

/* Register numbers as used in ModRM/opcode encodings */
#define RAX 0
#define RDX 2
#define RSP 4
#define RBP 5
1483
/* State of one speculative x86 code walk (see find_i386_ret_addr).
   Several contexts live in a fixed array (MAXCTX) and are forked at
   branches and deleted when a walk dead-ends.  */
struct AdvWalkContext
{
  unsigned char *pc;		/* current instruction address of this walk */
  unsigned long *sp;		/* simulated stack pointer */
  unsigned long *sp_safe;	/* lowest stack address safe to dereference */
  unsigned long *fp;		/* simulated frame pointer */
  unsigned long *fp_sav;	/* NOTE(review): saved fp value -- maintained by walk code outside this chunk */
  unsigned long *fp_loc;	/* NOTE(review): stack slot where fp was saved -- confirm in walk code */
  unsigned long rax;		/* tracked %rax value (presumably; set by walk code not visible here) */
  unsigned long rdx;		/* tracked %rdx value (presumably; set by walk code not visible here) */
  unsigned long ra_sav;		/* return-address value known to be stored at ra_loc */
  unsigned long ra_loc;		/* -- see process_return_real: when sp == ra_loc, ra_sav is used */
  unsigned long regs[16];	/* tracked general register values; 0 = unknown (see getRegVal) */
  int tidx; /* targets table index */
  uint32_t cval; /* cache value */
};
1500
1501 static unsigned long
getRegVal(struct AdvWalkContext * cur,int r,int * undefRez)1502 getRegVal (struct AdvWalkContext *cur, int r, int *undefRez)
1503 {
1504 if (cur->regs[r] == 0)
1505 {
1506 if (r == RBP)
1507 {
1508 tprintf (DBG_LT3, "getRegVal: returns cur->regs[RBP]=0x%lx cur->pc=0x%lx\n",
1509 (unsigned long) cur->fp, (unsigned long) cur->pc);
1510 return (unsigned long) cur->fp;
1511 }
1512 *undefRez = 1;
1513 }
1514 tprintf (DBG_LT3, "getRegVal: cur->regs[%d]=0x%lx cur->pc=0x%lx\n",
1515 r, (unsigned long) cur->regs[r], (unsigned long) cur->pc);
1516 return cur->regs[r];
1517 }
1518
/* Decode the ModRM byte at PC and skip over any SIB byte and
   displacement that follow it.  Returns the address of the byte
   after the complete ModRM/SIB/displacement group.  */
static unsigned char *
check_modrm (unsigned char *pc)
{
  unsigned char modrm = *pc++;
  unsigned char mod = modrm & 0xc0;	/* addressing-mode bits (MRM_MOD) */
  if (mod == 0xc0)
    return pc;				/* register-direct: nothing follows */
  unsigned char rm = modrm & 0x07;
  if (rm == 4)				/* rm == RSP: a SIB byte is present */
    {
      switch (mod)
	{
	case 0x40:
	  return pc + 2;		/* SIB + disp8 */
	case 0x80:
	  return pc + 5;		/* SIB + disp32 */
	default:
	  return pc + 1;		/* SIB only */
	}
    }
  if (mod == 0x0)
    {
      if (rm == 5)			/* rm == RBP: disp32 form */
	pc += 4;
    }
  else if (mod == 0x40)
    pc += 1;				/* disp8 */
  else if (mod == 0x80)
    pc += 4;				/* disp32 */
  return pc;
}
1546
/* Read a W-byte (1, 2 or otherwise 4) signed integer at PC.
   The instruction decoder calls this at arbitrary byte offsets, so the
   previous *(short*)/(int*) casts performed misaligned, type-punned
   loads -- undefined behavior (alignment + strict aliasing).  This code
   is Intel-only (see #elif ARCH(Intel) above), so little-endian byte
   assembly is equivalent and well-defined.  */
static int
read_int (unsigned char *pc, int w)
{
  if (w == 1)
    return *((char *) pc);		/* sign-extended byte */
  if (w == 2)
    return (short) ((unsigned) pc[0] | ((unsigned) pc[1] << 8));
  return (int) ((unsigned) pc[0] | ((unsigned) pc[1] << 8)
		| ((unsigned) pc[2] << 16) | ((unsigned) pc[3] << 24));
}
1556
/* Return codes */
enum
{
  RA_FAILURE = 0,	/* no return address could be determined */
  RA_SUCCESS,		/* wctx advanced to the caller's frame */
  RA_END_OF_STACK,	/* outermost frame reached (zero RA) */
  RA_SIGRETURN,		/* sigreturn trampoline detected */
  RA_RT_SIGRETURN	/* rt_sigreturn trampoline detected */
};

/* Cache value encodings */
static const uint32_t RA_FROMFP = (uint32_t) - 1; /* get the RA from the frame pointer */
static const uint32_t RA_EOSTCK = (uint32_t) - 2; /* end-of-stack */


/* Limits for the speculative code walk (see find_i386_ret_addr) */
#define MAXCTX 16		/* max simultaneous walk contexts (buf[]) */
#define MAXTRGTS 64		/* max recorded branch targets (targets[]) */
#define MAXJMPREG 2		/* max deferred "jmp *reg" contexts */
#define MAXJMPREGCTX 3

/* Drop the current walk context, replacing it with the last one in buf */
#define DELETE_CURCTX() __collector_memcpy (cur, buf + (--nctx), sizeof (*cur))
1578
/**
 * Look for pc in AddrTable_RA_FROMFP and in AddrTable_RA_EOSTCK and,
 * on a hit, resolve the caller's frame without walking any code.
 * @param wctx  walk context; updated in place on RA_SUCCESS
 * @return RA_SUCCESS, RA_END_OF_STACK, or RA_FAILURE on a cache miss
 *	   or when the cached information does not validate
 */
static int
cache_get (struct WalkContext *wctx)
{
  unsigned long addr;
  if (AddrTable_RA_FROMFP != NULL)
    {
      uint64_t idx = wctx->pc % ValTableSize;
      addr = AddrTable_RA_FROMFP[ idx ];
      if (addr == wctx->pc)
	{ // Found in AddrTable_RA_FROMFP
	  unsigned long *sp = NULL;
	  unsigned long fp = wctx->fp;
	  /* validate fp before use */
	  if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
	    return RA_FAILURE;
	  sp = (unsigned long *) fp;
	  fp = *sp++;			/* saved frame pointer */
	  unsigned long ra = *sp++;	/* return address stored next to it */
	  unsigned long tbgn = wctx->tbgn;
	  unsigned long tend = wctx->tend;
	  /* ra must lie in a known text segment */
	  if (ra < tbgn || ra >= tend)
	    if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	      return RA_FAILURE;
	  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
	  if (npc == 0)
	    return RA_FAILURE;
	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached pc=0x%lX\n", __LINE__, npc);
	  wctx->pc = npc;
	  wctx->sp = (unsigned long) sp;
	  wctx->fp = fp;
	  wctx->tbgn = tbgn;
	  wctx->tend = tend;
	  return RA_SUCCESS;
	}
    }
  if (NULL == AddrTable_RA_EOSTCK)
    return RA_FAILURE;
  uint64_t idx = wctx->pc % ValTableSize;
  addr = AddrTable_RA_EOSTCK[ idx ];
  if (addr != wctx->pc)
    return RA_FAILURE;
  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached RA_END_OF_STACK\n", __LINE__);
  return RA_END_OF_STACK;
}
1628 /**
1629 * Save pc in RA_FROMFP or RA_EOSTCK cache depending on val
1630 * @param wctx
1631 */
1632 static void
cache_put(struct WalkContext * wctx,const uint32_t val)1633 cache_put (struct WalkContext *wctx, const uint32_t val)
1634 {
1635 if (RA_FROMFP == val)
1636 {
1637 // save pc in RA_FROMFP cache
1638 if (NULL != AddrTable_RA_FROMFP)
1639 {
1640 uint64_t idx = wctx->pc % ValTableSize;
1641 AddrTable_RA_FROMFP[ idx ] = wctx->pc;
1642 if (NULL != AddrTable_RA_EOSTCK)
1643 if (AddrTable_RA_EOSTCK[ idx ] == wctx->pc)
1644 // invalidate pc in RA_EOSTCK cache
1645 AddrTable_RA_EOSTCK[ idx ] = 0;
1646 }
1647 return;
1648 }
1649 if (RA_EOSTCK == val)
1650 {
1651 // save pc in RA_EOSTCK cache
1652 if (NULL != AddrTable_RA_EOSTCK)
1653 {
1654 uint64_t idx = wctx->pc % ValTableSize;
1655 AddrTable_RA_EOSTCK[ idx ] = wctx->pc;
1656 if (NULL != AddrTable_RA_FROMFP)
1657 {
1658 if (AddrTable_RA_FROMFP[ idx ] == wctx->pc)
1659 // invalidate pc in RA_FROMFP cache
1660 AddrTable_RA_FROMFP[ idx ] = 0;
1661 }
1662 }
1663 return;
1664 }
1665 }
1666
/* Pop a return address at cur->sp and advance wctx to the caller's frame.
     cache_on - when nonzero, record RA_EOSTCK / RA_FROMFP results in the
		address caches so later cache_get() calls hit.
   Returns RA_SUCCESS, RA_END_OF_STACK, or RA_FAILURE.  */
static int
process_return_real (struct WalkContext *wctx, struct AdvWalkContext *cur, int cache_on)
{
  /* the simulated sp must lie within the thread's stack */
  if ((unsigned long) cur->sp >= wctx->sbase ||
      (unsigned long) cur->sp < wctx->sp)
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not in stack: %p [0x%lX-0x%lX]\n",
		cur->sp, wctx->sp, wctx->sbase);
      return RA_FAILURE;
    }

  unsigned long ra;
  if (cur->sp == cur->ra_loc)
    {
      /* the walk recorded the RA value saved at exactly this location */
      ra = cur->ra_sav;
      cur->sp++;
    }
  else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
    ra = *cur->sp++;
  else
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not safe: %p >= %p\n", cur->sp, cur->sp_safe);
      return RA_FAILURE;
    }
  if (ra == 0)
    {
      /* a zero return address marks the outermost frame */
      if (cache_on)
	cache_put (wctx, RA_EOSTCK);
      wctx->pc = ra;
      wctx->sp = (unsigned long) cur->sp;
      wctx->fp = (unsigned long) cur->fp;
      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d RA_END_OF_STACK\n", __LINE__);
      return RA_END_OF_STACK;
    }

  /* ra must belong to a known text segment */
  unsigned long tbgn = wctx->tbgn;
  unsigned long tend = wctx->tend;
  if (ra < tbgn || ra >= tend)
    {
      if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: not in segment: 0x%lX [0x%lX-0x%lX]\n",
		    ra, wctx->tbgn, wctx->tend);
	  return RA_FAILURE;
	}
    }

  if (cur->cval == RA_FROMFP)
    {
      /* the RA came via the frame pointer; cache it only if fp really
	 pointed at the two words just popped, otherwise distrust it */
      if (wctx->fp == (unsigned long) (cur->sp - 2))
	{
	  if (cache_on)
	    cache_put (wctx, RA_FROMFP);
	}
      else
	cur->cval = 0;
    }

  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
  if (npc == 0)
    {
      if (cur->cval == RA_FROMFP)
	{
	  /* We have another evidence that we can trust this RA */
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: trusted fp, pc = 0x%lX\n", wctx->pc);
	  wctx->pc = ra;
	}
      else
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: 0 after adjustment\n");
	  return RA_FAILURE;
	}
    }
  else
    wctx->pc = npc;
  wctx->sp = (unsigned long) cur->sp;
  wctx->fp = (unsigned long) cur->fp;
  wctx->tbgn = tbgn;
  wctx->tend = tend;
  return RA_SUCCESS;
}
1748
/* Caching variant of process_return_real: results may be stored in the
   RA_FROMFP / RA_EOSTCK address caches.  */
static int
process_return (struct WalkContext *wctx, struct AdvWalkContext *cur)
{
  return process_return_real (wctx, cur, 1);
}
1754
/* Record in the OpenMP unwind caches the mapping from the pre-unwind
   context wctx_pc_save to its resolved caller context wctx and result
   code val, keyed (hashed) by wctx_pc_save->pc.
     cur_sp_safe - lowest stack address that may be dereferenced.
   The four Omp* tables are allocated lazily on first use.  */
static void
omp_cache_put (unsigned long *cur_sp_safe, struct WalkContext * wctx_pc_save,
	       struct WalkContext *wctx, uint32_t val)
{
  if (omp_no_walk && (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL))
    {
      size_t sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
    }
  if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
    return;	/* allocation failed; cannot cache */

#define USE_18434988_OMP_CACHE_WORKAROUND
#ifndef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx = wctx_pc_save->pc * ROOT_IDX;
  OmpVals[ idx % OmpValTableSize ] = val;
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCurCtxs[ idx % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCtxs[ idx % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
#endif
  /* locate the stack word holding the return address, for later validation */
  unsigned long *sp = NULL;
  unsigned long fp = wctx_pc_save->fp;
  int from_fp = 0;
  if (val == RA_END_OF_STACK)
    {
      sp = (unsigned long *) (wctx->sp);
      sp--;
      TprintfT (DBG_LT1, "omp_cache_put: get sp from EOS, sp=%p\n", sp);
    }
  else
    {
      if (fp < wctx_pc_save->sp || fp >= wctx_pc_save->sbase - sizeof (*sp))
	{
	  /* fp is not a plausible stack address; use the caller's sp */
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from sp, sp=%p\n", sp);
	}
      else
	{
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from fp=0x%lx\n", fp);
	  sp = (unsigned long *) fp;
	  from_fp = 1;
	}
    }

  if (sp < cur_sp_safe || ((unsigned long) sp >= wctx->sbase))
    return;

  unsigned long ra = *sp++;
  if (from_fp)
    {
      /* if the RA found via fp is outside the caller's text segment,
	 fall back to the word just below the caller's sp */
      unsigned long tbgn = wctx_pc_save->tbgn;
      unsigned long tend = wctx_pc_save->tend;
      if (ra < tbgn || ra >= tend)
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  ra = *sp++;
	}
    }
#ifdef USE_18434988_OMP_CACHE_WORKAROUND
  /* write OmpRAs last: a zero OmpRAs entry marks the slot invalid, so
     readers never see a partially-updated slot */
  uint64_t idx1 = wctx_pc_save->pc * ROOT_IDX;
  uint64_t idx2 = (idx1 + val) * ROOT_IDX;
  uint64_t idx3 = (idx2 + val) * ROOT_IDX;
  uint64_t idx4 = (idx3 + val) * ROOT_IDX;
  OmpRAs [ idx4 % OmpValTableSize ] = 0; // lock
  OmpVals[ idx1 % OmpValTableSize ] = val;
  __collector_memcpy (&(OmpCurCtxs[ idx2 % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  __collector_memcpy (&(OmpCtxs [ idx3 % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
  OmpRAs [ idx4 % OmpValTableSize ] = ra;
#else
  idx = (idx + val) * ROOT_IDX;
  OmpRAs[ idx % OmpValTableSize ] = ra;
#endif
  TprintfT (DBG_LT1, "omp_cache_put: pc=0x%lx\n", wctx_pc_save->pc);
}
1838
/*
 * See bug 17166877 - malloc_internal unwind failure.
 * Detect a run of 'call' (0xe8) instructions sitting directly after a
 * 'leave' (0xc9) + 'ret' (0xc3) pair, e.g.:
 *	leave
 *	ret
 *	call xxx
 *	call xxxx
 *	call xxxxx
 * Such calls are often jump targets that belong to some other function,
 * so the walker should not fork a new jump context at NPC.
 * Returns 1 when NPC is part of that pattern, 0 otherwise.
 */
static int
is_after_ret (unsigned char * npc)
{
  if (*npc != 0xe8)		/* NPC itself must be a call */
    return 0;
  unsigned char *probe = npc;
  int calls_seen = 1;
  int steps_taken = 0;
  const int step_limit = 10;	/* bound on the scan in each direction */
  const int call_threshold = 3;	/* minimum run length to trigger */
  /* count consecutive calls preceding NPC */
  while (*(probe - 5) == 0xe8 && steps_taken < step_limit)
    {
      probe -= 5;
      calls_seen++;
      steps_taken++;
    }
  /* the run must immediately follow 'leave; ret' */
  if (*(probe - 1) != 0xc3 || *(probe - 2) != 0xc9)
    return 0;
  /* count consecutive calls following NPC */
  steps_taken = 0;
  probe = npc;
  while (*(probe + 5) == 0xe8 && steps_taken < step_limit)
    {
      probe += 5;
      calls_seen++;
      steps_taken++;
    }
  return calls_seen >= call_threshold ? 1 : 0;
}
1880
1881 static int
find_i386_ret_addr(struct WalkContext * wctx,int do_walk)1882 find_i386_ret_addr (struct WalkContext *wctx, int do_walk)
1883 {
1884 if (wctx->sp == 0)
1885 // Some artificial contexts may have %sp set to 0. See SETFUNCTIONCONTEXT()
1886 return RA_FAILURE;
1887
1888 /* Check cached values */
1889 int retc = cache_get (wctx);
1890 if (retc != RA_FAILURE)
1891 return retc;
1892
1893 /* An attempt to perform code analysis for call stack tracing */
1894 unsigned char opcode;
1895 unsigned char extop;
1896 unsigned char extop2;
1897 unsigned char modrm;
1898 int imm8; /* immediate operand, byte */
1899 int immv; /* immediate operand, word(2) or doubleword(4) */
1900 int reg; /* register code */
1901
1902 /* Buffer for branch targets (analysis stoppers) */
1903 unsigned char *targets[MAXTRGTS];
1904 int ntrg = 0; /* number of entries in the table */
1905 targets[ntrg++] = (unsigned char*) wctx->pc;
1906 targets[ntrg++] = (unsigned char*) - 1;
1907
1908 struct AdvWalkContext buf[MAXCTX];
1909 struct AdvWalkContext *cur = buf;
1910 CALL_UTIL (memset)((void*) cur, 0, sizeof (*cur));
1911
1912 cur->pc = (unsigned char*) wctx->pc;
1913 cur->sp = (unsigned long*) wctx->sp;
1914 cur->sp_safe = cur->sp - RED_ZONE; /* allow for the 128-byte red zone on amd64 */
1915 cur->fp = (unsigned long*) wctx->fp;
1916 cur->tidx = 1;
1917 DprintfT (SP_DUMP_UNWIND, "\nstack_unwind (x86 walk):%d %p start\n", __LINE__, cur->pc);
1918
1919 int nctx = 1; /* number of contexts being processed */
1920 int cnt = 8192; /* number of instructions to analyse */
1921
1922 /*
1923 * The basic idea of our x86 stack unwind is that we don't know
1924 * if we can trust the frame-pointer register. So we walk
1925 * instructions to find a return instruction, at which point
1926 * we know the return address is on the top of the stack, etc.
1927 *
1928 * A severe challenge to walking x86 instructions is when we
1929 * encounter "jmp *(reg)" instructions, where we are expected
1930 * to jump to the (unknown-to-us) contents of a register.
1931 *
1932 * The "jmp_reg" code here attempts to keep track of the
1933 * context for such a jump, deferring any handling of such
1934 * a difficult case. We continue with other contexts, hoping
1935 * that some other walk will take us to a return instruction.
1936 *
1937 * If no other walk helps, we return to "jmp_reg" contexts.
1938 * While we don't know the jump target, it is possible that the
1939 * bytes immediately following the jmp_reg instruction represent
1940 * one possible target, as might be the case when a "switch"
1941 * statement is compiled.
1942 *
1943 * Unfortunately, the bytes following a "jmp_reg" instruction might
1944 * instead be a jump target from somewhere else -- execution might
1945 * never "fall through" from the preceding "jmp_reg". Those bytes
1946 * might not even be instructions at all. There are many uses of
1947 * jmp_reg instructions beyond just compiling switch statements.
1948 *
1949 * So walking the bytes after a "jmp_reg" instruction can lead
1950 * to bugs and undefined behavior, including SEGV and core dump.
1951 *
1952 * We currently do not really understand the "jmp_reg" code below.
1953 */
1954 int jmp_reg_switch_mode = 0;
1955 int num_jmp_reg = 0; // number of jmp *reg met when switch mode is off or when in current switch case
1956 int total_num_jmp_reg = 0; // number of total jmp *reg met
1957 struct AdvWalkContext * jmp_reg_ctx[MAXJMPREG]; // context of jmp *reg met when switch mode is off or when in current switch case
1958 struct AdvWalkContext * jmp_reg_switch_ctx[MAXJMPREG]; // context of jmp *reg used in switch cases
1959 struct AdvWalkContext * jmp_reg_switch_backup_ctx = NULL; // context of the first jmp *reg used in switch cases
1960
1961 int cur_jmp_reg_switch = 0; // current switch table
1962 int num_jmp_reg_switch = 0; // number of switch table
1963 int jmp_reg_switch_case = 0; // case number in current switch table
1964 unsigned char * jmp_reg_switch_pc = NULL; // the start pc of current switch case
1965 unsigned char * jmp_reg_switch_pc_old = NULL; // backup for deleteing context of jump target
1966 unsigned char * jmp_reg_switch_base = NULL; // start pc for checking offsets
1967 int max_jmp_reg_switch_case = 2;
1968 #if WSIZE(32)
1969 int max_switch_pc_offset = 512;
1970 #else // WSIZE(64)
1971 int max_switch_pc_offset = 1024;
1972 #endif
1973 int expected_num_jmp_reg = 1; // should be smaller than MAXJMPREG
1974 int max_num_jmp_reg_seen = 4; // try to resolve return if there are so many such instructions
1975
1976
1977 int save_ctx = 0; // flag to save walk context in the cache to speed up unwind
1978 struct WalkContext wctx_pc_save;
1979 if (do_walk == 0)
1980 // do_walk is the flag indicating not walking through the instructions, resolving the RA from the stack fp first
1981 __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
1982
1983 startWalk:
1984 if (do_walk == 0)
1985 { // try to resolve RA from stack frame pointer
1986 if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
1987 {
1988 do_walk = 1;
1989 goto startWalk;
1990 }
1991 // before goto checkFP, try the RA from cache (key: WalkContext -> value: caller's WalkContext))
1992 uint64_t idx = wctx->pc * ROOT_IDX;
1993 uint32_t val = OmpVals[idx % OmpValTableSize];
1994 idx = (idx + val) * ROOT_IDX;
1995 #ifdef USE_18434988_OMP_CACHE_WORKAROUND
1996 // Check ra: if it is 0 - then cache is invalid
1997 uint64_t idx4;
1998 idx4 = (idx + val) * ROOT_IDX;
1999 idx4 = (idx4 + val) * ROOT_IDX;
2000 if (0 == OmpRAs[ idx4 % OmpValTableSize ]) // Invalid cache
2001 goto checkFP;
2002 #endif
2003 struct WalkContext saved_ctx;
2004 __collector_memcpy (&saved_ctx, &OmpCurCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2005 if (wctx->pc == saved_ctx.pc
2006 && wctx->sp == saved_ctx.sp
2007 && wctx->fp == saved_ctx.fp
2008 && wctx->tbgn == saved_ctx.tbgn
2009 && wctx->tend == saved_ctx.tend)
2010 { // key match, RA may be valid
2011 idx = (idx + val) * ROOT_IDX;
2012 unsigned long *sp = NULL;
2013 unsigned long fp = wctx->fp;
2014 int from_fp = 0;
2015 if (val == RA_END_OF_STACK)
2016 {
2017 DprintfT (SP_DUMP_UNWIND, "find_i386_ret_addr:%d -- RA_END_OF_STACK: pc=0x%lx\n", __LINE__, wctx->pc);
2018 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2019 return val;
2020 }
2021 else
2022 {
2023 if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
2024 {
2025 TprintfT (DBG_LT1, "omp_cache_get -- wrong fp: pc=0x%lx\n", wctx->pc);
2026 sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2027 sp--;
2028 if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2029 {
2030 goto checkFP;
2031 }
2032 unsigned long ra = *sp;
2033 uint64_t idx2 = (idx + val) * ROOT_IDX;
2034 if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2035 {
2036 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2037 TprintfT (DBG_LT1, "omp_cache_get -- ra match with target sp: pc=0x%lx, ra=0x%lx, val=%d\n", wctx->pc, ra, val);
2038 return val;
2039 }
2040 TprintfT (DBG_LT1, "omp_cache_get -- ra mismatch: ra=0x%lx, expected ra=0x%lx, val=%d\n", ra, OmpRAs[ idx2 % OmpValTableSize ], val);
2041 goto checkFP;
2042 }
2043 sp = (unsigned long *) fp;
2044 from_fp = 1;
2045 }
2046
2047 uint64_t idx2 = (idx + val) * ROOT_IDX;
2048 unsigned long ra = *sp++;
2049 if (from_fp)
2050 {
2051 unsigned long tbgn = wctx->tbgn;
2052 unsigned long tend = wctx->tend;
2053 if (ra < tbgn || ra >= tend)
2054 {
2055 sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2056 sp--;
2057 //if (sp < cur->sp_safe - 16 || (unsigned long)sp >= wctx->sbase - sizeof(*sp)) {
2058 // The check above was replaced with the check below,
2059 // because we do not know why "- 16" and "- sizeof(*sp)" was used.
2060 if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2061 goto checkFP;
2062 else
2063 ra = *sp;
2064 }
2065 }
2066 if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2067 {
2068 TprintfT (DBG_LT1, "omp_cache_get -- ra match: pc=0x%lx\n", wctx->pc);
2069 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2070 return val;
2071 }
2072 }
2073 goto checkFP;
2074 }
2075 else
2076 {
2077 CALL_UTIL (memset)(jmp_reg_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2078 CALL_UTIL (memset)(jmp_reg_switch_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2079 }
2080 while (cnt--)
2081 {
2082 if (nctx == 0 && (num_jmp_reg == expected_num_jmp_reg || jmp_reg_switch_mode == 1))
2083 { // no context available, try jmp switch mode
2084 int i = 0;
2085 if (num_jmp_reg == expected_num_jmp_reg)
2086 jmp_reg_switch_mode = 0; // first jmp reg expected, restart switch mode
2087 DprintfT (SP_DUMP_UNWIND, "unwind.c: begin switch mode, num_jmp_reg = %d, jmp_reg_switch_backup_ctx=%p, jmp_reg_switch_case=%d, jmp_reg_switch_mode=%d.\n",
2088 num_jmp_reg, jmp_reg_switch_backup_ctx, jmp_reg_switch_case, jmp_reg_switch_mode);
2089 // the ideal asm of switch is
2090 // jmp reg
2091 // ...//case 1
2092 // ret
2093 // ...//case 2
2094 // ret
2095 // ...//etc
2096 if (jmp_reg_switch_mode == 0)
2097 {
2098 num_jmp_reg_switch = num_jmp_reg; // backup num_jmp_reg
2099 jmp_reg_switch_mode = 1; // begin switch mode
2100 for (i = 0; i < num_jmp_reg_switch; i++)
2101 {
2102 if (jmp_reg_switch_ctx[i] == NULL)
2103 jmp_reg_switch_ctx[i] = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_ctx[i]));
2104 if (jmp_reg_switch_ctx[i] != NULL)
2105 { // backup jmp_reg_ctx
2106 __collector_memcpy (jmp_reg_switch_ctx[i], jmp_reg_ctx[i], sizeof (*jmp_reg_switch_ctx[i]));
2107 cur_jmp_reg_switch = 0; // reset the current switch table
2108 jmp_reg_switch_case = 0; // reset the case number in current switch table
2109 }
2110 }
2111 if (jmp_reg_switch_backup_ctx == NULL)
2112 { // only backup when the first jmp *reg is met for restoring later, if switch mode fails to resolve RA
2113 jmp_reg_switch_backup_ctx = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_backup_ctx));
2114 if (jmp_reg_switch_backup_ctx != NULL)
2115 __collector_memcpy (jmp_reg_switch_backup_ctx, cur, sizeof (*cur));
2116 DprintfT (SP_DUMP_UNWIND, "unwind.c: back up context for switch mode.\n");
2117 }
2118 }
2119 if (jmp_reg_switch_mode == 1)
2120 { // in the process of trying switch cases
2121 if (cur_jmp_reg_switch == num_jmp_reg_switch)
2122 {
2123 DprintfT (SP_DUMP_UNWIND, "unwind.c: have tried all switch with max_jmp_reg_switch_case for each\n");
2124 if (jmp_reg_switch_backup_ctx != NULL)
2125 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2126 int rc = process_return_real (wctx, cur, 0);
2127 if (rc == RA_SUCCESS)
2128 {
2129 if (save_ctx)
2130 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2131 return rc;
2132 }
2133 break; // have tried all switch with max_jmp_reg_switch_case for each, goto checkFP
2134 }
2135 unsigned char *npc = jmp_reg_switch_ctx[cur_jmp_reg_switch]->pc;
2136 if (jmp_reg_switch_case == 0)
2137 // first switch case
2138 npc = check_modrm (npc); // pc next to "jmp reg" instruction
2139 else if (jmp_reg_switch_pc != NULL)
2140 npc = jmp_reg_switch_pc; // pc next to "ret" instruction of previous case
2141 else
2142 {
2143 DprintfT (SP_DUMP_UNWIND, "unwind.c: unexpected jum switch mode situation, jmp_reg_switch_case=%d, jmp_reg_switch_pc=%p\n",
2144 jmp_reg_switch_case, jmp_reg_switch_pc);
2145 break; //goto checkFP
2146 }
2147 jmp_reg_switch_base = npc;
2148 struct AdvWalkContext *new = buf + nctx;
2149 nctx += 1;
2150 __collector_memcpy (new, jmp_reg_switch_ctx[cur_jmp_reg_switch], sizeof (*new));
2151 new->pc = npc;
2152 cur = new; /* advance the new context first */
2153 jmp_reg_switch_pc = NULL;
2154 jmp_reg_switch_case++;
2155 if (jmp_reg_switch_case == max_jmp_reg_switch_case)
2156 { // done many cases, change to another switch table
2157 cur_jmp_reg_switch++;
2158 jmp_reg_switch_case = 0;
2159 }
2160 }
2161 num_jmp_reg = 0;
2162 }
2163 if (jmp_reg_switch_mode == 1)
2164 { // when processing switch cases, check pc each time
2165 unsigned long tbgn = wctx->tbgn;
2166 unsigned long tend = wctx->tend;
2167 if ((unsigned long) (cur->pc) < tbgn || (unsigned long) (cur->pc) >= tend)
2168 {
2169 DprintfT (SP_DUMP_UNWIND, "unwind.c: pc out of range, pc=0x%lx\n", (unsigned long) (cur->pc));
2170 break;
2171 }
2172 if (jmp_reg_switch_base != NULL && cur->pc > jmp_reg_switch_base + max_switch_pc_offset)
2173 {
2174 DprintfT (SP_DUMP_UNWIND, "unwind.c: limit the walk offset after jmp reg instruction\n");
2175 if (jmp_reg_switch_backup_ctx != NULL)
2176 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2177 int rc = process_return_real (wctx, cur, 0);
2178 if (rc == RA_SUCCESS)
2179 {
2180 if (save_ctx)
2181 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2182 return rc;
2183 }
2184 break; // limit the walk offset after jmp reg instruction, goto checkFP
2185 }
2186 }
2187
2188 if (nctx == 0)
2189 break;
2190 // dump_targets (__LINE__, ntrg, targets);
2191 while (cur->pc > targets[cur->tidx])
2192 cur->tidx += 1;
2193 if (cur->pc == targets[cur->tidx])
2194 {
2195 /* Stop analysis. Delete context. */
2196 if (jmp_reg_switch_mode == 0 || cur->pc != jmp_reg_switch_pc_old)
2197 {
2198 if (jmp_reg_switch_mode == 1 && nctx == 1 && jmp_reg_switch_pc == NULL)
2199 {
2200 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d old target, cur->pc=%p, jmp_reg_switch_pc=%p, nctx=%d\n",
2201 __LINE__, cur->pc, jmp_reg_switch_pc, nctx);
2202 jmp_reg_switch_pc = cur->pc; // save cp before delete context, may be used as a start of switch case
2203 jmp_reg_switch_pc_old = jmp_reg_switch_pc;
2204 }
2205 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, old target.\n", __LINE__);
2206 DELETE_CURCTX ();
2207 if (cur >= buf + nctx)
2208 cur = buf;
2209 continue;
2210 }
2211 if (jmp_reg_switch_mode == 1 && cur->pc == jmp_reg_switch_pc_old)
2212 jmp_reg_switch_pc_old = NULL; // reset jmp_reg_switch_pc_old to delete the context later when cur->pc != jmp_reg_switch_pc_old
2213 }
2214
2215 /* let's walk the next x86 instruction */
2216 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cur:%ld pc=0x%lx %02x %02x %02x %02x %02x %02x %02x sp=0x%lx\n",
2217 __LINE__, (long) (cur - buf), (unsigned long) cur->pc,
2218 (int) cur->pc[0], (int) cur->pc[1], (int) cur->pc[2],
2219 (int) cur->pc[3], (int) cur->pc[4], (int) cur->pc[5],
2220 (int) cur->pc[6], (unsigned long) cur->sp);
2221 int v = 4; /* Operand size */
2222 int a = 4; /* Address size */
2223 /* int W = 0; REX.W bit */
2224 #if WSIZE(64)
2225 int R = 0; /* REX.R bit */
2226 #endif
2227 int X = 0; /* REX.X bit */
2228 int B = 0; /* REX.B bit */
2229 /* Check prefixes */
2230 int done = 0;
2231 while (!done)
2232 {
2233 opcode = *cur->pc++;
2234 switch (opcode)
2235 {
2236 case 0x66: /* opd size override */
2237 v = 2;
2238 break;
2239 case 0x67: /*addr size override */
2240 a = 2;
2241 break;
2242 #if WSIZE(64)
2243 case 0x40: /* REX */
2244 case 0x41:
2245 case 0x42:
2246 case 0x43:
2247 case 0x44:
2248 case 0x45:
2249 case 0x46:
2250 case 0x47:
2251 case 0x48:
2252 case 0x49:
2253 case 0x4a:
2254 case 0x4b:
2255 case 0x4c:
2256 case 0x4d:
2257 case 0x4e:
2258 case 0x4f:
2259 B = (opcode & 0x1) ? 8 : 0;
2260 X = (opcode & 0x2) ? 8 : 0;
2261 R = (opcode & 0x4) ? 8 : 0;
2262 if (opcode & 0x8) /* 64 bit operand size */
2263 v = 8;
2264 opcode = *cur->pc++;
2265 done = 1;
2266 break;
2267 #endif
2268 default:
2269 done = 1;
2270 break;
2271 }
2272 }
2273 int z = (v == 8) ? 4 : v;
2274 switch (opcode)
2275 {
2276 case 0x0: /* add Eb,Gb */
2277 case 0x01: /* add Ev,Gv */
2278 case 0x02: /* add Gb,Eb */
2279 case 0x03: /* add Gv,Ev */
2280 cur->pc = check_modrm (cur->pc);
2281 break;
2282 case 0x04: /* add %al,Ib */
2283 cur->pc += 1;
2284 break;
2285 case 0x05: /* add %eax,Iz */
2286 cur->pc += z;
2287 break;
2288 case 0x06: /* push es */
2289 cur->sp -= 1;
2290 break;
2291 case 0x07: /* pop es */
2292 cur->sp += 1;
2293 if (cur->sp - RED_ZONE > cur->sp_safe)
2294 cur->sp_safe = cur->sp - RED_ZONE;
2295 break;
2296 case 0x08: /* or Eb,Gb */
2297 case 0x09: /* or Ev,Gv */
2298 case 0x0a: /* or Gb,Eb */
2299 case 0x0b: /* or Gv,Ev */
2300 cur->pc = check_modrm (cur->pc);
2301 break;
2302 case 0x0c: /* or %al,Ib */
2303 cur->pc += 1;
2304 break;
2305 case 0x0d: /* or %eax,Iz */
2306 cur->pc += z;
2307 break;
2308 case 0x0e: /* push cs */
2309 cur->sp -= 1;
2310 break;
2311 case 0x0f: /* two-byte opcodes */
2312 extop = *cur->pc++;
2313 switch (extop)
2314 { /* RTM or HLE */
2315 case 0x01:
2316 extop2 = *cur->pc;
2317 switch (extop2)
2318 {
2319 case 0xd5: /* xend */
2320 case 0xd6: /* xtest */
2321 cur->pc++;
2322 break;
2323 default:
2324 break;
2325 }
2326 break;
2327 case 0x03:
2328 cur->pc = check_modrm (cur->pc);
2329 break;
2330 case 0x0b:
2331 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, undefined instruction. opcode=0x%02x\n",
2332 __LINE__, (int) opcode);
2333 DELETE_CURCTX ();
2334 break;
2335 case 0x05: /* syscall */
2336 case 0x34: /* sysenter */
2337 if (cur->rax == __NR_exit)
2338 {
2339 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2340 __LINE__, (int) opcode);
2341 DELETE_CURCTX ();
2342 break;
2343 }
2344 else if (cur->rax == __NR_rt_sigreturn)
2345 {
2346 if (jmp_reg_switch_mode == 1)
2347 {
2348 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0x%02x\n",
2349 __LINE__, (int) opcode);
2350 goto checkFP;
2351 }
2352 wctx->sp = (unsigned long) cur->sp;
2353 if (save_ctx)
2354 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
2355 return RA_RT_SIGRETURN;
2356 }
2357 #if WSIZE(32)
2358 else if (cur->rax == __NR_sigreturn)
2359 {
2360 if (jmp_reg_switch_mode == 1)
2361 {
2362 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0x34\n");
2363 goto checkFP;
2364 }
2365 wctx->sp = (unsigned long) cur->sp;
2366 if (save_ctx)
2367 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
2368 return RA_SIGRETURN;
2369 }
2370 #endif
2371 /* Check for Linus' trick in the vsyscall page */
2372 while (*cur->pc == 0x90) /* nop */
2373 cur->pc++;
2374 if (*cur->pc == 0xeb) /* jmp imm8 */
2375 cur->pc += 2;
2376 break;
2377 case 0x0d: /* nop Ev */
2378 cur->pc = check_modrm (cur->pc);
2379 break;
2380 case 0x10: /* xmm Vq,Wq */
2381 case 0x11:
2382 case 0x12:
2383 case 0x13:
2384 case 0x14:
2385 case 0x15:
2386 case 0x16:
2387 case 0x17:
2388 cur->pc = check_modrm (cur->pc);
2389 break;
2390 case 0x18: /* prefetch */
2391 cur->pc = check_modrm (cur->pc);
2392 break;
2393 case 0x1E: /* endbr64/endbr32 (f3 0f 1e .. ) is parsing as repz nop edx */
2394 cur->pc += 2;
2395 break;
2396 case 0x1f: /* nop Ev */
2397 cur->pc = check_modrm (cur->pc);
2398 break;
2399 case 0x28: /* xmm Vq,Wq */
2400 case 0x29:
2401 case 0x2a:
2402 case 0x2b:
2403 case 0x2c:
2404 case 0x2d:
2405 case 0x2e:
2406 case 0x2f:
2407 cur->pc = check_modrm (cur->pc);
2408 break;
2409 case 0x30: /* wrmsr */
2410 case 0x31: /* rdtsc */
2411 case 0x32: /* rdmsr */
2412 case 0x33: /* rdpmc */
2413 break;
2414 /* case 0x34: sysenter (see above) */
2415 case 0x38: case 0x3a:
2416 extop2 = *cur->pc++;
2417 cur->pc = check_modrm (cur->pc);
2418 // 21275311 Unwind failure in native stack for java application running on jdk8
2419 // Three-byte opcodes "66 0f 3a ??" should consume an additional "immediate" byte.
2420 if (extop == 0x3a)
2421 cur->pc++;
2422 break;
2423 case 0x40: case 0x41: case 0x42: case 0x43: /* CMOVcc Gv,Ev */
2424 case 0x44: case 0x45: case 0x46: case 0x47:
2425 case 0x48: case 0x49: case 0x4a: case 0x4b:
2426 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
2427 cur->pc = check_modrm (cur->pc);
2428 break;
2429 case 0x50: case 0x51: case 0x52: case 0x53:
2430 case 0x54: case 0x55: case 0x56: case 0x57:
2431 case 0x58: case 0x59: case 0x5a: case 0x5b:
2432 case 0x5c: case 0x5d: case 0x5e: case 0x5f:
2433 case 0x60: case 0x61: case 0x62: case 0x63:
2434 case 0x64: case 0x65: case 0x66: case 0x67:
2435 case 0x68: case 0x69: case 0x6a: case 0x6b:
2436 case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2437 cur->pc = check_modrm (cur->pc);
2438 break;
2439 case 0x70: case 0x71: case 0x72: case 0x73:
2440 cur->pc = check_modrm (cur->pc) + 1;
2441 break;
2442 case 0x74: case 0x75: case 0x76:
2443 cur->pc = check_modrm (cur->pc);
2444 break;
2445 case 0x77:
2446 break;
2447 case 0x7c: case 0x7d: case 0x7e: case 0x7f:
2448 cur->pc = check_modrm (cur->pc);
2449 break;
2450 case 0x80: case 0x81: case 0x82: case 0x83: /* Jcc Jz */
2451 case 0x84: case 0x85: case 0x86: case 0x87:
2452 case 0x88: case 0x89: case 0x8a: case 0x8b:
2453 case 0x8c: case 0x8d: case 0x8e: case 0x8f:
2454 immv = read_int (cur->pc, z);
2455 cur->pc += z;
2456 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2457 {
2458 int tidx = 0;
2459 unsigned char *npc = cur->pc + immv;
2460 if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
2461 {
2462 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2463 __LINE__, (int) opcode);
2464 DELETE_CURCTX ();
2465 break;
2466 }
2467 if (is_after_ret (npc))
2468 break;
2469 while (npc > targets[tidx])
2470 tidx += 1;
2471 if (npc != targets[tidx])
2472 {
2473 if (ntrg < MAXTRGTS)
2474 {
2475 for (int i = 0; i < nctx; i++)
2476 if (buf[i].tidx >= tidx)
2477 buf[i].tidx++;
2478
2479 /* insert a new target */
2480 for (int i = ntrg; i > tidx; i--)
2481 targets[i] = targets[i - 1];
2482 ntrg += 1;
2483 targets[tidx++] = npc;
2484 }
2485 else
2486 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg=max(%d)\n",
2487 __LINE__, ntrg);
2488 struct AdvWalkContext *new = buf + nctx;
2489 nctx += 1;
2490 __collector_memcpy (new, cur, sizeof (*new));
2491 new->pc = npc;
2492 new->tidx = tidx;
2493 cur = new; /* advance the new context first */
2494 continue;
2495 }
2496 }
2497 else
2498 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx=max(%d)\n",
2499 __LINE__, ntrg);
2500 break;
2501 case 0x90: case 0x91: case 0x92: case 0x93: /* setcc Eb */
2502 case 0x94: case 0x95: case 0x96: case 0x97:
2503 case 0x98: case 0x99: case 0x9a: case 0x9b:
2504 case 0x9c: case 0x9d: case 0x9e: case 0x9f:
2505 cur->pc = check_modrm (cur->pc);
2506 break;
2507 case 0xa0: /* push fs */
2508 cur->sp -= 1;
2509 break;
2510 case 0xa1: /* pop fs */
2511 cur->sp += 1;
2512 if (cur->sp - RED_ZONE > cur->sp_safe)
2513 cur->sp_safe = cur->sp - RED_ZONE;
2514 break;
2515 case 0xa2: /* cpuid */
2516 break;
2517 case 0xa3: /* bt Ev,Gv */
2518 cur->pc = check_modrm (cur->pc);
2519 break;
2520 case 0xa4: /* shld Ev,Gv,Ib */
2521 cur->pc = check_modrm (cur->pc);
2522 cur->pc += 1;
2523 break;
2524 case 0xa5: /* shld Ev,Gv,%cl */
2525 cur->pc = check_modrm (cur->pc);
2526 break;
2527 case 0xa8: /* push gs */
2528 cur->sp -= 1;
2529 break;
2530 case 0xa9: /* pop gs */
2531 cur->sp += 1;
2532 if (cur->sp - RED_ZONE > cur->sp_safe)
2533 cur->sp_safe = cur->sp - RED_ZONE;
2534 break;
2535 case 0xaa: /* rsm */
2536 break;
2537 case 0xab: /* bts Ev,Gv */
2538 cur->pc = check_modrm (cur->pc);
2539 break;
2540 case 0xac: /* shrd Ev,Gv,Ib */
2541 cur->pc = check_modrm (cur->pc);
2542 cur->pc += 1;
2543 break;
2544 case 0xad: /* shrd Ev,Gv,%cl */
2545 cur->pc = check_modrm (cur->pc);
2546 break;
2547 case 0xae: /* group15 */
2548 cur->pc = check_modrm (cur->pc);
2549 break;
2550 case 0xaf: /* imul Gv,Ev */
2551 cur->pc = check_modrm (cur->pc);
2552 break;
2553 case 0xb1: /* cmpxchg Ev,Gv */
2554 cur->pc = check_modrm (cur->pc);
2555 break;
2556 case 0xb3:
2557 case 0xb6: /* movzx Gv,Eb */
2558 case 0xb7: /* movzx Gv,Ew */
2559 cur->pc = check_modrm (cur->pc);
2560 break;
2561 case 0xba: /* group8 Ev,Ib */
2562 cur->pc = check_modrm (cur->pc);
2563 cur->pc += 1;
2564 break;
2565 case 0xbb: /* btc Ev,Gv */
2566 case 0xbc: /* bsf Gv,Ev */
2567 case 0xbd: /* bsr Gv,Ev */
2568 cur->pc = check_modrm (cur->pc);
2569 break;
2570 case 0xbe: /* movsx Gv,Eb */
2571 case 0xbf: /* movsx Gv,Ew */
2572 cur->pc = check_modrm (cur->pc);
2573 break;
2574 case 0xc0: /* xadd Eb,Gb */
2575 case 0xc1: /* xadd Ev,Gv */
2576 cur->pc = check_modrm (cur->pc);
2577 break;
2578 case 0xc2: /* cmpps V,W,Ib */
2579 cur->pc = check_modrm (cur->pc);
2580 cur->pc += 1;
2581 break;
2582 case 0xc3: /* movnti M,G */
2583 cur->pc = check_modrm (cur->pc);
2584 break;
2585 case 0xc6: /* shufps V,W,Ib */
2586 cur->pc = check_modrm (cur->pc);
2587 cur->pc += 1;
2588 break;
2589 case 0xc7: /* RDRAND */
2590 cur->pc = check_modrm (cur->pc);
2591 break;
2592 case 0xc8: case 0xc9: case 0xca: case 0xcb: /* bswap */
2593 case 0xcc: case 0xcd: case 0xce: case 0xcf:
2594 break;
2595 case 0xd0: case 0xd1: case 0xd2: case 0xd3:
2596 case 0xd4: case 0xd5: case 0xd6: case 0xd7:
2597 case 0xd8: case 0xd9: case 0xda: case 0xdb:
2598 case 0xdc: case 0xdd: case 0xde: case 0xdf:
2599 case 0xe0: case 0xe1: case 0xe2: case 0xe3:
2600 case 0xe4: case 0xe5: case 0xe6: case 0xe7:
2601 case 0xe8: case 0xe9: case 0xea: case 0xeb:
2602 case 0xec: case 0xed: case 0xee: case 0xef:
2603 case 0xf0: case 0xf1: case 0xf2: case 0xf3:
2604 case 0xf4: case 0xf5: case 0xf6: case 0xf7:
2605 case 0xf8: case 0xf9: case 0xfa: case 0xfb:
2606 case 0xfc: case 0xfd: case 0xfe: case 0xff:
2607 cur->pc = check_modrm (cur->pc);
2608 break;
2609 default:
2610 if (jmp_reg_switch_mode == 1 && extop == 0x0b)
2611 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d invalid opcode ub2: 0x0f %x jmp_reg_switch_mode=%d\n",
2612 __LINE__, (int) extop, jmp_reg_switch_mode);
2613 else
2614 {
2615 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x0f %x jmp_reg_switch_mode=%d\n",
2616 __LINE__, (int) extop, jmp_reg_switch_mode);
2617 DELETE_CURCTX ();
2618 }
2619 break;
2620 }
2621 break;
2622 case 0x10: /* adc Eb,Gb */
2623 case 0x11: /* adc Ev,Gv */
2624 case 0x12: /* adc Gb,Eb */
2625 case 0x13: /* adc Gv,Ev */
2626 cur->pc = check_modrm (cur->pc);
2627 break;
2628 case 0x14: /* adc %al,Ib */
2629 cur->pc += 1;
2630 break;
2631 case 0x15: /* adc %eax,Iz */
2632 cur->pc += z;
2633 break;
2634 case 0x16: /* push ss */
2635 cur->sp -= 1;
2636 break;
2637 case 0x17: /* pop ss */
2638 cur->sp += 1;
2639 if (cur->sp - RED_ZONE > cur->sp_safe)
2640 cur->sp_safe = cur->sp - RED_ZONE;
2641 break;
2642 case 0x18: /* sbb Eb,Gb */
2643 case 0x19: /* sbb Ev,Gv */
2644 case 0x1a: /* sbb Gb,Eb */
2645 case 0x1b: /* sbb Gv,Ev */
2646 cur->pc = check_modrm (cur->pc);
2647 break;
2648 case 0x1c: /* sbb %al,Ib */
2649 cur->pc += 1;
2650 break;
2651 case 0x1d: /* sbb %eax,Iz */
2652 cur->pc += z;
2653 break;
2654 case 0x1e: /* push ds */
2655 cur->sp -= 1;
2656 break;
2657 case 0x1f: /* pop ds */
2658 cur->sp += 1;
2659 if (cur->sp - RED_ZONE > cur->sp_safe)
2660 cur->sp_safe = cur->sp - RED_ZONE;
2661 break;
2662 case 0x20: /* and Eb,Gb */
2663 case 0x21: /* and Ev,Gv */
2664 case 0x22: /* and Gb,Eb */
2665 case 0x23: /* and Gv,Ev */
2666 cur->pc = check_modrm (cur->pc);
2667 break;
2668 case 0x24: /* and %al,Ib */
2669 cur->pc += 1;
2670 break;
2671 case 0x25: /* and %eax,Iz */
2672 cur->pc += z;
2673 break;
2674 case 0x26: /* seg=es prefix */
2675 break;
2676 case 0x27: /* daa */
2677 break;
2678 case 0x28: /* sub Eb,Gb */
2679 case 0x29: /* sub Ev,Gv */
2680 case 0x2a: /* sub Gb,Eb */
2681 case 0x2b: /* sub Gv,Ev */
2682 cur->pc = check_modrm (cur->pc);
2683 break;
2684 case 0x2c: /* sub %al,Ib */
2685 cur->pc += 1;
2686 break;
2687 case 0x2d: /* sub %eax,Iz */
2688 cur->pc += z;
2689 break;
2690 case 0x2e: /* seg=cs prefix */
2691 break;
2692 case 0x2f: /* das */
2693 break;
2694 case 0x30: /* xor Eb,Gb */
2695 case 0x31: /* xor Ev,Gv */
2696 case 0x32: /* xor Gb,Eb */
2697 case 0x33: /* xor Gv,Ev */
2698 cur->pc = check_modrm (cur->pc);
2699 break;
2700 case 0x34: /* xor %al,Ib */
2701 cur->pc += 1;
2702 break;
2703 case 0x35: /* xor %eax,Iz */
2704 cur->pc += z;
2705 break;
2706 case 0x36: /* seg=ss prefix */
2707 break;
2708 case 0x37: /* aaa */
2709 break;
2710 case 0x38: /* cmp Eb,Gb */
2711 case 0x39: /* cmp Ev,Gv */
2712 case 0x3a: /* cmp Gb,Eb */
2713 case 0x3b: /* cmp Gv,Ev */
2714 cur->pc = check_modrm (cur->pc);
2715 break;
2716 case 0x3c: /* cmp %al,Ib */
2717 cur->pc += 1;
2718 break;
2719 case 0x3d: /* cmp %eax,Iz */
2720 cur->pc += z;
2721 break;
2722 case 0x3e: /* seg=ds prefix */
2723 break;
2724 case 0x3f: /* aas */
2725 break;
2726 #if WSIZE(32)
2727 case 0x40: /* inc %eax */
2728 case 0x41: /* inc %ecx */
2729 case 0x42: /* inc %edx */
2730 case 0x43: /* inc %ebx */
2731 break;
2732 case 0x44: /* inc %esp */
2733 /* Can't be a valid stack pointer - delete context */
2734 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x44.\n", __LINE__);
2735 DELETE_CURCTX ();
2736 break;
2737 case 0x45: /* inc %ebp */
2738 case 0x46: /* inc %esi */
2739 case 0x47: /* inc %edi */
2740 case 0x48: /* dec %eax */
2741 case 0x49: /* dec %ecx */
2742 case 0x4a: /* dec %edx */
2743 case 0x4b: /* dec %ebx */
2744 break;
2745 case 0x4c: /* dec %esp */
2746 /* Can't be a valid stack pointer - delete context */
2747 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x4c.\n", __LINE__);
2748 DELETE_CURCTX ();
2749 break;
2750 case 0x4d: /* dec %ebp */
2751 case 0x4e: /* dec %esi */
2752 case 0x4f: /* dec %edi */
2753 break;
2754 #endif
2755 case 0x50: /* push %eax */
2756 case 0x51: /* push %ecx */
2757 case 0x52: /* push %edx */
2758 case 0x53: /* push %ebx */
2759 case 0x54: /* push %esp */
2760 case 0x55: /* push %ebp */
2761 case 0x56: /* push %esi */
2762 case 0x57: /* push %edi */
2763 cur->sp -= 1;
2764 reg = OPC_REG (opcode);
2765 if (reg == RBP)
2766 {
2767 #if 0
2768 /* Don't do this check yet. Affects tail calls. */
2769 /* avoid other function's prologue */
2770 if ((cur->pc[0] == 0x89 && cur->pc[1] == 0xe5) ||
2771 (cur->pc[0] == 0x8b && cur->pc[1] == 0xec))
2772 {
2773 /* mov %esp,%ebp */
2774 DELETE_CURCTX ();
2775 break;
2776 }
2777 #endif
2778 if (cur->fp_loc == NULL)
2779 {
2780 cur->fp_loc = cur->sp;
2781 cur->fp_sav = cur->fp;
2782 }
2783 }
2784 break;
2785 case 0x58: /* pop %eax */
2786 case 0x59: /* pop %ecx */
2787 case 0x5a: /* pop %edx */
2788 case 0x5b: /* pop %ebx */
2789 case 0x5c: /* pop %esp */
2790 case 0x5d: /* pop %ebp */
2791 case 0x5e: /* pop %esi */
2792 case 0x5f: /* pop %edi */
2793 reg = OPC_REG (opcode);
2794 cur->regs[reg] = 0;
2795 if (isInside ((unsigned long) cur->sp, (unsigned long) cur->sp_safe, wctx->sbase))
2796 cur->regs[reg] = *cur->sp;
2797 DprintfT (SP_DUMP_UNWIND, "stack_unwind:%d cur->regs[%d]=0x%lx\n",
2798 __LINE__, reg, (unsigned long) cur->regs[reg]);
2799 if (reg == RDX)
2800 {
2801 if (cur->sp >= cur->sp_safe &&
2802 (unsigned long) cur->sp < wctx->sbase)
2803 cur->rdx = *cur->sp;
2804 }
2805 else if (reg == RBP)
2806 {
2807 if (cur->fp_loc == cur->sp)
2808 {
2809 cur->fp = cur->fp_sav;
2810 cur->fp_loc = NULL;
2811 }
2812 else if (cur->sp >= cur->sp_safe &&
2813 (unsigned long) cur->sp < wctx->sbase)
2814 cur->fp = (unsigned long*) (*cur->sp);
2815 }
2816 else if (reg == RSP)
2817 {
2818 /* f.e. JVM I2CAdapter */
2819 if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
2820 {
2821 unsigned long *nsp = (unsigned long*) (*cur->sp);
2822 if (nsp >= cur->sp && nsp <= cur->fp)
2823 {
2824 cur->sp = nsp;
2825 }
2826 else
2827 {
2828 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address, opcode=0x%02x\n",
2829 __LINE__, opcode);
2830 goto checkFP;
2831 }
2832 }
2833 else
2834 {
2835 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode=0x%02x\n",
2836 __LINE__, opcode);
2837 goto checkFP;
2838 }
2839 break;
2840 }
2841 cur->sp += 1;
2842 if (cur->sp - RED_ZONE > cur->sp_safe)
2843 {
2844 cur->sp_safe = cur->sp - RED_ZONE;
2845 }
2846 break;
2847 case 0x60: /* pusha(d) */
2848 cur->sp -= 8;
2849 break;
2850 case 0x61: /* popa(d) */
2851 cur->sp += 8;
2852 if (cur->sp - RED_ZONE > cur->sp_safe)
2853 cur->sp_safe = cur->sp - RED_ZONE;
2854 break;
2855 case 0x62: /* group AVX, 4-bytes EVEX prefix */
2856 {
2857 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
2858 int len = parse_x86_AVX_instruction (pc);
2859 if (len < 4)
2860 {
2861 DELETE_CURCTX ();
2862 }
2863 else
2864 {
2865 pc += len;
2866 cur->pc = pc;
2867 }
2868 }
2869 break;
2870 case 0x63: /* arpl Ew,Gw (32) movsxd Gv,Ev (64)*/
2871 cur->pc = check_modrm (cur->pc);
2872 break;
2873 case 0x64: /* seg=fs prefix */
2874 case 0x65: /* seg=gs prefix */
2875 break;
2876 case 0x66: /* opd size override */
2877 case 0x67: /* addr size override */
2878 break;
2879 case 0x68: /* push Iz */
2880 cur->sp = (unsigned long*) ((long) cur->sp - z);
2881 cur->pc += z;
2882 break;
2883 case 0x69: /* imul Gv,Ev,Iz */
2884 cur->pc = check_modrm (cur->pc);
2885 cur->pc += z;
2886 break;
2887 case 0x6a: /* push Ib */
2888 cur->sp = (unsigned long*) ((long) cur->sp - v);
2889 cur->pc += 1;
2890 break;
2891 case 0x6b: /* imul Gv,Ev,Ib */
2892 cur->pc = check_modrm (cur->pc);
2893 cur->pc += 1;
2894 break;
2895 case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2896 cur->pc = check_modrm (cur->pc);
2897 break;
2898 case 0x70: /* jo Jb */
2899 case 0x71: /* jno Jb */
2900 case 0x72: /* jb Jb */
2901 case 0x73: /* jnb Jb */
2902 case 0x74: /* jz Jb */
2903 case 0x75: /* jnz Jb */
2904 case 0x76: /* jna Jb */
2905 case 0x77: /* ja Jb */
2906 case 0x78: /* js Jb */
2907 case 0x79: /* jns Jb */
2908 case 0x7a: /* jp Jb */
2909 case 0x7b: /* jnp Jb */
2910 case 0x7c: /* jl Jb */
2911 case 0x7d: /* jge Jb */
2912 case 0x7e: /* jle Jb */
2913 case 0x7f: /* jg Jb */
2914 imm8 = *(char*) cur->pc++;
2915 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2916 {
2917 int tidx = 0;
2918 unsigned char *npc = cur->pc + imm8;
2919 if (is_after_ret (npc))
2920 break;
2921 while (npc > targets[tidx])
2922 tidx += 1;
2923 if (npc != targets[tidx])
2924 {
2925 if (ntrg < MAXTRGTS)
2926 {
2927 for (int i = 0; i < nctx; i++)
2928 if (buf[i].tidx >= tidx)
2929 buf[i].tidx++;
2930
2931 /* insert a new target */
2932 for (int i = ntrg; i > tidx; i--)
2933 targets[i] = targets[i - 1];
2934 ntrg += 1;
2935 targets[tidx++] = npc;
2936 }
2937 else
2938 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg(%d)=max\n", __LINE__, ntrg);
2939 struct AdvWalkContext *new = buf + nctx;
2940 nctx += 1;
2941 __collector_memcpy (new, cur, sizeof (*new));
2942 new->pc = npc;
2943 new->tidx = tidx;
2944 cur = new; /* advance the new context first */
2945 continue;
2946 }
2947 }
2948 else
2949 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx(%d)=max\n", __LINE__, nctx);
2950 break;
2951 case 0x80: /* group1 Eb,Ib */
2952 cur->pc = check_modrm (cur->pc);
2953 cur->pc += 1;
2954 break;
2955 case 0x81: /* group1 Ev,Iz */
2956 modrm = *cur->pc;
2957 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2958 {
2959 int immz = read_int (cur->pc + 1, z);
2960 extop = MRM_EXT (modrm);
2961 if (extop == 0) /* add imm32,%esp */
2962 cur->sp = (unsigned long*) ((long) cur->sp + immz);
2963 else if (extop == 4) /* and imm32,%esp */
2964 cur->sp = (unsigned long*) ((long) cur->sp & immz);
2965 else if (extop == 5) /* sub imm32,%esp */
2966 cur->sp = (unsigned long*) ((long) cur->sp - immz);
2967 if (cur->sp - RED_ZONE > cur->sp_safe)
2968 cur->sp_safe = cur->sp - RED_ZONE;
2969 }
2970 cur->pc = check_modrm (cur->pc);
2971 cur->pc += z;
2972 break;
2973 case 0x82: /* group1 Eb,Ib */
2974 cur->pc = check_modrm (cur->pc);
2975 cur->pc += 1;
2976 break;
2977 case 0x83: /* group1 Ev,Ib */
2978 modrm = *cur->pc;
2979 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2980 {
2981 imm8 = (char) cur->pc[1]; /* sign extension */
2982 extop = MRM_EXT (modrm);
2983 if (extop == 0) /* add imm8,%esp */
2984 cur->sp = (unsigned long*) ((long) cur->sp + imm8);
2985 else if (extop == 4) /* and imm8,%esp */
2986 cur->sp = (unsigned long*) ((long) cur->sp & imm8);
2987 else if (extop == 5) /* sub imm8,%esp */
2988 cur->sp = (unsigned long*) ((long) cur->sp - imm8);
2989 if (cur->sp - RED_ZONE > cur->sp_safe)
2990 cur->sp_safe = cur->sp - RED_ZONE;
2991 }
2992 cur->pc = check_modrm (cur->pc);
2993 cur->pc += 1;
2994 break;
2995 case 0x84: /* test Eb,Gb */
2996 case 0x85: /* test Ev,Gv */
2997 case 0x86: /* xchg Eb,Gb */
2998 case 0x87: /* xchg Ev,Gv */
2999 cur->pc = check_modrm (cur->pc);
3000 break;
3001 case 0x88: /* mov Eb,Gb */
3002 cur->pc = check_modrm (cur->pc);
3003 break;
3004 case 0x89: /* mov Ev,Gv */
3005 modrm = *cur->pc;
3006 if (MRM_MOD (modrm) == 0xc0)
3007 {
3008 if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3009 /* movl %esp,%ebp */
3010 cur->fp = cur->sp;
3011 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3012 { /* mov %ebp,%esp */
3013 cur->sp = cur->fp;
3014 if (cur->sp - RED_ZONE > cur->sp_safe)
3015 cur->sp_safe = cur->sp - RED_ZONE;
3016 if (wctx->fp == (unsigned long) cur->sp)
3017 cur->cval = RA_FROMFP;
3018 }
3019 }
3020 else if (MRM_MOD (modrm) == 0x80)
3021 {
3022 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3023 {
3024 if (cur->pc[1] == 0x24)
3025 { /* mov %ebp,disp32(%esp) - JVM */
3026 immv = read_int (cur->pc + 2, 4);
3027 cur->fp_loc = (unsigned long*) ((char*) cur->sp + immv);
3028 cur->fp_sav = cur->fp;
3029 }
3030 }
3031 }
3032 else if (MRM_MOD (modrm) == 0x40)
3033 {
3034 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3035 {
3036 if (cur->pc[1] == 0x24 && cur->pc[2] == 0x0)
3037 { /* movl %edx,0(%esp) */
3038 cur->ra_loc = cur->sp;
3039 cur->ra_sav = cur->rdx;
3040 }
3041 }
3042 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3043 {
3044 if (cur->pc[1] == 0x24)
3045 { /* mov %ebp,disp8(%esp) - JVM */
3046 imm8 = ((char*) (cur->pc))[2];
3047 cur->fp_loc = (unsigned long*) ((char*) cur->sp + imm8);
3048 cur->fp_sav = cur->fp;
3049 }
3050 }
3051 }
3052 else if (MRM_MOD (modrm) == 0x0)
3053 {
3054 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3055 {
3056 if (cur->pc[1] == 0x24)
3057 { /* mov %ebp,(%esp) */
3058 cur->fp_loc = cur->sp;
3059 cur->fp_sav = cur->fp;
3060 }
3061 }
3062 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3063 {
3064 if (cur->pc[1] == 0x24)
3065 { /* movl %edx,(%esp) */
3066 cur->ra_loc = cur->sp;
3067 cur->ra_sav = cur->rdx;
3068 }
3069 }
3070 }
3071 cur->pc = check_modrm (cur->pc);
3072 break;
3073 case 0x8a: /* mov Gb,Eb */
3074 cur->pc = check_modrm (cur->pc);
3075 break;
3076 case 0x8b: /* mov Gv,Ev */
3077 modrm = *cur->pc;
3078 if (MRM_MOD (modrm) == 0xc0)
3079 {
3080 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3081 /* mov %esp,%ebp */
3082 cur->fp = cur->sp;
3083 else if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3084 { /* mov %ebp,%esp */
3085 cur->sp = cur->fp;
3086 if (cur->sp - RED_ZONE > cur->sp_safe)
3087 cur->sp_safe = cur->sp - RED_ZONE;
3088 if (wctx->fp == (unsigned long) cur->sp)
3089 cur->cval = RA_FROMFP;
3090 }
3091 }
3092 else if (MRM_MOD (modrm) == 0x80)
3093 {
3094 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3095 {
3096 if (cur->pc[1] == 0x24)
3097 { /* mov disp32(%esp),%ebp */
3098 immv = read_int (cur->pc + 2, 4);
3099 unsigned long *ptr = (unsigned long*) ((char*) cur->sp + immv);
3100 if (cur->fp_loc == ptr)
3101 {
3102 cur->fp = cur->fp_sav;
3103 cur->fp_loc = NULL;
3104 }
3105 else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3106 cur->fp = (unsigned long*) (*ptr);
3107 }
3108 }
3109 }
3110 else if (MRM_MOD (modrm) == 0x40)
3111 {
3112 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3113 {
3114 if (cur->pc[1] == 0x24)
3115 { /* mov disp8(%esp),%ebp - JVM */
3116 imm8 = ((char*) (cur->pc))[2];
3117 unsigned long *ptr = (unsigned long*) ((char*) cur->sp + imm8);
3118 if (cur->fp_loc == ptr)
3119 {
3120 cur->fp = cur->fp_sav;
3121 cur->fp_loc = NULL;
3122 }
3123 else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3124 cur->fp = (unsigned long*) (*ptr);
3125 }
3126 }
3127 }
3128 else if (MRM_MOD (modrm) == 0x0)
3129 {
3130 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3131 {
3132 if (cur->pc[1] == 0x24)
3133 { /* mov (%esp),%ebp */
3134 if (cur->fp_loc == cur->sp)
3135 {
3136 cur->fp = cur->fp_sav;
3137 cur->fp_loc = NULL;
3138 }
3139 else if (cur->sp >= cur->sp_safe &&
3140 (unsigned long) cur->sp < wctx->sbase)
3141 cur->fp = (unsigned long*) *cur->sp;
3142 }
3143 }
3144 }
3145 cur->pc = check_modrm (cur->pc);
3146 break;
3147 case 0x8c: /* mov Mw,Sw */
3148 cur->pc = check_modrm (cur->pc);
3149 break;
3150 case 0x8d: /* lea Gv,M */
3151 modrm = *cur->pc;
3152 if (MRM_REGD (modrm) == RSP)
3153 {
3154 unsigned char *pc = cur->pc;
3155 // Mez: need to use always regs[RSP/RBP] instead cur->sp(or fp):
3156 cur->regs[RSP] = (unsigned long) cur->sp;
3157 cur->regs[RBP] = (unsigned long) cur->fp;
3158 cur->pc++;
3159 int mod = (modrm >> 6) & 3;
3160 int r_m = modrm & 7;
3161 long val = 0;
3162 int undefRez = 0;
3163 if (mod == 0x3)
3164 val = getRegVal (cur, MRM_REGS (modrm), &undefRez);
3165 else if (r_m == 4)
3166 { // SP or R12. Decode SIB-byte.
3167 int sib = *cur->pc++;
3168 int scale = 1 << (sib >> 6);
3169 int index = X | ((sib >> 3) & 7);
3170 int base = B | (sib & 7);
3171 if (mod == 0)
3172 {
3173 if ((base & 7) == 5)
3174 { // BP or R13
3175 if (index != 4) // SP
3176 val += getRegVal (cur, index, &undefRez) * scale;
3177 val += read_int (cur->pc, 4);
3178 cur->pc += 4;
3179 }
3180 else
3181 {
3182 val += getRegVal (cur, base, &undefRez);
3183 if (index != 4) // SP
3184 val += getRegVal (cur, index, &undefRez) * scale;
3185 }
3186 }
3187 else
3188 {
3189 val += getRegVal (cur, base, &undefRez);
3190 if (index != 4) // SP
3191 val += getRegVal (cur, index, &undefRez) * scale;
3192 if (mod == 1)
3193 {
3194 val += read_int (cur->pc, 1);
3195 cur->pc++;
3196 }
3197 else
3198 { // mod == 2
3199 val += read_int (cur->pc, 4);
3200 cur->pc += 4;
3201 }
3202 }
3203 }
3204 else if (mod == 0)
3205 {
3206 if (r_m == 5)
3207 { // BP or R13
3208 val += read_int (cur->pc, 4);
3209 cur->pc += 4;
3210 }
3211 else
3212 val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3213 }
3214 else
3215 { // mod == 1 || mod == 2
3216 val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3217 if (mod == 1)
3218 {
3219 val += read_int (cur->pc, 1);
3220 cur->pc++;
3221 }
3222 else
3223 { // mod == 2
3224 val += read_int (cur->pc, 4);
3225 cur->pc += 4;
3226 }
3227 }
3228 if (undefRez)
3229 {
3230 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx val=0x%lx\n",
3231 __LINE__, (unsigned long) cur->pc, (unsigned long) val);
3232 goto checkFP;
3233 }
3234 cur->regs[MRM_REGD (modrm)] = val;
3235 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cur->pc=0x%lx val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3236 __LINE__, (unsigned long) cur->pc, (unsigned long) val,
3237 (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3238 if (cur->pc != check_modrm (pc))
3239 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d ERROR: cur->pc=0x%lx != check_modrm(0x%lx)=0x%lx\n",
3240 __LINE__, (unsigned long) cur->pc, (unsigned long) pc,
3241 (unsigned long) check_modrm (pc));
3242 if (MRM_REGD (modrm) == RSP)
3243 {
3244 if (!isInside ((unsigned long) val, wctx->sp, wctx->sbase))
3245 {
3246 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx opcode=0x%02x val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3247 __LINE__, (unsigned long) cur->pc, opcode, (unsigned long) val,
3248 (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3249 goto checkFP;
3250 }
3251 cur->sp = (unsigned long *) val;
3252 if (cur->sp - RED_ZONE > cur->sp_safe)
3253 cur->sp_safe = cur->sp - RED_ZONE;
3254 }
3255 }
3256 else
3257 cur->pc = check_modrm (cur->pc);
3258 break;
3259 case 0x8e: /* mov Sw,Ew */
3260 cur->pc = check_modrm (cur->pc);
3261 break;
3262 case 0x8f: /* pop Ev */
3263 cur->pc = check_modrm (cur->pc);
3264 cur->sp += 1;
3265 if (cur->sp - RED_ZONE > cur->sp_safe)
3266 cur->sp_safe = cur->sp - RED_ZONE;
3267 break;
3268 case 0x90: /* nop */
3269 break;
3270 case 0x91: /* xchg %eax,%ecx */
3271 case 0x92: /* xchg %eax,%edx */
3272 case 0x93: /* xchg %eax,%ebx */
3273 case 0x94: /* xchg %eax,%esp XXXX */
3274 case 0x95: /* xchg %eax,%ebp XXXX */
3275 case 0x96: /* xchg %eax,%esi */
3276 case 0x97: /* xchg %eax,%edi */
3277 break;
3278 case 0x98: /* cbw/cwde */
3279 case 0x99: /* cwd/cwq */
3280 break;
3281 case 0x9a: /* callf Ap */
3282 if (jmp_reg_switch_mode == 1)
3283 {
3284 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3285 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3286 int rc = process_return (wctx, tmpctx);
3287 if (rc != RA_FAILURE)
3288 {
3289 if (save_ctx)
3290 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3291 return rc;
3292 }
3293 }
3294 cur->pc += 2 + a;
3295 break;
3296 case 0x9b: /* fwait */
3297 case 0x9c: /* pushf Fv */
3298 case 0x9d: /* popf Fv */
3299 case 0x9e: /* sahf */
3300 case 0x9f: /* lahf */
3301 break;
3302 case 0xa0: /* mov al,Ob */
3303 case 0xa1: /* mov eax,Ov */
3304 case 0xa2: /* mov Ob,al */
3305 case 0xa3: /* mov Ov,eax */
3306 cur->pc += a;
3307 break;
3308 case 0xa4: /* movsb Yb,Xb */
3309 case 0xa5: /* movsd Yv,Xv */
3310 case 0xa6: /* cmpsb Yb,Xb */
3311 case 0xa7: /* cmpsd Xv,Yv */
3312 break;
3313 case 0xa8: /* test al,Ib */
3314 cur->pc += 1;
3315 break;
3316 case 0xa9: /* test eax,Iz */
3317 cur->pc += z;
3318 break;
3319 case 0xaa: /* stosb Yb,%al */
3320 case 0xab: /* stosd Yv,%eax */
3321 case 0xac: /* lodsb %al,Xb */
3322 case 0xad: /* lodsd %eax,Xv */
3323 case 0xae: /* scasb %al,Yb */
3324 case 0xaf: /* scasd %eax,Yv */
3325 break;
3326 case 0xb0: /* mov %al,Ib */
3327 case 0xb1: /* mov %cl,Ib */
3328 case 0xb2: /* mov %dl,Ib */
3329 case 0xb3: /* mov %bl,Ib */
3330 case 0xb4: /* mov %ah,Ib */
3331 case 0xb5: /* mov %ch,Ib */
3332 case 0xb6: /* mov %dh,Ib */
3333 case 0xb7: /* mov %bh,Ib */
3334 cur->pc += 1;
3335 break;
3336 case 0xb8: /* mov Iv,%eax */
3337 case 0xb9: /* mov Iv,%ecx */
3338 case 0xba: /* mov Iv,%edx */
3339 case 0xbb: /* mov Iv,%ebx */
3340 case 0xbc: /* mov Iv,%esp */
3341 case 0xbd: /* mov Iv,%rbp */
3342 case 0xbe: /* mov Iv,%esi */
3343 case 0xbf: /* mov Iv,%edi */
3344 reg = OPC_REG (opcode);
3345 if (reg == RAX)
3346 cur->rax = read_int (cur->pc, v);
3347 cur->pc += v;
3348 break;
3349 case 0xc0: /* group2 Eb,Ib */
3350 case 0xc1: /* group2 Ev,Ib */
3351 cur->pc = check_modrm (cur->pc) + 1;
3352 break;
3353 case 0xc2: /* ret Iw */
3354 /* In the dynamic linker we may see that
3355 * the actual return address is at sp+immv,
3356 * while sp points to the resolved address.
3357 */
3358 {
3359 immv = read_int (cur->pc, 2);
3360 int rc = process_return (wctx, cur);
3361 if (rc != RA_FAILURE)
3362 {
3363 if (jmp_reg_switch_mode == 1)
3364 {
3365 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address under jmp switch mode, opcode = 0xc2\n", __LINE__);
3366 goto checkFP;
3367 }
3368 wctx->sp += immv;
3369 if (save_ctx)
3370 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3371 return rc;
3372 }
3373 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc2.\n", __LINE__);
3374 DELETE_CURCTX ();
3375 }
3376 break;
3377 case 0xc3: /* ret */
3378 {
3379 int rc = process_return (wctx, cur);
3380 if (rc != RA_FAILURE)
3381 {
3382 if (save_ctx)
3383 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3384 return rc;
3385 }
3386 if (jmp_reg_switch_mode == 1)
3387 jmp_reg_switch_pc = cur->pc;
3388 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc3.\n", __LINE__);
3389 DELETE_CURCTX ();
3390 }
3391 break;
3392 case 0xc4: /* group AVX, 3-bytes VEX prefix */
3393 {
3394 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3395 int len = parse_x86_AVX_instruction (pc);
3396 if (len < 3)
3397 DELETE_CURCTX ();
3398 else
3399 {
3400 pc += len;
3401 cur->pc = pc;
3402 }
3403 }
3404 break;
3405 case 0xc5: /* group AVX, 2-bytes VEX prefix */
3406 {
3407 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3408 int len = parse_x86_AVX_instruction (pc);
3409 if (len < 2)
3410 DELETE_CURCTX ();
3411 else
3412 {
3413 pc += len;
3414 cur->pc = pc;
3415 }
3416 }
3417 break;
3418 case 0xc6:
3419 modrm = *cur->pc;
3420 if (modrm == 0xf8) /* xabort */
3421 cur->pc += 2;
3422 else /* mov Eb,Ib */
3423 cur->pc = check_modrm (cur->pc) + 1;
3424 break;
3425 case 0xc7:
3426 modrm = *cur->pc;
3427 if (modrm == 0xf8) /* xbegin */
3428 cur->pc += v + 1;
3429 else
3430 { /* mov Ev,Iz */
3431 extop = MRM_EXT (modrm);
3432 if (extop != 0)
3433 {
3434 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xc7\n", __LINE__);
3435 goto checkFP;
3436 }
3437 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RAX)
3438 cur->rax = read_int (cur->pc + 1, z);
3439 cur->pc = check_modrm (cur->pc) + z;
3440 }
3441 break;
3442 case 0xc8: /* enter Iw,Ib */
3443 cur->pc += 3;
3444 break;
3445 case 0xc9: /* leave */
3446 /* mov %ebp,%esp */
3447 cur->sp = cur->fp;
3448 /* pop %ebp */
3449 if (cur->fp_loc == cur->sp)
3450 {
3451 cur->fp = cur->fp_sav;
3452 cur->fp_loc = NULL;
3453 }
3454 else if (cur->sp >= cur->sp_safe &&
3455 (unsigned long) cur->sp < wctx->sbase)
3456 {
3457 cur->fp = (unsigned long*) (*cur->sp);
3458 if (wctx->fp == (unsigned long) cur->sp)
3459 cur->cval = RA_FROMFP;
3460 }
3461 cur->sp += 1;
3462 if (cur->sp - RED_ZONE > cur->sp_safe)
3463 cur->sp_safe = cur->sp - RED_ZONE;
3464 break;
3465 case 0xca: /* retf Iw */
3466 cur->pc += 2; /* XXXX process return */
3467 break;
3468 case 0xcb: /* retf */
3469 break; /* XXXX process return */
3470 case 0xcc: /* int 3 */
3471 break;
3472 case 0xcd: /* int Ib */
3473 if (*cur->pc == 0x80)
3474 {
3475 if (cur->rax == __NR_exit)
3476 {
3477 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xcd.\n", __LINE__);
3478 DELETE_CURCTX ();
3479 break;
3480 }
3481 else if (cur->rax == __NR_rt_sigreturn)
3482 {
3483 if (jmp_reg_switch_mode == 1)
3484 {
3485 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0xcd\n",
3486 __LINE__);
3487 goto checkFP;
3488 }
3489 wctx->sp = (unsigned long) cur->sp;
3490 if (save_ctx)
3491 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
3492 return RA_RT_SIGRETURN;
3493 }
3494 #if WSIZE(32)
3495 else if (cur->rax == __NR_sigreturn)
3496 {
3497 if (jmp_reg_switch_mode == 1)
3498 {
3499 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode = 0xc2\n",
3500 __LINE__);
3501 goto checkFP;
3502 }
3503 wctx->sp = (unsigned long) cur->sp;
3504 if (save_ctx)
3505 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
3506 return RA_SIGRETURN;
3507 }
3508 #endif
3509 }
3510 cur->pc += 1;
3511 break;
3512 case 0xce: /* into */
3513 case 0xcf: /* iret */
3514 break;
3515 case 0xd0: /* shift group2 Eb,1 */
3516 case 0xd1: /* shift group2 Ev,1 */
3517 case 0xd2: /* shift group2 Eb,%cl */
3518 case 0xd3: /* shift group2 Ev,%cl */
3519 cur->pc = check_modrm (cur->pc);
3520 break;
3521 case 0xd4: /* aam Ib */
3522 cur->pc += 1;
3523 break;
3524 case 0xd5: /* aad Ib */
3525 cur->pc += 1;
3526 break;
3527 case 0xd6: /* falc? */
3528 break;
3529 case 0xd7:
3530 cur->pc = check_modrm (cur->pc);
3531 cur->pc++;
3532 break;
3533 case 0xd8: /* esc instructions */
3534 case 0xd9:
3535 case 0xda:
3536 case 0xdb:
3537 case 0xdc:
3538 case 0xdd:
3539 case 0xde:
3540 case 0xdf:
3541 cur->pc = check_modrm (cur->pc);
3542 break;
3543 case 0xe0: /* loopne Jb */
3544 case 0xe1: /* loope Jb */
3545 case 0xe2: /* loop Jb */
3546 case 0xe3: /* jcxz Jb */
3547 imm8 = *(char*) cur->pc++;
3548 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
3549 {
3550 int tidx = 0;
3551 unsigned char *npc = cur->pc + imm8;
3552 if (is_after_ret (npc))
3553 break;
3554 while (npc > targets[tidx])
3555 tidx += 1;
3556 if (npc != targets[tidx])
3557 {
3558 if (ntrg < MAXTRGTS)
3559 {
3560 for (int i = 0; i < nctx; i++)
3561 if (buf[i].tidx >= tidx)
3562 buf[i].tidx++;
3563 /* insert a new target */
3564 for (int i = ntrg; i > tidx; i--)
3565 targets[i] = targets[i - 1];
3566 ntrg += 1;
3567 targets[tidx++] = npc;
3568 }
3569 else
3570 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3571 struct AdvWalkContext *new = buf + nctx;
3572 nctx += 1;
3573 __collector_memcpy (new, cur, sizeof (*new));
3574 new->pc = npc;
3575 new->tidx = tidx;
3576 cur = new; /* advance the new context first */
3577 continue;
3578 }
3579 }
3580 else
3581 DprintfT (SP_DUMP_UNWIND, "unwind.c: nctx = max\n");
3582 break;
3583 case 0xe4: case 0xe5:
3584 cur->pc = check_modrm (cur->pc);
3585 cur->pc++;
3586 break;
3587 case 0xe6: case 0xe7:
3588 cur->pc++;
3589 cur->pc = check_modrm (cur->pc);
3590 break;
3591 case 0xec: case 0xed: case 0xee: case 0xef:
3592 cur->pc = check_modrm (cur->pc);
3593 break;
3594 case 0xe8: /* call Jz (f64) */
3595 {
3596 if (jmp_reg_switch_mode == 1)
3597 {
3598 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3599 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3600 int rc = process_return (wctx, tmpctx);
3601 if (rc != RA_FAILURE)
3602 {
3603 if (save_ctx)
3604 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3605 return rc;
3606 }
3607 }
3608 int immz = read_int (cur->pc, z);
3609 if (immz == 0)
3610 /* special case in PIC code */
3611 cur->sp -= 1;
3612 cur->pc += z;
3613 }
3614 break;
3615 case 0xe9: /* jump Jz */
3616 {
3617 int immz = read_int (cur->pc, z);
3618 unsigned char *npc = cur->pc + z + immz;
3619 if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
3620 {
3621 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3622 DELETE_CURCTX ();
3623 break;
3624 }
3625 int tidx = 0;
3626 while (npc > targets[tidx])
3627 tidx += 1;
3628 if (npc != targets[tidx])
3629 {
3630 if (ntrg < MAXTRGTS)
3631 {
3632 for (int i = 0; i < nctx; i++)
3633 if (buf[i].tidx >= tidx)
3634 buf[i].tidx++;
3635 /* insert a new target */
3636 for (int i = ntrg; i > tidx; i--)
3637 targets[i] = targets[i - 1];
3638 ntrg += 1;
3639 targets[tidx++] = npc;
3640 }
3641 else
3642 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3643 cur->pc = npc;
3644 cur->tidx = tidx;
3645 continue; /* advance this context first */
3646 }
3647 else
3648 {
3649 /* Delete context */
3650 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3651 DELETE_CURCTX ();
3652 }
3653 }
3654 break;
3655 case 0xeb: /* jump imm8 */
3656 {
3657 imm8 = *(char*) cur->pc++;
3658 int tidx = 0;
3659 unsigned char *npc = cur->pc + imm8;
3660 while (npc > targets[tidx])
3661 tidx += 1;
3662 if (npc != targets[tidx])
3663 {
3664 if (ntrg < MAXTRGTS)
3665 {
3666 for (int i = 0; i < nctx; i++)
3667 if (buf[i].tidx >= tidx)
3668 buf[i].tidx++;
3669 /* insert a new target */
3670 for (int i = ntrg; i > tidx; i--)
3671 targets[i] = targets[i - 1];
3672 ntrg += 1;
3673 targets[tidx++] = npc;
3674 }
3675 else
3676 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3677 cur->pc = npc;
3678 cur->tidx = tidx;
3679 continue; /* advance this context first */
3680 }
3681 else
3682 {
3683 /* Delete context */
3684 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xeb.\n", __LINE__);
3685 DELETE_CURCTX ();
3686 }
3687 }
3688 break;
3689 case 0xf0: /* lock prefix */
3690 case 0xf2: /* repne prefix */
3691 case 0xf3: /* repz prefix */
3692 break;
3693 case 0xf4: /* hlt */
3694 extop2 = *(cur->pc - 3);
3695 if (extop2 == 0x90)
3696 {
3697 // 17851712 occasional SEGV in find_i386_ret_addr in unwind.c during attach
3698 if (save_ctx)
3699 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3700 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3701 return RA_END_OF_STACK;
3702 }
3703 /* We see 'hlt' in _start. Stop analysis, revert to FP */
3704 /* A workaround for the Linux main stack */
3705 if (nctx > 1)
3706 {
3707 DELETE_CURCTX ();
3708 break;
3709 }
3710 if (cur->fp == 0)
3711 {
3712 if (jmp_reg_switch_mode == 1)
3713 {
3714 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xf4\n");
3715 goto checkFP;
3716 }
3717 cache_put (wctx, RA_EOSTCK);
3718 wctx->pc = 0;
3719 wctx->sp = 0;
3720 wctx->fp = 0;
3721 if (save_ctx)
3722 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3723 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3724 return RA_END_OF_STACK;
3725 }
3726 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xf4\n", __LINE__);
3727 goto checkFP;
3728 case 0xf5: /* cmc */
3729 break;
3730 case 0xf6: /* group3 Eb */
3731 modrm = *cur->pc;
3732 extop = MRM_EXT (modrm);
3733 cur->pc = check_modrm (cur->pc);
3734 if (extop == 0x0) /* test Ib */
3735 cur->pc += 1;
3736 break;
3737 case 0xf7: /* group3 Ev */
3738 modrm = *cur->pc;
3739 extop = MRM_EXT (modrm);
3740 cur->pc = check_modrm (cur->pc);
3741 if (extop == 0x0) /* test Iz */
3742 cur->pc += z;
3743 break;
3744 case 0xf8: /* clc */
3745 case 0xf9: /* stc */
3746 case 0xfa: /* cli */
3747 case 0xfb: /* sti */
3748 case 0xfc: /* cld */
3749 case 0xfd: /* std */
3750 break;
3751 case 0xfe: /* group4 */
3752 modrm = *cur->pc;
3753 extop = MRM_EXT (modrm);
3754 switch (extop)
3755 {
3756 case 0x0: /* inc Eb */
3757 case 0x1: /* dec Eb */
3758 cur->pc = check_modrm (cur->pc);
3759 break;
3760 case 0x7:
3761 cur->pc = check_modrm (cur->pc);
3762 break;
3763 default:
3764 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xfe %x\n",
3765 __LINE__, extop);
3766 DELETE_CURCTX ();
3767 break;
3768 }
3769 break;
3770 case 0xff: /* group5 */
3771 modrm = *cur->pc;
3772 extop = MRM_EXT (modrm);
3773 switch (extop)
3774 {
3775 case 0x0: /* inc Ev */
3776 case 0x1: /* dec Ev */
3777 cur->pc = check_modrm (cur->pc);
3778 break;
3779 case 0x2: /* calln Ev */
3780 if (jmp_reg_switch_mode == 1)
3781 {
3782 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3783 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3784 int rc = process_return (wctx, tmpctx);
3785 if (rc != RA_FAILURE)
3786 {
3787 if (save_ctx)
3788 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3789 return rc;
3790 }
3791 }
3792 cur->pc = check_modrm (cur->pc);
3793 break;
3794 case 0x3: /* callf Ep */
3795 if (jmp_reg_switch_mode == 1)
3796 {
3797 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3798 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3799 int rc = process_return (wctx, tmpctx);
3800 if (rc != RA_FAILURE)
3801 {
3802 if (save_ctx)
3803 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3804 return rc;
3805 }
3806 }
3807 cur->pc = check_modrm (cur->pc); /* XXXX */
3808 break;
3809 case 0x4: /* jumpn Ev */
3810 /* This instruction appears in PLT or
3811 * in tail call optimization.
3812 * In both cases treat it as return.
3813 * Save jump *(reg) - switch, etc, for later use when no ctx left
3814 */
3815 if (modrm == 0x25 || /* jumpn *disp32 */
3816 MRM_MOD (modrm) == 0x40 || /* jumpn byte(reg) */
3817 MRM_MOD (modrm) == 0x80) /* jumpn word(reg) */
3818 {
3819 DprintfT (SP_DUMP_UNWIND, "unwind.c: PLT or tail call: %p\n", cur->pc - 1);
3820 int rc = process_return (wctx, cur);
3821 if (rc != RA_FAILURE)
3822 {
3823 if (jmp_reg_switch_mode == 1 && total_num_jmp_reg < max_num_jmp_reg_seen)
3824 {
3825 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xff\n");
3826 goto checkFP;
3827 }
3828 if (save_ctx)
3829 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3830 return rc;
3831 }
3832 }
3833 else if (modrm != 0x24 /*ignore SIB*/) /* jumpn *(reg) or jumpn reg */
3834 {
3835 // 22846120 stack unwind does not find caller of __memcpy_ssse3_back with B64 intel-Linux
3836 /*
3837 * For now, let's deal rather narrowly with this scenario. If:
3838 * - we are in the middle of an "ff e2" instruction, and
3839 * - the next instruction is undefined ( 0f 0b == ud2 )
3840 * then test return. (Might eventually have to broaden the scope
3841 * of this fix to other registers/etc.)
3842 */
3843 if (cur->pc[0] == 0xe2 && cur->pc[1] == 0x0f && cur->pc[2] == 0x0b)
3844 {
3845 int rc = process_return_real (wctx, cur, 0);
3846 if (rc == RA_SUCCESS)
3847 {
3848 if (save_ctx)
3849 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3850 return rc;
3851 }
3852 }
3853
3854 // 22691241 shjsynprog, jsynprog core dump from find_i386_ret_addr
3855 /*
3856 * Here is another oddity. Java 9 seems to emit dynamically generated
3857 * code where a code block ends with a "jmp *reg" and then padding to a
3858 * multiple-of-16 boundary and then a bunch of 0s. In this case, let's
3859 * not continue to walk bytes since we would be walking off the end of
3860 * the instructions into ... something. Treating them as instructions
3861 * can lead to unexpected results, including SEGV.
3862 */
3863 /*
3864 * While the general problem deserves a better solution, let's look
3865 * here only for one particular case:
3866 * 0xff 0xe7 jmp *reg
3867 * nop to bring us to a multiple-of-16 boundary
3868 * 0x0000000000000a00 something that does not look like an instruction
3869 *
3870 * A different nop might be used depending on how much padding is needed
3871 * to reach that multiple-of-16 boundary. We've seen two:
3872 * 0x90 one byte
3873 * 0x0f 0x1f 0x40 0x00 four bytes
3874 */
3875 // confirm the instruction is 0xff 0xe7
3876 if (cur->pc[0] == 0xe7)
3877 {
3878 // check for correct-length nop and find next 16-byte boundary
3879 int found_nop = 0;
3880 unsigned long long *boundary = 0;
3881 switch ((((unsigned long) (cur->pc)) & 0xf))
3882 {
3883 case 0xb: // look for 4-byte nop
3884 if (*((unsigned *) (cur->pc + 1)) == 0x00401f0f)
3885 found_nop = 1;
3886 boundary = (unsigned long long *) (cur->pc + 5);
3887 break;
3888 case 0xe: // look for 1-byte nop
3889 if (cur->pc[1] == 0x90)
3890 found_nop = 1;
3891 boundary = (unsigned long long *) (cur->pc + 2);
3892 break;
3893 default:
3894 break;
3895 }
3896
3897 // if nop is found, check what's at the boundary
3898 if (found_nop && *boundary == 0x000000000a00)
3899 {
3900 DELETE_CURCTX ();
3901 break;
3902 }
3903 }
3904
3905 DprintfT (SP_DUMP_UNWIND, "unwind.c: probably PLT or tail call or switch table: %p\n",
3906 cur->pc - 1);
3907 if (num_jmp_reg < expected_num_jmp_reg)
3908 {
3909 if (jmp_reg_ctx[num_jmp_reg] == NULL)
3910 jmp_reg_ctx[num_jmp_reg] = (struct AdvWalkContext *) alloca (sizeof (*cur));
3911 if (jmp_reg_ctx[num_jmp_reg] != NULL)
3912 __collector_memcpy (jmp_reg_ctx[num_jmp_reg], cur, sizeof (*cur));
3913 }
3914 if (num_jmp_reg < expected_num_jmp_reg ||
3915 (num_jmp_reg >= expected_num_jmp_reg &&
3916 jmp_reg_ctx[expected_num_jmp_reg - 1] != NULL &&
3917 cur->pc != jmp_reg_ctx[expected_num_jmp_reg - 1]->pc))
3918 {
3919 num_jmp_reg++;
3920 total_num_jmp_reg++;
3921 }
3922 if (jmp_reg_switch_mode == 1 && total_num_jmp_reg >= max_num_jmp_reg_seen)
3923 {
3924 int rc = process_return_real (wctx, cur, 0);
3925 if (rc == RA_SUCCESS)
3926 {
3927 if (save_ctx)
3928 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3929 return rc;
3930 }
3931 }
3932 }
3933 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xff.\n", __LINE__);
3934 DELETE_CURCTX ();
3935 break;
3936 case 0x5: /* jmpf Ep */
3937 cur->pc = check_modrm (cur->pc); /* XXXX */
3938 break;
3939 case 0x6: /* push Ev */
3940 cur->pc = check_modrm (cur->pc);
3941 cur->sp -= 1;
3942 break;
3943 case 0x7:
3944 cur->pc = check_modrm (cur->pc); /* XXXX */
3945 if (jmp_reg_switch_mode == 1)
3946 {
3947 int rc = process_return_real (wctx, cur, 0);
3948 if (rc == RA_SUCCESS)
3949 {
3950 if (save_ctx)
3951 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3952 return rc;
3953 }
3954 }
3955 break;
3956 default:
3957 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xff %x\n",
3958 __LINE__, (int) extop);
3959 DELETE_CURCTX ();
3960 break;
3961 }
3962 break;
3963 default:
3964 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x%x\n",
3965 __LINE__, (int) opcode);
3966 DELETE_CURCTX ();
3967 break;
3968 }
3969
3970 /* switch to next context */
3971 if (++cur >= buf + nctx)
3972 cur = buf;
3973 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d switch context: cur=0x%lx(%ld) nctx=%d cnt=%d\n",
3974 __LINE__, (unsigned long) cur, (long) (cur - buf), (int) nctx, (int) cnt);
3975 }
3976
3977 checkFP:
3978 Tprintf (DBG_LT3, "find_i386_ret_addr:%d checkFP: wctx=0x%lx fp=0x%lx ln=0x%lx pc=0x%lx sbase=0x%lx sp=0x%lx tbgn=0x%lx tend=0x%lx\n",
3979 __LINE__, (unsigned long) wctx, (unsigned long) wctx->fp,
3980 (unsigned long) wctx->ln, (unsigned long) wctx->pc, (unsigned long) wctx->sbase,
3981 (unsigned long) wctx->sp, (unsigned long) wctx->tbgn, (unsigned long) wctx->tend);
3982
3983 if (jmp_reg_switch_mode == 1)
3984 { // not deal with switch cases not ending with ret
3985 if (jmp_reg_switch_backup_ctx != NULL)
3986 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
3987 DprintfT (SP_DUMP_UNWIND, "stack_unwind jmp reg mode on: pc = 0x%lx cnt = %d, nctx = %d\n", wctx->pc, cnt, nctx);
3988 }
3989
3990 unsigned long *cur_fp = cur->fp;
3991 unsigned long *cur_sp = cur->sp;
3992 if (do_walk == 0)
3993 __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
3994
3995 /* Resort to the frame pointer */
3996 if (cur->fp_loc)
3997 cur->fp = cur->fp_sav;
3998 cur->sp = cur->fp;
3999 if ((unsigned long) cur->sp >= wctx->sbase ||
4000 (unsigned long) cur->sp < wctx->sp)
4001 {
4002 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d do_walk=%d cur->sp=0x%p out of range. wctx->sbase=0x%lx wctx->sp=0x%lx wctx->pc=0x%lx\n",
4003 __LINE__, (int) do_walk, cur->sp, (unsigned long) wctx->sbase,
4004 (unsigned long) wctx->sp, (unsigned long) wctx->pc);
4005 if (do_walk == 0)
4006 {
4007 cur->sp = cur_sp;
4008 cur->fp = cur_fp;
4009 do_walk = 1;
4010 save_ctx = 1;
4011 goto startWalk;
4012 }
4013 if (save_ctx)
4014 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4015 return RA_FAILURE;
4016 }
4017
4018 unsigned long fp = *cur->sp++;
4019 if (fp <= (unsigned long) cur->sp || fp >= wctx->sbase)
4020 {
4021 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d fp=0x%016llx out of range. cur->sp=%p wctx->sbase=0x%lx wctx->pc=0x%lx\n",
4022 __LINE__, (unsigned long long) fp, cur->sp,
4023 (unsigned long) wctx->sbase, (unsigned long) wctx->pc);
4024 if (do_walk == 0)
4025 {
4026 cur->sp = cur_sp;
4027 cur->fp = cur_fp;
4028 do_walk = 1;
4029 save_ctx = 1;
4030 goto startWalk;
4031 }
4032 if (save_ctx)
4033 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4034 return RA_FAILURE;
4035 }
4036
4037 unsigned long ra = *cur->sp++;
4038 if (ra == 0)
4039 {
4040 cache_put (wctx, RA_EOSTCK);
4041 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK wctx->pc = 0x%lx\n", __LINE__, wctx->pc);
4042 if (save_ctx)
4043 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
4044 return RA_END_OF_STACK;
4045 }
4046
4047 unsigned long tbgn = wctx->tbgn;
4048 unsigned long tend = wctx->tend;
4049 if (ra < tbgn || ra >= tend)
4050 {
4051 // We do not know yet if update_map_segments is really needed
4052 if (!__collector_check_segment (ra, &tbgn, &tend, 0))
4053 {
4054 DprintfT (SP_DUMP_UNWIND, "unwind.c: __collector_check_segment fail. wctx->pc = 0x%lx\n", wctx->pc);
4055 if (do_walk == 0)
4056 {
4057 cur->sp = cur_sp;
4058 cur->fp = cur_fp;
4059 do_walk = 1;
4060 save_ctx = 1;
4061 goto startWalk;
4062 }
4063 if (save_ctx)
4064 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4065 return RA_FAILURE;
4066 }
4067 }
4068
4069 unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
4070 if (npc == 0)
4071 {
4072 DprintfT (SP_DUMP_UNWIND, "unwind.c: adjust_ret_addr fail. wctx->pc = 0x%lx\n", wctx->pc);
4073 if (do_walk == 0)
4074 {
4075 cur->sp = cur_sp;
4076 cur->fp = cur_fp;
4077 do_walk = 1;
4078 save_ctx = 1;
4079 goto startWalk;
4080 }
4081 if (save_ctx)
4082 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4083 return RA_FAILURE;
4084 }
4085 wctx->pc = npc;
4086 wctx->sp = (unsigned long) cur->sp;
4087 wctx->fp = fp;
4088 wctx->tbgn = tbgn;
4089 wctx->tend = tend;
4090
4091 if (save_ctx)
4092 {
4093 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SUCCESS);
4094 DprintfT (SP_DUMP_UNWIND, "unwind.c: cache walk context. wctx_pc_save->pc = 0x%lx\n", wctx_pc_save.pc);
4095 }
4096 return RA_SUCCESS;
4097 }
4098
4099 /*
4100 * We have the return address, but we would like to report to the user
4101 * the calling PC, which is the instruction immediately preceding the
4102 * return address. Unfortunately, x86 instructions can have variable
4103 * length. So we back up 8 bytes and try to figure out where the
4104  * calling PC starts.  (FWIW, call instructions are often 5 bytes long.)
4105 */
4106 unsigned long
adjust_ret_addr(unsigned long ra,unsigned long segoff,unsigned long tend)4107 adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend)
4108 {
4109 unsigned long npc = 0;
4110 int i = segoff < 8 ? segoff : 8;
4111 for (; i > 1; i--)
4112 {
4113 unsigned char *ptr = (unsigned char*) ra - i;
4114 int z = 4;
4115 int a = 4;
4116 int done = 0;
4117 int bVal;
4118 while (!done)
4119 {
4120 bVal = getByteInstruction (ptr);
4121 if (bVal < 0)
4122 return 0;
4123 switch (bVal)
4124 {
4125 case 0x26:
4126 case 0x36:
4127 #if WSIZE(64)
4128 ptr += 1;
4129 break;
4130 #endif
4131 case 0x64:
4132 case 0x65:
4133 bVal = getByteInstruction (ptr + 1);
4134 if (bVal < 0)
4135 return 0;
4136 if (bVal == 0xe8)
4137 // a workaround for bug 16193041, assuming "call Jz" has no segment override prefix
4138 done = 1;
4139 else
4140 ptr += 1;
4141 break;
4142 case 0x66:
4143 z = 2;
4144 ptr += 1;
4145 break;
4146 case 0x67:
4147 a = 2;
4148 ptr += 1;
4149 break;
4150 default:
4151 done = 1;
4152 break;
4153 }
4154 }
4155 #if WSIZE(64)
4156 bVal = getByteInstruction (ptr);
4157 if (bVal < 0)
4158 return 0;
4159 if (bVal >= 0x40 && bVal <= 0x4f)
4160 { /* XXXX not all REX codes applicable */
4161 if (bVal & 0x8)
4162 z = 4;
4163 ptr += 1;
4164 }
4165 #endif
4166 int opcode = getByteInstruction (ptr);
4167 if (opcode < 0)
4168 return 0;
4169 ptr++;
4170 switch (opcode)
4171 {
4172 case 0xe8: /* call Jz (f64) */
4173 ptr += z;
4174 break;
4175 case 0x9a: /* callf Ap */
4176 ptr += 2 + a;
4177 break;
4178 case 0xff: /* calln Ev , callf Ep */
4179 {
4180 int extop = MRM_EXT (*ptr);
4181 if (extop == 2 || extop == 3)
4182 ptr = check_modrm (ptr);
4183 }
4184 break;
4185 default:
4186 continue;
4187 }
4188 if ((unsigned long) ptr == ra)
4189 {
4190 npc = ra - i;
4191 break;
4192 }
4193 }
4194 if (npc == 0)
4195 {
4196 unsigned char * ptr = (unsigned char *) ra;
4197 #if WSIZE(32)
4198 // test __kernel_sigreturn or __kernel_rt_sigreturn
4199 if ((ra + 7 < tend && getByteInstruction (ptr) == 0x58
4200 && getByteInstruction (ptr + 1) == 0xb8
4201 && getByteInstruction (ptr + 6) == 0xcd
4202 && getByteInstruction (ptr + 7) == 0x80) /* pop %eax; mov $NNNN, %eax; int */
4203 || (ra + 7 < tend && getByteInstruction (ptr) == 0x58
4204 && getByteInstruction (ptr + 1) == 0xb8
4205 && getByteInstruction (ptr + 6) == 0x0f
4206 && getByteInstruction (ptr + 7) == 0x05) /* pop %eax; mov $NNNN, %eax; syscall */
4207 || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
4208 && getByteInstruction (ptr + 5) == 0xcd
4209 && getByteInstruction (ptr + 6) == 0x80) /* mov $NNNN, %eax; int */
4210 || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
4211 && getByteInstruction (ptr + 5) == 0x0f
4212 && getByteInstruction (ptr + 6) == 0x05)) /* mov $NNNN, %eax; syscall */
4213 #else //WSIZE(64)
4214 // test __restore_rt
4215 if (ra + 8 < tend && getByteInstruction (ptr) == 0x48
4216 && getByteInstruction (ptr + 7) == 0x0f
4217 && getByteInstruction (ptr + 8) == 0x05) /* mov $NNNNNNNN, %rax; syscall */
4218 #endif
4219 {
4220 npc = ra;
4221 }
4222 }
4223 if (npc == 0 && __collector_java_mode
4224 && __collector_java_asyncgetcalltrace_loaded)
4225 { // detect jvm interpreter code for java user threads
4226 unsigned char * ptr = (unsigned char *) ra;
4227 #if WSIZE(32)
4228 // up to J170
4229 /*
4230 * ff 24 9d e0 64 02 f5 jmp *-0xafd9b20(,%ebx,4)
4231 * 8b 4e 01 movl 1(%esi),%ecx
4232 * f7 d1 notl %ecx
4233 * 8b 5d ec movl -0x14(%ebp),%ebx
4234 * c1 e1 02 shll $2,%ecx
4235 * eb d8 jmp .-0x26 [ 0x92a ]
4236 * 83 ec 08 subl $8,%esp || 8b 65 f8 movl -8(%ebp),%esp
4237 * */
4238 if (ra - 20 >= (ra - segoff) && ((*ptr == 0x83 && *(ptr + 1) == 0xec) || (*ptr == 0x8b && *(ptr + 1) == 0x65))
4239 && *(ptr - 2) == 0xeb
4240 && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
4241 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
4242 && *(ptr - 10) == 0xf7 && *(ptr - 9) == 0xd1
4243 && *(ptr - 13) == 0x8b && *(ptr - 12) == 0x4e
4244 && *(ptr - 20) == 0xff && *(ptr - 19) == 0x24 && *(ptr - 18) == 0x9d)
4245 {
4246 npc = ra - 20;
4247 }
4248 // J180 J190
4249 // ff 24 9d ** ** ** ** jmp *-0x*******(,%ebx,4)
4250 if (npc == 0
4251 && ra - 7 >= (ra - segoff)
4252 && *(ptr - 7) == 0xff
4253 && *(ptr - 6) == 0x24
4254 && *(ptr - 5) == 0x9d)
4255 {
4256 npc = ra - 7;
4257 }
4258 #else //WSIZE(64)
4259 // up to J170
4260 /*
4261 * 41 ff 24 da jmp *(%r10,%rbx,8)
4262 * 41 8b 4d 01 movl 1(%r13),%ecx
4263 * f7 d1 notl %ecx
4264 * 48 8b 5d d8 movq -0x28(%rbp),%rbx
4265 * c1 e1 02 shll $2,%ecx
4266 * eb cc jmp .-0x32 [ 0xd23 ]
4267 * 48 8b 65 f0 movq -0x10(%rbp),%rsp
4268 */
4269 if (ra - 19 >= (ra - segoff) && *ptr == 0x48 && ((*(ptr + 1) == 0x8b && *(ptr + 2) == 0x65) || (*(ptr + 1) == 0x83 && *(ptr + 2) == 0xec))
4270 && *(ptr - 2) == 0xeb
4271 && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
4272 && *(ptr - 9) == 0x48 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
4273 && *(ptr - 11) == 0xf7 && *(ptr - 10) == 0xd1
4274 && *(ptr - 15) == 0x41 && *(ptr - 14) == 0x8b && *(ptr - 13) == 0x4d
4275 && *(ptr - 19) == 0x41 && *(ptr - 18) == 0xff)
4276 npc = ra - 19;
4277 // J180 J190
4278 // 41 ff 24 da jmp *(%r10,%rbx,8)
4279 if (npc == 0
4280 && ra - 4 >= (ra - segoff)
4281 && *(ptr - 4) == 0x41
4282 && *(ptr - 3) == 0xff
4283 && *(ptr - 2) == 0x24
4284 && *(ptr - 1) == 0xda)
4285 npc = ra - 4;
4286 #endif
4287 }
4288
4289 return npc;
4290 }
4291
4292 /*
4293 * Parses AVX instruction and returns its length.
4294 * Returns 0 if parsing failed.
4295 * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
4296 */
4297 static int
parse_x86_AVX_instruction(unsigned char * pc)4298 parse_x86_AVX_instruction (unsigned char *pc)
4299 {
4300 /*
4301 * VEX prefix has a two-byte form (0xc5) and a three byte form (0xc4).
4302 * If an instruction syntax can be encoded using the two-byte form,
4303 * it can also be encoded using the three byte form of VEX.
4304 * The latter increases the length of the instruction by one byte.
4305 * This may be helpful in some situations for code alignment.
4306 *
4307 Byte 0 Byte 1 Byte 2 Byte 3
4308 (Bit Position) 7 0 7 6 5 4 0 7 6 3 2 10
4309 3-byte VEX [ 11000100 ] [ R X B | m-mmmm ] [ W | vvvv | L | pp ]
4310 7 0 7 6 3 2 10
4311 2-byte VEX [ 11000101 ] [ R | vvvv | L | pp ]
4312 7 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
4313 4-byte EVEX [ 01100010 ] [ R X B R1 0 0 m m ] [ W v v v v 1 p p ] [ z L1 L B1 V1 a a a ]
4314
4315 R: REX.R in 1's complement (inverted) form
4316 0: Same as REX.R=1 (64-bit mode only)
4317 1: Same as REX.R=0 (must be 1 in 32-bit mode)
4318
4319 X: REX.X in 1's complement (inverted) form
4320 0: Same as REX.X=1 (64-bit mode only)
4321 1: Same as REX.X=0 (must be 1 in 32-bit mode)
4322
4323 B: REX.B in 1's complement (inverted) form
4324 0: Same as REX.B=1 (64-bit mode only)
4325 1: Same as REX.B=0 (Ignored in 32-bit mode).
4326
4327 W: opcode specific (use like REX.W, or used for opcode
4328 extension, or ignored, depending on the opcode byte)
4329
4330 m-mmmm:
4331 00000: Reserved for future use (will #UD)
4332 00001: implied 0F leading opcode byte
4333 00010: implied 0F 38 leading opcode bytes
4334 00011: implied 0F 3A leading opcode bytes
4335 00100-11111: Reserved for future use (will #UD)
4336
4337 vvvv: a register specifier (in 1's complement form) or 1111 if unused.
4338
4339 L: Vector Length
4340 0: scalar or 128-bit vector
4341 1: 256-bit vector
4342
4343 pp: opcode extension providing equivalent functionality of a SIMD prefix
4344 00: None
4345 01: 66
4346 10: F3
4347 11: F2
4348 *
4349 * Example: 0xc5f877L vzeroupper
4350 * VEX prefix: 0xc5 0x77
4351 * Opcode: 0xf8
4352 *
4353 */
4354 int len = 0;
4355 disassemble_info dis_info;
4356 dis_info.arch = bfd_arch_i386;
4357 dis_info.mach = bfd_mach_x86_64;
4358 dis_info.flavour = bfd_target_unknown_flavour;
4359 dis_info.endian = BFD_ENDIAN_UNKNOWN;
4360 dis_info.endian_code = dis_info.endian;
4361 dis_info.octets_per_byte = 1;
4362 dis_info.disassembler_needs_relocs = FALSE;
4363 dis_info.fprintf_func = fprintf_func;
4364 dis_info.fprintf_styled_func = fprintf_styled_func;
4365 dis_info.stream = NULL;
4366 dis_info.disassembler_options = NULL;
4367 dis_info.read_memory_func = read_memory_func;
4368 dis_info.memory_error_func = memory_error_func;
4369 dis_info.print_address_func = print_address_func;
4370 dis_info.symbol_at_address_func = symbol_at_address_func;
4371 dis_info.symbol_is_valid = symbol_is_valid;
4372 dis_info.display_endian = BFD_ENDIAN_UNKNOWN;
4373 dis_info.symtab = NULL;
4374 dis_info.symtab_size = 0;
4375 dis_info.buffer_vma = 0;
4376 dis_info.buffer = pc;
4377 dis_info.buffer_length = 8;
4378
4379 disassembler_ftype disassemble = print_insn_i386;
4380 if (disassemble == NULL)
4381 {
4382 DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction ERROR: unsupported disassemble\n");
4383 return 0;
4384 }
4385 len = disassemble (0, &dis_info);
4386 DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction: returned %d pc: %p\n", len, pc);
4387 return len;
4388 }
4389
4390 /*
4391 * In the Intel world, a stack frame looks like this:
4392 *
4393 * %fp0->| |
4394 * |-------------------------------|
4395 * | Args to next subroutine |
4396 * |-------------------------------|-\
4397 * %sp0->| One word struct-ret address | |
4398 * |-------------------------------| > minimum stack frame (8 bytes)
4399 * | Previous frame pointer (%fp0)| |
4400 * %fp1->|-------------------------------|-/
4401 * | Local variables |
4402 * %sp1->|-------------------------------|
4403 *
4404 */
4405
4406 int
stack_unwind(char * buf,int size,void * bptr,void * eptr,ucontext_t * context,int mode)4407 stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
4408 {
4409 long *lbuf = (long*) buf;
4410 int lsize = size / sizeof (long);
4411 int ind = 0;
4412 int do_walk = 1;
4413 int extra_frame = 0;
4414 if (mode & FRINFO_NO_WALK)
4415 do_walk = 0;
4416 if ((mode & 0xffff) == FRINFO_FROM_STACK)
4417 extra_frame = 1;
4418
4419 /*
4420 * trace the stack frames from user stack.
4421 * We are assuming that the frame pointer and return address
4422 * are null when we are at the top level.
4423 */
4424 struct WalkContext wctx;
4425 wctx.pc = GET_PC (context);
4426 wctx.sp = GET_SP (context);
4427 wctx.fp = GET_FP (context);
4428 wctx.ln = (unsigned long) context->uc_link;
4429 unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
4430 if (sbase && *sbase > wctx.sp)
4431 wctx.sbase = *sbase;
4432 else
4433 {
4434 wctx.sbase = wctx.sp + 0x100000;
4435 if (wctx.sbase < wctx.sp) /* overflow */
4436 wctx.sbase = (unsigned long) - 1;
4437 }
4438 // We do not know yet if update_map_segments is really needed
4439 __collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0);
4440
4441 for (;;)
4442 {
4443 if (ind >= lsize || wctx.pc == 0)
4444 break;
4445 if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
4446 {
4447 lbuf[0] = wctx.pc;
4448 if (ind == 0)
4449 {
4450 ind++;
4451 if (ind >= lsize)
4452 break;
4453 }
4454 }
4455 if (bptr == NULL || wctx.sp > (unsigned long) bptr)
4456 {
4457 lbuf[ind++] = wctx.pc;
4458 if (ind >= lsize)
4459 break;
4460 }
4461
4462 for (;;)
4463 {
4464 if (eptr != NULL && wctx.sp >= (unsigned long) eptr)
4465 {
4466 ind = ind >= 2 ? ind - 2 : 0;
4467 goto exit;
4468 }
4469 int ret = find_i386_ret_addr (&wctx, do_walk);
4470 DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d find_i386_ret_addr returns %d\n", __LINE__, ret);
4471 if (ret == RA_FAILURE)
4472 {
4473 /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
4474 goto exit;
4475 }
4476
4477 if (ret == RA_END_OF_STACK)
4478 goto exit;
4479 #if WSIZE(32)
4480 if (ret == RA_RT_SIGRETURN)
4481 {
4482 struct SigFrame
4483 {
4484 unsigned long arg0;
4485 unsigned long arg1;
4486 unsigned long arg2;
4487 } *sframe = (struct SigFrame*) wctx.sp;
4488 ucontext_t *ncontext = (ucontext_t*) sframe->arg2;
4489 wctx.pc = GET_PC (ncontext);
4490 if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
4491 {
4492 /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
4493 goto exit;
4494 }
4495 unsigned long nsp = GET_SP (ncontext);
4496 /* Check the new stack pointer */
4497 if (nsp <= sframe->arg2 || nsp > sframe->arg2 + sizeof (ucontext_t) + 1024)
4498 {
4499 /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
4500 goto exit;
4501 }
4502 wctx.sp = nsp;
4503 wctx.fp = GET_FP (ncontext);
4504 break;
4505 }
4506 else if (ret == RA_SIGRETURN)
4507 {
4508 struct sigcontext *sctx = (struct sigcontext*) wctx.sp;
4509 wctx.pc = sctx->eip;
4510 if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
4511 {
4512 /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
4513 goto exit;
4514 }
4515 wctx.sp = sctx->esp;
4516 wctx.fp = sctx->ebp;
4517 break;
4518 }
4519 #elif WSIZE(64)
4520 if (ret == RA_RT_SIGRETURN)
4521 {
4522 ucontext_t *ncontext = (ucontext_t*) wctx.sp;
4523 wctx.pc = GET_PC (ncontext);
4524 if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
4525 {
4526 /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
4527 goto exit;
4528 }
4529 unsigned long nsp = GET_SP (ncontext);
4530 /* Check the new stack pointer */
4531 if (nsp <= wctx.sp || nsp > wctx.sp + sizeof (ucontext_t) + 1024)
4532 {
4533 /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
4534 goto exit;
4535 }
4536 wctx.sp = nsp;
4537 wctx.fp = GET_FP (ncontext);
4538 break;
4539 }
4540 #endif /* WSIZE() */
4541 if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
4542 {
4543 lbuf[0] = wctx.pc;
4544 if (ind == 0)
4545 {
4546 ind++;
4547 if (ind >= lsize)
4548 break;
4549 }
4550 }
4551 if (bptr == NULL || wctx.sp > (unsigned long) bptr)
4552 {
4553 lbuf[ind++] = wctx.pc;
4554 if (ind >= lsize)
4555 goto exit;
4556 }
4557 }
4558 }
4559
4560 exit:
4561 #if defined(DEBUG)
4562 if ((SP_DUMP_UNWIND & __collector_tracelevel) != 0)
4563 {
4564 DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d found %d frames\n\n", __LINE__, ind);
4565 for (int i = 0; i < ind; i++)
4566 DprintfT (SP_DUMP_UNWIND, " %3d: 0x%lx\n", i, (unsigned long) lbuf[i]);
4567 }
4568 #endif
4569 dump_stack (__LINE__);
4570 if (ind >= lsize)
4571 {
4572 ind = lsize - 1;
4573 lbuf[ind++] = (unsigned long) SP_TRUNC_STACK_MARKER;
4574 }
4575 return ind * sizeof (long);
4576 }
4577
4578 #elif ARCH(Aarch64)
4579
4580 static int
stack_unwind(char * buf,int size,void * bptr,void * eptr,ucontext_t * context,int mode)4581 stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
4582 {
4583 if (buf && bptr && eptr && context && size + mode > 0)
4584 getByteInstruction ((unsigned char *) eptr);
4585 int ind = 0;
4586 __u64 *lbuf = (void *) buf;
4587 int lsize = size / sizeof (__u64);
4588 __u64 pc = context->uc_mcontext.pc;
4589 __u64 sp = context->uc_mcontext.sp;
4590 __u64 stack_base;
4591 unsigned long tbgn = 0;
4592 unsigned long tend = 0;
4593
4594 unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
4595 if (sbase && *sbase > sp)
4596 stack_base = *sbase;
4597 else
4598 {
4599 stack_base = sp + 0x100000;
4600 if (stack_base < sp) // overflow
4601 stack_base = (__u64) -1;
4602 }
4603 DprintfT (SP_DUMP_UNWIND,
4604 "unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx stack_base=0x%llx\n",
4605 __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp,
4606 (unsigned long long) stack_base);
4607
4608 while (sp && pc)
4609 {
4610 DprintfT (SP_DUMP_UNWIND,
4611 "unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx\n",
4612 __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp);
4613 // Dl_info dlinfo;
4614 // if (!dladdr ((void *) pc, &dlinfo))
4615 // break;
4616 // DprintfT (SP_DUMP_UNWIND, "%2d: %llx <%s+%llu> (%s)\n",
4617 // ind, (unsigned long long) pc,
4618 // dlinfo.dli_sname ? dlinfo.dli_sname : "(?)",
4619 // (unsigned long long) pc - (unsigned long long) dlinfo.dli_saddr,
4620 // dlinfo.dli_fname);
4621 lbuf[ind++] = pc;
4622 if (ind >= lsize || sp >= stack_base || (sp & 15) != 0)
4623 break;
4624 if (pc < tbgn || pc >= tend)
4625 if (!__collector_check_segment ((unsigned long) pc, &tbgn, &tend, 0))
4626 {
4627 DprintfT (SP_DUMP_UNWIND,
4628 "unwind.c:%d __collector_check_segment failed. sp=0x%lx\n",
4629 __LINE__, (unsigned long) sp);
4630 break;
4631 }
4632 pc = ((__u64 *) sp)[1];
4633 __u64 old_sp = sp;
4634 sp = ((__u64 *) sp)[0];
4635 if (sp < old_sp)
4636 break;
4637 }
4638 if (ind >= lsize)
4639 {
4640 ind = lsize - 1;
4641 lbuf[ind++] = (__u64) SP_TRUNC_STACK_MARKER;
4642 }
4643 return ind * sizeof (__u64);
4644 }
4645 #endif /* ARCH() */
4646