1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <string.h> 7 #include <stdint.h> 8 #include <errno.h> 9 #include <unistd.h> 10 #include <fcntl.h> 11 #include <sys/wait.h> 12 #include <sys/queue.h> 13 #include <sys/mman.h> 14 #include <sys/stat.h> 15 #include <time.h> 16 17 #include <rte_keepalive.h> 18 19 #include <shm.h> 20 21 #define MAX_TIMEOUTS 4 22 #define SEM_TIMEOUT_SECS 2 23 24 static struct rte_keepalive_shm *ka_shm_create(void) 25 { 26 int fd = shm_open(RTE_KEEPALIVE_SHM_NAME, O_RDWR, 0666); 27 size_t size = sizeof(struct rte_keepalive_shm); 28 struct rte_keepalive_shm *shm; 29 30 if (fd < 0) 31 printf("Failed to open %s as SHM:%s\n", 32 RTE_KEEPALIVE_SHM_NAME, 33 strerror(errno)); 34 else { 35 shm = (struct rte_keepalive_shm *) mmap( 36 0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 37 close(fd); 38 if (shm == MAP_FAILED) 39 printf("Failed to mmap SHM:%s\n", strerror(errno)); 40 else 41 return shm; 42 } 43 44 /* Reset to zero, as it was set to MAP_FAILED aka: (void *)-1 */ 45 shm = 0; 46 return NULL; 47 } 48 49 int main(void) 50 { 51 struct rte_keepalive_shm *shm = ka_shm_create(); 52 struct timespec timeout = { .tv_nsec = 0 }; 53 int idx_core; 54 int cnt_cores; 55 uint64_t last_seen_alive_time = 0; 56 uint64_t most_recent_alive_time; 57 int cnt_timeouts = 0; 58 int sem_errno; 59 60 if (shm == NULL) { 61 printf("Unable to access shared core state\n"); 62 return 1; 63 } 64 while (1) { 65 most_recent_alive_time = 0; 66 for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; 67 idx_core++) 68 if (shm->core_last_seen_times[idx_core] > 69 most_recent_alive_time) 70 most_recent_alive_time = 71 shm->core_last_seen_times[idx_core]; 72 73 timeout.tv_sec = time(NULL) + SEM_TIMEOUT_SECS; 74 if (sem_timedwait(&shm->core_died, &timeout) == -1) { 75 /* Assume no core death signals and no change in any 76 * last-seen times is the keepalive monitor itself 77 * failing. 78 */ 79 sem_errno = errno; 80 last_seen_alive_time = most_recent_alive_time; 81 if (sem_errno == ETIMEDOUT) { 82 if (last_seen_alive_time == 83 most_recent_alive_time && 84 cnt_timeouts++ > 85 MAX_TIMEOUTS) { 86 printf("No updates. Exiting..\n"); 87 break; 88 } 89 } else 90 printf("sem_timedwait() error (%s)\n", 91 strerror(sem_errno)); 92 continue; 93 } 94 cnt_timeouts = 0; 95 96 cnt_cores = 0; 97 for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; 98 idx_core++) 99 if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD) 100 cnt_cores++; 101 if (cnt_cores == 0) { 102 /* Can happen if core was restarted since Semaphore 103 * was sent, due to agent being offline. 104 */ 105 printf("Warning: Empty dead core report\n"); 106 continue; 107 } 108 109 printf("%i dead cores: ", cnt_cores); 110 for (idx_core = 0; 111 idx_core < RTE_KEEPALIVE_MAXCORES; 112 idx_core++) 113 if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD) 114 printf("%d, ", idx_core); 115 printf("\b\b\n"); 116 } 117 if (munmap(shm, sizeof(struct rte_keepalive_shm)) != 0) 118 printf("Warning: munmap() failed\n"); 119 return 0; 120 } 121