1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2016 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stdio.h> 35 #include <string.h> 36 #include <stdint.h> 37 #include <errno.h> 38 #include <unistd.h> 39 #include <fcntl.h> 40 #include <sys/wait.h> 41 #include <sys/queue.h> 42 #include <sys/mman.h> 43 #include <sys/stat.h> 44 #include <time.h> 45 46 #include <rte_keepalive.h> 47 48 #include <shm.h> 49 50 #define MAX_TIMEOUTS 4 51 #define SEM_TIMEOUT_SECS 2 52 53 static struct rte_keepalive_shm *ka_shm_create(void) 54 { 55 int fd = shm_open(RTE_KEEPALIVE_SHM_NAME, O_RDWR, 0666); 56 size_t size = sizeof(struct rte_keepalive_shm); 57 struct rte_keepalive_shm *shm; 58 59 if (fd < 0) 60 printf("Failed to open %s as SHM:%s\n", 61 RTE_KEEPALIVE_SHM_NAME, 62 strerror(errno)); 63 else { 64 shm = (struct rte_keepalive_shm *) mmap( 65 0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 66 close(fd); 67 if (shm == MAP_FAILED) 68 printf("Failed to mmap SHM:%s\n", strerror(errno)); 69 else 70 return shm; 71 } 72 73 /* Reset to zero, as it was set to MAP_FAILED aka: (void *)-1 */ 74 shm = 0; 75 return NULL; 76 } 77 78 int main(void) 79 { 80 struct rte_keepalive_shm *shm = ka_shm_create(); 81 struct timespec timeout = { .tv_nsec = 0 }; 82 int idx_core; 83 int cnt_cores; 84 uint64_t last_seen_alive_time = 0; 85 uint64_t most_recent_alive_time; 86 int cnt_timeouts = 0; 87 int sem_errno; 88 89 if (shm == NULL) { 90 printf("Unable to access shared core state\n"); 91 return 1; 92 } 93 while (1) { 94 most_recent_alive_time = 0; 95 for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; 96 idx_core++) 97 if (shm->core_last_seen_times[idx_core] > 98 most_recent_alive_time) 99 most_recent_alive_time = 100 shm->core_last_seen_times[idx_core]; 101 102 timeout.tv_sec = time(NULL) + SEM_TIMEOUT_SECS; 103 if (sem_timedwait(&shm->core_died, &timeout) == -1) { 104 /* Assume no core death signals and no change in any 105 * last-seen times is the keepalive monitor itself 106 * failing. 107 */ 108 sem_errno = errno; 109 last_seen_alive_time = most_recent_alive_time; 110 if (sem_errno == ETIMEDOUT) { 111 if (last_seen_alive_time == 112 most_recent_alive_time && 113 cnt_timeouts++ > 114 MAX_TIMEOUTS) { 115 printf("No updates. Exiting..\n"); 116 break; 117 } 118 } else 119 printf("sem_timedwait() error (%s)\n", 120 strerror(sem_errno)); 121 continue; 122 } 123 cnt_timeouts = 0; 124 125 cnt_cores = 0; 126 for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; 127 idx_core++) 128 if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD) 129 cnt_cores++; 130 if (cnt_cores == 0) { 131 /* Can happen if core was restarted since Semaphore 132 * was sent, due to agent being offline. 133 */ 134 printf("Warning: Empty dead core report\n"); 135 continue; 136 } 137 138 printf("%i dead cores: ", cnt_cores); 139 for (idx_core = 0; 140 idx_core < RTE_KEEPALIVE_MAXCORES; 141 idx_core++) 142 if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD) 143 printf("%d, ", idx_core); 144 printf("\b\b\n"); 145 } 146 if (munmap(shm, sizeof(struct rte_keepalive_shm)) != 0) 147 printf("Warning: munmap() failed\n"); 148 return 0; 149 } 150