/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2016 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */ 33 34 #include <errno.h> 35 #include <stdio.h> 36 #include <string.h> 37 #include <stdint.h> 38 #include <errno.h> 39 #include <unistd.h> 40 #include <fcntl.h> 41 #include <sys/wait.h> 42 #include <sys/queue.h> 43 #include <sys/mman.h> 44 #include <sys/stat.h> 45 #include <time.h> 46 47 #include <rte_keepalive.h> 48 49 #include <shm.h> 50 51 #define MAX_TIMEOUTS 4 52 #define SEM_TIMEOUT_SECS 2 53 54 static struct rte_keepalive_shm *ka_shm_create(void) 55 { 56 int fd = shm_open(RTE_KEEPALIVE_SHM_NAME, O_RDWR, 0666); 57 size_t size = sizeof(struct rte_keepalive_shm); 58 struct rte_keepalive_shm *shm; 59 60 if (fd < 0) 61 printf("Failed to open %s as SHM:%s\n", 62 RTE_KEEPALIVE_SHM_NAME, 63 strerror(errno)); 64 else { 65 shm = (struct rte_keepalive_shm *) mmap( 66 0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 67 close(fd); 68 if (shm == MAP_FAILED) 69 printf("Failed to mmap SHM:%s\n", strerror(errno)); 70 else 71 return shm; 72 } 73 74 /* Reset to zero, as it was set to MAP_FAILED aka: (void *)-1 */ 75 shm = 0; 76 return NULL; 77 } 78 79 int main(void) 80 { 81 struct rte_keepalive_shm *shm = ka_shm_create(); 82 struct timespec timeout = { .tv_nsec = 0 }; 83 int idx_core; 84 int cnt_cores; 85 uint64_t last_seen_alive_time = 0; 86 uint64_t most_recent_alive_time; 87 int cnt_timeouts = 0; 88 int sem_errno; 89 90 if (shm == NULL) { 91 printf("Unable to access shared core state\n"); 92 return 1; 93 } 94 while (1) { 95 most_recent_alive_time = 0; 96 for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; 97 idx_core++) 98 if (shm->core_last_seen_times[idx_core] > 99 most_recent_alive_time) 100 most_recent_alive_time = 101 shm->core_last_seen_times[idx_core]; 102 103 timeout.tv_sec = time(NULL) + SEM_TIMEOUT_SECS; 104 if (sem_timedwait(&shm->core_died, &timeout) == -1) { 105 /* Assume no core death signals and no change in any 106 * last-seen times is the keepalive monitor itself 107 * failing. 
108 */ 109 sem_errno = errno; 110 last_seen_alive_time = most_recent_alive_time; 111 if (sem_errno == ETIMEDOUT) { 112 if (last_seen_alive_time == 113 most_recent_alive_time && 114 cnt_timeouts++ > 115 MAX_TIMEOUTS) { 116 printf("No updates. Exiting..\n"); 117 break; 118 } 119 } else 120 printf("sem_timedwait() error (%s)\n", 121 strerror(sem_errno)); 122 continue; 123 } 124 cnt_timeouts = 0; 125 126 cnt_cores = 0; 127 for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; 128 idx_core++) 129 if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD) 130 cnt_cores++; 131 if (cnt_cores == 0) { 132 /* Can happen if core was restarted since Semaphore 133 * was sent, due to agent being offline. 134 */ 135 printf("Warning: Empty dead core report\n"); 136 continue; 137 } 138 139 printf("%i dead cores: ", cnt_cores); 140 for (idx_core = 0; 141 idx_core < RTE_KEEPALIVE_MAXCORES; 142 idx_core++) 143 if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD) 144 printf("%d, ", idx_core); 145 printf("\b\b\n"); 146 } 147 if (munmap(shm, sizeof(struct rte_keepalive_shm)) != 0) 148 printf("Warning: munmap() failed\n"); 149 return 0; 150 } 151