/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 Intel Corporation
 */
47b2a704cSRemy Horton
57b2a704cSRemy Horton #include <stdio.h>
67b2a704cSRemy Horton #include <string.h>
77b2a704cSRemy Horton #include <stdint.h>
87b2a704cSRemy Horton #include <errno.h>
97b2a704cSRemy Horton #include <unistd.h>
107b2a704cSRemy Horton #include <fcntl.h>
117b2a704cSRemy Horton #include <sys/wait.h>
127b2a704cSRemy Horton #include <sys/queue.h>
137b2a704cSRemy Horton #include <sys/mman.h>
147b2a704cSRemy Horton #include <sys/stat.h>
157b2a704cSRemy Horton #include <time.h>
167b2a704cSRemy Horton
177b2a704cSRemy Horton #include <rte_keepalive.h>
187b2a704cSRemy Horton
197b2a704cSRemy Horton #include <shm.h>
207b2a704cSRemy Horton
217b2a704cSRemy Horton #define MAX_TIMEOUTS 4
227b2a704cSRemy Horton #define SEM_TIMEOUT_SECS 2
237b2a704cSRemy Horton
ka_shm_create(void)247b2a704cSRemy Horton static struct rte_keepalive_shm *ka_shm_create(void)
257b2a704cSRemy Horton {
267b2a704cSRemy Horton int fd = shm_open(RTE_KEEPALIVE_SHM_NAME, O_RDWR, 0666);
277b2a704cSRemy Horton size_t size = sizeof(struct rte_keepalive_shm);
287b2a704cSRemy Horton struct rte_keepalive_shm *shm;
297b2a704cSRemy Horton
307b2a704cSRemy Horton if (fd < 0)
317b2a704cSRemy Horton printf("Failed to open %s as SHM:%s\n",
327b2a704cSRemy Horton RTE_KEEPALIVE_SHM_NAME,
337b2a704cSRemy Horton strerror(errno));
347b2a704cSRemy Horton else {
357b2a704cSRemy Horton shm = (struct rte_keepalive_shm *) mmap(
367b2a704cSRemy Horton 0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
377b2a704cSRemy Horton close(fd);
387b2a704cSRemy Horton if (shm == MAP_FAILED)
397b2a704cSRemy Horton printf("Failed to mmap SHM:%s\n", strerror(errno));
407b2a704cSRemy Horton else
417b2a704cSRemy Horton return shm;
427b2a704cSRemy Horton }
437b2a704cSRemy Horton
447b2a704cSRemy Horton /* Reset to zero, as it was set to MAP_FAILED aka: (void *)-1 */
457b2a704cSRemy Horton shm = 0;
467b2a704cSRemy Horton return NULL;
477b2a704cSRemy Horton }
487b2a704cSRemy Horton
main(void)497b2a704cSRemy Horton int main(void)
507b2a704cSRemy Horton {
517b2a704cSRemy Horton struct rte_keepalive_shm *shm = ka_shm_create();
527b2a704cSRemy Horton struct timespec timeout = { .tv_nsec = 0 };
537b2a704cSRemy Horton int idx_core;
547b2a704cSRemy Horton int cnt_cores;
557b2a704cSRemy Horton uint64_t last_seen_alive_time = 0;
567b2a704cSRemy Horton uint64_t most_recent_alive_time;
577b2a704cSRemy Horton int cnt_timeouts = 0;
587b2a704cSRemy Horton int sem_errno;
597b2a704cSRemy Horton
607b2a704cSRemy Horton if (shm == NULL) {
617b2a704cSRemy Horton printf("Unable to access shared core state\n");
627b2a704cSRemy Horton return 1;
637b2a704cSRemy Horton }
647b2a704cSRemy Horton while (1) {
657b2a704cSRemy Horton most_recent_alive_time = 0;
667b2a704cSRemy Horton for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
677b2a704cSRemy Horton idx_core++)
687b2a704cSRemy Horton if (shm->core_last_seen_times[idx_core] >
697b2a704cSRemy Horton most_recent_alive_time)
707b2a704cSRemy Horton most_recent_alive_time =
717b2a704cSRemy Horton shm->core_last_seen_times[idx_core];
727b2a704cSRemy Horton
737b2a704cSRemy Horton timeout.tv_sec = time(NULL) + SEM_TIMEOUT_SECS;
747b2a704cSRemy Horton if (sem_timedwait(&shm->core_died, &timeout) == -1) {
757b2a704cSRemy Horton /* Assume no core death signals and no change in any
767b2a704cSRemy Horton * last-seen times is the keepalive monitor itself
777b2a704cSRemy Horton * failing.
787b2a704cSRemy Horton */
797b2a704cSRemy Horton sem_errno = errno;
807b2a704cSRemy Horton last_seen_alive_time = most_recent_alive_time;
817b2a704cSRemy Horton if (sem_errno == ETIMEDOUT) {
827b2a704cSRemy Horton if (last_seen_alive_time ==
837b2a704cSRemy Horton most_recent_alive_time &&
847b2a704cSRemy Horton cnt_timeouts++ >
857b2a704cSRemy Horton MAX_TIMEOUTS) {
867b2a704cSRemy Horton printf("No updates. Exiting..\n");
877b2a704cSRemy Horton break;
887b2a704cSRemy Horton }
897b2a704cSRemy Horton } else
907b2a704cSRemy Horton printf("sem_timedwait() error (%s)\n",
917b2a704cSRemy Horton strerror(sem_errno));
927b2a704cSRemy Horton continue;
937b2a704cSRemy Horton }
947b2a704cSRemy Horton cnt_timeouts = 0;
957b2a704cSRemy Horton
967b2a704cSRemy Horton cnt_cores = 0;
977b2a704cSRemy Horton for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
987b2a704cSRemy Horton idx_core++)
997b2a704cSRemy Horton if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD)
1007b2a704cSRemy Horton cnt_cores++;
1017b2a704cSRemy Horton if (cnt_cores == 0) {
1027b2a704cSRemy Horton /* Can happen if core was restarted since Semaphore
1037b2a704cSRemy Horton * was sent, due to agent being offline.
1047b2a704cSRemy Horton */
1057b2a704cSRemy Horton printf("Warning: Empty dead core report\n");
1067b2a704cSRemy Horton continue;
1077b2a704cSRemy Horton }
1087b2a704cSRemy Horton
1097b2a704cSRemy Horton printf("%i dead cores: ", cnt_cores);
1107b2a704cSRemy Horton for (idx_core = 0;
1117b2a704cSRemy Horton idx_core < RTE_KEEPALIVE_MAXCORES;
1127b2a704cSRemy Horton idx_core++)
1137b2a704cSRemy Horton if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD)
1147b2a704cSRemy Horton printf("%d, ", idx_core);
1157b2a704cSRemy Horton printf("\b\b\n");
1167b2a704cSRemy Horton }
1177b2a704cSRemy Horton if (munmap(shm, sizeof(struct rte_keepalive_shm)) != 0)
1187b2a704cSRemy Horton printf("Warning: munmap() failed\n");
1197b2a704cSRemy Horton return 0;
1207b2a704cSRemy Horton }
121