xref: /dpdk/examples/l2fwd-keepalive/ka-agent/main.c (revision a599eb31f2e477674fc6176cdf989ee17432b552)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2016 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdint.h>
37 #include <errno.h>
38 #include <unistd.h>
39 #include <fcntl.h>
40 #include <sys/wait.h>
41 #include <sys/queue.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <time.h>
45 
46 #include <rte_keepalive.h>
47 
48 #include <shm.h>
49 
/* Threshold that main() compares its count of consecutive semaphore-wait
 * timeouts against when deciding whether to give up and exit.
 */
#define MAX_TIMEOUTS 4
/* Seconds added to the current time to form the absolute deadline passed
 * to sem_timedwait() in main().
 */
#define SEM_TIMEOUT_SECS 2
52 
53 static struct rte_keepalive_shm *ka_shm_create(void)
54 {
55 	int fd = shm_open(RTE_KEEPALIVE_SHM_NAME, O_RDWR, 0666);
56 	size_t size = sizeof(struct rte_keepalive_shm);
57 	struct rte_keepalive_shm *shm;
58 
59 	if (fd < 0)
60 		printf("Failed to open %s as SHM:%s\n",
61 			RTE_KEEPALIVE_SHM_NAME,
62 		strerror(errno));
63 	else {
64 		shm = (struct rte_keepalive_shm *) mmap(
65 			0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
66 		close(fd);
67 		if (shm == MAP_FAILED)
68 			printf("Failed to mmap SHM:%s\n", strerror(errno));
69 		else
70 			return shm;
71 	}
72 
73 	/* Reset to zero, as it was set to MAP_FAILED aka: (void *)-1 */
74 	shm = 0;
75 	return NULL;
76 }
77 
78 int main(void)
79 {
80 	struct rte_keepalive_shm *shm = ka_shm_create();
81 	struct timespec timeout = { .tv_nsec = 0 };
82 	int idx_core;
83 	int cnt_cores;
84 	uint64_t last_seen_alive_time = 0;
85 	uint64_t most_recent_alive_time;
86 	int cnt_timeouts = 0;
87 	int sem_errno;
88 
89 	if (shm == NULL) {
90 		printf("Unable to access shared core state\n");
91 		return 1;
92 	}
93 	while (1) {
94 		most_recent_alive_time = 0;
95 		for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
96 				idx_core++)
97 			if (shm->core_last_seen_times[idx_core] >
98 					most_recent_alive_time)
99 				most_recent_alive_time =
100 					shm->core_last_seen_times[idx_core];
101 
102 		timeout.tv_sec = time(NULL) + SEM_TIMEOUT_SECS;
103 		if (sem_timedwait(&shm->core_died, &timeout) == -1) {
104 			/* Assume no core death signals and no change in any
105 			 * last-seen times is the keepalive monitor itself
106 			 * failing.
107 			 */
108 			sem_errno = errno;
109 			last_seen_alive_time = most_recent_alive_time;
110 			if (sem_errno == ETIMEDOUT) {
111 				if (last_seen_alive_time ==
112 						most_recent_alive_time &&
113 						cnt_timeouts++ >
114 						MAX_TIMEOUTS) {
115 					printf("No updates. Exiting..\n");
116 					break;
117 					}
118 			} else
119 				printf("sem_timedwait() error (%s)\n",
120 					strerror(sem_errno));
121 			continue;
122 		}
123 		cnt_timeouts = 0;
124 
125 		cnt_cores = 0;
126 		for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
127 				idx_core++)
128 			if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD)
129 				cnt_cores++;
130 		if (cnt_cores == 0) {
131 			/* Can happen if core was restarted since Semaphore
132 			 * was sent, due to agent being offline.
133 			 */
134 			printf("Warning: Empty dead core report\n");
135 			continue;
136 		}
137 
138 		printf("%i dead cores: ", cnt_cores);
139 		for (idx_core = 0;
140 				idx_core < RTE_KEEPALIVE_MAXCORES;
141 				idx_core++)
142 			if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD)
143 				printf("%d, ", idx_core);
144 		printf("\b\b\n");
145 	}
146 	if (munmap(shm, sizeof(struct rte_keepalive_shm)) != 0)
147 		printf("Warning: munmap() failed\n");
148 	return 0;
149 }
150