xref: /dpdk/examples/l2fwd-keepalive/ka-agent/main.c (revision 0857b942113874c69dc3db5df11a828ee3cc9b6b)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2016 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <errno.h>
35 #include <stdio.h>
36 #include <string.h>
37 #include <stdint.h>
38 #include <errno.h>
39 #include <unistd.h>
40 #include <fcntl.h>
41 #include <sys/wait.h>
42 #include <sys/queue.h>
43 #include <sys/mman.h>
44 #include <sys/stat.h>
45 #include <time.h>
46 
47 #include <rte_keepalive.h>
48 
49 #include <shm.h>
50 
51 #define MAX_TIMEOUTS 4
52 #define SEM_TIMEOUT_SECS 2
53 
54 static struct rte_keepalive_shm *ka_shm_create(void)
55 {
56 	int fd = shm_open(RTE_KEEPALIVE_SHM_NAME, O_RDWR, 0666);
57 	size_t size = sizeof(struct rte_keepalive_shm);
58 	struct rte_keepalive_shm *shm;
59 
60 	if (fd < 0)
61 		printf("Failed to open %s as SHM:%s\n",
62 			RTE_KEEPALIVE_SHM_NAME,
63 		strerror(errno));
64 	else {
65 		shm = (struct rte_keepalive_shm *) mmap(
66 			0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
67 		close(fd);
68 		if (shm == MAP_FAILED)
69 			printf("Failed to mmap SHM:%s\n", strerror(errno));
70 		else
71 			return shm;
72 	}
73 
74 	/* Reset to zero, as it was set to MAP_FAILED aka: (void *)-1 */
75 	shm = 0;
76 	return NULL;
77 }
78 
79 int main(void)
80 {
81 	struct rte_keepalive_shm *shm = ka_shm_create();
82 	struct timespec timeout = { .tv_nsec = 0 };
83 	int idx_core;
84 	int cnt_cores;
85 	uint64_t last_seen_alive_time = 0;
86 	uint64_t most_recent_alive_time;
87 	int cnt_timeouts = 0;
88 	int sem_errno;
89 
90 	if (shm == NULL) {
91 		printf("Unable to access shared core state\n");
92 		return 1;
93 	}
94 	while (1) {
95 		most_recent_alive_time = 0;
96 		for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
97 				idx_core++)
98 			if (shm->core_last_seen_times[idx_core] >
99 					most_recent_alive_time)
100 				most_recent_alive_time =
101 					shm->core_last_seen_times[idx_core];
102 
103 		timeout.tv_sec = time(NULL) + SEM_TIMEOUT_SECS;
104 		if (sem_timedwait(&shm->core_died, &timeout) == -1) {
105 			/* Assume no core death signals and no change in any
106 			 * last-seen times is the keepalive monitor itself
107 			 * failing.
108 			 */
109 			sem_errno = errno;
110 			last_seen_alive_time = most_recent_alive_time;
111 			if (sem_errno == ETIMEDOUT) {
112 				if (last_seen_alive_time ==
113 						most_recent_alive_time &&
114 						cnt_timeouts++ >
115 						MAX_TIMEOUTS) {
116 					printf("No updates. Exiting..\n");
117 					break;
118 					}
119 			} else
120 				printf("sem_timedwait() error (%s)\n",
121 					strerror(sem_errno));
122 			continue;
123 		}
124 		cnt_timeouts = 0;
125 
126 		cnt_cores = 0;
127 		for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
128 				idx_core++)
129 			if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD)
130 				cnt_cores++;
131 		if (cnt_cores == 0) {
132 			/* Can happen if core was restarted since Semaphore
133 			 * was sent, due to agent being offline.
134 			 */
135 			printf("Warning: Empty dead core report\n");
136 			continue;
137 		}
138 
139 		printf("%i dead cores: ", cnt_cores);
140 		for (idx_core = 0;
141 				idx_core < RTE_KEEPALIVE_MAXCORES;
142 				idx_core++)
143 			if (shm->core_state[idx_core] == RTE_KA_STATE_DEAD)
144 				printf("%d, ", idx_core);
145 		printf("\b\b\n");
146 	}
147 	if (munmap(shm, sizeof(struct rte_keepalive_shm)) != 0)
148 		printf("Warning: munmap() failed\n");
149 	return 0;
150 }
151