/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <string.h>
#include <sys/time.h>

#include <rte_alarm.h>
#include <rte_errno.h>
#include <rte_string_fns.h>

#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"

#include "malloc_elem.h"
#include "malloc_mp.h"

#define MP_ACTION_SYNC "mp_malloc_sync"
/**< request sent by primary process to notify of changes in memory map */
#define MP_ACTION_ROLLBACK "mp_malloc_rollback"
/**< request sent by primary process to ask secondaries to roll back their
 * memory maps. this is essentially a regular sync request, but we cannot send
 * sync requests while another one is in progress, and we might have to -
 * therefore, we do this as a separate callback.
 */
#define MP_ACTION_REQUEST "mp_malloc_request"
/**< request sent by secondary process to ask for allocation/deallocation */
#define MP_ACTION_RESPONSE "mp_malloc_response"
/**< response sent to secondary process to indicate result of request */

/* forward declarations */
static int
handle_sync_response(const struct rte_mp_msg *request,
		const struct rte_mp_reply *reply);
static int
handle_rollback_response(const struct rte_mp_msg *request,
		const struct rte_mp_reply *reply);

#define MP_TIMEOUT_S 5 /**< 5 second timeout */

/* when we're allocating, we need to store some state to ensure that we can
 * roll back later
 */
struct primary_alloc_req_state {
	struct malloc_heap *heap;
	struct rte_memseg **ms;
	int ms_len;
	struct malloc_elem *elem;
	void *map_addr;
	size_t map_len;
};

enum req_state {
	REQ_STATE_INACTIVE = 0,
	REQ_STATE_ACTIVE,
	REQ_STATE_COMPLETE
};

struct mp_request {
	TAILQ_ENTRY(mp_request) next;
	struct malloc_mp_req user_req; /**< contents of request */
	pthread_cond_t cond; /**< variable we use to time out on this request */
	enum req_state state; /**< indicate status of this request */
	struct primary_alloc_req_state alloc_state;
};

/*
 * We could've used just a single request, but it may be possible for
 * secondaries to time out earlier than the primary, and send a new request
 * while the primary is still expecting replies to the old one. Therefore, each
 * new request will get assigned a new ID, which is how we will distinguish
 * between expected and unexpected messages.
 */
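
/* All access to the request list and to each request's state - including the
 * condition variable a requesting thread sleeps on - is serialized by the
 * lock below; pthread_cond_timedwait() in request_to_primary() uses it as
 * its wait mutex.
 */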
TAILQ_HEAD(mp_request_list, mp_request);
static struct {
	struct mp_request_list list;
	pthread_mutex_t lock;
} mp_request_list = {
	.list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
	.lock = PTHREAD_MUTEX_INITIALIZER
};

/**
 * General workflow is the following:
 *
 * Allocation:
 * S: send request to primary
 * P: attempt to allocate memory
 *    if failed, sendmsg failure
 *    if success, send sync request
 * S: if received msg of failure, quit
 *    if received sync request, synchronize memory map and reply with result
 * P: if received sync request result
 *    if success, sendmsg success
 *    if failure, roll back allocation and send a rollback request
 * S: if received msg of success, quit
 *    if received rollback request, synchronize memory map and reply with result
 * P: if received sync request result
 *    sendmsg sync request result
 * S: if received msg, quit
 *
 * Aside from timeouts, there are three points where we can quit:
 *  - if allocation failed straight away
 *  - if allocation and sync request succeeded
 *  - if allocation succeeded, sync request failed, allocation rolled back and
 *    rollback request received (irrespective of whether it succeeded or failed)
 *
 * Deallocation:
 * S: send request to primary
 * P: attempt to deallocate memory
 *    if failed, sendmsg failure
 *    if success, send sync request
 * S: if received msg of failure, quit
 *    if received sync request, synchronize memory map and reply with result
 * P: if received sync request result
 *    sendmsg sync request result
 * S: if received msg, quit
 *
 * There is no "rollback" from deallocation, as it's safe to have some memory
 * mapped in some processes - it's absent from the heap, so it won't get used.
 */
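
/* For illustration only: a secondary-side free request driven through
 * request_to_primary() below (the real callers live in malloc_heap.c) would
 * look roughly like this:
 *
 *	struct malloc_mp_req req;
 *
 *	memset(&req, 0, sizeof(req));
 *	req.t = REQ_TYPE_FREE;
 *	req.free_req.addr = addr;
 *	req.free_req.len = len;
 *	if (request_to_primary(&req) != 0)
 *		return -1; (IPC failed)
 *	if (req.result != REQ_RESULT_SUCCESS)
 *		return -1; (primary failed to free the memory)
 */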

static struct mp_request *
find_request_by_id(uint64_t id)
{
	struct mp_request *req;
	TAILQ_FOREACH(req, &mp_request_list.list, next) {
		if (req->user_req.id == id)
			break;
	}
	return req;
}

/* this ID is, like, totally guaranteed to be absolutely unique. pinky swear. */
static uint64_t
get_unique_id(void)
{
	uint64_t id;
	do {
		id = rte_rand();
	} while (find_request_by_id(id) != NULL);
	return id;
}

/* secondary will respond to sync requests thusly */
static int
handle_sync(const struct rte_mp_msg *msg, const void *peer)
{
	struct rte_mp_msg reply;
	const struct malloc_mp_req *req =
			(const struct malloc_mp_req *)msg->param;
	struct malloc_mp_req *resp =
			(struct malloc_mp_req *)reply.param;
	int ret;

	if (req->t != REQ_TYPE_SYNC) {
		RTE_LOG(ERR, EAL, "Unexpected request from primary\n");
		return -1;
	}

	memset(&reply, 0, sizeof(reply));

	reply.num_fds = 0;
	strlcpy(reply.name, msg->name, sizeof(reply.name));
	reply.len_param = sizeof(*resp);

	ret = eal_memalloc_sync_with_primary();

	resp->t = REQ_TYPE_SYNC;
	resp->id = req->id;
	resp->result = ret == 0 ? REQ_RESULT_SUCCESS : REQ_RESULT_FAIL;

	rte_mp_reply(&reply, peer);

	return 0;
}

static int
handle_free_request(const struct malloc_mp_req *m)
{
	const struct rte_memseg_list *msl;
	void *start, *end;
	size_t len;

	len = m->free_req.len;
	start = m->free_req.addr;
	end = RTE_PTR_ADD(start, len - 1);

	/* check if the requested memory actually exists */
	msl = rte_mem_virt2memseg_list(start);
	if (msl == NULL) {
		RTE_LOG(ERR, EAL, "Requested to free unknown memory\n");
		return -1;
	}

	/* check if end is within the same memory region */
	if (rte_mem_virt2memseg_list(end) != msl) {
		RTE_LOG(ERR, EAL, "Requested to free memory spanning multiple regions\n");
		return -1;
	}

	/* we're supposed to only free memory that's not external */
	if (msl->external) {
		RTE_LOG(ERR, EAL, "Requested to free external memory\n");
		return -1;
	}

	/* now that we've validated the request, announce it */
	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
			m->free_req.addr, m->free_req.len);
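	/* note: the notification runs while the mapping is still valid, so
	 * subscribers (e.g. VFIO) can unregister DMA mappings before the
	 * pages are returned to the system below.
	 */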

	/* now, do the actual freeing */
	return malloc_heap_free_pages(m->free_req.addr, m->free_req.len);
}

static int
handle_alloc_request(const struct malloc_mp_req *m,
		struct mp_request *req)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	const struct malloc_req_alloc *ar = &m->alloc_req;
	struct malloc_heap *heap;
	struct malloc_elem *elem;
	struct rte_memseg **ms;
	size_t alloc_sz;
	int n_segs;
	void *map_addr;

	/* this is checked by the API, but we need to prevent divide by zero */
	if (ar->page_sz == 0 || !rte_is_power_of_2(ar->page_sz)) {
		RTE_LOG(ERR, EAL, "Attempting to allocate with invalid page size\n");
		return -1;
	}

	/* heap idx is index into the heap array, not socket ID */
	if (ar->malloc_heap_idx >= RTE_MAX_HEAPS) {
		RTE_LOG(ERR, EAL, "Attempting to allocate from invalid heap\n");
		return -1;
	}

	heap = &mcfg->malloc_heaps[ar->malloc_heap_idx];

	/*
	 * for allocations, we must only use internal heaps. since
	 * rte_malloc_heap_socket_is_external() takes the lock itself and
	 * we're already read-locked, we instead take advantage of the fact
	 * that internal socket IDs are always lower than RTE_MAX_NUMA_NODES.
	 */
	if (heap->socket_id >= RTE_MAX_NUMA_NODES) {
		RTE_LOG(ERR, EAL, "Attempting to allocate from external heap\n");
		return -1;
	}

	alloc_sz = RTE_ALIGN_CEIL(ar->align + ar->elt_size +
			MALLOC_ELEM_TRAILER_LEN, ar->page_sz);
	n_segs = alloc_sz / ar->page_sz;
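	/* e.g. (hypothetical numbers) a 5M element with 4K alignment on 2M
	 * pages rounds alloc_sz up to 6M and gives n_segs = 3, assuming the
	 * trailer does not spill into another page.
	 */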

	/* we can't know in advance how many pages we'll need, so we malloc */
	ms = malloc(sizeof(*ms) * n_segs);
	if (ms == NULL) {
		RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n");
		return -1;
	}
	memset(ms, 0, sizeof(*ms) * n_segs);

	elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket,
			ar->flags, ar->align, ar->bound, ar->contig, ms,
			n_segs);

	if (elem == NULL)
		goto fail;

	map_addr = ms[0]->addr;

	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);

	/* we have succeeded in allocating memory, but we still need to sync
	 * with other processes. however, since DPDK IPC is single-threaded, we
	 * send an asynchronous request and exit this callback.
	 */

	req->alloc_state.ms = ms;
	req->alloc_state.ms_len = n_segs;
	req->alloc_state.map_addr = map_addr;
	req->alloc_state.map_len = alloc_sz;
	req->alloc_state.elem = elem;
	req->alloc_state.heap = heap;

	return 0;
fail:
	free(ms);
	return -1;
}

/* first stage of primary handling requests from secondary */
static int
handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused)
{
	const struct malloc_mp_req *m =
			(const struct malloc_mp_req *)msg->param;
	struct mp_request *entry;
	int ret;

	/* lock access to request */
	pthread_mutex_lock(&mp_request_list.lock);

	/* make sure it's not a dupe */
	entry = find_request_by_id(m->id);
	if (entry != NULL) {
		RTE_LOG(ERR, EAL, "Duplicate request id\n");
		goto fail;
	}

	entry = malloc(sizeof(*entry));
	if (entry == NULL) {
		RTE_LOG(ERR, EAL, "Unable to allocate memory for request\n");
		goto fail;
	}

	/* erase all data */
	memset(entry, 0, sizeof(*entry));

	if (m->t == REQ_TYPE_ALLOC) {
		ret = handle_alloc_request(m, entry);
	} else if (m->t == REQ_TYPE_FREE) {
		ret = handle_free_request(m);
	} else {
		RTE_LOG(ERR, EAL, "Unexpected request from secondary\n");
		goto fail;
	}

	if (ret != 0) {
		struct rte_mp_msg resp_msg;
		struct malloc_mp_req *resp =
				(struct malloc_mp_req *)resp_msg.param;

		/* send failure message straight away */
		resp_msg.num_fds = 0;
		resp_msg.len_param = sizeof(*resp);
		strlcpy(resp_msg.name, MP_ACTION_RESPONSE,
				sizeof(resp_msg.name));

		resp->t = m->t;
		resp->result = REQ_RESULT_FAIL;
		resp->id = m->id;

		if (rte_mp_sendmsg(&resp_msg)) {
			RTE_LOG(ERR, EAL, "Couldn't send response\n");
			goto fail;
		}
		/* we did not modify the request */
		free(entry);
	} else {
		struct rte_mp_msg sr_msg;
		struct malloc_mp_req *sr =
				(struct malloc_mp_req *)sr_msg.param;
		struct timespec ts;

		memset(&sr_msg, 0, sizeof(sr_msg));

		/* we can do something, so send sync request asynchronously */
		sr_msg.num_fds = 0;
		sr_msg.len_param = sizeof(*sr);
		strlcpy(sr_msg.name, MP_ACTION_SYNC, sizeof(sr_msg.name));

		ts.tv_nsec = 0;
		ts.tv_sec = MP_TIMEOUT_S;

		/* sync requests carry no data */
		sr->t = REQ_TYPE_SYNC;
		sr->id = m->id;

		/* there may be stray timeout still waiting */
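		/* (a stale async request with the same action name makes
		 * rte_mp_request_async() fail with rte_errno set to EEXIST,
		 * so keep retrying until the stale one expires)
		 */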
		do {
			ret = rte_mp_request_async(&sr_msg, &ts,
					handle_sync_response);
		} while (ret != 0 && rte_errno == EEXIST);
		if (ret != 0) {
			RTE_LOG(ERR, EAL, "Couldn't send sync request\n");
			if (m->t == REQ_TYPE_ALLOC)
				free(entry->alloc_state.ms);
			goto fail;
		}

		/* mark request as in progress */
		memcpy(&entry->user_req, m, sizeof(*m));
		entry->state = REQ_STATE_ACTIVE;

		TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
	}
	pthread_mutex_unlock(&mp_request_list.lock);
	return 0;
fail:
	pthread_mutex_unlock(&mp_request_list.lock);
	free(entry);
	return -1;
}

/* callback for asynchronous sync requests for primary. this will either do a
 * sendmsg with results, or trigger rollback request.
 */
static int
handle_sync_response(const struct rte_mp_msg *request,
		const struct rte_mp_reply *reply)
{
	enum malloc_req_result result;
	struct mp_request *entry;
	const struct malloc_mp_req *mpreq =
			(const struct malloc_mp_req *)request->param;
	int i;

	/* lock the request */
	pthread_mutex_lock(&mp_request_list.lock);

	entry = find_request_by_id(mpreq->id);
	if (entry == NULL) {
		RTE_LOG(ERR, EAL, "Wrong request ID\n");
		goto fail;
	}

	result = REQ_RESULT_SUCCESS;

	if (reply->nb_received != reply->nb_sent)
		result = REQ_RESULT_FAIL;

	for (i = 0; i < reply->nb_received; i++) {
		struct malloc_mp_req *resp =
				(struct malloc_mp_req *)reply->msgs[i].param;

		if (resp->t != REQ_TYPE_SYNC) {
			RTE_LOG(ERR, EAL, "Unexpected response to sync request\n");
			result = REQ_RESULT_FAIL;
			break;
		}
		if (resp->id != entry->user_req.id) {
			RTE_LOG(ERR, EAL, "Response to wrong sync request\n");
			result = REQ_RESULT_FAIL;
			break;
		}
		if (resp->result == REQ_RESULT_FAIL) {
			result = REQ_RESULT_FAIL;
			break;
		}
	}

	if (entry->user_req.t == REQ_TYPE_FREE) {
		struct rte_mp_msg msg;
		struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;

		memset(&msg, 0, sizeof(msg));

		/* this is a free request, just sendmsg result */
		resp->t = REQ_TYPE_FREE;
		resp->result = result;
		resp->id = entry->user_req.id;
		msg.num_fds = 0;
		msg.len_param = sizeof(*resp);
		strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));

		if (rte_mp_sendmsg(&msg))
			RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");

		TAILQ_REMOVE(&mp_request_list.list, entry, next);
		free(entry);
	} else if (entry->user_req.t == REQ_TYPE_ALLOC &&
			result == REQ_RESULT_SUCCESS) {
		struct malloc_heap *heap = entry->alloc_state.heap;
		struct rte_mp_msg msg;
		struct malloc_mp_req *resp =
				(struct malloc_mp_req *)msg.param;

		memset(&msg, 0, sizeof(msg));

		heap->total_size += entry->alloc_state.map_len;

		/* result is success, so just notify secondary about this */
		resp->t = REQ_TYPE_ALLOC;
		resp->result = result;
		resp->id = entry->user_req.id;
		msg.num_fds = 0;
		msg.len_param = sizeof(*resp);
		strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));

		if (rte_mp_sendmsg(&msg))
			RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");

		TAILQ_REMOVE(&mp_request_list.list, entry, next);
		free(entry->alloc_state.ms);
		free(entry);
	} else if (entry->user_req.t == REQ_TYPE_ALLOC &&
			result == REQ_RESULT_FAIL) {
		struct rte_mp_msg rb_msg;
		struct malloc_mp_req *rb =
				(struct malloc_mp_req *)rb_msg.param;
		struct timespec ts;
		struct primary_alloc_req_state *state =
				&entry->alloc_state;
		int ret;

		memset(&rb_msg, 0, sizeof(rb_msg));

		/* we've failed to sync, so do a rollback */
		eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
				state->map_addr, state->map_len);

		rollback_expand_heap(state->ms, state->ms_len, state->elem,
				state->map_addr, state->map_len);

		/* send rollback request */
		rb_msg.num_fds = 0;
		rb_msg.len_param = sizeof(*rb);
		strlcpy(rb_msg.name, MP_ACTION_ROLLBACK, sizeof(rb_msg.name));

		ts.tv_nsec = 0;
		ts.tv_sec = MP_TIMEOUT_S;

		/* sync requests carry no data */
		rb->t = REQ_TYPE_SYNC;
		rb->id = entry->user_req.id;

		/* there may be stray timeout still waiting */
		do {
			ret = rte_mp_request_async(&rb_msg, &ts,
					handle_rollback_response);
		} while (ret != 0 && rte_errno == EEXIST);
		if (ret != 0) {
			RTE_LOG(ERR, EAL, "Could not send rollback request to secondary process\n");

			/* we couldn't send rollback request, but that's OK -
			 * secondary will time out, and memory has been removed
			 * from heap anyway.
			 */
			TAILQ_REMOVE(&mp_request_list.list, entry, next);
			free(state->ms);
			free(entry);
			goto fail;
		}
	} else {
		RTE_LOG(ERR, EAL, "Response to sync request of unknown type\n");
		goto fail;
	}

	pthread_mutex_unlock(&mp_request_list.lock);
	return 0;
fail:
	pthread_mutex_unlock(&mp_request_list.lock);
	return -1;
}

static int
handle_rollback_response(const struct rte_mp_msg *request,
		const struct rte_mp_reply *reply __rte_unused)
{
	struct rte_mp_msg msg;
	struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
	const struct malloc_mp_req *mpreq =
			(const struct malloc_mp_req *)request->param;
	struct mp_request *entry;

	/* lock the request */
	pthread_mutex_lock(&mp_request_list.lock);

	memset(&msg, 0, sizeof(msg));

	entry = find_request_by_id(mpreq->id);
	if (entry == NULL) {
		RTE_LOG(ERR, EAL, "Wrong request ID\n");
		goto fail;
	}

	if (entry->user_req.t != REQ_TYPE_ALLOC) {
		RTE_LOG(ERR, EAL, "Unexpected active request\n");
		goto fail;
	}

	/* we don't care if rollback succeeded, request still failed */
	resp->t = REQ_TYPE_ALLOC;
	resp->result = REQ_RESULT_FAIL;
	resp->id = mpreq->id;
	msg.num_fds = 0;
	msg.len_param = sizeof(*resp);
	strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));

	if (rte_mp_sendmsg(&msg))
		RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");

	/* clean up */
	TAILQ_REMOVE(&mp_request_list.list, entry, next);
	free(entry->alloc_state.ms);
	free(entry);

	pthread_mutex_unlock(&mp_request_list.lock);
	return 0;
fail:
	pthread_mutex_unlock(&mp_request_list.lock);
	return -1;
}

/* final stage of the request from secondary */
static int
handle_response(const struct rte_mp_msg *msg, const void *peer __rte_unused)
{
	const struct malloc_mp_req *m =
			(const struct malloc_mp_req *)msg->param;
	struct mp_request *entry;

	pthread_mutex_lock(&mp_request_list.lock);

	entry = find_request_by_id(m->id);
	if (entry != NULL) {
		/* update request status */
		entry->user_req.result = m->result;

		entry->state = REQ_STATE_COMPLETE;

		/* trigger thread wakeup */
		pthread_cond_signal(&entry->cond);
	}

	pthread_mutex_unlock(&mp_request_list.lock);

	return 0;
}

/* synchronously request a memory map sync - this is only called when the
 * primary process initiates an allocation.
 */
int
request_sync(void)
{
	struct rte_mp_msg msg;
	struct rte_mp_reply reply;
	struct malloc_mp_req *req = (struct malloc_mp_req *)msg.param;
	struct timespec ts;
	int i, ret = -1;

	memset(&msg, 0, sizeof(msg));
	memset(&reply, 0, sizeof(reply));

	/* no need to create tailq entries as this is entirely synchronous */

	msg.num_fds = 0;
	msg.len_param = sizeof(*req);
	strlcpy(msg.name, MP_ACTION_SYNC, sizeof(msg.name));

	/* sync request carries no data */
	req->t = REQ_TYPE_SYNC;
	req->id = get_unique_id();

	ts.tv_nsec = 0;
	ts.tv_sec = MP_TIMEOUT_S;

	/* there may be stray timeout still waiting */
	do {
		ret = rte_mp_request_sync(&msg, &reply, &ts);
	} while (ret != 0 && rte_errno == EEXIST);
	if (ret != 0) {
		/* if IPC is unsupported, behave as if the call succeeded */
		if (rte_errno != ENOTSUP)
			RTE_LOG(ERR, EAL, "Could not send sync request to secondary process\n");
		else
			ret = 0;
		goto out;
	}

	if (reply.nb_received != reply.nb_sent) {
		RTE_LOG(ERR, EAL, "Not all secondaries have responded\n");
		goto out;
	}

	for (i = 0; i < reply.nb_received; i++) {
		struct malloc_mp_req *resp =
				(struct malloc_mp_req *)reply.msgs[i].param;
		if (resp->t != REQ_TYPE_SYNC) {
			RTE_LOG(ERR, EAL, "Unexpected response from secondary\n");
			goto out;
		}
		if (resp->id != req->id) {
			RTE_LOG(ERR, EAL, "Wrong request ID\n");
			goto out;
		}
		if (resp->result != REQ_RESULT_SUCCESS) {
			RTE_LOG(ERR, EAL, "Secondary process failed to synchronize\n");
			goto out;
		}
	}

	ret = 0;
out:
	free(reply.msgs);
	return ret;
}

/* this is a synchronous wrapper around a bunch of asynchronous requests to
 * primary process. this will initiate a request and wait until responses come.
 */
int
request_to_primary(struct malloc_mp_req *user_req)
{
	struct rte_mp_msg msg;
	struct malloc_mp_req *msg_req = (struct malloc_mp_req *)msg.param;
	struct mp_request *entry;
	struct timespec ts;
	struct timeval now;
	int ret;

	memset(&msg, 0, sizeof(msg));
	memset(&ts, 0, sizeof(ts));

	pthread_mutex_lock(&mp_request_list.lock);

	entry = malloc(sizeof(*entry));
	if (entry == NULL) {
		RTE_LOG(ERR, EAL, "Cannot allocate memory for request\n");
		goto fail;
	}

	memset(entry, 0, sizeof(*entry));

	if (gettimeofday(&now, NULL) < 0) {
		RTE_LOG(ERR, EAL, "Cannot get current time\n");
		goto fail;
	}

	ts.tv_nsec = (now.tv_usec * 1000) % 1000000000;
	ts.tv_sec = now.tv_sec + MP_TIMEOUT_S +
			(now.tv_usec * 1000) / 1000000000;
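	/* (pthread_cond_timedwait() below expects an absolute deadline, hence
	 * the addition of the current time to the timeout)
	 */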

	/* initialize the request */
	pthread_cond_init(&entry->cond, NULL);

	msg.num_fds = 0;
	msg.len_param = sizeof(*msg_req);
	strlcpy(msg.name, MP_ACTION_REQUEST, sizeof(msg.name));

	/* (attempt to) get a unique id */
	user_req->id = get_unique_id();

	/* copy contents of user request into the message */
	memcpy(msg_req, user_req, sizeof(*msg_req));

	if (rte_mp_sendmsg(&msg)) {
		RTE_LOG(ERR, EAL, "Cannot send message to primary\n");
		goto fail;
	}

	/* copy contents of user request into active request */
	memcpy(&entry->user_req, user_req, sizeof(*user_req));

	/* mark request as in progress */
	entry->state = REQ_STATE_ACTIVE;

	TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);

	/* finally, wait on timeout */
	do {
		ret = pthread_cond_timedwait(&entry->cond,
				&mp_request_list.lock, &ts);
	} while (ret != 0 && ret != ETIMEDOUT);

	if (entry->state != REQ_STATE_COMPLETE) {
		RTE_LOG(ERR, EAL, "Request timed out\n");
		ret = -1;
	} else {
		ret = 0;
		user_req->result = entry->user_req.result;
	}
	TAILQ_REMOVE(&mp_request_list.list, entry, next);
	free(entry);

	pthread_mutex_unlock(&mp_request_list.lock);
	return ret;
fail:
	pthread_mutex_unlock(&mp_request_list.lock);
	free(entry);
	return -1;
}

int
register_mp_requests(void)
{
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* it's OK for primary to not support IPC */
		if (rte_mp_action_register(MP_ACTION_REQUEST, handle_request) &&
				rte_errno != ENOTSUP) {
			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
				MP_ACTION_REQUEST);
			return -1;
		}
	} else {
		if (rte_mp_action_register(MP_ACTION_SYNC, handle_sync)) {
			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
				MP_ACTION_SYNC);
			return -1;
		}
		if (rte_mp_action_register(MP_ACTION_ROLLBACK, handle_sync)) {
			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
				MP_ACTION_ROLLBACK);
			return -1;
		}
		if (rte_mp_action_register(MP_ACTION_RESPONSE,
				handle_response)) {
			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
				MP_ACTION_RESPONSE);
			return -1;
		}
	}
	return 0;
}