/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <string.h>
#include <sys/time.h>

#include <rte_errno.h>
#include <rte_string_fns.h>

#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"

#include "malloc_elem.h"
#include "malloc_mp.h"

#define MP_ACTION_SYNC "mp_malloc_sync"
/**< request sent by primary process to notify of changes in memory map */
#define MP_ACTION_ROLLBACK "mp_malloc_rollback"
/**< request sent by primary process to ask secondaries to re-synchronize their
 * memory map after a failed allocation has been rolled back. This is
 * essentially a regular sync request, but we cannot send a sync request while
 * another one is still in progress, and here we have to - therefore, it is
 * registered as a separate callback.
 */
#define MP_ACTION_REQUEST "mp_malloc_request"
/**< request sent by secondary process to ask for allocation/deallocation */
#define MP_ACTION_RESPONSE "mp_malloc_response"
/**< response sent to secondary process to indicate result of request */

/* forward declarations */
static int
handle_sync_response(const struct rte_mp_msg *request,
		const struct rte_mp_reply *reply);
static int
handle_rollback_response(const struct rte_mp_msg *request,
		const struct rte_mp_reply *reply);

#define MP_TIMEOUT_S 5 /**< 5 second timeout */

/* when we're allocating, we need to store some state to ensure that we can
 * roll back later
 */
struct primary_alloc_req_state {
	struct malloc_heap *heap;
	struct rte_memseg **ms;
	int ms_len;
	struct malloc_elem *elem;
	void *map_addr;
	size_t map_len;
};

enum req_state {
	REQ_STATE_INACTIVE = 0,
	REQ_STATE_ACTIVE,
	REQ_STATE_COMPLETE
};

struct mp_request {
	TAILQ_ENTRY(mp_request) next;
	struct malloc_mp_req user_req; /**< contents of request */
	pthread_cond_t cond; /**< variable we use to time out on this request */
	enum req_state state; /**< indicate status of this request */
	struct primary_alloc_req_state alloc_state;
};

/*
 * We could've used just a single request, but it may be possible for
 * secondaries to timeout earlier than the primary, and send a new request while
 * primary is still expecting replies to the old one. Therefore, each new
 * request will get assigned a new ID, which is how we will distinguish between
 * expected and unexpected messages.
 */
TAILQ_HEAD(mp_request_list, mp_request);
static struct {
	struct mp_request_list list;
	pthread_mutex_t lock;
} mp_request_list = {
	.list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
	.lock = PTHREAD_MUTEX_INITIALIZER
};

/**
 * General workflow is the following:
 *
 * Allocation:
 * S: send request to primary
 * P: attempt to allocate memory
 *    if failed, sendmsg failure
 *    if success, send sync request
 * S: if received msg of failure, quit
 *    if received sync request, synchronize memory map and reply with result
 * P: if received sync request result
 *    if success, sendmsg success
 *    if failure, roll back allocation and send a rollback request
 * S: if received msg of success, quit
 *    if received rollback request, synchronize memory map and reply with result
 * P: if received sync request result
 *    sendmsg sync request result
 * S: if received msg, quit
 *
 * Aside from timeouts, there are three points where we can quit:
 *  - if allocation failed straight away
 *  - if allocation and sync request succeeded
 *  - if allocation succeeded, sync request failed, allocation rolled back and
 *    rollback request received (irrespective of whether it succeeded or failed)
 *
 * Deallocation:
 * S: send request to primary
 * P: attempt to deallocate memory
 *    if failed, sendmsg failure
 *    if success, send sync request
 * S: if received msg of failure, quit
 *    if received sync request, synchronize memory map and reply with result
 * P: if received sync request result
 *    sendmsg sync request result
 * S: if received msg, quit
 *
 * There is no "rollback" from deallocation, as it's safe to have some memory
 * mapped in some processes - it's absent from the heap, so it won't get used.
 */

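/*
 * Illustrative sketch only (kept under "#if 0", never compiled): how a
 * secondary process is expected to drive the protocol described above. The
 * wrapper names (example_secondary_alloc/example_secondary_free) and the way
 * the heap index is derived are assumptions for illustration - the real
 * callers live in the malloc heap code. Only fields that this file itself
 * reads (t, alloc_req, free_req, result) are filled in.
 */
#if 0
static int
example_secondary_alloc(struct malloc_heap *heap, size_t elt_size, int socket,
		unsigned int flags, size_t align, size_t bound, bool contig,
		uint64_t page_sz)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct malloc_mp_req req;

	memset(&req, 0, sizeof(req));

	/* describe the allocation; the primary re-validates all of this */
	req.t = REQ_TYPE_ALLOC;
	req.alloc_req.align = align;
	req.alloc_req.bound = bound;
	req.alloc_req.contig = contig;
	req.alloc_req.flags = flags;
	req.alloc_req.elt_size = elt_size;
	req.alloc_req.page_sz = page_sz;
	req.alloc_req.socket = socket;
	req.alloc_req.malloc_heap_idx = heap - mcfg->malloc_heaps;

	/* blocks until the primary replies or MP_TIMEOUT_S expires */
	if (request_to_primary(&req) != 0)
		return -1;

	return req.result == REQ_RESULT_SUCCESS ? 0 : -1;
}

static int
example_secondary_free(void *addr, size_t len)
{
	struct malloc_mp_req req;

	memset(&req, 0, sizeof(req));

	/* a free request just carries the address and length of the area */
	req.t = REQ_TYPE_FREE;
	req.free_req.addr = addr;
	req.free_req.len = len;

	if (request_to_primary(&req) != 0)
		return -1;

	return req.result == REQ_RESULT_SUCCESS ? 0 : -1;
}
#endif
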
static struct mp_request *
find_request_by_id(uint64_t id)
{
	struct mp_request *req;
	TAILQ_FOREACH(req, &mp_request_list.list, next) {
		if (req->user_req.id == id)
			break;
	}
	return req;
}

/* this ID is, like, totally guaranteed to be absolutely unique. pinky swear. */
static uint64_t
get_unique_id(void)
{
	uint64_t id;
	do {
		id = rte_rand();
	} while (find_request_by_id(id) != NULL);
	return id;
}

/* secondary will respond to sync requests thusly */
static int
handle_sync(const struct rte_mp_msg *msg, const void *peer)
{
	struct rte_mp_msg reply;
	const struct malloc_mp_req *req =
			(const struct malloc_mp_req *)msg->param;
	struct malloc_mp_req *resp =
			(struct malloc_mp_req *)reply.param;
	int ret;

	if (req->t != REQ_TYPE_SYNC) {
		RTE_LOG(ERR, EAL, "Unexpected request from primary\n");
		return -1;
	}

	memset(&reply, 0, sizeof(reply));

	reply.num_fds = 0;
	strlcpy(reply.name, msg->name, sizeof(reply.name));
	reply.len_param = sizeof(*resp);

	ret = eal_memalloc_sync_with_primary();

	resp->t = REQ_TYPE_SYNC;
	resp->id = req->id;
	resp->result = ret == 0 ? REQ_RESULT_SUCCESS : REQ_RESULT_FAIL;

	return rte_mp_reply(&reply, peer);
}

static int
handle_free_request(const struct malloc_mp_req *m)
{
	const struct rte_memseg_list *msl;
	void *start, *end;
	size_t len;

	len = m->free_req.len;
	start = m->free_req.addr;
	end = RTE_PTR_ADD(start, len - 1);

	/* check if the requested memory actually exists */
	msl = rte_mem_virt2memseg_list(start);
	if (msl == NULL) {
		RTE_LOG(ERR, EAL, "Requested to free unknown memory\n");
		return -1;
	}

	/* check if end is within the same memory region */
	if (rte_mem_virt2memseg_list(end) != msl) {
		RTE_LOG(ERR, EAL, "Requested to free memory spanning multiple regions\n");
		return -1;
	}

	/* we're supposed to only free memory that's not external */
	if (msl->external) {
		RTE_LOG(ERR, EAL, "Requested to free external memory\n");
		return -1;
	}

	/* now that we've validated the request, announce it */
	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
			m->free_req.addr, m->free_req.len);

	/* now, do the actual freeing */
	return malloc_heap_free_pages(m->free_req.addr, m->free_req.len);
}

static int
handle_alloc_request(const struct malloc_mp_req *m,
		struct mp_request *req)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	const struct malloc_req_alloc *ar = &m->alloc_req;
	struct malloc_heap *heap;
	struct malloc_elem *elem;
	struct rte_memseg **ms;
	size_t alloc_sz;
	int n_segs;
	void *map_addr;

	/* this is checked by the API, but we need to prevent divide by zero */
	if (ar->page_sz == 0 || !rte_is_power_of_2(ar->page_sz)) {
		RTE_LOG(ERR, EAL, "Attempting to allocate with invalid page size\n");
		return -1;
	}

	/* heap idx is index into the heap array, not socket ID */
	if (ar->malloc_heap_idx >= RTE_MAX_HEAPS) {
		RTE_LOG(ERR, EAL, "Attempting to allocate from invalid heap\n");
		return -1;
	}

	heap = &mcfg->malloc_heaps[ar->malloc_heap_idx];

	/*
	 * for allocations, we must only use internal heaps. however,
	 * rte_malloc_heap_socket_is_external() does its own locking, and we
	 * are already holding a read lock here, so instead of calling it we
	 * take advantage of the fact that internal socket ID's are always
	 * lower than RTE_MAX_NUMA_NODES.
	 */
	if (heap->socket_id >= RTE_MAX_NUMA_NODES) {
		RTE_LOG(ERR, EAL, "Attempting to allocate from external heap\n");
		return -1;
	}

	alloc_sz = RTE_ALIGN_CEIL(ar->align + ar->elt_size +
			MALLOC_ELEM_OVERHEAD, ar->page_sz);
	n_segs = alloc_sz / ar->page_sz;

	/* we can't know in advance how many pages we'll need, so we malloc */
	ms = malloc(sizeof(*ms) * n_segs);
	if (ms == NULL) {
		RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n");
		return -1;
	}
	memset(ms, 0, sizeof(*ms) * n_segs);

	elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket,
			ar->flags, ar->align, ar->bound, ar->contig, ms,
			n_segs);

	if (elem == NULL)
		goto fail;

	map_addr = ms[0]->addr;

	eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);

	/* we have succeeded in allocating memory, but we still need to sync
	 * with other processes. however, since DPDK IPC is single-threaded, we
	 * send an asynchronous request and exit this callback.
	 */

	req->alloc_state.ms = ms;
	req->alloc_state.ms_len = n_segs;
	req->alloc_state.map_addr = map_addr;
	req->alloc_state.map_len = alloc_sz;
	req->alloc_state.elem = elem;
	req->alloc_state.heap = heap;

	return 0;
fail:
	free(ms);
	return -1;
}

/* first stage of primary handling requests from secondary */
static int
handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused)
{
	const struct malloc_mp_req *m =
			(const struct malloc_mp_req *)msg->param;
	struct mp_request *entry;
	int ret;

	/* lock access to request */
	pthread_mutex_lock(&mp_request_list.lock);

	/* make sure it's not a dupe */
	entry = find_request_by_id(m->id);
	if (entry != NULL) {
		RTE_LOG(ERR, EAL, "Duplicate request id\n");
		/* don't free the existing in-flight request at the fail label */
		entry = NULL;
		goto fail;
	}

	entry = malloc(sizeof(*entry));
	if (entry == NULL) {
		RTE_LOG(ERR, EAL, "Unable to allocate memory for request\n");
		goto fail;
	}

	/* erase all data */
	memset(entry, 0, sizeof(*entry));

	if (m->t == REQ_TYPE_ALLOC) {
		ret = handle_alloc_request(m, entry);
	} else if (m->t == REQ_TYPE_FREE) {
		ret = handle_free_request(m);
	} else {
		RTE_LOG(ERR, EAL, "Unexpected request from secondary\n");
		goto fail;
	}

	if (ret != 0) {
		struct rte_mp_msg resp_msg;
		struct malloc_mp_req *resp =
				(struct malloc_mp_req *)resp_msg.param;

		/* send failure message straight away */
		resp_msg.num_fds = 0;
		resp_msg.len_param = sizeof(*resp);
		strlcpy(resp_msg.name, MP_ACTION_RESPONSE,
				sizeof(resp_msg.name));

		resp->t = m->t;
		resp->result = REQ_RESULT_FAIL;
		resp->id = m->id;

		if (rte_mp_sendmsg(&resp_msg)) {
			RTE_LOG(ERR, EAL, "Couldn't send response\n");
			goto fail;
		}
		/* we did not modify the request */
		free(entry);
	} else {
		struct rte_mp_msg sr_msg;
		struct malloc_mp_req *sr =
				(struct malloc_mp_req *)sr_msg.param;
		struct timespec ts;

		memset(&sr_msg, 0, sizeof(sr_msg));

		/* we can do something, so send sync request asynchronously */
		sr_msg.num_fds = 0;
		sr_msg.len_param = sizeof(*sr);
		strlcpy(sr_msg.name, MP_ACTION_SYNC, sizeof(sr_msg.name));

		ts.tv_nsec = 0;
		ts.tv_sec = MP_TIMEOUT_S;

		/* sync requests carry no data */
		sr->t = REQ_TYPE_SYNC;
		sr->id = m->id;

		/* there may be stray timeout still waiting */
		do {
			ret = rte_mp_request_async(&sr_msg, &ts,
					handle_sync_response);
		} while (ret != 0 && rte_errno == EEXIST);
		if (ret != 0) {
			RTE_LOG(ERR, EAL, "Couldn't send sync request\n");
			if (m->t == REQ_TYPE_ALLOC)
				free(entry->alloc_state.ms);
			goto fail;
		}

		/* mark request as in progress */
		memcpy(&entry->user_req, m, sizeof(*m));
		entry->state = REQ_STATE_ACTIVE;

		TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
	}
	pthread_mutex_unlock(&mp_request_list.lock);
	return 0;
fail:
	pthread_mutex_unlock(&mp_request_list.lock);
	free(entry);
	return -1;
}

/* callback for asynchronous sync requests for primary. this will either do a
 * sendmsg with results, or trigger rollback request.
 */
static int
handle_sync_response(const struct rte_mp_msg *request,
		const struct rte_mp_reply *reply)
{
	enum malloc_req_result result;
	struct mp_request *entry;
	const struct malloc_mp_req *mpreq =
			(const struct malloc_mp_req *)request->param;
	int i;

	/* lock the request */
	pthread_mutex_lock(&mp_request_list.lock);

	entry = find_request_by_id(mpreq->id);
	if (entry == NULL) {
		RTE_LOG(ERR, EAL, "Wrong request ID\n");
		goto fail;
	}

	result = REQ_RESULT_SUCCESS;

	if (reply->nb_received != reply->nb_sent)
		result = REQ_RESULT_FAIL;

	for (i = 0; i < reply->nb_received; i++) {
		struct malloc_mp_req *resp =
				(struct malloc_mp_req *)reply->msgs[i].param;

		if (resp->t != REQ_TYPE_SYNC) {
			RTE_LOG(ERR, EAL, "Unexpected response to sync request\n");
			result = REQ_RESULT_FAIL;
			break;
		}
		if (resp->id != entry->user_req.id) {
			RTE_LOG(ERR, EAL, "Response to wrong sync request\n");
			result = REQ_RESULT_FAIL;
			break;
		}
		if (resp->result == REQ_RESULT_FAIL) {
			result = REQ_RESULT_FAIL;
			break;
		}
	}

	if (entry->user_req.t == REQ_TYPE_FREE) {
		struct rte_mp_msg msg;
		struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;

		memset(&msg, 0, sizeof(msg));

		/* this is a free request, just sendmsg result */
		resp->t = REQ_TYPE_FREE;
		resp->result = result;
		resp->id = entry->user_req.id;
		msg.num_fds = 0;
		msg.len_param = sizeof(*resp);
		strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));

		if (rte_mp_sendmsg(&msg))
			RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");

		TAILQ_REMOVE(&mp_request_list.list, entry, next);
		free(entry);
	} else if (entry->user_req.t == REQ_TYPE_ALLOC &&
			result == REQ_RESULT_SUCCESS) {
		struct malloc_heap *heap = entry->alloc_state.heap;
		struct rte_mp_msg msg;
		struct malloc_mp_req *resp =
				(struct malloc_mp_req *)msg.param;

		memset(&msg, 0, sizeof(msg));

		heap->total_size += entry->alloc_state.map_len;

		/* result is success, so just notify secondary about this */
		resp->t = REQ_TYPE_ALLOC;
		resp->result = result;
		resp->id = entry->user_req.id;
		msg.num_fds = 0;
		msg.len_param = sizeof(*resp);
		strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));

		if (rte_mp_sendmsg(&msg))
			RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");

		TAILQ_REMOVE(&mp_request_list.list, entry, next);
		free(entry->alloc_state.ms);
		free(entry);
	} else if (entry->user_req.t == REQ_TYPE_ALLOC &&
			result == REQ_RESULT_FAIL) {
		struct rte_mp_msg rb_msg;
		struct malloc_mp_req *rb =
				(struct malloc_mp_req *)rb_msg.param;
		struct timespec ts;
		struct primary_alloc_req_state *state =
				&entry->alloc_state;
		int ret;

		memset(&rb_msg, 0, sizeof(rb_msg));

		/* we've failed to sync, so do a rollback */
		eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
				state->map_addr, state->map_len);

		rollback_expand_heap(state->ms, state->ms_len, state->elem,
				state->map_addr, state->map_len);

		/* send rollback request */
		rb_msg.num_fds = 0;
		rb_msg.len_param = sizeof(*rb);
		strlcpy(rb_msg.name, MP_ACTION_ROLLBACK, sizeof(rb_msg.name));

		ts.tv_nsec = 0;
		ts.tv_sec = MP_TIMEOUT_S;

		/* sync requests carry no data */
		rb->t = REQ_TYPE_SYNC;
		rb->id = entry->user_req.id;

		/* there may be stray timeout still waiting */
		do {
			ret = rte_mp_request_async(&rb_msg, &ts,
					handle_rollback_response);
		} while (ret != 0 && rte_errno == EEXIST);
		if (ret != 0) {
			RTE_LOG(ERR, EAL, "Could not send rollback request to secondary process\n");

			/* we couldn't send rollback request, but that's OK -
			 * secondary will time out, and memory has been removed
			 * from heap anyway.
			 */
			TAILQ_REMOVE(&mp_request_list.list, entry, next);
			free(state->ms);
			free(entry);
			goto fail;
		}
	} else {
		RTE_LOG(ERR, EAL, "Unexpected response to sync request of unknown type\n");
		goto fail;
	}

	pthread_mutex_unlock(&mp_request_list.lock);
	return 0;
fail:
	pthread_mutex_unlock(&mp_request_list.lock);
	return -1;
}

static int
handle_rollback_response(const struct rte_mp_msg *request,
		const struct rte_mp_reply *reply __rte_unused)
{
	struct rte_mp_msg msg;
	struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
	const struct malloc_mp_req *mpreq =
			(const struct malloc_mp_req *)request->param;
	struct mp_request *entry;

	/* lock the request */
	pthread_mutex_lock(&mp_request_list.lock);

	memset(&msg, 0, sizeof(msg));

	entry = find_request_by_id(mpreq->id);
	if (entry == NULL) {
		RTE_LOG(ERR, EAL, "Wrong request ID\n");
		goto fail;
	}

	if (entry->user_req.t != REQ_TYPE_ALLOC) {
		RTE_LOG(ERR, EAL, "Unexpected active request\n");
		goto fail;
	}

	/* we don't care if rollback succeeded, request still failed */
	resp->t = REQ_TYPE_ALLOC;
	resp->result = REQ_RESULT_FAIL;
	resp->id = mpreq->id;
	msg.num_fds = 0;
	msg.len_param = sizeof(*resp);
	strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));

	if (rte_mp_sendmsg(&msg))
		RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");

	/* clean up */
	TAILQ_REMOVE(&mp_request_list.list, entry, next);
	free(entry->alloc_state.ms);
	free(entry);

	pthread_mutex_unlock(&mp_request_list.lock);
	return 0;
fail:
	pthread_mutex_unlock(&mp_request_list.lock);
	return -1;
}

/* final stage of the request from secondary */
static int
handle_response(const struct rte_mp_msg *msg, const void *peer  __rte_unused)
{
	const struct malloc_mp_req *m =
			(const struct malloc_mp_req *)msg->param;
	struct mp_request *entry;

	pthread_mutex_lock(&mp_request_list.lock);

	entry = find_request_by_id(m->id);
	if (entry != NULL) {
		/* update request status */
		entry->user_req.result = m->result;

		entry->state = REQ_STATE_COMPLETE;

		/* trigger thread wakeup */
		pthread_cond_signal(&entry->cond);
	}

	pthread_mutex_unlock(&mp_request_list.lock);

	return 0;
}

/* synchronously request memory map sync, this is only called whenever primary
 * process initiates the allocation.
 */
int
request_sync(void)
{
	struct rte_mp_msg msg;
	struct rte_mp_reply reply;
	struct malloc_mp_req *req = (struct malloc_mp_req *)msg.param;
	struct timespec ts;
	int i, ret = -1;

	memset(&msg, 0, sizeof(msg));
	memset(&reply, 0, sizeof(reply));

	/* no need to create tailq entries as this is entirely synchronous */

	msg.num_fds = 0;
	msg.len_param = sizeof(*req);
	strlcpy(msg.name, MP_ACTION_SYNC, sizeof(msg.name));

	/* sync request carries no data */
	req->t = REQ_TYPE_SYNC;
	req->id = get_unique_id();

	ts.tv_nsec = 0;
	ts.tv_sec = MP_TIMEOUT_S;

	/* there may be stray timeout still waiting */
	do {
		ret = rte_mp_request_sync(&msg, &reply, &ts);
	} while (ret != 0 && rte_errno == EEXIST);
	if (ret != 0) {
		/* if IPC is unsupported, behave as if the call succeeded */
		if (rte_errno != ENOTSUP)
			RTE_LOG(ERR, EAL, "Could not send sync request to secondary process\n");
		else
			ret = 0;
		goto out;
	}

	if (reply.nb_received != reply.nb_sent) {
		RTE_LOG(ERR, EAL, "Not all secondaries have responded\n");
		goto out;
	}

	for (i = 0; i < reply.nb_received; i++) {
		struct malloc_mp_req *resp =
				(struct malloc_mp_req *)reply.msgs[i].param;
		if (resp->t != REQ_TYPE_SYNC) {
			RTE_LOG(ERR, EAL, "Unexpected response from secondary\n");
			goto out;
		}
		if (resp->id != req->id) {
			RTE_LOG(ERR, EAL, "Wrong request ID\n");
			goto out;
		}
		if (resp->result != REQ_RESULT_SUCCESS) {
			RTE_LOG(ERR, EAL, "Secondary process failed to synchronize\n");
			goto out;
		}
	}

	ret = 0;
out:
	free(reply.msgs);
	return ret;
}

/* this is a synchronous wrapper around a bunch of asynchronous requests to
 * primary process. this will initiate a request and wait until responses come.
 */
int
request_to_primary(struct malloc_mp_req *user_req)
{
	struct rte_mp_msg msg;
	struct malloc_mp_req *msg_req = (struct malloc_mp_req *)msg.param;
	struct mp_request *entry;
	struct timespec ts;
	struct timeval now;
	int ret;

	memset(&msg, 0, sizeof(msg));
	memset(&ts, 0, sizeof(ts));

	pthread_mutex_lock(&mp_request_list.lock);

	entry = malloc(sizeof(*entry));
	if (entry == NULL) {
		RTE_LOG(ERR, EAL, "Cannot allocate memory for request\n");
		goto fail;
	}

	memset(entry, 0, sizeof(*entry));

	if (gettimeofday(&now, NULL) < 0) {
		RTE_LOG(ERR, EAL, "Cannot get current time\n");
		goto fail;
	}

	ts.tv_nsec = (now.tv_usec * 1000) % 1000000000;
	ts.tv_sec = now.tv_sec + MP_TIMEOUT_S +
			(now.tv_usec * 1000) / 1000000000;

	/* initialize the request */
	pthread_cond_init(&entry->cond, NULL);

	msg.num_fds = 0;
	msg.len_param = sizeof(*msg_req);
	strlcpy(msg.name, MP_ACTION_REQUEST, sizeof(msg.name));

	/* (attempt to) get a unique id */
	user_req->id = get_unique_id();

	/* copy contents of user request into the message */
	memcpy(msg_req, user_req, sizeof(*msg_req));

	if (rte_mp_sendmsg(&msg)) {
		RTE_LOG(ERR, EAL, "Cannot send message to primary\n");
		goto fail;
	}

	/* copy contents of user request into active request */
	memcpy(&entry->user_req, user_req, sizeof(*user_req));

	/* mark request as in progress */
	entry->state = REQ_STATE_ACTIVE;

	TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);

	/* finally, wait on timeout */
	do {
		ret = pthread_cond_timedwait(&entry->cond,
				&mp_request_list.lock, &ts);
	} while (ret != 0 && ret != ETIMEDOUT);

	if (entry->state != REQ_STATE_COMPLETE) {
		RTE_LOG(ERR, EAL, "Request timed out\n");
		ret = -1;
	} else {
		ret = 0;
		user_req->result = entry->user_req.result;
	}
	TAILQ_REMOVE(&mp_request_list.list, entry, next);
	free(entry);

	pthread_mutex_unlock(&mp_request_list.lock);
	return ret;
fail:
	pthread_mutex_unlock(&mp_request_list.lock);
	free(entry);
	return -1;
}

int
register_mp_requests(void)
{
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* it's OK for primary to not support IPC */
		if (rte_mp_action_register(MP_ACTION_REQUEST, handle_request) &&
				rte_errno != ENOTSUP) {
			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
				MP_ACTION_REQUEST);
			return -1;
		}
	} else {
		if (rte_mp_action_register(MP_ACTION_SYNC, handle_sync)) {
			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
				MP_ACTION_SYNC);
			return -1;
		}
		if (rte_mp_action_register(MP_ACTION_ROLLBACK, handle_sync)) {
			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
				MP_ACTION_ROLLBACK);
			return -1;
		}
		if (rte_mp_action_register(MP_ACTION_RESPONSE,
				handle_response)) {
			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
				MP_ACTION_RESPONSE);
			return -1;
		}
	}
	return 0;
}

void
unregister_mp_requests(void)
{
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		rte_mp_action_unregister(MP_ACTION_REQUEST);
	} else {
		rte_mp_action_unregister(MP_ACTION_SYNC);
		rte_mp_action_unregister(MP_ACTION_ROLLBACK);
		rte_mp_action_unregister(MP_ACTION_RESPONSE);
	}
}
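
/*
 * Illustrative sketch only (kept under "#if 0", never compiled): the expected
 * wiring of the handlers above during process init and teardown. The wrapper
 * names and exact EAL call sites are assumptions for illustration; in DPDK
 * proper this is driven from the malloc heap init/cleanup paths.
 */
#if 0
static int
example_malloc_mp_setup(void)
{
	/* primary registers the request handler, secondaries register the
	 * sync/rollback/response handlers - see register_mp_requests() above
	 */
	if (register_mp_requests() != 0)
		return -1;
	return 0;
}

static void
example_malloc_mp_teardown(void)
{
	/* drop all handlers registered above */
	unregister_mp_requests();
}
#endif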