xref: /spdk/include/spdk_internal/sock.h (revision 9bff828f99403ad2f3ac3b8b29b62acb83b24145)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /** \file
35  * TCP network implementation abstraction layer
36  */
37 
38 #ifndef SPDK_INTERNAL_SOCK_H
39 #define SPDK_INTERNAL_SOCK_H
40 
41 #include "spdk/stdinc.h"
42 #include "spdk/sock.h"
43 #include "spdk/queue.h"
44 #include "spdk/likely.h"
45 
46 #ifdef __cplusplus
47 extern "C" {
48 #endif
49 
/*
 * Compile-time tunables shared by the generic socket layer and the
 * individual network implementations.
 */

/* NOTE(review): presumably the upper bound on events handled per group poll - confirm against users. */
#define MAX_EVENTS_PER_POLL 32
/* NOTE(review): presumably the priority applied when the caller does not specify one - confirm. */
#define DEFAULT_SOCK_PRIORITY 0
/* NOTE(review): presumably the smallest receive pipe size an implementation accepts, in bytes - confirm. */
#define MIN_SOCK_PIPE_SIZE 1024
/* NOTE(review): presumably lower bounds (2 MiB each) for SO_RCVBUF / SO_SNDBUF - confirm against users. */
#define MIN_SO_RCVBUF_SIZE (2 * 1024 * 1024)
#define MIN_SO_SNDBUF_SIZE (2 * 1024 * 1024)
/* Maximum number of iovec entries spdk_sock_prep_reqs() gathers into one batch. */
#define IOV_BATCH_SIZE 64
56 
57 struct spdk_sock {
58 	struct spdk_net_impl		*net_impl;
59 	struct spdk_sock_opts		opts;
60 	struct spdk_sock_group_impl	*group_impl;
61 	TAILQ_ENTRY(spdk_sock)		link;
62 
63 	TAILQ_HEAD(, spdk_sock_request)	queued_reqs;
64 	TAILQ_HEAD(, spdk_sock_request)	pending_reqs;
65 	int				queued_iovcnt;
66 	int				cb_cnt;
67 	spdk_sock_cb			cb_fn;
68 	void				*cb_arg;
69 	uint32_t			zerocopy_threshold;
70 	struct {
71 		uint8_t		closed		: 1;
72 		uint8_t		reserved	: 7;
73 	} flags;
74 };
75 
/**
 * A socket group: a collection of per-implementation group objects plus an
 * opaque context pointer.
 */
struct spdk_sock_group {
	/* One entry per network implementation participating in this group. */
	STAILQ_HEAD(, spdk_sock_group_impl) group_impls;
	/* Opaque context pointer; its meaning is defined outside this header. */
	void *ctx;
};
80 
/**
 * Per-implementation portion of a socket group.
 */
struct spdk_sock_group_impl {
	/* Implementation that owns this group object. */
	struct spdk_net_impl *net_impl;
	/* Generic group this object is part of. */
	struct spdk_sock_group *group;
	/* Sockets that belong to this group implementation. */
	TAILQ_HEAD(, spdk_sock) socks;
	/* Linkage for spdk_sock_group::group_impls. */
	STAILQ_ENTRY(spdk_sock_group_impl) link;
};
87 
/**
 * Map from placement ids to socket group implementations; manipulated through
 * the spdk_sock_map_*() functions declared in this header.
 */
struct spdk_sock_map {
	/* Entries of the map; the entry type is defined outside this header. */
	STAILQ_HEAD(, spdk_sock_placement_id_entry) entries;
	/* NOTE(review): presumably serializes access to entries - confirm in the implementation. */
	pthread_mutex_t mtx;
};
92 
/**
 * Function table describing one network (socket) implementation.
 *
 * The exact contract of each operation is defined by the generic socket layer
 * and the implementations, not by this header.
 */
struct spdk_net_impl {
	/* Name of the implementation. */
	const char *name;
	/* NOTE(review): presumably set from the priority passed at registration - confirm. */
	int priority;

	/* Address query, connection setup and teardown. */
	int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport,
		       char *caddr, int clen, uint16_t *cport);
	struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts);
	struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts);
	struct spdk_sock *(*accept)(struct spdk_sock *sock);
	int (*close)(struct spdk_sock *sock);

	/* Synchronous data transfer. */
	ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len);
	ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
	ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);

	/* Request-based (asynchronous) write path. */
	void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req);
	int (*flush)(struct spdk_sock *sock);

	/* Per-socket buffer tuning. */
	int (*set_recvlowat)(struct spdk_sock *sock, int nbytes);
	int (*set_recvbuf)(struct spdk_sock *sock, int sz);
	int (*set_sendbuf)(struct spdk_sock *sock, int sz);

	/* Socket state queries. */
	bool (*is_ipv6)(struct spdk_sock *sock);
	bool (*is_ipv4)(struct spdk_sock *sock);
	bool (*is_connected)(struct spdk_sock *sock);

	/* Group (polling) operations. */
	struct spdk_sock_group_impl *(*group_impl_get_optimal)(struct spdk_sock *sock,
			struct spdk_sock_group_impl *hint);
	struct spdk_sock_group_impl *(*group_impl_create)(void);
	int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
	int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
	int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events,
			       struct spdk_sock **socks);
	int (*group_impl_close)(struct spdk_sock_group_impl *group);

	/* Implementation-wide option access. */
	int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len);
	int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len);

	/* List linkage between implementations. */
	STAILQ_ENTRY(spdk_net_impl) link;
};
132 
/**
 * Register a network implementation with the given priority.
 * The function is defined outside this header.
 */
void spdk_net_impl_register(struct spdk_net_impl *impl, int priority);

/**
 * Define a constructor function named net_impl_register_<name> that calls
 * spdk_net_impl_register(impl, priority). Because it carries the
 * `constructor` attribute, it runs automatically at load time, before main().
 */
#define SPDK_NET_IMPL_REGISTER(name, impl, priority) \
static void __attribute__((constructor)) net_impl_register_##name(void) \
{ \
	spdk_net_impl_register(impl, priority); \
}
140 
141 static inline void
142 spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req)
143 {
144 	assert(req->internal.curr_list == NULL);
145 	TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link);
146 #ifdef DEBUG
147 	req->internal.curr_list = &sock->queued_reqs;
148 #endif
149 	sock->queued_iovcnt += req->iovcnt;
150 }
151 
152 static inline void
153 spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req)
154 {
155 	assert(req->internal.curr_list == &sock->queued_reqs);
156 	TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
157 	assert(sock->queued_iovcnt >= req->iovcnt);
158 	sock->queued_iovcnt -= req->iovcnt;
159 	TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link);
160 #ifdef DEBUG
161 	req->internal.curr_list = &sock->pending_reqs;
162 #endif
163 }
164 
165 static inline int
166 spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err)
167 {
168 	bool closed;
169 	int rc = 0;
170 
171 	assert(req->internal.curr_list == &sock->pending_reqs);
172 	TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
173 #ifdef DEBUG
174 	req->internal.curr_list = NULL;
175 #endif
176 
177 	req->internal.offset = 0;
178 	req->internal.is_zcopy = 0;
179 
180 	closed = sock->flags.closed;
181 	sock->cb_cnt++;
182 	req->cb_fn(req->cb_arg, err);
183 	assert(sock->cb_cnt > 0);
184 	sock->cb_cnt--;
185 
186 	if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
187 		/* The user closed the socket in response to a callback above. */
188 		rc = -1;
189 		spdk_sock_close(&sock);
190 	}
191 
192 	return rc;
193 }
194 
195 static inline int
196 spdk_sock_abort_requests(struct spdk_sock *sock)
197 {
198 	struct spdk_sock_request *req;
199 	bool closed;
200 	int rc = 0;
201 
202 	closed = sock->flags.closed;
203 	sock->cb_cnt++;
204 
205 	req = TAILQ_FIRST(&sock->pending_reqs);
206 	while (req) {
207 		assert(req->internal.curr_list == &sock->pending_reqs);
208 		TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
209 #ifdef DEBUG
210 		req->internal.curr_list = NULL;
211 #endif
212 
213 		req->cb_fn(req->cb_arg, -ECANCELED);
214 
215 		req = TAILQ_FIRST(&sock->pending_reqs);
216 	}
217 
218 	req = TAILQ_FIRST(&sock->queued_reqs);
219 	while (req) {
220 		assert(req->internal.curr_list == &sock->queued_reqs);
221 		TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
222 #ifdef DEBUG
223 		req->internal.curr_list = NULL;
224 #endif
225 
226 		assert(sock->queued_iovcnt >= req->iovcnt);
227 		sock->queued_iovcnt -= req->iovcnt;
228 
229 		req->cb_fn(req->cb_arg, -ECANCELED);
230 
231 		req = TAILQ_FIRST(&sock->queued_reqs);
232 	}
233 	assert(sock->cb_cnt > 0);
234 	sock->cb_cnt--;
235 
236 	assert(TAILQ_EMPTY(&sock->queued_reqs));
237 	assert(TAILQ_EMPTY(&sock->pending_reqs));
238 
239 	if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
240 		/* The user closed the socket in response to a callback above. */
241 		rc = -1;
242 		spdk_sock_close(&sock);
243 	}
244 
245 	return rc;
246 }
247 
248 static inline int
249 spdk_sock_prep_reqs(struct spdk_sock *_sock, struct iovec *iovs, int index,
250 		    struct spdk_sock_request **last_req, int *flags)
251 {
252 	int iovcnt, i;
253 	struct spdk_sock_request *req;
254 	unsigned int offset;
255 	uint64_t total = 0;
256 
257 	/* Gather an iov */
258 	iovcnt = index;
259 	if (spdk_unlikely(iovcnt >= IOV_BATCH_SIZE)) {
260 		goto end;
261 	}
262 
263 	if (last_req != NULL && *last_req != NULL) {
264 		req = TAILQ_NEXT(*last_req, internal.link);
265 	} else {
266 		req = TAILQ_FIRST(&_sock->queued_reqs);
267 	}
268 
269 	while (req) {
270 		offset = req->internal.offset;
271 
272 		for (i = 0; i < req->iovcnt; i++) {
273 			/* Consume any offset first */
274 			if (offset >= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len) {
275 				offset -= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len;
276 				continue;
277 			}
278 
279 			iovs[iovcnt].iov_base = SPDK_SOCK_REQUEST_IOV(req, i)->iov_base + offset;
280 			iovs[iovcnt].iov_len = SPDK_SOCK_REQUEST_IOV(req, i)->iov_len - offset;
281 
282 			total += iovs[iovcnt].iov_len;
283 			iovcnt++;
284 			offset = 0;
285 
286 			if (iovcnt >= IOV_BATCH_SIZE) {
287 				break;
288 			}
289 		}
290 		if (iovcnt >= IOV_BATCH_SIZE) {
291 			break;
292 		}
293 
294 		if (last_req != NULL) {
295 			*last_req = req;
296 		}
297 		req = TAILQ_NEXT(req, internal.link);
298 	}
299 
300 end:
301 
302 #if defined(MSG_ZEROCOPY)
303 	/* if data size < zerocopy_threshold, remove MSG_ZEROCOPY flag */
304 	if (total < _sock->zerocopy_threshold && flags != NULL) {
305 		*flags = *flags & (~MSG_ZEROCOPY);
306 	}
307 #endif
308 
309 	return iovcnt;
310 }
311 
312 static inline void
313 spdk_sock_get_placement_id(int fd, enum spdk_placement_mode mode, int *placement_id)
314 {
315 	*placement_id = -1;
316 
317 	switch (mode) {
318 	case PLACEMENT_NONE:
319 		break;
320 	case PLACEMENT_MARK:
321 	case PLACEMENT_NAPI: {
322 #if defined(SO_INCOMING_NAPI_ID)
323 		socklen_t len = sizeof(int);
324 
325 		getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, placement_id, &len);
326 #endif
327 		break;
328 	}
329 	case PLACEMENT_CPU: {
330 #if defined(SO_INCOMING_CPU)
331 		socklen_t len = sizeof(int);
332 
333 		getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, placement_id, &len);
334 #endif
335 		break;
336 	}
337 	default:
338 		break;
339 	}
340 }
341 
/**
 * Insert a group into the placement map under the given placement_id.
 * If the group is already in the map, a reference is taken instead.
 *
 * The meaning of the return value is defined by the implementation and is
 * not visible in this header.
 */
int spdk_sock_map_insert(struct spdk_sock_map *map,
			 int placement_id,
			 struct spdk_sock_group_impl *group_impl);

/**
 * Release a reference for the given placement_id. Once the reference count
 * reaches 0, the entry is no longer associated with a group.
 */
void spdk_sock_map_release(struct spdk_sock_map *map, int placement_id);

/**
 * Look up the group for the given placement_id; the result is passed back
 * through group_impl.
 *
 * NOTE(review): how hint is used, and the meaning of the return value, are
 * defined by the implementation - confirm there.
 */
int spdk_sock_map_lookup(struct spdk_sock_map *map,
			 int placement_id,
			 struct spdk_sock_group_impl **group_impl,
			 struct spdk_sock_group_impl *hint);

/**
 * Find a placement id that has no associated group.
 */
int spdk_sock_map_find_free(struct spdk_sock_map *map);

/**
 * Clean up all memory associated with the given map.
 */
void spdk_sock_map_cleanup(struct spdk_sock_map *map);
370 
371 #ifdef __cplusplus
372 }
373 #endif
374 
375 #endif /* SPDK_INTERNAL_SOCK_H */
376