1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /** \file 35 * TCP network implementation abstraction layer 36 */ 37 38 #ifndef SPDK_INTERNAL_SOCK_H 39 #define SPDK_INTERNAL_SOCK_H 40 41 #include "spdk/stdinc.h" 42 #include "spdk/sock.h" 43 #include "spdk/queue.h" 44 #include "spdk/likely.h" 45 46 #ifdef __cplusplus 47 extern "C" { 48 #endif 49 50 #define MAX_EVENTS_PER_POLL 32 51 #define DEFAULT_SOCK_PRIORITY 0 52 #define MIN_SOCK_PIPE_SIZE 1024 53 #define MIN_SO_RCVBUF_SIZE (2 * 1024 * 1024) 54 #define MIN_SO_SNDBUF_SIZE (2 * 1024 * 1024) 55 #define IOV_BATCH_SIZE 64 56 57 struct spdk_sock { 58 struct spdk_net_impl *net_impl; 59 struct spdk_sock_opts opts; 60 struct spdk_sock_group_impl *group_impl; 61 TAILQ_ENTRY(spdk_sock) link; 62 63 TAILQ_HEAD(, spdk_sock_request) queued_reqs; 64 TAILQ_HEAD(, spdk_sock_request) pending_reqs; 65 int queued_iovcnt; 66 int cb_cnt; 67 spdk_sock_cb cb_fn; 68 void *cb_arg; 69 uint32_t zerocopy_threshold; 70 struct { 71 uint8_t closed : 1; 72 uint8_t reserved : 7; 73 } flags; 74 }; 75 76 struct spdk_sock_group { 77 STAILQ_HEAD(, spdk_sock_group_impl) group_impls; 78 void *ctx; 79 }; 80 81 struct spdk_sock_group_impl { 82 struct spdk_net_impl *net_impl; 83 struct spdk_sock_group *group; 84 TAILQ_HEAD(, spdk_sock) socks; 85 STAILQ_ENTRY(spdk_sock_group_impl) link; 86 }; 87 88 struct spdk_sock_map { 89 STAILQ_HEAD(, spdk_sock_placement_id_entry) entries; 90 pthread_mutex_t mtx; 91 }; 92 93 struct spdk_net_impl { 94 const char *name; 95 int priority; 96 97 int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, char *caddr, 98 int clen, uint16_t *cport); 99 struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts); 100 struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts); 101 struct spdk_sock *(*accept)(struct spdk_sock *sock); 102 int (*close)(struct spdk_sock *sock); 103 ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len); 104 ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt); 105 ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt); 106 107 void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req); 108 int (*flush)(struct spdk_sock *sock); 109 110 int (*set_recvlowat)(struct spdk_sock *sock, int nbytes); 111 int (*set_recvbuf)(struct spdk_sock *sock, int sz); 112 int (*set_sendbuf)(struct spdk_sock *sock, int sz); 113 114 bool (*is_ipv6)(struct spdk_sock *sock); 115 bool (*is_ipv4)(struct spdk_sock *sock); 116 bool (*is_connected)(struct spdk_sock *sock); 117 118 struct spdk_sock_group_impl *(*group_impl_get_optimal)(struct spdk_sock *sock, 119 struct spdk_sock_group_impl *hint); 120 struct spdk_sock_group_impl *(*group_impl_create)(void); 121 int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock); 122 int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock); 123 int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events, 124 struct spdk_sock **socks); 125 int (*group_impl_close)(struct spdk_sock_group_impl *group); 126 127 int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len); 128 int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len); 129 130 STAILQ_ENTRY(spdk_net_impl) link; 131 }; 132 133 void spdk_net_impl_register(struct spdk_net_impl *impl, int priority); 134 135 #define SPDK_NET_IMPL_REGISTER(name, impl, priority) \ 136 static void __attribute__((constructor)) net_impl_register_##name(void) \ 137 { \ 138 spdk_net_impl_register(impl, priority); \ 139 } 140 141 static inline void 142 spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req) 143 { 144 assert(req->internal.curr_list == NULL); 145 TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link); 146 #ifdef DEBUG 147 req->internal.curr_list = &sock->queued_reqs; 148 #endif 149 sock->queued_iovcnt += req->iovcnt; 150 } 151 152 static inline void 153 spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req) 154 { 155 assert(req->internal.curr_list == &sock->queued_reqs); 156 TAILQ_REMOVE(&sock->queued_reqs, req, internal.link); 157 assert(sock->queued_iovcnt >= req->iovcnt); 158 sock->queued_iovcnt -= req->iovcnt; 159 TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link); 160 #ifdef DEBUG 161 req->internal.curr_list = &sock->pending_reqs; 162 #endif 163 } 164 165 static inline int 166 spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err) 167 { 168 bool closed; 169 int rc = 0; 170 171 assert(req->internal.curr_list == &sock->pending_reqs); 172 TAILQ_REMOVE(&sock->pending_reqs, req, internal.link); 173 #ifdef DEBUG 174 req->internal.curr_list = NULL; 175 #endif 176 177 req->internal.offset = 0; 178 req->internal.is_zcopy = 0; 179 180 closed = sock->flags.closed; 181 sock->cb_cnt++; 182 req->cb_fn(req->cb_arg, err); 183 assert(sock->cb_cnt > 0); 184 sock->cb_cnt--; 185 186 if (sock->cb_cnt == 0 && !closed && sock->flags.closed) { 187 /* The user closed the socket in response to a callback above. */ 188 rc = -1; 189 spdk_sock_close(&sock); 190 } 191 192 return rc; 193 } 194 195 static inline int 196 spdk_sock_abort_requests(struct spdk_sock *sock) 197 { 198 struct spdk_sock_request *req; 199 bool closed; 200 int rc = 0; 201 202 closed = sock->flags.closed; 203 sock->cb_cnt++; 204 205 req = TAILQ_FIRST(&sock->pending_reqs); 206 while (req) { 207 assert(req->internal.curr_list == &sock->pending_reqs); 208 TAILQ_REMOVE(&sock->pending_reqs, req, internal.link); 209 #ifdef DEBUG 210 req->internal.curr_list = NULL; 211 #endif 212 213 req->cb_fn(req->cb_arg, -ECANCELED); 214 215 req = TAILQ_FIRST(&sock->pending_reqs); 216 } 217 218 req = TAILQ_FIRST(&sock->queued_reqs); 219 while (req) { 220 assert(req->internal.curr_list == &sock->queued_reqs); 221 TAILQ_REMOVE(&sock->queued_reqs, req, internal.link); 222 #ifdef DEBUG 223 req->internal.curr_list = NULL; 224 #endif 225 226 assert(sock->queued_iovcnt >= req->iovcnt); 227 sock->queued_iovcnt -= req->iovcnt; 228 229 req->cb_fn(req->cb_arg, -ECANCELED); 230 231 req = TAILQ_FIRST(&sock->queued_reqs); 232 } 233 assert(sock->cb_cnt > 0); 234 sock->cb_cnt--; 235 236 assert(TAILQ_EMPTY(&sock->queued_reqs)); 237 assert(TAILQ_EMPTY(&sock->pending_reqs)); 238 239 if (sock->cb_cnt == 0 && !closed && sock->flags.closed) { 240 /* The user closed the socket in response to a callback above. */ 241 rc = -1; 242 spdk_sock_close(&sock); 243 } 244 245 return rc; 246 } 247 248 static inline int 249 spdk_sock_prep_reqs(struct spdk_sock *_sock, struct iovec *iovs, int index, 250 struct spdk_sock_request **last_req, int *flags) 251 { 252 int iovcnt, i; 253 struct spdk_sock_request *req; 254 unsigned int offset; 255 uint64_t total = 0; 256 257 /* Gather an iov */ 258 iovcnt = index; 259 if (spdk_unlikely(iovcnt >= IOV_BATCH_SIZE)) { 260 goto end; 261 } 262 263 if (last_req != NULL && *last_req != NULL) { 264 req = TAILQ_NEXT(*last_req, internal.link); 265 } else { 266 req = TAILQ_FIRST(&_sock->queued_reqs); 267 } 268 269 while (req) { 270 offset = req->internal.offset; 271 272 for (i = 0; i < req->iovcnt; i++) { 273 /* Consume any offset first */ 274 if (offset >= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len) { 275 offset -= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len; 276 continue; 277 } 278 279 iovs[iovcnt].iov_base = SPDK_SOCK_REQUEST_IOV(req, i)->iov_base + offset; 280 iovs[iovcnt].iov_len = SPDK_SOCK_REQUEST_IOV(req, i)->iov_len - offset; 281 282 total += iovs[iovcnt].iov_len; 283 iovcnt++; 284 offset = 0; 285 286 if (iovcnt >= IOV_BATCH_SIZE) { 287 break; 288 } 289 } 290 if (iovcnt >= IOV_BATCH_SIZE) { 291 break; 292 } 293 294 if (last_req != NULL) { 295 *last_req = req; 296 } 297 req = TAILQ_NEXT(req, internal.link); 298 } 299 300 end: 301 302 #if defined(MSG_ZEROCOPY) 303 /* if data size < zerocopy_threshold, remove MSG_ZEROCOPY flag */ 304 if (total < _sock->zerocopy_threshold && flags != NULL) { 305 *flags = *flags & (~MSG_ZEROCOPY); 306 } 307 #endif 308 309 return iovcnt; 310 } 311 312 static inline void 313 spdk_sock_get_placement_id(int fd, enum spdk_placement_mode mode, int *placement_id) 314 { 315 *placement_id = -1; 316 317 switch (mode) { 318 case PLACEMENT_NONE: 319 break; 320 case PLACEMENT_MARK: 321 case PLACEMENT_NAPI: { 322 #if defined(SO_INCOMING_NAPI_ID) 323 socklen_t len = sizeof(int); 324 325 getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, placement_id, &len); 326 #endif 327 break; 328 } 329 case PLACEMENT_CPU: { 330 #if defined(SO_INCOMING_CPU) 331 socklen_t len = sizeof(int); 332 333 getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, placement_id, &len); 334 #endif 335 break; 336 } 337 default: 338 break; 339 } 340 } 341 342 /** 343 * Insert a group into the placement map. 344 * If the group is already in the map, take a reference. 345 */ 346 int spdk_sock_map_insert(struct spdk_sock_map *map, int placement_id, 347 struct spdk_sock_group_impl *group_impl); 348 349 /** 350 * Release a reference for the given placement_id. If the reference count goes to 0, the 351 * entry will no longer be associated with a group. 352 */ 353 void spdk_sock_map_release(struct spdk_sock_map *map, int placement_id); 354 355 /** 356 * Look up the group for the given placement_id. 357 */ 358 int spdk_sock_map_lookup(struct spdk_sock_map *map, int placement_id, 359 struct spdk_sock_group_impl **group_impl, struct spdk_sock_group_impl *hint); 360 361 /** 362 * Find a placement id with no associated group 363 */ 364 int spdk_sock_map_find_free(struct spdk_sock_map *map); 365 366 /** 367 * Clean up all memory associated with the given map 368 */ 369 void spdk_sock_map_cleanup(struct spdk_sock_map *map); 370 371 #ifdef __cplusplus 372 } 373 #endif 374 375 #endif /* SPDK_INTERNAL_SOCK_H */ 376