xref: /freebsd-src/sys/ofed/drivers/infiniband/core/ib_addr.c (revision 32a95656b51ebefcdf3e0b02c110825f59abd26f)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
5  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
6  * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
7  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
8  *
9  * This software is available to you under a choice of one of two
10  * licenses.  You may choose to be licensed under the terms of the GNU
11  * General Public License (GPL) Version 2, available from the file
12  * COPYING in the main directory of this source tree, or the
13  * OpenIB.org BSD license below:
14  *
15  *     Redistribution and use in source and binary forms, with or
16  *     without modification, are permitted provided that the following
17  *     conditions are met:
18  *
19  *      - Redistributions of source code must retain the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer.
22  *
23  *      - Redistributions in binary form must reproduce the above
24  *        copyright notice, this list of conditions and the following
25  *        disclaimer in the documentation and/or other materials
26  *        provided with the distribution.
27  *
28  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35  * SOFTWARE.
36  */
37 
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include <linux/mutex.h>
42 #include <linux/slab.h>
43 #include <linux/workqueue.h>
44 #include <linux/module.h>
45 #include <net/if_llatbl.h>
46 #include <net/route.h>
47 #include <net/route/nhop.h>
48 #include <net/netevent.h>
49 #include <net/if_llatbl.h>
50 #include <rdma/ib_addr.h>
51 #include <rdma/ib.h>
52 
53 #include <netinet/in_fib.h>
54 #include <netinet/if_ether.h>
55 #include <netinet/ip_var.h>
56 #include <netinet6/scope6_var.h>
57 #include <netinet6/in6_pcb.h>
58 #include <netinet6/in6_fib.h>
59 
60 #include "core_priv.h"
61 
62 struct addr_req {
63 	struct list_head list;
64 	struct sockaddr_storage src_addr;
65 	struct sockaddr_storage dst_addr;
66 	struct rdma_dev_addr *addr;
67 	struct rdma_addr_client *client;
68 	void *context;
69 	void (*callback)(int status, struct sockaddr *src_addr,
70 			 struct rdma_dev_addr *addr, void *context);
71 	int timeout;
72 	int status;
73 };
74 
/* Handler for the delayed work item declared below; defined later. */
static void process_req(struct work_struct *work);

/* Workqueue that runs "work"; created in addr_init(). */
static struct workqueue_struct *addr_wq;

/*
 * Pending requests, kept ordered by timeout by queue_req().  Every
 * access to req_list in this file is made with "lock" held.
 */
static LIST_HEAD(req_list);
static DEFINE_MUTEX(lock);

/* Delayed work item (re)armed on addr_wq by set_timeout(). */
static DECLARE_DELAYED_WORK(work, process_req);
81 
82 int rdma_addr_size(struct sockaddr *addr)
83 {
84 	switch (addr->sa_family) {
85 	case AF_INET:
86 		return sizeof(struct sockaddr_in);
87 	case AF_INET6:
88 		return sizeof(struct sockaddr_in6);
89 	case AF_IB:
90 		return sizeof(struct sockaddr_ib);
91 	default:
92 		return 0;
93 	}
94 }
95 EXPORT_SYMBOL(rdma_addr_size);
96 
97 int rdma_addr_size_in6(struct sockaddr_in6 *addr)
98 {
99 	int ret = rdma_addr_size((struct sockaddr *) addr);
100 
101 	return ret <= sizeof(*addr) ? ret : 0;
102 }
103 EXPORT_SYMBOL(rdma_addr_size_in6);
104 
105 int rdma_addr_size_kss(struct sockaddr_storage *addr)
106 {
107 	int ret = rdma_addr_size((struct sockaddr *) addr);
108 
109 	return ret <= sizeof(*addr) ? ret : 0;
110 }
111 EXPORT_SYMBOL(rdma_addr_size_kss);
112 
113 static struct rdma_addr_client self;
114 
115 void rdma_addr_register_client(struct rdma_addr_client *client)
116 {
117 	atomic_set(&client->refcount, 1);
118 	init_completion(&client->comp);
119 }
120 EXPORT_SYMBOL(rdma_addr_register_client);
121 
122 static inline void put_client(struct rdma_addr_client *client)
123 {
124 	if (atomic_dec_and_test(&client->refcount))
125 		complete(&client->comp);
126 }
127 
128 void rdma_addr_unregister_client(struct rdma_addr_client *client)
129 {
130 	put_client(client);
131 	wait_for_completion(&client->comp);
132 }
133 EXPORT_SYMBOL(rdma_addr_unregister_client);
134 
135 static inline void
136 rdma_copy_addr_sub(u8 *dst, const u8 *src, unsigned min, unsigned max)
137 {
138 	if (min > max)
139 		min = max;
140 	memcpy(dst, src, min);
141 	memset(dst + min, 0, max - min);
142 }
143 
144 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
145 		     const unsigned char *dst_dev_addr)
146 {
147 	/* check for loopback device */
148 	if (dev->if_flags & IFF_LOOPBACK) {
149 		dev_addr->dev_type = ARPHRD_ETHER;
150 		memset(dev_addr->src_dev_addr, 0, MAX_ADDR_LEN);
151 		memset(dev_addr->broadcast, 0, MAX_ADDR_LEN);
152 		memset(dev_addr->dst_dev_addr, 0, MAX_ADDR_LEN);
153 		dev_addr->bound_dev_if = dev->if_index;
154 		return (0);
155 	} else if (dev->if_type == IFT_INFINIBAND)
156 		dev_addr->dev_type = ARPHRD_INFINIBAND;
157 	else if (dev->if_type == IFT_ETHER)
158 		dev_addr->dev_type = ARPHRD_ETHER;
159 	else
160 		dev_addr->dev_type = 0;
161 	rdma_copy_addr_sub(dev_addr->src_dev_addr, IF_LLADDR(dev),
162 			   dev->if_addrlen, MAX_ADDR_LEN);
163 	rdma_copy_addr_sub(dev_addr->broadcast, dev->if_broadcastaddr,
164 			   dev->if_addrlen, MAX_ADDR_LEN);
165 	if (dst_dev_addr != NULL) {
166 		rdma_copy_addr_sub(dev_addr->dst_dev_addr, dst_dev_addr,
167 				   dev->if_addrlen, MAX_ADDR_LEN);
168 	}
169 	dev_addr->bound_dev_if = dev->if_index;
170 	return 0;
171 }
172 EXPORT_SYMBOL(rdma_copy_addr);
173 
174 int rdma_translate_ip(const struct sockaddr *addr,
175 		      struct rdma_dev_addr *dev_addr)
176 {
177 	struct ifnet *dev;
178 	int ret;
179 
180 	if (dev_addr->bound_dev_if) {
181 		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
182 	} else switch (addr->sa_family) {
183 #ifdef INET
184 	case AF_INET:
185 		dev = ip_ifp_find(dev_addr->net,
186 			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
187 		break;
188 #endif
189 #ifdef INET6
190 	case AF_INET6:
191 		dev = ip6_ifp_find(dev_addr->net,
192 			((const struct sockaddr_in6 *)addr)->sin6_addr, 0);
193 		break;
194 #endif
195 	default:
196 		dev = NULL;
197 		break;
198 	}
199 
200 	if (dev != NULL) {
201 		/* disallow connections through 127.0.0.1 itself */
202 		if (dev->if_flags & IFF_LOOPBACK)
203 			ret = -EINVAL;
204 		else
205 			ret = rdma_copy_addr(dev_addr, dev, NULL);
206 		dev_put(dev);
207 	} else {
208 		ret = -ENODEV;
209 	}
210 	return ret;
211 }
212 EXPORT_SYMBOL(rdma_translate_ip);
213 
214 static void set_timeout(int time)
215 {
216 	int delay;	/* under FreeBSD ticks are 32-bit */
217 
218 	delay = time - jiffies;
219 	if (delay <= 0)
220 		delay = 1;
221 	else if (delay > hz)
222 		delay = hz;
223 
224 	mod_delayed_work(addr_wq, &work, delay);
225 }
226 
227 static void queue_req(struct addr_req *req)
228 {
229 	struct addr_req *temp_req;
230 
231 	mutex_lock(&lock);
232 	list_for_each_entry_reverse(temp_req, &req_list, list) {
233 		if (time_after_eq(req->timeout, temp_req->timeout))
234 			break;
235 	}
236 
237 	list_add(&req->list, &temp_req->list);
238 
239 	if (req_list.next == &req->list)
240 		set_timeout(req->timeout);
241 	mutex_unlock(&lock);
242 }
243 
#if defined(INET) || defined(INET6)
/*
 * Map a multicast IP address to a link-level address using the
 * interface's if_resolvemulti method and store the result in edst
 * (if_addrlen bytes, zero-padded to MAX_ADDR_LEN).
 *
 * Returns 0 on success, EOPNOTSUPP when the interface has no
 * if_resolvemulti method, or the error returned by that method.
 * Error values are positive; the callers negate them.
 */
static int addr_resolve_multi(u8 *edst, struct ifnet *ifp, struct sockaddr *dst_in)
{
	struct sockaddr_dl sdl;
	struct sockaddr *llsa = (struct sockaddr *)&sdl;
	int error;

	sdl.sdl_len = sizeof(sdl);

	if (ifp->if_resolvemulti == NULL)
		return (EOPNOTSUPP);

	error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
	if (error != 0)
		return (error);

	rdma_copy_addr_sub(edst, LLADDR((struct sockaddr_dl *)llsa),
	    ifp->if_addrlen, MAX_ADDR_LEN);
	return (0);
}
#endif
266 
267 #ifdef INET
268 static int addr4_resolve(struct sockaddr_in *src_in,
269 			 const struct sockaddr_in *dst_in,
270 			 struct rdma_dev_addr *addr,
271 			 u8 *edst,
272 			 struct ifnet **ifpp)
273 {
274 	enum {
275 		ADDR_VALID = 0,
276 		ADDR_SRC_ANY = 1,
277 		ADDR_DST_ANY = 2,
278 	};
279 	struct sockaddr_in dst_tmp = *dst_in;
280 	in_port_t src_port;
281 	struct sockaddr *saddr = NULL;
282 	struct nhop_object *nh;
283 	struct ifnet *ifp;
284 	int error;
285 	int type;
286 
287 	NET_EPOCH_ASSERT();
288 
289 	/* set VNET, if any */
290 	CURVNET_SET(addr->net);
291 
292 	/* set default TTL limit */
293 	addr->hoplimit = V_ip_defttl;
294 
295 	type = ADDR_VALID;
296 	if (src_in->sin_addr.s_addr == INADDR_ANY)
297 		type |= ADDR_SRC_ANY;
298 	if (dst_tmp.sin_addr.s_addr == INADDR_ANY)
299 		type |= ADDR_DST_ANY;
300 
301 	/*
302 	 * Make sure the socket address length field is set.
303 	 */
304 	dst_tmp.sin_len = sizeof(dst_tmp);
305 
306 	/* Step 1 - lookup destination route if any */
307 	switch (type) {
308 	case ADDR_VALID:
309 	case ADDR_SRC_ANY:
310 		/* regular destination route lookup */
311 		nh = fib4_lookup(RT_DEFAULT_FIB, dst_tmp.sin_addr,0,NHR_NONE,0);
312 		if (nh == NULL) {
313 			error = EHOSTUNREACH;
314 			goto done;
315 		}
316 		break;
317 	default:
318 		error = ENETUNREACH;
319 		goto done;
320 	}
321 
322 	/* Step 2 - find outgoing network interface */
323 	switch (type) {
324 	case ADDR_VALID:
325 		/* get source interface */
326 		if (addr->bound_dev_if != 0) {
327 			ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
328 		} else {
329 			ifp = ip_ifp_find(addr->net, src_in->sin_addr.s_addr);
330 		}
331 
332 		/* check source interface */
333 		if (ifp == NULL) {
334 			error = ENETUNREACH;
335 			goto done;
336 		} else if (ifp->if_flags & IFF_LOOPBACK) {
337 			/*
338 			 * Source address cannot be a loopback device.
339 			 */
340 			error = EHOSTUNREACH;
341 			goto error_put_ifp;
342 		} else if (nh->nh_ifp->if_flags & IFF_LOOPBACK) {
343 			if (memcmp(&src_in->sin_addr, &dst_in->sin_addr,
344 			    sizeof(src_in->sin_addr))) {
345 				/*
346 				 * Destination is loopback, but source
347 				 * and destination address is not the
348 				 * same.
349 				 */
350 				error = EHOSTUNREACH;
351 				goto error_put_ifp;
352 			}
353 			/* get destination network interface from route */
354 			dev_put(ifp);
355 			ifp = nh->nh_ifp;
356 			dev_hold(ifp);
357 		} else if (ifp != nh->nh_ifp) {
358 			/*
359 			 * Source and destination interfaces are
360 			 * different.
361 			 */
362 			error = ENETUNREACH;
363 			goto error_put_ifp;
364 		}
365 		break;
366 	case ADDR_SRC_ANY:
367 		/* check for loopback device */
368 		if (nh->nh_ifp->if_flags & IFF_LOOPBACK)
369 			saddr = (struct sockaddr *)&dst_tmp;
370 		else
371 			saddr = nh->nh_ifa->ifa_addr;
372 
373 		/* get destination network interface from route */
374 		ifp = nh->nh_ifp;
375 		dev_hold(ifp);
376 		break;
377 	default:
378 		break;
379 	}
380 
381 	/*
382 	 * Step 3 - resolve destination MAC address
383 	 */
384 	if (dst_tmp.sin_addr.s_addr == INADDR_BROADCAST) {
385 		rdma_copy_addr_sub(edst, ifp->if_broadcastaddr,
386 		    ifp->if_addrlen, MAX_ADDR_LEN);
387 		error = 0;
388 	} else if (IN_MULTICAST(ntohl(dst_tmp.sin_addr.s_addr))) {
389 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
390 		error = addr_resolve_multi(edst, ifp, (struct sockaddr *)&dst_tmp);
391 		if (error != 0)
392 			goto error_put_ifp;
393 		else if (is_gw)
394 			addr->network = RDMA_NETWORK_IPV4;
395 	} else if (ifp->if_flags & IFF_LOOPBACK) {
396 		memset(edst, 0, MAX_ADDR_LEN);
397 		error = 0;
398 	} else {
399 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
400 		memset(edst, 0, MAX_ADDR_LEN);
401 #ifdef INET6
402 		if (is_gw && nh->gw_sa.sa_family == AF_INET6)
403 			error = nd6_resolve(ifp, LLE_SF(AF_INET, is_gw), NULL,
404 			    &nh->gw_sa, edst, NULL, NULL);
405 		else
406 #endif
407 			error = arpresolve(ifp, is_gw, NULL, is_gw ?
408 			    &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
409 			    edst, NULL, NULL);
410 
411 		if (error != 0)
412 			goto error_put_ifp;
413 		else if (is_gw)
414 			addr->network = RDMA_NETWORK_IPV4;
415 	}
416 
417 	/*
418 	 * Step 4 - update source address, if any
419 	 */
420 	if (saddr != NULL) {
421 		src_port = src_in->sin_port;
422 		memcpy(src_in, saddr, rdma_addr_size(saddr));
423 		src_in->sin_port = src_port;	/* preserve port number */
424 	}
425 
426 	*ifpp = ifp;
427 
428 	goto done;
429 
430 error_put_ifp:
431 	dev_put(ifp);
432 done:
433 	CURVNET_RESTORE();
434 
435 	if (error == EWOULDBLOCK || error == EAGAIN)
436 		error = ENODATA;
437 	return (-error);
438 }
439 #else
440 static int addr4_resolve(struct sockaddr_in *src_in,
441 			 const struct sockaddr_in *dst_in,
442 			 struct rdma_dev_addr *addr,
443 			 u8 *edst,
444 			 struct ifnet **ifpp)
445 {
446 	return -EADDRNOTAVAIL;
447 }
448 #endif
449 
450 #ifdef INET6
451 static int addr6_resolve(struct sockaddr_in6 *src_in,
452 			 const struct sockaddr_in6 *dst_in,
453 			 struct rdma_dev_addr *addr,
454 			 u8 *edst,
455 			 struct ifnet **ifpp)
456 {
457 	enum {
458 		ADDR_VALID = 0,
459 		ADDR_SRC_ANY = 1,
460 		ADDR_DST_ANY = 2,
461 	};
462 	struct sockaddr_in6 dst_tmp = *dst_in;
463 	in_port_t src_port;
464 	struct sockaddr *saddr = NULL;
465 	struct nhop_object *nh;
466 	struct ifnet *ifp;
467 	int error;
468 	int type;
469 
470 	NET_EPOCH_ASSERT();
471 
472 	/* set VNET, if any */
473 	CURVNET_SET(addr->net);
474 
475 	/* set default TTL limit */
476 	addr->hoplimit = V_ip_defttl;
477 
478 	type = ADDR_VALID;
479 	if (ipv6_addr_any(&src_in->sin6_addr))
480 		type |= ADDR_SRC_ANY;
481 	if (ipv6_addr_any(&dst_tmp.sin6_addr))
482 		type |= ADDR_DST_ANY;
483 
484 	/*
485 	 * Make sure the socket address length field is set.
486 	 */
487 	dst_tmp.sin6_len = sizeof(dst_tmp);
488 
489 	/*
490 	 * Make sure the scope ID gets embedded, else nd6_resolve() will
491 	 * not find the record.
492 	 */
493 	dst_tmp.sin6_scope_id = addr->bound_dev_if;
494 	sa6_embedscope(&dst_tmp, 0);
495 
496 	/* Step 1 - lookup destination route if any */
497 	switch (type) {
498 	case ADDR_VALID:
499 		/* sanity check for IPv4 addresses */
500 		if (ipv6_addr_v4mapped(&src_in->sin6_addr) !=
501 		    ipv6_addr_v4mapped(&dst_tmp.sin6_addr)) {
502 			error = EAFNOSUPPORT;
503 			goto done;
504 		}
505 		/* FALLTHROUGH */
506 	case ADDR_SRC_ANY:
507 		/* regular destination route lookup */
508 		nh = fib6_lookup(RT_DEFAULT_FIB, &dst_in->sin6_addr,
509 		    addr->bound_dev_if, NHR_NONE, 0);
510 		if (nh == NULL) {
511 			error = EHOSTUNREACH;
512 			goto done;
513 		}
514 		break;
515 	default:
516 		error = ENETUNREACH;
517 		goto done;
518 	}
519 
520 	/* Step 2 - find outgoing network interface */
521 	switch (type) {
522 	case ADDR_VALID:
523 		/* get source interface */
524 		if (addr->bound_dev_if != 0) {
525 			ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
526 		} else {
527 			ifp = ip6_ifp_find(addr->net, src_in->sin6_addr, 0);
528 		}
529 
530 		/* check source interface */
531 		if (ifp == NULL) {
532 			error = ENETUNREACH;
533 			goto done;
534 		} else if (ifp->if_flags & IFF_LOOPBACK) {
535 			/*
536 			 * Source address cannot be a loopback device.
537 			 */
538 			error = EHOSTUNREACH;
539 			goto error_put_ifp;
540 		} else if (nh->nh_ifp->if_flags & IFF_LOOPBACK) {
541 			if (memcmp(&src_in->sin6_addr, &dst_in->sin6_addr,
542 			    sizeof(src_in->sin6_addr))) {
543 				/*
544 				 * Destination is loopback, but source
545 				 * and destination address is not the
546 				 * same.
547 				 */
548 				error = EHOSTUNREACH;
549 				goto error_put_ifp;
550 			}
551 			/* get destination network interface from route */
552 			dev_put(ifp);
553 			ifp = nh->nh_ifp;
554 			dev_hold(ifp);
555 		} else if (ifp != nh->nh_ifp) {
556 			/*
557 			 * Source and destination interfaces are
558 			 * different.
559 			 */
560 			error = ENETUNREACH;
561 			goto error_put_ifp;
562 		}
563 		break;
564 	case ADDR_SRC_ANY:
565 		/* check for loopback device */
566 		if (nh->nh_ifp->if_flags & IFF_LOOPBACK)
567 			saddr = (struct sockaddr *)&dst_tmp;
568 		else
569 			saddr = nh->nh_ifa->ifa_addr;
570 
571 		/* get destination network interface from route */
572 		ifp = nh->nh_ifp;
573 		dev_hold(ifp);
574 		break;
575 	default:
576 		break;
577 	}
578 
579 	/*
580 	 * Step 3 - resolve destination MAC address
581 	 */
582 	if (IN6_IS_ADDR_MULTICAST(&dst_tmp.sin6_addr)) {
583 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
584 		error = addr_resolve_multi(edst, ifp,
585 		    (struct sockaddr *)&dst_tmp);
586 		if (error != 0)
587 			goto error_put_ifp;
588 		else if (is_gw)
589 			addr->network = RDMA_NETWORK_IPV6;
590 	} else if (nh->nh_ifp->if_flags & IFF_LOOPBACK) {
591 		memset(edst, 0, MAX_ADDR_LEN);
592 		error = 0;
593 	} else {
594 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
595 		memset(edst, 0, MAX_ADDR_LEN);
596 		error = nd6_resolve(ifp, LLE_SF(AF_INET6, is_gw), NULL,
597 		    is_gw ? &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
598 		    edst, NULL, NULL);
599 		if (error != 0)
600 			goto error_put_ifp;
601 		else if (is_gw)
602 			addr->network = RDMA_NETWORK_IPV6;
603 	}
604 
605 	/*
606 	 * Step 4 - update source address, if any
607 	 */
608 	if (saddr != NULL) {
609 		src_port = src_in->sin6_port;
610 		memcpy(src_in, saddr, rdma_addr_size(saddr));
611 		src_in->sin6_port = src_port;	/* preserve port number */
612 	}
613 
614 	*ifpp = ifp;
615 
616 	goto done;
617 
618 error_put_ifp:
619 	dev_put(ifp);
620 done:
621 	CURVNET_RESTORE();
622 
623 	if (error == EWOULDBLOCK || error == EAGAIN)
624 		error = ENODATA;
625 	return (-error);
626 }
627 #else
628 static int addr6_resolve(struct sockaddr_in6 *src_in,
629 			 const struct sockaddr_in6 *dst_in,
630 			 struct rdma_dev_addr *addr,
631 			 u8 *edst,
632 			 struct ifnet **ifpp)
633 {
634 	return -EADDRNOTAVAIL;
635 }
636 #endif
637 
638 static int addr_resolve_neigh(struct ifnet *dev,
639 			      const struct sockaddr *dst_in,
640 			      u8 *edst,
641 			      struct rdma_dev_addr *addr)
642 {
643 	if (dev->if_flags & IFF_LOOPBACK) {
644 		int ret;
645 
646 		/*
647 		 * Binding to a loopback device is not allowed. Make
648 		 * sure the destination device address is global by
649 		 * clearing the bound device interface:
650 		 */
651 		if (addr->bound_dev_if == dev->if_index)
652 			addr->bound_dev_if = 0;
653 
654 		ret = rdma_translate_ip(dst_in, addr);
655 		if (ret == 0) {
656 			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
657 			       MAX_ADDR_LEN);
658 		}
659 		return ret;
660 	}
661 
662 	/* If the device doesn't do ARP internally */
663 	if (!(dev->if_flags & IFF_NOARP))
664 		return rdma_copy_addr(addr, dev, edst);
665 
666 	return rdma_copy_addr(addr, dev, NULL);
667 }
668 
669 static int addr_resolve(struct sockaddr *src_in,
670 			const struct sockaddr *dst_in,
671 			struct rdma_dev_addr *addr)
672 {
673 	struct epoch_tracker et;
674 	struct ifnet *ndev = NULL;
675 	u8 edst[MAX_ADDR_LEN];
676 	int ret;
677 
678 	if (dst_in->sa_family != src_in->sa_family)
679 		return -EINVAL;
680 
681 	NET_EPOCH_ENTER(et);
682 	switch (src_in->sa_family) {
683 	case AF_INET:
684 		ret = addr4_resolve((struct sockaddr_in *)src_in,
685 				    (const struct sockaddr_in *)dst_in,
686 				    addr, edst, &ndev);
687 		break;
688 	case AF_INET6:
689 		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
690 				    (const struct sockaddr_in6 *)dst_in, addr,
691 				    edst, &ndev);
692 		break;
693 	default:
694 		ret = -EADDRNOTAVAIL;
695 		break;
696 	}
697 	NET_EPOCH_EXIT(et);
698 
699 	/* check for error */
700 	if (ret != 0)
701 		return ret;
702 
703 	/* store MAC addresses and check for loopback */
704 	ret = addr_resolve_neigh(ndev, dst_in, edst, addr);
705 
706 	/* set belonging VNET, if any */
707 	addr->net = dev_net(ndev);
708 	dev_put(ndev);
709 
710 	return ret;
711 }
712 
713 static void process_req(struct work_struct *work)
714 {
715 	struct addr_req *req, *temp_req;
716 	struct sockaddr *src_in, *dst_in;
717 	struct list_head done_list;
718 
719 	INIT_LIST_HEAD(&done_list);
720 
721 	mutex_lock(&lock);
722 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
723 		if (req->status == -ENODATA) {
724 			src_in = (struct sockaddr *) &req->src_addr;
725 			dst_in = (struct sockaddr *) &req->dst_addr;
726 			req->status = addr_resolve(src_in, dst_in, req->addr);
727 			if (req->status && time_after_eq(jiffies, req->timeout))
728 				req->status = -ETIMEDOUT;
729 			else if (req->status == -ENODATA)
730 				continue;
731 		}
732 		list_move_tail(&req->list, &done_list);
733 	}
734 
735 	if (!list_empty(&req_list)) {
736 		req = list_entry(req_list.next, struct addr_req, list);
737 		set_timeout(req->timeout);
738 	}
739 	mutex_unlock(&lock);
740 
741 	list_for_each_entry_safe(req, temp_req, &done_list, list) {
742 		list_del(&req->list);
743 		req->callback(req->status, (struct sockaddr *) &req->src_addr,
744 			req->addr, req->context);
745 		put_client(req->client);
746 		kfree(req);
747 	}
748 }
749 
750 int rdma_resolve_ip(struct rdma_addr_client *client,
751 		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
752 		    struct rdma_dev_addr *addr, int timeout_ms,
753 		    void (*callback)(int status, struct sockaddr *src_addr,
754 				     struct rdma_dev_addr *addr, void *context),
755 		    void *context)
756 {
757 	struct sockaddr *src_in, *dst_in;
758 	struct addr_req *req;
759 	int ret = 0;
760 
761 	req = kzalloc(sizeof *req, GFP_KERNEL);
762 	if (!req)
763 		return -ENOMEM;
764 
765 	src_in = (struct sockaddr *) &req->src_addr;
766 	dst_in = (struct sockaddr *) &req->dst_addr;
767 
768 	if (src_addr) {
769 		if (src_addr->sa_family != dst_addr->sa_family) {
770 			ret = -EINVAL;
771 			goto err;
772 		}
773 
774 		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
775 	} else {
776 		src_in->sa_family = dst_addr->sa_family;
777 	}
778 
779 	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
780 	req->addr = addr;
781 	req->callback = callback;
782 	req->context = context;
783 	req->client = client;
784 	atomic_inc(&client->refcount);
785 
786 	req->status = addr_resolve(src_in, dst_in, addr);
787 	switch (req->status) {
788 	case 0:
789 		req->timeout = jiffies;
790 		queue_req(req);
791 		break;
792 	case -ENODATA:
793 		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
794 		queue_req(req);
795 		break;
796 	default:
797 		ret = req->status;
798 		atomic_dec(&client->refcount);
799 		goto err;
800 	}
801 	return ret;
802 err:
803 	kfree(req);
804 	return ret;
805 }
806 EXPORT_SYMBOL(rdma_resolve_ip);
807 
808 int rdma_resolve_ip_route(struct sockaddr *src_addr,
809 			  const struct sockaddr *dst_addr,
810 			  struct rdma_dev_addr *addr)
811 {
812 	struct sockaddr_storage ssrc_addr = {};
813 	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
814 
815 	if (src_addr) {
816 		if (src_addr->sa_family != dst_addr->sa_family)
817 			return -EINVAL;
818 
819 		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
820 	} else {
821 		src_in->sa_family = dst_addr->sa_family;
822 	}
823 
824 	return addr_resolve(src_in, dst_addr, addr);
825 }
826 EXPORT_SYMBOL(rdma_resolve_ip_route);
827 
828 void rdma_addr_cancel(struct rdma_dev_addr *addr)
829 {
830 	struct addr_req *req, *temp_req;
831 
832 	mutex_lock(&lock);
833 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
834 		if (req->addr == addr) {
835 			req->status = -ECANCELED;
836 			req->timeout = jiffies;
837 			list_move(&req->list, &req_list);
838 			set_timeout(req->timeout);
839 			break;
840 		}
841 	}
842 	mutex_unlock(&lock);
843 }
844 EXPORT_SYMBOL(rdma_addr_cancel);
845 
846 struct resolve_cb_context {
847 	struct rdma_dev_addr *addr;
848 	struct completion comp;
849 	int status;
850 };
851 
852 static void resolve_cb(int status, struct sockaddr *src_addr,
853 	     struct rdma_dev_addr *addr, void *context)
854 {
855 	if (!status)
856 		memcpy(((struct resolve_cb_context *)context)->addr,
857 		       addr, sizeof(struct rdma_dev_addr));
858 	((struct resolve_cb_context *)context)->status = status;
859 	complete(&((struct resolve_cb_context *)context)->comp);
860 }
861 
862 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
863 				 const union ib_gid *dgid,
864 				 u8 *dmac, struct ifnet *dev,
865 				 int *hoplimit)
866 {
867 	int ret = 0;
868 	struct rdma_dev_addr dev_addr;
869 	struct resolve_cb_context ctx;
870 
871 	union rdma_sockaddr sgid_addr, dgid_addr;
872 
873 	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
874 	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
875 
876 	memset(&dev_addr, 0, sizeof(dev_addr));
877 
878 	dev_addr.bound_dev_if = dev->if_index;
879 	dev_addr.net = dev_net(dev);
880 
881 	ctx.addr = &dev_addr;
882 	init_completion(&ctx.comp);
883 	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
884 			&dev_addr, 1000, resolve_cb, &ctx);
885 	if (ret)
886 		return ret;
887 
888 	wait_for_completion(&ctx.comp);
889 
890 	ret = ctx.status;
891 	if (ret)
892 		return ret;
893 
894 	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
895 	if (hoplimit)
896 		*hoplimit = dev_addr.hoplimit;
897 	return ret;
898 }
899 EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
900 
901 int addr_init(void)
902 {
903 	addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0);
904 	if (!addr_wq)
905 		return -ENOMEM;
906 
907 	rdma_addr_register_client(&self);
908 
909 	return 0;
910 }
911 
912 void addr_cleanup(void)
913 {
914 	rdma_addr_unregister_client(&self);
915 	destroy_workqueue(addr_wq);
916 }
917