xref: /freebsd-src/sys/netlink/netlink_message_writer.c (revision a466cc55373fc3cf86837f09da729535b57e69a1)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_netlink.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/malloc.h>
34 #include <sys/lock.h>
35 #include <sys/rmlock.h>
36 #include <sys/mbuf.h>
37 #include <sys/ck.h>
38 #include <sys/socket.h>
39 #include <sys/socketvar.h>
40 #include <sys/syslog.h>
41 
42 #include <netlink/netlink.h>
43 #include <netlink/netlink_ctl.h>
44 #include <netlink/netlink_linux.h>
45 #include <netlink/netlink_var.h>
46 
47 #define	DEBUG_MOD_NAME	nl_writer
48 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
49 #include <netlink/netlink_debug.h>
50 _DECLARE_DEBUG(LOG_INFO);
51 
52 /*
53  * The goal of this file is to provide convenient message writing KPI on top of
54  * different storage methods (mbufs, uio, temporary memory chunks).
55  *
56  * The main KPI guarantee is that the (last) message always resides in the contiguous
57  *  memory buffer, so one is able to update the header after writing the entire message.
58  *
59  * This guarantee comes with a side effect of potentially reallocating underlying
60  *  buffer, so one needs to update the desired pointers after something is added
61  *  to the header.
62  *
63  * Messaging layer contains hooks performing transparent Linux translation for the messages.
64  *
65  * There are 3 types of supported targets:
66  *  * socket (adds mbufs to the socket buffer, used for message replies)
67  *  * group (sends mbuf/chain to the specified groups, used for the notifications)
68  *  * chain (returns mbuf chain, used in Linux message translation code)
69  *
70  * There are 3 types of storage:
71  * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
72  *    fits in MCLBYTES)
73  * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
74  *    to be larger than one supported by NS_WRITER_TYPE_MBUF)
75  * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
76  *    Linux sockets, calls translation hook prior to sending messages to the socket).
77  *
78  * Internally, KPI switches between different types of storage when memory requirements
79  *  change. It happens transparently to the caller.
80  */
81 
82 /*
83  * Uma zone for the mbuf-based Netlink storage
84  */
85 static uma_zone_t	nlmsg_zone;
86 
87 static void
88 nl_free_mbuf_storage(struct mbuf *m)
89 {
90 	uma_zfree(nlmsg_zone, m->m_ext.ext_buf);
91 }
92 
93 static int
94 nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused)
95 {
96 	struct mbuf *m = (struct mbuf *)arg;
97 
98 	if (m != NULL)
99 		m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE);
100 
101 	return (0);
102 }
103 
104 static struct mbuf *
105 nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags)
106 {
107 	struct mbuf *m, *m_storage;
108 
109 	if (size <= MHLEN)
110 		return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags));
111 
112 	if (__predict_false(size > NLMBUFSIZE))
113 		return (NULL);
114 
115 	m = m_gethdr(malloc_flags, MT_DATA);
116 	if (m == NULL)
117 		return (NULL);
118 
119 	m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags);
120 	if (m_storage == NULL) {
121 		m_free_raw(m);
122 		return (NULL);
123 	}
124 
125 	return (m);
126 }
127 
128 static struct mbuf *
129 nl_get_mbuf(int size, int malloc_flags)
130 {
131 	return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR));
132 }
133 
134 void
135 nl_init_msg_zone(void)
136 {
137 	nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage,
138 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
139 }
140 
141 void
142 nl_destroy_msg_zone(void)
143 {
144 	uma_zdestroy(nlmsg_zone);
145 }
146 
147 
/*
 * Storage-specific writer operations.
 *
 * nlwriter_op_init: allocates storage for at least @size bytes and resets
 *  @nw to use it; @waitok selects M_WAITOK over M_NOWAIT.
 * nlwriter_op_write: takes over the storage @buf, which holds @datalen bytes
 *  in @cnt messages, and delivers it to the writer target.
 * Both return true on success.
 */
typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok);
typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int datalen,
    int cnt);

/* Operations of one storage type: an initializer plus one hook per target. */
struct nlwriter_ops {
	nlwriter_op_init	*init;
	nlwriter_op_write	*write_socket;
	nlwriter_op_write	*write_group;
	nlwriter_op_write	*write_chain;
};
157 
158 /*
159  * NS_WRITER_TYPE_BUF
160  * Writes message to a temporary memory buffer,
161  * flushing to the socket/group when buffer size limit is reached
162  */
163 static bool
164 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok)
165 {
166 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
167 	nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO);
168 	if (__predict_false(nw->_storage == NULL))
169 		return (false);
170 	nw->alloc_len = size;
171 	nw->offset = 0;
172 	nw->hdr = NULL;
173 	nw->data = nw->_storage;
174 	nw->writer_type = NS_WRITER_TYPE_BUF;
175 	nw->malloc_flag = mflag;
176 	nw->num_messages = 0;
177 	nw->enomem = false;
178 	return (true);
179 }
180 
181 static bool
182 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
183 {
184 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
185 	if (__predict_false(datalen == 0)) {
186 		free(buf, M_NETLINK);
187 		return (true);
188 	}
189 
190 	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
191 	if (__predict_false(m == NULL)) {
192 		/* XXX: should we set sorcverr? */
193 		free(buf, M_NETLINK);
194 		return (false);
195 	}
196 	m_append(m, datalen, buf);
197 	free(buf, M_NETLINK);
198 
199 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
200 	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
201 }
202 
203 static bool
204 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
205 {
206 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
207 	    nw->arg.group.proto, nw->arg.group.id);
208 	if (__predict_false(datalen == 0)) {
209 		free(buf, M_NETLINK);
210 		return (true);
211 	}
212 
213 	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
214 	if (__predict_false(m == NULL)) {
215 		free(buf, M_NETLINK);
216 		return (false);
217 	}
218 	bool success = m_append(m, datalen, buf) != 0;
219 	free(buf, M_NETLINK);
220 
221 	if (!success)
222 		return (false);
223 
224 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
225 	return (true);
226 }
227 
228 static bool
229 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
230 {
231 	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
232 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
233 
234 	if (__predict_false(datalen == 0)) {
235 		free(buf, M_NETLINK);
236 		return (true);
237 	}
238 
239 	if (*m0 == NULL) {
240 		struct mbuf *m;
241 
242 		m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
243 		if (__predict_false(m == NULL)) {
244 			free(buf, M_NETLINK);
245 			return (false);
246 		}
247 		*m0 = m;
248 	}
249 	if (__predict_false(m_append(*m0, datalen, buf) == 0)) {
250 		free(buf, M_NETLINK);
251 		return (false);
252 	}
253 	return (true);
254 }
255 
256 
257 /*
258  * NS_WRITER_TYPE_MBUF
259  * Writes message to the allocated mbuf,
260  * flushing to socket/group when mbuf size limit is reached.
261  * This is the most efficient mechanism as it avoids double-copying.
262  *
263  * Allocates a single mbuf suitable to store up to @size bytes of data.
264  * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr.
265  * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone.
266  * Returns NULL on greater size or the allocation failure.
267  */
268 static bool
269 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok)
270 {
271 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
272 	struct mbuf *m = nl_get_mbuf(size, mflag);
273 
274 	if (__predict_false(m == NULL))
275 		return (false);
276 	nw->alloc_len = M_TRAILINGSPACE(m);
277 	nw->offset = 0;
278 	nw->hdr = NULL;
279 	nw->_storage = (void *)m;
280 	nw->data = mtod(m, void *);
281 	nw->writer_type = NS_WRITER_TYPE_MBUF;
282 	nw->malloc_flag = mflag;
283 	nw->num_messages = 0;
284 	nw->enomem = false;
285 	memset(nw->data, 0, size);
286 	NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
287 	    m, size, nw->alloc_len, nw->data);
288 	return (true);
289 }
290 
291 static bool
292 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
293 {
294 	struct mbuf *m = (struct mbuf *)buf;
295 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
296 
297 	if (__predict_false(datalen == 0)) {
298 		m_freem(m);
299 		return (true);
300 	}
301 
302 	m->m_pkthdr.len = datalen;
303 	m->m_len = datalen;
304 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
305 	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
306 }
307 
308 static bool
309 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
310 {
311 	struct mbuf *m = (struct mbuf *)buf;
312 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
313 	    nw->arg.group.proto, nw->arg.group.id);
314 
315 	if (__predict_false(datalen == 0)) {
316 		m_freem(m);
317 		return (true);
318 	}
319 
320 	m->m_pkthdr.len = datalen;
321 	m->m_len = datalen;
322 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
323 	return (true);
324 }
325 
326 static bool
327 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
328 {
329 	struct mbuf *m_new = (struct mbuf *)buf;
330 	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
331 
332 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
333 
334 	if (__predict_false(datalen == 0)) {
335 		m_freem(m_new);
336 		return (true);
337 	}
338 
339 	m_new->m_pkthdr.len = datalen;
340 	m_new->m_len = datalen;
341 
342 	if (*m0 == NULL) {
343 		*m0 = m_new;
344 	} else {
345 		struct mbuf *m_last;
346 		for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next)
347 			;
348 		m_last->m_next = m_new;
349 		(*m0)->m_pkthdr.len += datalen;
350 	}
351 
352 	return (true);
353 }
354 
355 /*
356  * NS_WRITER_TYPE_LBUF
357  * Writes message to the allocated memory buffer,
358  * flushing to socket/group when mbuf size limit is reached.
359  * Calls linux handler to rewrite messages before sending to the socket.
360  */
361 static bool
362 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
363 {
364 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
365 	size = roundup2(size, sizeof(void *));
366 	int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
367 	char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
368 	if (__predict_false(buf == NULL))
369 		return (false);
370 
371 	/* Fill buffer header first */
372 	struct linear_buffer *lb = (struct linear_buffer *)buf;
373 	lb->base = &buf[sizeof(struct linear_buffer) + size];
374 	lb->size = size + SCRATCH_BUFFER_SIZE;
375 
376 	nw->alloc_len = size;
377 	nw->offset = 0;
378 	nw->hdr = NULL;
379 	nw->_storage = buf;
380 	nw->data = (char *)(lb + 1);
381 	nw->malloc_flag = mflag;
382 	nw->writer_type = NS_WRITER_TYPE_LBUF;
383 	nw->num_messages = 0;
384 	nw->enomem = false;
385 	return (true);
386 }
387 
388 static bool
389 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
390 {
391 	struct linear_buffer *lb = (struct linear_buffer *)buf;
392 	char *data = (char *)(lb + 1);
393 	struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr);
394 
395 	if (__predict_false(datalen == 0)) {
396 		free(buf, M_NETLINK);
397 		return (true);
398 	}
399 
400 	struct mbuf *m = NULL;
401 	if (linux_netlink_p != NULL)
402 		m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
403 	free(buf, M_NETLINK);
404 
405 	if (__predict_false(m == NULL)) {
406 		/* XXX: should we set sorcverr? */
407 		return (false);
408 	}
409 
410 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
411 	return (nl_send_one(m, nlp, cnt, io_flags));
412 }
413 
414 /* Shouldn't be called (maybe except Linux code originating message) */
415 static bool
416 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
417 {
418 	struct linear_buffer *lb = (struct linear_buffer *)buf;
419 	char *data = (char *)(lb + 1);
420 
421 	if (__predict_false(datalen == 0)) {
422 		free(buf, M_NETLINK);
423 		return (true);
424 	}
425 
426 	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
427 	if (__predict_false(m == NULL)) {
428 		free(buf, M_NETLINK);
429 		return (false);
430 	}
431 	m_append(m, datalen, data);
432 	free(buf, M_NETLINK);
433 
434 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
435 	return (true);
436 }
437 
438 static const struct nlwriter_ops nlmsg_writers[] = {
439 	/* NS_WRITER_TYPE_MBUF */
440 	{
441 		.init = nlmsg_get_ns_mbuf,
442 		.write_socket = nlmsg_write_socket_mbuf,
443 		.write_group = nlmsg_write_group_mbuf,
444 		.write_chain = nlmsg_write_chain_mbuf,
445 	},
446 	/* NS_WRITER_TYPE_BUF */
447 	{
448 		.init = nlmsg_get_ns_buf,
449 		.write_socket = nlmsg_write_socket_buf,
450 		.write_group = nlmsg_write_group_buf,
451 		.write_chain = nlmsg_write_chain_buf,
452 	},
453 	/* NS_WRITER_TYPE_LBUF */
454 	{
455 		.init = nlmsg_get_ns_lbuf,
456 		.write_socket = nlmsg_write_socket_lbuf,
457 		.write_group = nlmsg_write_group_lbuf,
458 	},
459 };
460 
461 static void
462 nlmsg_set_callback(struct nl_writer *nw)
463 {
464 	const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type];
465 
466 	switch (nw->writer_target) {
467 	case NS_WRITER_TARGET_SOCKET:
468 		nw->cb = pops->write_socket;
469 		break;
470 	case NS_WRITER_TARGET_GROUP:
471 		nw->cb = pops->write_group;
472 		break;
473 	case NS_WRITER_TARGET_CHAIN:
474 		nw->cb = pops->write_chain;
475 		break;
476 	default:
477 		panic("not implemented");
478 	}
479 }
480 
481 static bool
482 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
483 {
484 	MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
485 	NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
486 	return (nlmsg_writers[type].init(nw, size, waitok));
487 }
488 
489 static bool
490 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
491 {
492 	int type;
493 
494 	if (!is_linux) {
495 		if (__predict_true(size <= MCLBYTES))
496 			type = NS_WRITER_TYPE_MBUF;
497 		else
498 			type = NS_WRITER_TYPE_BUF;
499 	} else
500 		type = NS_WRITER_TYPE_LBUF;
501 	return (nlmsg_get_buf_type(nw, size, type, waitok));
502 }
503 
504 bool
505 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
506 {
507 	if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
508 		return (false);
509 	nw->arg.ptr = (void *)nlp;
510 	nw->writer_target = NS_WRITER_TARGET_SOCKET;
511 	nlmsg_set_callback(nw);
512 	return (true);
513 }
514 
515 bool
516 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
517 {
518 	if (!nlmsg_get_buf(nw, size, false, false))
519 		return (false);
520 	nw->arg.group.proto = protocol;
521 	nw->arg.group.id = group_id;
522 	nw->writer_target = NS_WRITER_TARGET_GROUP;
523 	nlmsg_set_callback(nw);
524 	return (true);
525 }
526 
527 bool
528 _nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
529 {
530 	if (!nlmsg_get_buf(nw, size, false, false))
531 		return (false);
532 	*pm = NULL;
533 	nw->arg.ptr = (void *)pm;
534 	nw->writer_target = NS_WRITER_TARGET_CHAIN;
535 	nlmsg_set_callback(nw);
536 	NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
537 	return (true);
538 }
539 
540 void
541 _nlmsg_ignore_limit(struct nl_writer *nw)
542 {
543 	nw->ignore_limit = true;
544 }
545 
546 bool
547 _nlmsg_flush(struct nl_writer *nw)
548 {
549 
550 	if (__predict_false(nw->hdr != NULL)) {
551 		/* Last message has not been completed, skip it. */
552 		int completed_len = (char *)nw->hdr - nw->data;
553 		/* Send completed messages */
554 		nw->offset -= nw->offset - completed_len;
555 		nw->hdr = NULL;
556 	}
557 
558 	NL_LOG(LOG_DEBUG2, "OUT");
559 	bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
560 	nw->_storage = NULL;
561 
562 	if (!result) {
563 		NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb);
564 	}
565 
566 	return (result);
567 }
568 
569 /*
570  * Flushes previous data and allocates new underlying storage
571  *  sufficient for holding at least @required_len bytes.
572  * Return true on success.
573  */
574 bool
575 _nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
576 {
577 	struct nl_writer ns_new = {};
578 	int completed_len, new_len;
579 
580 	if (nw->enomem)
581 		return (false);
582 
583 	NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim",
584 	    nw->offset, nw->alloc_len, required_len);
585 
586 	/* Calculated new buffer size and allocate it s*/
587 	completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset;
588 	if (completed_len > 0 && required_len < MCLBYTES) {
589 		/* We already ran out of space, use the largest effective size */
590 		new_len = max(nw->alloc_len, MCLBYTES);
591 	} else {
592 		if (nw->alloc_len < MCLBYTES)
593 			new_len = MCLBYTES;
594 		else
595 			new_len = nw->alloc_len * 2;
596 		while (new_len < required_len)
597 			new_len *= 2;
598 	}
599 	bool waitok = (nw->malloc_flag == M_WAITOK);
600 	bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF);
601 	if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) {
602 		nw->enomem = true;
603 		NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM");
604 		return (false);
605 	}
606 	if (nw->ignore_limit)
607 		nlmsg_ignore_limit(&ns_new);
608 
609 	/* Update callback data */
610 	ns_new.writer_target = nw->writer_target;
611 	nlmsg_set_callback(&ns_new);
612 	ns_new.arg = nw->arg;
613 
614 	/* Copy last (unfinished) header to the new storage */
615 	int last_len = nw->offset - completed_len;
616 	if (last_len > 0) {
617 		memcpy(ns_new.data, nw->hdr, last_len);
618 		ns_new.hdr = (struct nlmsghdr *)ns_new.data;
619 		ns_new.offset = last_len;
620 	}
621 
622 	NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len);
623 
624 	/* Flush completed headers & switch to the new nw */
625 	nlmsg_flush(nw);
626 	memcpy(nw, &ns_new, sizeof(struct nl_writer));
627 	NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len);
628 
629 	return (true);
630 }
631 
632 bool
633 _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
634     uint16_t flags, uint32_t len)
635 {
636 	struct nlmsghdr *hdr;
637 
638 	MPASS(nw->hdr == NULL);
639 
640 	int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
641 	if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
642 		if (!nlmsg_refill_buffer(nw, required_len))
643 			return (false);
644 	}
645 
646 	hdr = (struct nlmsghdr *)(&nw->data[nw->offset]);
647 
648 	hdr->nlmsg_len = len;
649 	hdr->nlmsg_type = type;
650 	hdr->nlmsg_flags = flags;
651 	hdr->nlmsg_seq = seq;
652 	hdr->nlmsg_pid = portid;
653 
654 	nw->hdr = hdr;
655 	nw->offset += sizeof(struct nlmsghdr);
656 
657 	return (true);
658 }
659 
660 bool
661 _nlmsg_end(struct nl_writer *nw)
662 {
663 	MPASS(nw->hdr != NULL);
664 
665 	if (nw->enomem) {
666 		NL_LOG(LOG_DEBUG, "ENOMEM when dumping message");
667 		nlmsg_abort(nw);
668 		return (false);
669 	}
670 
671 	nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
672 	NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
673 	    nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
674 	    nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
675 	nw->hdr = NULL;
676 	nw->num_messages++;
677 	return (true);
678 }
679 
680 void
681 _nlmsg_abort(struct nl_writer *nw)
682 {
683 	if (nw->hdr != NULL) {
684 		nw->offset = (uint32_t)((char *)nw->hdr - nw->data);
685 		nw->hdr = NULL;
686 	}
687 }
688 
689 void
690 nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr,
691     struct nl_pstate *npt)
692 {
693 	struct nlmsgerr *errmsg;
694 	int payload_len;
695 	uint32_t flags = nlp->nl_flags;
696 	struct nl_writer *nw = npt->nw;
697 	bool cap_ack;
698 
699 	payload_len = sizeof(struct nlmsgerr);
700 
701 	/*
702 	 * The only case when we send the full message in the
703 	 * reply is when there is an error and NETLINK_CAP_ACK
704 	 * is not set.
705 	 */
706 	cap_ack = (error == 0) || (flags & NLF_CAP_ACK);
707 	if (!cap_ack)
708 		payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr);
709 	payload_len = NETLINK_ALIGN(payload_len);
710 
711 	uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0;
712 	if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK)
713 		nl_flags |= NLM_F_ACK_TLVS;
714 
715 	NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d",
716 	    hdr->nlmsg_type, hdr->nlmsg_seq);
717 
718 	if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len))
719 		goto enomem;
720 
721 	errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr);
722 	errmsg->error = error;
723 	/* In case of error copy the whole message, else just the header */
724 	memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len);
725 
726 	if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK)
727 		nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg);
728 	if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK)
729 		nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off);
730 	if (npt->cookie != NULL)
731 		nlattr_add_raw(nw, npt->cookie);
732 
733 	if (nlmsg_end(nw))
734 		return;
735 enomem:
736 	NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u",
737 	    hdr->nlmsg_type, hdr->nlmsg_seq);
738 	nlmsg_abort(nw);
739 }
740 
741 bool
742 _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
743 {
744 	if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) {
745 		NL_LOG(LOG_DEBUG, "Error finalizing table dump");
746 		return (false);
747 	}
748 	/* Save operation result */
749 	int *perror = nlmsg_reserve_object(nw, int);
750 	NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error,
751 	    nw->offset, perror);
752 	*perror = error;
753 	nlmsg_end(nw);
754 	nw->suppress_ack = true;
755 
756 	return (true);
757 }
758