xref: /netbsd-src/sys/kern/subr_pcq.c (revision 7dd29855d341dae5995eee2beabacc7bb6fd8727)
1*7dd29855Sriastradh /*	$NetBSD: subr_pcq.c,v 1.20 2023/02/24 11:02:27 riastradh Exp $	*/
29204531aSrmind 
3c8d306e3Smatt /*-
4189acff9Sad  * Copyright (c) 2009, 2019 The NetBSD Foundation, Inc.
5c8d306e3Smatt  * All rights reserved.
6c8d306e3Smatt  *
7c8d306e3Smatt  * This code is derived from software contributed to The NetBSD Foundation
8f1f42831Srmind  * by Andrew Doran.
9c8d306e3Smatt  *
10c8d306e3Smatt  * Redistribution and use in source and binary forms, with or without
11c8d306e3Smatt  * modification, are permitted provided that the following conditions
12c8d306e3Smatt  * are met:
13c8d306e3Smatt  * 1. Redistributions of source code must retain the above copyright
14c8d306e3Smatt  *    notice, this list of conditions and the following disclaimer.
15c8d306e3Smatt  * 2. Redistributions in binary form must reproduce the above copyright
16c8d306e3Smatt  *    notice, this list of conditions and the following disclaimer in the
17c8d306e3Smatt  *    documentation and/or other materials provided with the distribution.
18c8d306e3Smatt  *
19c8d306e3Smatt  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20c8d306e3Smatt  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21c8d306e3Smatt  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22c8d306e3Smatt  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23c8d306e3Smatt  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24c8d306e3Smatt  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25c8d306e3Smatt  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26c8d306e3Smatt  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27c8d306e3Smatt  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28c8d306e3Smatt  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29c8d306e3Smatt  * POSSIBILITY OF SUCH DAMAGE.
30c8d306e3Smatt  */
319204531aSrmind 
32f1f42831Srmind /*
33f1f42831Srmind  * Lockless producer/consumer queue.
34a11a64aaSriastradh  *
35a11a64aaSriastradh  * Summary of the producer algorithm in pcq_put (may run many in
36a11a64aaSriastradh  * parallel with each other and with a consumer):
37a11a64aaSriastradh  *
38a11a64aaSriastradh  *	P1. initialize an item
39a11a64aaSriastradh  *
40a11a64aaSriastradh  *	P2. atomic_cas(&pcq->pcq_pc) loop to advance the producer
41a11a64aaSriastradh  *	    pointer, reserving a space at c (fails if not enough space)
42a11a64aaSriastradh  *
43a11a64aaSriastradh  *	P3. atomic_store_release(&pcq->pcq_items[c], item) to publish
44a11a64aaSriastradh  *	    the item in the space it reserved
45a11a64aaSriastradh  *
46a11a64aaSriastradh  * Summary of the consumer algorithm in pcq_get (must be serialized by
47a11a64aaSriastradh  * caller with other consumers, may run in parallel with any number of
48a11a64aaSriastradh  * producers):
49a11a64aaSriastradh  *
50a11a64aaSriastradh  *	C1. atomic_load_relaxed(&pcq->pcq_pc) to get the consumer
51a11a64aaSriastradh  *	    pointer and a snapshot of the producer pointer, which may
52a11a64aaSriastradh  *	    point to null items or point to initialized items (fails if
53a11a64aaSriastradh  *	    no space reserved for published items yet)
54a11a64aaSriastradh  *
55a11a64aaSriastradh  *	C2. atomic_load_consume(&pcq->pcq_items[c]) to get the next
56a11a64aaSriastradh  *	    unconsumed but potentially published item (fails if item
57a11a64aaSriastradh  *	    not published yet)
58a11a64aaSriastradh  *
59a11a64aaSriastradh  *	C3. pcq->pcq_items[c] = NULL to consume the next unconsumed but
60a11a64aaSriastradh  *	    published item
61a11a64aaSriastradh  *
62a11a64aaSriastradh  *	C4. membar_producer
63a11a64aaSriastradh  *
64a11a64aaSriastradh  *	C5. atomic_cas(&pcq->pcq_pc) loop to advance the consumer
65a11a64aaSriastradh  *	    pointer
66a11a64aaSriastradh  *
67a11a64aaSriastradh  *	C6. use the item
68a11a64aaSriastradh  *
69a11a64aaSriastradh  * Note that there is a weird bare membar_producer which is not matched
70a11a64aaSriastradh  * by membar_consumer.  This is one of the rare cases of a memory
71a11a64aaSriastradh  * barrier on one side that is not matched by a memory barrier on
72a11a64aaSriastradh  * another side, but the ordering works out, with a somewhat more
73a11a64aaSriastradh  * involved proof.
74a11a64aaSriastradh  *
75a11a64aaSriastradh  * Some properties that need to be proved:
76a11a64aaSriastradh  *
77a11a64aaSriastradh  *	Theorem 1.  For pcq_put call that leads into pcq_get:
78a11a64aaSriastradh  *	Initializing item at P1 is dependency-ordered before usage of
79a11a64aaSriastradh  *	item at C6, so items placed by pcq_put can be safely used by
80a11a64aaSriastradh  *	the caller of pcq_get.
81a11a64aaSriastradh  *
82a11a64aaSriastradh  *	Proof sketch.
83a11a64aaSriastradh  *
84a11a64aaSriastradh  *		Assume load/store P2 synchronizes with load/store C1
85a11a64aaSriastradh  *		(if not, pcq_get fails in `if (p == c) return NULL').
86a11a64aaSriastradh  *
87a11a64aaSriastradh  *		Assume store-release P3 synchronizes with load-consume
88a11a64aaSriastradh  *		C2 (if not, pcq_get fails in `if (item == NULL) return
89a11a64aaSriastradh  *		NULL').
90a11a64aaSriastradh  *
91a11a64aaSriastradh  *		Then:
92a11a64aaSriastradh  *
93a11a64aaSriastradh  *		- P1 is sequenced before store-release P3
94a11a64aaSriastradh  *		- store-release P3 synchronizes with load-consume C2
95a11a64aaSriastradh  *		- load-consume C2 is dependency-ordered before C6
96a11a64aaSriastradh  *
97a11a64aaSriastradh  *		Hence transitively, P1 is dependency-ordered before C6,
98a11a64aaSriastradh  *		QED.
99a11a64aaSriastradh  *
100a11a64aaSriastradh  *	Theorem 2.  For pcq_get call followed by pcq_put: Nulling out
101a11a64aaSriastradh  *	location at store C3 happens before placing a new item in the
102a11a64aaSriastradh  *	same location at store P3, so items are not lost.
103a11a64aaSriastradh  *
104a11a64aaSriastradh  *	Proof sketch.
105a11a64aaSriastradh  *
106a11a64aaSriastradh  *		Assume load/store C5 synchronizes with load/store P2
 *		(otherwise pcq_put starts over the CAS loop or fails).
108a11a64aaSriastradh  *
109a11a64aaSriastradh  *		Then:
110a11a64aaSriastradh  *
111a11a64aaSriastradh  *		- store C3 is sequenced before membar_producer C4
112a11a64aaSriastradh  *		- membar_producer C4 is sequenced before load/store C5
113a11a64aaSriastradh  *		- load/store C5 synchronizes with load/store P2 at &pcq->pcq_pc
114a11a64aaSriastradh  *		- P2 is sequenced before store-release P3
115a11a64aaSriastradh  *
116a11a64aaSriastradh  *		Hence transitively, store C3 happens before
117a11a64aaSriastradh  *		store-release P3, QED.
118f1f42831Srmind  */
119f1f42831Srmind 
120c8d306e3Smatt #include <sys/cdefs.h>
121*7dd29855Sriastradh __KERNEL_RCSID(0, "$NetBSD: subr_pcq.c,v 1.20 2023/02/24 11:02:27 riastradh Exp $");
122c8d306e3Smatt 
123c8d306e3Smatt #include <sys/param.h>
124c8d306e3Smatt #include <sys/types.h>
125c8d306e3Smatt #include <sys/atomic.h>
126c8d306e3Smatt #include <sys/kmem.h>
127c8d306e3Smatt 
128c8d306e3Smatt #include <sys/pcq.h>
129c8d306e3Smatt 
/*
 * Internal producer-consumer queue structure.  Note: providing a separate
 * cache-line both for pcq_t::pcq_pc and pcq_t::pcq_items.
 */
struct pcq {
	u_int			pcq_nitems;	/* capacity; fixed at pcq_create() */
	uint8_t			pcq_pad1[COHERENCY_UNIT - sizeof(u_int)];
	volatile uint32_t	pcq_pc;		/* producer in low 16 bits, consumer in high 16 (see below) */
	uint8_t			pcq_pad2[COHERENCY_UNIT - sizeof(uint32_t)];
	void * volatile		pcq_items[];	/* pcq_nitems slots; NULL = not yet published */
};
141c8d306e3Smatt 
142f1f42831Srmind /*
143f1f42831Srmind  * Producer (p) - stored in the lower 16 bits of pcq_t::pcq_pc.
144f1f42831Srmind  * Consumer (c) - in the higher 16 bits.
145f1f42831Srmind  *
146f1f42831Srmind  * We have a limitation of 16 bits i.e. 0xffff items in the queue.
147190a99cbSrmind  * The PCQ_MAXLEN constant is set accordingly.
148f1f42831Srmind  */
149f1f42831Srmind 
150f1f42831Srmind static inline void
pcq_split(uint32_t v,u_int * p,u_int * c)151f1f42831Srmind pcq_split(uint32_t v, u_int *p, u_int *c)
152c8d306e3Smatt {
1539204531aSrmind 
154f1f42831Srmind 	*p = v & 0xffff;
155f1f42831Srmind 	*c = v >> 16;
156c8d306e3Smatt }
157c8d306e3Smatt 
158f1f42831Srmind static inline uint32_t
pcq_combine(u_int p,u_int c)159f1f42831Srmind pcq_combine(u_int p, u_int c)
160f1f42831Srmind {
161f1f42831Srmind 
162f1f42831Srmind 	return p | (c << 16);
163f1f42831Srmind }
164f1f42831Srmind 
165f1f42831Srmind static inline u_int
pcq_advance(pcq_t * pcq,u_int pc)166f1f42831Srmind pcq_advance(pcq_t *pcq, u_int pc)
167f1f42831Srmind {
168f1f42831Srmind 
169f1f42831Srmind 	if (__predict_false(++pc == pcq->pcq_nitems)) {
170f1f42831Srmind 		return 0;
171f1f42831Srmind 	}
172f1f42831Srmind 	return pc;
173f1f42831Srmind }
174f1f42831Srmind 
/*
 * pcq_put: place an item at the end of the queue.
 *
 * => Returns false if the queue is full, true on success.
 * => May run concurrently with other producers and with a consumer.
 *    Steps P2/P3 below refer to the proof sketch at the top of this
 *    file; step P1 (initializing the item) is done by the caller.
 */
bool
pcq_put(pcq_t *pcq, void *item)
{
	uint32_t v, nv;
	u_int op, p, c;

	/* NULL is reserved to mean "slot not yet published"; see pcq_get(). */
	KASSERT(item != NULL);

	/*
	 * P2: CAS loop to advance the producer pointer, reserving slot
	 * `op' exclusively for this producer.
	 */
	do {
		v = atomic_load_relaxed(&pcq->pcq_pc);
		pcq_split(v, &op, &c);
		p = pcq_advance(pcq, op);
		if (p == c) {
			/* Queue is full. */
			return false;
		}
		nv = pcq_combine(p, c);
	} while (atomic_cas_32(&pcq->pcq_pc, v, nv) != v);

	/*
	 * P3: publish the item in the reserved slot.
	 *
	 * Ensure that the update to pcq_pc is globally visible before the
	 * data item.  See pcq_get().  This also ensures that any changes
	 * that the caller made to the data item are globally visible
	 * before we put it onto the list.
	 */
	atomic_store_release(&pcq->pcq_items[op], item);

	/*
	 * Synchronization activity to wake up the consumer will ensure
	 * that the update to pcq_items[] is visible before the wakeup
	 * arrives.  So, we do not need an additional memory barrier here.
	 */
	return true;
}
212c8d306e3Smatt 
213c8d306e3Smatt /*
214f1f42831Srmind  * pcq_peek: return the next item from the queue without removal.
215c8d306e3Smatt  */
216f1f42831Srmind void *
pcq_peek(pcq_t * pcq)217f1f42831Srmind pcq_peek(pcq_t *pcq)
218f1f42831Srmind {
219640d128aSriastradh 	const uint32_t v = atomic_load_relaxed(&pcq->pcq_pc);
220f1f42831Srmind 	u_int p, c;
221c8d306e3Smatt 
222f1f42831Srmind 	pcq_split(v, &p, &c);
223f1f42831Srmind 
224f1f42831Srmind 	/* See comment on race below in pcq_get(). */
225640d128aSriastradh 	return (p == c) ? NULL : atomic_load_consume(&pcq->pcq_items[c]);
226c8d306e3Smatt }
227c8d306e3Smatt 
/*
 * pcq_get: remove and return the next item for consumption or NULL if empty.
 *
 * => The caller must prevent concurrent gets from occurring.
 * => May run concurrently with any number of producers.  Steps C1-C6
 *    below refer to the proof sketch at the top of this file.
 */
void *
pcq_get(pcq_t *pcq)
{
	uint32_t v, nv;
	u_int p, c;
	void *item;

	/* C1: snapshot the producer pointer and the consumer pointer. */
	v = atomic_load_relaxed(&pcq->pcq_pc);
	pcq_split(v, &p, &c);
	if (p == c) {
		/* Queue is empty: nothing to return. */
		return NULL;
	}
	/* C2: try to load the next unconsumed item. */
	item = atomic_load_consume(&pcq->pcq_items[c]);
	if (item == NULL) {
		/*
		 * Raced with sender: we rely on a notification (e.g. softint
		 * or wakeup) being generated after the producer's pcq_put(),
		 * causing us to retry pcq_get() later.
		 */
		return NULL;
	}
	/*
	 * C3: consume the item by nulling out its slot.
	 *
	 * We have exclusive access to this slot, so no need for
	 * atomic_store_*.
	 */
	pcq->pcq_items[c] = NULL;
	c = pcq_advance(pcq, c);
	nv = pcq_combine(p, c);

	/*
	 * C4/C5: order releasing the slot before advancing the consumer
	 * pointer.
	 *
	 * Ensure that update to pcq_items[c] becomes globally visible
	 * before the update to pcq_pc.  If it were reordered to occur
	 * after it, we could in theory wipe out a modification made
	 * to pcq_items[c] by pcq_put().
	 *
	 * No need for load-before-store ordering of membar_release
	 * because the only load we need to ensure happens first is the
	 * load of pcq->pcq_items[c], but that necessarily happens
	 * before the store to pcq->pcq_items[c] to null it out because
	 * it is at the same memory location.  Yes, this is a bare
	 * membar_producer with no matching membar_consumer.
	 */
	membar_producer();
	while (__predict_false(atomic_cas_32(&pcq->pcq_pc, v, nv) != v)) {
		v = atomic_load_relaxed(&pcq->pcq_pc);
		pcq_split(v, &p, &c);
		c = pcq_advance(pcq, c);
		nv = pcq_combine(p, c);
	}
	/* C6: the caller may now use the item. */
	return item;
}
285c8d306e3Smatt 
286c8d306e3Smatt pcq_t *
pcq_create(size_t nitems,km_flag_t kmflags)287f1f42831Srmind pcq_create(size_t nitems, km_flag_t kmflags)
288c8d306e3Smatt {
289c8d306e3Smatt 	pcq_t *pcq;
290c8d306e3Smatt 
2917bbfab3aSriastradh 	KASSERT(nitems > 0);
2927bbfab3aSriastradh 	KASSERT(nitems <= PCQ_MAXLEN);
293c8d306e3Smatt 
294976959cfSalnsn 	pcq = kmem_zalloc(offsetof(pcq_t, pcq_items[nitems]), kmflags);
295189acff9Sad 	if (pcq != NULL) {
296f1f42831Srmind 		pcq->pcq_nitems = nitems;
297189acff9Sad 	}
298c8d306e3Smatt 	return pcq;
299c8d306e3Smatt }
300c8d306e3Smatt 
301c8d306e3Smatt void
pcq_destroy(pcq_t * pcq)302c8d306e3Smatt pcq_destroy(pcq_t *pcq)
303c8d306e3Smatt {
3049204531aSrmind 
305f1f42831Srmind 	kmem_free(pcq, offsetof(pcq_t, pcq_items[pcq->pcq_nitems]));
306f1f42831Srmind }
307c8d306e3Smatt 
308f1f42831Srmind size_t
pcq_maxitems(pcq_t * pcq)309f1f42831Srmind pcq_maxitems(pcq_t *pcq)
310f1f42831Srmind {
311f1f42831Srmind 
312f1f42831Srmind 	return pcq->pcq_nitems;
313c8d306e3Smatt }
314