1*bf0d449cSmpi /* $OpenBSD: subr_percpu.c,v 1.11 2023/09/16 09:33:27 mpi Exp $ */
204e271afSdlg
304e271afSdlg /*
404e271afSdlg * Copyright (c) 2016 David Gwynne <dlg@openbsd.org>
504e271afSdlg *
604e271afSdlg * Permission to use, copy, modify, and distribute this software for any
704e271afSdlg * purpose with or without fee is hereby granted, provided that the above
804e271afSdlg * copyright notice and this permission notice appear in all copies.
904e271afSdlg *
1004e271afSdlg * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1104e271afSdlg * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1204e271afSdlg * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1304e271afSdlg * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1404e271afSdlg * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1504e271afSdlg * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1604e271afSdlg * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1704e271afSdlg */
1804e271afSdlg
1904e271afSdlg #include <sys/param.h>
2004e271afSdlg #include <sys/systm.h>
2104e271afSdlg #include <sys/pool.h>
2204e271afSdlg #include <sys/malloc.h>
2304e271afSdlg
2404e271afSdlg #include <sys/percpu.h>
2504e271afSdlg
2604e271afSdlg #ifdef MULTIPROCESSOR
2704e271afSdlg struct pool cpumem_pl;
2804e271afSdlg
2904e271afSdlg void
percpu_init(void)3004e271afSdlg percpu_init(void)
3104e271afSdlg {
328457285fSdlg pool_init(&cpumem_pl, sizeof(struct cpumem) * ncpusfound, 0,
338457285fSdlg IPL_NONE, PR_WAITOK, "percpumem", &pool_allocator_single);
3404e271afSdlg }
3504e271afSdlg
3604e271afSdlg struct cpumem *
cpumem_get(struct pool * pp)3704e271afSdlg cpumem_get(struct pool *pp)
3804e271afSdlg {
3904e271afSdlg struct cpumem *cm;
4004e271afSdlg unsigned int cpu;
4104e271afSdlg
4204e271afSdlg cm = pool_get(&cpumem_pl, PR_WAITOK);
4304e271afSdlg
448457285fSdlg for (cpu = 0; cpu < ncpusfound; cpu++)
4504e271afSdlg cm[cpu].mem = pool_get(pp, PR_WAITOK | PR_ZERO);
4604e271afSdlg
4704e271afSdlg return (cm);
4804e271afSdlg }
4904e271afSdlg
5004e271afSdlg void
cpumem_put(struct pool * pp,struct cpumem * cm)5104e271afSdlg cpumem_put(struct pool *pp, struct cpumem *cm)
5204e271afSdlg {
5304e271afSdlg unsigned int cpu;
5404e271afSdlg
558457285fSdlg for (cpu = 0; cpu < ncpusfound; cpu++)
5604e271afSdlg pool_put(pp, cm[cpu].mem);
5704e271afSdlg
5804e271afSdlg pool_put(&cpumem_pl, cm);
5904e271afSdlg }
6004e271afSdlg
6104e271afSdlg struct cpumem *
cpumem_malloc(size_t sz,int type)6204e271afSdlg cpumem_malloc(size_t sz, int type)
6304e271afSdlg {
6404e271afSdlg struct cpumem *cm;
6504e271afSdlg unsigned int cpu;
6604e271afSdlg
6704e271afSdlg sz = roundup(sz, CACHELINESIZE);
6804e271afSdlg
6904e271afSdlg cm = pool_get(&cpumem_pl, PR_WAITOK);
7004e271afSdlg
718457285fSdlg for (cpu = 0; cpu < ncpusfound; cpu++)
7204e271afSdlg cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
7304e271afSdlg
7404e271afSdlg return (cm);
7504e271afSdlg }
7604e271afSdlg
7704e271afSdlg struct cpumem *
cpumem_malloc_ncpus(struct cpumem * bootcm,size_t sz,int type)78780021b3Sdlg cpumem_malloc_ncpus(struct cpumem *bootcm, size_t sz, int type)
7904e271afSdlg {
8004e271afSdlg struct cpumem *cm;
8104e271afSdlg unsigned int cpu;
8204e271afSdlg
8304e271afSdlg sz = roundup(sz, CACHELINESIZE);
8404e271afSdlg
8504e271afSdlg cm = pool_get(&cpumem_pl, PR_WAITOK);
8604e271afSdlg
8704e271afSdlg cm[0].mem = bootcm[0].mem;
888457285fSdlg for (cpu = 1; cpu < ncpusfound; cpu++)
8904e271afSdlg cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
9004e271afSdlg
9104e271afSdlg return (cm);
9204e271afSdlg }
9304e271afSdlg
9404e271afSdlg void
cpumem_free(struct cpumem * cm,int type,size_t sz)9504e271afSdlg cpumem_free(struct cpumem *cm, int type, size_t sz)
9604e271afSdlg {
9704e271afSdlg unsigned int cpu;
9804e271afSdlg
9904e271afSdlg sz = roundup(sz, CACHELINESIZE);
10004e271afSdlg
1018457285fSdlg for (cpu = 0; cpu < ncpusfound; cpu++)
10204e271afSdlg free(cm[cpu].mem, type, sz);
10304e271afSdlg
10404e271afSdlg pool_put(&cpumem_pl, cm);
10504e271afSdlg }
10604e271afSdlg
10704e271afSdlg void *
cpumem_first(struct cpumem_iter * i,struct cpumem * cm)10804e271afSdlg cpumem_first(struct cpumem_iter *i, struct cpumem *cm)
10904e271afSdlg {
11004e271afSdlg i->cpu = 0;
11104e271afSdlg
11204e271afSdlg return (cm[0].mem);
11304e271afSdlg }
11404e271afSdlg
11504e271afSdlg void *
cpumem_next(struct cpumem_iter * i,struct cpumem * cm)11604e271afSdlg cpumem_next(struct cpumem_iter *i, struct cpumem *cm)
11704e271afSdlg {
11804e271afSdlg unsigned int cpu = ++i->cpu;
11904e271afSdlg
1208457285fSdlg if (cpu >= ncpusfound)
12104e271afSdlg return (NULL);
12204e271afSdlg
12304e271afSdlg return (cm[cpu].mem);
12404e271afSdlg }
12504e271afSdlg
12604e271afSdlg struct cpumem *
counters_alloc(unsigned int n)127599d0588Sjca counters_alloc(unsigned int n)
12804e271afSdlg {
12904e271afSdlg struct cpumem *cm;
13004e271afSdlg struct cpumem_iter cmi;
13104e271afSdlg uint64_t *counters;
13204e271afSdlg unsigned int i;
13304e271afSdlg
13404e271afSdlg KASSERT(n > 0);
13504e271afSdlg
13604e271afSdlg n++; /* add space for a generation number */
137599d0588Sjca cm = cpumem_malloc(n * sizeof(uint64_t), M_COUNTERS);
13804e271afSdlg
13904e271afSdlg CPUMEM_FOREACH(counters, &cmi, cm) {
14004e271afSdlg for (i = 0; i < n; i++)
14104e271afSdlg counters[i] = 0;
14204e271afSdlg }
14304e271afSdlg
14404e271afSdlg return (cm);
14504e271afSdlg }
14604e271afSdlg
14704e271afSdlg struct cpumem *
counters_alloc_ncpus(struct cpumem * cm,unsigned int n)148599d0588Sjca counters_alloc_ncpus(struct cpumem *cm, unsigned int n)
14904e271afSdlg {
15004e271afSdlg n++; /* the generation number */
151599d0588Sjca return (cpumem_malloc_ncpus(cm, n * sizeof(uint64_t), M_COUNTERS));
15204e271afSdlg }
15304e271afSdlg
15404e271afSdlg void
counters_free(struct cpumem * cm,unsigned int n)155599d0588Sjca counters_free(struct cpumem *cm, unsigned int n)
15604e271afSdlg {
15704e271afSdlg n++; /* generation number */
158599d0588Sjca cpumem_free(cm, M_COUNTERS, n * sizeof(uint64_t));
15904e271afSdlg }
16004e271afSdlg
16104e271afSdlg void
counters_read(struct cpumem * cm,uint64_t * output,unsigned int n,uint64_t * scratch)162*bf0d449cSmpi counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
163*bf0d449cSmpi uint64_t *scratch)
16404e271afSdlg {
16504e271afSdlg struct cpumem_iter cmi;
166*bf0d449cSmpi uint64_t *gen, *counters, *temp = scratch;
16704e271afSdlg uint64_t enter, leave;
16804e271afSdlg unsigned int i;
16904e271afSdlg
17004e271afSdlg for (i = 0; i < n; i++)
17104e271afSdlg output[i] = 0;
17204e271afSdlg
173*bf0d449cSmpi if (scratch == NULL)
17404e271afSdlg temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
17504e271afSdlg
17604e271afSdlg gen = cpumem_first(&cmi, cm);
17704e271afSdlg do {
17804e271afSdlg counters = gen + 1;
17904e271afSdlg
18004e271afSdlg enter = *gen;
18104e271afSdlg for (;;) {
18204e271afSdlg /* the generation number is odd during an update */
18304e271afSdlg while (enter & 1) {
18404e271afSdlg yield();
18504e271afSdlg enter = *gen;
18604e271afSdlg }
18704e271afSdlg
188e05abaedSbluhm membar_consumer();
18904e271afSdlg for (i = 0; i < n; i++)
19004e271afSdlg temp[i] = counters[i];
19104e271afSdlg
19204e271afSdlg membar_consumer();
19304e271afSdlg leave = *gen;
19404e271afSdlg
19504e271afSdlg if (enter == leave)
19604e271afSdlg break;
19704e271afSdlg
19804e271afSdlg enter = leave;
19904e271afSdlg }
20004e271afSdlg
20104e271afSdlg for (i = 0; i < n; i++)
20204e271afSdlg output[i] += temp[i];
20304e271afSdlg
20404e271afSdlg gen = cpumem_next(&cmi, cm);
20504e271afSdlg } while (gen != NULL);
20604e271afSdlg
207*bf0d449cSmpi if (scratch == NULL)
20804e271afSdlg free(temp, M_TEMP, n * sizeof(uint64_t));
20904e271afSdlg }
21004e271afSdlg
21104e271afSdlg void
counters_zero(struct cpumem * cm,unsigned int n)21204e271afSdlg counters_zero(struct cpumem *cm, unsigned int n)
21304e271afSdlg {
21404e271afSdlg struct cpumem_iter cmi;
21504e271afSdlg uint64_t *counters;
21604e271afSdlg unsigned int i;
21704e271afSdlg
21804e271afSdlg counters = cpumem_first(&cmi, cm);
2198f25c57fSbluhm membar_producer();
22004e271afSdlg do {
22104e271afSdlg for (i = 0; i < n; i++)
22204e271afSdlg counters[i] = 0;
223e05abaedSbluhm /* zero the generation numbers too */
224e05abaedSbluhm membar_producer();
225e05abaedSbluhm counters[i] = 0;
22604e271afSdlg
22704e271afSdlg counters = cpumem_next(&cmi, cm);
22804e271afSdlg } while (counters != NULL);
22904e271afSdlg }
23004e271afSdlg
23104e271afSdlg #else /* MULTIPROCESSOR */
23204e271afSdlg
/*
 * Uniprocessor implementation of per-CPU data structures.
 *
 * UP percpu memory is a single memory allocation cast to/from the
 * cpumem struct. It is not rounded up to the size of a cacheline
 * because there is no other cache to contend with.
 */
24004e271afSdlg
/*
 * Uniprocessor percpu_init(): nothing needs to be set up.
 */
void
percpu_init(void)
{
	/* intentionally empty */
}
24604e271afSdlg
24704e271afSdlg struct cpumem *
cpumem_get(struct pool * pp)24804e271afSdlg cpumem_get(struct pool *pp)
24904e271afSdlg {
2505cffaf98Sdlg return (pool_get(pp, PR_WAITOK | PR_ZERO));
25104e271afSdlg }
25204e271afSdlg
/*
 * Uniprocessor cpumem_put(): the handle is the pool item itself, so it
 * goes straight back to pp.
 */
void
cpumem_put(struct pool *pp, struct cpumem *cm)
{
	void *item = cm;

	pool_put(pp, item);
}
25804e271afSdlg
25904e271afSdlg struct cpumem *
cpumem_malloc(size_t sz,int type)26004e271afSdlg cpumem_malloc(size_t sz, int type)
26104e271afSdlg {
2625cffaf98Sdlg return (malloc(sz, type, M_WAITOK | M_ZERO));
26304e271afSdlg }
26404e271afSdlg
/*
 * Uniprocessor cpumem_malloc_ncpus(): nothing is allocated; the handle
 * passed in is returned unchanged.  sz and type are not used.
 */
struct cpumem *
cpumem_malloc_ncpus(struct cpumem *cm, size_t sz, int type)
{
	struct cpumem *same = cm;

	return (same);
}
27004e271afSdlg
27104e271afSdlg void
cpumem_free(struct cpumem * cm,int type,size_t sz)27204e271afSdlg cpumem_free(struct cpumem *cm, int type, size_t sz)
27304e271afSdlg {
27404e271afSdlg free(cm, type, sz);
27504e271afSdlg }
27604e271afSdlg
/*
 * Uniprocessor cpumem_first(): the handle itself is the only CPU's
 * memory.  The iterator is not touched.
 */
void *
cpumem_first(struct cpumem_iter *i, struct cpumem *cm)
{
	void *only = cm;

	return (only);
}
2826c8f19e1Sderaadt
/*
 * Uniprocessor cpumem_next(): there is never a second CPU, so the
 * iteration always ends here.  Neither argument is used.
 */
void *
cpumem_next(struct cpumem_iter *i, struct cpumem *cm)
{
	void *none = NULL;

	return (none);
}
2886c8f19e1Sderaadt
28904e271afSdlg struct cpumem *
counters_alloc(unsigned int n)290599d0588Sjca counters_alloc(unsigned int n)
29104e271afSdlg {
29204e271afSdlg KASSERT(n > 0);
29304e271afSdlg
294599d0588Sjca return (cpumem_malloc(n * sizeof(uint64_t), M_COUNTERS));
29504e271afSdlg }
29604e271afSdlg
29704e271afSdlg struct cpumem *
counters_alloc_ncpus(struct cpumem * cm,unsigned int n)298599d0588Sjca counters_alloc_ncpus(struct cpumem *cm, unsigned int n)
29904e271afSdlg {
300678831beSjsg /* this is unnecessary, but symmetrical */
301599d0588Sjca return (cpumem_malloc_ncpus(cm, n * sizeof(uint64_t), M_COUNTERS));
30204e271afSdlg }
30304e271afSdlg
30404e271afSdlg void
counters_free(struct cpumem * cm,unsigned int n)305599d0588Sjca counters_free(struct cpumem *cm, unsigned int n)
30604e271afSdlg {
307599d0588Sjca cpumem_free(cm, M_COUNTERS, n * sizeof(uint64_t));
30804e271afSdlg }
30904e271afSdlg
/*
 * Uniprocessor counters_read(): copy the n counters stored at cm into
 * output[0 .. n-1].  The copy is bracketed by splhigh()/splx()
 * (presumably so that it cannot be interrupted half-way).  scratch is
 * not used.
 */
void
counters_read(struct cpumem *cm, uint64_t *output, unsigned int n,
    uint64_t *scratch)
{
	uint64_t *src = (uint64_t *)cm;
	unsigned int k = 0;
	int s;

	s = splhigh();
	while (k < n) {
		output[k] = src[k];
		k++;
	}
	splx(s);
}
32504e271afSdlg
/*
 * Uniprocessor counters_zero(): set the n counters stored at cm to
 * zero.  The stores are bracketed by splhigh()/splx() (presumably so
 * that they cannot be interrupted half-way).
 */
void
counters_zero(struct cpumem *cm, unsigned int n)
{
	uint64_t *dst = (uint64_t *)cm;
	unsigned int k = 0;
	int s;

	s = splhigh();
	while (k < n) {
		dst[k] = 0;
		k++;
	}
	splx(s);
}
34004e271afSdlg
34104e271afSdlg #endif /* MULTIPROCESSOR */
342