/*-
 * Copyright (c) 2021 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/membarrier.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>

#include <vm/vm_param.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

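/*
 * Mask of the membarrier(2) commands implemented here.
 * MEMBARRIER_CMD_QUERY is handled separately in kern_membarrier(),
 * which reports this mask back to the caller.
 */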
#define	MEMBARRIER_SUPPORTED_CMDS ( \
    MEMBARRIER_CMD_GLOBAL | \
    MEMBARRIER_CMD_GLOBAL_EXPEDITED | \
    MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED | \
    MEMBARRIER_CMD_PRIVATE_EXPEDITED | \
    MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED | \
    MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE | \
    MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)

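/*
 * IPI action: execute a sequentially consistent fence on the target CPU.
 */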
static void
membarrier_action_seqcst(void *arg __unused)
{
	atomic_thread_fence_seq_cst();
}

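/*
 * IPI action: fence, then serialize the instruction stream on the
 * target CPU (for the SYNC_CORE variants).
 */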
static void
membarrier_action_seqcst_sync_core(void *arg __unused)
{
	atomic_thread_fence_seq_cst();
	cpu_sync_core();
}

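/*
 * Run func on each CPU in *csp via an smp_rendezvous, with
 * sequentially consistent fences on the initiating CPU before and
 * after the rendezvous.
 */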
static void
do_membarrier_ipi(cpuset_t *csp, void (*func)(void *))
{
	atomic_thread_fence_seq_cst();
	smp_rendezvous_cpus(*csp, smp_no_rendezvous_barrier, func,
	    smp_no_rendezvous_barrier, NULL);
	atomic_thread_fence_seq_cst();
}

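/*
 * Helper for MEMBARRIER_CMD_GLOBAL.  Mark CPU c in *csp once it is
 * known to have passed through a context switch: either it is running
 * its idle thread, or its pc_switchtime has changed since the initial
 * pass (init) recorded it in swt[].
 */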
static void
check_cpu_switched(int c, cpuset_t *csp, uint64_t *swt, bool init)
{
	struct pcpu *pc;
	uint64_t sw;

	if (CPU_ISSET(c, csp))
		return;

	pc = cpuid_to_pcpu[c];
	if (pc->pc_curthread == pc->pc_idlethread) {
		CPU_SET(c, csp);
		return;
	}

	/*
	 * Sync with the context switch to ensure that the update of
	 * pc_curthread to a non-idle thread pointer is visible before
	 * pc_switchtime is read.
	 */
	atomic_thread_fence_acq();

	sw = pc->pc_switchtime;
	if (init)
		swt[c] = sw;
	else if (sw != swt[c])
		CPU_SET(c, csp);
}

/*
 * kern_membarrier() implements the membarrier(2) commands.
 *
 * XXXKIB: We execute the requested action (seq_cst and possibly
 * sync_core) on the current CPU as well.  There is no guarantee that
 * the current thread executes anything with full fence semantics
 * during syscall execution.  Similarly, cpu_sync_core() semantics
 * might not be provided by the syscall return path.  E.g., on amd64
 * we typically return without using IRET.
 */
int
kern_membarrier(struct thread *td, int cmd, unsigned flags, int cpu_id)
{
	struct proc *p, *p1;
	struct thread *td1;
	cpuset_t cs;
	uint64_t *swt;
	int c, error;
	bool first;

	if (flags != 0 || (cmd & ~MEMBARRIER_SUPPORTED_CMDS) != 0)
		return (EINVAL);

	if (cmd == MEMBARRIER_CMD_QUERY) {
		td->td_retval[0] = MEMBARRIER_SUPPORTED_CMDS;
		return (0);
	}

	p = td->td_proc;
	error = 0;

	switch (cmd) {
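	/*
	 * Non-expedited global barrier: wait until every CPU has either
	 * been observed running its idle thread or has gone through a
	 * context switch (pc_switchtime changed), polling with a short
	 * interruptible sleep, and finish with a full fence.
	 */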
	case MEMBARRIER_CMD_GLOBAL:
		swt = malloc((mp_maxid + 1) * sizeof(*swt), M_TEMP, M_WAITOK);
		CPU_ZERO(&cs);
		sched_pin();
		CPU_SET(PCPU_GET(cpuid), &cs);
		for (first = true; error == 0; first = false) {
			CPU_FOREACH(c)
				check_cpu_switched(c, &cs, swt, first);
			if (CPU_CMP(&cs, &all_cpus) == 0)
				break;
			error = pause_sig("mmbr", 1);
			if (error == EWOULDBLOCK)
				error = 0;
		}
		sched_unpin();
		free(swt, M_TEMP);
		atomic_thread_fence_seq_cst();
		break;

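	/*
	 * Expedited global barrier: IPI the CPUs currently running
	 * threads of processes registered with
	 * MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.  The calling
	 * process must itself be registered.
	 */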
	case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
		if ((td->td_proc->p_flag2 & P2_MEMBAR_GLOBE) == 0) {
			error = EPERM;
		} else {
			CPU_ZERO(&cs);
			CPU_FOREACH(c) {
				td1 = cpuid_to_pcpu[c]->pc_curthread;
				p1 = td1->td_proc;
				if (p1 != NULL &&
				    (p1->p_flag2 & P2_MEMBAR_GLOBE) != 0)
					CPU_SET(c, &cs);
			}
			do_membarrier_ipi(&cs, membarrier_action_seqcst);
		}
		break;

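	/*
	 * Registering for an expedited variant just sets the
	 * corresponding p_flag2 bit on the calling process.
	 */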
	case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
		if ((p->p_flag2 & P2_MEMBAR_GLOBE) == 0) {
			PROC_LOCK(p);
			p->p_flag2 |= P2_MEMBAR_GLOBE;
			PROC_UNLOCK(p);
		}
		break;

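	/*
	 * Private expedited barrier: IPI only the CPUs on which the
	 * caller's address space (pmap) is currently active.
	 */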
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
		if ((td->td_proc->p_flag2 & P2_MEMBAR_PRIVE) == 0) {
			error = EPERM;
		} else {
			pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs);
			do_membarrier_ipi(&cs, membarrier_action_seqcst);
		}
		break;

	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
		if ((p->p_flag2 & P2_MEMBAR_PRIVE) == 0) {
			PROC_LOCK(p);
			p->p_flag2 |= P2_MEMBAR_PRIVE;
			PROC_UNLOCK(p);
		}
		break;

	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
		if ((td->td_proc->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) {
			error = EPERM;
		} else {
			/*
			 * Calculating the IPI multicast mask from the
			 * pmap active mask means that we do not call
			 * cpu_sync_core() on CPUs that are absent from
			 * the pmap active mask but could be switched
			 * from or to in the meantime.  This is fine, at
			 * least on amd64, because threads always use
			 * the slow (IRETQ) path to return from a
			 * syscall after a context switch.
			 */
			pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs);

			do_membarrier_ipi(&cs,
			    membarrier_action_seqcst_sync_core);
		}
		break;

	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
		if ((p->p_flag2 & P2_MEMBAR_PRIVE_SYNCORE) == 0) {
			PROC_LOCK(p);
			p->p_flag2 |= P2_MEMBAR_PRIVE_SYNCORE;
			PROC_UNLOCK(p);
		}
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}

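/*
 * membarrier(2) system call entry point: pass the user-supplied
 * arguments through to kern_membarrier().
 */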
int
sys_membarrier(struct thread *td, struct membarrier_args *uap)
{
	return (kern_membarrier(td, uap->cmd, uap->flags, uap->cpu_id));
}

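/*
 * Illustrative userspace usage (a sketch, not part of the kernel build):
 * a process registers for the private expedited variant once, then
 * issues barriers on its slow path.  This assumes a userspace
 * membarrier() wrapper with the same argument order as sys_membarrier()
 * above and the command constants from <sys/membarrier.h>.
 *
 *	#include <sys/membarrier.h>
 *
 *	static void
 *	setup(void)
 *	{
 *		// One-time registration; required before the
 *		// PRIVATE_EXPEDITED command may be used (EPERM otherwise).
 *		membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0);
 *	}
 *
 *	static void
 *	slow_path_barrier(void)
 *	{
 *		// Acts as a sequentially consistent fence on every CPU
 *		// currently running a thread of this process.
 *		membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0);
 *	}
 */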