1 /* libbdev - IPC and recovery functions */
2
3 #include <minix/drivers.h>
4 #include <minix/bdev.h>
5 #include <assert.h>
6
7 #include "const.h"
8 #include "type.h"
9 #include "proto.h"
10
static void bdev_cancel(dev_t dev)
{
/* Give up on the driver for the given device after recovery has failed
 * repeatedly: fail every asynchronous request still pending on its major
 * with EDEADSRCDST, and then mark the driver as permanently unusable.
 */
  bdev_call_t *cur, *next;

  printf("bdev: giving up on major %d\n", major(dev));

  /* Walk all pending asynchronous requests for this major and fail each. */
  for (cur = bdev_call_iter_maj(dev, NULL, &next); cur != NULL;
	cur = bdev_call_iter_maj(dev, cur, &next))
	bdev_callback_asyn(cur, EDEADSRCDST);

  /* The driver is no longer to be used from here on. */
  bdev_driver_clear(dev);
}
29
static int bdev_recover(dev_t dev, int update_endpt)
{
/* Communication with the driver for the given device has failed at the IPC
 * level; attempt to recover from that. When 'update_endpt' is set, the new
 * driver endpoint is looked up before anything else. At most RECOVER_TRIES
 * attempts are made. Returns TRUE if recovery succeeded, or FALSE if we gave
 * up on the driver (in which case bdev_cancel() has been called on it).
 */
  bdev_call_t *cur, *next;
  endpoint_t endpt;
  int r, in_use, attempt;

  /* Stay quiet unless there is actually something to recover. A driver that
   * is legitimately shut down and restarted while nobody is using it should
   * not cause console noise.
   */
  in_use = bdev_minor_is_open(dev) ||
	bdev_call_iter_maj(dev, NULL, &next) != NULL;

  if (in_use)
	printf("bdev: recovering from a driver restart on major %d\n",
		major(dev));

  for (attempt = 0; attempt < RECOVER_TRIES; attempt++) {
	/* Refresh the endpoint first, if so requested. */
	if (update_endpt)
		(void) bdev_driver_update(dev);

	endpt = bdev_driver_get(dev);
	if (endpt == NONE)
		break;

	/* Any failure past this point requires a fresh endpoint lookup on
	 * the next attempt.
	 */
	update_endpt = TRUE;

	/* Reopen all minor devices on the new driver instance. */
	r = bdev_minor_reopen(dev);

	/* The driver died once more: it may get another chance. */
	if (r == EDEADSRCDST)
		continue;

	/* Any other error means that we cannot keep using the driver as is,
	 * while we cannot force it to restart either.
	 */
	if (r != OK)
		break;

	/* Reissue all asynchronous requests. Failing to resend some of them
	 * does not affect the overall recovery; such requests are simply
	 * failed individually. No new IPC failures will show up here.
	 */
	for (cur = bdev_call_iter_maj(dev, NULL, &next); cur != NULL;
		cur = bdev_call_iter_maj(dev, cur, &next)) {
		r = bdev_restart_asyn(cur);
		if (r != OK)
			bdev_callback_asyn(cur, r);
	}

	/* Recovery appears to have worked. The current synchronous request
	 * (if any) can now be reissued, and normal operation can resume.
	 */
	if (in_use)
		printf("bdev: recovery successful, new driver at %d\n", endpt);

	return TRUE;
  }

  /* All recovery attempts have failed. Abandon this driver. */
  bdev_cancel(dev);

  return FALSE;
}
101
void bdev_update(dev_t dev, char *label)
{
/* Associate the driver with the given label with the given device, and run
 * recovery if that replaces a previously known endpoint with a different one.
 */
  endpoint_t prev_endpt, new_endpt;

  prev_endpt = bdev_driver_get(dev);
  new_endpt = bdev_driver_set(dev, label);

  /* Nothing to recover if there was no endpoint before, or if it is still the
   * same one.
   */
  if (prev_endpt == NONE || prev_endpt == new_endpt)
	return;

  /* The endpoint has changed. It has just been set, so recovery must not look
   * it up yet again.
   */
  bdev_recover(dev, FALSE /*update_endpt*/);
}
118
int bdev_senda(dev_t dev, const message *m_orig, bdev_id_t id)
{
/* Issue an asynchronous request to the driver of the given device, tagging a
 * copy of the caller's message with the given ID. New IPC errors towards the
 * driver are never reported from here; if the send does not work out, that
 * will become apparent through other means later on. Returns EDEADSRCDST if
 * there is no usable driver endpoint, or the result of asynsend() otherwise.
 */
  endpoint_t endpt;
  message req;
  int r;

  /* Without a usable driver endpoint there is nothing to send to. */
  endpt = bdev_driver_get(dev);
  if (endpt == NONE)
	return EDEADSRCDST;

  /* Work on a private copy, as the caller's message is read-only. */
  req = *m_orig;
  req.m_lbdev_lblockdriver_msg.id = id;

  r = asynsend(endpt, &req);
  if (r != OK)
	printf("bdev: asynsend to driver (%d) failed (%d)\n", endpt, r);

  return r;
}
143
int bdev_sendrec(dev_t dev, const message *m_orig)
{
/* Perform a synchronous request on the driver of the given device: send a
 * copy of the caller's message and block until the reply arrives. The result
 * is the status from the driver's reply, EDEADSRCDST if there is no usable
 * driver (anymore), EINVAL if the reply is malformed, or ERESTART if the
 * caller should reissue the request.
 */
  endpoint_t endpt;
  message msg;
  int r;

  /* Without a usable driver endpoint, fail right away. */
  endpt = bdev_driver_get(dev);
  if (endpt == NONE)
	return EDEADSRCDST;

  /* Synchronous requests are tagged with NO_ID. */
  msg = *m_orig;
  msg.m_lbdev_lblockdriver_msg.id = NO_ID;

  r = ipc_sendrec(endpt, &msg);

  /* A communication failure means that the driver has died. It is assumed to
   * be restarted shortly, so try to recover. If that works, the caller gets
   * to reissue the synchronous request.
   */
  if (r == EDEADSRCDST)
	return bdev_recover(dev, TRUE /*update_endpt*/) ?
		ERESTART : EDEADSRCDST;

  if (r != OK) {
	printf("bdev: IPC to driver (%d) failed (%d)\n", endpt, r);
	return r;
  }

  if (msg.m_type != BDEV_REPLY) {
	printf("bdev: driver (%d) sent weird response (%d)\n",
		endpt, msg.m_type);
	return EINVAL;
  }

  /* By protocol contract, an asynchronous reply can never satisfy a
   * synchronous SENDREC call, so a reply carrying a real ID is invalid here.
   */
  if (msg.m_lblockdriver_lbdev_reply.id != NO_ID) {
	printf("bdev: driver (%d) sent invalid ID (%d)\n", endpt,
		msg.m_lblockdriver_lbdev_reply.id);
	return EINVAL;
  }

  /* Barring misuse of libbdev by the caller, an ERESTART status only shows up
   * when a raw block I/O request was resent to a restarted driver before VFS
   * got the chance to reopen the associated device. That is a highly
   * exceptional case that is hard to handle properly, so take the easy way
   * out: sleep a little to let VFS reopen the device, and have the request
   * resent. If it keeps failing, the caller will eventually give up.
   */
  if (msg.m_lblockdriver_lbdev_reply.status == ERESTART) {
	printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
		endpt);

	micro_delay(1000);

	return ERESTART;
  }

  /* Pass on the driver's result for the request. */
  return msg.m_lblockdriver_lbdev_reply.status;
}
214
static int bdev_receive(dev_t dev, message *m)
{
/* Receive a single valid reply message from the driver of the given device
 * into 'm'. Returns OK if a BDEV_REPLY message was received (checking its ID
 * and status is up to the caller), EDEADSRCDST if there is no usable driver
 * (anymore), EINVAL for an unexpected message type, another error code if
 * receiving failed otherwise, or ERESTART if no message was received but the
 * caller may call this function again.
 */
  endpoint_t endpt;
  int r, deaths = 0;

  /* Keep trying for as long as the driver dies on us and can be recovered. */
  for (;;) {
	/* The endpoint changes with every driver restart, so look it up
	 * and check it anew on each attempt.
	 */
	endpt = bdev_driver_get(dev);
	if (endpt == NONE)
		return EDEADSRCDST;

	r = sef_receive(endpt, m);

	/* Anything but a dead driver is dealt with below. */
	if (r != EDEADSRCDST)
		break;

	/* The retry limit has been hit even though every recovery attempt
	 * succeeded. Leave it to the caller to decide whether to call us
	 * again; ERESTART says that it may, but that no message came in.
	 */
	if (++deaths == DRIVER_TRIES)
		return ERESTART;

	/* Try to recover. Upon success, all asynchronous requests have
	 * been resent, and receiving a reply can be attempted again.
	 */
	if (!bdev_recover(dev, TRUE /*update_endpt*/))
		return EDEADSRCDST;
  }

  if (r != OK) {
	printf("bdev: IPC to driver (%d) failed (%d)\n", endpt, r);

	return r;
  }

  if (m->m_type != BDEV_REPLY) {
	printf("bdev: driver (%d) sent weird response (%d)\n",
		endpt, m->m_type);
	return EINVAL;
  }

  /* Verifying the ID and status is the caller's job. */
  return OK;
}
268
void bdev_reply_asyn(message *m)
{
/* Process a reply message that came in from a disk driver. The caller must
 * pass a message of type BDEV_REPLY. A reply with an unknown ID, or one from
 * an endpoint other than the driver of the call's device, is dropped. An
 * ERESTART status gets the request resent after a short delay. Any other
 * status is passed on through the asynchronous callback.
 */
  bdev_call_t *call;
  endpoint_t endpt;
  bdev_id_t id;
  int r;

  /* The caller is required to guarantee this. */
  assert(m->m_type == BDEV_REPLY);

  /* Look up the asynchronous call structure that this reply belongs to. */
  id = m->m_lblockdriver_lbdev_reply.id;

  call = bdev_call_get(id);
  if (call == NULL) {
	printf("bdev: driver (%d) replied to unknown request (%d)\n",
		m->m_source, m->m_lblockdriver_lbdev_reply.id);
	return;
  }

  /* The reply must originate from the driver that the call belongs to. */
  endpt = bdev_driver_get(call->dev);

  if (m->m_source != endpt) {
	/* With no current endpoint, this may just be a stray reply, which
	 * is not worth a message.
	 */
	if (endpt != NONE)
		printf("bdev: driver (%d) replied to request not sent to it\n",
			m->m_source);
	return;
  }

  /* The common case: report the driver's result for the request. */
  if (m->m_lblockdriver_lbdev_reply.status != ERESTART) {
	bdev_callback_asyn(call, m->m_lblockdriver_lbdev_reply.status);
	return;
  }

  /* ERESTART is handled the same way as in bdev_sendrec(): sleep briefly,
   * then resend the request. If resending fails, fail the call instead.
   */
  printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
	endpt);

  micro_delay(1000);

  r = bdev_restart_asyn(call);
  if (r != OK)
	bdev_callback_asyn(call, r);
}
316
int bdev_wait_asyn(bdev_id_t id)
{
/* Block until the asynchronous request with the given ID has completed.
 * Returns ENOENT if no such request exists, OK once the request is no longer
 * pending, or the error from bdev_receive() if receiving a reply failed.
 */
  bdev_call_t *call;
  dev_t dev;
  message reply;
  int r;

  call = bdev_call_get(id);
  if (call == NULL)
	return ENOENT;

  dev = call->dev;

  do {
	r = bdev_receive(dev, &reply);

	/* Handling the reply frees the call structure as a side effect. If
	 * ERESTART keeps coming back, the asynchronous request keeps being
	 * resent, until it eventually runs into the retry limit and this
	 * loop ends.
	 */
	if (r == OK)
		bdev_reply_asyn(&reply);
	else if (r != ERESTART)
		return r;

  } while (bdev_call_get(id) != NULL);

  return OK;
}
347