xref: /minix3/minix/lib/libbdev/ipc.c (revision 433d6423c39e34ec4b79c950597bb2d236f886be)
1*433d6423SLionel Sambuc /* libbdev - IPC and recovery functions */
2*433d6423SLionel Sambuc 
3*433d6423SLionel Sambuc #include <minix/drivers.h>
4*433d6423SLionel Sambuc #include <minix/bdev.h>
5*433d6423SLionel Sambuc #include <assert.h>
6*433d6423SLionel Sambuc 
7*433d6423SLionel Sambuc #include "const.h"
8*433d6423SLionel Sambuc #include "type.h"
9*433d6423SLionel Sambuc #include "proto.h"
10*433d6423SLionel Sambuc 
static void bdev_cancel(dev_t dev)
{
/* Called once recovery of the driver for 'dev' has failed too many times.
 * Every asynchronous call still pending on that major device is completed
 * with EDEADSRCDST, after which the driver is marked as unusable.
 */
  bdev_call_t *cur, *next;

  printf("bdev: giving up on major %d\n", major(dev));

  /* Walk all pending asynchronous requests for this major and fail them.
   * The iterator hands back its continuation through 'next'.
   */
  for (cur = bdev_call_iter_maj(dev, NULL, &next); cur != NULL;
	cur = bdev_call_iter_maj(dev, cur, &next))
	bdev_callback_asyn(cur, EDEADSRCDST);

  /* Mark the driver as unusable. */
  bdev_driver_clear(dev);
}
29*433d6423SLionel Sambuc 
static int bdev_recover(dev_t dev, int update_endpt)
{
/* Try to recover from an IPC failure towards the driver of the given device.
 * When 'update_endpt' is nonzero, the driver endpoint is looked up again
 * before the first attempt; after any failed attempt it is always looked up
 * again. Returns TRUE if recovery succeeded. Otherwise the driver is given up
 * on (see bdev_cancel) and FALSE is returned.
 */
  bdev_call_t *cur, *next;
  endpoint_t new_endpt;
  int status, in_use, attempt;

  /* Stay quiet unless the device is actually in use (an open minor or a
   * pending asynchronous call): drivers may be legitimately restarted while
   * idle, and that should not flood the console.
   */
  in_use = bdev_minor_is_open(dev) || bdev_call_iter_maj(dev, NULL, &next);

  if (in_use)
	printf("bdev: recovering from a driver restart on major %d\n",
		major(dev));

  for (attempt = 0; attempt < RECOVER_TRIES; attempt++) {
	/* Refresh the endpoint first when asked to. */
	if (update_endpt)
		(void) bdev_driver_update(dev);

	new_endpt = bdev_driver_get(dev);
	if (new_endpt == NONE)
		break;

	/* Whatever fails below, the next attempt must refresh the endpoint. */
	update_endpt = TRUE;

	/* Reopen all minor devices on the new driver. */
	status = bdev_minor_reopen(dev);

	/* The driver died again: this is worth another attempt. */
	if (status == EDEADSRCDST)
		continue;

	/* Any other error leaves the driver unusable as is, and we have no
	 * way of forcing it to restart.
	 */
	if (status != OK)
		break;

	/* Reissue every pending asynchronous request. A request that cannot
	 * be reissued is failed individually; this does not affect the
	 * overall recovery. No new IPC failures are reported here.
	 */
	for (cur = bdev_call_iter_maj(dev, NULL, &next); cur != NULL;
		cur = bdev_call_iter_maj(dev, cur, &next)) {
		status = bdev_restart_asyn(cur);
		if (status != OK)
			bdev_callback_asyn(cur, status);
	}

	/* Recovery appears to have worked. The caller may now reissue its
	 * current synchronous request (if any) and carry on as usual.
	 */
	if (in_use)
		printf("bdev: recovery successful, new driver at %d\n",
			new_endpt);

	return TRUE;
  }

  /* Out of attempts, or hit an unrecoverable error: give up on the driver. */
  bdev_cancel(dev);

  return FALSE;
}
101*433d6423SLionel Sambuc 
void bdev_update(dev_t dev, char *label)
{
/* Associate the driver identified by 'label' with the given device, and run
 * recovery if that changes an endpoint that was previously set.
 */
  endpoint_t prev_endpt, cur_endpt;

  /* Remember the old endpoint before installing the new one. */
  prev_endpt = bdev_driver_get(dev);
  cur_endpt = bdev_driver_set(dev, label);

  /* Nothing to recover if there was no driver before, or if the endpoint
   * did not change.
   */
  if (prev_endpt == NONE || prev_endpt == cur_endpt)
	return;

  /* The endpoint has just been set, so recovery must not update it again. */
  bdev_recover(dev, FALSE /*update_endpt*/);
}
118*433d6423SLionel Sambuc 
int bdev_senda(dev_t dev, const message *m_orig, bdev_id_t id)
{
/* Send the request in 'm_orig' asynchronously to the driver of the given
 * device, tagged with the call ID 'id'. Returns EDEADSRCDST if there is no
 * usable driver endpoint, and the result of asynsend() otherwise. IPC errors
 * towards the driver are never reported through this function; a failed
 * asynchronous send is discovered later by other means.
 */
  endpoint_t endpt;
  message req;
  int r;

  /* Without a usable driver endpoint there is nothing to send to. */
  endpt = bdev_driver_get(dev);
  if (endpt == NONE)
	return EDEADSRCDST;

  /* Work on a private copy, so the caller's message is left untouched. */
  req = *m_orig;
  req.m_lbdev_lblockdriver_msg.id = id;

  if ((r = asynsend(endpt, &req)) != OK)
	printf("bdev: asynsend to driver (%d) failed (%d)\n", endpt, r);

  return r;
}
143*433d6423SLionel Sambuc 
int bdev_sendrec(dev_t dev, const message *m_orig)
{
/* Send the request in 'm_orig' synchronously to the driver of the given
 * device and wait for its reply. Returns the status from the driver's reply,
 * ERESTART if the caller should reissue the request, EDEADSRCDST if no usable
 * driver is available, EINVAL on a malformed reply, or another IPC error code.
 */
  endpoint_t endpt;
  message m;
  int r;

  /* Without a usable driver endpoint, fail right away. */
  endpt = bdev_driver_get(dev);
  if (endpt == NONE)
	return EDEADSRCDST;

  /* Send a private copy of the request, marked as synchronous, and block
   * until the reply arrives in the same buffer.
   */
  m = *m_orig;
  m.m_lbdev_lblockdriver_msg.id = NO_ID;

  r = ipc_sendrec(endpt, &m);

  /* A dead endpoint means the driver has died. It is assumed to be restarted
   * shortly, so recovery is attempted; if that works, the caller gets to
   * reissue the synchronous request.
   */
  if (r == EDEADSRCDST)
	return bdev_recover(dev, TRUE /*update_endpt*/) ?
		ERESTART : EDEADSRCDST;

  if (r != OK) {
	printf("bdev: IPC to driver (%d) failed (%d)\n", endpt, r);
	return r;
  }

  if (m.m_type != BDEV_REPLY) {
	printf("bdev: driver (%d) sent weird response (%d)\n",
		endpt, m.m_type);
	return EINVAL;
  }

  /* By protocol contract, an asynchronous reply can never satisfy a
   * synchronous SENDREC call, so any ID other than NO_ID is invalid here.
   */
  if (m.m_lblockdriver_lbdev_reply.id != NO_ID) {
	printf("bdev: driver (%d) sent invalid ID (%d)\n", endpt,
		m.m_lblockdriver_lbdev_reply.id);
	return EINVAL;
  }

  /* The common case: hand the driver's result back to the caller. */
  if (m.m_lblockdriver_lbdev_reply.status != ERESTART)
	return m.m_lblockdriver_lbdev_reply.status;

  /* Barring misuse of libbdev, ERESTART only shows up when a raw block I/O
   * request reached a restarted driver before VFS had a chance to reopen the
   * associated device. This is highly exceptional and hard to handle
   * properly, so take the easy way out: sleep briefly to let VFS reopen the
   * device, then have the caller resend the request. If the call keeps
   * failing, the caller will eventually give up.
   */
  printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
	endpt);

  micro_delay(1000);

  return ERESTART;
}
214*433d6423SLionel Sambuc 
static int bdev_receive(dev_t dev, message *m)
{
/* Receive one valid BDEV_REPLY message from the driver of the given device
 * into 'm'. Returns OK on success, ERESTART if no message was received but
 * the caller may call again, EDEADSRCDST if the driver is unusable, EINVAL on
 * an unexpected message type, or another IPC error code.
 */
  endpoint_t endpt;
  int r, nr_dead = 0;

  for (;;) {
	/* Look up the endpoint anew on each pass: it changes with every
	 * driver restart.
	 */
	endpt = bdev_driver_get(dev);
	if (endpt == NONE)
		return EDEADSRCDST;

	r = sef_receive(endpt, m);

	if (r == EDEADSRCDST) {
		/* Retry limit reached, even though every recovery attempt
		 * so far succeeded. Let the caller decide whether it still
		 * wants to wait: ERESTART means "call again, but nothing
		 * was received".
		 */
		nr_dead++;
		if (nr_dead == DRIVER_TRIES)
			return ERESTART;

		/* Try to recover. On success all asynchronous requests have
		 * been resent, so waiting for a reply can start over.
		 */
		if (!bdev_recover(dev, TRUE /*update_endpt*/))
			return EDEADSRCDST;

		continue;
	}

	if (r != OK) {
		printf("bdev: IPC to driver (%d) failed (%d)\n", endpt, r);

		return r;
	}

	if (m->m_type != BDEV_REPLY) {
		printf("bdev: driver (%d) sent weird response (%d)\n",
			endpt, m->m_type);
		return EINVAL;
	}

	/* Checking the ID and status is left to the caller. */
	return OK;
  }
}
268*433d6423SLionel Sambuc 
void bdev_reply_asyn(message *m)
{
/* Process a reply message that came in from a disk driver. The caller must
 * make sure that 'm' is a BDEV_REPLY message. Replies with an unknown ID, or
 * from an endpoint other than the current driver of the call's device, are
 * dropped.
 */
  bdev_call_t *call;
  endpoint_t endpt;
  int r;

  /* The caller is required to have checked the message type. */
  assert(m->m_type == BDEV_REPLY);

  /* Look up the asynchronous call that this reply belongs to. */
  call = bdev_call_get(m->m_lblockdriver_lbdev_reply.id);
  if (call == NULL) {
	printf("bdev: driver (%d) replied to unknown request (%d)\n",
		m->m_source, m->m_lblockdriver_lbdev_reply.id);
	return;
  }

  /* Only accept the reply from the endpoint currently set for the device. */
  endpt = bdev_driver_get(call->dev);

  if (m->m_source != endpt) {
	/* With no endpoint set, this may just be a stray reply; only
	 * complain when a different driver is expected.
	 */
	if (endpt != NONE)
		printf("bdev: driver (%d) replied to request not sent to it\n",
			m->m_source);
	return;
  }

  /* Normal completion: pass the driver's status on to the callback. */
  if (m->m_lblockdriver_lbdev_reply.status != ERESTART) {
	bdev_callback_asyn(call, m->m_lblockdriver_lbdev_reply.status);
	return;
  }

  /* ERESTART is handled as in bdev_sendrec(): wait briefly so the device can
   * be reopened, then resend the request; if resending fails, complete the
   * call with that error.
   */
  printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
	endpt);

  micro_delay(1000);

  r = bdev_restart_asyn(call);
  if (r != OK)
	bdev_callback_asyn(call, r);
}
316*433d6423SLionel Sambuc 
int bdev_wait_asyn(bdev_id_t id)
{
/* Block until the asynchronous request with the given ID has completed.
 * Returns ENOENT if no such request exists, OK once the request is gone, or
 * the error returned by bdev_receive().
 */
  bdev_call_t *call;
  dev_t dev;
  message reply;
  int r;

  call = bdev_call_get(id);
  if (call == NULL)
	return ENOENT;

  /* Remember the device now; the call structure goes away on completion. */
  dev = call->dev;

  for (;;) {
	r = bdev_receive(dev, &reply);

	/* Processing a reply frees up the matching call structure as a side
	 * effect. If ERESTART keeps coming back, the asynchronous request is
	 * resent over and over until it hits the retry limit, at which point
	 * the call disappears and this loop ends.
	 */
	if (r == OK)
		bdev_reply_asyn(&reply);
	else if (r != ERESTART)
		return r;

	/* Stop as soon as our own request is no longer pending. */
	if (bdev_call_get(id) == NULL)
		break;
  }

  return OK;
}
347