xref: /minix3/minix/drivers/storage/filter/driver.c (revision 433d6423c39e34ec4b79c950597bb2d236f886be)
1 /* Filter driver - lowest layer - disk driver management */
2 
3 #include "inc.h"
4 
5 /* Drivers. */
6 static struct driverinfo driver[2];
7 
8 /* State variables. */
9 static asynmsg_t amsgtable[2];
10 
11 static int size_known = 0;
12 static u64_t disk_size;
13 
14 static int problem_stats[BD_LAST] = { 0 };
15 
16 /*===========================================================================*
17  *				driver_open				     *
18  *===========================================================================*/
driver_open(int which)19 static int driver_open(int which)
20 {
21 	/* Perform an open or close operation on the driver. This is
22 	 * unfinished code: we should never be doing a blocking ipc_sendrec()
23          * to the driver.
24 	 */
25 	message msg;
26 	cp_grant_id_t gid;
27 	struct part_geom part;
28 	sector_t sectors;
29 	int r;
30 
31 	memset(&msg, 0, sizeof(msg));
32 	msg.m_type = BDEV_OPEN;
33 	msg.m_lbdev_lblockdriver_msg.minor = driver[which].minor;
34 	msg.m_lbdev_lblockdriver_msg.access = BDEV_R_BIT | BDEV_W_BIT;
35 	msg.m_lbdev_lblockdriver_msg.id = 0;
36 	r = ipc_sendrec(driver[which].endpt, &msg);
37 
38 	if (r != OK) {
39 		/* Should we restart the driver now? */
40 		printf("Filter: driver_open: ipc_sendrec returned %d\n", r);
41 
42 		return RET_REDO;
43 	}
44 
45 	if(msg.m_type != BDEV_REPLY ||
46 		msg.m_lblockdriver_lbdev_reply.status != OK) {
47 		printf("Filter: driver_open: ipc_sendrec returned %d, %d\n",
48 			msg.m_type, msg.m_lblockdriver_lbdev_reply.status);
49 
50 		return RET_REDO;
51 	}
52 
53 	/* Take the opportunity to retrieve the hard disk size. */
54 	gid = cpf_grant_direct(driver[which].endpt,
55 		(vir_bytes) &part, sizeof(part), CPF_WRITE);
56 	if(!GRANT_VALID(gid))
57 		panic("invalid grant: %d", gid);
58 
59 	memset(&msg, 0, sizeof(msg));
60 	msg.m_type = BDEV_IOCTL;
61 	msg.m_lbdev_lblockdriver_msg.minor = driver[which].minor;
62 	msg.m_lbdev_lblockdriver_msg.request = DIOCGETP;
63 	msg.m_lbdev_lblockdriver_msg.grant = gid;
64 	msg.m_lbdev_lblockdriver_msg.user = NONE;
65 	msg.m_lbdev_lblockdriver_msg.id = 0;
66 
67 	r = ipc_sendrec(driver[which].endpt, &msg);
68 
69 	cpf_revoke(gid);
70 
71 	if (r != OK || msg.m_type != BDEV_REPLY ||
72 		msg.m_lblockdriver_lbdev_reply.status != OK) {
73 		/* Not sure what to do here, either. */
74 		printf("Filter: ioctl(DIOCGETP) returned (%d, %d)\n",
75 			r, msg.m_type);
76 
77 		return RET_REDO;
78 	}
79 
80 	if(!size_known) {
81 		disk_size = part.size;
82 		size_known = 1;
83 		sectors = (unsigned long)(disk_size / SECTOR_SIZE);
84 		if ((u64_t)sectors * SECTOR_SIZE != disk_size) {
85 			printf("Filter: partition too large\n");
86 
87 			return RET_REDO;
88 		}
89 #if DEBUG
90 		printf("Filter: partition size: 0x%"PRIx64" / %lu sectors\n",
91 			disk_size, sectors);
92 #endif
93 	} else {
94 		if (disk_size != part.size) {
95 			printf("Filter: partition size mismatch "
96 				"(0x%"PRIx64" != 0x%"PRIx64")\n",
97 				part.size, disk_size);
98 
99 			return RET_REDO;
100 		}
101 	}
102 
103 	return OK;
104 }
105 
106 /*===========================================================================*
107  *				driver_close				     *
108  *===========================================================================*/
driver_close(int which)109 static int driver_close(int which)
110 {
111 	message msg;
112 	int r;
113 
114 	memset(&msg, 0, sizeof(msg));
115 	msg.m_type = BDEV_CLOSE;
116 	msg.m_lbdev_lblockdriver_msg.minor = driver[which].minor;
117 	msg.m_lbdev_lblockdriver_msg.id = 0;
118 	r = ipc_sendrec(driver[which].endpt, &msg);
119 
120 	if (r != OK) {
121 		/* Should we restart the driver now? */
122 		printf("Filter: driver_close: ipc_sendrec returned %d\n", r);
123 
124 		return RET_REDO;
125 	}
126 
127 	if(msg.m_type != BDEV_REPLY ||
128 		msg.m_lblockdriver_lbdev_reply.status != OK) {
129 		printf("Filter: driver_close: ipc_sendrec returned %d, %d\n",
130 			msg.m_type, msg.m_lblockdriver_lbdev_reply.status);
131 
132 		return RET_REDO;
133 	}
134 
135 	return OK;
136 }
137 
138 /*===========================================================================*
139  *				driver_init				     *
140  *===========================================================================*/
driver_init(void)141 void driver_init(void)
142 {
143 	/* Initialize the driver layer. */
144 	int r;
145 
146 	memset(driver, 0, sizeof(driver));
147 
148 	/* Endpoints unknown. */
149 	driver[DRIVER_MAIN].endpt = NONE;
150 	driver[DRIVER_BACKUP].endpt = NONE;
151 
152 	/* Get disk driver's and this proc's endpoint. */
153 	driver[DRIVER_MAIN].label = MAIN_LABEL;
154 	driver[DRIVER_MAIN].minor = MAIN_MINOR;
155 
156 	/* No up received yet but expected when the driver starts. */
157 	driver[DRIVER_MAIN].up_event = UP_EXPECTED;
158 	driver[DRIVER_BACKUP].up_event = UP_EXPECTED;
159 
160 	r = ds_retrieve_label_endpt(driver[DRIVER_MAIN].label,
161 		&driver[DRIVER_MAIN].endpt);
162 	if (r != OK) {
163 		printf("Filter: failed to get main disk driver's endpoint: "
164 			"%d\n", r);
165 		bad_driver(DRIVER_MAIN, BD_DEAD, EFAULT);
166 		check_driver(DRIVER_MAIN);
167 	}
168 	else if (driver_open(DRIVER_MAIN) != OK) {
169 		panic("unhandled driver_open failure");
170 	}
171 
172 	if(USE_MIRROR) {
173 		driver[DRIVER_BACKUP].label = BACKUP_LABEL;
174 		driver[DRIVER_BACKUP].minor = BACKUP_MINOR;
175 
176 		if(!strcmp(driver[DRIVER_MAIN].label,
177 				driver[DRIVER_BACKUP].label)) {
178 			panic("same driver: not tested");
179 		}
180 
181 		r = ds_retrieve_label_endpt(driver[DRIVER_BACKUP].label,
182 			&driver[DRIVER_BACKUP].endpt);
183 		if (r != OK) {
184 			printf("Filter: failed to get backup disk driver's "
185 				"endpoint: %d\n", r);
186 			bad_driver(DRIVER_BACKUP, BD_DEAD, EFAULT);
187 			check_driver(DRIVER_BACKUP);
188 		}
189 		else if (driver_open(DRIVER_BACKUP) != OK) {
190 			panic("unhandled driver_open failure");
191 		}
192 	}
193 }
194 
195 /*===========================================================================*
196  *				driver_shutdown				     *
197  *===========================================================================*/
driver_shutdown(void)198 void driver_shutdown(void)
199 {
200 	/* Clean up. */
201 
202 #if DEBUG
203 	printf("Filter: %u driver deaths, %u protocol errors, "
204 		"%u data errors\n", problem_stats[BD_DEAD],
205 		problem_stats[BD_PROTO], problem_stats[BD_DATA]);
206 #endif
207 
208 	if(driver_close(DRIVER_MAIN) != OK)
209 		printf("Filter: BDEV_CLOSE failed on shutdown (1)\n");
210 
211 	if(USE_MIRROR)
212 		if(driver_close(DRIVER_BACKUP) != OK)
213 			printf("Filter: BDEV_CLOSE failed on shutdown (2)\n");
214 }
215 
216 /*===========================================================================*
217  *				get_raw_size				     *
218  *===========================================================================*/
get_raw_size(void)219 u64_t get_raw_size(void)
220 {
221 	/* Return the size of the raw disks as used by the filter driver.
222 	 */
223 
224 	return disk_size;
225 }
226 
227 /*===========================================================================*
228  *				reset_kills				     *
229  *===========================================================================*/
reset_kills(void)230 void reset_kills(void)
231 {
232 	/* Reset kill and retry statistics. */
233 	driver[DRIVER_MAIN].kills = 0;
234 	driver[DRIVER_MAIN].retries = 0;
235 	driver[DRIVER_BACKUP].kills = 0;
236 	driver[DRIVER_BACKUP].retries = 0;
237 }
238 
239 /*===========================================================================*
240  *				bad_driver				     *
241  *===========================================================================*/
bad_driver(int which,int type,int error)242 int bad_driver(int which, int type, int error)
243 {
244 	/* A disk driver has died or produced an error. Mark it so that we can
245 	 * deal with it later, and return RET_REDO to indicate that the
246 	 * current operation is to be retried. Also store an error code to
247 	 * return to the user if the situation is unrecoverable.
248 	 */
249 	driver[which].problem = type;
250 	driver[which].error = error;
251 
252 	return RET_REDO;
253 }
254 
255 /*===========================================================================*
256  *				new_driver_ep				     *
257  *===========================================================================*/
new_driver_ep(int which)258 static int new_driver_ep(int which)
259 {
260 	/* See if a new driver instance has already been started for the given
261 	 * driver, by retrieving its entry from DS.
262 	 */
263 	int r;
264 	endpoint_t endpt;
265 
266 	r = ds_retrieve_label_endpt(driver[which].label, &endpt);
267 
268 	if (r != OK) {
269 		printf("Filter: DS query for %s failed\n",
270 			driver[which].label);
271 
272 		return 0;
273 	}
274 
275 	if (endpt == driver[which].endpt) {
276 #if DEBUG
277 		printf("Filter: same endpoint for %s\n", driver[which].label);
278 #endif
279 		return 0;
280 	}
281 
282 #if DEBUG
283 	printf("Filter: new enpdoint for %s: %d -> %d\n", driver[which].label,
284 		driver[which].endpt, endpt);
285 #endif
286 
287 	driver[which].endpt = endpt;
288 
289 	return 1;
290 }
291 
292 /*===========================================================================*
293  *				check_problem				     *
294  *===========================================================================*/
check_problem(int which,int problem,int retries,int * tell_rs)295 static int check_problem(int which, int problem, int retries, int *tell_rs)
296 {
297 	/* A problem has occurred with a driver. Update statistics, and decide
298 	 * what to do. If EAGAIN is returned, the driver should be restarted;
299 	 * any other result will be passed up.
300 	 */
301 
302 #if DEBUG
303 	printf("Filter: check_problem processing driver %d, problem %d\n",
304 		which, problem);
305 #endif
306 
307 	problem_stats[problem]++;
308 
309 	if(new_driver_ep(which)) {
310 #if DEBUG
311 		printf("Filter: check_problem: noticed a new driver\n");
312 #endif
313 
314 		if(driver_open(which) == OK) {
315 #if DEBUG2
316 			printf("Filter: open OK -> no recovery\n");
317 #endif
318 			return OK;
319 		} else {
320 #if DEBUG2
321 			printf("Filter: open not OK -> recovery\n");
322 #endif
323 			problem = BD_PROTO;
324 			problem_stats[problem]++;
325 		}
326 	}
327 
328 	/* If the driver has died, we always need to restart it. If it has
329 	 * been giving problems, we first retry the request, up to N times,
330 	 * after which we kill and restart the driver. We restart the driver
331 	 * up to M times, after which we remove the driver from the mirror
332 	 * configuration. If we are not set up to do mirroring, we can only
333 	 * do one thing, and that is continue to limp along with the bad
334 	 * driver..
335 	 */
336 	switch(problem) {
337 	case BD_PROTO:
338 	case BD_DATA:
339 		driver[which].retries++;
340 
341 #if DEBUG
342 		printf("Filter: disk driver %d has had "
343 			"%d/%d retry attempts, %d/%d kills\n", which,
344 			driver[which].retries, NR_RETRIES,
345 			driver[which].kills, NR_RESTARTS);
346 #endif
347 
348 		if (driver[which].retries < NR_RETRIES) {
349 			if(retries == 1) {
350 #if DEBUG
351 				printf("Filter: not restarting; retrying "
352 					"(retries %d/%d, kills %d/%d)\n",
353 					driver[which].retries, NR_RETRIES,
354 					driver[which].kills, NR_RESTARTS);
355 #endif
356 				return OK;
357 			}
358 #if DEBUG
359 			printf("Filter: restarting (retries %d/%d, "
360 				"kills %d/%d, internal retry %d)\n",
361 				driver[which].retries, NR_RETRIES,
362 				driver[which].kills, NR_RESTARTS, retries);
363 #endif
364 		}
365 
366 #if DEBUG
367 		printf("Filter: disk driver %d has reached error "
368 			"threshold, restarting driver\n", which);
369 #endif
370 
371 		*tell_rs = (driver[which].up_event != UP_PENDING);
372 		break;
373 
374 	case BD_DEAD:
375 		/* Can't kill that which is already dead.. */
376 		*tell_rs = 0;
377 		break;
378 
379 	default:
380 		panic("invalid problem: %d", problem);
381 	}
382 
383 	/* At this point, the driver will be restarted. */
384 	driver[which].retries = 0;
385 	driver[which].kills++;
386 
387 	if (driver[which].kills < NR_RESTARTS)
388 		return EAGAIN;
389 
390 	/* We've reached the maximum number of restarts for this driver. */
391 	if (USE_MIRROR) {
392 		printf("Filter: kill threshold reached, disabling mirroring\n");
393 
394 		USE_MIRROR = 0;
395 
396 		if (which == DRIVER_MAIN) {
397 			driver[DRIVER_MAIN] = driver[DRIVER_BACKUP];
398 
399 			/* This is not necessary. */
400 			strlcpy(MAIN_LABEL, BACKUP_LABEL, sizeof(MAIN_LABEL));
401 			MAIN_MINOR = BACKUP_MINOR;
402 		}
403 
404 		driver[DRIVER_BACKUP].endpt = NONE;
405 
406 		return OK;
407 	}
408 	else {
409 		/* We tried, we really did. But now we give up. Tell the user.
410 		 */
411 		printf("Filter: kill threshold reached, returning error\n");
412 
413 		if (driver[which].error == EAGAIN) return EIO;
414 
415 		return driver[which].error;
416 	}
417 }
418 
419 /*===========================================================================*
420  *				restart_driver				     *
421  *===========================================================================*/
restart_driver(int which,int tell_rs)422 static void restart_driver(int which, int tell_rs)
423 {
424 	/* Restart the given driver. Block until the new instance is up.
425 	 */
426 	message msg;
427 	int ipc_status;
428 	int r;
429 
430 	if (tell_rs) {
431 		/* Tell RS to refresh or restart the driver */
432 		msg.m_type = RS_REFRESH;
433 		msg.m_rs_req.addr = driver[which].label;
434 		msg.m_rs_req.len = strlen(driver[which].label);
435 
436 #if DEBUG
437 		printf("Filter: asking RS to refresh %s..\n",
438 			driver[which].label);
439 #endif
440 
441 		r = ipc_sendrec(RS_PROC_NR, &msg);
442 
443 		if (r != OK || msg.m_type != OK)
444 			panic("RS request failed: %d", r);
445 
446 #if DEBUG
447 		printf("Filter: RS call succeeded\n");
448 #endif
449 	}
450 
451 	/* Wait until the new driver instance is up, and get its endpoint. */
452 #if DEBUG
453 	printf("Filter: endpoint update driver %d; old endpoint %d\n",
454 		which, driver[which].endpt);
455 #endif
456 
457 	if(driver[which].up_event == UP_EXPECTED) {
458 		driver[which].up_event = UP_NONE;
459 	}
460 	while(driver[which].up_event != UP_PENDING) {
461 		r = driver_receive(DS_PROC_NR, &msg, &ipc_status);
462 		if(r != OK)
463 			panic("driver_receive returned error: %d", r);
464 
465 		ds_event();
466 	}
467 }
468 
469 /*===========================================================================*
470  *				check_driver				     *
471  *===========================================================================*/
check_driver(int which)472 int check_driver(int which)
473 {
474 	/* See if the given driver has been troublesome, and if so, deal with
475 	 * it.
476 	 */
477 	int problem, tell_rs;
478 	int r, retries = 0;
479 
480 	problem = driver[which].problem;
481 
482 	if (problem == BD_NONE)
483 		return OK;
484 
485 	do {
486 		if(retries) {
487 #if DEBUG
488 			printf("Filter: check_driver: retry number %d\n",
489 				retries);
490 #endif
491 			problem = BD_PROTO;
492 		}
493 		retries++;
494 		driver[which].problem = BD_NONE;
495 
496 		/* Decide what to do: continue operation, restart the driver,
497 		 * or return an error.
498 		 */
499 		r = check_problem(which, problem, retries, &tell_rs);
500 		if (r != EAGAIN)
501 			return r;
502 
503 		/* Restarting the driver it is. First tell RS (if necessary),
504 		 * then wait for the new driver instance to come up.
505 		 */
506 		restart_driver(which, tell_rs);
507 
508 		/* Finally, open the device on the new driver */
509 	} while (driver_open(which) != OK);
510 
511 #if DEBUG
512 	printf("Filter: check_driver restarted driver %d, endpoint %d\n",
513 		which, driver[which].endpt);
514 #endif
515 
516 	return OK;
517 }
518 
519 /*===========================================================================*
520  *				flt_senda				     *
521  *===========================================================================*/
static int flt_senda(message *mess, int which)
{
	/* Queue the given request for asynchronous delivery to one driver.
	 * The minor device and request ID fields of the message are filled in
	 * here. Panics if ipc_senda() fails, so the only value ever returned
	 * is OK.
	 */
	asynmsg_t *slot = &amsgtable[which];
	int status;

	/* Complete the request. */
	mess->m_lbdev_lblockdriver_msg.minor = driver[which].minor;
	mess->m_lbdev_lblockdriver_msg.id = 0;

	/* Put a copy in this driver's slot and submit the whole table. */
	slot->dst = driver[which].endpt;
	slot->msg = *mess;
	slot->flags = AMF_VALID;

	status = ipc_senda(amsgtable, 2);
	if (status != OK)
		panic("ipc_senda returned error: %d", status);

	return status;
}
544 
545 /*===========================================================================*
546  *				check_senda				     *
547  *===========================================================================*/
check_senda(int which)548 static int check_senda(int which)
549 {
550 	/* Check whether an earlier senda resulted in an error indicating the
551 	 * message never got delivered. Only in that case can we reliably say
552 	 * that the driver died. Return BD_DEAD in this case, and BD_PROTO
553 	 * otherwise.
554 	 */
555 	asynmsg_t *amp;
556 
557 	amp = &amsgtable[which];
558 
559 	if ((amp->flags & AMF_DONE) && (amp->result == EDEADSRCDST)) {
560 
561 		return BD_DEAD;
562 	}
563 
564 	return BD_PROTO;
565 }
566 
567 /*===========================================================================*
568  *				flt_receive				     *
569  *===========================================================================*/
static int flt_receive(message *mess, int which)
{
	/* Wait for a reply from the given driver, or from either driver if
	 * 'which' is negative. DS notifications are processed on the fly;
	 * stale alarms, stray messages and replies from the wrong driver are
	 * skipped. Return OK once a suitable reply is in 'mess', or RET_REDO
	 * (through bad_driver) if the alarm goes off first.
	 */
	int ipc_status;
	int status;

	for (;;) {
		status = driver_receive(ANY, mess, &ipc_status);
		if (status != OK)
			panic("driver_receive returned error: %d", status);

		/* DS has news for us; handle it and keep waiting. */
		if (mess->m_source == DS_PROC_NR && is_ipc_notify(ipc_status)) {
			ds_event();
			continue;
		}

		/* Alarm notification from the clock. */
		if (mess->m_source == CLOCK && is_ipc_notify(ipc_status)) {
			if (mess->m_notify.timestamp < flt_alarm((clock_t) -1)) {
#if DEBUG
				printf("Filter: SKIPPING old alarm "
					"notification\n");
#endif
				continue;
			}

#if DEBUG
			printf("Filter: timeout waiting for disk driver %d "
				"reply!\n", which);
#endif

			/* A specific driver failed to reply: blame that one. */
			if (which >= 0)
				return bad_driver(which, check_senda(which),
					EFAULT);

			/* We were waiting for either driver, so both are at
			 * fault.
			 */
			bad_driver(DRIVER_MAIN,
				check_senda(DRIVER_MAIN), EFAULT);

			return bad_driver(DRIVER_BACKUP,
				check_senda(DRIVER_BACKUP), EFAULT);
		}

		/* Anything not coming from one of our drivers is ignored. */
		if (mess->m_source != driver[DRIVER_MAIN].endpt &&
				mess->m_source != driver[DRIVER_BACKUP].endpt) {
#if DEBUG
			printf("Filter: got STRAY message %d from %d\n",
				mess->m_type, mess->m_source);
#endif

			continue;
		}

		/* Accept the message if we did not care which driver it came
		 * from, or if it came from the driver we are waiting for.
		 */
		if (which < 0 || mess->m_source == driver[which].endpt)
			return OK;

		/* This should probably be treated as a real protocol error.
		 * We do not abort any receives (not even paired receives)
		 * except because of timeouts. Getting here means a driver
		 * replied at least the timeout period later than expected,
		 * which should be enough reason to kill it really. The other
		 * explanation is that it is actually violating the protocol
		 * and sending bogus messages...
		 */
#if DEBUG
		printf("Filter: got UNEXPECTED reply from %d\n",
			mess->m_source);
#endif
	}
}
659 
660 /*===========================================================================*
661  *				flt_sendrec				     *
662  *===========================================================================*/
static int flt_sendrec(message *mess, int which)
{
	/* Send a request to one driver and wait for its reply, guarded by an
	 * alarm of DRIVER_TIMEOUT. The reply is stored in 'mess'. If the send
	 * already shows the driver to be dead, mark it as such right away.
	 */
	int status;

	status = flt_senda(mess, which);
	if (status != OK)
		return status;

	if (check_senda(which) == BD_DEAD)
		return bad_driver(which, BD_DEAD, EFAULT);

	/* Arm the alarm for the duration of the receive only. */
	flt_alarm(DRIVER_TIMEOUT);
	status = flt_receive(mess, which);
	flt_alarm(0);

	return status;
}
684 
685 /*===========================================================================*
686  *				do_sendrec_both				     *
687  *===========================================================================*/
static int do_sendrec_both(message *m1, message *m2)
{
	/* Send m1 to the main driver and m2 to the backup driver, and collect
	 * both replies: on success m1 holds the main driver's reply and m2
	 * the backup driver's. This function will only return either OK or
	 * RET_REDO.
	 */
	message first, second;
	int status, first_from_main, other;

	/* If the two disks use the same driver, simply do two sequential
	 * flt_sendrec() calls. Such a setup is not very useful though.
	 */
	if (strcmp(driver[DRIVER_MAIN].label,
			driver[DRIVER_BACKUP].label) == 0) {
		status = flt_sendrec(m1, DRIVER_MAIN);
		if (status != OK)
			return status;

		return flt_sendrec(m2, DRIVER_BACKUP);
	}

	/* Different drivers: send both requests first, then receive twice,
	 * telling the replies apart by means of m_source.
	 */
	status = flt_senda(m1, DRIVER_MAIN);
	if (status != OK)
		return status;

	status = flt_senda(m2, DRIVER_BACKUP);
	if (status != OK)
		return status;

	/* Set alarm. */
	flt_alarm(DRIVER_TIMEOUT);

	/* The first reply may come from either driver. */
	status = flt_receive(&first, -1);
	if (status != OK) {
		flt_alarm(0);
		return status;
	}

	first_from_main = (first.m_source == driver[DRIVER_MAIN].endpt);

	if (!first_from_main &&
			first.m_source != driver[DRIVER_BACKUP].endpt) {
		panic("message from unexpected source: %d",
			first.m_source);
	}

	/* The second reply has to come from the driver not heard from yet. */
	other = first_from_main ? DRIVER_BACKUP : DRIVER_MAIN;

	status = flt_receive(&second, other);

	/* Clear the alarm. */
	flt_alarm(0);

	if (status != OK)
		return status;

	/* Hand each reply back through the message of its own driver. */
	*m1 = first_from_main ? first : second;
	*m2 = first_from_main ? second : first;

	return OK;
}
750 
751 /*===========================================================================*
752  *				do_sendrec_one				     *
753  *===========================================================================*/
static int do_sendrec_one(message *m1)
{
	/* Exchange a request and reply with the main driver only. Problems
	 * are not resolved here; they are fixed elsewhere.
	 * This function will only return either OK or RET_REDO.
	 */
	int status;

	status = flt_sendrec(m1, DRIVER_MAIN);

	return status;
}
763 
764 /*===========================================================================*
765  *				paired_sendrec				     *
766  *===========================================================================*/
static int paired_sendrec(message *m1, message *m2, int both)
{
	/* Exchange requests and replies with the disk driver(s): with both
	 * drivers if 'both' is nonzero (m1 to main, m2 to backup), and with
	 * the main driver only (m1) otherwise. Returns the result of the
	 * underlying do_sendrec_both() or do_sendrec_one() call.
	 */
	int status;

#if DEBUG2
	printf("paired_sendrec(%d) - <%d,%llx,%d> - %x,%x\n",
		both, m1->m_type, m1->m_lbdev_lblockdriver_msg.pos,
		m1->m_lbdev_lblockdriver_msg.count, m1->m_lbdev_lblockdriver_msg.grant, m2->m_lbdev_lblockdriver_msg.grant);
#endif

	status = both ? do_sendrec_both(m1, m2) : do_sendrec_one(m1);

#if DEBUG2
	if (status != OK)
		printf("paired_sendrec about to return %d\n", status);
#endif

	return status;
}
793 
794 /*===========================================================================*
795  *				single_grant				     *
796  *===========================================================================*/
static int single_grant(endpoint_t endpt, vir_bytes buf, int access,
	cp_grant_id_t *gid, iovec_s_t vector[NR_IOREQS], size_t size)
{
	/* Create the grants for a vectored request to a single driver: one
	 * grant per vector element, together covering 'size' bytes starting
	 * at 'buf', plus a read grant for the vector itself, which is stored
	 * in '*gid'. Return the number of vector elements filled in. Panics
	 * if any grant cannot be created.
	 */
	size_t chunk;
	int n;

	/* Split up the request into chunks, if requested. This makes no
	 * difference at all, except that this works around a weird performance
	 * bug with large DMA PRDs on some machines.
	 */
	chunk = (CHUNK_SIZE > 0) ? CHUNK_SIZE : size;

	n = 0;
	while (size > 0 && n < NR_IOREQS) {
		iovec_s_t *iov = &vector[n];

		/* The last element takes whatever data remains. */
		if (n == NR_IOREQS - 1 || chunk > size)
			chunk = size;

		iov->iov_grant = cpf_grant_direct(endpt, buf, chunk, access);
		if (!GRANT_VALID(iov->iov_grant))
			panic("invalid grant: %d", iov->iov_grant);
		iov->iov_size = chunk;

		buf += chunk;
		size -= chunk;
		n++;
	}

	/* The driver also needs to be able to read the vector itself. */
	*gid = cpf_grant_direct(endpt, (vir_bytes) vector,
		sizeof(*vector) * n, CPF_READ);
	if (!GRANT_VALID(*gid))
		panic("invalid grant: %d", *gid);

	return n;
}
839 
840 /*===========================================================================*
841  *				paired_grant				     *
842  *===========================================================================*/
static int paired_grant(char *buf1, char *buf2, int request,
	cp_grant_id_t *gids, iovec_s_t vectors[2][NR_IOREQS], size_t size,
	int both)
{
	/* Create the memory grants for a request: for the main driver
	 * (buf1, gids[0], vectors[0]) and, if 'both' is nonzero, for the
	 * backup driver as well (buf2, gids[1], vectors[1]). A driver whose
	 * endpoint is not positive is skipped. Return the vector element
	 * count of the last grant set created, or 0 if none was created.
	 */
	/* For a write the driver reads our buffer; otherwise it writes it. */
	int access = (request == FLT_WRITE) ? CPF_READ : CPF_WRITE;
	int count = 0;

	if (driver[DRIVER_MAIN].endpt > 0) {
		count = single_grant(driver[DRIVER_MAIN].endpt,
			(vir_bytes) buf1, access, &gids[0], vectors[0], size);
	}

	if (both && driver[DRIVER_BACKUP].endpt > 0) {
		count = single_grant(driver[DRIVER_BACKUP].endpt,
			(vir_bytes) buf2, access, &gids[1], vectors[1], size);
	}

	return count;
}
868 
869 /*===========================================================================*
870  *				single_revoke				     *
871  *===========================================================================*/
static void single_revoke(cp_grant_id_t gid,
	const iovec_s_t vector[NR_IOREQS], int count)
{
	/* Revoke all grants associated with a request to a single driver:
	 * the grants of the first 'count' vector elements, followed by the
	 * grant for the vector itself.
	 */
	int n = 0;

	while (n < count) {
		cpf_revoke(vector[n].iov_grant);
		n++;
	}

	cpf_revoke(gid);
}
887 
888 /*===========================================================================*
889  *				paired_revoke				     *
890  *===========================================================================*/
static void paired_revoke(const cp_grant_id_t *gids,
        iovec_s_t vectors[2][NR_IOREQS], int count, int both)
{
	/* Revoke the grants made to the drivers for a single request: those
	 * for the main driver (gids[0], vectors[0]) and, if 'both' is
	 * nonzero, those for the backup driver (gids[1], vectors[1]).
	 *
	 * paired_grant() skips a driver without a usable endpoint, leaving
	 * that driver's vector uninitialized and its entry in 'gids' at the
	 * caller's initial GRANT_INVALID. Such a side must be skipped here as
	 * well; revoking it would mean revoking 'count' garbage grant IDs.
	 */

	if (GRANT_VALID(gids[0]))
		single_revoke(gids[0], vectors[0], count);

	if (both && GRANT_VALID(gids[1]))
		single_revoke(gids[1], vectors[1], count);
}
902 
903 /*===========================================================================*
904  *				read_write				     *
905  *===========================================================================*/
int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request)
{
	/* Perform a read or write of '*sizep' bytes at position 'pos', using
	 * 'bufa' for the main driver and 'bufb' for the backup driver. The
	 * backup driver takes part only if mirroring is enabled and the
	 * request is not FLT_READ. On success, return OK with '*sizep'
	 * lowered to the actual size if a driver legitimately transferred
	 * less. Otherwise return the error from the exchange, or the result
	 * of bad_driver() for a driver that sent an unacceptable reply.
	 */
	iovec_s_t vectors[2][NR_IOREQS];
	cp_grant_id_t gids[2];
	message m1, m2;
	int r, both, count;

	gids[0] = GRANT_INVALID;
	gids[1] = GRANT_INVALID;

	/* Send two requests only if mirroring is enabled and the given request
	 * is either FLT_READ2 or FLT_WRITE.
	 */
	both = (USE_MIRROR && request != FLT_READ);

	count = paired_grant(bufa, bufb, request, gids, vectors, *sizep, both);

	/* Both requests are identical apart from the grant. */
	memset(&m1, 0, sizeof(m1));
	m1.m_type = (request == FLT_WRITE) ? BDEV_SCATTER : BDEV_GATHER;
	m1.m_lbdev_lblockdriver_msg.count = count;
	m1.m_lbdev_lblockdriver_msg.pos = pos;
	m2 = m1;
	m1.m_lbdev_lblockdriver_msg.grant = gids[0];
	m2.m_lbdev_lblockdriver_msg.grant = gids[1];

	r = paired_sendrec(&m1, &m2, both);

	/* The grants have served their purpose, whatever the outcome. */
	paired_revoke(gids, vectors, count, both);

	if (r != OK) {
#if DEBUG
		if (r != RET_REDO)
			printf("Filter: paired_sendrec returned %d\n", r);
#endif
		return r;
	}

	/* Check the reply from the main driver. */
	if (m1.m_type != BDEV_REPLY ||
		m1.m_lblockdriver_lbdev_reply.status < 0) {
		int err = EFAULT;

		printf("Filter: unexpected/invalid reply from main driver: "
			"(%x, %d)\n", m1.m_type,
			m1.m_lblockdriver_lbdev_reply.status);

		/* A proper reply carries the driver's own error code. */
		if (m1.m_type == BDEV_REPLY)
			err = m1.m_lblockdriver_lbdev_reply.status;

		return bad_driver(DRIVER_MAIN, BD_PROTO, err);
	}

	if (m1.m_lblockdriver_lbdev_reply.status != (ssize_t) *sizep) {
		printf("Filter: truncated reply from main driver\n");

		/* If the driver returned a value *larger* than we requested,
		 * OR if we did NOT exceed the disk size, then we should
		 * report the driver for acting strangely!
		 */
		if (m1.m_lblockdriver_lbdev_reply.status > (ssize_t) *sizep ||
			(pos + (unsigned int)
			    m1.m_lblockdriver_lbdev_reply.status < disk_size))
			return bad_driver(DRIVER_MAIN, BD_PROTO, EFAULT);

		/* Return the actual size. */
		*sizep = m1.m_lblockdriver_lbdev_reply.status;
	}

	/* Without a second request there is nothing more to check. */
	if (!both)
		return OK;

	/* Check the reply from the backup driver in the same way. */
	if (m2.m_type != BDEV_REPLY ||
		m2.m_lblockdriver_lbdev_reply.status < 0) {
		int err = EFAULT;

		printf("Filter: unexpected/invalid reply from "
			"backup driver (%x, %d)\n",
			m2.m_type,
			m2.m_lblockdriver_lbdev_reply.status);

		if (m2.m_type == BDEV_REPLY)
			err = m2.m_lblockdriver_lbdev_reply.status;

		return bad_driver(DRIVER_BACKUP, BD_PROTO, err);
	}

	if (m2.m_lblockdriver_lbdev_reply.status != (ssize_t) *sizep) {
		printf("Filter: truncated reply from backup driver\n");

		/* As above */
		if (m2.m_lblockdriver_lbdev_reply.status > (ssize_t) *sizep ||
			(pos + (unsigned int)
			    m2.m_lblockdriver_lbdev_reply.status < disk_size))
			return bad_driver(DRIVER_BACKUP, BD_PROTO, EFAULT);

		/* Return the actual size. */
		if ((ssize_t)*sizep >= m2.m_lblockdriver_lbdev_reply.status)
			*sizep = m2.m_lblockdriver_lbdev_reply.status;
	}

	return OK;
}
1003 
1004 /*===========================================================================*
1005  *				 ds_event				     *
1006  *===========================================================================*/
ds_event()1007 void ds_event()
1008 {
1009 	char key[DS_MAX_KEYLEN];
1010 	char *blkdriver_prefix = "drv.blk.";
1011 	u32_t value;
1012 	int type;
1013 	endpoint_t owner_endpoint;
1014 	int r;
1015 	int which;
1016 
1017 	/* Get the event and the owner from DS. */
1018 	r = ds_check(key, &type, &owner_endpoint);
1019 	if(r != OK) {
1020 		if(r != ENOENT)
1021 			printf("Filter: ds_event: ds_check failed: %d\n", r);
1022 		return;
1023 	}
1024 	r = ds_retrieve_u32(key, &value);
1025 	if(r != OK) {
1026 		printf("Filter: ds_event: ds_retrieve_u32 failed\n");
1027 		return;
1028 	}
1029 
1030 	/* Only check for VFS driver up events. */
1031 	if(strncmp(key, blkdriver_prefix, strlen(blkdriver_prefix))
1032 	   || value != DS_DRIVER_UP) {
1033 		return;
1034 	}
1035 
1036 	/* See if this is a driver we are responsible for. */
1037 	if(driver[DRIVER_MAIN].endpt == owner_endpoint) {
1038 		which = DRIVER_MAIN;
1039 	}
1040 	else if(driver[DRIVER_BACKUP].endpt == owner_endpoint) {
1041 		which = DRIVER_BACKUP;
1042 	}
1043 	else {
1044 		return;
1045 	}
1046 
1047 	/* Mark the driver as (re)started. */
1048 	driver[which].up_event = driver[which].up_event == UP_EXPECTED ?
1049 		UP_NONE : UP_PENDING;
1050 }
1051 
1052