1 /* Filter driver - lowest layer - disk driver management */
2
3 #include "inc.h"
4
5 /* Drivers. */
6 static struct driverinfo driver[2];
7
8 /* State variables. */
9 static asynmsg_t amsgtable[2];
10
11 static int size_known = 0;
12 static u64_t disk_size;
13
14 static int problem_stats[BD_LAST] = { 0 };
15
16 /*===========================================================================*
17 * driver_open *
18 *===========================================================================*/
driver_open(int which)19 static int driver_open(int which)
20 {
21 /* Perform an open or close operation on the driver. This is
22 * unfinished code: we should never be doing a blocking ipc_sendrec()
23 * to the driver.
24 */
25 message msg;
26 cp_grant_id_t gid;
27 struct part_geom part;
28 sector_t sectors;
29 int r;
30
31 memset(&msg, 0, sizeof(msg));
32 msg.m_type = BDEV_OPEN;
33 msg.m_lbdev_lblockdriver_msg.minor = driver[which].minor;
34 msg.m_lbdev_lblockdriver_msg.access = BDEV_R_BIT | BDEV_W_BIT;
35 msg.m_lbdev_lblockdriver_msg.id = 0;
36 r = ipc_sendrec(driver[which].endpt, &msg);
37
38 if (r != OK) {
39 /* Should we restart the driver now? */
40 printf("Filter: driver_open: ipc_sendrec returned %d\n", r);
41
42 return RET_REDO;
43 }
44
45 if(msg.m_type != BDEV_REPLY ||
46 msg.m_lblockdriver_lbdev_reply.status != OK) {
47 printf("Filter: driver_open: ipc_sendrec returned %d, %d\n",
48 msg.m_type, msg.m_lblockdriver_lbdev_reply.status);
49
50 return RET_REDO;
51 }
52
53 /* Take the opportunity to retrieve the hard disk size. */
54 gid = cpf_grant_direct(driver[which].endpt,
55 (vir_bytes) &part, sizeof(part), CPF_WRITE);
56 if(!GRANT_VALID(gid))
57 panic("invalid grant: %d", gid);
58
59 memset(&msg, 0, sizeof(msg));
60 msg.m_type = BDEV_IOCTL;
61 msg.m_lbdev_lblockdriver_msg.minor = driver[which].minor;
62 msg.m_lbdev_lblockdriver_msg.request = DIOCGETP;
63 msg.m_lbdev_lblockdriver_msg.grant = gid;
64 msg.m_lbdev_lblockdriver_msg.user = NONE;
65 msg.m_lbdev_lblockdriver_msg.id = 0;
66
67 r = ipc_sendrec(driver[which].endpt, &msg);
68
69 cpf_revoke(gid);
70
71 if (r != OK || msg.m_type != BDEV_REPLY ||
72 msg.m_lblockdriver_lbdev_reply.status != OK) {
73 /* Not sure what to do here, either. */
74 printf("Filter: ioctl(DIOCGETP) returned (%d, %d)\n",
75 r, msg.m_type);
76
77 return RET_REDO;
78 }
79
80 if(!size_known) {
81 disk_size = part.size;
82 size_known = 1;
83 sectors = (unsigned long)(disk_size / SECTOR_SIZE);
84 if ((u64_t)sectors * SECTOR_SIZE != disk_size) {
85 printf("Filter: partition too large\n");
86
87 return RET_REDO;
88 }
89 #if DEBUG
90 printf("Filter: partition size: 0x%"PRIx64" / %lu sectors\n",
91 disk_size, sectors);
92 #endif
93 } else {
94 if (disk_size != part.size) {
95 printf("Filter: partition size mismatch "
96 "(0x%"PRIx64" != 0x%"PRIx64")\n",
97 part.size, disk_size);
98
99 return RET_REDO;
100 }
101 }
102
103 return OK;
104 }
105
106 /*===========================================================================*
107 * driver_close *
108 *===========================================================================*/
driver_close(int which)109 static int driver_close(int which)
110 {
111 message msg;
112 int r;
113
114 memset(&msg, 0, sizeof(msg));
115 msg.m_type = BDEV_CLOSE;
116 msg.m_lbdev_lblockdriver_msg.minor = driver[which].minor;
117 msg.m_lbdev_lblockdriver_msg.id = 0;
118 r = ipc_sendrec(driver[which].endpt, &msg);
119
120 if (r != OK) {
121 /* Should we restart the driver now? */
122 printf("Filter: driver_close: ipc_sendrec returned %d\n", r);
123
124 return RET_REDO;
125 }
126
127 if(msg.m_type != BDEV_REPLY ||
128 msg.m_lblockdriver_lbdev_reply.status != OK) {
129 printf("Filter: driver_close: ipc_sendrec returned %d, %d\n",
130 msg.m_type, msg.m_lblockdriver_lbdev_reply.status);
131
132 return RET_REDO;
133 }
134
135 return OK;
136 }
137
138 /*===========================================================================*
139 * driver_init *
140 *===========================================================================*/
driver_init(void)141 void driver_init(void)
142 {
143 /* Initialize the driver layer. */
144 int r;
145
146 memset(driver, 0, sizeof(driver));
147
148 /* Endpoints unknown. */
149 driver[DRIVER_MAIN].endpt = NONE;
150 driver[DRIVER_BACKUP].endpt = NONE;
151
152 /* Get disk driver's and this proc's endpoint. */
153 driver[DRIVER_MAIN].label = MAIN_LABEL;
154 driver[DRIVER_MAIN].minor = MAIN_MINOR;
155
156 /* No up received yet but expected when the driver starts. */
157 driver[DRIVER_MAIN].up_event = UP_EXPECTED;
158 driver[DRIVER_BACKUP].up_event = UP_EXPECTED;
159
160 r = ds_retrieve_label_endpt(driver[DRIVER_MAIN].label,
161 &driver[DRIVER_MAIN].endpt);
162 if (r != OK) {
163 printf("Filter: failed to get main disk driver's endpoint: "
164 "%d\n", r);
165 bad_driver(DRIVER_MAIN, BD_DEAD, EFAULT);
166 check_driver(DRIVER_MAIN);
167 }
168 else if (driver_open(DRIVER_MAIN) != OK) {
169 panic("unhandled driver_open failure");
170 }
171
172 if(USE_MIRROR) {
173 driver[DRIVER_BACKUP].label = BACKUP_LABEL;
174 driver[DRIVER_BACKUP].minor = BACKUP_MINOR;
175
176 if(!strcmp(driver[DRIVER_MAIN].label,
177 driver[DRIVER_BACKUP].label)) {
178 panic("same driver: not tested");
179 }
180
181 r = ds_retrieve_label_endpt(driver[DRIVER_BACKUP].label,
182 &driver[DRIVER_BACKUP].endpt);
183 if (r != OK) {
184 printf("Filter: failed to get backup disk driver's "
185 "endpoint: %d\n", r);
186 bad_driver(DRIVER_BACKUP, BD_DEAD, EFAULT);
187 check_driver(DRIVER_BACKUP);
188 }
189 else if (driver_open(DRIVER_BACKUP) != OK) {
190 panic("unhandled driver_open failure");
191 }
192 }
193 }
194
195 /*===========================================================================*
196 * driver_shutdown *
197 *===========================================================================*/
driver_shutdown(void)198 void driver_shutdown(void)
199 {
200 /* Clean up. */
201
202 #if DEBUG
203 printf("Filter: %u driver deaths, %u protocol errors, "
204 "%u data errors\n", problem_stats[BD_DEAD],
205 problem_stats[BD_PROTO], problem_stats[BD_DATA]);
206 #endif
207
208 if(driver_close(DRIVER_MAIN) != OK)
209 printf("Filter: BDEV_CLOSE failed on shutdown (1)\n");
210
211 if(USE_MIRROR)
212 if(driver_close(DRIVER_BACKUP) != OK)
213 printf("Filter: BDEV_CLOSE failed on shutdown (2)\n");
214 }
215
216 /*===========================================================================*
217 * get_raw_size *
218 *===========================================================================*/
get_raw_size(void)219 u64_t get_raw_size(void)
220 {
221 /* Return the size of the raw disks as used by the filter driver.
222 */
223
224 return disk_size;
225 }
226
227 /*===========================================================================*
228 * reset_kills *
229 *===========================================================================*/
reset_kills(void)230 void reset_kills(void)
231 {
232 /* Reset kill and retry statistics. */
233 driver[DRIVER_MAIN].kills = 0;
234 driver[DRIVER_MAIN].retries = 0;
235 driver[DRIVER_BACKUP].kills = 0;
236 driver[DRIVER_BACKUP].retries = 0;
237 }
238
239 /*===========================================================================*
240 * bad_driver *
241 *===========================================================================*/
bad_driver(int which,int type,int error)242 int bad_driver(int which, int type, int error)
243 {
244 /* A disk driver has died or produced an error. Mark it so that we can
245 * deal with it later, and return RET_REDO to indicate that the
246 * current operation is to be retried. Also store an error code to
247 * return to the user if the situation is unrecoverable.
248 */
249 driver[which].problem = type;
250 driver[which].error = error;
251
252 return RET_REDO;
253 }
254
255 /*===========================================================================*
256 * new_driver_ep *
257 *===========================================================================*/
new_driver_ep(int which)258 static int new_driver_ep(int which)
259 {
260 /* See if a new driver instance has already been started for the given
261 * driver, by retrieving its entry from DS.
262 */
263 int r;
264 endpoint_t endpt;
265
266 r = ds_retrieve_label_endpt(driver[which].label, &endpt);
267
268 if (r != OK) {
269 printf("Filter: DS query for %s failed\n",
270 driver[which].label);
271
272 return 0;
273 }
274
275 if (endpt == driver[which].endpt) {
276 #if DEBUG
277 printf("Filter: same endpoint for %s\n", driver[which].label);
278 #endif
279 return 0;
280 }
281
282 #if DEBUG
283 printf("Filter: new enpdoint for %s: %d -> %d\n", driver[which].label,
284 driver[which].endpt, endpt);
285 #endif
286
287 driver[which].endpt = endpt;
288
289 return 1;
290 }
291
292 /*===========================================================================*
293 * check_problem *
294 *===========================================================================*/
check_problem(int which,int problem,int retries,int * tell_rs)295 static int check_problem(int which, int problem, int retries, int *tell_rs)
296 {
297 /* A problem has occurred with a driver. Update statistics, and decide
298 * what to do. If EAGAIN is returned, the driver should be restarted;
299 * any other result will be passed up.
300 */
301
302 #if DEBUG
303 printf("Filter: check_problem processing driver %d, problem %d\n",
304 which, problem);
305 #endif
306
307 problem_stats[problem]++;
308
309 if(new_driver_ep(which)) {
310 #if DEBUG
311 printf("Filter: check_problem: noticed a new driver\n");
312 #endif
313
314 if(driver_open(which) == OK) {
315 #if DEBUG2
316 printf("Filter: open OK -> no recovery\n");
317 #endif
318 return OK;
319 } else {
320 #if DEBUG2
321 printf("Filter: open not OK -> recovery\n");
322 #endif
323 problem = BD_PROTO;
324 problem_stats[problem]++;
325 }
326 }
327
328 /* If the driver has died, we always need to restart it. If it has
329 * been giving problems, we first retry the request, up to N times,
330 * after which we kill and restart the driver. We restart the driver
331 * up to M times, after which we remove the driver from the mirror
332 * configuration. If we are not set up to do mirroring, we can only
333 * do one thing, and that is continue to limp along with the bad
334 * driver..
335 */
336 switch(problem) {
337 case BD_PROTO:
338 case BD_DATA:
339 driver[which].retries++;
340
341 #if DEBUG
342 printf("Filter: disk driver %d has had "
343 "%d/%d retry attempts, %d/%d kills\n", which,
344 driver[which].retries, NR_RETRIES,
345 driver[which].kills, NR_RESTARTS);
346 #endif
347
348 if (driver[which].retries < NR_RETRIES) {
349 if(retries == 1) {
350 #if DEBUG
351 printf("Filter: not restarting; retrying "
352 "(retries %d/%d, kills %d/%d)\n",
353 driver[which].retries, NR_RETRIES,
354 driver[which].kills, NR_RESTARTS);
355 #endif
356 return OK;
357 }
358 #if DEBUG
359 printf("Filter: restarting (retries %d/%d, "
360 "kills %d/%d, internal retry %d)\n",
361 driver[which].retries, NR_RETRIES,
362 driver[which].kills, NR_RESTARTS, retries);
363 #endif
364 }
365
366 #if DEBUG
367 printf("Filter: disk driver %d has reached error "
368 "threshold, restarting driver\n", which);
369 #endif
370
371 *tell_rs = (driver[which].up_event != UP_PENDING);
372 break;
373
374 case BD_DEAD:
375 /* Can't kill that which is already dead.. */
376 *tell_rs = 0;
377 break;
378
379 default:
380 panic("invalid problem: %d", problem);
381 }
382
383 /* At this point, the driver will be restarted. */
384 driver[which].retries = 0;
385 driver[which].kills++;
386
387 if (driver[which].kills < NR_RESTARTS)
388 return EAGAIN;
389
390 /* We've reached the maximum number of restarts for this driver. */
391 if (USE_MIRROR) {
392 printf("Filter: kill threshold reached, disabling mirroring\n");
393
394 USE_MIRROR = 0;
395
396 if (which == DRIVER_MAIN) {
397 driver[DRIVER_MAIN] = driver[DRIVER_BACKUP];
398
399 /* This is not necessary. */
400 strlcpy(MAIN_LABEL, BACKUP_LABEL, sizeof(MAIN_LABEL));
401 MAIN_MINOR = BACKUP_MINOR;
402 }
403
404 driver[DRIVER_BACKUP].endpt = NONE;
405
406 return OK;
407 }
408 else {
409 /* We tried, we really did. But now we give up. Tell the user.
410 */
411 printf("Filter: kill threshold reached, returning error\n");
412
413 if (driver[which].error == EAGAIN) return EIO;
414
415 return driver[which].error;
416 }
417 }
418
419 /*===========================================================================*
420 * restart_driver *
421 *===========================================================================*/
restart_driver(int which,int tell_rs)422 static void restart_driver(int which, int tell_rs)
423 {
424 /* Restart the given driver. Block until the new instance is up.
425 */
426 message msg;
427 int ipc_status;
428 int r;
429
430 if (tell_rs) {
431 /* Tell RS to refresh or restart the driver */
432 msg.m_type = RS_REFRESH;
433 msg.m_rs_req.addr = driver[which].label;
434 msg.m_rs_req.len = strlen(driver[which].label);
435
436 #if DEBUG
437 printf("Filter: asking RS to refresh %s..\n",
438 driver[which].label);
439 #endif
440
441 r = ipc_sendrec(RS_PROC_NR, &msg);
442
443 if (r != OK || msg.m_type != OK)
444 panic("RS request failed: %d", r);
445
446 #if DEBUG
447 printf("Filter: RS call succeeded\n");
448 #endif
449 }
450
451 /* Wait until the new driver instance is up, and get its endpoint. */
452 #if DEBUG
453 printf("Filter: endpoint update driver %d; old endpoint %d\n",
454 which, driver[which].endpt);
455 #endif
456
457 if(driver[which].up_event == UP_EXPECTED) {
458 driver[which].up_event = UP_NONE;
459 }
460 while(driver[which].up_event != UP_PENDING) {
461 r = driver_receive(DS_PROC_NR, &msg, &ipc_status);
462 if(r != OK)
463 panic("driver_receive returned error: %d", r);
464
465 ds_event();
466 }
467 }
468
469 /*===========================================================================*
470 * check_driver *
471 *===========================================================================*/
check_driver(int which)472 int check_driver(int which)
473 {
474 /* See if the given driver has been troublesome, and if so, deal with
475 * it.
476 */
477 int problem, tell_rs;
478 int r, retries = 0;
479
480 problem = driver[which].problem;
481
482 if (problem == BD_NONE)
483 return OK;
484
485 do {
486 if(retries) {
487 #if DEBUG
488 printf("Filter: check_driver: retry number %d\n",
489 retries);
490 #endif
491 problem = BD_PROTO;
492 }
493 retries++;
494 driver[which].problem = BD_NONE;
495
496 /* Decide what to do: continue operation, restart the driver,
497 * or return an error.
498 */
499 r = check_problem(which, problem, retries, &tell_rs);
500 if (r != EAGAIN)
501 return r;
502
503 /* Restarting the driver it is. First tell RS (if necessary),
504 * then wait for the new driver instance to come up.
505 */
506 restart_driver(which, tell_rs);
507
508 /* Finally, open the device on the new driver */
509 } while (driver_open(which) != OK);
510
511 #if DEBUG
512 printf("Filter: check_driver restarted driver %d, endpoint %d\n",
513 which, driver[which].endpt);
514 #endif
515
516 return OK;
517 }
518
519 /*===========================================================================*
520 * flt_senda *
521 *===========================================================================*/
static int flt_senda(message *mess, int which)
{
/* Queue a message for asynchronous delivery to one driver. The minor
 * device and id fields of 'mess' are filled in here. A failing
 * ipc_senda() is fatal, so in practice only OK is returned.
 */
	asynmsg_t *slot = &amsgtable[which];
	int status;

	/* Complete the request. */
	mess->m_lbdev_lblockdriver_msg.minor = driver[which].minor;
	mess->m_lbdev_lblockdriver_msg.id = 0;

	/* Set up this driver's slot and submit the whole table. */
	slot->dst = driver[which].endpt;
	slot->msg = *mess;
	slot->flags = AMF_VALID;

	status = ipc_senda(amsgtable, 2);
	if (status != OK)
		panic("ipc_senda returned error: %d", status);

	return status;
}
544
545 /*===========================================================================*
546 * check_senda *
547 *===========================================================================*/
check_senda(int which)548 static int check_senda(int which)
549 {
550 /* Check whether an earlier senda resulted in an error indicating the
551 * message never got delivered. Only in that case can we reliably say
552 * that the driver died. Return BD_DEAD in this case, and BD_PROTO
553 * otherwise.
554 */
555 asynmsg_t *amp;
556
557 amp = &amsgtable[which];
558
559 if ((amp->flags & AMF_DONE) && (amp->result == EDEADSRCDST)) {
560
561 return BD_DEAD;
562 }
563
564 return BD_PROTO;
565 }
566
567 /*===========================================================================*
568 * flt_receive *
569 *===========================================================================*/
static int flt_receive(message *mess, int which)
{
/* Wait for a reply from the given driver, or from either driver if
 * 'which' is negative. DS notifications are processed on the fly; old
 * alarm notifications, stray messages and replies from the driver we
 * are not waiting for are skipped. Returns OK once a suitable reply is
 * in 'mess', or RET_REDO if the alarm goes off first, in which case the
 * driver(s) waited for are marked as bad.
 */
	int ipc_status;
	int status;

	for (;;) {
		status = driver_receive(ANY, mess, &ipc_status);
		if (status != OK)
			panic("driver_receive returned error: %d", status);

		if (is_ipc_notify(ipc_status)) {
			if (mess->m_source == DS_PROC_NR) {
				ds_event();
				continue;
			}

			if (mess->m_source == CLOCK) {
				if (mess->m_notify.timestamp <
				    flt_alarm((clock_t) -1)) {
#if DEBUG
					printf("Filter: SKIPPING old alarm "
						"notification\n");
#endif
					continue;
				}

#if DEBUG
				printf("Filter: timeout waiting for disk driver %d "
					"reply!\n", which);
#endif

				/* Waiting for one specific driver: blame
				 * just the one that did not reply.
				 */
				if (which >= 0)
					return bad_driver(which,
						check_senda(which), EFAULT);

				/* Waiting for either driver: both are at
				 * fault.
				 */
				bad_driver(DRIVER_MAIN,
					check_senda(DRIVER_MAIN), EFAULT);

				return bad_driver(DRIVER_BACKUP,
					check_senda(DRIVER_BACKUP), EFAULT);
			}
		}

		if (mess->m_source != driver[DRIVER_MAIN].endpt &&
		    mess->m_source != driver[DRIVER_BACKUP].endpt) {
#if DEBUG
			printf("Filter: got STRAY message %d from %d\n",
				mess->m_type, mess->m_source);
#endif

			continue;
		}

		/* Done if any driver will do, or if the reply comes from
		 * the specific driver we are waiting for.
		 */
		if (which < 0 || mess->m_source == driver[which].endpt)
			return OK;

		/* This should probably be treated as a real protocol
		 * error. We do not abort any receives (not even paired
		 * receives) except because of timeouts. Getting here
		 * means a driver replied at least the timeout period
		 * later than expected, which should be enough reason
		 * to kill it really. The other explanation is that it
		 * is actually violating the protocol and sending bogus
		 * messages...
		 */
#if DEBUG
		printf("Filter: got UNEXPECTED reply from %d\n",
			mess->m_source);
#endif
	}
}
659
660 /*===========================================================================*
661 * flt_sendrec *
662 *===========================================================================*/
static int flt_sendrec(message *mess, int which)
{
/* Send a request to one driver and wait for its reply, guarded by an
 * alarm. If the send is already known not to have been delivered, the
 * driver is marked dead and RET_REDO is returned without waiting.
 */
	int status;

	status = flt_senda(mess, which);
	if (status != OK)
		return status;

	if (check_senda(which) == BD_DEAD)
		return bad_driver(which, BD_DEAD, EFAULT);

	/* Wait for the reply with the alarm armed, then disarm it. */
	flt_alarm(DRIVER_TIMEOUT);
	status = flt_receive(mess, which);
	flt_alarm(0);

	return status;
}
684
685 /*===========================================================================*
686 * do_sendrec_both *
687 *===========================================================================*/
static int do_sendrec_both(message *m1, message *m2)
{
/* Send m1 to the main driver and m2 to the backup driver, and collect
 * both replies: on success m1 holds the main driver's reply and m2 the
 * backup driver's. This function will only return either OK or RET_REDO.
 */
	message first, second;
	int status;
	int pending = -1;

	/* Two disks behind one and the same driver: simply perform the two
	 * requests one after the other. Such a setup is not very useful
	 * though.
	 */
	if (strcmp(driver[DRIVER_MAIN].label,
	    driver[DRIVER_BACKUP].label) == 0) {
		status = flt_sendrec(m1, DRIVER_MAIN);
		if (status != OK)
			return status;

		return flt_sendrec(m2, DRIVER_BACKUP);
	}

	/* Different drivers: fire off both requests, then receive twice
	 * and tell the replies apart by their m_source.
	 */
	status = flt_senda(m1, DRIVER_MAIN);
	if (status != OK)
		return status;

	status = flt_senda(m2, DRIVER_BACKUP);
	if (status != OK)
		return status;

	/* Set alarm. */
	flt_alarm(DRIVER_TIMEOUT);

	/* The first reply may come from either driver. */
	status = flt_receive(&first, -1);
	if (status != OK) {
		flt_alarm(0);
		return status;
	}

	/* Whoever answered first, the other one is still pending. */
	if (first.m_source == driver[DRIVER_MAIN].endpt)
		pending = DRIVER_BACKUP;
	else if (first.m_source == driver[DRIVER_BACKUP].endpt)
		pending = DRIVER_MAIN;
	else
		panic("message from unexpected source: %d",
			first.m_source);

	status = flt_receive(&second, pending);

	/* Clear the alarm. */
	flt_alarm(0);

	if (status != OK)
		return status;

	/* Hand each reply back through the message of its own driver. */
	if (first.m_source == driver[DRIVER_MAIN].endpt) {
		*m1 = first;
		*m2 = second;
	} else {
		*m1 = second;
		*m2 = first;
	}

	return OK;
}
750
751 /*===========================================================================*
752 * do_sendrec_one *
753 *===========================================================================*/
static int do_sendrec_one(message *m1)
{
/* Perform the request on the main driver only; problems are left for
 * other code to sort out. Passes on the result of flt_sendrec().
 */
	int status = flt_sendrec(m1, DRIVER_MAIN);

	return status;
}
763
764 /*===========================================================================*
765 * paired_sendrec *
766 *===========================================================================*/
static int paired_sendrec(message *m1, message *m2, int both)
{
/* Exchange a request with the disk driver(s): with both drivers if
 * 'both' is set (m1 to main, m2 to backup), otherwise with the main
 * driver only, using m1. Returns the result of the chosen exchange.
 */
	int status;

#if DEBUG2
	printf("paired_sendrec(%d) - <%d,%llx,%d> - %x,%x\n",
		both, m1->m_type, m1->m_lbdev_lblockdriver_msg.pos,
		m1->m_lbdev_lblockdriver_msg.count, m1->m_lbdev_lblockdriver_msg.grant, m2->m_lbdev_lblockdriver_msg.grant);
#endif

	status = both ? do_sendrec_both(m1, m2) : do_sendrec_one(m1);

#if DEBUG2
	if (status != OK)
		printf("paired_sendrec about to return %d\n", status);
#endif

	return status;
}
793
794 /*===========================================================================*
795 * single_grant *
796 *===========================================================================*/
static int single_grant(endpoint_t endpt, vir_bytes buf, int access,
	cp_grant_id_t *gid, iovec_s_t vector[NR_IOREQS], size_t size)
{
/* Set up the grants for one vectored request to one driver: a grant per
 * vector element covering consecutive pieces of the buffer, plus a grant
 * for the vector itself, stored in '*gid'. Returns the number of vector
 * elements used. Failure to create a grant is fatal.
 */
	cp_grant_id_t grant;
	size_t piece;
	int used = 0;

	/* Cut the request into pieces of CHUNK_SIZE bytes if that is set.
	 * This makes no difference at all, except that it works around a
	 * weird performance bug with large DMA PRDs on some machines.
	 */
	piece = (CHUNK_SIZE > 0) ? CHUNK_SIZE : size;

	while (size > 0 && used < NR_IOREQS) {
		/* The final element takes whatever data is left. */
		if (piece > size || used == NR_IOREQS - 1)
			piece = size;

		grant = cpf_grant_direct(endpt, buf, piece, access);
		if (!GRANT_VALID(grant))
			panic("invalid grant: %d", grant);

		vector[used].iov_grant = grant;
		vector[used].iov_size = piece;
		used++;

		buf += piece;
		size -= piece;
	}

	/* The driver also needs read access to the vector itself. */
	*gid = cpf_grant_direct(endpt, (vir_bytes) vector,
		sizeof(vector[0]) * used, CPF_READ);

	if (!GRANT_VALID(*gid))
		panic("invalid grant: %d", *gid);

	return used;
}
839
840 /*===========================================================================*
841 * paired_grant *
842 *===========================================================================*/
static int paired_grant(char *buf1, char *buf2, int request,
	cp_grant_id_t *gids, iovec_s_t vectors[2][NR_IOREQS], size_t size,
	int both)
{
/* Create the memory grants for a request: for the main driver (buf1,
 * gids[0], vectors[0]) and, if 'both' is set, for the backup driver as
 * well (buf2, gids[1], vectors[1]). A driver whose endpoint is not
 * positive is skipped. Returns the element count of the last vector set
 * up, or 0 if none was.
 */
	/* A write request means the driver reads from our buffer. */
	int access = (request == FLT_WRITE) ? CPF_READ : CPF_WRITE;
	int count = 0;

	if (driver[DRIVER_MAIN].endpt > 0)
		count = single_grant(driver[DRIVER_MAIN].endpt,
			(vir_bytes) buf1, access, &gids[0], vectors[0], size);

	if (both && driver[DRIVER_BACKUP].endpt > 0)
		count = single_grant(driver[DRIVER_BACKUP].endpt,
			(vir_bytes) buf2, access, &gids[1], vectors[1], size);

	return count;
}
868
869 /*===========================================================================*
870 * single_revoke *
871 *===========================================================================*/
static void single_revoke(cp_grant_id_t gid,
	const iovec_s_t vector[NR_IOREQS], int count)
{
/* Undo the grants of a request to a single driver: first those of the
 * first 'count' vector elements, then the grant for the vector itself.
 */
	int idx = 0;

	while (idx < count) {
		cpf_revoke(vector[idx].iov_grant);
		idx++;
	}

	cpf_revoke(gid);
}
887
888 /*===========================================================================*
889 * paired_revoke *
890 *===========================================================================*/
static void paired_revoke(const cp_grant_id_t *gids,
	iovec_s_t vectors[2][NR_IOREQS], int count, int both)
{
/* Revoke grants to drivers for a single request: those of the main
 * driver (gids[0], vectors[0]) and, if 'both' is set, those of the
 * backup driver (gids[1], vectors[1]).
 *
 * paired_grant() creates no grants at all for a driver without a usable
 * endpoint. Its gids entry then still holds the GRANT_INVALID that
 * read_write() put there, and its vector is uninitialized. Such a driver
 * is skipped here, so that no garbage grant ids are revoked.
 */

	if (GRANT_VALID(gids[0]))
		single_revoke(gids[0], vectors[0], count);

	if (both && GRANT_VALID(gids[1]))
		single_revoke(gids[1], vectors[1], count);
}
902
903 /*===========================================================================*
904 * read_write *
905 *===========================================================================*/
int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request)
{
/* Perform a read or write of '*sizep' bytes at position 'pos': on the
 * main driver using 'bufa', and also on the backup driver using 'bufb'
 * when mirroring is enabled and the request is not FLT_READ. On OK,
 * '*sizep' may have been lowered to the size the driver(s) reported.
 * A bad or suspicious reply marks the driver involved as bad and yields
 * RET_REDO; a failing exchange returns the exchange's result.
 */
	iovec_s_t vectors[2][NR_IOREQS];
	cp_grant_id_t gids[2];
	message main_msg, backup_msg;
	int both, count, status;

	gids[0] = GRANT_INVALID;
	gids[1] = GRANT_INVALID;

	/* Send two requests only if mirroring is enabled and the given
	 * request is either FLT_READ2 or FLT_WRITE.
	 */
	both = (USE_MIRROR && request != FLT_READ);

	count = paired_grant(bufa, bufb, request, gids, vectors, *sizep, both);

	/* Both requests are identical except for the grant. */
	memset(&main_msg, 0, sizeof(main_msg));
	main_msg.m_type = (request == FLT_WRITE) ? BDEV_SCATTER : BDEV_GATHER;
	main_msg.m_lbdev_lblockdriver_msg.count = count;
	main_msg.m_lbdev_lblockdriver_msg.pos = pos;

	backup_msg = main_msg;

	main_msg.m_lbdev_lblockdriver_msg.grant = gids[0];
	backup_msg.m_lbdev_lblockdriver_msg.grant = gids[1];

	status = paired_sendrec(&main_msg, &backup_msg, both);

	/* The grants have served their purpose, whatever the outcome. */
	paired_revoke(gids, vectors, count, both);

	if (status != OK) {
#if DEBUG
		if (status != RET_REDO)
			printf("Filter: paired_sendrec returned %d\n", status);
#endif
		return status;
	}

	/* Inspect the main driver's reply. */
	if (main_msg.m_type != BDEV_REPLY ||
	    main_msg.m_lblockdriver_lbdev_reply.status < 0) {
		printf("Filter: unexpected/invalid reply from main driver: "
			"(%x, %d)\n", main_msg.m_type,
			main_msg.m_lblockdriver_lbdev_reply.status);

		return bad_driver(DRIVER_MAIN, BD_PROTO,
			(main_msg.m_type == BDEV_REPLY) ?
			main_msg.m_lblockdriver_lbdev_reply.status : EFAULT);
	}

	if (main_msg.m_lblockdriver_lbdev_reply.status != (ssize_t) *sizep) {
		printf("Filter: truncated reply from main driver\n");

		/* If the driver returned a value *larger* than we requested,
		 * OR if we did NOT exceed the disk size, then we should
		 * report the driver for acting strangely!
		 */
		if (main_msg.m_lblockdriver_lbdev_reply.status >
		    (ssize_t) *sizep ||
		    (pos + (unsigned int)
		    main_msg.m_lblockdriver_lbdev_reply.status < disk_size))
			return bad_driver(DRIVER_MAIN, BD_PROTO, EFAULT);

		/* Return the actual size. */
		*sizep = main_msg.m_lblockdriver_lbdev_reply.status;
	}

	/* Without a second request there is nothing left to check. */
	if (!both)
		return OK;

	/* Inspect the backup driver's reply in the same manner. */
	if (backup_msg.m_type != BDEV_REPLY ||
	    backup_msg.m_lblockdriver_lbdev_reply.status < 0) {
		printf("Filter: unexpected/invalid reply from "
			"backup driver (%x, %d)\n",
			backup_msg.m_type,
			backup_msg.m_lblockdriver_lbdev_reply.status);

		return bad_driver(DRIVER_BACKUP, BD_PROTO,
			backup_msg.m_type == BDEV_REPLY ?
			backup_msg.m_lblockdriver_lbdev_reply.status :
			EFAULT);
	}

	if (backup_msg.m_lblockdriver_lbdev_reply.status != (ssize_t) *sizep) {
		printf("Filter: truncated reply from backup driver\n");

		/* As above */
		if (backup_msg.m_lblockdriver_lbdev_reply.status >
		    (ssize_t) *sizep ||
		    (pos + (unsigned int)
		    backup_msg.m_lblockdriver_lbdev_reply.status
		    < disk_size))
			return bad_driver(DRIVER_BACKUP, BD_PROTO,
				EFAULT);

		/* Return the actual size. */
		if ((ssize_t)*sizep >=
		    backup_msg.m_lblockdriver_lbdev_reply.status)
			*sizep = backup_msg.m_lblockdriver_lbdev_reply.status;
	}

	return OK;
}
1003
1004 /*===========================================================================*
1005 * ds_event *
1006 *===========================================================================*/
ds_event()1007 void ds_event()
1008 {
1009 char key[DS_MAX_KEYLEN];
1010 char *blkdriver_prefix = "drv.blk.";
1011 u32_t value;
1012 int type;
1013 endpoint_t owner_endpoint;
1014 int r;
1015 int which;
1016
1017 /* Get the event and the owner from DS. */
1018 r = ds_check(key, &type, &owner_endpoint);
1019 if(r != OK) {
1020 if(r != ENOENT)
1021 printf("Filter: ds_event: ds_check failed: %d\n", r);
1022 return;
1023 }
1024 r = ds_retrieve_u32(key, &value);
1025 if(r != OK) {
1026 printf("Filter: ds_event: ds_retrieve_u32 failed\n");
1027 return;
1028 }
1029
1030 /* Only check for VFS driver up events. */
1031 if(strncmp(key, blkdriver_prefix, strlen(blkdriver_prefix))
1032 || value != DS_DRIVER_UP) {
1033 return;
1034 }
1035
1036 /* See if this is a driver we are responsible for. */
1037 if(driver[DRIVER_MAIN].endpt == owner_endpoint) {
1038 which = DRIVER_MAIN;
1039 }
1040 else if(driver[DRIVER_BACKUP].endpt == owner_endpoint) {
1041 which = DRIVER_BACKUP;
1042 }
1043 else {
1044 return;
1045 }
1046
1047 /* Mark the driver as (re)started. */
1048 driver[which].up_event = driver[which].up_event == UP_EXPECTED ?
1049 UP_NONE : UP_PENDING;
1050 }
1051
1052