1 /*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * %sccs.include.redist.c%
11 *
12 * @(#)ufs_disksubr.c 8.5 (Berkeley) 01/21/94
13 */
14
15 #include <sys/param.h>
16 #include <sys/systm.h>
17 #include <sys/buf.h>
18 #include <sys/disklabel.h>
19 #include <sys/syslog.h>
20
21 /*
22 * Seek sort for disks. We depend on the driver which calls us using b_resid
23 * as the current cylinder number.
24 *
25 * The argument ap structure holds a b_actf activity chain pointer on which we
26 * keep two queues, sorted in ascending cylinder order. The first queue holds
27 * those requests which are positioned after the current cylinder (in the first
28 * request); the second holds requests which came in after their cylinder number
29 * was passed. Thus we implement a one way scan, retracting after reaching the
30 * end of the drive to the first request on the second queue, at which time it
31 * becomes the first queue.
32 *
33 * A one-way scan is natural because of the way UNIX read-ahead blocks are
34 * allocated.
35 */
36
/*
 * For portability with historic industry practice, the
 * cylinder number has to be maintained in the `b_resid'
 * field.  b_cylinder is therefore only another name for
 * b_resid: disksort() orders requests by it and readdisklabel()
 * fills it in for the label sector before calling the strategy
 * routine.
 */
#define b_cylinder b_resid
43
44 void
disksort(ap,bp)45 disksort(ap, bp)
46 register struct buf *ap, *bp;
47 {
48 register struct buf *bq;
49
50 /* If the queue is empty, then it's easy. */
51 if (ap->b_actf == NULL) {
52 bp->b_actf = NULL;
53 ap->b_actf = bp;
54 return;
55 }
56
57 /*
58 * If we lie after the first (currently active) request, then we
59 * must locate the second request list and add ourselves to it.
60 */
61 bq = ap->b_actf;
62 if (bp->b_cylinder < bq->b_cylinder) {
63 while (bq->b_actf) {
64 /*
65 * Check for an ``inversion'' in the normally ascending
66 * cylinder numbers, indicating the start of the second
67 * request list.
68 */
69 if (bq->b_actf->b_cylinder < bq->b_cylinder) {
70 /*
71 * Search the second request list for the first
72 * request at a larger cylinder number. We go
73 * before that; if there is no such request, we
74 * go at end.
75 */
76 do {
77 if (bp->b_cylinder <
78 bq->b_actf->b_cylinder)
79 goto insert;
80 if (bp->b_cylinder ==
81 bq->b_actf->b_cylinder &&
82 bp->b_blkno < bq->b_actf->b_blkno)
83 goto insert;
84 bq = bq->b_actf;
85 } while (bq->b_actf);
86 goto insert; /* after last */
87 }
88 bq = bq->b_actf;
89 }
90 /*
91 * No inversions... we will go after the last, and
92 * be the first request in the second request list.
93 */
94 goto insert;
95 }
96 /*
97 * Request is at/after the current request...
98 * sort in the first request list.
99 */
100 while (bq->b_actf) {
101 /*
102 * We want to go after the current request if there is an
103 * inversion after it (i.e. it is the end of the first
104 * request list), or if the next request is a larger cylinder
105 * than our request.
106 */
107 if (bq->b_actf->b_cylinder < bq->b_cylinder ||
108 bp->b_cylinder < bq->b_actf->b_cylinder ||
109 (bp->b_cylinder == bq->b_actf->b_cylinder &&
110 bp->b_blkno < bq->b_actf->b_blkno))
111 goto insert;
112 bq = bq->b_actf;
113 }
114 /*
115 * Neither a second list nor a larger request... we go at the end of
116 * the first list, which is the same as the end of the whole schebang.
117 */
118 insert: bp->b_actf = bq->b_actf;
119 bq->b_actf = bp;
120 }
121
122 /*
123 * Attempt to read a disk label from a device using the indicated stategy
124 * routine. The label must be partly set up before this: secpercyl and
125 * anything required in the strategy routine (e.g., sector size) must be
126 * filled in before calling us. Returns NULL on success and an error
127 * string on failure.
128 */
129 char *
readdisklabel(dev,strat,lp)130 readdisklabel(dev, strat, lp)
131 dev_t dev;
132 int (*strat)();
133 register struct disklabel *lp;
134 {
135 register struct buf *bp;
136 struct disklabel *dlp;
137 char *msg = NULL;
138
139 if (lp->d_secperunit == 0)
140 lp->d_secperunit = 0x1fffffff;
141 lp->d_npartitions = 1;
142 if (lp->d_partitions[0].p_size == 0)
143 lp->d_partitions[0].p_size = 0x1fffffff;
144 lp->d_partitions[0].p_offset = 0;
145
146 bp = geteblk((int)lp->d_secsize);
147 bp->b_dev = dev;
148 bp->b_blkno = LABELSECTOR;
149 bp->b_bcount = lp->d_secsize;
150 bp->b_flags = B_BUSY | B_READ;
151 bp->b_cylinder = LABELSECTOR / lp->d_secpercyl;
152 (*strat)(bp);
153 if (biowait(bp))
154 msg = "I/O error";
155 else for (dlp = (struct disklabel *)bp->b_data;
156 dlp <= (struct disklabel *)((char *)bp->b_data +
157 DEV_BSIZE - sizeof(*dlp));
158 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
159 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
160 if (msg == NULL)
161 msg = "no disk label";
162 } else if (dlp->d_npartitions > MAXPARTITIONS ||
163 dkcksum(dlp) != 0)
164 msg = "disk label corrupted";
165 else {
166 *lp = *dlp;
167 msg = NULL;
168 break;
169 }
170 }
171 bp->b_flags = B_INVAL | B_AGE;
172 brelse(bp);
173 return (msg);
174 }
175
176 /*
177 * Check new disk label for sensibility before setting it.
178 */
179 int
setdisklabel(olp,nlp,openmask)180 setdisklabel(olp, nlp, openmask)
181 register struct disklabel *olp, *nlp;
182 u_long openmask;
183 {
184 register i;
185 register struct partition *opp, *npp;
186
187 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
188 dkcksum(nlp) != 0)
189 return (EINVAL);
190 while ((i = ffs((long)openmask)) != 0) {
191 i--;
192 openmask &= ~(1 << i);
193 if (nlp->d_npartitions <= i)
194 return (EBUSY);
195 opp = &olp->d_partitions[i];
196 npp = &nlp->d_partitions[i];
197 if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
198 return (EBUSY);
199 /*
200 * Copy internally-set partition information
201 * if new label doesn't include it. XXX
202 */
203 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
204 npp->p_fstype = opp->p_fstype;
205 npp->p_fsize = opp->p_fsize;
206 npp->p_frag = opp->p_frag;
207 npp->p_cpg = opp->p_cpg;
208 }
209 }
210 nlp->d_checksum = 0;
211 nlp->d_checksum = dkcksum(nlp);
212 *olp = *nlp;
213 return (0);
214 }
215
/*
 * Encoding of disk minor numbers, should be elsewhere...
 *
 * The low three bits of the minor number hold the partition and the
 * bits above them hold the unit:
 *	dkunit(dev)		unit encoded in dev's minor number
 *	dkpart(dev)		partition encoded in dev's minor number
 *	dkminor(unit, part)	minor number built from a unit and partition
 */
#define dkunit(dev) (minor(dev) >> 3)
#define dkpart(dev) (minor(dev) & 07)
#define dkminor(unit, part) (((unit) << 3) | (part))
220
221 /*
222 * Write disk label back to device after modification.
223 */
224 int
writedisklabel(dev,strat,lp)225 writedisklabel(dev, strat, lp)
226 dev_t dev;
227 int (*strat)();
228 register struct disklabel *lp;
229 {
230 struct buf *bp;
231 struct disklabel *dlp;
232 int labelpart;
233 int error = 0;
234
235 labelpart = dkpart(dev);
236 if (lp->d_partitions[labelpart].p_offset != 0) {
237 if (lp->d_partitions[0].p_offset != 0)
238 return (EXDEV); /* not quite right */
239 labelpart = 0;
240 }
241 bp = geteblk((int)lp->d_secsize);
242 bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
243 bp->b_blkno = LABELSECTOR;
244 bp->b_bcount = lp->d_secsize;
245 bp->b_flags = B_READ;
246 (*strat)(bp);
247 if (error = biowait(bp))
248 goto done;
249 for (dlp = (struct disklabel *)bp->b_data;
250 dlp <= (struct disklabel *)
251 ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
252 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
253 if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
254 dkcksum(dlp) == 0) {
255 *dlp = *lp;
256 bp->b_flags = B_WRITE;
257 (*strat)(bp);
258 error = biowait(bp);
259 goto done;
260 }
261 }
262 error = ESRCH;
263 done:
264 brelse(bp);
265 return (error);
266 }
267
268 /*
269 * Compute checksum for disk label.
270 */
dkcksum(lp)271 dkcksum(lp)
272 register struct disklabel *lp;
273 {
274 register u_short *start, *end;
275 register u_short sum = 0;
276
277 start = (u_short *)lp;
278 end = (u_short *)&lp->d_partitions[lp->d_npartitions];
279 while (start < end)
280 sum ^= *start++;
281 return (sum);
282 }
283
284 /*
285 * Disk error is the preface to plaintive error messages
286 * about failing disk transfers. It prints messages of the form
287
288 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
289
290 * if the offset of the error in the transfer and a disk label
291 * are both available. blkdone should be -1 if the position of the error
292 * is unknown; the disklabel pointer may be null from drivers that have not
293 * been converted to use them. The message is printed with printf
294 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
295 * The message should be completed (with at least a newline) with printf
296 * or addlog, respectively. There is no trailing space.
297 */
298 void
diskerr(bp,dname,what,pri,blkdone,lp)299 diskerr(bp, dname, what, pri, blkdone, lp)
300 register struct buf *bp;
301 char *dname, *what;
302 int pri, blkdone;
303 register struct disklabel *lp;
304 {
305 int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
306 register void (*pr) __P((const char *, ...));
307 char partname = 'a' + part;
308 int sn;
309
310 if (pri != LOG_PRINTF) {
311 log(pri, "");
312 pr = addlog;
313 } else
314 pr = printf;
315 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
316 bp->b_flags & B_READ ? "read" : "writ");
317 sn = bp->b_blkno;
318 if (bp->b_bcount <= DEV_BSIZE)
319 (*pr)("%d", sn);
320 else {
321 if (blkdone >= 0) {
322 sn += blkdone;
323 (*pr)("%d of ", sn);
324 }
325 (*pr)("%d-%d", bp->b_blkno,
326 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
327 }
328 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
329 #ifdef tahoe
330 sn *= DEV_BSIZE / lp->d_secsize; /* XXX */
331 #endif
332 sn += lp->d_partitions[part].p_offset;
333 (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
334 sn / lp->d_secpercyl);
335 sn %= lp->d_secpercyl;
336 (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
337 }
338 }
339