xref: /minix3/minix/servers/vfs/read.c (revision b80da2a01d0bb632707b7b4e974aa32eaebbcc6f)
/* This file contains the heart of the mechanism used to read (and write)
 * files.  Read and write requests are split up into chunks that do not cross
 * block boundaries.  Each chunk is then processed in turn.  Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	 perform the READ system call by calling read_write
 *   do_getdents: read entries from a directory (GETDENTS)
 *   read_write: actually do the work of READ and WRITE
 *   do_read_write_peek: common entry for READ, WRITE, and PEEK requests
 *   rw_pipe:	 read from or write to a pipe
 *   lock_bsf / unlock_bsf / check_bsf_lock: manage the global lock that
 *		 serializes access to block special files
 */
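/* For illustration only (a sketch, not part of this file): roughly how a
 * READ request reaches this code. The C library packs the arguments of
 * read() into the m_lc_vfs_readwrite message fields that do_read() unpacks
 * below.
 *
 *	char data[512];
 *	ssize_t n = read(fd, data, sizeof(data));
 *	// libc: m.m_lc_vfs_readwrite.fd = fd; .buf = data; .len = 512;
 *	// VFS:  do_read() -> do_read_write_peek(READING, fd, buf, len)
 */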

#include "fs.h"
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/u64.h>
#include <minix/vfsif.h>
#include <assert.h>
#include <sys/dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include "file.h"
#include "vnode.h"
#include "vmnt.h"


/*===========================================================================*
 *				do_read					     *
 *===========================================================================*/
int do_read(void)
{
  return(do_read_write_peek(READING, job_m_in.m_lc_vfs_readwrite.fd,
          job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len));
}


/*===========================================================================*
 *				lock_bsf				     *
 *===========================================================================*/
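/* Acquire the global lock serializing access to block special files. Try a
 * non-blocking lock first; if it is contended, suspend this worker thread so
 * that the blocking mutex_lock() call below does not stall other VFS work.
 */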
void lock_bsf(void)
{
  struct worker_thread *org_self;

  if (mutex_trylock(&bsf_lock) == 0)
	return;

  org_self = worker_suspend();

  if (mutex_lock(&bsf_lock) != 0)
	panic("unable to lock block special file lock");

  worker_resume(org_self);
}

/*===========================================================================*
 *				unlock_bsf				     *
 *===========================================================================*/
void unlock_bsf(void)
{
  if (mutex_unlock(&bsf_lock) != 0)
	panic("failed to unlock block special file lock");
}

/*===========================================================================*
 *				check_bsf_lock				     *
 *===========================================================================*/
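/* Sanity check: panic if the block special file lock is currently held. The
 * check works by acquiring the lock and immediately releasing it again.
 */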
void check_bsf_lock(void)
{
	int r = mutex_trylock(&bsf_lock);

	if (r == -EBUSY)
		panic("bsf_lock locked");
	else if (r != 0)
		panic("bsf_lock weird state");

	/* r == 0 */
	unlock_bsf();
}

/*===========================================================================*
 *				actual_read_write_peek			     *
 *===========================================================================*/
int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd,
	vir_bytes io_buf, size_t io_nbytes)
{
/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
  struct filp *f;
  tll_access_t locktype;
  int r;
  int ro = 1;

  if (rw_flag == WRITING) ro = 0;

  rfp->fp_fd = io_fd;
  rfp->fp_io_buffer = io_buf;
  rfp->fp_io_nbytes = io_nbytes;

  locktype = rw_flag == WRITING ? VNODE_WRITE : VNODE_READ;
  if ((f = get_filp2(rfp, rfp->fp_fd, locktype)) == NULL)
	return(err_code);

  assert(f->filp_count > 0);
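  /* The filp must have been opened with the matching access mode: R_BIT for
   * reads (and peeks), W_BIT for writes.
   */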
  if (((f->filp_mode) & (ro ? R_BIT : W_BIT)) == 0) {
	unlock_filp(f);
	return(EBADF);
  }
  if (rfp->fp_io_nbytes == 0) {
	unlock_filp(f);
	return(0);	/* so char special files need not check for 0 */
  }

  r = read_write(rfp, rw_flag, f, rfp->fp_io_buffer, rfp->fp_io_nbytes, who_e);

  unlock_filp(f);
  return(r);
}

/*===========================================================================*
 *				do_read_write_peek			     *
 *===========================================================================*/
int do_read_write_peek(int rw_flag, int io_fd, vir_bytes io_buf, size_t io_nbytes)
{
	return actual_read_write_peek(fp, rw_flag, io_fd, io_buf, io_nbytes);
}

/*===========================================================================*
 *				read_write				     *
 *===========================================================================*/
int read_write(struct fproc *rfp, int rw_flag, struct filp *f,
	vir_bytes buf, size_t size, endpoint_t for_e)
{
  register struct vnode *vp;
  off_t position, res_pos;
  size_t cum_io, res_cum_io;
  size_t cum_io_incr;
  int op, r;
  dev_t dev;

  position = f->filp_pos;
  vp = f->filp_vno;
  r = OK;
  cum_io = 0;

  assert(rw_flag == READING || rw_flag == WRITING || rw_flag == PEEKING);

  if (size > SSIZE_MAX) return(EINVAL);

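  /* The character driver opcode; only consumed by the S_ISCHR branch below. */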
  op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE);

  if (S_ISFIFO(vp->v_mode)) {		/* Pipes */
	if (rfp->fp_cum_io_partial != 0) {
		panic("VFS: read_write: fp_cum_io_partial not clear");
	}
	if (rw_flag == PEEKING) {
		printf("read_write: peek on pipe makes no sense\n");
		return EINVAL;
	}
	r = rw_pipe(rw_flag, for_e, f, buf, size);
  } else if (S_ISCHR(vp->v_mode)) {	/* Character special files. */
	if (rw_flag == PEEKING) {
		printf("read_write: peek on char device makes no sense\n");
		return EINVAL;
	}

	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access char dev NO_DEV");

	dev = vp->v_sdev;

	r = cdev_io(op, dev, for_e, buf, position, size, f->filp_flags);
	if (r >= 0) {
		/* This should no longer happen: all calls are asynchronous. */
		printf("VFS: I/O to device %llx succeeded immediately!?\n", dev);
		cum_io = r;
		position += r;
		r = OK;
	} else if (r == SUSPEND) {
		/* FIXME: multiple read/write operations on a single filp
		 * should be serialized. They currently aren't; in order to
		 * achieve a similar effect, we optimistically advance the file
		 * position here. This works under the following assumptions:
		 * - character drivers that use the seek position at all,
		 *   expose a view of a statically-sized range of bytes, i.e.,
		 *   they are basically byte-granular block devices;
		 * - if short I/O or an error is returned, all subsequent calls
		 *   will return (respectively) EOF and an error;
		 * - the application never checks its own file seek position,
		 *   or does not care that it may end up having seeked beyond
		 *   the number of bytes it has actually read;
		 * - communication to the character driver is FIFO (this one
		 *   is actually true! whew).
		 * Many improvements are possible here, but in the end,
		 * anything short of queuing concurrent operations will be
		 * suboptimal - so we settle for this hack for now.
		 */
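		/* Example: two 512-byte reads issued back to back on the same
		 * filp go out at offsets 0 and 512, even though the first has
		 * not yet completed.
		 */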
		position += size;
	}
  } else if (S_ISBLK(vp->v_mode)) {	/* Block special files. */
	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access block dev NO_DEV");

	lock_bsf();

	if (rw_flag == PEEKING) {
		r = req_bpeek(vp->v_bfs_e, vp->v_sdev, position, size);
	} else {
		r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, position,
		       size, buf, rw_flag, &res_pos, &res_cum_io);
		if (r == OK) {
			position = res_pos;
			cum_io += res_cum_io;
		}
	}

	unlock_bsf();
  } else {				/* Regular files */
	if (rw_flag == WRITING) {
		/* Check for O_APPEND flag. */
		if (f->filp_flags & O_APPEND) position = vp->v_size;
	}

	/* Issue request */
	if (rw_flag == PEEKING) {
		r = req_peek(vp->v_fs_e, vp->v_inode_nr, position, size);
	} else {
		off_t new_pos;
		r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position,
			rw_flag, for_e, buf, size, &new_pos,
			&cum_io_incr);

		if (r >= 0) {
			position = new_pos;
			cum_io += cum_io_incr;
		}
	}
  }

  /* On write, update the cached file size if the file grew. */
  if (rw_flag == WRITING) {
	if (S_ISREG(vp->v_mode) || S_ISDIR(vp->v_mode)) {
		if (position > vp->v_size) {
			vp->v_size = position;
		}
	}
  }

  f->filp_pos = position;

  if (r == EPIPE && rw_flag == WRITING) {
	/* Process is writing, but there is no reader. Tell the kernel to
	 * generate a SIGPIPE signal.
	 */
	if (!(f->filp_flags & O_NOSIGPIPE)) {
		sys_kill(rfp->fp_endpoint, SIGPIPE);
	}
  }

  if (r == OK) {
	return(cum_io);
  }
  return(r);
}

/*===========================================================================*
 *				do_getdents				     *
 *===========================================================================*/
int do_getdents(void)
{
/* Perform the getdents(fd, buf, size) system call. */
  int r = OK;
  off_t new_pos;
  register struct filp *rfilp;

  fp->fp_fd = job_m_in.m_lc_vfs_readwrite.fd;
  fp->fp_io_buffer = job_m_in.m_lc_vfs_readwrite.buf;
  fp->fp_io_nbytes = job_m_in.m_lc_vfs_readwrite.len;

  /* Is the file descriptor valid? */
  if ((rfilp = get_filp(fp->fp_fd, VNODE_READ)) == NULL)
	return(err_code);

  if (!(rfilp->filp_mode & R_BIT))
	r = EBADF;
  else if (!S_ISDIR(rfilp->filp_vno->v_mode))
	r = EBADF;

  if (r == OK) {
	r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
			 rfilp->filp_pos, fp->fp_io_buffer, fp->fp_io_nbytes,
			 &new_pos, 0);

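	/* On success, req_getdents returns the number of bytes stored in the
	 * user buffer; a positive count means the directory offset advanced.
	 */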
	if (r > 0) rfilp->filp_pos = new_pos;
  }

  unlock_filp(rfilp);
  return(r);
}


/*===========================================================================*
 *				rw_pipe					     *
 *===========================================================================*/
int rw_pipe(int rw_flag /* READING or WRITING */, endpoint_t usr_e,
	struct filp *f, vir_bytes buf, size_t req_size)
{
  int r, oflags, partial_pipe = 0;
  size_t size, cum_io;
  size_t cum_io_incr;
  struct vnode *vp;
  off_t position, new_pos;

  /* Must make sure we're operating on locked filp and vnode */
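  /* The second assert relies on mutex_trylock() returning -EDEADLK when the
   * calling thread already holds the mutex: it verifies that this thread,
   * and no one else, holds the filp lock.
   */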
  assert(tll_locked_by_me(&f->filp_vno->v_lock));
  assert(mutex_trylock(&f->filp_lock) == -EDEADLK);

  oflags = f->filp_flags;
  vp = f->filp_vno;
  position = 0;	/* Not actually used */

  assert(rw_flag == READING || rw_flag == WRITING);

  /* fp->fp_cum_io_partial is only nonzero when doing partial writes.
   * We clear the field immediately here because we expect completion or error;
   * its value must be (re)assigned if we end up suspending the write (again).
   */
  cum_io = fp->fp_cum_io_partial;
  fp->fp_cum_io_partial = 0;

  r = pipe_check(f, rw_flag, oflags, req_size, 0);
  if (r <= 0) {
	if (r == SUSPEND) {
		fp->fp_cum_io_partial = cum_io;
		pipe_suspend(f, buf, req_size);
	}
	/* If pipe_check returns an error instead of suspending the call, we
	 * return that error, even if we are resuming a partially completed
	 * operation (i.e., a large blocking write), to match NetBSD's
	 * behavior.
	 */
	return(r);
  }

  size = r;
  if (size < req_size) partial_pipe = 1;

  /* Truncate read request at size. */
  if (rw_flag == READING && size > vp->v_size) {
	size = vp->v_size;
  }

  if (vp->v_mapfs_e == 0)
	panic("unmapped pipe");

  r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e,
		    buf, size, &new_pos, &cum_io_incr);

  if (r != OK) {
	assert(r != SUSPEND);
	return(r);
  }

  cum_io += cum_io_incr;
  buf += cum_io_incr;
  req_size -= cum_io_incr;

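  /* For a pipe, v_size is the number of bytes currently buffered in it:
   * reads drain the pipe and writes fill it.
   */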
  if (rw_flag == READING)
	vp->v_size -= cum_io_incr;
  else
	vp->v_size += cum_io_incr;

  if (partial_pipe) {
	/* Partial write on a pipe. With O_NONBLOCK set, fall through and
	 * return the byte count written so far. Without O_NONBLOCK, the
	 * write was larger than PIPE_BUF and therefore non-atomic: suspend
	 * so the remainder can be transferred later.
	 */
	if (!(oflags & O_NONBLOCK)) {
		fp->fp_cum_io_partial = cum_io;
		pipe_suspend(f, buf, req_size);
		return(SUSPEND);
	}
  }

  assert(fp->fp_cum_io_partial == 0);

  return(cum_io);
}
392