xref: /minix3/minix/servers/vfs/read.c (revision dd41186aac5f9c05e657f127b7e5d33f375d1686)
1 /* This file contains the heart of the mechanism used to read (and write)
2  * files.  Read and write requests are split up into chunks that do not cross
3  * block boundaries.  Each chunk is then processed in turn.  Reads on special
4  * files are also detected and handled.
5  *
6  * The entry points into this file are
7  *   do_read:	 perform the READ system call by calling read_write
8  *   do_getdents: read entries from a directory (GETDENTS)
9  *   read_write: actually do the work of READ and WRITE
10  *
11  */
12 
13 #include "fs.h"
14 #include <minix/callnr.h>
15 #include <minix/com.h>
16 #include <minix/u64.h>
17 #include <minix/vfsif.h>
18 #include <assert.h>
19 #include <sys/dirent.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include "file.h"
23 #include "vnode.h"
24 #include "vmnt.h"
25 
26 
27 /*===========================================================================*
28  *				do_read					     *
29  *===========================================================================*/
30 int do_read(void)
31 {
32 
33   /*
34    * This field is currently reserved for internal usage only, and must be set
35    * to zero by the caller.  We may use it for future SA_RESTART support just
36    * like we are using it internally now.
37    */
38   if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
39 	return(EINVAL);
40 
41   return(do_read_write_peek(READING, job_m_in.m_lc_vfs_readwrite.fd,
42 	job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len));
43 }
44 
45 
46 /*===========================================================================*
47  *				lock_bsf				     *
48  *===========================================================================*/
49 void lock_bsf(void)
50 {
51   struct worker_thread *org_self;
52 
53   if (mutex_trylock(&bsf_lock) == 0)
54 	return;
55 
56   org_self = worker_suspend();
57 
58   if (mutex_lock(&bsf_lock) != 0)
59 	panic("unable to lock block special file lock");
60 
61   worker_resume(org_self);
62 }
63 
64 /*===========================================================================*
65  *				unlock_bsf				     *
66  *===========================================================================*/
67 void unlock_bsf(void)
68 {
69   if (mutex_unlock(&bsf_lock) != 0)
70 	panic("failed to unlock block special file lock");
71 }
72 
73 /*===========================================================================*
74  *				check_bsf_lock				     *
75  *===========================================================================*/
76 void check_bsf_lock(void)
77 {
78 	int r = mutex_trylock(&bsf_lock);
79 
80 	if (r == -EBUSY)
81 		panic("bsf_lock locked");
82 	else if (r != 0)
83 		panic("bsf_lock weird state");
84 
85 	/* r == 0 */
86 	unlock_bsf();
87 }
88 
89 /*===========================================================================*
90  *				actual_read_write_peek			     *
91  *===========================================================================*/
92 int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd,
93 	vir_bytes buf, size_t nbytes)
94 {
95 /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
96   struct filp *f;
97   tll_access_t locktype;
98   int r;
99   int ro = 1;
100 
101   if(rw_flag == WRITING) ro = 0;
102 
103   locktype = rw_flag == WRITING ? VNODE_WRITE : VNODE_READ;
104   if ((f = get_filp2(rfp, fd, locktype)) == NULL)
105 	return(err_code);
106 
107   assert(f->filp_count > 0);
108 
109   if (((f->filp_mode) & (ro ? R_BIT : W_BIT)) == 0) {
110 	unlock_filp(f);
111 	return(EBADF);
112   }
113   if (nbytes == 0) {
114 	unlock_filp(f);
115 	return(0);	/* so char special files need not check for 0*/
116   }
117 
118   r = read_write(rfp, rw_flag, fd, f, buf, nbytes, who_e);
119 
120   unlock_filp(f);
121   return(r);
122 }
123 
124 /*===========================================================================*
125  *				do_read_write_peek			     *
126  *===========================================================================*/
127 int do_read_write_peek(int rw_flag, int fd, vir_bytes buf, size_t nbytes)
128 {
129 	return actual_read_write_peek(fp, rw_flag, fd, buf, nbytes);
130 }
131 
132 /*===========================================================================*
133  *				read_write				     *
134  *===========================================================================*/
135 int read_write(struct fproc *rfp, int rw_flag, int fd, struct filp *f,
136 	vir_bytes buf, size_t size, endpoint_t for_e)
137 {
138   register struct vnode *vp;
139   off_t position, res_pos;
140   size_t cum_io, res_cum_io;
141   size_t cum_io_incr;
142   int op, r;
143   dev_t dev;
144 
145   position = f->filp_pos;
146   vp = f->filp_vno;
147   r = OK;
148   cum_io = 0;
149 
150   assert(rw_flag == READING || rw_flag == WRITING || rw_flag == PEEKING);
151 
152   if (size > SSIZE_MAX) return(EINVAL);
153 
154   if (S_ISFIFO(vp->v_mode)) {		/* Pipes */
155 	if(rw_flag == PEEKING) {
156 	  	printf("read_write: peek on pipe makes no sense\n");
157 		return EINVAL;
158 	}
159 	assert(fd != -1);
160 	op = (rw_flag == READING ? VFS_READ : VFS_WRITE);
161 	r = rw_pipe(rw_flag, for_e, f, op, fd, buf, size, 0 /*cum_io*/);
162   } else if (S_ISCHR(vp->v_mode)) {	/* Character special files. */
163 	if(rw_flag == PEEKING) {
164 	  	printf("read_write: peek on char device makes no sense\n");
165 		return EINVAL;
166 	}
167 
168 	if (vp->v_sdev == NO_DEV)
169 		panic("VFS: read_write tries to access char dev NO_DEV");
170 
171 	dev = vp->v_sdev;
172 	op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE);
173 
174 	r = cdev_io(op, dev, for_e, buf, position, size, f->filp_flags);
175 	if (r >= 0) {
176 		/* This should no longer happen: all calls are asynchronous. */
177 		printf("VFS: I/O to device %llx succeeded immediately!?\n", dev);
178 		cum_io = r;
179 		position += r;
180 		r = OK;
181 	} else if (r == SUSPEND) {
182 		/* FIXME: multiple read/write operations on a single filp
183 		 * should be serialized. They currently aren't; in order to
184 		 * achieve a similar effect, we optimistically advance the file
185 		 * position here. This works under the following assumptions:
186 		 * - character drivers that use the seek position at all,
187 		 *   expose a view of a statically-sized range of bytes, i.e.,
188 		 *   they are basically byte-granular block devices;
189 		 * - if short I/O or an error is returned, all subsequent calls
190 		 *   will return (respectively) EOF and an error;
191 		 * - the application never checks its own file seek position,
192 		 *   or does not care that it may end up having seeked beyond
193 		 *   the number of bytes it has actually read;
194 		 * - communication to the character driver is FIFO (this one
195 		 *   is actually true! whew).
196 		 * Many improvements are possible here, but in the end,
197 		 * anything short of queuing concurrent operations will be
198 		 * suboptimal - so we settle for this hack for now.
199 		 */
200 		position += size;
201 	}
202   } else if (S_ISBLK(vp->v_mode)) {	/* Block special files. */
203 	if (vp->v_sdev == NO_DEV)
204 		panic("VFS: read_write tries to access block dev NO_DEV");
205 
206 	lock_bsf();
207 
208 	if(rw_flag == PEEKING) {
209 		r = req_bpeek(vp->v_bfs_e, vp->v_sdev, position, size);
210 	} else {
211 		r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, position,
212 		       size, buf, rw_flag, &res_pos, &res_cum_io);
213 		if (r == OK) {
214 			position = res_pos;
215 			cum_io += res_cum_io;
216 		}
217 	}
218 
219 	unlock_bsf();
220   } else {				/* Regular files */
221 	if (rw_flag == WRITING) {
222 		/* Check for O_APPEND flag. */
223 		if (f->filp_flags & O_APPEND) position = vp->v_size;
224 	}
225 
226 	/* Issue request */
227 	if(rw_flag == PEEKING) {
228 		r = req_peek(vp->v_fs_e, vp->v_inode_nr, position, size);
229 	} else {
230 		off_t new_pos;
231 		r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position,
232 			rw_flag, for_e, buf, size, &new_pos,
233 			&cum_io_incr);
234 
235 		if (r >= 0) {
236 			position = new_pos;
237 			cum_io += cum_io_incr;
238 		}
239         }
240   }
241 
242   /* On write, update file size and access time. */
243   if (rw_flag == WRITING) {
244 	if (S_ISREG(vp->v_mode) || S_ISDIR(vp->v_mode)) {
245 		if (position > vp->v_size) {
246 			vp->v_size = position;
247 		}
248 	}
249   }
250 
251   f->filp_pos = position;
252 
253   if (r == EPIPE && rw_flag == WRITING) {
254 	/* Process is writing, but there is no reader. Tell the kernel to
255 	 * generate a SIGPIPE signal.
256 	 */
257 	if (!(f->filp_flags & O_NOSIGPIPE)) {
258 		sys_kill(rfp->fp_endpoint, SIGPIPE);
259 	}
260   }
261 
262   if (r == OK) {
263 	return(cum_io);
264   }
265   return(r);
266 }
267 
268 /*===========================================================================*
269  *				do_getdents				     *
270  *===========================================================================*/
271 int do_getdents(void)
272 {
273 /* Perform the getdents(fd, buf, size) system call. */
274   int fd, r = OK;
275   off_t new_pos;
276   vir_bytes buf;
277   size_t size;
278   register struct filp *rfilp;
279 
280   /* This field must always be set to zero for getdents(). */
281   if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
282 	return(EINVAL);
283 
284   fd = job_m_in.m_lc_vfs_readwrite.fd;
285   buf = job_m_in.m_lc_vfs_readwrite.buf;
286   size = job_m_in.m_lc_vfs_readwrite.len;
287 
288   /* Is the file descriptor valid? */
289   if ( (rfilp = get_filp(fd, VNODE_READ)) == NULL)
290 	return(err_code);
291 
292   if (!(rfilp->filp_mode & R_BIT))
293 	r = EBADF;
294   else if (!S_ISDIR(rfilp->filp_vno->v_mode))
295 	r = EBADF;
296 
297   if (r == OK) {
298 	r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
299 	    rfilp->filp_pos, buf, size, &new_pos, 0);
300 
301 	if (r > 0) rfilp->filp_pos = new_pos;
302   }
303 
304   unlock_filp(rfilp);
305   return(r);
306 }
307 
308 
309 /*===========================================================================*
310  *				rw_pipe					     *
311  *===========================================================================*/
312 int rw_pipe(int rw_flag, endpoint_t usr_e, struct filp *f, int callnr, int fd,
313 	vir_bytes buf, size_t nbytes, size_t cum_io)
314 {
315   int r, oflags, partial_pipe = FALSE;
316   size_t size;
317   size_t cum_io_incr;
318   struct vnode *vp;
319   off_t  position, new_pos;
320 
321   /* Must make sure we're operating on locked filp and vnode */
322   assert(tll_locked_by_me(&f->filp_vno->v_lock));
323   assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
324 
325   oflags = f->filp_flags;
326   vp = f->filp_vno;
327   position = 0;	/* Not actually used */
328 
329   assert(rw_flag == READING || rw_flag == WRITING);
330 
331   r = pipe_check(f, rw_flag, oflags, nbytes, 0);
332   if (r <= 0) {
333 	if (r == SUSPEND)
334 		pipe_suspend(callnr, fd, buf, nbytes, cum_io);
335 
336 	/* If pipe_check returns an error instead of suspending the call, we
337 	 * return that error, even if we are resuming a partially completed
338 	 * operation (ie, a large blocking write), to match NetBSD's behavior.
339 	 */
340 	return(r);
341   }
342 
343   size = r;
344   if (size < nbytes) partial_pipe = TRUE;
345 
346   /* Truncate read request at size. */
347   if (rw_flag == READING && size > vp->v_size) {
348 	size = vp->v_size;
349   }
350 
351   if (vp->v_mapfs_e == 0)
352 	panic("unmapped pipe");
353 
354   r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e,
355 		    buf, size, &new_pos, &cum_io_incr);
356 
357   if (r != OK) {
358 	assert(r != SUSPEND);
359 	return(r);
360   }
361 
362   cum_io += cum_io_incr;
363   buf += cum_io_incr;
364   nbytes -= cum_io_incr;
365 
366   if (rw_flag == READING)
367 	vp->v_size -= cum_io_incr;
368   else
369 	vp->v_size += cum_io_incr;
370 
371   if (partial_pipe) {
372 	/* partial write on pipe with */
373 	/* O_NONBLOCK, return write count */
374 	if (!(oflags & O_NONBLOCK)) {
375 		/* partial write on pipe with nbytes > PIPE_BUF, non-atomic */
376 		pipe_suspend(callnr, fd, buf, nbytes, cum_io);
377 		return(SUSPEND);
378 	}
379   }
380 
381   return(cum_io);
382 }
383