/* This file contains the heart of the mechanism used to read (and write)
 * files.  Read and write requests are split up into chunks that do not cross
 * block boundaries.  Each chunk is then processed in turn.  Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	  perform the READ system call by calling read_write
 *   do_getdents: read entries from a directory (GETDENTS)
 *   read_write:  actually do the work of READ and WRITE
 *
 */

#include "fs.h"
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/u64.h>
#include <minix/vfsif.h>
#include <assert.h>
#include <sys/dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include "file.h"
#include "vnode.h"
#include "vmnt.h"


/*===========================================================================*
 *				do_read					     *
 *===========================================================================*/
int do_read(void)
{
/* Entry point for the READ system call: validate the reserved message field
 * and delegate the real work to do_read_write_peek().
 */

  /*
   * This field is currently reserved for internal usage only, and must be set
   * to zero by the caller. We may use it for future SA_RESTART support just
   * like we are using it internally now.
   */
  if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
	return(EINVAL);

  return(do_read_write_peek(READING, job_m_in.m_lc_vfs_readwrite.fd,
	job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len));
}


/*===========================================================================*
 *				lock_bsf				     *
 *===========================================================================*/
void lock_bsf(void)
{
/* Acquire the global block special file lock.  If the lock is contended, the
 * calling worker thread is suspended while blocking on the mutex and resumed
 * once the lock has been obtained.
 */
  struct worker_thread *org_self;

  /* Fast path: lock was free, we now hold it. */
  if (mutex_trylock(&bsf_lock) == 0)
	return;

  /* Contended: detach from the worker context before blocking. */
  org_self = worker_suspend();

  if (mutex_lock(&bsf_lock) != 0)
	panic("unable to lock block special file lock");

  worker_resume(org_self);
}

/*===========================================================================*
 *				unlock_bsf				     *
 *===========================================================================*/
void unlock_bsf(void)
{
/* Release the global block special file lock. */
  if (mutex_unlock(&bsf_lock) != 0)
	panic("failed to unlock block special file lock");
}

/*===========================================================================*
 *				check_bsf				     *
 *===========================================================================*/
void check_bsf_lock(void)
{
/* Sanity check: panic if the block special file lock is currently held.
 * On success the trylock above leaves us holding the lock, so release it.
 */
  int r = mutex_trylock(&bsf_lock);

  if (r == -EBUSY)
	panic("bsf_lock locked");
  else if (r != 0)
	panic("bsf_lock weird state");

  /* r == 0: we acquired the lock ourselves; release it again. */
  unlock_bsf();
}

/*===========================================================================*
 *				actual_read_write_peek			     *
 *===========================================================================*/
int actual_read_write_peek(struct fproc *rfp, int rw_flag, int fd,
	vir_bytes buf, size_t nbytes)
{
/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call.
 * Looks up and locks the filp for 'fd', checks the open mode and a zero-size
 * request, then calls read_write() to do the transfer.  Returns a byte count
 * or a negative error code.
 */
  struct filp *f;
  tll_access_t locktype;
  int r;
  int ro = 1;	/* "read-only" access check: cleared for WRITING below */

  if(rw_flag == WRITING) ro = 0;

  /* Writes need an exclusive vnode lock; reads and peeks share one. */
  locktype = rw_flag == WRITING ? VNODE_WRITE : VNODE_READ;
  if ((f = get_filp2(rfp, fd, locktype)) == NULL)
	return(err_code);

  assert(f->filp_count > 0);

  /* The file must have been opened for the requested kind of access:
   * R_BIT for reads/peeks, W_BIT for writes.
   */
  if (((f->filp_mode) & (ro ? R_BIT : W_BIT)) == 0) {
	unlock_filp(f);
	return(EBADF);
  }
  if (nbytes == 0) {
	unlock_filp(f);
	return(0);	/* so char special files need not check for 0*/
  }

  r = read_write(rfp, rw_flag, fd, f, buf, nbytes, who_e);

  unlock_filp(f);
  return(r);
}

/*===========================================================================*
 *				do_read_write_peek			     *
 *===========================================================================*/
int do_read_write_peek(int rw_flag, int fd, vir_bytes buf, size_t nbytes)
{
/* Convenience wrapper: perform the operation on behalf of the current
 * process ('fp').
 */
  return actual_read_write_peek(fp, rw_flag, fd, buf, nbytes);
}

/*===========================================================================*
 *				read_write				     *
 *===========================================================================*/
int read_write(struct fproc *rfp, int rw_flag, int fd, struct filp *f,
	vir_bytes buf, size_t size, endpoint_t for_e)
{
/* Dispatch a read, write, or peek on an already locked filp, based on the
 * type of the underlying vnode: pipe, character special, block special, or
 * regular file.  On success the file position is advanced and the number of
 * bytes transferred is returned; otherwise a negative error code (or SUSPEND
 * for asynchronous character device I/O) is returned.
 */
  register struct vnode *vp;
  off_t position, res_pos;
  size_t cum_io, res_cum_io;
  size_t cum_io_incr;
  int op, r;
  dev_t dev;

  position = f->filp_pos;
  vp = f->filp_vno;
  r = OK;
  cum_io = 0;

  assert(rw_flag == READING || rw_flag == WRITING || rw_flag == PEEKING);

  /* The result must fit in a (signed) ssize_t return value. */
  if (size > SSIZE_MAX) return(EINVAL);

  if (S_ISFIFO(vp->v_mode)) {		/* Pipes */
	if(rw_flag == PEEKING) {
		printf("read_write: peek on pipe makes no sense\n");
		return EINVAL;
	}
	assert(fd != -1);
	op = (rw_flag == READING ? VFS_READ : VFS_WRITE);
	r = rw_pipe(rw_flag, for_e, f, op, fd, buf, size, 0 /*cum_io*/);
  } else if (S_ISCHR(vp->v_mode)) {	/* Character special files. */
	if(rw_flag == PEEKING) {
		printf("read_write: peek on char device makes no sense\n");
		return EINVAL;
	}

	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access char dev NO_DEV");

	dev = vp->v_sdev;
	op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE);

	r = cdev_io(op, dev, for_e, buf, position, size, f->filp_flags);
	if (r >= 0) {
		/* This should no longer happen: all calls are asynchronous. */
		printf("VFS: I/O to device %llx succeeded immediately!?\n", dev);
		cum_io = r;
		position += r;
		r = OK;
	} else if (r == SUSPEND) {
		/* FIXME: multiple read/write operations on a single filp
		 * should be serialized. They currently aren't; in order to
		 * achieve a similar effect, we optimistically advance the file
		 * position here. This works under the following assumptions:
		 * - character drivers that use the seek position at all,
		 *   expose a view of a statically-sized range of bytes, i.e.,
		 *   they are basically byte-granular block devices;
		 * - if short I/O or an error is returned, all subsequent calls
		 *   will return (respectively) EOF and an error;
		 * - the application never checks its own file seek position,
		 *   or does not care that it may end up having seeked beyond
		 *   the number of bytes it has actually read;
		 * - communication to the character driver is FIFO (this one
		 *   is actually true! whew).
		 * Many improvements are possible here, but in the end,
		 * anything short of queuing concurrent operations will be
		 * suboptimal - so we settle for this hack for now.
		 */
		position += size;
	}
  } else if (S_ISBLK(vp->v_mode)) {	/* Block special files. */
	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access block dev NO_DEV");

	/* Block special file I/O is serialized by a single global lock. */
	lock_bsf();

	if(rw_flag == PEEKING) {
		r = req_bpeek(vp->v_bfs_e, vp->v_sdev, position, size);
	} else {
		r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, position,
			size, buf, rw_flag, &res_pos, &res_cum_io);
		if (r == OK) {
			position = res_pos;
			cum_io += res_cum_io;
		}
	}

	unlock_bsf();
  } else {				/* Regular files */
	if (rw_flag == WRITING) {
		/* Check for O_APPEND flag. */
		if (f->filp_flags & O_APPEND) position = vp->v_size;
	}

	/* Issue request */
	if(rw_flag == PEEKING) {
		r = req_peek(vp->v_fs_e, vp->v_inode_nr, position, size);
	} else {
		off_t new_pos;
		r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position,
			rw_flag, for_e, buf, size, &new_pos,
			&cum_io_incr);

		if (r >= 0) {
			position = new_pos;
			cum_io += cum_io_incr;
		}
	}
  }

  /* On write, update file size and access time. */
  if (rw_flag == WRITING) {
	if (S_ISREG(vp->v_mode) || S_ISDIR(vp->v_mode)) {
		/* A write past the old end of the file grows it. */
		if (position > vp->v_size) {
			vp->v_size = position;
		}
	}
  }

  f->filp_pos = position;

  if (r == EPIPE && rw_flag == WRITING) {
	/* Process is writing, but there is no reader. Tell the kernel to
	 * generate a SIGPIPE signal.
	 */
	if (!(f->filp_flags & O_NOSIGPIPE)) {
		sys_kill(rfp->fp_endpoint, SIGPIPE);
	}
  }

  if (r == OK) {
	return(cum_io);
  }
  return(r);
}

/*===========================================================================*
 *				do_getdents				     *
 *===========================================================================*/
int do_getdents(void)
{
/* Perform the getdents(fd, buf, size) system call: ask the file system
 * holding the directory to copy directory entries into the caller's buffer,
 * and advance the directory position on success.
 */
  int fd, r = OK;
  off_t new_pos;
  vir_bytes buf;
  size_t size;
  register struct filp *rfilp;

  /* This field must always be set to zero for getdents(). */
  if (job_m_in.m_lc_vfs_readwrite.cum_io != 0)
	return(EINVAL);

  fd = job_m_in.m_lc_vfs_readwrite.fd;
  buf = job_m_in.m_lc_vfs_readwrite.buf;
  size = job_m_in.m_lc_vfs_readwrite.len;

  /* Is the file descriptor valid? */
  if ( (rfilp = get_filp(fd, VNODE_READ)) == NULL)
	return(err_code);

  /* The descriptor must be open for reading and refer to a directory. */
  if (!(rfilp->filp_mode & R_BIT))
	r = EBADF;
  else if (!S_ISDIR(rfilp->filp_vno->v_mode))
	r = EBADF;

  if (r == OK) {
	r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
		rfilp->filp_pos, buf, size, &new_pos, 0);

	/* r > 0 means bytes were returned; only then advance the position. */
	if (r > 0) rfilp->filp_pos = new_pos;
  }

  unlock_filp(rfilp);
  return(r);
}


/*===========================================================================*
 *				rw_pipe					     *
 *===========================================================================*/
int rw_pipe(int rw_flag, endpoint_t usr_e, struct filp *f, int callnr, int fd,
	vir_bytes buf, size_t nbytes, size_t cum_io)
{
/* Perform a read or write on a pipe.  pipe_check() decides how many bytes
 * may be transferred now (or suspends the caller); the transfer itself is
 * delegated to the file system process that holds the pipe's backing inode.
 * 'cum_io' carries the byte count of a partially completed (resumed)
 * operation.  Returns the cumulative byte count, SUSPEND, or an error.
 */
  int r, oflags, partial_pipe = FALSE;
  size_t size;
  size_t cum_io_incr;
  struct vnode *vp;
  off_t position, new_pos;

  /* Must make sure we're operating on locked filp and vnode */
  assert(tll_locked_by_me(&f->filp_vno->v_lock));
  assert(mutex_trylock(&f->filp_lock) == -EDEADLK);

  oflags = f->filp_flags;
  vp = f->filp_vno;
  position = 0;	/* Not actually used */

  assert(rw_flag == READING || rw_flag == WRITING);

  /* How many bytes may be transferred right now?  <= 0 means suspend/error. */
  r = pipe_check(f, rw_flag, oflags, nbytes, 0);
  if (r <= 0) {
	if (r == SUSPEND)
		pipe_suspend(callnr, fd, buf, nbytes, cum_io);

	/* If pipe_check returns an error instead of suspending the call, we
	 * return that error, even if we are resuming a partially completed
	 * operation (ie, a large blocking write), to match NetBSD's behavior.
	 */
	return(r);
  }

  size = r;
  if (size < nbytes) partial_pipe = TRUE;

  /* Truncate read request at size. */
  if (rw_flag == READING && size > vp->v_size) {
	size = vp->v_size;
  }

  if (vp->v_mapfs_e == 0)
	panic("unmapped pipe");

  r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e,
	buf, size, &new_pos, &cum_io_incr);

  if (r != OK) {
	assert(r != SUSPEND);
	return(r);
  }

  cum_io += cum_io_incr;
  buf += cum_io_incr;
  nbytes -= cum_io_incr;

  /* v_size tracks the number of bytes currently buffered in the pipe:
   * reads drain it, writes fill it.
   */
  if (rw_flag == READING)
	vp->v_size -= cum_io_incr;
  else
	vp->v_size += cum_io_incr;

  if (partial_pipe) {
	/* partial write on pipe with */
	/* O_NONBLOCK, return write count */
	if (!(oflags & O_NONBLOCK)) {
		/* partial write on pipe with nbytes > PIPE_BUF, non-atomic */
		pipe_suspend(callnr, fd, buf, nbytes, cum_io);
		return(SUSPEND);
	}
  }

  return(cum_io);
}