/* This file contains the heart of the mechanism used to read (and write)
 * files. Read and write requests are split up into chunks that do not cross
 * block boundaries. Each chunk is then processed in turn. Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	   perform the READ system call by calling read_write
 *   do_getdents:  read entries from a directory (GETDENTS)
 *   read_write:   actually do the work of READ and WRITE
 *
 */

#include "fs.h"
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/u64.h>
#include <minix/vfsif.h>
#include <assert.h>
#include <sys/dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include "file.h"
#include "vnode.h"
#include "vmnt.h"


/*===========================================================================*
 *				do_read					     *
 *===========================================================================*/
int do_read(void)
{
  return(do_read_write_peek(READING, job_m_in.m_lc_vfs_readwrite.fd,
	job_m_in.m_lc_vfs_readwrite.buf, job_m_in.m_lc_vfs_readwrite.len));
}


/*===========================================================================*
 *				lock_bsf				     *
 *===========================================================================*/
void lock_bsf(void)
{
  struct worker_thread *org_self;

  /* Take the fast path if the lock is free; otherwise suspend this worker
   * thread until the lock can be acquired.
   */
  if (mutex_trylock(&bsf_lock) == 0)
	return;

  org_self = worker_suspend();

  if (mutex_lock(&bsf_lock) != 0)
	panic("unable to lock block special file lock");

  worker_resume(org_self);
}

/*===========================================================================*
 *				unlock_bsf				     *
 *===========================================================================*/
void unlock_bsf(void)
{
  if (mutex_unlock(&bsf_lock) != 0)
	panic("failed to unlock block special file lock");
}

/*===========================================================================*
 *				check_bsf_lock				     *
 *===========================================================================*/
void check_bsf_lock(void)
{
  int r = mutex_trylock(&bsf_lock);

  if (r == -EBUSY)
	panic("bsf_lock locked");
  else if (r != 0)
	panic("bsf_lock weird state");

  /* r == 0; we took the lock ourselves, so release it again. */
  unlock_bsf();
}
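
/* Usage sketch (illustrative comment, not part of the build): read_write()
 * below brackets all block special file I/O in bsf_lock, so requests to the
 * backing file system are serialized:
 *
 *	lock_bsf();
 *	r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, position,
 *		size, buf, rw_flag, &res_pos, &res_cum_io);
 *	unlock_bsf();
 *
 * lock_bsf() first tries the mutex without blocking; only on contention does
 * it suspend the calling worker thread, so an uncontended acquisition stays
 * cheap. check_bsf_lock() is a debugging aid that panics if the lock is held
 * at a moment when it must be free.
 */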

/*===========================================================================*
 *			actual_read_write_peek				     *
 *===========================================================================*/
int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd,
	vir_bytes io_buf, size_t io_nbytes)
{
/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
  struct filp *f;
  tll_access_t locktype;
  int r;
  int ro = 1;

  if (rw_flag == WRITING) ro = 0;

  rfp->fp_fd = io_fd;
  rfp->fp_io_buffer = io_buf;
  rfp->fp_io_nbytes = io_nbytes;

  locktype = rw_flag == WRITING ? VNODE_WRITE : VNODE_READ;
  if ((f = get_filp2(rfp, rfp->fp_fd, locktype)) == NULL)
	return(err_code);

  assert(f->filp_count > 0);

  if (((f->filp_mode) & (ro ? R_BIT : W_BIT)) == 0) {
	unlock_filp(f);
	return(EBADF);
  }
  if (rfp->fp_io_nbytes == 0) {
	unlock_filp(f);
	return(0);	/* so char special files need not check for 0 */
  }

  r = read_write(rfp, rw_flag, f, rfp->fp_io_buffer, rfp->fp_io_nbytes, who_e);

  unlock_filp(f);
  return(r);
}

/*===========================================================================*
 *			do_read_write_peek				     *
 *===========================================================================*/
int do_read_write_peek(int rw_flag, int io_fd, vir_bytes io_buf,
	size_t io_nbytes)
{
  return actual_read_write_peek(fp, rw_flag, io_fd, io_buf, io_nbytes);
}
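
/* Call-chain sketch (illustrative): a READ system call travels
 *
 *	do_read()
 *	  -> do_read_write_peek(READING, fd, buf, len)	 (supplies global fp)
 *	    -> actual_read_write_peek(rfp, READING, ...) (locks the filp,
 *							  checks mode/count)
 *	      -> read_write(rfp, READING, f, ...)	 (dispatches on type)
 *
 * so by the time read_write() below runs, the filp is locked, the access
 * mode has been verified, and the byte count is known to be nonzero.
 */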

/*===========================================================================*
 *				read_write				     *
 *===========================================================================*/
int read_write(struct fproc *rfp, int rw_flag, struct filp *f,
	vir_bytes buf, size_t size, endpoint_t for_e)
{
  register struct vnode *vp;
  off_t position, res_pos;
  size_t cum_io, res_cum_io;
  size_t cum_io_incr;
  int op, r;
  dev_t dev;

  position = f->filp_pos;
  vp = f->filp_vno;
  r = OK;
  cum_io = 0;

  assert(rw_flag == READING || rw_flag == WRITING || rw_flag == PEEKING);

  if (size > SSIZE_MAX) return(EINVAL);

  op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE);

  if (S_ISFIFO(vp->v_mode)) {		/* Pipes */
	if (rfp->fp_cum_io_partial != 0) {
		panic("VFS: read_write: fp_cum_io_partial not clear");
	}
	if (rw_flag == PEEKING) {
		printf("read_write: peek on pipe makes no sense\n");
		return EINVAL;
	}
	r = rw_pipe(rw_flag, for_e, f, buf, size);
  } else if (S_ISCHR(vp->v_mode)) {	/* Character special files. */
	if (rw_flag == PEEKING) {
		printf("read_write: peek on char device makes no sense\n");
		return EINVAL;
	}

	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access char dev NO_DEV");

	dev = vp->v_sdev;

	r = cdev_io(op, dev, for_e, buf, position, size, f->filp_flags);
	if (r >= 0) {
		/* This should no longer happen: all calls are asynchronous. */
		printf("VFS: I/O to device %llx succeeded immediately!?\n",
			dev);
		cum_io = r;
		position += r;
		r = OK;
	} else if (r == SUSPEND) {
		/* FIXME: multiple read/write operations on a single filp
		 * should be serialized. They currently aren't; in order to
		 * achieve a similar effect, we optimistically advance the
		 * file position here. This works under the following
		 * assumptions:
		 * - character drivers that use the seek position at all,
		 *   expose a view of a statically-sized range of bytes,
		 *   i.e., they are basically byte-granular block devices;
		 * - if short I/O or an error is returned, all subsequent
		 *   calls will return (respectively) EOF and an error;
		 * - the application never checks its own file seek position,
		 *   or does not care that it may end up having seeked beyond
		 *   the number of bytes it has actually read;
		 * - communication to the character driver is FIFO (this one
		 *   is actually true! whew).
		 * Many improvements are possible here, but in the end,
		 * anything short of queuing concurrent operations will be
		 * suboptimal - so we settle for this hack for now.
		 */
		position += size;
	}
  } else if (S_ISBLK(vp->v_mode)) {	/* Block special files. */
	if (vp->v_sdev == NO_DEV)
		panic("VFS: read_write tries to access block dev NO_DEV");

	lock_bsf();

	if (rw_flag == PEEKING) {
		r = req_bpeek(vp->v_bfs_e, vp->v_sdev, position, size);
	} else {
		r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, position,
			size, buf, rw_flag, &res_pos, &res_cum_io);
		if (r == OK) {
			position = res_pos;
			cum_io += res_cum_io;
		}
	}

	unlock_bsf();
  } else {				/* Regular files */
	if (rw_flag == WRITING) {
		/* Check for O_APPEND flag. */
		if (f->filp_flags & O_APPEND) position = vp->v_size;
	}

	/* Issue request */
	if (rw_flag == PEEKING) {
		r = req_peek(vp->v_fs_e, vp->v_inode_nr, position, size);
	} else {
		off_t new_pos;
		r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position,
			rw_flag, for_e, buf, size, &new_pos,
			&cum_io_incr);

		if (r >= 0) {
			position = new_pos;
			cum_io += cum_io_incr;
		}
	}
  }

  /* On write, update the file size. */
  if (rw_flag == WRITING) {
	if (S_ISREG(vp->v_mode) || S_ISDIR(vp->v_mode)) {
		if (position > vp->v_size) {
			vp->v_size = position;
		}
	}
  }

  f->filp_pos = position;

  if (r == EPIPE && rw_flag == WRITING) {
	/* Process is writing, but there is no reader. Tell the kernel to
	 * generate a SIGPIPE signal.
	 */
	if (!(f->filp_flags & O_NOSIGPIPE)) {
		sys_kill(rfp->fp_endpoint, SIGPIPE);
	}
  }

  if (r == OK) {
	return(cum_io);
  }
  return(r);
}

/*===========================================================================*
 *				do_getdents				     *
 *===========================================================================*/
int do_getdents(void)
{
/* Perform the getdents(fd, buf, size) system call. */
  int r = OK;
  off_t new_pos;
  register struct filp *rfilp;

  fp->fp_fd = job_m_in.m_lc_vfs_readwrite.fd;
  fp->fp_io_buffer = job_m_in.m_lc_vfs_readwrite.buf;
  fp->fp_io_nbytes = job_m_in.m_lc_vfs_readwrite.len;

  /* Is the file descriptor valid? */
  if ((rfilp = get_filp(fp->fp_fd, VNODE_READ)) == NULL)
	return(err_code);

  if (!(rfilp->filp_mode & R_BIT))
	r = EBADF;
  else if (!S_ISDIR(rfilp->filp_vno->v_mode))
	r = EBADF;

  if (r == OK) {
	r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
		rfilp->filp_pos, fp->fp_io_buffer, fp->fp_io_nbytes,
		&new_pos, 0);

	if (r > 0) rfilp->filp_pos = new_pos;
  }

  unlock_filp(rfilp);
  return(r);
}
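
/* Userland view (illustrative; assumes the NetBSD-style struct dirent from
 * <sys/dirent.h> with a d_reclen field, and a hypothetical process_entry()
 * helper): a process typically drains a directory by calling getdents(2) in
 * a loop until it returns 0:
 *
 *	char buf[4096];
 *	struct dirent *dp;
 *	ssize_t n, off;
 *
 *	while ((n = getdents(fd, buf, sizeof(buf))) > 0) {
 *		for (off = 0; off < n; off += dp->d_reclen) {
 *			dp = (struct dirent *)&buf[off];
 *			process_entry(dp->d_name);
 *		}
 *	}
 *
 * Each successful call advances the file position via new_pos, exactly as
 * do_getdents() above does on the VFS side.
 */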

/*===========================================================================*
 *				rw_pipe					     *
 *===========================================================================*/
int rw_pipe(int rw_flag, endpoint_t usr_e, struct filp *f, vir_bytes buf,
	size_t req_size)
{
  int r, oflags, partial_pipe = 0;
  size_t size, cum_io;
  size_t cum_io_incr;
  struct vnode *vp;
  off_t position, new_pos;

  /* Must make sure we're operating on a locked filp and vnode */
  assert(tll_locked_by_me(&f->filp_vno->v_lock));
  assert(mutex_trylock(&f->filp_lock) == -EDEADLK);

  oflags = f->filp_flags;
  vp = f->filp_vno;
  position = 0;	/* Not actually used */

  assert(rw_flag == READING || rw_flag == WRITING);

  /* fp->fp_cum_io_partial is only nonzero when doing partial writes.
   * We clear the field immediately here because we expect completion or
   * error; its value must be (re)assigned if we end up suspending the
   * write (again).
   */
  cum_io = fp->fp_cum_io_partial;
  fp->fp_cum_io_partial = 0;

  r = pipe_check(f, rw_flag, oflags, req_size, 0);
  if (r <= 0) {
	if (r == SUSPEND) {
		fp->fp_cum_io_partial = cum_io;
		pipe_suspend(f, buf, req_size);
	}
	/* If pipe_check returns an error instead of suspending the call, we
	 * return that error, even if we are resuming a partially completed
	 * operation (ie, a large blocking write), to match NetBSD's behavior.
	 */
	return(r);
  }

  size = r;
  if (size < req_size) partial_pipe = 1;

  /* Truncate read request at size. */
  if (rw_flag == READING && size > vp->v_size) {
	size = vp->v_size;
  }

  if (vp->v_mapfs_e == 0)
	panic("unmapped pipe");

  r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag,
	usr_e, buf, size, &new_pos, &cum_io_incr);

  if (r != OK) {
	assert(r != SUSPEND);
	return(r);
  }

  cum_io += cum_io_incr;
  buf += cum_io_incr;
  req_size -= cum_io_incr;

  if (rw_flag == READING)
	vp->v_size -= cum_io_incr;
  else
	vp->v_size += cum_io_incr;

  if (partial_pipe) {
	/* A partial write on a pipe with O_NONBLOCK set returns the write
	 * count so far; otherwise (req_size > PIPE_BUF, non-atomic) we
	 * suspend until the remainder can be written.
	 */
	if (!(oflags & O_NONBLOCK)) {
		fp->fp_cum_io_partial = cum_io;
		pipe_suspend(f, buf, req_size);
		return(SUSPEND);
	}
  }

  assert(fp->fp_cum_io_partial == 0);

  return(cum_io);
}
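
/* Suspension sketch (illustrative): a large blocking write to a pipe may
 * complete in installments. Each pass through rw_pipe() transfers what fits
 * (the size returned by pipe_check()), remembers the running total in
 * fp->fp_cum_io_partial, and suspends the caller:
 *
 *	write(fd, buf, large_count)
 *	  rw_pipe(): transfers some bytes, sets fp_cum_io_partial,
 *		     pipe_suspend(), returns SUSPEND
 *	  ... a reader drains the pipe and the write is resumed ...
 *	  rw_pipe(): picks cum_io up from fp_cum_io_partial and continues,
 *		     until the full count is returned to the caller.
 *
 * With O_NONBLOCK set, the partial count is returned immediately instead.
 */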