1 //===--- Implementation of a platform independent file data structure -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "file.h" 10 11 #include "hdr/func/realloc.h" 12 #include "hdr/stdio_macros.h" 13 #include "hdr/types/off_t.h" 14 #include "src/__support/CPP/new.h" 15 #include "src/__support/CPP/span.h" 16 #include "src/__support/macros/config.h" 17 #include "src/errno/libc_errno.h" // For error macros 18 19 namespace LIBC_NAMESPACE_DECL { 20 21 FileIOResult File::write_unlocked(const void *data, size_t len) { 22 if (!write_allowed()) { 23 err = true; 24 return {0, EBADF}; 25 } 26 27 prev_op = FileOp::WRITE; 28 29 if (bufmode == _IONBF) { // unbuffered. 30 size_t ret_val = 31 write_unlocked_nbf(static_cast<const uint8_t *>(data), len); 32 flush_unlocked(); 33 return ret_val; 34 } else if (bufmode == _IOFBF) { // fully buffered 35 return write_unlocked_fbf(static_cast<const uint8_t *>(data), len); 36 } else /*if (bufmode == _IOLBF) */ { // line buffered 37 return write_unlocked_lbf(static_cast<const uint8_t *>(data), len); 38 } 39 } 40 41 FileIOResult File::write_unlocked_nbf(const uint8_t *data, size_t len) { 42 if (pos > 0) { // If the buffer is not empty 43 // Flush the buffer 44 const size_t write_size = pos; 45 FileIOResult write_result = platform_write(this, buf, write_size); 46 pos = 0; // Buffer is now empty so reset pos to the beginning. 47 // If less bytes were written than expected, then an error occurred. 48 if (write_result < write_size) { 49 err = true; 50 // No bytes from data were written, so return 0. 51 return {0, write_result.error}; 52 } 53 } 54 55 FileIOResult write_result = platform_write(this, data, len); 56 if (write_result < len) 57 err = true; 58 return write_result; 59 } 60 61 FileIOResult File::write_unlocked_fbf(const uint8_t *data, size_t len) { 62 const size_t init_pos = pos; 63 const size_t bufspace = bufsize - pos; 64 65 // If data is too large to be buffered at all, then just write it unbuffered. 66 if (len > bufspace + bufsize) 67 return write_unlocked_nbf(data, len); 68 69 // we split |data| (conceptually) using the split point. Then we handle the 70 // two pieces separately. 71 const size_t split_point = len < bufspace ? len : bufspace; 72 73 // The primary piece is the piece of |data| we want to write to the buffer 74 // before flushing. It will always fit into the buffer, since the split point 75 // is defined as being min(len, bufspace), and it will always exist if len is 76 // non-zero. 77 cpp::span<const uint8_t> primary(data, split_point); 78 79 // The second piece is the remainder of |data|. It is written to the buffer if 80 // it fits, or written directly to the output if it doesn't. If the primary 81 // piece fits entirely in the buffer, the remainder may be nothing. 82 cpp::span<const uint8_t> remainder( 83 static_cast<const uint8_t *>(data) + split_point, len - split_point); 84 85 cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize); 86 87 // Copy the first piece into the buffer. 88 // TODO: Replace the for loop below with a call to internal memcpy. 89 for (size_t i = 0; i < primary.size(); ++i) 90 bufref[pos + i] = primary[i]; 91 pos += primary.size(); 92 93 // If there is no remainder, we can return early, since the first piece has 94 // fit completely into the buffer. 95 if (remainder.size() == 0) 96 return len; 97 98 // We need to flush the buffer now, since there is still data and the buffer 99 // is full. 100 const size_t write_size = pos; 101 102 FileIOResult buf_result = platform_write(this, buf, write_size); 103 size_t bytes_written = buf_result.value; 104 105 pos = 0; // Buffer is now empty so reset pos to the beginning. 106 // If less bytes were written than expected, then an error occurred. Return 107 // the number of bytes that have been written from |data|. 108 if (buf_result.has_error() || bytes_written < write_size) { 109 err = true; 110 return {bytes_written <= init_pos ? 0 : bytes_written - init_pos, 111 buf_result.error}; 112 } 113 114 // The second piece is handled basically the same as the first, although we 115 // know that if the second piece has data in it then the buffer has been 116 // flushed, meaning that pos is always 0. 117 if (remainder.size() < bufsize) { 118 // TODO: Replace the for loop below with a call to internal memcpy. 119 for (size_t i = 0; i < remainder.size(); ++i) 120 bufref[i] = remainder[i]; 121 pos = remainder.size(); 122 } else { 123 124 FileIOResult result = 125 platform_write(this, remainder.data(), remainder.size()); 126 size_t bytes_written = buf_result.value; 127 128 // If less bytes were written than expected, then an error occurred. Return 129 // the number of bytes that have been written from |data|. 130 if (result.has_error() || bytes_written < remainder.size()) { 131 err = true; 132 return {primary.size() + bytes_written, result.error}; 133 } 134 } 135 136 return len; 137 } 138 139 FileIOResult File::write_unlocked_lbf(const uint8_t *data, size_t len) { 140 constexpr uint8_t NEWLINE_CHAR = '\n'; 141 size_t last_newline = len; 142 for (size_t i = len; i >= 1; --i) { 143 if (data[i - 1] == NEWLINE_CHAR) { 144 last_newline = i - 1; 145 break; 146 } 147 } 148 149 // If there is no newline, treat this as fully buffered. 150 if (last_newline == len) { 151 return write_unlocked_fbf(data, len); 152 } 153 154 // we split |data| (conceptually) using the split point. Then we handle the 155 // two pieces separately. 156 const size_t split_point = last_newline + 1; 157 158 // The primary piece is everything in |data| up to the newline. It's written 159 // unbuffered to the output. 160 cpp::span<const uint8_t> primary(data, split_point); 161 162 // The second piece is the remainder of |data|. It is written fully buffered, 163 // meaning it may stay in the buffer if it fits. 164 cpp::span<const uint8_t> remainder( 165 static_cast<const uint8_t *>(data) + split_point, len - split_point); 166 167 size_t written = 0; 168 169 written = write_unlocked_nbf(primary.data(), primary.size()); 170 if (written < primary.size()) { 171 err = true; 172 return written; 173 } 174 175 flush_unlocked(); 176 177 written += write_unlocked_fbf(remainder.data(), remainder.size()); 178 if (written < len) { 179 err = true; 180 return written; 181 } 182 183 return len; 184 } 185 186 FileIOResult File::read_unlocked(void *data, size_t len) { 187 if (!read_allowed()) { 188 err = true; 189 return {0, EBADF}; 190 } 191 192 prev_op = FileOp::READ; 193 194 if (bufmode == _IONBF) { // unbuffered. 195 return read_unlocked_nbf(static_cast<uint8_t *>(data), len); 196 } else if (bufmode == _IOFBF) { // fully buffered 197 return read_unlocked_fbf(static_cast<uint8_t *>(data), len); 198 } else /*if (bufmode == _IOLBF) */ { // line buffered 199 // There is no line buffered mode for read. Use fully buffered instead. 200 return read_unlocked_fbf(static_cast<uint8_t *>(data), len); 201 } 202 } 203 204 size_t File::copy_data_from_buf(uint8_t *data, size_t len) { 205 cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize); 206 cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data), len); 207 208 // Because read_limit is always greater than equal to pos, 209 // available_data is never a wrapped around value. 210 size_t available_data = read_limit - pos; 211 if (len <= available_data) { 212 // TODO: Replace the for loop below with a call to internal memcpy. 213 for (size_t i = 0; i < len; ++i) 214 dataref[i] = bufref[i + pos]; 215 pos += len; 216 return len; 217 } 218 219 // Copy all of the available data. 220 // TODO: Replace the for loop with a call to internal memcpy. 221 for (size_t i = 0; i < available_data; ++i) 222 dataref[i] = bufref[i + pos]; 223 read_limit = pos = 0; // Reset the pointers. 224 225 return available_data; 226 } 227 228 FileIOResult File::read_unlocked_fbf(uint8_t *data, size_t len) { 229 // Read data from the buffer first. 230 size_t available_data = copy_data_from_buf(data, len); 231 if (available_data == len) 232 return available_data; 233 234 // Update the dataref to reflect that fact that we have already 235 // copied |available_data| into |data|. 236 size_t to_fetch = len - available_data; 237 cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data) + available_data, 238 to_fetch); 239 240 if (to_fetch > bufsize) { 241 FileIOResult result = platform_read(this, dataref.data(), to_fetch); 242 size_t fetched_size = result.value; 243 if (result.has_error() || fetched_size < to_fetch) { 244 if (!result.has_error()) 245 eof = true; 246 else 247 err = true; 248 return {available_data + fetched_size, result.error}; 249 } 250 return len; 251 } 252 253 // Fetch and buffer another buffer worth of data. 254 FileIOResult result = platform_read(this, buf, bufsize); 255 size_t fetched_size = result.value; 256 read_limit += fetched_size; 257 size_t transfer_size = fetched_size >= to_fetch ? to_fetch : fetched_size; 258 for (size_t i = 0; i < transfer_size; ++i) 259 dataref[i] = buf[i]; 260 pos += transfer_size; 261 if (result.has_error() || fetched_size < to_fetch) { 262 if (!result.has_error()) 263 eof = true; 264 else 265 err = true; 266 } 267 return {transfer_size + available_data, result.error}; 268 } 269 270 FileIOResult File::read_unlocked_nbf(uint8_t *data, size_t len) { 271 // Check whether there is a character in the ungetc buffer. 272 size_t available_data = copy_data_from_buf(data, len); 273 if (available_data == len) 274 return available_data; 275 276 // Directly copy the data into |data|. 277 cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data) + available_data, 278 len - available_data); 279 FileIOResult result = platform_read(this, dataref.data(), dataref.size()); 280 281 if (result.has_error() || result < dataref.size()) { 282 if (!result.has_error()) 283 eof = true; 284 else 285 err = true; 286 } 287 return {result + available_data, result.error}; 288 } 289 290 int File::ungetc_unlocked(int c) { 291 // There is no meaning to unget if: 292 // 1. You are trying to push back EOF. 293 // 2. Read operations are not allowed on this file. 294 // 3. The previous operation was a write operation. 295 if (c == EOF || !read_allowed() || (prev_op == FileOp::WRITE)) 296 return EOF; 297 298 cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize); 299 if (read_limit == 0) { 300 // If |read_limit| is zero, it can mean three things: 301 // a. This file was just created. 302 // b. The previous operation was a seek operation. 303 // c. The previous operation was a read operation which emptied 304 // the buffer. 305 // For all the above cases, we simply write |c| at the beginning 306 // of the buffer and bump |read_limit|. Note that |pos| will also 307 // be zero in this case, so we don't need to adjust it. 308 bufref[0] = static_cast<unsigned char>(c); 309 ++read_limit; 310 } else { 311 // If |read_limit| is non-zero, it means that there is data in the buffer 312 // from a previous read operation. Which would also mean that |pos| is not 313 // zero. So, we decrement |pos| and write |c| in to the buffer at the new 314 // |pos|. If too many ungetc operations are performed without reads, it 315 // can lead to (pos == 0 but read_limit != 0). We will just error out in 316 // such a case. 317 if (pos == 0) 318 return EOF; 319 --pos; 320 bufref[pos] = static_cast<unsigned char>(c); 321 } 322 323 eof = false; // There is atleast one character that can be read now. 324 err = false; // This operation was a success. 325 return c; 326 } 327 328 ErrorOr<int> File::seek(off_t offset, int whence) { 329 FileLock lock(this); 330 if (prev_op == FileOp::WRITE && pos > 0) { 331 332 FileIOResult buf_result = platform_write(this, buf, pos); 333 if (buf_result.has_error() || buf_result.value < pos) { 334 err = true; 335 return Error(buf_result.error); 336 } 337 } else if (prev_op == FileOp::READ && whence == SEEK_CUR) { 338 // More data could have been read out from the platform file than was 339 // required. So, we have to adjust the offset we pass to platform seek 340 // function. Note that read_limit >= pos is always true. 341 offset -= (read_limit - pos); 342 } 343 pos = read_limit = 0; 344 prev_op = FileOp::SEEK; 345 // Reset the eof flag as a seek might move the file positon to some place 346 // readable. 347 eof = false; 348 auto result = platform_seek(this, offset, whence); 349 if (!result.has_value()) 350 return Error(result.error()); 351 return 0; 352 } 353 354 ErrorOr<off_t> File::tell() { 355 FileLock lock(this); 356 auto seek_target = eof ? SEEK_END : SEEK_CUR; 357 auto result = platform_seek(this, 0, seek_target); 358 if (!result.has_value() || result.value() < 0) 359 return Error(result.error()); 360 off_t platform_offset = result.value(); 361 if (prev_op == FileOp::READ) 362 return platform_offset - (read_limit - pos); 363 if (prev_op == FileOp::WRITE) 364 return platform_offset + pos; 365 return platform_offset; 366 } 367 368 int File::flush_unlocked() { 369 if (prev_op == FileOp::WRITE && pos > 0) { 370 FileIOResult buf_result = platform_write(this, buf, pos); 371 if (buf_result.has_error() || buf_result.value < pos) { 372 err = true; 373 return buf_result.error; 374 } 375 pos = 0; 376 } 377 // TODO: Add POSIX behavior for input streams. 378 return 0; 379 } 380 381 int File::set_buffer(void *buffer, size_t size, int buffer_mode) { 382 // We do not need to lock the file as this method should be called before 383 // other operations are performed on the file. 384 if (buffer != nullptr && size == 0) 385 return EINVAL; 386 387 switch (buffer_mode) { 388 case _IOFBF: 389 case _IOLBF: 390 case _IONBF: 391 break; 392 default: 393 return EINVAL; 394 } 395 396 if (buffer == nullptr && size != 0 && buffer_mode != _IONBF) { 397 // We exclude the case of buffer_mode == _IONBF in this branch 398 // because we don't need to allocate buffer in such a case. 399 if (own_buf) { 400 // This is one of the places where use a C allocation functon 401 // as C++ does not have an equivalent of realloc. 402 buf = reinterpret_cast<uint8_t *>(realloc(buf, size)); 403 if (buf == nullptr) 404 return ENOMEM; 405 } else { 406 AllocChecker ac; 407 buf = new (ac) uint8_t[size]; 408 if (!ac) 409 return ENOMEM; 410 own_buf = true; 411 } 412 bufsize = size; 413 // TODO: Handle allocation failures. 414 } else { 415 if (own_buf) 416 delete buf; 417 if (buffer_mode != _IONBF) { 418 buf = static_cast<uint8_t *>(buffer); 419 bufsize = size; 420 } else { 421 // We don't need any buffer. 422 buf = nullptr; 423 bufsize = 0; 424 } 425 own_buf = false; 426 } 427 bufmode = buffer_mode; 428 adjust_buf(); 429 return 0; 430 } 431 432 File::ModeFlags File::mode_flags(const char *mode) { 433 // First character in |mode| should be 'a', 'r' or 'w'. 434 if (*mode != 'a' && *mode != 'r' && *mode != 'w') 435 return 0; 436 437 // There should be exaclty one main mode ('a', 'r' or 'w') character. 438 // If there are more than one main mode characters listed, then 439 // we will consider |mode| as incorrect and return 0; 440 int main_mode_count = 0; 441 442 ModeFlags flags = 0; 443 for (; *mode != '\0'; ++mode) { 444 switch (*mode) { 445 case 'r': 446 flags |= static_cast<ModeFlags>(OpenMode::READ); 447 ++main_mode_count; 448 break; 449 case 'w': 450 flags |= static_cast<ModeFlags>(OpenMode::WRITE); 451 ++main_mode_count; 452 break; 453 case '+': 454 flags |= static_cast<ModeFlags>(OpenMode::PLUS); 455 break; 456 case 'b': 457 flags |= static_cast<ModeFlags>(ContentType::BINARY); 458 break; 459 case 'a': 460 flags |= static_cast<ModeFlags>(OpenMode::APPEND); 461 ++main_mode_count; 462 break; 463 case 'x': 464 flags |= static_cast<ModeFlags>(CreateType::EXCLUSIVE); 465 break; 466 default: 467 return 0; 468 } 469 } 470 471 if (main_mode_count != 1) 472 return 0; 473 474 return flags; 475 } 476 477 } // namespace LIBC_NAMESPACE_DECL 478