xref: /llvm-project/libc/src/__support/File/file.cpp (revision c6b7bd42d58c166ed37cdd51bd46eaf2564b1f96)
1 //===--- Implementation of a platform independent file data structure -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "file.h"
10 
11 #include "hdr/func/realloc.h"
12 #include "hdr/stdio_macros.h"
13 #include "hdr/types/off_t.h"
14 #include "src/__support/CPP/new.h"
15 #include "src/__support/CPP/span.h"
16 #include "src/__support/macros/config.h"
17 #include "src/errno/libc_errno.h" // For error macros
18 
19 namespace LIBC_NAMESPACE_DECL {
20 
21 FileIOResult File::write_unlocked(const void *data, size_t len) {
22   if (!write_allowed()) {
23     err = true;
24     return {0, EBADF};
25   }
26 
27   prev_op = FileOp::WRITE;
28 
29   if (bufmode == _IONBF) { // unbuffered.
30     size_t ret_val =
31         write_unlocked_nbf(static_cast<const uint8_t *>(data), len);
32     flush_unlocked();
33     return ret_val;
34   } else if (bufmode == _IOFBF) { // fully buffered
35     return write_unlocked_fbf(static_cast<const uint8_t *>(data), len);
36   } else /*if (bufmode == _IOLBF) */ { // line buffered
37     return write_unlocked_lbf(static_cast<const uint8_t *>(data), len);
38   }
39 }
40 
41 FileIOResult File::write_unlocked_nbf(const uint8_t *data, size_t len) {
42   if (pos > 0) { // If the buffer is not empty
43     // Flush the buffer
44     const size_t write_size = pos;
45     FileIOResult write_result = platform_write(this, buf, write_size);
46     pos = 0; // Buffer is now empty so reset pos to the beginning.
47     // If less bytes were written than expected, then an error occurred.
48     if (write_result < write_size) {
49       err = true;
50       // No bytes from data were written, so return 0.
51       return {0, write_result.error};
52     }
53   }
54 
55   FileIOResult write_result = platform_write(this, data, len);
56   if (write_result < len)
57     err = true;
58   return write_result;
59 }
60 
61 FileIOResult File::write_unlocked_fbf(const uint8_t *data, size_t len) {
62   const size_t init_pos = pos;
63   const size_t bufspace = bufsize - pos;
64 
65   // If data is too large to be buffered at all, then just write it unbuffered.
66   if (len > bufspace + bufsize)
67     return write_unlocked_nbf(data, len);
68 
69   // we split |data| (conceptually) using the split point. Then we handle the
70   // two pieces separately.
71   const size_t split_point = len < bufspace ? len : bufspace;
72 
73   // The primary piece is the piece of |data| we want to write to the buffer
74   // before flushing. It will always fit into the buffer, since the split point
75   // is defined as being min(len, bufspace), and it will always exist if len is
76   // non-zero.
77   cpp::span<const uint8_t> primary(data, split_point);
78 
79   // The second piece is the remainder of |data|. It is written to the buffer if
80   // it fits, or written directly to the output if it doesn't. If the primary
81   // piece fits entirely in the buffer, the remainder may be nothing.
82   cpp::span<const uint8_t> remainder(
83       static_cast<const uint8_t *>(data) + split_point, len - split_point);
84 
85   cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize);
86 
87   // Copy the first piece into the buffer.
88   // TODO: Replace the for loop below with a call to internal memcpy.
89   for (size_t i = 0; i < primary.size(); ++i)
90     bufref[pos + i] = primary[i];
91   pos += primary.size();
92 
93   // If there is no remainder, we can return early, since the first piece has
94   // fit completely into the buffer.
95   if (remainder.size() == 0)
96     return len;
97 
98   // We need to flush the buffer now, since there is still data and the buffer
99   // is full.
100   const size_t write_size = pos;
101 
102   FileIOResult buf_result = platform_write(this, buf, write_size);
103   size_t bytes_written = buf_result.value;
104 
105   pos = 0; // Buffer is now empty so reset pos to the beginning.
106   // If less bytes were written than expected, then an error occurred. Return
107   // the number of bytes that have been written from |data|.
108   if (buf_result.has_error() || bytes_written < write_size) {
109     err = true;
110     return {bytes_written <= init_pos ? 0 : bytes_written - init_pos,
111             buf_result.error};
112   }
113 
114   // The second piece is handled basically the same as the first, although we
115   // know that if the second piece has data in it then the buffer has been
116   // flushed, meaning that pos is always 0.
117   if (remainder.size() < bufsize) {
118     // TODO: Replace the for loop below with a call to internal memcpy.
119     for (size_t i = 0; i < remainder.size(); ++i)
120       bufref[i] = remainder[i];
121     pos = remainder.size();
122   } else {
123 
124     FileIOResult result =
125         platform_write(this, remainder.data(), remainder.size());
126     size_t bytes_written = buf_result.value;
127 
128     // If less bytes were written than expected, then an error occurred. Return
129     // the number of bytes that have been written from |data|.
130     if (result.has_error() || bytes_written < remainder.size()) {
131       err = true;
132       return {primary.size() + bytes_written, result.error};
133     }
134   }
135 
136   return len;
137 }
138 
139 FileIOResult File::write_unlocked_lbf(const uint8_t *data, size_t len) {
140   constexpr uint8_t NEWLINE_CHAR = '\n';
141   size_t last_newline = len;
142   for (size_t i = len; i >= 1; --i) {
143     if (data[i - 1] == NEWLINE_CHAR) {
144       last_newline = i - 1;
145       break;
146     }
147   }
148 
149   // If there is no newline, treat this as fully buffered.
150   if (last_newline == len) {
151     return write_unlocked_fbf(data, len);
152   }
153 
154   // we split |data| (conceptually) using the split point. Then we handle the
155   // two pieces separately.
156   const size_t split_point = last_newline + 1;
157 
158   // The primary piece is everything in |data| up to the newline. It's written
159   // unbuffered to the output.
160   cpp::span<const uint8_t> primary(data, split_point);
161 
162   // The second piece is the remainder of |data|. It is written fully buffered,
163   // meaning it may stay in the buffer if it fits.
164   cpp::span<const uint8_t> remainder(
165       static_cast<const uint8_t *>(data) + split_point, len - split_point);
166 
167   size_t written = 0;
168 
169   written = write_unlocked_nbf(primary.data(), primary.size());
170   if (written < primary.size()) {
171     err = true;
172     return written;
173   }
174 
175   flush_unlocked();
176 
177   written += write_unlocked_fbf(remainder.data(), remainder.size());
178   if (written < len) {
179     err = true;
180     return written;
181   }
182 
183   return len;
184 }
185 
186 FileIOResult File::read_unlocked(void *data, size_t len) {
187   if (!read_allowed()) {
188     err = true;
189     return {0, EBADF};
190   }
191 
192   prev_op = FileOp::READ;
193 
194   if (bufmode == _IONBF) { // unbuffered.
195     return read_unlocked_nbf(static_cast<uint8_t *>(data), len);
196   } else if (bufmode == _IOFBF) { // fully buffered
197     return read_unlocked_fbf(static_cast<uint8_t *>(data), len);
198   } else /*if (bufmode == _IOLBF) */ { // line buffered
199     // There is no line buffered mode for read. Use fully buffered instead.
200     return read_unlocked_fbf(static_cast<uint8_t *>(data), len);
201   }
202 }
203 
204 size_t File::copy_data_from_buf(uint8_t *data, size_t len) {
205   cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize);
206   cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data), len);
207 
208   // Because read_limit is always greater than equal to pos,
209   // available_data is never a wrapped around value.
210   size_t available_data = read_limit - pos;
211   if (len <= available_data) {
212     // TODO: Replace the for loop below with a call to internal memcpy.
213     for (size_t i = 0; i < len; ++i)
214       dataref[i] = bufref[i + pos];
215     pos += len;
216     return len;
217   }
218 
219   // Copy all of the available data.
220   // TODO: Replace the for loop with a call to internal memcpy.
221   for (size_t i = 0; i < available_data; ++i)
222     dataref[i] = bufref[i + pos];
223   read_limit = pos = 0; // Reset the pointers.
224 
225   return available_data;
226 }
227 
228 FileIOResult File::read_unlocked_fbf(uint8_t *data, size_t len) {
229   // Read data from the buffer first.
230   size_t available_data = copy_data_from_buf(data, len);
231   if (available_data == len)
232     return available_data;
233 
234   // Update the dataref to reflect that fact that we have already
235   // copied |available_data| into |data|.
236   size_t to_fetch = len - available_data;
237   cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data) + available_data,
238                              to_fetch);
239 
240   if (to_fetch > bufsize) {
241     FileIOResult result = platform_read(this, dataref.data(), to_fetch);
242     size_t fetched_size = result.value;
243     if (result.has_error() || fetched_size < to_fetch) {
244       if (!result.has_error())
245         eof = true;
246       else
247         err = true;
248       return {available_data + fetched_size, result.error};
249     }
250     return len;
251   }
252 
253   // Fetch and buffer another buffer worth of data.
254   FileIOResult result = platform_read(this, buf, bufsize);
255   size_t fetched_size = result.value;
256   read_limit += fetched_size;
257   size_t transfer_size = fetched_size >= to_fetch ? to_fetch : fetched_size;
258   for (size_t i = 0; i < transfer_size; ++i)
259     dataref[i] = buf[i];
260   pos += transfer_size;
261   if (result.has_error() || fetched_size < to_fetch) {
262     if (!result.has_error())
263       eof = true;
264     else
265       err = true;
266   }
267   return {transfer_size + available_data, result.error};
268 }
269 
270 FileIOResult File::read_unlocked_nbf(uint8_t *data, size_t len) {
271   // Check whether there is a character in the ungetc buffer.
272   size_t available_data = copy_data_from_buf(data, len);
273   if (available_data == len)
274     return available_data;
275 
276   // Directly copy the data into |data|.
277   cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data) + available_data,
278                              len - available_data);
279   FileIOResult result = platform_read(this, dataref.data(), dataref.size());
280 
281   if (result.has_error() || result < dataref.size()) {
282     if (!result.has_error())
283       eof = true;
284     else
285       err = true;
286   }
287   return {result + available_data, result.error};
288 }
289 
290 int File::ungetc_unlocked(int c) {
291   // There is no meaning to unget if:
292   // 1. You are trying to push back EOF.
293   // 2. Read operations are not allowed on this file.
294   // 3. The previous operation was a write operation.
295   if (c == EOF || !read_allowed() || (prev_op == FileOp::WRITE))
296     return EOF;
297 
298   cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize);
299   if (read_limit == 0) {
300     // If |read_limit| is zero, it can mean three things:
301     //   a. This file was just created.
302     //   b. The previous operation was a seek operation.
303     //   c. The previous operation was a read operation which emptied
304     //      the buffer.
305     // For all the above cases, we simply write |c| at the beginning
306     // of the buffer and bump |read_limit|. Note that |pos| will also
307     // be zero in this case, so we don't need to adjust it.
308     bufref[0] = static_cast<unsigned char>(c);
309     ++read_limit;
310   } else {
311     // If |read_limit| is non-zero, it means that there is data in the buffer
312     // from a previous read operation. Which would also mean that |pos| is not
313     // zero. So, we decrement |pos| and write |c| in to the buffer at the new
314     // |pos|. If too many ungetc operations are performed without reads, it
315     // can lead to (pos == 0 but read_limit != 0). We will just error out in
316     // such a case.
317     if (pos == 0)
318       return EOF;
319     --pos;
320     bufref[pos] = static_cast<unsigned char>(c);
321   }
322 
323   eof = false; // There is atleast one character that can be read now.
324   err = false; // This operation was a success.
325   return c;
326 }
327 
328 ErrorOr<int> File::seek(off_t offset, int whence) {
329   FileLock lock(this);
330   if (prev_op == FileOp::WRITE && pos > 0) {
331 
332     FileIOResult buf_result = platform_write(this, buf, pos);
333     if (buf_result.has_error() || buf_result.value < pos) {
334       err = true;
335       return Error(buf_result.error);
336     }
337   } else if (prev_op == FileOp::READ && whence == SEEK_CUR) {
338     // More data could have been read out from the platform file than was
339     // required. So, we have to adjust the offset we pass to platform seek
340     // function. Note that read_limit >= pos is always true.
341     offset -= (read_limit - pos);
342   }
343   pos = read_limit = 0;
344   prev_op = FileOp::SEEK;
345   // Reset the eof flag as a seek might move the file positon to some place
346   // readable.
347   eof = false;
348   auto result = platform_seek(this, offset, whence);
349   if (!result.has_value())
350     return Error(result.error());
351   return 0;
352 }
353 
354 ErrorOr<off_t> File::tell() {
355   FileLock lock(this);
356   auto seek_target = eof ? SEEK_END : SEEK_CUR;
357   auto result = platform_seek(this, 0, seek_target);
358   if (!result.has_value() || result.value() < 0)
359     return Error(result.error());
360   off_t platform_offset = result.value();
361   if (prev_op == FileOp::READ)
362     return platform_offset - (read_limit - pos);
363   if (prev_op == FileOp::WRITE)
364     return platform_offset + pos;
365   return platform_offset;
366 }
367 
368 int File::flush_unlocked() {
369   if (prev_op == FileOp::WRITE && pos > 0) {
370     FileIOResult buf_result = platform_write(this, buf, pos);
371     if (buf_result.has_error() || buf_result.value < pos) {
372       err = true;
373       return buf_result.error;
374     }
375     pos = 0;
376   }
377   // TODO: Add POSIX behavior for input streams.
378   return 0;
379 }
380 
381 int File::set_buffer(void *buffer, size_t size, int buffer_mode) {
382   // We do not need to lock the file as this method should be called before
383   // other operations are performed on the file.
384   if (buffer != nullptr && size == 0)
385     return EINVAL;
386 
387   switch (buffer_mode) {
388   case _IOFBF:
389   case _IOLBF:
390   case _IONBF:
391     break;
392   default:
393     return EINVAL;
394   }
395 
396   if (buffer == nullptr && size != 0 && buffer_mode != _IONBF) {
397     // We exclude the case of buffer_mode == _IONBF in this branch
398     // because we don't need to allocate buffer in such a case.
399     if (own_buf) {
400       // This is one of the places where use a C allocation functon
401       // as C++ does not have an equivalent of realloc.
402       buf = reinterpret_cast<uint8_t *>(realloc(buf, size));
403       if (buf == nullptr)
404         return ENOMEM;
405     } else {
406       AllocChecker ac;
407       buf = new (ac) uint8_t[size];
408       if (!ac)
409         return ENOMEM;
410       own_buf = true;
411     }
412     bufsize = size;
413     // TODO: Handle allocation failures.
414   } else {
415     if (own_buf)
416       delete buf;
417     if (buffer_mode != _IONBF) {
418       buf = static_cast<uint8_t *>(buffer);
419       bufsize = size;
420     } else {
421       // We don't need any buffer.
422       buf = nullptr;
423       bufsize = 0;
424     }
425     own_buf = false;
426   }
427   bufmode = buffer_mode;
428   adjust_buf();
429   return 0;
430 }
431 
432 File::ModeFlags File::mode_flags(const char *mode) {
433   // First character in |mode| should be 'a', 'r' or 'w'.
434   if (*mode != 'a' && *mode != 'r' && *mode != 'w')
435     return 0;
436 
437   // There should be exaclty one main mode ('a', 'r' or 'w') character.
438   // If there are more than one main mode characters listed, then
439   // we will consider |mode| as incorrect and return 0;
440   int main_mode_count = 0;
441 
442   ModeFlags flags = 0;
443   for (; *mode != '\0'; ++mode) {
444     switch (*mode) {
445     case 'r':
446       flags |= static_cast<ModeFlags>(OpenMode::READ);
447       ++main_mode_count;
448       break;
449     case 'w':
450       flags |= static_cast<ModeFlags>(OpenMode::WRITE);
451       ++main_mode_count;
452       break;
453     case '+':
454       flags |= static_cast<ModeFlags>(OpenMode::PLUS);
455       break;
456     case 'b':
457       flags |= static_cast<ModeFlags>(ContentType::BINARY);
458       break;
459     case 'a':
460       flags |= static_cast<ModeFlags>(OpenMode::APPEND);
461       ++main_mode_count;
462       break;
463     case 'x':
464       flags |= static_cast<ModeFlags>(CreateType::EXCLUSIVE);
465       break;
466     default:
467       return 0;
468     }
469   }
470 
471   if (main_mode_count != 1)
472     return 0;
473 
474   return flags;
475 }
476 
477 } // namespace LIBC_NAMESPACE_DECL
478