xref: /llvm-project/llvm/lib/Support/MemoryBuffer.cpp (revision 66c602be25c15ca69f6c3a618427ba0237c0d4a9)
1 //===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the MemoryBuffer interface.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/MemoryBuffer.h"
14 #include "llvm/ADT/SmallString.h"
15 #include "llvm/Config/config.h"
16 #include "llvm/Support/Error.h"
17 #include "llvm/Support/ErrorHandling.h"
18 #include "llvm/Support/Errc.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/MathExtras.h"
21 #include "llvm/Support/Process.h"
22 #include "llvm/Support/Program.h"
23 #include "llvm/Support/SmallVectorMemoryBuffer.h"
24 #include <cassert>
25 #include <cstring>
26 #include <new>
27 #include <sys/types.h>
28 #include <system_error>
29 #if !defined(_MSC_VER) && !defined(__MINGW32__)
30 #include <unistd.h>
31 #else
32 #include <io.h>
33 #endif
34 using namespace llvm;
35 
36 //===----------------------------------------------------------------------===//
37 // MemoryBuffer implementation itself.
38 //===----------------------------------------------------------------------===//
39 
40 MemoryBuffer::~MemoryBuffer() { }
41 
42 /// init - Initialize this MemoryBuffer as a reference to externally allocated
43 /// memory, memory that we know is already null terminated.
44 void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
45                         bool RequiresNullTerminator) {
46   assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
47          "Buffer is not null terminated!");
48   BufferStart = BufStart;
49   BufferEnd = BufEnd;
50 }
51 
52 //===----------------------------------------------------------------------===//
53 // MemoryBufferMem implementation.
54 //===----------------------------------------------------------------------===//
55 
56 /// CopyStringRef - Copies contents of a StringRef into a block of memory and
57 /// null-terminates it.
58 static void CopyStringRef(char *Memory, StringRef Data) {
59   if (!Data.empty())
60     memcpy(Memory, Data.data(), Data.size());
61   Memory[Data.size()] = 0; // Null terminate string.
62 }
63 
64 namespace {
65 struct NamedBufferAlloc {
66   const Twine &Name;
67   NamedBufferAlloc(const Twine &Name) : Name(Name) {}
68 };
69 } // namespace
70 
71 void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
72   SmallString<256> NameBuf;
73   StringRef NameRef = Alloc.Name.toStringRef(NameBuf);
74 
75   char *Mem = static_cast<char *>(operator new(N + NameRef.size() + 1));
76   CopyStringRef(Mem + N, NameRef);
77   return Mem;
78 }
79 
80 namespace {
81 /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
82 template<typename MB>
83 class MemoryBufferMem : public MB {
84 public:
85   MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
86     MemoryBuffer::init(InputData.begin(), InputData.end(),
87                        RequiresNullTerminator);
88   }
89 
90   /// Disable sized deallocation for MemoryBufferMem, because it has
91   /// tail-allocated data.
92   void operator delete(void *p) { ::operator delete(p); }
93 
94   StringRef getBufferIdentifier() const override {
95     // The name is stored after the class itself.
96     return StringRef(reinterpret_cast<const char *>(this + 1));
97   }
98 
99   MemoryBuffer::BufferKind getBufferKind() const override {
100     return MemoryBuffer::MemoryBuffer_Malloc;
101   }
102 };
103 } // namespace
104 
105 template <typename MB>
106 static ErrorOr<std::unique_ptr<MB>>
107 getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
108            bool IsText, bool RequiresNullTerminator, bool IsVolatile);
109 
110 std::unique_ptr<MemoryBuffer>
111 MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName,
112                            bool RequiresNullTerminator) {
113   auto *Ret = new (NamedBufferAlloc(BufferName))
114       MemoryBufferMem<MemoryBuffer>(InputData, RequiresNullTerminator);
115   return std::unique_ptr<MemoryBuffer>(Ret);
116 }
117 
118 std::unique_ptr<MemoryBuffer>
119 MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) {
120   return std::unique_ptr<MemoryBuffer>(getMemBuffer(
121       Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator));
122 }
123 
124 static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
125 getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) {
126   auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName);
127   if (!Buf)
128     return make_error_code(errc::not_enough_memory);
129   memcpy(Buf->getBufferStart(), InputData.data(), InputData.size());
130   return std::move(Buf);
131 }
132 
133 std::unique_ptr<MemoryBuffer>
134 MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) {
135   auto Buf = getMemBufferCopyImpl(InputData, BufferName);
136   if (Buf)
137     return std::move(*Buf);
138   return nullptr;
139 }
140 
141 ErrorOr<std::unique_ptr<MemoryBuffer>>
142 MemoryBuffer::getFileOrSTDIN(const Twine &Filename, bool IsText,
143                              bool RequiresNullTerminator) {
144   SmallString<256> NameBuf;
145   StringRef NameRef = Filename.toStringRef(NameBuf);
146 
147   if (NameRef == "-")
148     return getSTDIN();
149   return getFile(Filename, IsText, RequiresNullTerminator,
150                  /*IsVolatile=*/false);
151 }
152 
153 ErrorOr<std::unique_ptr<MemoryBuffer>>
154 MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
155                            uint64_t Offset, bool IsVolatile) {
156   return getFileAux<MemoryBuffer>(FilePath, MapSize, Offset, /*IsText=*/false,
157                                   /*RequiresNullTerminator=*/false, IsVolatile);
158 }
159 
160 //===----------------------------------------------------------------------===//
161 // MemoryBuffer::getFile implementation.
162 //===----------------------------------------------------------------------===//
163 
164 namespace {
165 
166 template <typename MB>
167 constexpr sys::fs::mapped_file_region::mapmode Mapmode =
168     sys::fs::mapped_file_region::readonly;
169 template <>
170 constexpr sys::fs::mapped_file_region::mapmode Mapmode<MemoryBuffer> =
171     sys::fs::mapped_file_region::readonly;
172 template <>
173 constexpr sys::fs::mapped_file_region::mapmode Mapmode<WritableMemoryBuffer> =
174     sys::fs::mapped_file_region::priv;
175 template <>
176 constexpr sys::fs::mapped_file_region::mapmode
177     Mapmode<WriteThroughMemoryBuffer> = sys::fs::mapped_file_region::readwrite;
178 
179 /// Memory maps a file descriptor using sys::fs::mapped_file_region.
180 ///
181 /// This handles converting the offset into a legal offset on the platform.
182 template<typename MB>
183 class MemoryBufferMMapFile : public MB {
184   sys::fs::mapped_file_region MFR;
185 
186   static uint64_t getLegalMapOffset(uint64_t Offset) {
187     return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
188   }
189 
190   static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
191     return Len + (Offset - getLegalMapOffset(Offset));
192   }
193 
194   const char *getStart(uint64_t Len, uint64_t Offset) {
195     return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
196   }
197 
198 public:
199   MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len,
200                        uint64_t Offset, std::error_code &EC)
201       : MFR(FD, Mapmode<MB>, getLegalMapSize(Len, Offset),
202             getLegalMapOffset(Offset), EC) {
203     if (!EC) {
204       const char *Start = getStart(Len, Offset);
205       MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator);
206     }
207   }
208 
209   /// Disable sized deallocation for MemoryBufferMMapFile, because it has
210   /// tail-allocated data.
211   void operator delete(void *p) { ::operator delete(p); }
212 
213   StringRef getBufferIdentifier() const override {
214     // The name is stored after the class itself.
215     return StringRef(reinterpret_cast<const char *>(this + 1));
216   }
217 
218   MemoryBuffer::BufferKind getBufferKind() const override {
219     return MemoryBuffer::MemoryBuffer_MMap;
220   }
221 
222   void dontNeedIfMmap() override { MFR.dontNeed(); }
223 };
224 } // namespace
225 
226 static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
227 getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) {
228   SmallString<sys::fs::DefaultReadChunkSize> Buffer;
229   if (Error E = sys::fs::readNativeFileToEOF(FD, Buffer))
230     return errorToErrorCode(std::move(E));
231   return getMemBufferCopyImpl(Buffer, BufferName);
232 }
233 
234 ErrorOr<std::unique_ptr<MemoryBuffer>>
235 MemoryBuffer::getFile(const Twine &Filename, bool IsText,
236                       bool RequiresNullTerminator, bool IsVolatile) {
237   return getFileAux<MemoryBuffer>(Filename, /*MapSize=*/-1, /*Offset=*/0,
238                                   IsText, RequiresNullTerminator, IsVolatile);
239 }
240 
241 template <typename MB>
242 static ErrorOr<std::unique_ptr<MB>>
243 getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
244                 uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
245                 bool IsVolatile);
246 
247 template <typename MB>
248 static ErrorOr<std::unique_ptr<MB>>
249 getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
250            bool IsText, bool RequiresNullTerminator, bool IsVolatile) {
251   Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
252       Filename, IsText ? sys::fs::OF_TextWithCRLF : sys::fs::OF_None);
253   if (!FDOrErr)
254     return errorToErrorCode(FDOrErr.takeError());
255   sys::fs::file_t FD = *FDOrErr;
256   auto Ret = getOpenFileImpl<MB>(FD, Filename, /*FileSize=*/-1, MapSize, Offset,
257                                  RequiresNullTerminator, IsVolatile);
258   sys::fs::closeFile(FD);
259   return Ret;
260 }
261 
262 ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
263 WritableMemoryBuffer::getFile(const Twine &Filename, bool IsVolatile) {
264   return getFileAux<WritableMemoryBuffer>(
265       Filename, /*MapSize=*/-1, /*Offset=*/0, /*IsText=*/false,
266       /*RequiresNullTerminator=*/false, IsVolatile);
267 }
268 
269 ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
270 WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
271                                    uint64_t Offset, bool IsVolatile) {
272   return getFileAux<WritableMemoryBuffer>(
273       Filename, MapSize, Offset, /*IsText=*/false,
274       /*RequiresNullTerminator=*/false, IsVolatile);
275 }
276 
277 std::unique_ptr<WritableMemoryBuffer>
278 WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) {
279   using MemBuffer = MemoryBufferMem<WritableMemoryBuffer>;
280   // Allocate space for the MemoryBuffer, the data and the name. It is important
281   // that MemoryBuffer and data are aligned so PointerIntPair works with them.
282   // TODO: Is 16-byte alignment enough?  We copy small object files with large
283   // alignment expectations into this buffer.
284   SmallString<256> NameBuf;
285   StringRef NameRef = BufferName.toStringRef(NameBuf);
286   size_t AlignedStringLen = alignTo(sizeof(MemBuffer) + NameRef.size() + 1, 16);
287   size_t RealLen = AlignedStringLen + Size + 1;
288   char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
289   if (!Mem)
290     return nullptr;
291 
292   // The name is stored after the class itself.
293   CopyStringRef(Mem + sizeof(MemBuffer), NameRef);
294 
295   // The buffer begins after the name and must be aligned.
296   char *Buf = Mem + AlignedStringLen;
297   Buf[Size] = 0; // Null terminate buffer.
298 
299   auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true);
300   return std::unique_ptr<WritableMemoryBuffer>(Ret);
301 }
302 
303 std::unique_ptr<WritableMemoryBuffer>
304 WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) {
305   auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName);
306   if (!SB)
307     return nullptr;
308   memset(SB->getBufferStart(), 0, Size);
309   return SB;
310 }
311 
312 static bool shouldUseMmap(sys::fs::file_t FD,
313                           size_t FileSize,
314                           size_t MapSize,
315                           off_t Offset,
316                           bool RequiresNullTerminator,
317                           int PageSize,
318                           bool IsVolatile) {
319   // mmap may leave the buffer without null terminator if the file size changed
320   // by the time the last page is mapped in, so avoid it if the file size is
321   // likely to change.
322   if (IsVolatile && RequiresNullTerminator)
323     return false;
324 
325   // We don't use mmap for small files because this can severely fragment our
326   // address space.
327   if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
328     return false;
329 
330   if (!RequiresNullTerminator)
331     return true;
332 
333   // If we don't know the file size, use fstat to find out.  fstat on an open
334   // file descriptor is cheaper than stat on a random path.
335   // FIXME: this chunk of code is duplicated, but it avoids a fstat when
336   // RequiresNullTerminator = false and MapSize != -1.
337   if (FileSize == size_t(-1)) {
338     sys::fs::file_status Status;
339     if (sys::fs::status(FD, Status))
340       return false;
341     FileSize = Status.getSize();
342   }
343 
344   // If we need a null terminator and the end of the map is inside the file,
345   // we cannot use mmap.
346   size_t End = Offset + MapSize;
347   assert(End <= FileSize);
348   if (End != FileSize)
349     return false;
350 
351   // Don't try to map files that are exactly a multiple of the system page size
352   // if we need a null terminator.
353   if ((FileSize & (PageSize -1)) == 0)
354     return false;
355 
356 #if defined(__CYGWIN__)
357   // Don't try to map files that are exactly a multiple of the physical page size
358   // if we need a null terminator.
359   // FIXME: We should reorganize again getPageSize() on Win32.
360   if ((FileSize & (4096 - 1)) == 0)
361     return false;
362 #endif
363 
364   return true;
365 }
366 
367 static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
368 getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize,
369                  uint64_t Offset) {
370   Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite(
371       Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None);
372   if (!FDOrErr)
373     return errorToErrorCode(FDOrErr.takeError());
374   sys::fs::file_t FD = *FDOrErr;
375 
376   // Default is to map the full file.
377   if (MapSize == uint64_t(-1)) {
378     // If we don't know the file size, use fstat to find out.  fstat on an open
379     // file descriptor is cheaper than stat on a random path.
380     if (FileSize == uint64_t(-1)) {
381       sys::fs::file_status Status;
382       std::error_code EC = sys::fs::status(FD, Status);
383       if (EC)
384         return EC;
385 
386       // If this not a file or a block device (e.g. it's a named pipe
387       // or character device), we can't mmap it, so error out.
388       sys::fs::file_type Type = Status.type();
389       if (Type != sys::fs::file_type::regular_file &&
390           Type != sys::fs::file_type::block_file)
391         return make_error_code(errc::invalid_argument);
392 
393       FileSize = Status.getSize();
394     }
395     MapSize = FileSize;
396   }
397 
398   std::error_code EC;
399   std::unique_ptr<WriteThroughMemoryBuffer> Result(
400       new (NamedBufferAlloc(Filename))
401           MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize,
402                                                          Offset, EC));
403   if (EC)
404     return EC;
405   return std::move(Result);
406 }
407 
408 ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
409 WriteThroughMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize) {
410   return getReadWriteFile(Filename, FileSize, FileSize, 0);
411 }
412 
413 /// Map a subrange of the specified file as a WritableMemoryBuffer.
414 ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
415 WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
416                                        uint64_t Offset) {
417   return getReadWriteFile(Filename, -1, MapSize, Offset);
418 }
419 
420 template <typename MB>
421 static ErrorOr<std::unique_ptr<MB>>
422 getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
423                 uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
424                 bool IsVolatile) {
425   static int PageSize = sys::Process::getPageSizeEstimate();
426 
427   // Default is to map the full file.
428   if (MapSize == uint64_t(-1)) {
429     // If we don't know the file size, use fstat to find out.  fstat on an open
430     // file descriptor is cheaper than stat on a random path.
431     if (FileSize == uint64_t(-1)) {
432       sys::fs::file_status Status;
433       std::error_code EC = sys::fs::status(FD, Status);
434       if (EC)
435         return EC;
436 
437       // If this not a file or a block device (e.g. it's a named pipe
438       // or character device), we can't trust the size. Create the memory
439       // buffer by copying off the stream.
440       sys::fs::file_type Type = Status.type();
441       if (Type != sys::fs::file_type::regular_file &&
442           Type != sys::fs::file_type::block_file)
443         return getMemoryBufferForStream(FD, Filename);
444 
445       FileSize = Status.getSize();
446     }
447     MapSize = FileSize;
448   }
449 
450   if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
451                     PageSize, IsVolatile)) {
452     std::error_code EC;
453     std::unique_ptr<MB> Result(
454         new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>(
455             RequiresNullTerminator, FD, MapSize, Offset, EC));
456     if (!EC)
457       return std::move(Result);
458   }
459 
460 #ifdef __MVS__
461   // Set codepage auto-conversion for z/OS.
462   if (auto EC = llvm::enableAutoConversion(FD))
463     return EC;
464 #endif
465 
466   auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
467   if (!Buf) {
468     // Failed to create a buffer. The only way it can fail is if
469     // new(std::nothrow) returns 0.
470     return make_error_code(errc::not_enough_memory);
471   }
472 
473   // Read until EOF, zero-initialize the rest.
474   MutableArrayRef<char> ToRead = Buf->getBuffer();
475   while (!ToRead.empty()) {
476     Expected<size_t> ReadBytes =
477         sys::fs::readNativeFileSlice(FD, ToRead, Offset);
478     if (!ReadBytes)
479       return errorToErrorCode(ReadBytes.takeError());
480     if (*ReadBytes == 0) {
481       std::memset(ToRead.data(), 0, ToRead.size());
482       break;
483     }
484     ToRead = ToRead.drop_front(*ReadBytes);
485     Offset += *ReadBytes;
486   }
487 
488   return std::move(Buf);
489 }
490 
491 ErrorOr<std::unique_ptr<MemoryBuffer>>
492 MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
493                           bool RequiresNullTerminator, bool IsVolatile) {
494   return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
495                          RequiresNullTerminator, IsVolatile);
496 }
497 
498 ErrorOr<std::unique_ptr<MemoryBuffer>>
499 MemoryBuffer::getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize,
500                                int64_t Offset, bool IsVolatile) {
501   assert(MapSize != uint64_t(-1));
502   return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
503                                        IsVolatile);
504 }
505 
506 ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
507   // Read in all of the data from stdin, we cannot mmap stdin.
508   //
509   // FIXME: That isn't necessarily true, we should try to mmap stdin and
510   // fallback if it fails.
511   sys::ChangeStdinMode(sys::fs::OF_Text);
512 
513   return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
514 }
515 
516 ErrorOr<std::unique_ptr<MemoryBuffer>>
517 MemoryBuffer::getFileAsStream(const Twine &Filename) {
518   Expected<sys::fs::file_t> FDOrErr =
519       sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
520   if (!FDOrErr)
521     return errorToErrorCode(FDOrErr.takeError());
522   sys::fs::file_t FD = *FDOrErr;
523   ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
524       getMemoryBufferForStream(FD, Filename);
525   sys::fs::closeFile(FD);
526   return Ret;
527 }
528 
529 MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
530   StringRef Data = getBuffer();
531   StringRef Identifier = getBufferIdentifier();
532   return MemoryBufferRef(Data, Identifier);
533 }
534 
535 SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() {}
536