1 //===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the MemoryBuffer interface. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/MemoryBuffer.h" 14 #include "llvm/ADT/SmallString.h" 15 #include "llvm/Config/config.h" 16 #include "llvm/Support/Alignment.h" 17 #include "llvm/Support/Errc.h" 18 #include "llvm/Support/Error.h" 19 #include "llvm/Support/ErrorHandling.h" 20 #include "llvm/Support/FileSystem.h" 21 #include "llvm/Support/MathExtras.h" 22 #include "llvm/Support/Process.h" 23 #include "llvm/Support/Program.h" 24 #include "llvm/Support/SmallVectorMemoryBuffer.h" 25 #include <cassert> 26 #include <cstring> 27 #include <new> 28 #include <sys/types.h> 29 #include <system_error> 30 #if !defined(_MSC_VER) && !defined(__MINGW32__) 31 #include <unistd.h> 32 #else 33 #include <io.h> 34 #endif 35 36 #ifdef __MVS__ 37 #include "llvm/Support/AutoConvert.h" 38 #endif 39 using namespace llvm; 40 41 //===----------------------------------------------------------------------===// 42 // MemoryBuffer implementation itself. 43 //===----------------------------------------------------------------------===// 44 45 MemoryBuffer::~MemoryBuffer() = default; 46 47 /// init - Initialize this MemoryBuffer as a reference to externally allocated 48 /// memory, memory that we know is already null terminated. 49 void MemoryBuffer::init(const char *BufStart, const char *BufEnd, 50 bool RequiresNullTerminator) { 51 assert((!RequiresNullTerminator || BufEnd[0] == 0) && 52 "Buffer is not null terminated!"); 53 BufferStart = BufStart; 54 BufferEnd = BufEnd; 55 } 56 57 //===----------------------------------------------------------------------===// 58 // MemoryBufferMem implementation. 59 //===----------------------------------------------------------------------===// 60 61 /// CopyStringRef - Copies contents of a StringRef into a block of memory and 62 /// null-terminates it. 63 static void CopyStringRef(char *Memory, StringRef Data) { 64 if (!Data.empty()) 65 memcpy(Memory, Data.data(), Data.size()); 66 Memory[Data.size()] = 0; // Null terminate string. 67 } 68 69 namespace { 70 struct NamedBufferAlloc { 71 const Twine &Name; 72 NamedBufferAlloc(const Twine &Name) : Name(Name) {} 73 }; 74 } // namespace 75 76 void *operator new(size_t N, const NamedBufferAlloc &Alloc) { 77 SmallString<256> NameBuf; 78 StringRef NameRef = Alloc.Name.toStringRef(NameBuf); 79 80 char *Mem = static_cast<char *>(operator new(N + NameRef.size() + 1)); 81 CopyStringRef(Mem + N, NameRef); 82 return Mem; 83 } 84 85 namespace { 86 /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory. 87 template<typename MB> 88 class MemoryBufferMem : public MB { 89 public: 90 MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) { 91 MemoryBuffer::init(InputData.begin(), InputData.end(), 92 RequiresNullTerminator); 93 } 94 95 /// Disable sized deallocation for MemoryBufferMem, because it has 96 /// tail-allocated data. 97 void operator delete(void *p) { ::operator delete(p); } 98 99 StringRef getBufferIdentifier() const override { 100 // The name is stored after the class itself. 101 return StringRef(reinterpret_cast<const char *>(this + 1)); 102 } 103 104 MemoryBuffer::BufferKind getBufferKind() const override { 105 return MemoryBuffer::MemoryBuffer_Malloc; 106 } 107 }; 108 } // namespace 109 110 template <typename MB> 111 static ErrorOr<std::unique_ptr<MB>> 112 getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset, 113 bool IsText, bool RequiresNullTerminator, bool IsVolatile, 114 Optional<Align> Alignment); 115 116 std::unique_ptr<MemoryBuffer> 117 MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName, 118 bool RequiresNullTerminator) { 119 auto *Ret = new (NamedBufferAlloc(BufferName)) 120 MemoryBufferMem<MemoryBuffer>(InputData, RequiresNullTerminator); 121 return std::unique_ptr<MemoryBuffer>(Ret); 122 } 123 124 std::unique_ptr<MemoryBuffer> 125 MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) { 126 return std::unique_ptr<MemoryBuffer>(getMemBuffer( 127 Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator)); 128 } 129 130 static ErrorOr<std::unique_ptr<WritableMemoryBuffer>> 131 getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) { 132 auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName); 133 if (!Buf) 134 return make_error_code(errc::not_enough_memory); 135 memcpy(Buf->getBufferStart(), InputData.data(), InputData.size()); 136 return std::move(Buf); 137 } 138 139 std::unique_ptr<MemoryBuffer> 140 MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) { 141 auto Buf = getMemBufferCopyImpl(InputData, BufferName); 142 if (Buf) 143 return std::move(*Buf); 144 return nullptr; 145 } 146 147 ErrorOr<std::unique_ptr<MemoryBuffer>> 148 MemoryBuffer::getFileOrSTDIN(const Twine &Filename, bool IsText, 149 bool RequiresNullTerminator, 150 Optional<Align> Alignment) { 151 SmallString<256> NameBuf; 152 StringRef NameRef = Filename.toStringRef(NameBuf); 153 154 if (NameRef == "-") 155 return getSTDIN(); 156 return getFile(Filename, IsText, RequiresNullTerminator, 157 /*IsVolatile=*/false, Alignment); 158 } 159 160 ErrorOr<std::unique_ptr<MemoryBuffer>> 161 MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize, 162 uint64_t Offset, bool IsVolatile, 163 Optional<Align> Alignment) { 164 return getFileAux<MemoryBuffer>(FilePath, MapSize, Offset, /*IsText=*/false, 165 /*RequiresNullTerminator=*/false, IsVolatile, 166 Alignment); 167 } 168 169 //===----------------------------------------------------------------------===// 170 // MemoryBuffer::getFile implementation. 171 //===----------------------------------------------------------------------===// 172 173 namespace { 174 175 template <typename MB> 176 constexpr sys::fs::mapped_file_region::mapmode Mapmode = 177 sys::fs::mapped_file_region::readonly; 178 template <> 179 constexpr sys::fs::mapped_file_region::mapmode Mapmode<MemoryBuffer> = 180 sys::fs::mapped_file_region::readonly; 181 template <> 182 constexpr sys::fs::mapped_file_region::mapmode Mapmode<WritableMemoryBuffer> = 183 sys::fs::mapped_file_region::priv; 184 template <> 185 constexpr sys::fs::mapped_file_region::mapmode 186 Mapmode<WriteThroughMemoryBuffer> = sys::fs::mapped_file_region::readwrite; 187 188 /// Memory maps a file descriptor using sys::fs::mapped_file_region. 189 /// 190 /// This handles converting the offset into a legal offset on the platform. 191 template<typename MB> 192 class MemoryBufferMMapFile : public MB { 193 sys::fs::mapped_file_region MFR; 194 195 static uint64_t getLegalMapOffset(uint64_t Offset) { 196 return Offset & ~(sys::fs::mapped_file_region::alignment() - 1); 197 } 198 199 static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) { 200 return Len + (Offset - getLegalMapOffset(Offset)); 201 } 202 203 const char *getStart(uint64_t Len, uint64_t Offset) { 204 return MFR.const_data() + (Offset - getLegalMapOffset(Offset)); 205 } 206 207 public: 208 MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len, 209 uint64_t Offset, std::error_code &EC) 210 : MFR(FD, Mapmode<MB>, getLegalMapSize(Len, Offset), 211 getLegalMapOffset(Offset), EC) { 212 if (!EC) { 213 const char *Start = getStart(Len, Offset); 214 MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator); 215 } 216 } 217 218 /// Disable sized deallocation for MemoryBufferMMapFile, because it has 219 /// tail-allocated data. 220 void operator delete(void *p) { ::operator delete(p); } 221 222 StringRef getBufferIdentifier() const override { 223 // The name is stored after the class itself. 224 return StringRef(reinterpret_cast<const char *>(this + 1)); 225 } 226 227 MemoryBuffer::BufferKind getBufferKind() const override { 228 return MemoryBuffer::MemoryBuffer_MMap; 229 } 230 231 void dontNeedIfMmap() override { MFR.dontNeed(); } 232 }; 233 } // namespace 234 235 static ErrorOr<std::unique_ptr<WritableMemoryBuffer>> 236 getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) { 237 SmallString<sys::fs::DefaultReadChunkSize> Buffer; 238 if (Error E = sys::fs::readNativeFileToEOF(FD, Buffer)) 239 return errorToErrorCode(std::move(E)); 240 return getMemBufferCopyImpl(Buffer, BufferName); 241 } 242 243 ErrorOr<std::unique_ptr<MemoryBuffer>> 244 MemoryBuffer::getFile(const Twine &Filename, bool IsText, 245 bool RequiresNullTerminator, bool IsVolatile, 246 Optional<Align> Alignment) { 247 return getFileAux<MemoryBuffer>(Filename, /*MapSize=*/-1, /*Offset=*/0, 248 IsText, RequiresNullTerminator, IsVolatile, 249 Alignment); 250 } 251 252 template <typename MB> 253 static ErrorOr<std::unique_ptr<MB>> 254 getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, 255 uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator, 256 bool IsVolatile, Optional<Align> Alignment); 257 258 template <typename MB> 259 static ErrorOr<std::unique_ptr<MB>> 260 getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset, 261 bool IsText, bool RequiresNullTerminator, bool IsVolatile, 262 Optional<Align> Alignment) { 263 Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead( 264 Filename, IsText ? sys::fs::OF_TextWithCRLF : sys::fs::OF_None); 265 if (!FDOrErr) 266 return errorToErrorCode(FDOrErr.takeError()); 267 sys::fs::file_t FD = *FDOrErr; 268 auto Ret = getOpenFileImpl<MB>(FD, Filename, /*FileSize=*/-1, MapSize, Offset, 269 RequiresNullTerminator, IsVolatile, Alignment); 270 sys::fs::closeFile(FD); 271 return Ret; 272 } 273 274 ErrorOr<std::unique_ptr<WritableMemoryBuffer>> 275 WritableMemoryBuffer::getFile(const Twine &Filename, bool IsVolatile, 276 Optional<Align> Alignment) { 277 return getFileAux<WritableMemoryBuffer>( 278 Filename, /*MapSize=*/-1, /*Offset=*/0, /*IsText=*/false, 279 /*RequiresNullTerminator=*/false, IsVolatile, Alignment); 280 } 281 282 ErrorOr<std::unique_ptr<WritableMemoryBuffer>> 283 WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize, 284 uint64_t Offset, bool IsVolatile, 285 Optional<Align> Alignment) { 286 return getFileAux<WritableMemoryBuffer>( 287 Filename, MapSize, Offset, /*IsText=*/false, 288 /*RequiresNullTerminator=*/false, IsVolatile, Alignment); 289 } 290 291 std::unique_ptr<WritableMemoryBuffer> 292 WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, 293 const Twine &BufferName, 294 Optional<Align> Alignment) { 295 using MemBuffer = MemoryBufferMem<WritableMemoryBuffer>; 296 297 // Use 16-byte alignment if no alignment is specified. 298 Align BufAlign = Alignment.value_or(Align(16)); 299 300 // Allocate space for the MemoryBuffer, the data and the name. It is important 301 // that MemoryBuffer and data are aligned so PointerIntPair works with them. 302 SmallString<256> NameBuf; 303 StringRef NameRef = BufferName.toStringRef(NameBuf); 304 size_t StringLen = sizeof(MemBuffer) + NameRef.size() + 1; 305 size_t RealLen = StringLen + Size + 1 + BufAlign.value(); 306 if (RealLen <= Size) // Check for rollover. 307 return nullptr; 308 char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow)); 309 if (!Mem) 310 return nullptr; 311 312 // The name is stored after the class itself. 313 CopyStringRef(Mem + sizeof(MemBuffer), NameRef); 314 315 // The buffer begins after the name and must be aligned. 316 char *Buf = (char *)alignAddr(Mem + StringLen, BufAlign); 317 Buf[Size] = 0; // Null terminate buffer. 318 319 auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true); 320 return std::unique_ptr<WritableMemoryBuffer>(Ret); 321 } 322 323 std::unique_ptr<WritableMemoryBuffer> 324 WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) { 325 auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName); 326 if (!SB) 327 return nullptr; 328 memset(SB->getBufferStart(), 0, Size); 329 return SB; 330 } 331 332 static bool shouldUseMmap(sys::fs::file_t FD, 333 size_t FileSize, 334 size_t MapSize, 335 off_t Offset, 336 bool RequiresNullTerminator, 337 int PageSize, 338 bool IsVolatile) { 339 // mmap may leave the buffer without null terminator if the file size changed 340 // by the time the last page is mapped in, so avoid it if the file size is 341 // likely to change. 342 if (IsVolatile && RequiresNullTerminator) 343 return false; 344 345 // We don't use mmap for small files because this can severely fragment our 346 // address space. 347 if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize) 348 return false; 349 350 if (!RequiresNullTerminator) 351 return true; 352 353 // If we don't know the file size, use fstat to find out. fstat on an open 354 // file descriptor is cheaper than stat on a random path. 355 // FIXME: this chunk of code is duplicated, but it avoids a fstat when 356 // RequiresNullTerminator = false and MapSize != -1. 357 if (FileSize == size_t(-1)) { 358 sys::fs::file_status Status; 359 if (sys::fs::status(FD, Status)) 360 return false; 361 FileSize = Status.getSize(); 362 } 363 364 // If we need a null terminator and the end of the map is inside the file, 365 // we cannot use mmap. 366 size_t End = Offset + MapSize; 367 assert(End <= FileSize); 368 if (End != FileSize) 369 return false; 370 371 // Don't try to map files that are exactly a multiple of the system page size 372 // if we need a null terminator. 373 if ((FileSize & (PageSize -1)) == 0) 374 return false; 375 376 #if defined(__CYGWIN__) 377 // Don't try to map files that are exactly a multiple of the physical page size 378 // if we need a null terminator. 379 // FIXME: We should reorganize again getPageSize() on Win32. 380 if ((FileSize & (4096 - 1)) == 0) 381 return false; 382 #endif 383 384 return true; 385 } 386 387 static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>> 388 getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize, 389 uint64_t Offset) { 390 Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite( 391 Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None); 392 if (!FDOrErr) 393 return errorToErrorCode(FDOrErr.takeError()); 394 sys::fs::file_t FD = *FDOrErr; 395 396 // Default is to map the full file. 397 if (MapSize == uint64_t(-1)) { 398 // If we don't know the file size, use fstat to find out. fstat on an open 399 // file descriptor is cheaper than stat on a random path. 400 if (FileSize == uint64_t(-1)) { 401 sys::fs::file_status Status; 402 std::error_code EC = sys::fs::status(FD, Status); 403 if (EC) 404 return EC; 405 406 // If this not a file or a block device (e.g. it's a named pipe 407 // or character device), we can't mmap it, so error out. 408 sys::fs::file_type Type = Status.type(); 409 if (Type != sys::fs::file_type::regular_file && 410 Type != sys::fs::file_type::block_file) 411 return make_error_code(errc::invalid_argument); 412 413 FileSize = Status.getSize(); 414 } 415 MapSize = FileSize; 416 } 417 418 std::error_code EC; 419 std::unique_ptr<WriteThroughMemoryBuffer> Result( 420 new (NamedBufferAlloc(Filename)) 421 MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize, 422 Offset, EC)); 423 if (EC) 424 return EC; 425 return std::move(Result); 426 } 427 428 ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>> 429 WriteThroughMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize) { 430 return getReadWriteFile(Filename, FileSize, FileSize, 0); 431 } 432 433 /// Map a subrange of the specified file as a WritableMemoryBuffer. 434 ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>> 435 WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize, 436 uint64_t Offset) { 437 return getReadWriteFile(Filename, -1, MapSize, Offset); 438 } 439 440 template <typename MB> 441 static ErrorOr<std::unique_ptr<MB>> 442 getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, 443 uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator, 444 bool IsVolatile, Optional<Align> Alignment) { 445 static int PageSize = sys::Process::getPageSizeEstimate(); 446 447 // Default is to map the full file. 448 if (MapSize == uint64_t(-1)) { 449 // If we don't know the file size, use fstat to find out. fstat on an open 450 // file descriptor is cheaper than stat on a random path. 451 if (FileSize == uint64_t(-1)) { 452 sys::fs::file_status Status; 453 std::error_code EC = sys::fs::status(FD, Status); 454 if (EC) 455 return EC; 456 457 // If this not a file or a block device (e.g. it's a named pipe 458 // or character device), we can't trust the size. Create the memory 459 // buffer by copying off the stream. 460 sys::fs::file_type Type = Status.type(); 461 if (Type != sys::fs::file_type::regular_file && 462 Type != sys::fs::file_type::block_file) 463 return getMemoryBufferForStream(FD, Filename); 464 465 FileSize = Status.getSize(); 466 } 467 MapSize = FileSize; 468 } 469 470 if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, 471 PageSize, IsVolatile)) { 472 std::error_code EC; 473 std::unique_ptr<MB> Result( 474 new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>( 475 RequiresNullTerminator, FD, MapSize, Offset, EC)); 476 if (!EC) 477 return std::move(Result); 478 } 479 480 #ifdef __MVS__ 481 // Set codepage auto-conversion for z/OS. 482 if (auto EC = llvm::enableAutoConversion(FD)) 483 return EC; 484 #endif 485 486 auto Buf = 487 WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename, Alignment); 488 if (!Buf) { 489 // Failed to create a buffer. The only way it can fail is if 490 // new(std::nothrow) returns 0. 491 return make_error_code(errc::not_enough_memory); 492 } 493 494 // Read until EOF, zero-initialize the rest. 495 MutableArrayRef<char> ToRead = Buf->getBuffer(); 496 while (!ToRead.empty()) { 497 Expected<size_t> ReadBytes = 498 sys::fs::readNativeFileSlice(FD, ToRead, Offset); 499 if (!ReadBytes) 500 return errorToErrorCode(ReadBytes.takeError()); 501 if (*ReadBytes == 0) { 502 std::memset(ToRead.data(), 0, ToRead.size()); 503 break; 504 } 505 ToRead = ToRead.drop_front(*ReadBytes); 506 Offset += *ReadBytes; 507 } 508 509 return std::move(Buf); 510 } 511 512 ErrorOr<std::unique_ptr<MemoryBuffer>> 513 MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename, 514 uint64_t FileSize, bool RequiresNullTerminator, 515 bool IsVolatile, Optional<Align> Alignment) { 516 return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0, 517 RequiresNullTerminator, IsVolatile, 518 Alignment); 519 } 520 521 ErrorOr<std::unique_ptr<MemoryBuffer>> 522 MemoryBuffer::getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, 523 uint64_t MapSize, int64_t Offset, 524 bool IsVolatile, Optional<Align> Alignment) { 525 assert(MapSize != uint64_t(-1)); 526 return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false, 527 IsVolatile, Alignment); 528 } 529 530 ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() { 531 // Read in all of the data from stdin, we cannot mmap stdin. 532 // 533 // FIXME: That isn't necessarily true, we should try to mmap stdin and 534 // fallback if it fails. 535 sys::ChangeStdinMode(sys::fs::OF_Text); 536 537 return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>"); 538 } 539 540 ErrorOr<std::unique_ptr<MemoryBuffer>> 541 MemoryBuffer::getFileAsStream(const Twine &Filename) { 542 Expected<sys::fs::file_t> FDOrErr = 543 sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None); 544 if (!FDOrErr) 545 return errorToErrorCode(FDOrErr.takeError()); 546 sys::fs::file_t FD = *FDOrErr; 547 ErrorOr<std::unique_ptr<MemoryBuffer>> Ret = 548 getMemoryBufferForStream(FD, Filename); 549 sys::fs::closeFile(FD); 550 return Ret; 551 } 552 553 MemoryBufferRef MemoryBuffer::getMemBufferRef() const { 554 StringRef Data = getBuffer(); 555 StringRef Identifier = getBufferIdentifier(); 556 return MemoryBufferRef(Data, Identifier); 557 } 558 559 SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() = default; 560