//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements the MemoryBuffer interface.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include <cassert>
#include <cstring>
#include <new>
#include <sys/types.h>
#include <system_error>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
#include <io.h>
#endif

#ifdef __MVS__
#include "llvm/Support/AutoConvert.h"
#endif
using namespace llvm;

//===----------------------------------------------------------------------===//
// MemoryBuffer implementation itself.
//===----------------------------------------------------------------------===//

// The destructor is defined out of line here and simply defaulted.
MemoryBuffer::~MemoryBuffer() = default;

/// init - Initialize this MemoryBuffer as a reference to externally allocated
/// memory. When RequiresNullTerminator is set, the byte at BufEnd must already
/// be a null terminator; this is checked with an assertion only.
49 void MemoryBuffer::init(const char *BufStart, const char *BufEnd, 50 bool RequiresNullTerminator) { 51 assert((!RequiresNullTerminator || BufEnd[0] == 0) && 52 "Buffer is not null terminated!"); 53 BufferStart = BufStart; 54 BufferEnd = BufEnd; 55 } 56 57 //===----------------------------------------------------------------------===// 58 // MemoryBufferMem implementation. 59 //===----------------------------------------------------------------------===// 60 61 /// CopyStringRef - Copies contents of a StringRef into a block of memory. 62 static void CopyStringRef(char *Memory, StringRef Data) { 63 if (!Data.empty()) 64 memcpy(Memory, Data.data(), Data.size()); 65 } 66 67 namespace { 68 struct NamedBufferAlloc { 69 const Twine &Name; 70 NamedBufferAlloc(const Twine &Name) : Name(Name) {} 71 }; 72 } // namespace 73 74 void *operator new(size_t N, const NamedBufferAlloc &Alloc) { 75 SmallString<256> NameBuf; 76 StringRef NameRef = Alloc.Name.toStringRef(NameBuf); 77 78 char *Mem = 79 static_cast<char *>(operator new(N + sizeof(size_t) + NameRef.size())); 80 *reinterpret_cast<size_t *>(Mem + N) = NameRef.size(); 81 CopyStringRef(Mem + N + sizeof(size_t), NameRef); 82 return Mem; 83 } 84 85 namespace { 86 /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory. 87 template<typename MB> 88 class MemoryBufferMem : public MB { 89 public: 90 MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) { 91 MemoryBuffer::init(InputData.begin(), InputData.end(), 92 RequiresNullTerminator); 93 } 94 95 /// Disable sized deallocation for MemoryBufferMem, because it has 96 /// tail-allocated data. 97 void operator delete(void *p) { ::operator delete(p); } 98 99 StringRef getBufferIdentifier() const override { 100 // The name is stored after the class itself. 
101 return StringRef(reinterpret_cast<const char *>(this + 1) + sizeof(size_t), 102 *reinterpret_cast<const size_t *>(this + 1)); 103 } 104 105 MemoryBuffer::BufferKind getBufferKind() const override { 106 return MemoryBuffer::MemoryBuffer_Malloc; 107 } 108 }; 109 } // namespace 110 111 template <typename MB> 112 static ErrorOr<std::unique_ptr<MB>> 113 getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset, 114 bool IsText, bool RequiresNullTerminator, bool IsVolatile, 115 Optional<Align> Alignment); 116 117 std::unique_ptr<MemoryBuffer> 118 MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName, 119 bool RequiresNullTerminator) { 120 auto *Ret = new (NamedBufferAlloc(BufferName)) 121 MemoryBufferMem<MemoryBuffer>(InputData, RequiresNullTerminator); 122 return std::unique_ptr<MemoryBuffer>(Ret); 123 } 124 125 std::unique_ptr<MemoryBuffer> 126 MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) { 127 return std::unique_ptr<MemoryBuffer>(getMemBuffer( 128 Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator)); 129 } 130 131 static ErrorOr<std::unique_ptr<WritableMemoryBuffer>> 132 getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) { 133 auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName); 134 if (!Buf) 135 return make_error_code(errc::not_enough_memory); 136 memcpy(Buf->getBufferStart(), InputData.data(), InputData.size()); 137 return std::move(Buf); 138 } 139 140 std::unique_ptr<MemoryBuffer> 141 MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) { 142 auto Buf = getMemBufferCopyImpl(InputData, BufferName); 143 if (Buf) 144 return std::move(*Buf); 145 return nullptr; 146 } 147 148 ErrorOr<std::unique_ptr<MemoryBuffer>> 149 MemoryBuffer::getFileOrSTDIN(const Twine &Filename, bool IsText, 150 bool RequiresNullTerminator, 151 Optional<Align> Alignment) { 152 SmallString<256> NameBuf; 153 StringRef NameRef = 
Filename.toStringRef(NameBuf); 154 155 if (NameRef == "-") 156 return getSTDIN(); 157 return getFile(Filename, IsText, RequiresNullTerminator, 158 /*IsVolatile=*/false, Alignment); 159 } 160 161 ErrorOr<std::unique_ptr<MemoryBuffer>> 162 MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize, 163 uint64_t Offset, bool IsVolatile, 164 Optional<Align> Alignment) { 165 return getFileAux<MemoryBuffer>(FilePath, MapSize, Offset, /*IsText=*/false, 166 /*RequiresNullTerminator=*/false, IsVolatile, 167 Alignment); 168 } 169 170 //===----------------------------------------------------------------------===// 171 // MemoryBuffer::getFile implementation. 172 //===----------------------------------------------------------------------===// 173 174 namespace { 175 176 template <typename MB> 177 constexpr sys::fs::mapped_file_region::mapmode Mapmode = 178 sys::fs::mapped_file_region::readonly; 179 template <> 180 constexpr sys::fs::mapped_file_region::mapmode Mapmode<MemoryBuffer> = 181 sys::fs::mapped_file_region::readonly; 182 template <> 183 constexpr sys::fs::mapped_file_region::mapmode Mapmode<WritableMemoryBuffer> = 184 sys::fs::mapped_file_region::priv; 185 template <> 186 constexpr sys::fs::mapped_file_region::mapmode 187 Mapmode<WriteThroughMemoryBuffer> = sys::fs::mapped_file_region::readwrite; 188 189 /// Memory maps a file descriptor using sys::fs::mapped_file_region. 190 /// 191 /// This handles converting the offset into a legal offset on the platform. 
192 template<typename MB> 193 class MemoryBufferMMapFile : public MB { 194 sys::fs::mapped_file_region MFR; 195 196 static uint64_t getLegalMapOffset(uint64_t Offset) { 197 return Offset & ~(sys::fs::mapped_file_region::alignment() - 1); 198 } 199 200 static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) { 201 return Len + (Offset - getLegalMapOffset(Offset)); 202 } 203 204 const char *getStart(uint64_t Len, uint64_t Offset) { 205 return MFR.const_data() + (Offset - getLegalMapOffset(Offset)); 206 } 207 208 public: 209 MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len, 210 uint64_t Offset, std::error_code &EC) 211 : MFR(FD, Mapmode<MB>, getLegalMapSize(Len, Offset), 212 getLegalMapOffset(Offset), EC) { 213 if (!EC) { 214 const char *Start = getStart(Len, Offset); 215 MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator); 216 } 217 } 218 219 /// Disable sized deallocation for MemoryBufferMMapFile, because it has 220 /// tail-allocated data. 221 void operator delete(void *p) { ::operator delete(p); } 222 223 StringRef getBufferIdentifier() const override { 224 // The name is stored after the class itself. 
225 return StringRef(reinterpret_cast<const char *>(this + 1) + sizeof(size_t), 226 *reinterpret_cast<const size_t *>(this + 1)); 227 } 228 229 MemoryBuffer::BufferKind getBufferKind() const override { 230 return MemoryBuffer::MemoryBuffer_MMap; 231 } 232 233 void dontNeedIfMmap() override { MFR.dontNeed(); } 234 }; 235 } // namespace 236 237 static ErrorOr<std::unique_ptr<WritableMemoryBuffer>> 238 getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) { 239 SmallString<sys::fs::DefaultReadChunkSize> Buffer; 240 if (Error E = sys::fs::readNativeFileToEOF(FD, Buffer)) 241 return errorToErrorCode(std::move(E)); 242 return getMemBufferCopyImpl(Buffer, BufferName); 243 } 244 245 ErrorOr<std::unique_ptr<MemoryBuffer>> 246 MemoryBuffer::getFile(const Twine &Filename, bool IsText, 247 bool RequiresNullTerminator, bool IsVolatile, 248 Optional<Align> Alignment) { 249 return getFileAux<MemoryBuffer>(Filename, /*MapSize=*/-1, /*Offset=*/0, 250 IsText, RequiresNullTerminator, IsVolatile, 251 Alignment); 252 } 253 254 template <typename MB> 255 static ErrorOr<std::unique_ptr<MB>> 256 getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, 257 uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator, 258 bool IsVolatile, Optional<Align> Alignment); 259 260 template <typename MB> 261 static ErrorOr<std::unique_ptr<MB>> 262 getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset, 263 bool IsText, bool RequiresNullTerminator, bool IsVolatile, 264 Optional<Align> Alignment) { 265 Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead( 266 Filename, IsText ? 
sys::fs::OF_TextWithCRLF : sys::fs::OF_None); 267 if (!FDOrErr) 268 return errorToErrorCode(FDOrErr.takeError()); 269 sys::fs::file_t FD = *FDOrErr; 270 auto Ret = getOpenFileImpl<MB>(FD, Filename, /*FileSize=*/-1, MapSize, Offset, 271 RequiresNullTerminator, IsVolatile, Alignment); 272 sys::fs::closeFile(FD); 273 return Ret; 274 } 275 276 ErrorOr<std::unique_ptr<WritableMemoryBuffer>> 277 WritableMemoryBuffer::getFile(const Twine &Filename, bool IsVolatile, 278 Optional<Align> Alignment) { 279 return getFileAux<WritableMemoryBuffer>( 280 Filename, /*MapSize=*/-1, /*Offset=*/0, /*IsText=*/false, 281 /*RequiresNullTerminator=*/false, IsVolatile, Alignment); 282 } 283 284 ErrorOr<std::unique_ptr<WritableMemoryBuffer>> 285 WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize, 286 uint64_t Offset, bool IsVolatile, 287 Optional<Align> Alignment) { 288 return getFileAux<WritableMemoryBuffer>( 289 Filename, MapSize, Offset, /*IsText=*/false, 290 /*RequiresNullTerminator=*/false, IsVolatile, Alignment); 291 } 292 293 std::unique_ptr<WritableMemoryBuffer> 294 WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, 295 const Twine &BufferName, 296 Optional<Align> Alignment) { 297 using MemBuffer = MemoryBufferMem<WritableMemoryBuffer>; 298 299 // Use 16-byte alignment if no alignment is specified. 300 Align BufAlign = Alignment.value_or(Align(16)); 301 302 // Allocate space for the MemoryBuffer, the data and the name. It is important 303 // that MemoryBuffer and data are aligned so PointerIntPair works with them. 304 SmallString<256> NameBuf; 305 StringRef NameRef = BufferName.toStringRef(NameBuf); 306 307 size_t StringLen = sizeof(MemBuffer) + sizeof(size_t) + NameRef.size(); 308 size_t RealLen = StringLen + Size + 1 + BufAlign.value(); 309 if (RealLen <= Size) // Check for rollover. 
310 return nullptr; 311 char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow)); 312 if (!Mem) 313 return nullptr; 314 315 // The name is stored after the class itself. 316 *reinterpret_cast<size_t *>(Mem + sizeof(MemBuffer)) = 317 NameRef.size(); // Null terminate buffer. 318 CopyStringRef(Mem + sizeof(MemBuffer) + sizeof(size_t), NameRef); 319 320 // The buffer begins after the name and must be aligned. 321 char *Buf = (char *)alignAddr(Mem + StringLen, BufAlign); 322 323 auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true); 324 Buf[Size] = 0; // Null terminate buffer. 325 return std::unique_ptr<WritableMemoryBuffer>(Ret); 326 } 327 328 std::unique_ptr<WritableMemoryBuffer> 329 WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) { 330 auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName); 331 if (!SB) 332 return nullptr; 333 memset(SB->getBufferStart(), 0, Size); 334 return SB; 335 } 336 337 static bool shouldUseMmap(sys::fs::file_t FD, 338 size_t FileSize, 339 size_t MapSize, 340 off_t Offset, 341 bool RequiresNullTerminator, 342 int PageSize, 343 bool IsVolatile) { 344 // mmap may leave the buffer without null terminator if the file size changed 345 // by the time the last page is mapped in, so avoid it if the file size is 346 // likely to change. 347 if (IsVolatile && RequiresNullTerminator) 348 return false; 349 350 // We don't use mmap for small files because this can severely fragment our 351 // address space. 352 if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize) 353 return false; 354 355 if (!RequiresNullTerminator) 356 return true; 357 358 // If we don't know the file size, use fstat to find out. fstat on an open 359 // file descriptor is cheaper than stat on a random path. 360 // FIXME: this chunk of code is duplicated, but it avoids a fstat when 361 // RequiresNullTerminator = false and MapSize != -1. 
362 if (FileSize == size_t(-1)) { 363 sys::fs::file_status Status; 364 if (sys::fs::status(FD, Status)) 365 return false; 366 FileSize = Status.getSize(); 367 } 368 369 // If we need a null terminator and the end of the map is inside the file, 370 // we cannot use mmap. 371 size_t End = Offset + MapSize; 372 assert(End <= FileSize); 373 if (End != FileSize) 374 return false; 375 376 // Don't try to map files that are exactly a multiple of the system page size 377 // if we need a null terminator. 378 if ((FileSize & (PageSize -1)) == 0) 379 return false; 380 381 #if defined(__CYGWIN__) 382 // Don't try to map files that are exactly a multiple of the physical page size 383 // if we need a null terminator. 384 // FIXME: We should reorganize again getPageSize() on Win32. 385 if ((FileSize & (4096 - 1)) == 0) 386 return false; 387 #endif 388 389 return true; 390 } 391 392 static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>> 393 getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize, 394 uint64_t Offset) { 395 Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite( 396 Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None); 397 if (!FDOrErr) 398 return errorToErrorCode(FDOrErr.takeError()); 399 sys::fs::file_t FD = *FDOrErr; 400 401 // Default is to map the full file. 402 if (MapSize == uint64_t(-1)) { 403 // If we don't know the file size, use fstat to find out. fstat on an open 404 // file descriptor is cheaper than stat on a random path. 405 if (FileSize == uint64_t(-1)) { 406 sys::fs::file_status Status; 407 std::error_code EC = sys::fs::status(FD, Status); 408 if (EC) 409 return EC; 410 411 // If this not a file or a block device (e.g. it's a named pipe 412 // or character device), we can't mmap it, so error out. 
413 sys::fs::file_type Type = Status.type(); 414 if (Type != sys::fs::file_type::regular_file && 415 Type != sys::fs::file_type::block_file) 416 return make_error_code(errc::invalid_argument); 417 418 FileSize = Status.getSize(); 419 } 420 MapSize = FileSize; 421 } 422 423 std::error_code EC; 424 std::unique_ptr<WriteThroughMemoryBuffer> Result( 425 new (NamedBufferAlloc(Filename)) 426 MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize, 427 Offset, EC)); 428 if (EC) 429 return EC; 430 return std::move(Result); 431 } 432 433 ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>> 434 WriteThroughMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize) { 435 return getReadWriteFile(Filename, FileSize, FileSize, 0); 436 } 437 438 /// Map a subrange of the specified file as a WritableMemoryBuffer. 439 ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>> 440 WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize, 441 uint64_t Offset) { 442 return getReadWriteFile(Filename, -1, MapSize, Offset); 443 } 444 445 template <typename MB> 446 static ErrorOr<std::unique_ptr<MB>> 447 getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, 448 uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator, 449 bool IsVolatile, Optional<Align> Alignment) { 450 static int PageSize = sys::Process::getPageSizeEstimate(); 451 452 // Default is to map the full file. 453 if (MapSize == uint64_t(-1)) { 454 // If we don't know the file size, use fstat to find out. fstat on an open 455 // file descriptor is cheaper than stat on a random path. 456 if (FileSize == uint64_t(-1)) { 457 sys::fs::file_status Status; 458 std::error_code EC = sys::fs::status(FD, Status); 459 if (EC) 460 return EC; 461 462 // If this not a file or a block device (e.g. it's a named pipe 463 // or character device), we can't trust the size. Create the memory 464 // buffer by copying off the stream. 
465 sys::fs::file_type Type = Status.type(); 466 if (Type != sys::fs::file_type::regular_file && 467 Type != sys::fs::file_type::block_file) 468 return getMemoryBufferForStream(FD, Filename); 469 470 FileSize = Status.getSize(); 471 } 472 MapSize = FileSize; 473 } 474 475 if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, 476 PageSize, IsVolatile)) { 477 std::error_code EC; 478 std::unique_ptr<MB> Result( 479 new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>( 480 RequiresNullTerminator, FD, MapSize, Offset, EC)); 481 if (!EC) 482 return std::move(Result); 483 } 484 485 #ifdef __MVS__ 486 // Set codepage auto-conversion for z/OS. 487 if (auto EC = llvm::enableAutoConversion(FD)) 488 return EC; 489 #endif 490 491 auto Buf = 492 WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename, Alignment); 493 if (!Buf) { 494 // Failed to create a buffer. The only way it can fail is if 495 // new(std::nothrow) returns 0. 496 return make_error_code(errc::not_enough_memory); 497 } 498 499 // Read until EOF, zero-initialize the rest. 
500 MutableArrayRef<char> ToRead = Buf->getBuffer(); 501 while (!ToRead.empty()) { 502 Expected<size_t> ReadBytes = 503 sys::fs::readNativeFileSlice(FD, ToRead, Offset); 504 if (!ReadBytes) 505 return errorToErrorCode(ReadBytes.takeError()); 506 if (*ReadBytes == 0) { 507 std::memset(ToRead.data(), 0, ToRead.size()); 508 break; 509 } 510 ToRead = ToRead.drop_front(*ReadBytes); 511 Offset += *ReadBytes; 512 } 513 514 return std::move(Buf); 515 } 516 517 ErrorOr<std::unique_ptr<MemoryBuffer>> 518 MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename, 519 uint64_t FileSize, bool RequiresNullTerminator, 520 bool IsVolatile, Optional<Align> Alignment) { 521 return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0, 522 RequiresNullTerminator, IsVolatile, 523 Alignment); 524 } 525 526 ErrorOr<std::unique_ptr<MemoryBuffer>> 527 MemoryBuffer::getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, 528 uint64_t MapSize, int64_t Offset, 529 bool IsVolatile, Optional<Align> Alignment) { 530 assert(MapSize != uint64_t(-1)); 531 return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false, 532 IsVolatile, Alignment); 533 } 534 535 ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() { 536 // Read in all of the data from stdin, we cannot mmap stdin. 537 // 538 // FIXME: That isn't necessarily true, we should try to mmap stdin and 539 // fallback if it fails. 
540 sys::ChangeStdinMode(sys::fs::OF_Text); 541 542 return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>"); 543 } 544 545 ErrorOr<std::unique_ptr<MemoryBuffer>> 546 MemoryBuffer::getFileAsStream(const Twine &Filename) { 547 Expected<sys::fs::file_t> FDOrErr = 548 sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None); 549 if (!FDOrErr) 550 return errorToErrorCode(FDOrErr.takeError()); 551 sys::fs::file_t FD = *FDOrErr; 552 ErrorOr<std::unique_ptr<MemoryBuffer>> Ret = 553 getMemoryBufferForStream(FD, Filename); 554 sys::fs::closeFile(FD); 555 return Ret; 556 } 557 558 MemoryBufferRef MemoryBuffer::getMemBufferRef() const { 559 StringRef Data = getBuffer(); 560 StringRef Identifier = getBufferIdentifier(); 561 return MemoryBufferRef(Data, Identifier); 562 } 563 564 SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() = default; 565