1 //===-- Benchmark ---------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "JSON.h" 10 #include "LibcBenchmark.h" 11 #include "LibcMemoryBenchmark.h" 12 #include "MemorySizeDistributions.h" 13 #include "llvm/Support/CommandLine.h" 14 #include "llvm/Support/ErrorHandling.h" 15 #include "llvm/Support/FileSystem.h" 16 #include "llvm/Support/JSON.h" 17 #include "llvm/Support/MemoryBuffer.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 namespace __llvm_libc { 21 22 extern void *memcpy(void *__restrict, const void *__restrict, size_t); 23 extern void *memset(void *, int, size_t); 24 25 } // namespace __llvm_libc 26 27 namespace llvm { 28 namespace libc_benchmarks { 29 30 enum Function { memcpy, memset }; 31 32 static cl::opt<std::string> 33 StudyName("study-name", cl::desc("The name for this study"), cl::Required); 34 35 static cl::opt<Function> 36 MemoryFunction("function", cl::desc("Sets the function to benchmark:"), 37 cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"), 38 clEnumVal(memset, "__llvm_libc::memset")), 39 cl::Required); 40 41 static cl::opt<std::string> 42 SizeDistributionName("size-distribution-name", 43 cl::desc("The name of the distribution to use")); 44 45 static cl::opt<bool> 46 SweepMode("sweep-mode", 47 cl::desc("If set, benchmark all sizes from 0 to sweep-max-size")); 48 49 static cl::opt<uint32_t> 50 SweepMaxSize("sweep-max-size", 51 cl::desc("The maximum size to use in sweep-mode"), 52 cl::init(256)); 53 54 static cl::opt<uint32_t> 55 AlignedAccess("aligned-access", 56 cl::desc("The alignment to use when accessing the buffers\n" 57 "Default is unaligned\n" 58 "Use 0 to disable address randomization"), 59 cl::init(1)); 60 61 static cl::opt<std::string> Output("output", 62 cl::desc("Specify output filename"), 63 cl::value_desc("filename"), cl::init("-")); 64 65 static cl::opt<uint32_t> 66 NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"), 67 cl::init(1)); 68 69 static constexpr int64_t KiB = 1024; 70 static constexpr int64_t ParameterStorageBytes = 4 * KiB; 71 static constexpr int64_t L1LeftAsideBytes = 1 * KiB; 72 73 struct ParameterType { 74 unsigned OffsetBytes : 16; // max : 16 KiB - 1 75 unsigned SizeBytes : 16; // max : 16 KiB - 1 76 }; 77 78 struct MemcpyBenchmark { 79 static constexpr auto GetDistributions = &getMemcpySizeDistributions; 80 static constexpr size_t BufferCount = 2; 81 static void amend(Study &S) { S.Configuration.Function = "memcpy"; } 82 83 MemcpyBenchmark(const size_t BufferSize) 84 : SrcBuffer(BufferSize), DstBuffer(BufferSize) {} 85 86 inline auto functor() { 87 return [this](ParameterType P) { 88 __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes, 89 P.SizeBytes); 90 return DstBuffer + P.OffsetBytes; 91 }; 92 } 93 94 AlignedBuffer SrcBuffer; 95 AlignedBuffer DstBuffer; 96 }; 97 98 struct MemsetBenchmark { 99 static constexpr auto GetDistributions = &getMemsetSizeDistributions; 100 static constexpr size_t BufferCount = 1; 101 static void amend(Study &S) { S.Configuration.Function = "memset"; } 102 103 MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {} 104 105 inline auto functor() { 106 return [this](ParameterType P) { 107 __llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF, 108 P.SizeBytes); 109 return DstBuffer + P.OffsetBytes; 110 }; 111 } 112 113 AlignedBuffer DstBuffer; 114 }; 115 116 template <typename Benchmark> struct Harness : Benchmark { 117 using Benchmark::functor; 118 119 Harness(const size_t BufferSize, size_t BatchParameterCount, 120 std::function<unsigned()> SizeSampler, 121 std::function<unsigned()> OffsetSampler) 122 : Benchmark(BufferSize), BufferSize(BufferSize), 123 Parameters(BatchParameterCount), SizeSampler(SizeSampler), 124 OffsetSampler(OffsetSampler) {} 125 126 CircularArrayRef<ParameterType> generateBatch(size_t Iterations) { 127 for (auto &P : Parameters) { 128 P.OffsetBytes = OffsetSampler(); 129 P.SizeBytes = SizeSampler(); 130 if (P.OffsetBytes + P.SizeBytes >= BufferSize) 131 report_fatal_error("Call would result in buffer overflow"); 132 } 133 return cycle(makeArrayRef(Parameters), Iterations); 134 } 135 136 private: 137 const size_t BufferSize; 138 std::vector<ParameterType> Parameters; 139 std::function<unsigned()> SizeSampler; 140 std::function<unsigned()> OffsetSampler; 141 }; 142 143 struct IBenchmark { 144 virtual ~IBenchmark() {} 145 virtual Study run() = 0; 146 }; 147 148 size_t getL1DataCacheSize() { 149 const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches; 150 const auto IsL1DataCache = [](const CacheInfo &CI) { 151 return CI.Type == "Data" && CI.Level == 1; 152 }; 153 const auto CacheIt = find_if(CacheInfos, IsL1DataCache); 154 if (CacheIt != CacheInfos.end()) 155 return CacheIt->Size; 156 report_fatal_error("Unable to read L1 Cache Data Size"); 157 } 158 159 template <typename Benchmark> struct MemfunctionBenchmark : IBenchmark { 160 MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize()) 161 : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes), 162 BufferSize(AvailableSize / Benchmark::BufferCount), 163 BatchParameterCount(BufferSize / sizeof(ParameterType)) { 164 // Handling command line flags 165 if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100) 166 report_fatal_error("Not enough L1 cache"); 167 168 if (!isPowerOfTwoOrZero(AlignedAccess)) 169 report_fatal_error(AlignedAccess.ArgStr + 170 Twine(" must be a power of two or zero")); 171 172 const bool HasDistributionName = !SizeDistributionName.empty(); 173 if (SweepMode && HasDistributionName) 174 report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) + 175 "` or `--" + Twine(SizeDistributionName.ArgStr) + "`"); 176 177 if (SweepMode) { 178 MaxSizeValue = SweepMaxSize; 179 } else { 180 std::map<StringRef, MemorySizeDistribution> Map; 181 for (MemorySizeDistribution Distribution : Benchmark::GetDistributions()) 182 Map[Distribution.Name] = Distribution; 183 if (Map.count(SizeDistributionName) == 0) { 184 std::string Message; 185 raw_string_ostream Stream(Message); 186 Stream << "Unknown --" << SizeDistributionName.ArgStr << "='" 187 << SizeDistributionName << "', available distributions:\n"; 188 for (const auto &Pair : Map) 189 Stream << "'" << Pair.first << "'\n"; 190 report_fatal_error(Stream.str()); 191 } 192 SizeDistribution = Map[SizeDistributionName]; 193 MaxSizeValue = SizeDistribution.Probabilities.size() - 1; 194 } 195 196 // Setup study. 197 Study.StudyName = StudyName; 198 Runtime &RI = Study.Runtime; 199 RI.Host = HostState::get(); 200 RI.BufferSize = BufferSize; 201 RI.BatchParameterCount = BatchParameterCount; 202 203 BenchmarkOptions &BO = RI.BenchmarkOptions; 204 BO.MinDuration = std::chrono::milliseconds(1); 205 BO.MaxDuration = std::chrono::seconds(1); 206 BO.MaxIterations = 10'000'000U; 207 BO.MinSamples = 4; 208 BO.MaxSamples = 1000; 209 BO.Epsilon = 0.01; // 1% 210 BO.ScalingFactor = 1.4; 211 212 StudyConfiguration &SC = Study.Configuration; 213 SC.NumTrials = NumTrials; 214 SC.IsSweepMode = SweepMode; 215 if (SweepMode) 216 SC.SweepModeMaxSize = SweepMaxSize; 217 else 218 SC.SizeDistributionName = SizeDistributionName; 219 SC.AccessAlignment = MaybeAlign(AlignedAccess); 220 221 // Delegate specific flags and configuration. 222 Benchmark::amend(Study); 223 } 224 225 Study run() override { 226 if (SweepMode) 227 runSweepMode(); 228 else 229 runDistributionMode(); 230 return Study; 231 } 232 233 private: 234 const int64_t AvailableSize; 235 const int64_t BufferSize; 236 const size_t BatchParameterCount; 237 size_t MaxSizeValue = 0; 238 MemorySizeDistribution SizeDistribution; 239 Study Study; 240 std::mt19937_64 Gen; 241 242 static constexpr bool isPowerOfTwoOrZero(size_t Value) { 243 return (Value & (Value - 1U)) == 0; 244 } 245 246 std::function<unsigned()> geOffsetSampler() { 247 return [this]() { 248 static OffsetDistribution OD(BufferSize, MaxSizeValue, 249 Study.Configuration.AccessAlignment); 250 return OD(Gen); 251 }; 252 } 253 254 std::function<unsigned()> getSizeSampler() { 255 return [this]() { 256 static std::discrete_distribution<unsigned> Distribution( 257 SizeDistribution.Probabilities.begin(), 258 SizeDistribution.Probabilities.end()); 259 return Distribution(Gen); 260 }; 261 } 262 263 void reportProgress() { 264 static size_t LastPercent = -1; 265 const size_t TotalSteps = Study.Measurements.capacity(); 266 const size_t Steps = Study.Measurements.size(); 267 const size_t Percent = 100 * Steps / TotalSteps; 268 if (Percent == LastPercent) 269 return; 270 LastPercent = Percent; 271 size_t I = 0; 272 errs() << '['; 273 for (; I <= Percent; ++I) 274 errs() << '#'; 275 for (; I <= 100; ++I) 276 errs() << '_'; 277 errs() << "] " << Percent << '%' << '\r'; 278 } 279 280 void runTrials(const BenchmarkOptions &Options, 281 std::function<unsigned()> SizeSampler, 282 std::function<unsigned()> OffsetSampler) { 283 Harness<Benchmark> B(BufferSize, BatchParameterCount, SizeSampler, 284 OffsetSampler); 285 for (size_t i = 0; i < NumTrials; ++i) { 286 const BenchmarkResult Result = benchmark(Options, B, B.functor()); 287 Study.Measurements.push_back(Result.BestGuess); 288 reportProgress(); 289 } 290 } 291 292 void runSweepMode() { 293 Study.Measurements.reserve(NumTrials * SweepMaxSize); 294 295 BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; 296 BO.MinDuration = std::chrono::milliseconds(1); 297 BO.InitialIterations = 100; 298 299 for (size_t Size = 0; Size <= SweepMaxSize; ++Size) { 300 const auto SizeSampler = [Size]() { return Size; }; 301 runTrials(BO, SizeSampler, geOffsetSampler()); 302 } 303 } 304 305 void runDistributionMode() { 306 Study.Measurements.reserve(NumTrials); 307 308 BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; 309 BO.MinDuration = std::chrono::milliseconds(10); 310 BO.InitialIterations = BatchParameterCount * 10; 311 312 runTrials(BO, getSizeSampler(), geOffsetSampler()); 313 } 314 }; 315 316 std::unique_ptr<IBenchmark> getMemfunctionBenchmark() { 317 switch (MemoryFunction) { 318 case memcpy: 319 return std::make_unique<MemfunctionBenchmark<MemcpyBenchmark>>(); 320 case memset: 321 return std::make_unique<MemfunctionBenchmark<MemsetBenchmark>>(); 322 } 323 } 324 325 void writeStudy(const Study &S) { 326 std::error_code EC; 327 raw_fd_ostream FOS(Output, EC); 328 if (EC) 329 report_fatal_error(Twine("Could not open file: ") 330 .concat(EC.message()) 331 .concat(", ") 332 .concat(Output)); 333 json::OStream JOS(FOS); 334 serializeToJson(S, JOS); 335 FOS << "\n"; 336 } 337 338 void main() { 339 checkRequirements(); 340 auto MB = getMemfunctionBenchmark(); 341 writeStudy(MB->run()); 342 } 343 344 } // namespace libc_benchmarks 345 } // namespace llvm 346 347 int main(int argc, char **argv) { 348 llvm::cl::ParseCommandLineOptions(argc, argv); 349 #ifndef NDEBUG 350 static_assert( 351 false, 352 "For reproducibility benchmarks should not be compiled in DEBUG mode."); 353 #endif 354 llvm::libc_benchmarks::main(); 355 return EXIT_SUCCESS; 356 } 357