1 //===-- Benchmark ---------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "JSON.h" 10 #include "LibcBenchmark.h" 11 #include "LibcMemoryBenchmark.h" 12 #include "MemorySizeDistributions.h" 13 #include "llvm/Support/CommandLine.h" 14 #include "llvm/Support/ErrorHandling.h" 15 #include "llvm/Support/FileSystem.h" 16 #include "llvm/Support/JSON.h" 17 #include "llvm/Support/MemoryBuffer.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 namespace __llvm_libc { 21 22 extern void *memcpy(void *__restrict, const void *__restrict, size_t); 23 extern void *memset(void *, int, size_t); 24 25 } // namespace __llvm_libc 26 27 namespace llvm { 28 namespace libc_benchmarks { 29 30 enum Function { memcpy, memset }; 31 32 static cl::opt<std::string> 33 StudyName("study-name", cl::desc("The name for this study"), cl::Required); 34 35 static cl::opt<Function> 36 MemoryFunction("function", cl::desc("Sets the function to benchmark:"), 37 cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"), 38 clEnumVal(memset, "__llvm_libc::memset")), 39 cl::Required); 40 41 static cl::opt<std::string> 42 SizeDistributionName("size-distribution-name", 43 cl::desc("The name of the distribution to use")); 44 45 static cl::opt<bool> 46 SweepMode("sweep-mode", 47 cl::desc("If set, benchmark all sizes from 0 to sweep-max-size")); 48 49 static cl::opt<uint32_t> 50 SweepMaxSize("sweep-max-size", 51 cl::desc("The maximum size to use in sweep-mode"), 52 cl::init(256)); 53 54 static cl::opt<uint32_t> 55 AlignedAccess("aligned-access", 56 cl::desc("The alignment to use when accessing the buffers\n" 57 "Default is unaligned\n" 58 "Use 0 to disable address randomization"), 59 cl::init(1)); 60 61 static cl::opt<std::string> Output("output", 62 cl::desc("Specify output filename"), 63 cl::value_desc("filename"), cl::init("-")); 64 65 static cl::opt<uint32_t> 66 NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"), 67 cl::init(1)); 68 69 static constexpr int64_t KiB = 1024; 70 static constexpr int64_t ParameterStorageBytes = 4 * KiB; 71 static constexpr int64_t L1LeftAsideBytes = 1 * KiB; 72 73 struct ParameterType { 74 unsigned OffsetBytes : 16; // max : 16 KiB - 1 75 unsigned SizeBytes : 16; // max : 16 KiB - 1 76 }; 77 78 struct MemcpyBenchmark { 79 static constexpr auto GetDistributions = &getMemcpySizeDistributions; 80 static constexpr size_t BufferCount = 2; 81 static void amend(Study &S) { S.Configuration.Function = "memcpy"; } 82 83 MemcpyBenchmark(const size_t BufferSize) 84 : SrcBuffer(BufferSize), DstBuffer(BufferSize) {} 85 86 inline auto functor() { 87 return [this](ParameterType P) { 88 __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes, 89 P.SizeBytes); 90 return DstBuffer + P.OffsetBytes; 91 }; 92 } 93 94 AlignedBuffer SrcBuffer; 95 AlignedBuffer DstBuffer; 96 }; 97 98 struct MemsetBenchmark { 99 static constexpr auto GetDistributions = &getMemsetSizeDistributions; 100 static constexpr size_t BufferCount = 1; 101 static void amend(Study &S) { S.Configuration.Function = "memset"; } 102 103 MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {} 104 105 inline auto functor() { 106 return [this](ParameterType P) { 107 __llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF, 108 P.SizeBytes); 109 return DstBuffer + P.OffsetBytes; 110 }; 111 } 112 113 AlignedBuffer DstBuffer; 114 }; 115 116 template <typename Benchmark> struct Harness : Benchmark { 117 using Benchmark::functor; 118 119 Harness(const size_t BufferSize, size_t BatchParameterCount, 120 std::function<unsigned()> SizeSampler, 121 std::function<unsigned()> OffsetSampler) 122 : Benchmark(BufferSize), BufferSize(BufferSize), 123 BatchParameterCount(BatchParameterCount), 124 Parameters(BatchParameterCount), SizeSampler(SizeSampler), 125 OffsetSampler(OffsetSampler) {} 126 127 CircularArrayRef<ParameterType> generateBatch(size_t Iterations) { 128 for (auto &P : Parameters) { 129 P.OffsetBytes = OffsetSampler(); 130 P.SizeBytes = SizeSampler(); 131 if (P.OffsetBytes + P.SizeBytes >= BufferSize) 132 report_fatal_error("Call would result in buffer overflow"); 133 } 134 return cycle(makeArrayRef(Parameters), Iterations); 135 } 136 137 private: 138 const size_t BufferSize; 139 const size_t BatchParameterCount; 140 std::vector<ParameterType> Parameters; 141 std::function<unsigned()> SizeSampler; 142 std::function<unsigned()> OffsetSampler; 143 }; 144 145 struct IBenchmark { 146 virtual ~IBenchmark() {} 147 virtual Study run() = 0; 148 }; 149 150 size_t getL1DataCacheSize() { 151 const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches; 152 const auto IsL1DataCache = [](const CacheInfo &CI) { 153 return CI.Type == "Data" && CI.Level == 1; 154 }; 155 const auto CacheIt = find_if(CacheInfos, IsL1DataCache); 156 if (CacheIt != CacheInfos.end()) 157 return CacheIt->Size; 158 report_fatal_error("Unable to read L1 Cache Data Size"); 159 } 160 161 template <typename Benchmark> struct MemfunctionBenchmark : IBenchmark { 162 MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize()) 163 : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes), 164 BufferSize(AvailableSize / Benchmark::BufferCount), 165 BatchParameterCount(BufferSize / sizeof(ParameterType)) { 166 // Handling command line flags 167 if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100) 168 report_fatal_error("Not enough L1 cache"); 169 170 if (!isPowerOfTwoOrZero(AlignedAccess)) 171 report_fatal_error(AlignedAccess.ArgStr + 172 Twine(" must be a power of two or zero")); 173 174 const bool HasDistributionName = !SizeDistributionName.empty(); 175 if (SweepMode && HasDistributionName) 176 report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) + 177 "` or `--" + Twine(SizeDistributionName.ArgStr) + "`"); 178 179 if (SweepMode) { 180 MaxSizeValue = SweepMaxSize; 181 } else { 182 std::map<StringRef, MemorySizeDistribution> Map; 183 for (MemorySizeDistribution Distribution : Benchmark::GetDistributions()) 184 Map[Distribution.Name] = Distribution; 185 if (Map.count(SizeDistributionName) == 0) { 186 std::string Message; 187 raw_string_ostream Stream(Message); 188 Stream << "Unknown --" << SizeDistributionName.ArgStr << "='" 189 << SizeDistributionName << "', available distributions:\n"; 190 for (const auto &Pair : Map) 191 Stream << "'" << Pair.first << "'\n"; 192 report_fatal_error(Stream.str()); 193 } 194 SizeDistribution = Map[SizeDistributionName]; 195 MaxSizeValue = SizeDistribution.Probabilities.size() - 1; 196 } 197 198 // Setup study. 199 Study.StudyName = StudyName; 200 Runtime &RI = Study.Runtime; 201 RI.Host = HostState::get(); 202 RI.BufferSize = BufferSize; 203 RI.BatchParameterCount = BatchParameterCount; 204 205 BenchmarkOptions &BO = RI.BenchmarkOptions; 206 BO.MinDuration = std::chrono::milliseconds(1); 207 BO.MaxDuration = std::chrono::seconds(1); 208 BO.MaxIterations = 10'000'000U; 209 BO.MinSamples = 4; 210 BO.MaxSamples = 1000; 211 BO.Epsilon = 0.01; // 1% 212 BO.ScalingFactor = 1.4; 213 214 StudyConfiguration &SC = Study.Configuration; 215 SC.NumTrials = NumTrials; 216 SC.IsSweepMode = SweepMode; 217 if (SweepMode) 218 SC.SweepModeMaxSize = SweepMaxSize; 219 else 220 SC.SizeDistributionName = SizeDistributionName; 221 SC.AccessAlignment = MaybeAlign(AlignedAccess); 222 223 // Delegate specific flags and configuration. 224 Benchmark::amend(Study); 225 } 226 227 Study run() override { 228 if (SweepMode) 229 runSweepMode(); 230 else 231 runDistributionMode(); 232 return Study; 233 } 234 235 private: 236 const int64_t AvailableSize; 237 const int64_t BufferSize; 238 const size_t BatchParameterCount; 239 size_t MaxSizeValue = 0; 240 MemorySizeDistribution SizeDistribution; 241 Study Study; 242 std::mt19937_64 Gen; 243 244 static constexpr bool isPowerOfTwoOrZero(size_t Value) { 245 return (Value & (Value - 1U)) == 0; 246 } 247 248 std::function<unsigned()> geOffsetSampler() { 249 return [this]() { 250 static OffsetDistribution OD(BufferSize, MaxSizeValue, 251 Study.Configuration.AccessAlignment); 252 return OD(Gen); 253 }; 254 } 255 256 std::function<unsigned()> getSizeSampler() { 257 return [this]() { 258 static std::discrete_distribution<unsigned> Distribution( 259 SizeDistribution.Probabilities.begin(), 260 SizeDistribution.Probabilities.end()); 261 return Distribution(Gen); 262 }; 263 } 264 265 void reportProgress() { 266 static size_t LastPercent = -1; 267 const size_t TotalSteps = Study.Measurements.capacity(); 268 const size_t Steps = Study.Measurements.size(); 269 const size_t Percent = 100 * Steps / TotalSteps; 270 if (Percent == LastPercent) 271 return; 272 LastPercent = Percent; 273 size_t I = 0; 274 errs() << '['; 275 for (; I <= Percent; ++I) 276 errs() << '#'; 277 for (; I <= 100; ++I) 278 errs() << '_'; 279 errs() << "] " << Percent << '%' << '\r'; 280 } 281 282 void runTrials(const BenchmarkOptions &Options, 283 std::function<unsigned()> SizeSampler, 284 std::function<unsigned()> OffsetSampler) { 285 Harness<Benchmark> B(BufferSize, BatchParameterCount, SizeSampler, 286 OffsetSampler); 287 for (size_t i = 0; i < NumTrials; ++i) { 288 const BenchmarkResult Result = benchmark(Options, B, B.functor()); 289 Study.Measurements.push_back(Result.BestGuess); 290 reportProgress(); 291 } 292 } 293 294 void runSweepMode() { 295 Study.Measurements.reserve(NumTrials * SweepMaxSize); 296 297 BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; 298 BO.MinDuration = std::chrono::milliseconds(1); 299 BO.InitialIterations = 100; 300 301 for (size_t Size = 0; Size <= SweepMaxSize; ++Size) { 302 const auto SizeSampler = [Size]() { return Size; }; 303 runTrials(BO, SizeSampler, geOffsetSampler()); 304 } 305 } 306 307 void runDistributionMode() { 308 Study.Measurements.reserve(NumTrials); 309 310 BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; 311 BO.MinDuration = std::chrono::milliseconds(10); 312 BO.InitialIterations = BatchParameterCount * 10; 313 314 runTrials(BO, getSizeSampler(), geOffsetSampler()); 315 } 316 }; 317 318 std::unique_ptr<IBenchmark> getMemfunctionBenchmark() { 319 switch (MemoryFunction) { 320 case memcpy: 321 return std::make_unique<MemfunctionBenchmark<MemcpyBenchmark>>(); 322 case memset: 323 return std::make_unique<MemfunctionBenchmark<MemsetBenchmark>>(); 324 } 325 } 326 327 void writeStudy(const Study &S) { 328 std::error_code EC; 329 raw_fd_ostream FOS(Output, EC); 330 if (EC) 331 report_fatal_error(Twine("Could not open file: ") 332 .concat(EC.message()) 333 .concat(", ") 334 .concat(Output)); 335 json::OStream JOS(FOS); 336 serializeToJson(S, JOS); 337 FOS << "\n"; 338 } 339 340 void main() { 341 checkRequirements(); 342 auto MB = getMemfunctionBenchmark(); 343 writeStudy(MB->run()); 344 } 345 346 } // namespace libc_benchmarks 347 } // namespace llvm 348 349 int main(int argc, char **argv) { 350 llvm::cl::ParseCommandLineOptions(argc, argv); 351 #ifndef NDEBUG 352 static_assert( 353 false, 354 "For reproducibility benchmarks should not be compiled in DEBUG mode."); 355 #endif 356 llvm::libc_benchmarks::main(); 357 return EXIT_SUCCESS; 358 } 359