1 //===-- Benchmark ---------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "JSON.h" 10 #include "LibcBenchmark.h" 11 #include "LibcMemoryBenchmark.h" 12 #include "MemorySizeDistributions.h" 13 #include "llvm/Support/CommandLine.h" 14 #include "llvm/Support/ErrorHandling.h" 15 #include "llvm/Support/FileSystem.h" 16 #include "llvm/Support/JSON.h" 17 #include "llvm/Support/MemoryBuffer.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 namespace __llvm_libc { 21 22 extern void *memcpy(void *__restrict, const void *__restrict, size_t); 23 extern void *memset(void *, int, size_t); 24 25 } // namespace __llvm_libc 26 27 namespace llvm { 28 namespace libc_benchmarks { 29 30 static cl::opt<std::string> 31 StudyName("study-name", cl::desc("The name for this study"), cl::Required); 32 33 static cl::opt<std::string> 34 SizeDistributionName("size-distribution-name", 35 cl::desc("The name of the distribution to use")); 36 37 static cl::opt<bool> 38 SweepMode("sweep-mode", 39 cl::desc("If set, benchmark all sizes from 0 to sweep-max-size")); 40 41 static cl::opt<uint32_t> 42 SweepMaxSize("sweep-max-size", 43 cl::desc("The maximum size to use in sweep-mode"), 44 cl::init(256)); 45 46 static cl::opt<uint32_t> 47 AlignedAccess("aligned-access", 48 cl::desc("The alignment to use when accessing the buffers\n" 49 "Default is unaligned\n" 50 "Use 0 to disable address randomization"), 51 cl::init(1)); 52 53 static cl::opt<std::string> Output("output", 54 cl::desc("Specify output filename"), 55 cl::value_desc("filename"), cl::init("-")); 56 57 static cl::opt<uint32_t> 58 NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"), 59 cl::init(1)); 60 61 static constexpr int64_t KiB = 1024; 62 static constexpr int64_t ParameterStorageBytes = 4 * KiB; 63 static constexpr int64_t L1LeftAsideBytes = 1 * KiB; 64 65 struct ParameterType { 66 unsigned OffsetBytes : 16; // max : 16 KiB - 1 67 unsigned SizeBytes : 16; // max : 16 KiB - 1 68 }; 69 70 #if defined(LIBC_BENCHMARK_FUNCTION_MEMCPY) 71 struct Benchmark { 72 static constexpr auto GetDistributions = &getMemcpySizeDistributions; 73 static constexpr size_t BufferCount = 2; 74 75 Benchmark(const size_t BufferSize) 76 : SrcBuffer(BufferSize), DstBuffer(BufferSize) {} 77 78 inline auto functor() { 79 return [this](ParameterType P) { 80 __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes, 81 P.SizeBytes); 82 return DstBuffer + P.OffsetBytes; 83 }; 84 } 85 86 AlignedBuffer SrcBuffer; 87 AlignedBuffer DstBuffer; 88 }; 89 #elif defined(LIBC_BENCHMARK_FUNCTION_MEMSET) 90 struct Benchmark { 91 static constexpr auto GetDistributions = &getMemsetSizeDistributions; 92 static constexpr size_t BufferCount = 1; 93 94 Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {} 95 96 inline auto functor() { 97 return [this](ParameterType P) { 98 __llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF, 99 P.SizeBytes); 100 return DstBuffer + P.OffsetBytes; 101 }; 102 } 103 104 AlignedBuffer DstBuffer; 105 }; 106 #else 107 #error "Missing LIBC_BENCHMARK_FUNCTION_XXX definition" 108 #endif 109 110 struct Harness : Benchmark { 111 112 Harness(const size_t BufferSize, size_t BatchParameterCount, 113 std::function<unsigned()> SizeSampler, 114 std::function<unsigned()> OffsetSampler) 115 : Benchmark(BufferSize), BufferSize(BufferSize), 116 Parameters(BatchParameterCount), SizeSampler(SizeSampler), 117 OffsetSampler(OffsetSampler) {} 118 119 CircularArrayRef<ParameterType> generateBatch(size_t Iterations) { 120 for (auto &P : Parameters) { 121 P.OffsetBytes = OffsetSampler(); 122 P.SizeBytes = SizeSampler(); 123 if (P.OffsetBytes + P.SizeBytes >= BufferSize) 124 report_fatal_error("Call would result in buffer overflow"); 125 } 126 return cycle(makeArrayRef(Parameters), Iterations); 127 } 128 129 private: 130 const size_t BufferSize; 131 std::vector<ParameterType> Parameters; 132 std::function<unsigned()> SizeSampler; 133 std::function<unsigned()> OffsetSampler; 134 }; 135 136 size_t getL1DataCacheSize() { 137 const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches; 138 const auto IsL1DataCache = [](const CacheInfo &CI) { 139 return CI.Type == "Data" && CI.Level == 1; 140 }; 141 const auto CacheIt = find_if(CacheInfos, IsL1DataCache); 142 if (CacheIt != CacheInfos.end()) 143 return CacheIt->Size; 144 report_fatal_error("Unable to read L1 Cache Data Size"); 145 } 146 147 struct MemfunctionBenchmark { 148 MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize()) 149 : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes), 150 BufferSize(AvailableSize / Benchmark::BufferCount), 151 BatchParameterCount(BufferSize / sizeof(ParameterType)) { 152 // Handling command line flags 153 if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100) 154 report_fatal_error("Not enough L1 cache"); 155 156 if (!isPowerOfTwoOrZero(AlignedAccess)) 157 report_fatal_error(AlignedAccess.ArgStr + 158 Twine(" must be a power of two or zero")); 159 160 const bool HasDistributionName = !SizeDistributionName.empty(); 161 if (SweepMode && HasDistributionName) 162 report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) + 163 "` or `--" + Twine(SizeDistributionName.ArgStr) + "`"); 164 165 if (SweepMode) { 166 MaxSizeValue = SweepMaxSize; 167 } else { 168 std::map<StringRef, MemorySizeDistribution> Map; 169 for (MemorySizeDistribution Distribution : Benchmark::GetDistributions()) 170 Map[Distribution.Name] = Distribution; 171 if (Map.count(SizeDistributionName) == 0) { 172 std::string Message; 173 raw_string_ostream Stream(Message); 174 Stream << "Unknown --" << SizeDistributionName.ArgStr << "='" 175 << SizeDistributionName << "', available distributions:\n"; 176 for (const auto &Pair : Map) 177 Stream << "'" << Pair.first << "'\n"; 178 report_fatal_error(Stream.str()); 179 } 180 SizeDistribution = Map[SizeDistributionName]; 181 MaxSizeValue = SizeDistribution.Probabilities.size() - 1; 182 } 183 184 // Setup study. 185 Study.StudyName = StudyName; 186 Runtime &RI = Study.Runtime; 187 RI.Host = HostState::get(); 188 RI.BufferSize = BufferSize; 189 RI.BatchParameterCount = BatchParameterCount; 190 191 BenchmarkOptions &BO = RI.BenchmarkOptions; 192 BO.MinDuration = std::chrono::milliseconds(1); 193 BO.MaxDuration = std::chrono::seconds(1); 194 BO.MaxIterations = 10'000'000U; 195 BO.MinSamples = 4; 196 BO.MaxSamples = 1000; 197 BO.Epsilon = 0.01; // 1% 198 BO.ScalingFactor = 1.4; 199 200 StudyConfiguration &SC = Study.Configuration; 201 SC.NumTrials = NumTrials; 202 SC.IsSweepMode = SweepMode; 203 if (SweepMode) 204 SC.SweepModeMaxSize = SweepMaxSize; 205 else 206 SC.SizeDistributionName = SizeDistributionName; 207 SC.AccessAlignment = MaybeAlign(AlignedAccess); 208 SC.Function = LIBC_BENCHMARK_FUNCTION_NAME; 209 } 210 211 Study run() { 212 if (SweepMode) 213 runSweepMode(); 214 else 215 runDistributionMode(); 216 return Study; 217 } 218 219 private: 220 const int64_t AvailableSize; 221 const int64_t BufferSize; 222 const size_t BatchParameterCount; 223 size_t MaxSizeValue = 0; 224 MemorySizeDistribution SizeDistribution; 225 Study Study; 226 std::mt19937_64 Gen; 227 228 static constexpr bool isPowerOfTwoOrZero(size_t Value) { 229 return (Value & (Value - 1U)) == 0; 230 } 231 232 std::function<unsigned()> geOffsetSampler() { 233 return [this]() { 234 static OffsetDistribution OD(BufferSize, MaxSizeValue, 235 Study.Configuration.AccessAlignment); 236 return OD(Gen); 237 }; 238 } 239 240 std::function<unsigned()> getSizeSampler() { 241 return [this]() { 242 static std::discrete_distribution<unsigned> Distribution( 243 SizeDistribution.Probabilities.begin(), 244 SizeDistribution.Probabilities.end()); 245 return Distribution(Gen); 246 }; 247 } 248 249 void reportProgress() { 250 static size_t LastPercent = -1; 251 const size_t TotalSteps = Study.Measurements.capacity(); 252 const size_t Steps = Study.Measurements.size(); 253 const size_t Percent = 100 * Steps / TotalSteps; 254 if (Percent == LastPercent) 255 return; 256 LastPercent = Percent; 257 size_t I = 0; 258 errs() << '['; 259 for (; I <= Percent; ++I) 260 errs() << '#'; 261 for (; I <= 100; ++I) 262 errs() << '_'; 263 errs() << "] " << Percent << '%' << '\r'; 264 } 265 266 void runTrials(const BenchmarkOptions &Options, 267 std::function<unsigned()> SizeSampler, 268 std::function<unsigned()> OffsetSampler) { 269 Harness B(BufferSize, BatchParameterCount, SizeSampler, OffsetSampler); 270 for (size_t i = 0; i < NumTrials; ++i) { 271 const BenchmarkResult Result = benchmark(Options, B, B.functor()); 272 Study.Measurements.push_back(Result.BestGuess); 273 reportProgress(); 274 } 275 } 276 277 void runSweepMode() { 278 Study.Measurements.reserve(NumTrials * SweepMaxSize); 279 280 BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; 281 BO.MinDuration = std::chrono::milliseconds(1); 282 BO.InitialIterations = 100; 283 284 for (size_t Size = 0; Size <= SweepMaxSize; ++Size) { 285 const auto SizeSampler = [Size]() { return Size; }; 286 runTrials(BO, SizeSampler, geOffsetSampler()); 287 } 288 } 289 290 void runDistributionMode() { 291 Study.Measurements.reserve(NumTrials); 292 293 BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; 294 BO.MinDuration = std::chrono::milliseconds(10); 295 BO.InitialIterations = BatchParameterCount * 10; 296 297 runTrials(BO, getSizeSampler(), geOffsetSampler()); 298 } 299 }; 300 301 void writeStudy(const Study &S) { 302 std::error_code EC; 303 raw_fd_ostream FOS(Output, EC); 304 if (EC) 305 report_fatal_error(Twine("Could not open file: ") 306 .concat(EC.message()) 307 .concat(", ") 308 .concat(Output)); 309 json::OStream JOS(FOS); 310 serializeToJson(S, JOS); 311 FOS << "\n"; 312 } 313 314 void main() { 315 checkRequirements(); 316 MemfunctionBenchmark MB; 317 writeStudy(MB.run()); 318 } 319 320 } // namespace libc_benchmarks 321 } // namespace llvm 322 323 int main(int argc, char **argv) { 324 llvm::cl::ParseCommandLineOptions(argc, argv); 325 #ifndef NDEBUG 326 static_assert( 327 false, 328 "For reproducibility benchmarks should not be compiled in DEBUG mode."); 329 #endif 330 llvm::libc_benchmarks::main(); 331 return EXIT_SUCCESS; 332 } 333