xref: /llvm-project/libc/benchmarks/LibcMemoryBenchmarkMain.cpp (revision d4bb3ef53276213d3ba8987da5f76f423b86160d)
1 //===-- Benchmark ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "JSON.h"
10 #include "LibcBenchmark.h"
11 #include "LibcMemoryBenchmark.h"
12 #include "MemorySizeDistributions.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/ErrorHandling.h"
15 #include "llvm/Support/FileSystem.h"
16 #include "llvm/Support/JSON.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/Support/raw_ostream.h"
19 
namespace __llvm_libc {

// Entry points of the memory functions under test. They are only declared in
// this file; the definitions have to be supplied by whatever gets linked in.
void *memcpy(void *__restrict Dst, const void *__restrict Src, size_t Count);
void *memset(void *Dst, int Value, size_t Count);

} // namespace __llvm_libc
26 
27 namespace llvm {
28 namespace libc_benchmarks {
29 
30 enum Function { memcpy, memset };
31 
32 static cl::opt<std::string>
33     StudyName("study-name", cl::desc("The name for this study"), cl::Required);
34 
35 static cl::opt<Function>
36     MemoryFunction("function", cl::desc("Sets the function to benchmark:"),
37                    cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"),
38                               clEnumVal(memset, "__llvm_libc::memset")),
39                    cl::Required);
40 
41 static cl::opt<std::string>
42     SizeDistributionName("size-distribution-name",
43                          cl::desc("The name of the distribution to use"));
44 
45 static cl::opt<bool>
46     SweepMode("sweep-mode",
47               cl::desc("If set, benchmark all sizes from 0 to sweep-max-size"));
48 
49 static cl::opt<uint32_t>
50     SweepMaxSize("sweep-max-size",
51                  cl::desc("The maximum size to use in sweep-mode"),
52                  cl::init(256));
53 
54 static cl::opt<uint32_t>
55     AlignedAccess("aligned-access",
56                   cl::desc("The alignment to use when accessing the buffers\n"
57                            "Default is unaligned\n"
58                            "Use 0 to disable address randomization"),
59                   cl::init(1));
60 
61 static cl::opt<std::string> Output("output",
62                                    cl::desc("Specify output filename"),
63                                    cl::value_desc("filename"), cl::init("-"));
64 
65 static cl::opt<uint32_t>
66     NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"),
67               cl::init(1));
68 
// Sizes, in bytes, used to carve up the L1 data cache budget.
static constexpr int64_t KiB{1024};
// Subtracted from the L1 size when sizing the benchmark buffers.
static constexpr int64_t ParameterStorageBytes{4 * KiB};
// Also subtracted from the L1 size when sizing the benchmark buffers.
static constexpr int64_t L1LeftAsideBytes{1 * KiB};
72 
/// Arguments of a single benchmarked call: the byte offset into the buffer(s)
/// and the number of bytes to operate on. Each value is stored in a 16-bit
/// bit-field, so anything above 65535 does not fit and would be truncated on
/// assignment.
struct ParameterType {
  unsigned OffsetBytes : 16; // max : 64 KiB - 1
  unsigned SizeBytes : 16;   // max : 64 KiB - 1
};
77 
78 struct MemcpyBenchmark {
79   static constexpr auto GetDistributions = &getMemcpySizeDistributions;
80   static constexpr size_t BufferCount = 2;
81   static void amend(Study &S) { S.Configuration.Function = "memcpy"; }
82 
83   MemcpyBenchmark(const size_t BufferSize)
84       : SrcBuffer(BufferSize), DstBuffer(BufferSize) {}
85 
86   inline auto functor() {
87     return [this](ParameterType P) {
88       __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes,
89                           P.SizeBytes);
90       return DstBuffer + P.OffsetBytes;
91     };
92   }
93 
94   AlignedBuffer SrcBuffer;
95   AlignedBuffer DstBuffer;
96 };
97 
98 struct MemsetBenchmark {
99   static constexpr auto GetDistributions = &getMemsetSizeDistributions;
100   static constexpr size_t BufferCount = 1;
101   static void amend(Study &S) { S.Configuration.Function = "memset"; }
102 
103   MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}
104 
105   inline auto functor() {
106     return [this](ParameterType P) {
107       __llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF,
108                           P.SizeBytes);
109       return DstBuffer + P.OffsetBytes;
110     };
111   }
112 
113   AlignedBuffer DstBuffer;
114 };
115 
116 template <typename Benchmark> struct Harness : Benchmark {
117   using Benchmark::functor;
118 
119   Harness(const size_t BufferSize, size_t BatchParameterCount,
120           std::function<unsigned()> SizeSampler,
121           std::function<unsigned()> OffsetSampler)
122       : Benchmark(BufferSize), BufferSize(BufferSize),
123         Parameters(BatchParameterCount), SizeSampler(SizeSampler),
124         OffsetSampler(OffsetSampler) {}
125 
126   CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
127     for (auto &P : Parameters) {
128       P.OffsetBytes = OffsetSampler();
129       P.SizeBytes = SizeSampler();
130       if (P.OffsetBytes + P.SizeBytes >= BufferSize)
131         report_fatal_error("Call would result in buffer overflow");
132     }
133     return cycle(makeArrayRef(Parameters), Iterations);
134   }
135 
136 private:
137   const size_t BufferSize;
138   std::vector<ParameterType> Parameters;
139   std::function<unsigned()> SizeSampler;
140   std::function<unsigned()> OffsetSampler;
141 };
142 
143 struct IBenchmark {
144   virtual ~IBenchmark() {}
145   virtual Study run() = 0;
146 };
147 
148 size_t getL1DataCacheSize() {
149   const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches;
150   const auto IsL1DataCache = [](const CacheInfo &CI) {
151     return CI.Type == "Data" && CI.Level == 1;
152   };
153   const auto CacheIt = find_if(CacheInfos, IsL1DataCache);
154   if (CacheIt != CacheInfos.end())
155     return CacheIt->Size;
156   report_fatal_error("Unable to read L1 Cache Data Size");
157 }
158 
159 template <typename Benchmark> struct MemfunctionBenchmark : IBenchmark {
160   MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize())
161       : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes),
162         BufferSize(AvailableSize / Benchmark::BufferCount),
163         BatchParameterCount(BufferSize / sizeof(ParameterType)) {
164     // Handling command line flags
165     if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100)
166       report_fatal_error("Not enough L1 cache");
167 
168     if (!isPowerOfTwoOrZero(AlignedAccess))
169       report_fatal_error(AlignedAccess.ArgStr +
170                          Twine(" must be a power of two or zero"));
171 
172     const bool HasDistributionName = !SizeDistributionName.empty();
173     if (SweepMode && HasDistributionName)
174       report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
175                          "` or `--" + Twine(SizeDistributionName.ArgStr) + "`");
176 
177     if (SweepMode) {
178       MaxSizeValue = SweepMaxSize;
179     } else {
180       std::map<StringRef, MemorySizeDistribution> Map;
181       for (MemorySizeDistribution Distribution : Benchmark::GetDistributions())
182         Map[Distribution.Name] = Distribution;
183       if (Map.count(SizeDistributionName) == 0) {
184         std::string Message;
185         raw_string_ostream Stream(Message);
186         Stream << "Unknown --" << SizeDistributionName.ArgStr << "='"
187                << SizeDistributionName << "', available distributions:\n";
188         for (const auto &Pair : Map)
189           Stream << "'" << Pair.first << "'\n";
190         report_fatal_error(Stream.str());
191       }
192       SizeDistribution = Map[SizeDistributionName];
193       MaxSizeValue = SizeDistribution.Probabilities.size() - 1;
194     }
195 
196     // Setup study.
197     Study.StudyName = StudyName;
198     Runtime &RI = Study.Runtime;
199     RI.Host = HostState::get();
200     RI.BufferSize = BufferSize;
201     RI.BatchParameterCount = BatchParameterCount;
202 
203     BenchmarkOptions &BO = RI.BenchmarkOptions;
204     BO.MinDuration = std::chrono::milliseconds(1);
205     BO.MaxDuration = std::chrono::seconds(1);
206     BO.MaxIterations = 10'000'000U;
207     BO.MinSamples = 4;
208     BO.MaxSamples = 1000;
209     BO.Epsilon = 0.01; // 1%
210     BO.ScalingFactor = 1.4;
211 
212     StudyConfiguration &SC = Study.Configuration;
213     SC.NumTrials = NumTrials;
214     SC.IsSweepMode = SweepMode;
215     if (SweepMode)
216       SC.SweepModeMaxSize = SweepMaxSize;
217     else
218       SC.SizeDistributionName = SizeDistributionName;
219     SC.AccessAlignment = MaybeAlign(AlignedAccess);
220 
221     // Delegate specific flags and configuration.
222     Benchmark::amend(Study);
223   }
224 
225   Study run() override {
226     if (SweepMode)
227       runSweepMode();
228     else
229       runDistributionMode();
230     return Study;
231   }
232 
233 private:
234   const int64_t AvailableSize;
235   const int64_t BufferSize;
236   const size_t BatchParameterCount;
237   size_t MaxSizeValue = 0;
238   MemorySizeDistribution SizeDistribution;
239   Study Study;
240   std::mt19937_64 Gen;
241 
242   static constexpr bool isPowerOfTwoOrZero(size_t Value) {
243     return (Value & (Value - 1U)) == 0;
244   }
245 
246   std::function<unsigned()> geOffsetSampler() {
247     return [this]() {
248       static OffsetDistribution OD(BufferSize, MaxSizeValue,
249                                    Study.Configuration.AccessAlignment);
250       return OD(Gen);
251     };
252   }
253 
254   std::function<unsigned()> getSizeSampler() {
255     return [this]() {
256       static std::discrete_distribution<unsigned> Distribution(
257           SizeDistribution.Probabilities.begin(),
258           SizeDistribution.Probabilities.end());
259       return Distribution(Gen);
260     };
261   }
262 
263   void reportProgress() {
264     static size_t LastPercent = -1;
265     const size_t TotalSteps = Study.Measurements.capacity();
266     const size_t Steps = Study.Measurements.size();
267     const size_t Percent = 100 * Steps / TotalSteps;
268     if (Percent == LastPercent)
269       return;
270     LastPercent = Percent;
271     size_t I = 0;
272     errs() << '[';
273     for (; I <= Percent; ++I)
274       errs() << '#';
275     for (; I <= 100; ++I)
276       errs() << '_';
277     errs() << "] " << Percent << '%' << '\r';
278   }
279 
280   void runTrials(const BenchmarkOptions &Options,
281                  std::function<unsigned()> SizeSampler,
282                  std::function<unsigned()> OffsetSampler) {
283     Harness<Benchmark> B(BufferSize, BatchParameterCount, SizeSampler,
284                          OffsetSampler);
285     for (size_t i = 0; i < NumTrials; ++i) {
286       const BenchmarkResult Result = benchmark(Options, B, B.functor());
287       Study.Measurements.push_back(Result.BestGuess);
288       reportProgress();
289     }
290   }
291 
292   void runSweepMode() {
293     Study.Measurements.reserve(NumTrials * SweepMaxSize);
294 
295     BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
296     BO.MinDuration = std::chrono::milliseconds(1);
297     BO.InitialIterations = 100;
298 
299     for (size_t Size = 0; Size <= SweepMaxSize; ++Size) {
300       const auto SizeSampler = [Size]() { return Size; };
301       runTrials(BO, SizeSampler, geOffsetSampler());
302     }
303   }
304 
305   void runDistributionMode() {
306     Study.Measurements.reserve(NumTrials);
307 
308     BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
309     BO.MinDuration = std::chrono::milliseconds(10);
310     BO.InitialIterations = BatchParameterCount * 10;
311 
312     runTrials(BO, getSizeSampler(), geOffsetSampler());
313   }
314 };
315 
316 std::unique_ptr<IBenchmark> getMemfunctionBenchmark() {
317   switch (MemoryFunction) {
318   case memcpy:
319     return std::make_unique<MemfunctionBenchmark<MemcpyBenchmark>>();
320   case memset:
321     return std::make_unique<MemfunctionBenchmark<MemsetBenchmark>>();
322   }
323 }
324 
325 void writeStudy(const Study &S) {
326   std::error_code EC;
327   raw_fd_ostream FOS(Output, EC);
328   if (EC)
329     report_fatal_error(Twine("Could not open file: ")
330                            .concat(EC.message())
331                            .concat(", ")
332                            .concat(Output));
333   json::OStream JOS(FOS);
334   serializeToJson(S, JOS);
335   FOS << "\n";
336 }
337 
338 void main() {
339   checkRequirements();
340   auto MB = getMemfunctionBenchmark();
341   writeStudy(MB->run());
342 }
343 
344 } // namespace libc_benchmarks
345 } // namespace llvm
346 
347 int main(int argc, char **argv) {
348   llvm::cl::ParseCommandLineOptions(argc, argv);
349 #ifndef NDEBUG
350   static_assert(
351       false,
352       "For reproducibility benchmarks should not be compiled in DEBUG mode.");
353 #endif
354   llvm::libc_benchmarks::main();
355   return EXIT_SUCCESS;
356 }
357