xref: /llvm-project/libc/benchmarks/LibcMemoryBenchmarkMain.cpp (revision ab577807165c45abfbadc117125ec7275cdcc0cf)
1 //===-- Benchmark ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "JSON.h"
10 #include "LibcBenchmark.h"
11 #include "LibcMemoryBenchmark.h"
12 #include "MemorySizeDistributions.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/ErrorHandling.h"
15 #include "llvm/Support/FileSystem.h"
16 #include "llvm/Support/JSON.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/Support/raw_ostream.h"
19 
namespace __llvm_libc {

// Declarations of the LLVM libc entry points exercised by this tool. No
// definition is given in this file; the benchmark functors below call them.
void *memcpy(void *__restrict Dst, const void *__restrict Src, size_t Count);
void *memset(void *Dst, int Value, size_t Count);

} // namespace __llvm_libc
26 
27 namespace llvm {
28 namespace libc_benchmarks {
29 
// Memory functions this tool can benchmark. Kept as an unscoped enum because
// the enumerators are handed to clEnumVal when the `--function` flag is
// declared.
enum Function {
  memcpy,
  memset,
};
31 
32 static cl::opt<std::string>
33     StudyName("study-name", cl::desc("The name for this study"), cl::Required);
34 
35 static cl::opt<Function>
36     MemoryFunction("function", cl::desc("Sets the function to benchmark:"),
37                    cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"),
38                               clEnumVal(memset, "__llvm_libc::memset")),
39                    cl::Required);
40 
41 static cl::opt<std::string>
42     SizeDistributionName("size-distribution-name",
43                          cl::desc("The name of the distribution to use"));
44 
45 static cl::opt<bool>
46     SweepMode("sweep-mode",
47               cl::desc("If set, benchmark all sizes from 0 to sweep-max-size"));
48 
49 static cl::opt<uint32_t>
50     SweepMaxSize("sweep-max-size",
51                  cl::desc("The maximum size to use in sweep-mode"),
52                  cl::init(256));
53 
54 static cl::opt<uint32_t>
55     AlignedAccess("aligned-access",
56                   cl::desc("The alignment to use when accessing the buffers\n"
57                            "Default is unaligned\n"
58                            "Use 0 to disable address randomization"),
59                   cl::init(1));
60 
61 static cl::opt<std::string> Output("output",
62                                    cl::desc("Specify output filename"),
63                                    cl::value_desc("filename"), cl::init("-"));
64 
65 static cl::opt<uint32_t>
66     NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"),
67               cl::init(1));
68 
// Byte quantities used to derive the benchmark buffer size from the L1 data
// cache size: both the parameter storage and the left-aside amount are
// subtracted from the cache size before the buffers are sized.
static constexpr int64_t KiB = int64_t{1} << 10;
static constexpr int64_t ParameterStorageBytes = KiB * 4;
static constexpr int64_t L1LeftAsideBytes = KiB;
72 
// Arguments of a single benchmarked call: where in the buffer(s) the access
// starts and how many bytes it touches. Both fields are 16-bit bit-fields, so
// each can hold values in [0, 65535]; larger values assigned to them are
// truncated.
struct ParameterType {
  unsigned OffsetBytes : 16; // max : 64 KiB - 1 (65535)
  unsigned SizeBytes : 16;   // max : 64 KiB - 1 (65535)
};
77 
78 struct MemcpyBenchmark {
79   static constexpr auto GetDistributions = &getMemcpySizeDistributions;
80   static constexpr size_t BufferCount = 2;
81   static void amend(Study &S) { S.Configuration.Function = "memcpy"; }
82 
83   MemcpyBenchmark(const size_t BufferSize)
84       : SrcBuffer(BufferSize), DstBuffer(BufferSize) {}
85 
86   inline auto functor() {
87     return [this](ParameterType P) {
88       __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes,
89                           P.SizeBytes);
90       return DstBuffer + P.OffsetBytes;
91     };
92   }
93 
94   AlignedBuffer SrcBuffer;
95   AlignedBuffer DstBuffer;
96 };
97 
98 struct MemsetBenchmark {
99   static constexpr auto GetDistributions = &getMemsetSizeDistributions;
100   static constexpr size_t BufferCount = 1;
101   static void amend(Study &S) { S.Configuration.Function = "memset"; }
102 
103   MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}
104 
105   inline auto functor() {
106     return [this](ParameterType P) {
107       __llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF,
108                           P.SizeBytes);
109       return DstBuffer + P.OffsetBytes;
110     };
111   }
112 
113   AlignedBuffer DstBuffer;
114 };
115 
116 template <typename Benchmark> struct Harness : Benchmark {
117   using Benchmark::functor;
118 
119   Harness(const size_t BufferSize, size_t BatchParameterCount,
120           std::function<unsigned()> SizeSampler,
121           std::function<unsigned()> OffsetSampler)
122       : Benchmark(BufferSize), BufferSize(BufferSize),
123         BatchParameterCount(BatchParameterCount),
124         Parameters(BatchParameterCount), SizeSampler(SizeSampler),
125         OffsetSampler(OffsetSampler) {}
126 
127   CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
128     for (auto &P : Parameters) {
129       P.OffsetBytes = OffsetSampler();
130       P.SizeBytes = SizeSampler();
131       if (P.OffsetBytes + P.SizeBytes >= BufferSize)
132         report_fatal_error("Call would result in buffer overflow");
133     }
134     return cycle(makeArrayRef(Parameters), Iterations);
135   }
136 
137 private:
138   const size_t BufferSize;
139   const size_t BatchParameterCount;
140   std::vector<ParameterType> Parameters;
141   std::function<unsigned()> SizeSampler;
142   std::function<unsigned()> OffsetSampler;
143 };
144 
145 struct IBenchmark {
146   virtual ~IBenchmark() {}
147   virtual Study run() = 0;
148 };
149 
150 size_t getL1DataCacheSize() {
151   const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches;
152   const auto IsL1DataCache = [](const CacheInfo &CI) {
153     return CI.Type == "Data" && CI.Level == 1;
154   };
155   const auto CacheIt = find_if(CacheInfos, IsL1DataCache);
156   if (CacheIt != CacheInfos.end())
157     return CacheIt->Size;
158   report_fatal_error("Unable to read L1 Cache Data Size");
159 }
160 
161 template <typename Benchmark> struct MemfunctionBenchmark : IBenchmark {
162   MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize())
163       : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes),
164         BufferSize(AvailableSize / Benchmark::BufferCount),
165         BatchParameterCount(BufferSize / sizeof(ParameterType)) {
166     // Handling command line flags
167     if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100)
168       report_fatal_error("Not enough L1 cache");
169 
170     if (!isPowerOfTwoOrZero(AlignedAccess))
171       report_fatal_error(AlignedAccess.ArgStr +
172                          Twine(" must be a power of two or zero"));
173 
174     const bool HasDistributionName = !SizeDistributionName.empty();
175     if (SweepMode && HasDistributionName)
176       report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
177                          "` or `--" + Twine(SizeDistributionName.ArgStr) + "`");
178 
179     if (SweepMode) {
180       MaxSizeValue = SweepMaxSize;
181     } else {
182       std::map<StringRef, MemorySizeDistribution> Map;
183       for (MemorySizeDistribution Distribution : Benchmark::GetDistributions())
184         Map[Distribution.Name] = Distribution;
185       if (Map.count(SizeDistributionName) == 0) {
186         std::string Message;
187         raw_string_ostream Stream(Message);
188         Stream << "Unknown --" << SizeDistributionName.ArgStr << "='"
189                << SizeDistributionName << "', available distributions:\n";
190         for (const auto &Pair : Map)
191           Stream << "'" << Pair.first << "'\n";
192         report_fatal_error(Stream.str());
193       }
194       SizeDistribution = Map[SizeDistributionName];
195       MaxSizeValue = SizeDistribution.Probabilities.size() - 1;
196     }
197 
198     // Setup study.
199     Study.StudyName = StudyName;
200     Runtime &RI = Study.Runtime;
201     RI.Host = HostState::get();
202     RI.BufferSize = BufferSize;
203     RI.BatchParameterCount = BatchParameterCount;
204 
205     BenchmarkOptions &BO = RI.BenchmarkOptions;
206     BO.MinDuration = std::chrono::milliseconds(1);
207     BO.MaxDuration = std::chrono::seconds(1);
208     BO.MaxIterations = 10'000'000U;
209     BO.MinSamples = 4;
210     BO.MaxSamples = 1000;
211     BO.Epsilon = 0.01; // 1%
212     BO.ScalingFactor = 1.4;
213 
214     StudyConfiguration &SC = Study.Configuration;
215     SC.NumTrials = NumTrials;
216     SC.IsSweepMode = SweepMode;
217     if (SweepMode)
218       SC.SweepModeMaxSize = SweepMaxSize;
219     else
220       SC.SizeDistributionName = SizeDistributionName;
221     SC.AccessAlignment = MaybeAlign(AlignedAccess);
222 
223     // Delegate specific flags and configuration.
224     Benchmark::amend(Study);
225   }
226 
227   Study run() override {
228     if (SweepMode)
229       runSweepMode();
230     else
231       runDistributionMode();
232     return Study;
233   }
234 
235 private:
236   const int64_t AvailableSize;
237   const int64_t BufferSize;
238   const size_t BatchParameterCount;
239   size_t MaxSizeValue = 0;
240   MemorySizeDistribution SizeDistribution;
241   Study Study;
242   std::mt19937_64 Gen;
243 
244   static constexpr bool isPowerOfTwoOrZero(size_t Value) {
245     return (Value & (Value - 1U)) == 0;
246   }
247 
248   std::function<unsigned()> geOffsetSampler() {
249     return [this]() {
250       static OffsetDistribution OD(BufferSize, MaxSizeValue,
251                                    Study.Configuration.AccessAlignment);
252       return OD(Gen);
253     };
254   }
255 
256   std::function<unsigned()> getSizeSampler() {
257     return [this]() {
258       static std::discrete_distribution<unsigned> Distribution(
259           SizeDistribution.Probabilities.begin(),
260           SizeDistribution.Probabilities.end());
261       return Distribution(Gen);
262     };
263   }
264 
265   void reportProgress() {
266     static size_t LastPercent = -1;
267     const size_t TotalSteps = Study.Measurements.capacity();
268     const size_t Steps = Study.Measurements.size();
269     const size_t Percent = 100 * Steps / TotalSteps;
270     if (Percent == LastPercent)
271       return;
272     LastPercent = Percent;
273     size_t I = 0;
274     errs() << '[';
275     for (; I <= Percent; ++I)
276       errs() << '#';
277     for (; I <= 100; ++I)
278       errs() << '_';
279     errs() << "] " << Percent << '%' << '\r';
280   }
281 
282   void runTrials(const BenchmarkOptions &Options,
283                  std::function<unsigned()> SizeSampler,
284                  std::function<unsigned()> OffsetSampler) {
285     Harness<Benchmark> B(BufferSize, BatchParameterCount, SizeSampler,
286                          OffsetSampler);
287     for (size_t i = 0; i < NumTrials; ++i) {
288       const BenchmarkResult Result = benchmark(Options, B, B.functor());
289       Study.Measurements.push_back(Result.BestGuess);
290       reportProgress();
291     }
292   }
293 
294   void runSweepMode() {
295     Study.Measurements.reserve(NumTrials * SweepMaxSize);
296 
297     BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
298     BO.MinDuration = std::chrono::milliseconds(1);
299     BO.InitialIterations = 100;
300 
301     for (size_t Size = 0; Size <= SweepMaxSize; ++Size) {
302       const auto SizeSampler = [Size]() { return Size; };
303       runTrials(BO, SizeSampler, geOffsetSampler());
304     }
305   }
306 
307   void runDistributionMode() {
308     Study.Measurements.reserve(NumTrials);
309 
310     BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
311     BO.MinDuration = std::chrono::milliseconds(10);
312     BO.InitialIterations = BatchParameterCount * 10;
313 
314     runTrials(BO, getSizeSampler(), geOffsetSampler());
315   }
316 };
317 
318 std::unique_ptr<IBenchmark> getMemfunctionBenchmark() {
319   switch (MemoryFunction) {
320   case memcpy:
321     return std::make_unique<MemfunctionBenchmark<MemcpyBenchmark>>();
322   case memset:
323     return std::make_unique<MemfunctionBenchmark<MemsetBenchmark>>();
324   }
325 }
326 
327 void writeStudy(const Study &S) {
328   std::error_code EC;
329   raw_fd_ostream FOS(Output, EC);
330   if (EC)
331     report_fatal_error(Twine("Could not open file: ")
332                            .concat(EC.message())
333                            .concat(", ")
334                            .concat(Output));
335   json::OStream JOS(FOS);
336   serializeToJson(S, JOS);
337   FOS << "\n";
338 }
339 
340 void main() {
341   checkRequirements();
342   auto MB = getMemfunctionBenchmark();
343   writeStudy(MB->run());
344 }
345 
346 } // namespace libc_benchmarks
347 } // namespace llvm
348 
349 int main(int argc, char **argv) {
350   llvm::cl::ParseCommandLineOptions(argc, argv);
351 #ifndef NDEBUG
352   static_assert(
353       false,
354       "For reproducibility benchmarks should not be compiled in DEBUG mode.");
355 #endif
356   llvm::libc_benchmarks::main();
357   return EXIT_SUCCESS;
358 }
359