xref: /llvm-project/libc/benchmarks/LibcMemoryBenchmark.h (revision b6bc9d72f65a5086f310f321e969d96e9a559e75)
1438f7fc0SSiva Chandra Reddy //===-- Benchmark memory specific tools -------------------------*- C++ -*-===//
2438f7fc0SSiva Chandra Reddy //
3438f7fc0SSiva Chandra Reddy // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4438f7fc0SSiva Chandra Reddy // See https://llvm.org/LICENSE.txt for license information.
5438f7fc0SSiva Chandra Reddy // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6438f7fc0SSiva Chandra Reddy //
7438f7fc0SSiva Chandra Reddy //===----------------------------------------------------------------------===//
8438f7fc0SSiva Chandra Reddy 
9438f7fc0SSiva Chandra Reddy // This file complements the `benchmark` header with memory specific tools and
10438f7fc0SSiva Chandra Reddy // benchmarking facilities.
11438f7fc0SSiva Chandra Reddy 
12438f7fc0SSiva Chandra Reddy #ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
13438f7fc0SSiva Chandra Reddy #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
14438f7fc0SSiva Chandra Reddy 
15438f7fc0SSiva Chandra Reddy #include "LibcBenchmark.h"
16adc18ad6SGuillaume Chatelet #include "LibcFunctionPrototypes.h"
17d3c70d9fSGuillaume Chatelet #include "MemorySizeDistributions.h"
18438f7fc0SSiva Chandra Reddy #include "llvm/ADT/SmallVector.h"
19438f7fc0SSiva Chandra Reddy #include "llvm/Support/Alignment.h"
209abc1e08SVitaly Buka #include "llvm/Support/MathExtras.h"
21438f7fc0SSiva Chandra Reddy #include <cstdint>
228dcb7f6bSSiva Chandra Reddy #include <optional>
23438f7fc0SSiva Chandra Reddy #include <random>
24438f7fc0SSiva Chandra Reddy 
25438f7fc0SSiva Chandra Reddy namespace llvm {
26438f7fc0SSiva Chandra Reddy namespace libc_benchmarks {
27438f7fc0SSiva Chandra Reddy 
28438f7fc0SSiva Chandra Reddy //--------------
29438f7fc0SSiva Chandra Reddy // Configuration
30438f7fc0SSiva Chandra Reddy //--------------
31438f7fc0SSiva Chandra Reddy 
32438f7fc0SSiva Chandra Reddy struct StudyConfiguration {
33deae7e98SGuillaume Chatelet   // One of 'memcpy', 'memset', 'memcmp'.
34deae7e98SGuillaume Chatelet   // The underlying implementation is always the llvm libc one.
35*b6bc9d72SGuillaume Chatelet   // e.g. 'memcpy' will test 'LIBC_NAMESPACE::memcpy'
36deae7e98SGuillaume Chatelet   std::string Function;
37438f7fc0SSiva Chandra Reddy 
38deae7e98SGuillaume Chatelet   // The number of trials to run for this benchmark.
39deae7e98SGuillaume Chatelet   // If in SweepMode, each individual sizes are measured 'NumTrials' time.
40deae7e98SGuillaume Chatelet   // i.e 'NumTrials' measurements for 0, 'NumTrials' measurements for 1 ...
41deae7e98SGuillaume Chatelet   uint32_t NumTrials = 1;
42438f7fc0SSiva Chandra Reddy 
43deae7e98SGuillaume Chatelet   // Toggles between Sweep Mode and Distribution Mode (default).
44deae7e98SGuillaume Chatelet   // See 'SweepModeMaxSize' and 'SizeDistributionName' below.
45deae7e98SGuillaume Chatelet   bool IsSweepMode = false;
46438f7fc0SSiva Chandra Reddy 
47deae7e98SGuillaume Chatelet   // Maximum size to use when measuring a ramp of size values (SweepMode).
48deae7e98SGuillaume Chatelet   // The benchmark measures all sizes from 0 to SweepModeMaxSize.
49deae7e98SGuillaume Chatelet   // Note: in sweep mode the same size is sampled several times in a row this
50deae7e98SGuillaume Chatelet   // will allow the processor to learn it and optimize the branching pattern.
51deae7e98SGuillaume Chatelet   // The resulting measurement is likely to be idealized.
52deae7e98SGuillaume Chatelet   uint32_t SweepModeMaxSize = 0; // inclusive
53deae7e98SGuillaume Chatelet 
54deae7e98SGuillaume Chatelet   // The name of the distribution to be used to randomize the size parameter.
55deae7e98SGuillaume Chatelet   // This is used when SweepMode is false (default).
56deae7e98SGuillaume Chatelet   std::string SizeDistributionName;
57deae7e98SGuillaume Chatelet 
58deae7e98SGuillaume Chatelet   // This parameter allows to control how the buffers are accessed during
59deae7e98SGuillaume Chatelet   // benchmark:
60deae7e98SGuillaume Chatelet   // None : Use a fixed address that is at least cache line aligned,
61438f7fc0SSiva Chandra Reddy   //    1 : Use random address,
62438f7fc0SSiva Chandra Reddy   //   >1 : Use random address aligned to value.
638dcb7f6bSSiva Chandra Reddy   MaybeAlign AccessAlignment = std::nullopt;
64438f7fc0SSiva Chandra Reddy 
65deae7e98SGuillaume Chatelet   // When Function == 'memcmp', this is the buffers mismatch position.
66deae7e98SGuillaume Chatelet   //  0 : Buffers always compare equal,
67deae7e98SGuillaume Chatelet   // >0 : Buffers compare different at byte N-1.
68deae7e98SGuillaume Chatelet   uint32_t MemcmpMismatchAt = 0;
69deae7e98SGuillaume Chatelet };
70438f7fc0SSiva Chandra Reddy 
71deae7e98SGuillaume Chatelet struct Runtime {
72deae7e98SGuillaume Chatelet   // Details about the Host (cpu name, cpu frequency, cache hierarchy).
73deae7e98SGuillaume Chatelet   HostState Host;
74deae7e98SGuillaume Chatelet 
75deae7e98SGuillaume Chatelet   // The framework will populate this value so all data accessed during the
76deae7e98SGuillaume Chatelet   // benchmark will stay in L1 data cache. This includes bookkeeping data.
77deae7e98SGuillaume Chatelet   uint32_t BufferSize = 0;
78deae7e98SGuillaume Chatelet 
79deae7e98SGuillaume Chatelet   // This is the number of distinct parameters used in a single batch.
80deae7e98SGuillaume Chatelet   // The framework always tests a batch of randomized parameter to prevent the
81deae7e98SGuillaume Chatelet   // cpu from learning branching patterns.
82deae7e98SGuillaume Chatelet   uint32_t BatchParameterCount = 0;
83deae7e98SGuillaume Chatelet 
84deae7e98SGuillaume Chatelet   // The benchmark options that were used to perform the measurement.
85deae7e98SGuillaume Chatelet   // This is decided by the framework.
86deae7e98SGuillaume Chatelet   BenchmarkOptions BenchmarkOptions;
87438f7fc0SSiva Chandra Reddy };
88438f7fc0SSiva Chandra Reddy 
89438f7fc0SSiva Chandra Reddy //--------
90438f7fc0SSiva Chandra Reddy // Results
91438f7fc0SSiva Chandra Reddy //--------
92438f7fc0SSiva Chandra Reddy 
93438f7fc0SSiva Chandra Reddy // The root object containing all the data (configuration and measurements).
94438f7fc0SSiva Chandra Reddy struct Study {
95deae7e98SGuillaume Chatelet   std::string StudyName;
96deae7e98SGuillaume Chatelet   Runtime Runtime;
97438f7fc0SSiva Chandra Reddy   StudyConfiguration Configuration;
98deae7e98SGuillaume Chatelet   std::vector<Duration> Measurements;
99438f7fc0SSiva Chandra Reddy };
100438f7fc0SSiva Chandra Reddy 
101deae7e98SGuillaume Chatelet //------
102deae7e98SGuillaume Chatelet // Utils
103deae7e98SGuillaume Chatelet //------
104deae7e98SGuillaume Chatelet 
105438f7fc0SSiva Chandra Reddy // Provides an aligned, dynamically allocated buffer.
106438f7fc0SSiva Chandra Reddy class AlignedBuffer {
107438f7fc0SSiva Chandra Reddy   char *const Buffer = nullptr;
108438f7fc0SSiva Chandra Reddy   size_t Size = 0;
109438f7fc0SSiva Chandra Reddy 
110438f7fc0SSiva Chandra Reddy public:
111219a1d21SGuillaume Chatelet   static constexpr size_t Alignment = 512;
112438f7fc0SSiva Chandra Reddy 
AlignedBuffer(size_t Size)113438f7fc0SSiva Chandra Reddy   explicit AlignedBuffer(size_t Size)
1149abc1e08SVitaly Buka       : Buffer(static_cast<char *>(
1159abc1e08SVitaly Buka             aligned_alloc(Alignment, alignTo(Size, Alignment)))),
116deae7e98SGuillaume Chatelet         Size(Size) {}
~AlignedBuffer()117438f7fc0SSiva Chandra Reddy   ~AlignedBuffer() { free(Buffer); }
118438f7fc0SSiva Chandra Reddy 
119438f7fc0SSiva Chandra Reddy   inline char *operator+(size_t Index) { return Buffer + Index; }
120438f7fc0SSiva Chandra Reddy   inline const char *operator+(size_t Index) const { return Buffer + Index; }
121438f7fc0SSiva Chandra Reddy   inline char &operator[](size_t Index) { return Buffer[Index]; }
122438f7fc0SSiva Chandra Reddy   inline const char &operator[](size_t Index) const { return Buffer[Index]; }
begin()123438f7fc0SSiva Chandra Reddy   inline char *begin() { return Buffer; }
end()124438f7fc0SSiva Chandra Reddy   inline char *end() { return Buffer + Size; }
125438f7fc0SSiva Chandra Reddy };
126438f7fc0SSiva Chandra Reddy 
127438f7fc0SSiva Chandra Reddy // Helper to generate random buffer offsets that satisfy the configuration
128438f7fc0SSiva Chandra Reddy // constraints.
129438f7fc0SSiva Chandra Reddy class OffsetDistribution {
130438f7fc0SSiva Chandra Reddy   std::uniform_int_distribution<uint32_t> Distribution;
131438f7fc0SSiva Chandra Reddy   uint32_t Factor;
132438f7fc0SSiva Chandra Reddy 
133438f7fc0SSiva Chandra Reddy public:
134deae7e98SGuillaume Chatelet   explicit OffsetDistribution(size_t BufferSize, size_t MaxSizeValue,
135deae7e98SGuillaume Chatelet                               MaybeAlign AccessAlignment);
136438f7fc0SSiva Chandra Reddy 
operator()137438f7fc0SSiva Chandra Reddy   template <class Generator> uint32_t operator()(Generator &G) {
138438f7fc0SSiva Chandra Reddy     return Distribution(G) * Factor;
139438f7fc0SSiva Chandra Reddy   }
140438f7fc0SSiva Chandra Reddy };
141438f7fc0SSiva Chandra Reddy 
142438f7fc0SSiva Chandra Reddy // Helper to generate random buffer offsets that satisfy the configuration
143438f7fc0SSiva Chandra Reddy // constraints. It is specifically designed to benchmark `memcmp` functions
144438f7fc0SSiva Chandra Reddy // where we may want the Nth byte to differ.
145438f7fc0SSiva Chandra Reddy class MismatchOffsetDistribution {
146438f7fc0SSiva Chandra Reddy   std::uniform_int_distribution<size_t> MismatchIndexSelector;
147438f7fc0SSiva Chandra Reddy   llvm::SmallVector<uint32_t, 16> MismatchIndices;
148438f7fc0SSiva Chandra Reddy   const uint32_t MismatchAt;
149438f7fc0SSiva Chandra Reddy 
150438f7fc0SSiva Chandra Reddy public:
151deae7e98SGuillaume Chatelet   explicit MismatchOffsetDistribution(size_t BufferSize, size_t MaxSizeValue,
152deae7e98SGuillaume Chatelet                                       size_t MismatchAt);
153438f7fc0SSiva Chandra Reddy 
154438f7fc0SSiva Chandra Reddy   explicit operator bool() const { return !MismatchIndices.empty(); }
155438f7fc0SSiva Chandra Reddy 
getMismatchIndices()156438f7fc0SSiva Chandra Reddy   const llvm::SmallVectorImpl<uint32_t> &getMismatchIndices() const {
157438f7fc0SSiva Chandra Reddy     return MismatchIndices;
158438f7fc0SSiva Chandra Reddy   }
159438f7fc0SSiva Chandra Reddy 
operator()160438f7fc0SSiva Chandra Reddy   template <class Generator> uint32_t operator()(Generator &G, uint32_t Size) {
161438f7fc0SSiva Chandra Reddy     const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)];
162438f7fc0SSiva Chandra Reddy     // We need to position the offset so that a mismatch occurs at MismatchAt.
163438f7fc0SSiva Chandra Reddy     if (Size >= MismatchAt)
164438f7fc0SSiva Chandra Reddy       return MismatchIndex - MismatchAt;
165438f7fc0SSiva Chandra Reddy     // Size is too small to trigger the mismatch.
166438f7fc0SSiva Chandra Reddy     return MismatchIndex - Size - 1;
167438f7fc0SSiva Chandra Reddy   }
168438f7fc0SSiva Chandra Reddy };
169438f7fc0SSiva Chandra Reddy 
/// Holds a vector of ParameterType.
/// It makes sure that BufferCount x BufferSize bytes and the vector of
/// ParameterType can all fit in the L1 cache.
struct ParameterBatch {
  /// One set of arguments for a benchmarked call, packed into two 16-bit
  /// fields.
  struct ParameterType {
    unsigned OffsetBytes : 16; // 16-bit field, representable range [0, 65535]
    unsigned SizeBytes : 16;   // 16-bit field, representable range [0, 65535]
  };

  ParameterBatch(size_t BufferCount);

  /// Verifies that memory accessed through this parameter is valid.
  void checkValid(const ParameterType &) const;

  /// Computes the number of bytes processed within this batch.
  size_t getBatchBytes() const;

  const size_t BufferSize;
  const size_t BatchSize;
  std::vector<ParameterType> Parameters;
};
191d3c70d9fSGuillaume Chatelet 
192d3c70d9fSGuillaume Chatelet /// Provides source and destination buffers for the Copy operation as well as
193d3c70d9fSGuillaume Chatelet /// the associated size distributions.
194e4dee762SGuillaume Chatelet struct CopySetup : public ParameterBatch {
195e4dee762SGuillaume Chatelet   CopySetup();
196d3c70d9fSGuillaume Chatelet 
getDistributionsCopySetup19759198d06SGuillaume Chatelet   inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
19859198d06SGuillaume Chatelet     return getMemcpySizeDistributions();
19959198d06SGuillaume Chatelet   }
200d3c70d9fSGuillaume Chatelet 
CallCopySetup201e4dee762SGuillaume Chatelet   inline void *Call(ParameterType Parameter, MemcpyFunction Memcpy) {
202e4dee762SGuillaume Chatelet     return Memcpy(DstBuffer + Parameter.OffsetBytes,
203d3c70d9fSGuillaume Chatelet                   SrcBuffer + Parameter.OffsetBytes, Parameter.SizeBytes);
204d3c70d9fSGuillaume Chatelet   }
205d3c70d9fSGuillaume Chatelet 
206d3c70d9fSGuillaume Chatelet private:
207d3c70d9fSGuillaume Chatelet   AlignedBuffer SrcBuffer;
208d3c70d9fSGuillaume Chatelet   AlignedBuffer DstBuffer;
209d3c70d9fSGuillaume Chatelet };
210d3c70d9fSGuillaume Chatelet 
211de21f346SGuillaume Chatelet /// Provides source and destination buffers for the Move operation as well as
212de21f346SGuillaume Chatelet /// the associated size distributions.
213de21f346SGuillaume Chatelet struct MoveSetup : public ParameterBatch {
214de21f346SGuillaume Chatelet   MoveSetup();
215de21f346SGuillaume Chatelet 
getDistributionsMoveSetup216de21f346SGuillaume Chatelet   inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
217de21f346SGuillaume Chatelet     return getMemmoveSizeDistributions();
218de21f346SGuillaume Chatelet   }
219de21f346SGuillaume Chatelet 
CallMoveSetup220de21f346SGuillaume Chatelet   inline void *Call(ParameterType Parameter, MemmoveFunction Memmove) {
221de21f346SGuillaume Chatelet     return Memmove(Buffer + ParameterBatch::BufferSize / 3,
222de21f346SGuillaume Chatelet                    Buffer + Parameter.OffsetBytes, Parameter.SizeBytes);
223de21f346SGuillaume Chatelet   }
224de21f346SGuillaume Chatelet 
225de21f346SGuillaume Chatelet private:
226de21f346SGuillaume Chatelet   AlignedBuffer Buffer;
227de21f346SGuillaume Chatelet };
228de21f346SGuillaume Chatelet 
229d3c70d9fSGuillaume Chatelet /// Provides destination buffer for the Set operation as well as the associated
230d3c70d9fSGuillaume Chatelet /// size distributions.
231e4dee762SGuillaume Chatelet struct SetSetup : public ParameterBatch {
232e4dee762SGuillaume Chatelet   SetSetup();
233d3c70d9fSGuillaume Chatelet 
getDistributionsSetSetup23459198d06SGuillaume Chatelet   inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
23559198d06SGuillaume Chatelet     return getMemsetSizeDistributions();
23659198d06SGuillaume Chatelet   }
237d3c70d9fSGuillaume Chatelet 
CallSetSetup238e4dee762SGuillaume Chatelet   inline void *Call(ParameterType Parameter, MemsetFunction Memset) {
239e4dee762SGuillaume Chatelet     return Memset(DstBuffer + Parameter.OffsetBytes,
240d3c70d9fSGuillaume Chatelet                   Parameter.OffsetBytes % 0xFF, Parameter.SizeBytes);
241d3c70d9fSGuillaume Chatelet   }
242d3c70d9fSGuillaume Chatelet 
CallSetSetup243e4dee762SGuillaume Chatelet   inline void *Call(ParameterType Parameter, BzeroFunction Bzero) {
244e4dee762SGuillaume Chatelet     Bzero(DstBuffer + Parameter.OffsetBytes, Parameter.SizeBytes);
245d3c70d9fSGuillaume Chatelet     return DstBuffer.begin();
246d3c70d9fSGuillaume Chatelet   }
247d3c70d9fSGuillaume Chatelet 
248d3c70d9fSGuillaume Chatelet private:
249d3c70d9fSGuillaume Chatelet   AlignedBuffer DstBuffer;
250d3c70d9fSGuillaume Chatelet };
251d3c70d9fSGuillaume Chatelet 
252d3c70d9fSGuillaume Chatelet /// Provides left and right buffers for the Comparison operation as well as the
253d3c70d9fSGuillaume Chatelet /// associated size distributions.
254e4dee762SGuillaume Chatelet struct ComparisonSetup : public ParameterBatch {
255e4dee762SGuillaume Chatelet   ComparisonSetup();
256d3c70d9fSGuillaume Chatelet 
getDistributionsComparisonSetup25759198d06SGuillaume Chatelet   inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
25859198d06SGuillaume Chatelet     return getMemcmpSizeDistributions();
25959198d06SGuillaume Chatelet   }
260d3c70d9fSGuillaume Chatelet 
CallComparisonSetup2614a9bcb60SGuillaume Chatelet   inline int Call(ParameterType Parameter, MemcmpOrBcmpFunction MemcmpOrBcmp) {
2624a9bcb60SGuillaume Chatelet     return MemcmpOrBcmp(LhsBuffer + Parameter.OffsetBytes,
263d3c70d9fSGuillaume Chatelet                         RhsBuffer + Parameter.OffsetBytes, Parameter.SizeBytes);
264d3c70d9fSGuillaume Chatelet   }
265d3c70d9fSGuillaume Chatelet 
266d3c70d9fSGuillaume Chatelet private:
267d3c70d9fSGuillaume Chatelet   AlignedBuffer LhsBuffer;
268d3c70d9fSGuillaume Chatelet   AlignedBuffer RhsBuffer;
269d3c70d9fSGuillaume Chatelet };
270d3c70d9fSGuillaume Chatelet 
271438f7fc0SSiva Chandra Reddy } // namespace libc_benchmarks
272438f7fc0SSiva Chandra Reddy } // namespace llvm
273438f7fc0SSiva Chandra Reddy 
274438f7fc0SSiva Chandra Reddy #endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
275