1 //===-- Benchmark memory specific tools -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // This file complements the `benchmark` header with memory specific tools and 10 // benchmarking facilities. 11 12 #ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H 13 #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H 14 15 #include "LibcBenchmark.h" 16 #include "LibcFunctionPrototypes.h" 17 #include "MemorySizeDistributions.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/Support/Alignment.h" 20 #include "llvm/Support/MathExtras.h" 21 #include <cstdint> 22 #include <optional> 23 #include <random> 24 25 namespace llvm { 26 namespace libc_benchmarks { 27 28 //-------------- 29 // Configuration 30 //-------------- 31 32 struct StudyConfiguration { 33 // One of 'memcpy', 'memset', 'memcmp'. 34 // The underlying implementation is always the llvm libc one. 35 // e.g. 'memcpy' will test 'LIBC_NAMESPACE::memcpy' 36 std::string Function; 37 38 // The number of trials to run for this benchmark. 39 // If in SweepMode, each individual sizes are measured 'NumTrials' time. 40 // i.e 'NumTrials' measurements for 0, 'NumTrials' measurements for 1 ... 41 uint32_t NumTrials = 1; 42 43 // Toggles between Sweep Mode and Distribution Mode (default). 44 // See 'SweepModeMaxSize' and 'SizeDistributionName' below. 45 bool IsSweepMode = false; 46 47 // Maximum size to use when measuring a ramp of size values (SweepMode). 48 // The benchmark measures all sizes from 0 to SweepModeMaxSize. 49 // Note: in sweep mode the same size is sampled several times in a row this 50 // will allow the processor to learn it and optimize the branching pattern. 51 // The resulting measurement is likely to be idealized. 52 uint32_t SweepModeMaxSize = 0; // inclusive 53 54 // The name of the distribution to be used to randomize the size parameter. 55 // This is used when SweepMode is false (default). 56 std::string SizeDistributionName; 57 58 // This parameter allows to control how the buffers are accessed during 59 // benchmark: 60 // None : Use a fixed address that is at least cache line aligned, 61 // 1 : Use random address, 62 // >1 : Use random address aligned to value. 63 MaybeAlign AccessAlignment = std::nullopt; 64 65 // When Function == 'memcmp', this is the buffers mismatch position. 66 // 0 : Buffers always compare equal, 67 // >0 : Buffers compare different at byte N-1. 68 uint32_t MemcmpMismatchAt = 0; 69 }; 70 71 struct Runtime { 72 // Details about the Host (cpu name, cpu frequency, cache hierarchy). 73 HostState Host; 74 75 // The framework will populate this value so all data accessed during the 76 // benchmark will stay in L1 data cache. This includes bookkeeping data. 77 uint32_t BufferSize = 0; 78 79 // This is the number of distinct parameters used in a single batch. 80 // The framework always tests a batch of randomized parameter to prevent the 81 // cpu from learning branching patterns. 82 uint32_t BatchParameterCount = 0; 83 84 // The benchmark options that were used to perform the measurement. 85 // This is decided by the framework. 86 BenchmarkOptions BenchmarkOptions; 87 }; 88 89 //-------- 90 // Results 91 //-------- 92 93 // The root object containing all the data (configuration and measurements). 94 struct Study { 95 std::string StudyName; 96 Runtime Runtime; 97 StudyConfiguration Configuration; 98 std::vector<Duration> Measurements; 99 }; 100 101 //------ 102 // Utils 103 //------ 104 105 // Provides an aligned, dynamically allocated buffer. 106 class AlignedBuffer { 107 char *const Buffer = nullptr; 108 size_t Size = 0; 109 110 public: 111 static constexpr size_t Alignment = 512; 112 AlignedBuffer(size_t Size)113 explicit AlignedBuffer(size_t Size) 114 : Buffer(static_cast<char *>( 115 aligned_alloc(Alignment, alignTo(Size, Alignment)))), 116 Size(Size) {} ~AlignedBuffer()117 ~AlignedBuffer() { free(Buffer); } 118 119 inline char *operator+(size_t Index) { return Buffer + Index; } 120 inline const char *operator+(size_t Index) const { return Buffer + Index; } 121 inline char &operator[](size_t Index) { return Buffer[Index]; } 122 inline const char &operator[](size_t Index) const { return Buffer[Index]; } begin()123 inline char *begin() { return Buffer; } end()124 inline char *end() { return Buffer + Size; } 125 }; 126 127 // Helper to generate random buffer offsets that satisfy the configuration 128 // constraints. 129 class OffsetDistribution { 130 std::uniform_int_distribution<uint32_t> Distribution; 131 uint32_t Factor; 132 133 public: 134 explicit OffsetDistribution(size_t BufferSize, size_t MaxSizeValue, 135 MaybeAlign AccessAlignment); 136 operator()137 template <class Generator> uint32_t operator()(Generator &G) { 138 return Distribution(G) * Factor; 139 } 140 }; 141 142 // Helper to generate random buffer offsets that satisfy the configuration 143 // constraints. It is specifically designed to benchmark `memcmp` functions 144 // where we may want the Nth byte to differ. 145 class MismatchOffsetDistribution { 146 std::uniform_int_distribution<size_t> MismatchIndexSelector; 147 llvm::SmallVector<uint32_t, 16> MismatchIndices; 148 const uint32_t MismatchAt; 149 150 public: 151 explicit MismatchOffsetDistribution(size_t BufferSize, size_t MaxSizeValue, 152 size_t MismatchAt); 153 154 explicit operator bool() const { return !MismatchIndices.empty(); } 155 getMismatchIndices()156 const llvm::SmallVectorImpl<uint32_t> &getMismatchIndices() const { 157 return MismatchIndices; 158 } 159 operator()160 template <class Generator> uint32_t operator()(Generator &G, uint32_t Size) { 161 const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)]; 162 // We need to position the offset so that a mismatch occurs at MismatchAt. 163 if (Size >= MismatchAt) 164 return MismatchIndex - MismatchAt; 165 // Size is too small to trigger the mismatch. 166 return MismatchIndex - Size - 1; 167 } 168 }; 169 170 /// This structure holds a vector of ParameterType. 171 /// It makes sure that BufferCount x BufferSize Bytes and the vector of 172 /// ParameterType can all fit in the L1 cache. 173 struct ParameterBatch { 174 struct ParameterType { 175 unsigned OffsetBytes : 16; // max : 16 KiB - 1 176 unsigned SizeBytes : 16; // max : 16 KiB - 1 177 }; 178 179 ParameterBatch(size_t BufferCount); 180 181 /// Verifies that memory accessed through this parameter is valid. 182 void checkValid(const ParameterType &) const; 183 184 /// Computes the number of bytes processed during within this batch. 185 size_t getBatchBytes() const; 186 187 const size_t BufferSize; 188 const size_t BatchSize; 189 std::vector<ParameterType> Parameters; 190 }; 191 192 /// Provides source and destination buffers for the Copy operation as well as 193 /// the associated size distributions. 194 struct CopySetup : public ParameterBatch { 195 CopySetup(); 196 getDistributionsCopySetup197 inline static const ArrayRef<MemorySizeDistribution> getDistributions() { 198 return getMemcpySizeDistributions(); 199 } 200 CallCopySetup201 inline void *Call(ParameterType Parameter, MemcpyFunction Memcpy) { 202 return Memcpy(DstBuffer + Parameter.OffsetBytes, 203 SrcBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); 204 } 205 206 private: 207 AlignedBuffer SrcBuffer; 208 AlignedBuffer DstBuffer; 209 }; 210 211 /// Provides source and destination buffers for the Move operation as well as 212 /// the associated size distributions. 213 struct MoveSetup : public ParameterBatch { 214 MoveSetup(); 215 getDistributionsMoveSetup216 inline static const ArrayRef<MemorySizeDistribution> getDistributions() { 217 return getMemmoveSizeDistributions(); 218 } 219 CallMoveSetup220 inline void *Call(ParameterType Parameter, MemmoveFunction Memmove) { 221 return Memmove(Buffer + ParameterBatch::BufferSize / 3, 222 Buffer + Parameter.OffsetBytes, Parameter.SizeBytes); 223 } 224 225 private: 226 AlignedBuffer Buffer; 227 }; 228 229 /// Provides destination buffer for the Set operation as well as the associated 230 /// size distributions. 231 struct SetSetup : public ParameterBatch { 232 SetSetup(); 233 getDistributionsSetSetup234 inline static const ArrayRef<MemorySizeDistribution> getDistributions() { 235 return getMemsetSizeDistributions(); 236 } 237 CallSetSetup238 inline void *Call(ParameterType Parameter, MemsetFunction Memset) { 239 return Memset(DstBuffer + Parameter.OffsetBytes, 240 Parameter.OffsetBytes % 0xFF, Parameter.SizeBytes); 241 } 242 CallSetSetup243 inline void *Call(ParameterType Parameter, BzeroFunction Bzero) { 244 Bzero(DstBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); 245 return DstBuffer.begin(); 246 } 247 248 private: 249 AlignedBuffer DstBuffer; 250 }; 251 252 /// Provides left and right buffers for the Comparison operation as well as the 253 /// associated size distributions. 254 struct ComparisonSetup : public ParameterBatch { 255 ComparisonSetup(); 256 getDistributionsComparisonSetup257 inline static const ArrayRef<MemorySizeDistribution> getDistributions() { 258 return getMemcmpSizeDistributions(); 259 } 260 CallComparisonSetup261 inline int Call(ParameterType Parameter, MemcmpOrBcmpFunction MemcmpOrBcmp) { 262 return MemcmpOrBcmp(LhsBuffer + Parameter.OffsetBytes, 263 RhsBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); 264 } 265 266 private: 267 AlignedBuffer LhsBuffer; 268 AlignedBuffer RhsBuffer; 269 }; 270 271 } // namespace libc_benchmarks 272 } // namespace llvm 273 274 #endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H 275