1438f7fc0SSiva Chandra Reddy //===-- Benchmark memory specific tools -------------------------*- C++ -*-===// 2438f7fc0SSiva Chandra Reddy // 3438f7fc0SSiva Chandra Reddy // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4438f7fc0SSiva Chandra Reddy // See https://llvm.org/LICENSE.txt for license information. 5438f7fc0SSiva Chandra Reddy // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6438f7fc0SSiva Chandra Reddy // 7438f7fc0SSiva Chandra Reddy //===----------------------------------------------------------------------===// 8438f7fc0SSiva Chandra Reddy 9438f7fc0SSiva Chandra Reddy // This file complements the `benchmark` header with memory specific tools and 10438f7fc0SSiva Chandra Reddy // benchmarking facilities. 11438f7fc0SSiva Chandra Reddy 12438f7fc0SSiva Chandra Reddy #ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H 13438f7fc0SSiva Chandra Reddy #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H 14438f7fc0SSiva Chandra Reddy 15438f7fc0SSiva Chandra Reddy #include "LibcBenchmark.h" 16adc18ad6SGuillaume Chatelet #include "LibcFunctionPrototypes.h" 17d3c70d9fSGuillaume Chatelet #include "MemorySizeDistributions.h" 18438f7fc0SSiva Chandra Reddy #include "llvm/ADT/SmallVector.h" 19438f7fc0SSiva Chandra Reddy #include "llvm/Support/Alignment.h" 209abc1e08SVitaly Buka #include "llvm/Support/MathExtras.h" 21438f7fc0SSiva Chandra Reddy #include <cstdint> 228dcb7f6bSSiva Chandra Reddy #include <optional> 23438f7fc0SSiva Chandra Reddy #include <random> 24438f7fc0SSiva Chandra Reddy 25438f7fc0SSiva Chandra Reddy namespace llvm { 26438f7fc0SSiva Chandra Reddy namespace libc_benchmarks { 27438f7fc0SSiva Chandra Reddy 28438f7fc0SSiva Chandra Reddy //-------------- 29438f7fc0SSiva Chandra Reddy // Configuration 30438f7fc0SSiva Chandra Reddy //-------------- 31438f7fc0SSiva Chandra Reddy 32438f7fc0SSiva Chandra Reddy struct StudyConfiguration { 33deae7e98SGuillaume Chatelet // One of 'memcpy', 'memset', 'memcmp'. 34deae7e98SGuillaume Chatelet // The underlying implementation is always the llvm libc one. 35*b6bc9d72SGuillaume Chatelet // e.g. 'memcpy' will test 'LIBC_NAMESPACE::memcpy' 36deae7e98SGuillaume Chatelet std::string Function; 37438f7fc0SSiva Chandra Reddy 38deae7e98SGuillaume Chatelet // The number of trials to run for this benchmark. 39deae7e98SGuillaume Chatelet // If in SweepMode, each individual sizes are measured 'NumTrials' time. 40deae7e98SGuillaume Chatelet // i.e 'NumTrials' measurements for 0, 'NumTrials' measurements for 1 ... 41deae7e98SGuillaume Chatelet uint32_t NumTrials = 1; 42438f7fc0SSiva Chandra Reddy 43deae7e98SGuillaume Chatelet // Toggles between Sweep Mode and Distribution Mode (default). 44deae7e98SGuillaume Chatelet // See 'SweepModeMaxSize' and 'SizeDistributionName' below. 45deae7e98SGuillaume Chatelet bool IsSweepMode = false; 46438f7fc0SSiva Chandra Reddy 47deae7e98SGuillaume Chatelet // Maximum size to use when measuring a ramp of size values (SweepMode). 48deae7e98SGuillaume Chatelet // The benchmark measures all sizes from 0 to SweepModeMaxSize. 49deae7e98SGuillaume Chatelet // Note: in sweep mode the same size is sampled several times in a row this 50deae7e98SGuillaume Chatelet // will allow the processor to learn it and optimize the branching pattern. 51deae7e98SGuillaume Chatelet // The resulting measurement is likely to be idealized. 52deae7e98SGuillaume Chatelet uint32_t SweepModeMaxSize = 0; // inclusive 53deae7e98SGuillaume Chatelet 54deae7e98SGuillaume Chatelet // The name of the distribution to be used to randomize the size parameter. 55deae7e98SGuillaume Chatelet // This is used when SweepMode is false (default). 56deae7e98SGuillaume Chatelet std::string SizeDistributionName; 57deae7e98SGuillaume Chatelet 58deae7e98SGuillaume Chatelet // This parameter allows to control how the buffers are accessed during 59deae7e98SGuillaume Chatelet // benchmark: 60deae7e98SGuillaume Chatelet // None : Use a fixed address that is at least cache line aligned, 61438f7fc0SSiva Chandra Reddy // 1 : Use random address, 62438f7fc0SSiva Chandra Reddy // >1 : Use random address aligned to value. 638dcb7f6bSSiva Chandra Reddy MaybeAlign AccessAlignment = std::nullopt; 64438f7fc0SSiva Chandra Reddy 65deae7e98SGuillaume Chatelet // When Function == 'memcmp', this is the buffers mismatch position. 66deae7e98SGuillaume Chatelet // 0 : Buffers always compare equal, 67deae7e98SGuillaume Chatelet // >0 : Buffers compare different at byte N-1. 68deae7e98SGuillaume Chatelet uint32_t MemcmpMismatchAt = 0; 69deae7e98SGuillaume Chatelet }; 70438f7fc0SSiva Chandra Reddy 71deae7e98SGuillaume Chatelet struct Runtime { 72deae7e98SGuillaume Chatelet // Details about the Host (cpu name, cpu frequency, cache hierarchy). 73deae7e98SGuillaume Chatelet HostState Host; 74deae7e98SGuillaume Chatelet 75deae7e98SGuillaume Chatelet // The framework will populate this value so all data accessed during the 76deae7e98SGuillaume Chatelet // benchmark will stay in L1 data cache. This includes bookkeeping data. 77deae7e98SGuillaume Chatelet uint32_t BufferSize = 0; 78deae7e98SGuillaume Chatelet 79deae7e98SGuillaume Chatelet // This is the number of distinct parameters used in a single batch. 80deae7e98SGuillaume Chatelet // The framework always tests a batch of randomized parameter to prevent the 81deae7e98SGuillaume Chatelet // cpu from learning branching patterns. 82deae7e98SGuillaume Chatelet uint32_t BatchParameterCount = 0; 83deae7e98SGuillaume Chatelet 84deae7e98SGuillaume Chatelet // The benchmark options that were used to perform the measurement. 85deae7e98SGuillaume Chatelet // This is decided by the framework. 86deae7e98SGuillaume Chatelet BenchmarkOptions BenchmarkOptions; 87438f7fc0SSiva Chandra Reddy }; 88438f7fc0SSiva Chandra Reddy 89438f7fc0SSiva Chandra Reddy //-------- 90438f7fc0SSiva Chandra Reddy // Results 91438f7fc0SSiva Chandra Reddy //-------- 92438f7fc0SSiva Chandra Reddy 93438f7fc0SSiva Chandra Reddy // The root object containing all the data (configuration and measurements). 94438f7fc0SSiva Chandra Reddy struct Study { 95deae7e98SGuillaume Chatelet std::string StudyName; 96deae7e98SGuillaume Chatelet Runtime Runtime; 97438f7fc0SSiva Chandra Reddy StudyConfiguration Configuration; 98deae7e98SGuillaume Chatelet std::vector<Duration> Measurements; 99438f7fc0SSiva Chandra Reddy }; 100438f7fc0SSiva Chandra Reddy 101deae7e98SGuillaume Chatelet //------ 102deae7e98SGuillaume Chatelet // Utils 103deae7e98SGuillaume Chatelet //------ 104deae7e98SGuillaume Chatelet 105438f7fc0SSiva Chandra Reddy // Provides an aligned, dynamically allocated buffer. 106438f7fc0SSiva Chandra Reddy class AlignedBuffer { 107438f7fc0SSiva Chandra Reddy char *const Buffer = nullptr; 108438f7fc0SSiva Chandra Reddy size_t Size = 0; 109438f7fc0SSiva Chandra Reddy 110438f7fc0SSiva Chandra Reddy public: 111219a1d21SGuillaume Chatelet static constexpr size_t Alignment = 512; 112438f7fc0SSiva Chandra Reddy AlignedBuffer(size_t Size)113438f7fc0SSiva Chandra Reddy explicit AlignedBuffer(size_t Size) 1149abc1e08SVitaly Buka : Buffer(static_cast<char *>( 1159abc1e08SVitaly Buka aligned_alloc(Alignment, alignTo(Size, Alignment)))), 116deae7e98SGuillaume Chatelet Size(Size) {} ~AlignedBuffer()117438f7fc0SSiva Chandra Reddy ~AlignedBuffer() { free(Buffer); } 118438f7fc0SSiva Chandra Reddy 119438f7fc0SSiva Chandra Reddy inline char *operator+(size_t Index) { return Buffer + Index; } 120438f7fc0SSiva Chandra Reddy inline const char *operator+(size_t Index) const { return Buffer + Index; } 121438f7fc0SSiva Chandra Reddy inline char &operator[](size_t Index) { return Buffer[Index]; } 122438f7fc0SSiva Chandra Reddy inline const char &operator[](size_t Index) const { return Buffer[Index]; } begin()123438f7fc0SSiva Chandra Reddy inline char *begin() { return Buffer; } end()124438f7fc0SSiva Chandra Reddy inline char *end() { return Buffer + Size; } 125438f7fc0SSiva Chandra Reddy }; 126438f7fc0SSiva Chandra Reddy 127438f7fc0SSiva Chandra Reddy // Helper to generate random buffer offsets that satisfy the configuration 128438f7fc0SSiva Chandra Reddy // constraints. 129438f7fc0SSiva Chandra Reddy class OffsetDistribution { 130438f7fc0SSiva Chandra Reddy std::uniform_int_distribution<uint32_t> Distribution; 131438f7fc0SSiva Chandra Reddy uint32_t Factor; 132438f7fc0SSiva Chandra Reddy 133438f7fc0SSiva Chandra Reddy public: 134deae7e98SGuillaume Chatelet explicit OffsetDistribution(size_t BufferSize, size_t MaxSizeValue, 135deae7e98SGuillaume Chatelet MaybeAlign AccessAlignment); 136438f7fc0SSiva Chandra Reddy operator()137438f7fc0SSiva Chandra Reddy template <class Generator> uint32_t operator()(Generator &G) { 138438f7fc0SSiva Chandra Reddy return Distribution(G) * Factor; 139438f7fc0SSiva Chandra Reddy } 140438f7fc0SSiva Chandra Reddy }; 141438f7fc0SSiva Chandra Reddy 142438f7fc0SSiva Chandra Reddy // Helper to generate random buffer offsets that satisfy the configuration 143438f7fc0SSiva Chandra Reddy // constraints. It is specifically designed to benchmark `memcmp` functions 144438f7fc0SSiva Chandra Reddy // where we may want the Nth byte to differ. 145438f7fc0SSiva Chandra Reddy class MismatchOffsetDistribution { 146438f7fc0SSiva Chandra Reddy std::uniform_int_distribution<size_t> MismatchIndexSelector; 147438f7fc0SSiva Chandra Reddy llvm::SmallVector<uint32_t, 16> MismatchIndices; 148438f7fc0SSiva Chandra Reddy const uint32_t MismatchAt; 149438f7fc0SSiva Chandra Reddy 150438f7fc0SSiva Chandra Reddy public: 151deae7e98SGuillaume Chatelet explicit MismatchOffsetDistribution(size_t BufferSize, size_t MaxSizeValue, 152deae7e98SGuillaume Chatelet size_t MismatchAt); 153438f7fc0SSiva Chandra Reddy 154438f7fc0SSiva Chandra Reddy explicit operator bool() const { return !MismatchIndices.empty(); } 155438f7fc0SSiva Chandra Reddy getMismatchIndices()156438f7fc0SSiva Chandra Reddy const llvm::SmallVectorImpl<uint32_t> &getMismatchIndices() const { 157438f7fc0SSiva Chandra Reddy return MismatchIndices; 158438f7fc0SSiva Chandra Reddy } 159438f7fc0SSiva Chandra Reddy operator()160438f7fc0SSiva Chandra Reddy template <class Generator> uint32_t operator()(Generator &G, uint32_t Size) { 161438f7fc0SSiva Chandra Reddy const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)]; 162438f7fc0SSiva Chandra Reddy // We need to position the offset so that a mismatch occurs at MismatchAt. 163438f7fc0SSiva Chandra Reddy if (Size >= MismatchAt) 164438f7fc0SSiva Chandra Reddy return MismatchIndex - MismatchAt; 165438f7fc0SSiva Chandra Reddy // Size is too small to trigger the mismatch. 166438f7fc0SSiva Chandra Reddy return MismatchIndex - Size - 1; 167438f7fc0SSiva Chandra Reddy } 168438f7fc0SSiva Chandra Reddy }; 169438f7fc0SSiva Chandra Reddy 170d3c70d9fSGuillaume Chatelet /// This structure holds a vector of ParameterType. 171d3c70d9fSGuillaume Chatelet /// It makes sure that BufferCount x BufferSize Bytes and the vector of 172d3c70d9fSGuillaume Chatelet /// ParameterType can all fit in the L1 cache. 173d3c70d9fSGuillaume Chatelet struct ParameterBatch { 174d3c70d9fSGuillaume Chatelet struct ParameterType { 175d3c70d9fSGuillaume Chatelet unsigned OffsetBytes : 16; // max : 16 KiB - 1 176d3c70d9fSGuillaume Chatelet unsigned SizeBytes : 16; // max : 16 KiB - 1 177d3c70d9fSGuillaume Chatelet }; 178d3c70d9fSGuillaume Chatelet 179d3c70d9fSGuillaume Chatelet ParameterBatch(size_t BufferCount); 180d3c70d9fSGuillaume Chatelet 181d3c70d9fSGuillaume Chatelet /// Verifies that memory accessed through this parameter is valid. 182d3c70d9fSGuillaume Chatelet void checkValid(const ParameterType &) const; 183d3c70d9fSGuillaume Chatelet 184d3c70d9fSGuillaume Chatelet /// Computes the number of bytes processed during within this batch. 185d3c70d9fSGuillaume Chatelet size_t getBatchBytes() const; 186d3c70d9fSGuillaume Chatelet 187d3c70d9fSGuillaume Chatelet const size_t BufferSize; 188d3c70d9fSGuillaume Chatelet const size_t BatchSize; 189d3c70d9fSGuillaume Chatelet std::vector<ParameterType> Parameters; 190d3c70d9fSGuillaume Chatelet }; 191d3c70d9fSGuillaume Chatelet 192d3c70d9fSGuillaume Chatelet /// Provides source and destination buffers for the Copy operation as well as 193d3c70d9fSGuillaume Chatelet /// the associated size distributions. 194e4dee762SGuillaume Chatelet struct CopySetup : public ParameterBatch { 195e4dee762SGuillaume Chatelet CopySetup(); 196d3c70d9fSGuillaume Chatelet getDistributionsCopySetup19759198d06SGuillaume Chatelet inline static const ArrayRef<MemorySizeDistribution> getDistributions() { 19859198d06SGuillaume Chatelet return getMemcpySizeDistributions(); 19959198d06SGuillaume Chatelet } 200d3c70d9fSGuillaume Chatelet CallCopySetup201e4dee762SGuillaume Chatelet inline void *Call(ParameterType Parameter, MemcpyFunction Memcpy) { 202e4dee762SGuillaume Chatelet return Memcpy(DstBuffer + Parameter.OffsetBytes, 203d3c70d9fSGuillaume Chatelet SrcBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); 204d3c70d9fSGuillaume Chatelet } 205d3c70d9fSGuillaume Chatelet 206d3c70d9fSGuillaume Chatelet private: 207d3c70d9fSGuillaume Chatelet AlignedBuffer SrcBuffer; 208d3c70d9fSGuillaume Chatelet AlignedBuffer DstBuffer; 209d3c70d9fSGuillaume Chatelet }; 210d3c70d9fSGuillaume Chatelet 211de21f346SGuillaume Chatelet /// Provides source and destination buffers for the Move operation as well as 212de21f346SGuillaume Chatelet /// the associated size distributions. 213de21f346SGuillaume Chatelet struct MoveSetup : public ParameterBatch { 214de21f346SGuillaume Chatelet MoveSetup(); 215de21f346SGuillaume Chatelet getDistributionsMoveSetup216de21f346SGuillaume Chatelet inline static const ArrayRef<MemorySizeDistribution> getDistributions() { 217de21f346SGuillaume Chatelet return getMemmoveSizeDistributions(); 218de21f346SGuillaume Chatelet } 219de21f346SGuillaume Chatelet CallMoveSetup220de21f346SGuillaume Chatelet inline void *Call(ParameterType Parameter, MemmoveFunction Memmove) { 221de21f346SGuillaume Chatelet return Memmove(Buffer + ParameterBatch::BufferSize / 3, 222de21f346SGuillaume Chatelet Buffer + Parameter.OffsetBytes, Parameter.SizeBytes); 223de21f346SGuillaume Chatelet } 224de21f346SGuillaume Chatelet 225de21f346SGuillaume Chatelet private: 226de21f346SGuillaume Chatelet AlignedBuffer Buffer; 227de21f346SGuillaume Chatelet }; 228de21f346SGuillaume Chatelet 229d3c70d9fSGuillaume Chatelet /// Provides destination buffer for the Set operation as well as the associated 230d3c70d9fSGuillaume Chatelet /// size distributions. 231e4dee762SGuillaume Chatelet struct SetSetup : public ParameterBatch { 232e4dee762SGuillaume Chatelet SetSetup(); 233d3c70d9fSGuillaume Chatelet getDistributionsSetSetup23459198d06SGuillaume Chatelet inline static const ArrayRef<MemorySizeDistribution> getDistributions() { 23559198d06SGuillaume Chatelet return getMemsetSizeDistributions(); 23659198d06SGuillaume Chatelet } 237d3c70d9fSGuillaume Chatelet CallSetSetup238e4dee762SGuillaume Chatelet inline void *Call(ParameterType Parameter, MemsetFunction Memset) { 239e4dee762SGuillaume Chatelet return Memset(DstBuffer + Parameter.OffsetBytes, 240d3c70d9fSGuillaume Chatelet Parameter.OffsetBytes % 0xFF, Parameter.SizeBytes); 241d3c70d9fSGuillaume Chatelet } 242d3c70d9fSGuillaume Chatelet CallSetSetup243e4dee762SGuillaume Chatelet inline void *Call(ParameterType Parameter, BzeroFunction Bzero) { 244e4dee762SGuillaume Chatelet Bzero(DstBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); 245d3c70d9fSGuillaume Chatelet return DstBuffer.begin(); 246d3c70d9fSGuillaume Chatelet } 247d3c70d9fSGuillaume Chatelet 248d3c70d9fSGuillaume Chatelet private: 249d3c70d9fSGuillaume Chatelet AlignedBuffer DstBuffer; 250d3c70d9fSGuillaume Chatelet }; 251d3c70d9fSGuillaume Chatelet 252d3c70d9fSGuillaume Chatelet /// Provides left and right buffers for the Comparison operation as well as the 253d3c70d9fSGuillaume Chatelet /// associated size distributions. 254e4dee762SGuillaume Chatelet struct ComparisonSetup : public ParameterBatch { 255e4dee762SGuillaume Chatelet ComparisonSetup(); 256d3c70d9fSGuillaume Chatelet getDistributionsComparisonSetup25759198d06SGuillaume Chatelet inline static const ArrayRef<MemorySizeDistribution> getDistributions() { 25859198d06SGuillaume Chatelet return getMemcmpSizeDistributions(); 25959198d06SGuillaume Chatelet } 260d3c70d9fSGuillaume Chatelet CallComparisonSetup2614a9bcb60SGuillaume Chatelet inline int Call(ParameterType Parameter, MemcmpOrBcmpFunction MemcmpOrBcmp) { 2624a9bcb60SGuillaume Chatelet return MemcmpOrBcmp(LhsBuffer + Parameter.OffsetBytes, 263d3c70d9fSGuillaume Chatelet RhsBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); 264d3c70d9fSGuillaume Chatelet } 265d3c70d9fSGuillaume Chatelet 266d3c70d9fSGuillaume Chatelet private: 267d3c70d9fSGuillaume Chatelet AlignedBuffer LhsBuffer; 268d3c70d9fSGuillaume Chatelet AlignedBuffer RhsBuffer; 269d3c70d9fSGuillaume Chatelet }; 270d3c70d9fSGuillaume Chatelet 271438f7fc0SSiva Chandra Reddy } // namespace libc_benchmarks 272438f7fc0SSiva Chandra Reddy } // namespace llvm 273438f7fc0SSiva Chandra Reddy 274438f7fc0SSiva Chandra Reddy #endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H 275