xref: /openbsd-src/gnu/llvm/libcxx/benchmarks/string.bench.cpp (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1 
2 #include <cstdint>
3 #include <new>
4 #include <vector>
5 
6 #include "CartesianBenchmarks.h"
7 #include "GenerateInput.h"
8 #include "benchmark/benchmark.h"
9 #include "test_macros.h"
10 
11 constexpr std::size_t MAX_STRING_LEN = 8 << 14;
12 
13 // Benchmark when there is no match.
14 static void BM_StringFindNoMatch(benchmark::State &state) {
15   std::string s1(state.range(0), '-');
16   std::string s2(8, '*');
17   for (auto _ : state)
18     benchmark::DoNotOptimize(s1.find(s2));
19 }
20 BENCHMARK(BM_StringFindNoMatch)->Range(10, MAX_STRING_LEN);
21 
22 // Benchmark when the string matches first time.
23 static void BM_StringFindAllMatch(benchmark::State &state) {
24   std::string s1(MAX_STRING_LEN, '-');
25   std::string s2(state.range(0), '-');
26   for (auto _ : state)
27     benchmark::DoNotOptimize(s1.find(s2));
28 }
29 BENCHMARK(BM_StringFindAllMatch)->Range(1, MAX_STRING_LEN);
30 
31 // Benchmark when the string matches somewhere in the end.
32 static void BM_StringFindMatch1(benchmark::State &state) {
33   std::string s1(MAX_STRING_LEN / 2, '*');
34   s1 += std::string(state.range(0), '-');
35   std::string s2(state.range(0), '-');
36   for (auto _ : state)
37     benchmark::DoNotOptimize(s1.find(s2));
38 }
39 BENCHMARK(BM_StringFindMatch1)->Range(1, MAX_STRING_LEN / 4);
40 
41 // Benchmark when the string matches somewhere from middle to the end.
42 static void BM_StringFindMatch2(benchmark::State &state) {
43   std::string s1(MAX_STRING_LEN / 2, '*');
44   s1 += std::string(state.range(0), '-');
45   s1 += std::string(state.range(0), '*');
46   std::string s2(state.range(0), '-');
47   for (auto _ : state)
48     benchmark::DoNotOptimize(s1.find(s2));
49 }
50 BENCHMARK(BM_StringFindMatch2)->Range(1, MAX_STRING_LEN / 4);
51 
52 static void BM_StringCtorDefault(benchmark::State &state) {
53   for (auto _ : state) {
54     std::string Default;
55     benchmark::DoNotOptimize(Default);
56   }
57 }
58 BENCHMARK(BM_StringCtorDefault);
59 
60 enum class Length { Empty, Small, Large, Huge };
61 struct AllLengths : EnumValuesAsTuple<AllLengths, Length, 4> {
62   static constexpr const char* Names[] = {"Empty", "Small", "Large", "Huge"};
63 };
64 
65 enum class Opacity { Opaque, Transparent };
66 struct AllOpacity : EnumValuesAsTuple<AllOpacity, Opacity, 2> {
67   static constexpr const char* Names[] = {"Opaque", "Transparent"};
68 };
69 
70 enum class DiffType { Control, ChangeFirst, ChangeMiddle, ChangeLast };
71 struct AllDiffTypes : EnumValuesAsTuple<AllDiffTypes, DiffType, 4> {
72   static constexpr const char* Names[] = {"Control", "ChangeFirst",
73                                           "ChangeMiddle", "ChangeLast"};
74 };
75 
76 static constexpr char SmallStringLiteral[] = "012345678";
77 
78 TEST_ALWAYS_INLINE const char* getSmallString(DiffType D) {
79   switch (D) {
80     case DiffType::Control:
81       return SmallStringLiteral;
82     case DiffType::ChangeFirst:
83       return "-12345678";
84     case DiffType::ChangeMiddle:
85       return "0123-5678";
86     case DiffType::ChangeLast:
87       return "01234567-";
88   }
89 }
90 
91 static constexpr char LargeStringLiteral[] =
92     "012345678901234567890123456789012345678901234567890123456789012";
93 
94 TEST_ALWAYS_INLINE const char* getLargeString(DiffType D) {
95 #define LARGE_STRING_FIRST "123456789012345678901234567890"
96 #define LARGE_STRING_SECOND "234567890123456789012345678901"
97   switch (D) {
98     case DiffType::Control:
99       return "0" LARGE_STRING_FIRST "1" LARGE_STRING_SECOND "2";
100     case DiffType::ChangeFirst:
101       return "-" LARGE_STRING_FIRST "1" LARGE_STRING_SECOND "2";
102     case DiffType::ChangeMiddle:
103       return "0" LARGE_STRING_FIRST "-" LARGE_STRING_SECOND "2";
104     case DiffType::ChangeLast:
105       return "0" LARGE_STRING_FIRST "1" LARGE_STRING_SECOND "-";
106   }
107 }
108 
109 TEST_ALWAYS_INLINE const char* getHugeString(DiffType D) {
110 #define HUGE_STRING0 "0123456789"
111 #define HUGE_STRING1 HUGE_STRING0 HUGE_STRING0 HUGE_STRING0 HUGE_STRING0
112 #define HUGE_STRING2 HUGE_STRING1 HUGE_STRING1 HUGE_STRING1 HUGE_STRING1
113 #define HUGE_STRING3 HUGE_STRING2 HUGE_STRING2 HUGE_STRING2 HUGE_STRING2
114 #define HUGE_STRING4 HUGE_STRING3 HUGE_STRING3 HUGE_STRING3 HUGE_STRING3
115   switch (D) {
116     case DiffType::Control:
117       return "0123456789" HUGE_STRING4 "0123456789" HUGE_STRING4 "0123456789";
118     case DiffType::ChangeFirst:
119       return "-123456789" HUGE_STRING4 "0123456789" HUGE_STRING4 "0123456789";
120     case DiffType::ChangeMiddle:
121       return "0123456789" HUGE_STRING4 "01234-6789" HUGE_STRING4 "0123456789";
122     case DiffType::ChangeLast:
123       return "0123456789" HUGE_STRING4 "0123456789" HUGE_STRING4 "012345678-";
124   }
125 }
126 
127 TEST_ALWAYS_INLINE const char* getString(Length L,
128                                          DiffType D = DiffType::Control) {
129   switch (L) {
130   case Length::Empty:
131     return "";
132   case Length::Small:
133     return getSmallString(D);
134   case Length::Large:
135     return getLargeString(D);
136   case Length::Huge:
137     return getHugeString(D);
138   }
139 }
140 
141 TEST_ALWAYS_INLINE std::string makeString(Length L,
142                                           DiffType D = DiffType::Control,
143                                           Opacity O = Opacity::Transparent) {
144   switch (L) {
145   case Length::Empty:
146     return maybeOpaque("", O == Opacity::Opaque);
147   case Length::Small:
148     return maybeOpaque(getSmallString(D), O == Opacity::Opaque);
149   case Length::Large:
150     return maybeOpaque(getLargeString(D), O == Opacity::Opaque);
151   case Length::Huge:
152     return maybeOpaque(getHugeString(D), O == Opacity::Opaque);
153   }
154 }
155 
156 template <class Length, class Opaque>
157 struct StringConstructDestroyCStr {
158   static void run(benchmark::State& state) {
159     for (auto _ : state) {
160       benchmark::DoNotOptimize(
161           makeString(Length(), DiffType::Control, Opaque()));
162     }
163   }
164 
165   static std::string name() {
166     return "BM_StringConstructDestroyCStr" + Length::name() + Opaque::name();
167   }
168 };
169 
170 template <class Length, bool MeasureCopy, bool MeasureDestroy>
171 static void StringCopyAndDestroy(benchmark::State& state) {
172   static constexpr size_t NumStrings = 1024;
173   auto Orig = makeString(Length());
174   std::aligned_storage<sizeof(std::string)>::type Storage[NumStrings];
175 
176   while (state.KeepRunningBatch(NumStrings)) {
177     if (!MeasureCopy)
178       state.PauseTiming();
179     for (size_t I = 0; I < NumStrings; ++I) {
180       ::new (static_cast<void*>(Storage + I)) std::string(Orig);
181     }
182     if (!MeasureCopy)
183       state.ResumeTiming();
184     if (!MeasureDestroy)
185       state.PauseTiming();
186     for (size_t I = 0; I < NumStrings; ++I) {
187       using S = std::string;
188       reinterpret_cast<S*>(Storage + I)->~S();
189     }
190     if (!MeasureDestroy)
191       state.ResumeTiming();
192   }
193 }
194 
195 template <class Length>
196 struct StringCopy {
197   static void run(benchmark::State& state) {
198     StringCopyAndDestroy<Length, true, false>(state);
199   }
200 
201   static std::string name() { return "BM_StringCopy" + Length::name(); }
202 };
203 
204 template <class Length>
205 struct StringDestroy {
206   static void run(benchmark::State& state) {
207     StringCopyAndDestroy<Length, false, true>(state);
208   }
209 
210   static std::string name() { return "BM_StringDestroy" + Length::name(); }
211 };
212 
213 template <class Length>
214 struct StringMove {
215   static void run(benchmark::State& state) {
216     // Keep two object locations and move construct back and forth.
217     std::aligned_storage<sizeof(std::string), alignof(std::string)>::type Storage[2];
218     using S = std::string;
219     size_t I = 0;
220     S *newS = new (static_cast<void*>(Storage)) std::string(makeString(Length()));
221     for (auto _ : state) {
222       // Switch locations.
223       I ^= 1;
224       benchmark::DoNotOptimize(Storage);
225       // Move construct into the new location,
226       S *tmpS = new (static_cast<void*>(Storage + I)) S(std::move(*newS));
227       // then destroy the old one.
228       newS->~S();
229       newS = tmpS;
230     }
231     newS->~S();
232   }
233 
234   static std::string name() { return "BM_StringMove" + Length::name(); }
235 };
236 
237 template <class Length, class Opaque>
238 struct StringResizeDefaultInit {
239   static void run(benchmark::State& state) {
240     constexpr bool opaque = Opaque{} == Opacity::Opaque;
241     constexpr int kNumStrings = 4 << 10;
242     size_t length = makeString(Length()).size();
243     std::string strings[kNumStrings];
244     while (state.KeepRunningBatch(kNumStrings)) {
245       state.PauseTiming();
246       for (int i = 0; i < kNumStrings; ++i) {
247         std::string().swap(strings[i]);
248       }
249       benchmark::DoNotOptimize(strings);
250       state.ResumeTiming();
251       for (int i = 0; i < kNumStrings; ++i) {
252         strings[i].__resize_default_init(maybeOpaque(length, opaque));
253       }
254     }
255   }
256 
257   static std::string name() {
258     return "BM_StringResizeDefaultInit" + Length::name() + Opaque::name();
259   }
260 };
261 
262 template <class Length, class Opaque>
263 struct StringAssignStr {
264   static void run(benchmark::State& state) {
265     constexpr bool opaque = Opaque{} == Opacity::Opaque;
266     constexpr int kNumStrings = 4 << 10;
267     std::string src = makeString(Length());
268     std::string strings[kNumStrings];
269     while (state.KeepRunningBatch(kNumStrings)) {
270       state.PauseTiming();
271       for (int i = 0; i < kNumStrings; ++i) {
272         std::string().swap(strings[i]);
273       }
274       benchmark::DoNotOptimize(strings);
275       state.ResumeTiming();
276       for (int i = 0; i < kNumStrings; ++i) {
277         strings[i] = *maybeOpaque(&src, opaque);
278       }
279     }
280   }
281 
282   static std::string name() {
283     return "BM_StringAssignStr" + Length::name() + Opaque::name();
284   }
285 };
286 
287 template <class Length, class Opaque>
288 struct StringAssignAsciiz {
289   static void run(benchmark::State& state) {
290     constexpr bool opaque = Opaque{} == Opacity::Opaque;
291     constexpr int kNumStrings = 4 << 10;
292     std::string strings[kNumStrings];
293     while (state.KeepRunningBatch(kNumStrings)) {
294       state.PauseTiming();
295       for (int i = 0; i < kNumStrings; ++i) {
296         std::string().swap(strings[i]);
297       }
298       benchmark::DoNotOptimize(strings);
299       state.ResumeTiming();
300       for (int i = 0; i < kNumStrings; ++i) {
301         strings[i] = maybeOpaque(getString(Length()), opaque);
302       }
303     }
304   }
305 
306   static std::string name() {
307     return "BM_StringAssignAsciiz" + Length::name() + Opaque::name();
308   }
309 };
310 
311 template <class Opaque>
312 struct StringAssignAsciizMix {
313   static void run(benchmark::State& state) {
314     constexpr auto O = Opaque{};
315     constexpr auto D = DiffType::Control;
316     constexpr int kNumStrings = 4 << 10;
317     std::string strings[kNumStrings];
318     while (state.KeepRunningBatch(kNumStrings)) {
319       state.PauseTiming();
320       for (int i = 0; i < kNumStrings; ++i) {
321         std::string().swap(strings[i]);
322       }
323       benchmark::DoNotOptimize(strings);
324       state.ResumeTiming();
325       for (int i = 0; i < kNumStrings - 7; i += 8) {
326         strings[i + 0] = maybeOpaque(getSmallString(D), O == Opacity::Opaque);
327         strings[i + 1] = maybeOpaque(getSmallString(D), O == Opacity::Opaque);
328         strings[i + 2] = maybeOpaque(getLargeString(D), O == Opacity::Opaque);
329         strings[i + 3] = maybeOpaque(getSmallString(D), O == Opacity::Opaque);
330         strings[i + 4] = maybeOpaque(getSmallString(D), O == Opacity::Opaque);
331         strings[i + 5] = maybeOpaque(getSmallString(D), O == Opacity::Opaque);
332         strings[i + 6] = maybeOpaque(getLargeString(D), O == Opacity::Opaque);
333         strings[i + 7] = maybeOpaque(getSmallString(D), O == Opacity::Opaque);
334       }
335     }
336   }
337 
338   static std::string name() {
339     return "BM_StringAssignAsciizMix" + Opaque::name();
340   }
341 };
342 
343 enum class Relation { Eq, Less, Compare };
344 struct AllRelations : EnumValuesAsTuple<AllRelations, Relation, 3> {
345   static constexpr const char* Names[] = {"Eq", "Less", "Compare"};
346 };
347 
348 template <class Rel, class LHLength, class RHLength, class DiffType>
349 struct StringRelational {
350   static void run(benchmark::State& state) {
351     auto Lhs = makeString(RHLength());
352     auto Rhs = makeString(LHLength(), DiffType());
353     for (auto _ : state) {
354       benchmark::DoNotOptimize(Lhs);
355       benchmark::DoNotOptimize(Rhs);
356       switch (Rel()) {
357       case Relation::Eq:
358         benchmark::DoNotOptimize(Lhs == Rhs);
359         break;
360       case Relation::Less:
361         benchmark::DoNotOptimize(Lhs < Rhs);
362         break;
363       case Relation::Compare:
364         benchmark::DoNotOptimize(Lhs.compare(Rhs));
365         break;
366       }
367     }
368   }
369 
370   static bool skip() {
371     // Eq is commutative, so skip half the matrix.
372     if (Rel() == Relation::Eq && LHLength() > RHLength())
373       return true;
374     // We only care about control when the lengths differ.
375     if (LHLength() != RHLength() && DiffType() != ::DiffType::Control)
376       return true;
377     // For empty, only control matters.
378     if (LHLength() == Length::Empty && DiffType() != ::DiffType::Control)
379       return true;
380     return false;
381   }
382 
383   static std::string name() {
384     return "BM_StringRelational" + Rel::name() + LHLength::name() +
385            RHLength::name() + DiffType::name();
386   }
387 };
388 
389 template <class Rel, class LHLength, class RHLength, class DiffType>
390 struct StringRelationalLiteral {
391   static void run(benchmark::State& state) {
392     auto Lhs = makeString(LHLength(), DiffType());
393     for (auto _ : state) {
394       benchmark::DoNotOptimize(Lhs);
395       constexpr const char* Literal = RHLength::value == Length::Empty
396                                           ? ""
397                                           : RHLength::value == Length::Small
398                                                 ? SmallStringLiteral
399                                                 : LargeStringLiteral;
400       switch (Rel()) {
401       case Relation::Eq:
402         benchmark::DoNotOptimize(Lhs == Literal);
403         break;
404       case Relation::Less:
405         benchmark::DoNotOptimize(Lhs < Literal);
406         break;
407       case Relation::Compare:
408         benchmark::DoNotOptimize(Lhs.compare(Literal));
409         break;
410       }
411     }
412   }
413 
414   static bool skip() {
415     // Doesn't matter how they differ if they have different size.
416     if (LHLength() != RHLength() && DiffType() != ::DiffType::Control)
417       return true;
418     // We don't need huge. Doensn't give anything different than Large.
419     if (LHLength() == Length::Huge || RHLength() == Length::Huge)
420       return true;
421     return false;
422   }
423 
424   static std::string name() {
425     return "BM_StringRelationalLiteral" + Rel::name() + LHLength::name() +
426            RHLength::name() + DiffType::name();
427   }
428 };
429 
430 enum class Depth { Shallow, Deep };
431 struct AllDepths : EnumValuesAsTuple<AllDepths, Depth, 2> {
432   static constexpr const char* Names[] = {"Shallow", "Deep"};
433 };
434 
435 enum class Temperature { Hot, Cold };
436 struct AllTemperatures : EnumValuesAsTuple<AllTemperatures, Temperature, 2> {
437   static constexpr const char* Names[] = {"Hot", "Cold"};
438 };
439 
440 template <class Temperature, class Depth, class Length>
441 struct StringRead {
442   void run(benchmark::State& state) const {
443     static constexpr size_t NumStrings =
444         Temperature() == ::Temperature::Hot
445             ? 1 << 10
446             : /* Enough strings to overflow the cache */ 1 << 20;
447     static_assert((NumStrings & (NumStrings - 1)) == 0,
448                   "NumStrings should be a power of two to reduce overhead.");
449 
450     std::vector<std::string> Values(NumStrings, makeString(Length()));
451     size_t I = 0;
452     for (auto _ : state) {
453       // Jump long enough to defeat cache locality, and use a value that is
454       // coprime with NumStrings to ensure we visit every element.
455       I = (I + 17) % NumStrings;
456       const auto& V = Values[I];
457 
458       // Read everything first. Escaping data() through DoNotOptimize might
459       // cause the compiler to have to recalculate information about `V` due to
460       // aliasing.
461       const char* const Data = V.data();
462       const size_t Size = V.size();
463       benchmark::DoNotOptimize(Data);
464       benchmark::DoNotOptimize(Size);
465       if (Depth() == ::Depth::Deep) {
466         // Read into the payload. This mainly shows the benefit of SSO when the
467         // data is cold.
468         benchmark::DoNotOptimize(*Data);
469       }
470     }
471   }
472 
473   static bool skip() {
474     // Huge does not give us anything that Large doesn't have. Skip it.
475     if (Length() == ::Length::Huge) {
476       return true;
477     }
478     return false;
479   }
480 
481   std::string name() const {
482     return "BM_StringRead" + Temperature::name() + Depth::name() +
483            Length::name();
484   }
485 };
486 
487 void sanityCheckGeneratedStrings() {
488   for (auto Lhs : {Length::Empty, Length::Small, Length::Large, Length::Huge}) {
489     const auto LhsString = makeString(Lhs);
490     for (auto Rhs :
491          {Length::Empty, Length::Small, Length::Large, Length::Huge}) {
492       if (Lhs > Rhs)
493         continue;
494       const auto RhsString = makeString(Rhs);
495 
496       // The smaller one must be a prefix of the larger one.
497       if (RhsString.find(LhsString) != 0) {
498         fprintf(stderr, "Invalid autogenerated strings for sizes (%d,%d).\n",
499                 static_cast<int>(Lhs), static_cast<int>(Rhs));
500         std::abort();
501       }
502     }
503   }
504   // Verify the autogenerated diffs
505   for (auto L : {Length::Small, Length::Large, Length::Huge}) {
506     const auto Control = makeString(L);
507     const auto Verify = [&](std::string Exp, size_t Pos) {
508       // Only change on the Pos char.
509       if (Control[Pos] != Exp[Pos]) {
510         Exp[Pos] = Control[Pos];
511         if (Control == Exp)
512           return;
513       }
514       fprintf(stderr, "Invalid autogenerated diff with size %d\n",
515               static_cast<int>(L));
516       std::abort();
517     };
518     Verify(makeString(L, DiffType::ChangeFirst), 0);
519     Verify(makeString(L, DiffType::ChangeMiddle), Control.size() / 2);
520     Verify(makeString(L, DiffType::ChangeLast), Control.size() - 1);
521   }
522 }
523 
524 // Some small codegen thunks to easily see generated code.
525 bool StringEqString(const std::string& a, const std::string& b) {
526   return a == b;
527 }
528 bool StringEqCStr(const std::string& a, const char* b) { return a == b; }
529 bool CStrEqString(const char* a, const std::string& b) { return a == b; }
530 bool StringEqCStrLiteralEmpty(const std::string& a) {
531   return a == "";
532 }
533 bool StringEqCStrLiteralSmall(const std::string& a) {
534   return a == SmallStringLiteral;
535 }
536 bool StringEqCStrLiteralLarge(const std::string& a) {
537   return a == LargeStringLiteral;
538 }
539 
540 int main(int argc, char** argv) {
541   benchmark::Initialize(&argc, argv);
542   if (benchmark::ReportUnrecognizedArguments(argc, argv))
543     return 1;
544 
545   sanityCheckGeneratedStrings();
546 
547   makeCartesianProductBenchmark<StringConstructDestroyCStr, AllLengths,
548                                 AllOpacity>();
549 
550   makeCartesianProductBenchmark<StringAssignStr, AllLengths, AllOpacity>();
551   makeCartesianProductBenchmark<StringAssignAsciiz, AllLengths, AllOpacity>();
552   makeCartesianProductBenchmark<StringAssignAsciizMix, AllOpacity>();
553 
554   makeCartesianProductBenchmark<StringCopy, AllLengths>();
555   makeCartesianProductBenchmark<StringMove, AllLengths>();
556   makeCartesianProductBenchmark<StringDestroy, AllLengths>();
557   makeCartesianProductBenchmark<StringResizeDefaultInit, AllLengths,
558                                 AllOpacity>();
559   makeCartesianProductBenchmark<StringRelational, AllRelations, AllLengths,
560                                 AllLengths, AllDiffTypes>();
561   makeCartesianProductBenchmark<StringRelationalLiteral, AllRelations,
562                                 AllLengths, AllLengths, AllDiffTypes>();
563   makeCartesianProductBenchmark<StringRead, AllTemperatures, AllDepths,
564                                 AllLengths>();
565   benchmark::RunSpecifiedBenchmarks();
566 
567   if (argc < 0) {
568     // ODR-use the functions to force them being generated in the binary.
569     auto functions = std::make_tuple(
570         StringEqString, StringEqCStr, CStrEqString, StringEqCStrLiteralEmpty,
571         StringEqCStrLiteralSmall, StringEqCStrLiteralLarge);
572     printf("%p", &functions);
573   }
574 }
575