//===-- Generic implementation of memory function building blocks ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides generic C++ building blocks.
// Depending on the requested size, the block operation uses unsigned integral
// types, vector types or an array of the type with the maximum size.
//
// The maximum size is passed as a template argument. For instance, on x86
// platforms that only support integral types, the maximum size would be 8
// (corresponding to uint64_t). On such a platform, a requested size of 32
// would be treated as a cpp::array<uint64_t, 4>.
//
// On the other hand, if the platform is x86 with AVX support, the maximum size
// is 32 and the operation can be handled with a single native operation.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H

#include "src/__support/CPP/array.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/common.h"
#include "src/__support/endian.h"
#include "src/__support/macros/optimization.h"
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"

#include <stdint.h>

static_assert((UINTPTR_MAX == 4294967295U) ||
                  (UINTPTR_MAX == 18446744073709551615UL),
              "We currently only support 32- or 64-bit platforms");

#if defined(UINT64_MAX)
#define LLVM_LIBC_HAS_UINT64
#endif

namespace __llvm_libc {
// Compiler types using the vector attributes.
using generic_v128 = uint8_t __attribute__((__vector_size__(16)));
using generic_v256 = uint8_t __attribute__((__vector_size__(32)));
using generic_v512 = uint8_t __attribute__((__vector_size__(64)));
} // namespace __llvm_libc

namespace __llvm_libc::generic {

// We accept three types of values as elements for generic operations:
// - scalar : unsigned integral types,
// - vector : compiler types using the vector attributes or platform builtins,
// - array  : a cpp::array<T, N> where T is itself either a scalar or a vector.
// The following traits help discriminate between these cases.

template <typename T> struct is_scalar : cpp::false_type {};
template <> struct is_scalar<uint8_t> : cpp::true_type {};
template <> struct is_scalar<uint16_t> : cpp::true_type {};
template <> struct is_scalar<uint32_t> : cpp::true_type {};
#ifdef LLVM_LIBC_HAS_UINT64
template <> struct is_scalar<uint64_t> : cpp::true_type {};
#endif // LLVM_LIBC_HAS_UINT64
template <typename T> constexpr bool is_scalar_v = is_scalar<T>::value;

template <typename T> struct is_vector : cpp::false_type {};
template <> struct is_vector<generic_v128> : cpp::true_type {};
template <> struct is_vector<generic_v256> : cpp::true_type {};
template <> struct is_vector<generic_v512> : cpp::true_type {};
template <typename T> constexpr bool is_vector_v = is_vector<T>::value;

template <class T> struct is_array : cpp::false_type {};
template <class T, size_t N> struct is_array<cpp::array<T, N>> {
  static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
};
template <typename T> constexpr bool is_array_v = is_array<T>::value;

template <typename T>
constexpr bool is_element_type_v =
    is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;

// Helper struct to retrieve the number of elements of an array.
template <class T> struct array_size {};
template <class T, size_t N>
struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
template <typename T> constexpr size_t array_size_v = array_size<T>::value;
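
// A few illustrative instantiations of the traits above (assuming a platform
// where LLVM_LIBC_HAS_UINT64 is defined):
//   is_scalar_v<uint32_t>                          -> true
//   is_scalar_v<generic_v128>                      -> false
//   is_vector_v<generic_v256>                      -> true
//   is_array_v<cpp::array<uint64_t, 4>>            -> true
//   is_array_v<cpp::array<int, 4>>                 -> false ('int' is not an
//                                                     accepted element type)
//   is_element_type_v<cpp::array<generic_v128, 2>> -> true
//   array_size_v<cpp::array<uint64_t, 4>>          -> 4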

// Generic operations for the above type categories.

template <typename T> T load(CPtr src) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    return ::__llvm_libc::load<T>(src);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    T Value;
    for (size_t I = 0; I < array_size_v<T>; ++I)
      Value[I] = load<value_type>(src + (I * sizeof(value_type)));
    return Value;
  }
}

template <typename T> void store(Ptr dst, T value) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    ::__llvm_libc::store<T>(dst, value);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    for (size_t I = 0; I < array_size_v<T>; ++I)
      store<value_type>(dst + (I * sizeof(value_type)), value[I]);
  }
}
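
// For instance, load<cpp::array<uint32_t, 2>>(src) decomposes into two 4-byte
// loads at 'src' and 'src + 4', and the matching store writes the two elements
// back the same way; scalar and vector types map to a single load or store.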

template <typename T> T splat(uint8_t value) {
  static_assert(is_scalar_v<T> || is_vector_v<T>);
  if constexpr (is_scalar_v<T>)
    return T(~0) / T(0xFF) * T(value);
  else if constexpr (is_vector_v<T>) {
    T Out;
    // This for loop is optimized out for vector types.
    for (size_t i = 0; i < sizeof(T); ++i)
      Out[i] = value;
    return Out;
  }
}
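
// Worked example for the scalar case: with T = uint32_t and value = 0xAB,
// T(~0) / T(0xFF) is 0x01010101, so splat returns 0x01010101 * 0xAB, that is
// 0xABABABAB (every byte set to 'value').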

///////////////////////////////////////////////////////////////////////////////
// Memset
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memset {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    if constexpr (is_scalar_v<T> || is_vector_v<T>) {
      store<T>(dst, splat<T>(value));
    } else if constexpr (is_array_v<T>) {
      using value_type = typename T::value_type;
      const auto Splat = splat<value_type>(value);
      for (size_t I = 0; I < array_size_v<T>; ++I)
        store<value_type>(dst + (I * sizeof(value_type)), Splat);
    }
  }

  LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
    block(dst + count - SIZE, value);
  }

  LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
    block(dst, value);
    tail(dst, value, count);
  }

  LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    size_t offset = 0;
    do {
      block(dst + offset, value);
      offset += SIZE;
    } while (offset < count - SIZE);
    tail(dst, value, count);
  }
};
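
// Illustrative use of the blocks above (a sketch; the actual dispatch on
// 'count' lives in the per-platform memset implementations):
//   count == 16       : Memset<generic_v128>::block(dst, value)
//   count in [16, 32] : Memset<generic_v128>::head_tail(dst, value, count)
//                       (the two stores overlap whenever count < 32)
//   count >  32       : Memset<generic_v256>::loop_and_tail(dst, value, count)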

template <typename T, typename... TS> struct MemsetSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    Memset<T>::block(dst, value);
    if constexpr (sizeof...(TS) > 0)
      return MemsetSequence<TS...>::block(dst + sizeof(T), value);
  }
};
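
// For example, assuming uint64_t is available,
// MemsetSequence<uint64_t, uint32_t, uint8_t>::block(dst, value) sets exactly
// 13 bytes (8 + 4 + 1) with three consecutive stores.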

///////////////////////////////////////////////////////////////////////////////
// Memmove
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memmove {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, CPtr src) {
    store<T>(dst, load<T>(src));
  }

  LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
    const size_t offset = count - SIZE;
    // The load and store operations can be performed in any order as long as
    // they are not interleaved. More investigation is needed to determine the
    // best order.
    const auto head = load<T>(src);
    const auto tail = load<T>(src + offset);
    store<T>(dst, head);
    store<T>(dst + offset, tail);
  }
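
  // Note: head_tail covers any count in [SIZE, 2 x SIZE] with exactly two
  // loads and two stores. For example, with SIZE == 16 and count == 20 it
  // loads bytes [0, 16) and [4, 20) and stores them back, the two accesses
  // overlapping over bytes [4, 16).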

  // Align forward is suitable when dst < src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes forward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [___________LLLLLLLA______________]
  // [_SSSSSSSS________________________]
  // [________SSSSSSSS_________________]
  //
  // e.g. Moving two bytes forward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [______LLLLLLLL___________________]
  // [_SSSSSSSS________________________]
  // [___SSSSSSSA______________________]
  template <Arg AlignOn>
  LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr prev_dst = dst;
    CPtr prev_src = src;
    size_t prev_count = count;
    align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
    adjust(SIZE, dst, src, count);
    head_tail(prev_dst, prev_src, prev_count - count);
  }

  // Align backward is suitable when dst > src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes backward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _________________ALLLLLLL_______]
  // [ ___________________LLLLLLLL_____]
  // [____________________SSSSSSSS_____]
  // [______________________SSSSSSSS___]
  //
  // e.g. Moving two bytes backward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _______________LLLLLLLL_________]
  // [ ___________________LLLLLLLL_____]
  // [__________________ASSSSSSS_______]
  // [______________________SSSSSSSS___]
  template <Arg AlignOn>
  LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr headtail_dst = dst + count;
    CPtr headtail_src = src + count;
    size_t headtail_size = 0;
    align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
                                          headtail_size);
    adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
    head_tail(headtail_dst, headtail_src, headtail_size);
    count -= headtail_size;
  }

  // Move forward is suitable when dst < src. We load the tail bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [_________________________LLLLLLLL___]
  // [___LLLLLLLL_________________________]
  // [_SSSSSSSS___________________________]
  // [___________LLLLLLLL_________________]
  // [_________SSSSSSSS___________________]
  // [___________________LLLLLLLL_________]
  // [_________________SSSSSSSS___________]
  // [_______________________SSSSSSSS_____]
  LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
                                                size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const size_t tail_offset = count - SIZE;
    const auto tail_value = load<T>(src + tail_offset);
    size_t offset = 0;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset += SIZE;
    } while (offset < count - SIZE);
    store<T>(dst + tail_offset, tail_value);
  }

  // Move backward is suitable when dst > src. We load the head bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [___LLLLLLLL_________________________]
  // [_________________________LLLLLLLL___]
  // [___________________________SSSSSSSS_]
  // [_________________LLLLLLLL___________]
  // [___________________SSSSSSSS_________]
  // [_________LLLLLLLL___________________]
  // [___________SSSSSSSS_________________]
  // [_____SSSSSSSS_______________________]
  LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
                                                 size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const auto head_value = load<T>(src);
    ptrdiff_t offset = count - SIZE;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset -= SIZE;
    } while (offset >= 0);
    store<T>(dst, head_value);
  }
};
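
// A typical memmove built from the blocks above (a sketch; the actual dispatch
// lives in the per-platform implementations):
//   if (dst < src) {
//     Memmove<T>::align_forward<Arg::Src>(dst, src, count);
//     Memmove<T>::loop_and_tail_forward(dst, src, count);
//   } else {
//     Memmove<T>::align_backward<Arg::Src>(dst, src, count);
//     Memmove<T>::loop_and_tail_backward(dst, src, count);
//   }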

///////////////////////////////////////////////////////////////////////////////
// Low level operations for Bcmp and Memcmp that operate on memory locations.
///////////////////////////////////////////////////////////////////////////////

// Same as load above but with an offset to the pointer.
// Making the offset explicit hints the compiler to use the relevant addressing
// modes consistently.
template <typename T> LIBC_INLINE T load(CPtr ptr, size_t offset) {
  return ::__llvm_libc::load<T>(ptr + offset);
}

// Same as above but also makes sure the loaded value is in big endian format.
// This is useful when implementing lexicographic comparisons as big endian
// scalar comparison directly maps to lexicographic byte comparisons.
template <typename T> LIBC_INLINE T load_be(CPtr ptr, size_t offset) {
  return Endian::to_big_endian(load<T>(ptr, offset));
}
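
// Worked example: comparing the two-byte sequences "ab" (0x61, 0x62) and "ba"
// (0x62, 0x61) on a little-endian machine, plain uint16_t loads yield 0x6261
// and 0x6162, which compare the wrong way around. After to_big_endian the
// values are 0x6162 and 0x6261, so the integer comparison matches the
// lexicographic byte order ("ab" < "ba").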

// Equality: returns true iff values at locations (p1 + offset) and (p2 +
// offset) compare equal.
template <typename T> LIBC_INLINE bool eq(CPtr p1, CPtr p2, size_t offset);

// Not equals: returns non-zero iff values at locations (p1 + offset) and (p2 +
// offset) differ.
template <typename T> LIBC_INLINE uint32_t neq(CPtr p1, CPtr p2, size_t offset);

// Lexicographic comparison:
// - returns 0 iff values at locations (p1 + offset) and (p2 + offset) compare
//   equal.
// - returns a negative value if value at location (p1 + offset) is
//   lexicographically less than value at (p2 + offset).
// - returns a positive value if value at location (p1 + offset) is
//   lexicographically greater than value at (p2 + offset).
template <typename T>
LIBC_INLINE MemcmpReturnType cmp(CPtr p1, CPtr p2, size_t offset);

// Lexicographic comparison of non-equal values:
// - returns a negative value if value at location (p1 + offset) is
//   lexicographically less than value at (p2 + offset).
// - returns a positive value if value at location (p1 + offset) is
//   lexicographically greater than value at (p2 + offset).
template <typename T>
LIBC_INLINE MemcmpReturnType cmp_neq(CPtr p1, CPtr p2, size_t offset);

///////////////////////////////////////////////////////////////////////////////
// Memcmp implementation
//
// When building memcmp, not all types are considered equal.
//
// For instance, the lexicographic comparison of two uint8_t can be implemented
// as a simple subtraction, but for wider operations the logic can be much more
// involved, especially on little endian platforms.
//
// For such wider types it is a good strategy to test for equality first and
// only do the expensive lexicographic comparison if necessary.
//
// Decomposing the algorithm like this for wider types allows us to have
// efficient implementations of higher order functions like 'head_tail' or
// 'loop_and_tail'.
///////////////////////////////////////////////////////////////////////////////

// Type traits to decide whether we can use 'cmp' directly or if we need to
// split the computation.
template <typename T> struct cmp_is_expensive;
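
// For instance, cmp for uint8_t is a single subtraction, so
// cmp_is_expensive<uint8_t> is false (see the specializations at the end of
// this file), whereas wide types on little endian platforms typically need
// byte swaps and therefore benefit from the eq / cmp_neq split.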

template <typename T> struct Memcmp {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

private:
  LIBC_INLINE static MemcmpReturnType block_offset(CPtr p1, CPtr p2,
                                                   size_t offset) {
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, offset))
        return cmp_neq<T>(p1, p2, offset);
      return MemcmpReturnType::ZERO();
    } else {
      return cmp<T>(p1, p2, offset);
    }
  }

public:
  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    return block_offset(p1, p2, 0);
  }

  LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    return block_offset(p1, p2, count - SIZE);
  }

  LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
                                                size_t count) {
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, 0))
        return cmp_neq<T>(p1, p2, 0);
    } else {
      if (const auto value = cmp<T>(p1, p2, 0))
        return value;
    }
    return tail(p1, p2, count);
  }

  LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                    size_t count) {
    return loop_and_tail_offset(p1, p2, count, 0);
  }

  LIBC_INLINE static MemcmpReturnType
  loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
    if constexpr (SIZE > 1) {
      const size_t limit = count - SIZE;
      LIBC_LOOP_NOUNROLL
      for (; offset < limit; offset += SIZE) {
        if constexpr (cmp_is_expensive<T>::value) {
          if (!eq<T>(p1, p2, offset))
            return cmp_neq<T>(p1, p2, offset);
        } else {
          if (const auto value = cmp<T>(p1, p2, offset))
            return value;
        }
      }
      return block_offset(p1, p2, limit); // tail
    } else {
      // No need for a tail operation when SIZE == 1.
      LIBC_LOOP_NOUNROLL
      for (; offset < count; offset += SIZE)
        if (auto value = cmp<T>(p1, p2, offset))
          return value;
      return MemcmpReturnType::ZERO();
    }
  }

  LIBC_INLINE static MemcmpReturnType
  loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
    const AlignHelper<sizeof(T)> helper(p1);
    if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
      if (auto value = block(p1, p2))
        return value;
      adjust(helper.offset(), p1, p2, count);
    }
    return loop_and_tail(p1, p2, count);
  }
};
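
// Illustrative composition (a sketch; assumes the eq / cmp / cmp_neq
// specializations for T are provided, e.g. by the platform headers in this
// directory):
//   count == SIZE           : Memcmp<T>::block(p1, p2)
//   count in [SIZE, 2xSIZE] : Memcmp<T>::head_tail(p1, p2, count)
//   count >= 2xSIZE         : Memcmp<T>::loop_and_tail(p1, p2, count)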

template <typename T, typename... TS> struct MemcmpSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    // TODO: test suggestion in
    // https://reviews.llvm.org/D148717?id=515724#inline-1446890
    // once we have a proper way to check memory operation latency.
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, 0))
        return cmp_neq<T>(p1, p2, 0);
    } else {
      if (auto value = cmp<T>(p1, p2, 0))
        return value;
    }
    if constexpr (sizeof...(TS) > 0)
      return MemcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
    else
      return MemcmpReturnType::ZERO();
  }
};
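
// For example, MemcmpSequence<uint16_t, uint8_t>::block(p1, p2) compares
// exactly 3 bytes: two through the uint16_t comparison and, only if they are
// equal, one more through the uint8_t comparison.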

///////////////////////////////////////////////////////////////////////////////
// Bcmp
///////////////////////////////////////////////////////////////////////////////
template <typename T> struct Bcmp {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    return neq<T>(p1, p2, 0);
  }

  LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    const size_t tail_offset = count - SIZE;
    return neq<T>(p1, p2, tail_offset);
  }

  LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
    if (const auto value = neq<T>(p1, p2, 0))
      return value;
    return tail(p1, p2, count);
  }

  LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                  size_t count) {
    return loop_and_tail_offset(p1, p2, count, 0);
  }

  LIBC_INLINE static BcmpReturnType
  loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
    if constexpr (SIZE > 1) {
      const size_t limit = count - SIZE;
      LIBC_LOOP_NOUNROLL
      for (; offset < limit; offset += SIZE)
        if (const auto value = neq<T>(p1, p2, offset))
          return value;
      return tail(p1, p2, count);
    } else {
      // No need for a tail operation when SIZE == 1.
      LIBC_LOOP_NOUNROLL
      for (; offset < count; offset += SIZE)
        if (const auto value = neq<T>(p1, p2, offset))
          return value;
      return BcmpReturnType::ZERO();
    }
  }

  LIBC_INLINE static BcmpReturnType
  loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
    static_assert(SIZE > 1,
                  "No need to align when processing one byte at a time");
    const AlignHelper<sizeof(T)> helper(p1);
    if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
      if (auto value = block(p1, p2))
        return value;
      adjust(helper.offset(), p1, p2, count);
    }
    return loop_and_tail(p1, p2, count);
  }
};

template <typename T, typename... TS> struct BcmpSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    if (auto value = neq<T>(p1, p2, 0))
      return value;
    if constexpr (sizeof...(TS) > 0)
      return BcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
    else
      return BcmpReturnType::ZERO();
  }
};
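
// For example, BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2)
// compares exactly 7 bytes (4 + 2 + 1) and returns non-zero as soon as one
// sub-comparison differs.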

///////////////////////////////////////////////////////////////////////////////
// Specializations for uint8_t
template <> struct cmp_is_expensive<uint8_t> : public cpp::false_type {};
template <> LIBC_INLINE bool eq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return load<uint8_t>(p1, offset) == load<uint8_t>(p2, offset);
}
template <> LIBC_INLINE uint32_t neq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return load<uint8_t>(p1, offset) ^ load<uint8_t>(p2, offset);
}
template <>
LIBC_INLINE MemcmpReturnType cmp<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return static_cast<int32_t>(load<uint8_t>(p1, offset)) -
         static_cast<int32_t>(load<uint8_t>(p2, offset));
}
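// cmp_neq<uint8_t> is declared but intentionally not defined: since
// cmp_is_expensive<uint8_t> is false, every 'if constexpr' branch that would
// call it is discarded for uint8_t, so the definition is never needed.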
template <>
LIBC_INLINE MemcmpReturnType cmp_neq<uint8_t>(CPtr p1, CPtr p2, size_t offset);

} // namespace __llvm_libc::generic

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H