1 /**
2 * Contains various string related functions.
3 *
4 * Copyright: Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved
5 * Authors: Walter Bright, https://www.digitalmars.com
6 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/string.d, root/_string.d)
8 * Documentation: https://dlang.org/phobos/dmd_root_string.html
9 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/string.d
10 */
11 module dmd.root.string;
12
/**
Views a `\0`-terminated C string as a D slice, without copying.

Params:
    s = pointer to the first character of a C string, or `null`

Returns:
    A slice over the characters of `s` up to, but not including, the
    terminating `\0`; `null` when `s` is `null`
*/
inout(char)[] toDString (inout(char)* s) pure nothrow @nogc
{
    import core.stdc.string : strlen;

    // Nothing to measure for a null pointer
    if (s is null)
        return null;

    const length = strlen(s);
    return s[0 .. length];
}
19
/**
Tests two slices for equality while ignoring letter case.

Each `char` is compared on its own, without any UTF-8 decoding.
As a result, it's only really accurate for plain ASCII strings.

Params:
    s1 = first string to compare
    s2 = second string to compare

Returns:
    `true` if `s1` and `s2` have the same length and every pair of
    characters at the same position is equal, either directly or after
    both have been passed through C's `toupper`; `false` otherwise
*/
extern(D) static bool iequals(const(char)[] s1, const(char)[] s2) pure nothrow @nogc
{
    import core.stdc.ctype : toupper;

    const count = s1.length;
    if (count != s2.length)
        return false;

    // Raw pointers on both sides: the length check above already guarantees
    // that every index below `count` is valid for both slices.
    const lhs = s1.ptr;
    const rhs = s2.ptr;
    for (size_t i = 0; i < count; ++i)
    {
        const a = lhs[i];
        const b = rhs[i];
        // Identical characters need no case folding at all
        if (a != b && toupper(a) != toupper(b))
            return false;
    }
    return true;
}
50
/**
Copy the content of `src` into a C-string ('\0' terminated) then call `dg`

The intent of this function is to provide an allocation-less
way to call a C function using a D slice.
The function internally allocates a buffer if needed, but frees it on exit.

Note:
    The argument to `dg` is `scope`. To keep the data around after `dg` exits,
    one has to copy it.

Params:
    src = Slice to use to call the C function
    dg  = Callable invoked with the terminated copy: the characters of `src`
          followed by a single `'\0'`

Returns:
    The return value of `dg`
*/
auto toCStringThen(alias dg)(const(char)[] src) nothrow
{
    import dmd.common.string : SmallBuffer;

    // One extra element to hold the terminator
    const len = src.length + 1;
    // Stack storage offered to `SmallBuffer`; left uninitialized on purpose,
    // the copy and the terminator are written explicitly below
    char[512] small = void;
    auto sb = SmallBuffer!char(len, small[]);
    scope ptr = sb[];
    ptr[0 .. src.length] = src[];
    ptr[src.length] = '\0';
    return dg(ptr);
}
82
unittest
{
    // The callback sees the source characters followed by one terminator
    assert(toCStringThen!((cstr) => cstr == "Hello world\0")("Hello world"));
    // A `\0` already present in the source is copied like any other character
    assert(toCStringThen!((cstr) => cstr == "Hello world\0\0")("Hello world\0"));
    // A `null` slice produces a buffer holding only the terminator
    assert(toCStringThen!((cstr) => cstr == "\0")(null));
}
89
/**
 * Removes at most one line terminator from the start of the given string.
 *
 * The following are what the Unicode standard considers as line terminators:
 *
 * | Name                | D Escape Sequence | Unicode Code Point |
 * |---------------------|-------------------|--------------------|
 * | Line feed           | `\n`              | `U+000A`           |
 * | Line tabulation     | `\v`              | `U+000B`           |
 * | Form feed           | `\f`              | `U+000C`           |
 * | Carriage return     | `\r`              | `U+000D`           |
 * | Next line           |                   | `U+0085`           |
 * | Line separator      |                   | `U+2028`           |
 * | Paragraph separator |                   | `U+2029`           |
 *
 * A leading `\r\n` pair is treated as one terminator and removed as a whole.
 *
 * Params:
 *  str = string to strip
 *
 * Returns:
 *  `str` without its leading line terminator, or `str` unchanged when it
 *  does not begin with one
 */
string stripLeadingLineTerminator(string str) pure nothrow @nogc @safe
{
    // UTF-8 encodings of the terminators that have no D escape sequence
    enum nextLine = "\xC2\x85";
    enum lineSeparator = "\xE2\x80\xA8";
    enum paragraphSeparator = "\xE2\x80\xA9";

    // Both three-byte separators are stripped with the same slice length
    static assert(lineSeparator.length == paragraphSeparator.length);

    // `true` when `text` begins with the bytes of `prefix`
    static bool beginsWith(string text, string prefix) pure nothrow @nogc @safe
    {
        return text.length >= prefix.length && text[0 .. prefix.length] == prefix;
    }

    if (str.length == 0)
        return str;

    const first = str[0];

    if (first == '\r')
    {
        // `\r\n` counts as a single terminator; a lone `\r` is one as well
        const isCrLf = str.length >= 2 && str[1] == '\n';
        return isCrLf ? str[2 .. $] : str[1 .. $];
    }

    if (first == '\n' || first == '\v' || first == '\f')
        return str[1 .. $];

    if (beginsWith(str, nextLine))
        return str[nextLine.length .. $];

    if (beginsWith(str, lineSeparator) || beginsWith(str, paragraphSeparator))
        return str[lineSeparator.length .. $];

    return str;
}
152
unittest
{
    // Asserts that stripping `input` yields `expected`
    static void check(string input, string expected)
    {
        assert(stripLeadingLineTerminator(input) == expected);
    }

    // Nothing to strip, including lone lead bytes of the multi-byte terminators
    check("", "");
    check("foo", "foo");
    check("\xC2foo", "\xC2foo");
    check("\xE2foo", "\xE2foo");

    // Single-byte terminators
    check("\nfoo", "foo");
    check("\vfoo", "foo");
    check("\ffoo", "foo");
    check("\rfoo", "foo");

    // Multi-byte terminators
    check("\u0085foo", "foo");
    check("\u2028foo", "foo");
    check("\u2029foo", "foo");

    // Only `\r\n` is removed as a pair; `\n\r` loses just the `\n`
    check("\n\rfoo", "\rfoo");
    check("\r\nfoo", "foo");
}
169
/**
 * A string comparison function that returns the same result as `strcmp`.
 *
 * Note: Strings are compared based on their ASCII values, no UTF-8 decoding.
 *
 * Some C functions (e.g. `qsort`) require an `int` result for comparison.
 *
 * Params:
 *  s1 = left-hand string
 *  s2 = right-hand string
 *
 * Returns:
 *  A negative value if `s1` sorts before `s2`, a positive value if it sorts
 *  after, and `0` if both are equal. When one string is a prefix of the
 *  other, the shorter one sorts first.
 *
 * See_Also: Druntime's `core.internal.string`
 */
int dstrcmp()( scope const char[] s1, scope const char[] s2 ) @trusted
{
    // Number of characters present in both strings
    const common = s2.length < s1.length ? s2.length : s1.length;

    if (!__ctfe)
    {
        import core.stdc.string : memcmp;

        const diff = memcmp(s1.ptr, s2.ptr, common);
        if (diff != 0)
            return diff;
    }
    else
    {
        // Compile-time evaluation: compare element by element instead of
        // calling into the C library
        for (size_t i = 0; i < common; ++i)
        {
            const a = s1[i];
            const b = s2[i];
            if (a != b)
                return a > b ? 1 : -1;
        }
    }

    // The shared prefix is identical, so the lengths decide
    if (s1.length < s2.length)
        return -1;
    return s1.length > s2.length ? 1 : 0;
}
199
200 //
unittest
{
    enum bread = "Baguette";
    enum pastry = "Croissant";

    // Run-time path
    assert(dstrcmp("Fraise", "Fraise") == 0);
    assert(dstrcmp(bread, pastry) < 0);
    assert(dstrcmp(pastry, bread) > 0);

    // Compile-time path
    static assert(dstrcmp(bread, pastry) < 0);

    // Multi-byte UTF-8 content, at run time and at compile time
    enum greeting = "안녕하세요!";
    assert(dstrcmp(greeting, greeting) == 0);
    static assert(dstrcmp(greeting, greeting) == 0);
}
213
/**
 * Turns a string literal of length `N` into a static array of length
 * `N + 1` that holds the same characters followed by a null character.
 * `N` is inferred from the literal.
 *
 * Params:
 *  literal = string literal
 *
 * Returns:
 *  A `char[N + 1]` containing `literal` with `'\0'` appended
 *
 * Notes:
 *  - LDC produces quite optimal code for short strings:
 *    - https://d.godbolt.org/z/M69Z1g
 *    - https://gist.github.com/PetarKirov/338e4ab9292b6b2b311a3070572a07fb (backup URL)
 */
char[N + 1] toStaticArray(size_t N)(scope const(char)[N] literal)
{
    // No default initialization: both statements below together write
    // every element
    typeof(return) terminated = void;
    terminated[0 .. N] = literal[];
    terminated[N] = '\0';
    return terminated;
}
234
///
@safe pure nothrow @nogc
unittest
{
    // Evaluated at compile time
    enum atCompileTime = toStaticArray("123");
    static assert(atCompileTime == "123\0");

    // The result can initialize mutable, const and immutable variables alike
    auto mutableCopy = toStaticArray("123");
    const constCopy = toStaticArray("123");
    immutable immutableCopy = toStaticArray("123");

    assert(mutableCopy == "123\0");
    assert(constCopy == "123\0");
    assert(immutableCopy == "123\0");

    // An empty literal still produces the terminator
    const terminatorOnly = toStaticArray("");
    static assert(terminatorOnly.length == 1);
    static assert(terminatorOnly[0] == '\0');
}
253
/**
 * Tests whether the C string `p` begins with `needle`.
 *
 * Params:
 *  p = the C string to check, must not be `null`
 *  needle = the string to look for; its `.ptr` must not be `null` and it
 *           must not contain a `\0` character
 * Returns:
 *  `true` if `p` starts with `needle`
 */
@system pure nothrow @nogc
bool startsWith(scope const(char)* p, scope const(char)[] needle)
in { assert(p && needle.ptr); }
do
{
    for (size_t i = 0; i < needle.length; ++i)
    {
        const wanted = needle[i];
        // A non-zero `wanted` always mismatches the terminator of `p`,
        // which stops the scan before it can read past that terminator
        assert(wanted);
        if (p[i] != wanted)
            return false;
    }
    return true;
}
276
///
@system pure nothrow @nogc
unittest
{
    const terminated = toStaticArray("123");
    const cstr = terminated.ptr;

    // Every prefix matches, the empty one included
    assert(startsWith(cstr, ""));
    assert(startsWith(cstr, "1"));
    assert(startsWith(cstr, "12"));
    assert(startsWith(cstr, "123"));

    // A needle longer than the C string does not match
    assert(!startsWith(cstr, "1234"));
}
289