1 /**
2  * Contains various string related functions.
3  *
4  * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5  * Authors:   Walter Bright, http://www.digitalmars.com
6  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/string.d, root/_string.d)
8  * Documentation:  https://dlang.org/phobos/dmd_root_string.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/string.d
10  */
11 module dmd.root..string;
12 
/**
Turn a `\0`-terminated C string into a D slice.

The returned slice aliases the memory behind `s` (nothing is copied) and
stops right before the terminating `\0`.

Params:
s = pointer to a `\0`-terminated string, may be `null`

Returns:
A slice over the characters of `s`, or `null` when `s` is `null`
*/
inout(char)[] toDString (inout(char)* s) pure nothrow @nogc
{
    import core.stdc.string : strlen;

    if (s is null)
        return null;

    const count = strlen(s);
    return s[0 .. count];
}
19 
/**
Case-insensitive equality test for two slices

Each pair of `char`s is compared directly and, when they differ, compared
again after passing both through C's `toupper`. No UTF-8 decoding takes
place, so the result is only meaningful for plain ASCII content.

Params:
s1 = first string to compare
s2 = second string to compare

Returns:
`true` if both slices have the same length and every pair of characters
matches when case is ignored, `false` otherwise
*/
extern(D) static bool iequals(const(char)[] s1, const(char)[] s2)
{
    import core.stdc.ctype : toupper;

    const count = s1.length;
    if (count != s2.length)
        return false;

    // Both slices hold exactly `count` chars, so raw pointer indexing
    // cannot run out of range.
    const lhs = s1.ptr;
    const rhs = s2.ptr;
    for (size_t i = 0; i < count; ++i)
    {
        const a = lhs[i];
        const b = rhs[i];

        // Identical chars need no case folding
        if (a == b)
            continue;
        if (toupper(a) != toupper(b))
            return false;
    }
    return true;
}
50 
/**
Copy the content of `src` into a C-string ('\0' terminated) then call `dg`

The intent of this function is to provide an allocation-less
way to call a C function using a D slice.
Inputs that fit, together with their terminator, into a 512-byte stack buffer
are staged there. Longer inputs are copied into a temporary buffer obtained
from `mem.xmalloc`, which is released with `mem.xfree` before returning.

Note:
The argument to `dg` is `scope`. To keep the data around after `dg` exits,
one has to copy it.

Params:
src = Slice to use to call the C function
dg  = Delegate to call afterwards; it receives a `char[]` holding the content
      of `src` followed by one `'\0'` (so its length is `src.length + 1`)

Returns:
The return value of `dg`
*/
auto toCStringThen(alias dg)(const(char)[] src) nothrow
{
    import dmd.root.rmem : mem;

    const len = src.length + 1;
    char[512] small = void;

    // The stack buffer can be used whenever `src` plus its terminator fits,
    // i.e. for every `src.length` up to `small.length - 1`.
    const onHeap = len > small.length;
    scope ptr = onHeap
                    ? (cast(char*)mem.xmalloc(len))[0 .. len]
                    : small[0 .. len];
    scope (exit)
    {
        // Only the heap fallback owns memory that must be given back
        if (onHeap)
            mem.xfree(ptr.ptr);
    }
    ptr[0 .. src.length] = src[];
    ptr[src.length] = '\0';
    return dg(ptr);
}

unittest
{
    assert("Hello world".toCStringThen!((v) => v == "Hello world\0"));
    assert("Hello world\0".toCStringThen!((v) => v == "Hello world\0\0"));
    assert(null.toCStringThen!((v) => v == "\0"));

    // Lengths around the stack-buffer capacity exercise both the stack
    // and the heap path.
    char[600] big = 'x';
    foreach (const n; [510, 511, 512, 600])
    {
        const input = big[0 .. n];
        assert(input.toCStringThen!((v) => v.length == input.length + 1
            && v[0 .. $ - 1] == input
            && v[$ - 1] == '\0'));
    }
}
94 
/**
 * Strips one leading line terminator of the given string.
 *
 * The following are what the Unicode standard considers as line terminators:
 *
 * | Name                | D Escape Sequence | Unicode Code Point |
 * |---------------------|-------------------|--------------------|
 * | Line feed           | `\n`              | `U+000A`           |
 * | Line tabulation     | `\v`              | `U+000B`           |
 * | Form feed           | `\f`              | `U+000C`           |
 * | Carriage return     | `\r`              | `U+000D`           |
 * | Next line           |                   | `U+0085`           |
 * | Line separator      |                   | `U+2028`           |
 * | Paragraph separator |                   | `U+2029`           |
 *
 * The two-character sequences `\r\n` and `\n\r` are each treated as a single
 * line terminator and stripped as a unit.
 *
 * The multi-byte terminators are matched on their UTF-8 encoding; a string
 * that merely starts with the same lead byte is returned unchanged.
 *
 * Params:
 *  str = the string to strip
 *
 * Returns:
 *  A slice of `str` starting after its first line terminator, or `str`
 *  itself if it does not start with one
 */
string stripLeadingLineTerminator(string str) pure nothrow @nogc @safe
{
    enum nextLine = "\xC2\x85";
    enum lineSeparator = "\xE2\x80\xA8";
    enum paragraphSeparator = "\xE2\x80\xA9";

    if (str.length == 0)
        return str;

    switch (str[0])
    {
        case '\r':
        {
            // CR LF forms a single line terminator
            if (str.length >= 2 && str[1] == '\n')
                return str[2 .. $];
            return str[1 .. $];
        }

        case '\n':
        {
            // LF CR is consumed as one unit as well (kept for
            // backward compatibility)
            if (str.length >= 2 && str[1] == '\r')
                return str[2 .. $];
            return str[1 .. $];
        }

        case '\v', '\f': return str[1 .. $];

        case nextLine[0]:
        {
            if (str.length >= 2 && str[0 .. 2] == nextLine)
                return str[2 .. $];

            return str;
        }

        // `lineSeparator` and `paragraphSeparator` share their first two bytes
        case lineSeparator[0]:
        {
            if (str.length >= 3)
            {
                const prefix = str[0 .. 3];

                if (prefix == lineSeparator || prefix == paragraphSeparator)
                    return str[3 .. $];
            }

            return str;
        }

        default: return str;
    }
}

unittest
{
    assert("".stripLeadingLineTerminator == "");
    assert("foo".stripLeadingLineTerminator == "foo");
    assert("\xC2foo".stripLeadingLineTerminator == "\xC2foo");
    assert("\xE2foo".stripLeadingLineTerminator == "\xE2foo");
    assert("\nfoo".stripLeadingLineTerminator == "foo");
    assert("\vfoo".stripLeadingLineTerminator == "foo");
    assert("\ffoo".stripLeadingLineTerminator == "foo");
    assert("\rfoo".stripLeadingLineTerminator == "foo");
    assert("\u0085foo".stripLeadingLineTerminator == "foo");
    assert("\u2028foo".stripLeadingLineTerminator == "foo");
    assert("\u2029foo".stripLeadingLineTerminator == "foo");
    assert("\n\rfoo".stripLeadingLineTerminator == "foo");
    assert("\r\nfoo".stripLeadingLineTerminator == "foo");

    // Only a single terminator is removed
    assert("\r\n\r\nfoo".stripLeadingLineTerminator == "\r\nfoo");
    assert("\n\nfoo".stripLeadingLineTerminator == "\nfoo");

    // A lone terminator leaves an empty string
    assert("\r".stripLeadingLineTerminator == "");
    assert("\n".stripLeadingLineTerminator == "");
}
171 
/**
 * A string comparison function in the spirit of C's `strcmp`
 *
 * The common prefix of both strings is compared `char` by `char` (an explicit
 * loop during CTFE, `memcmp` at run time). If the common prefix is equal, the
 * shorter string orders first.
 *
 * Note: Strings are compared based on their `char` values, no UTF-8 decoding.
 *
 * Some C functions (e.g. `qsort`) require an `int` result for comparison.
 *
 * Params:
 *  s1 = left-hand side of the comparison
 *  s2 = right-hand side of the comparison
 *
 * Returns:
 *  A negative value if `s1` orders before `s2`, `0` if both are equal,
 *  a positive value otherwise
 *
 * See_Also: Druntime's `core.internal.string`
 */
int dstrcmp()( scope const char[] s1, scope const char[] s2 ) @trusted
{
    // Number of chars present in both strings
    immutable common = s2.length < s1.length ? s2.length : s1.length;

    if (!__ctfe)
    {
        import core.stdc.string : memcmp;

        if (const diff = memcmp(s1.ptr, s2.ptr, common))
            return diff;
    }
    else
    {
        // `memcmp` is not available to the compile-time interpreter
        for (size_t i = 0; i < common; ++i)
        {
            const a = s1[i];
            const b = s2[i];
            if (a != b)
                return a > b ? 1 : -1;
        }
    }

    // Equal common prefix: the length decides
    if (s1.length < s2.length)
        return -1;
    return s1.length > s2.length ? 1 : 0;
}

// Exercises both the run-time and the compile-time code path
unittest
{
    assert(dstrcmp("Fraise", "Fraise")      == 0);
    assert(dstrcmp("Baguette", "Croissant") == -1);
    assert(dstrcmp("Croissant", "Baguette") == 1);

    static assert(dstrcmp("Baguette", "Croissant") == -1);

    // Multi-byte UTF-8 content, at run time and at compile time
    assert(dstrcmp("안녕하세요!", "안녕하세요!") == 0);
    static assert(dstrcmp("안녕하세요!", "안녕하세요!") == 0);
}
215 
/**
 * Turns a string literal of length `N` into a static array of length `N + 1`
 * that holds the same characters followed by a null character.
 *
 * `N` is inferred from the argument, so the function is normally called
 * without explicit template arguments.
 *
 * Params:
 *  literal = string literal
 *
 * Returns:
 *  A `char[N + 1]` containing `literal` with `'\0'` appended
 *
 * Notes:
 *  - LDC produces quite optimal code for short strings:
 *    - https://d.godbolt.org/z/M69Z1g
 *    - https://gist.github.com/PetarKirov/338e4ab9292b6b2b311a3070572a07fb (backup URL)
*/
char[N + 1] toStaticArray(size_t N)(scope const(char)[N] literal)
{
    // Every element is written below, so default initialization is skipped
    char[N + 1] terminated = void;
    terminated[0 .. N] = literal[];
    terminated[N] = '\0';
    return terminated;
}

///
@safe pure nothrow @nogc
unittest
{
    // The result can initialize mutable, const, immutable and enum values
    auto m = "123".toStaticArray;
    const c = "123".toStaticArray;
    immutable i = "123".toStaticArray;
    enum e = "123".toStaticArray;

    assert(m == "123\0");
    assert(c == "123\0");
    assert(i == "123\0");
    static assert(e == "123\0");

    // An empty literal yields just the terminator
    const empty = "".toStaticArray;
    static assert(empty.length == 1);
    static assert(empty[0] == '\0');
}
255 
/**
 * Tests whether the C string `p` begins with `needle`.
 *
 * The characters of `needle` are compared one by one against `p` and the
 * scan stops at the first mismatch. Each `needle` character is asserted to
 * be non-zero, so reaching the terminator of `p` counts as a mismatch.
 *
 * Params:
 *     p = the C string to check, must not be `null`
 *     needle = the prefix to look for; its `.ptr` must not be `null`
 * Returns:
 *    `true` if `p` starts with `needle`
 */
@system pure nothrow @nogc
bool startsWith(scope const(char)* p, scope const(char)[] needle)
in { assert(p && needle.ptr); }
do
{
    for (size_t i = 0; i < needle.length; ++i)
    {
        const expected = needle[i];
        assert(expected);
        if (p[i] != expected)
            return false;
    }
    return true;
}
278 
///
@system pure nothrow @nogc
unittest
{
    // A `\0`-terminated copy of "123" to scan
    const terminated = "123".toStaticArray;
    const(char)* cstr = terminated.ptr;

    // Every prefix matches, including the empty one
    assert(startsWith(cstr, ""));
    assert(startsWith(cstr, "1"));
    assert(startsWith(cstr, "12"));
    assert(startsWith(cstr, "123"));

    // A needle longer than the string does not match
    assert(!startsWith(cstr, "1234"));
}