/**
 * Contains various string related functions.
 *
 * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
 * Authors:   Walter Bright, http://www.digitalmars.com
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/string.d, root/_string.d)
 * Documentation:  https://dlang.org/phobos/dmd_root_string.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/string.d
 */
module dmd.root.string;

/**
Slices a `\0`-terminated C-string, excluding the terminator.

Params:
    s = pointer to a `\0`-terminated string, may be `null`

Returns:
    A slice over the characters of `s` (without the terminator),
    or `null` if `s` is `null`. No copy is made.
*/
inout(char)[] toDString (inout(char)* s) pure nothrow @nogc
{
    import core.stdc.string : strlen;
    return s ? s[0 .. strlen(s)] : null;
}

/**
Compare two slices for equality, in a case-insensitive way

Comparison is based on `char` and does not do decoding.
As a result, it's only really accurate for plain ASCII strings.

Params:
s1 = string to compare
s2 = string to compare

Returns:
`true` if `s1 == s2` regardless of case
*/
extern(D) static bool iequals(const(char)[] s1, const(char)[] s2)
{
    import core.stdc.ctype : toupper;

    if (s1.length != s2.length)
        return false;

    foreach (idx, c1; s1)
    {
        // Since we did a length check, it is safe to bypass bounds checking
        const c2 = s2.ptr[idx];
        // Only pay for the case conversion when the raw bytes differ
        if (c1 != c2)
            if (toupper(c1) != toupper(c2))
                return false;
    }
    return true;
}

/**
Copy the content of `src` into a C-string ('\0' terminated) then call `dg`

The intent of this function is to provide an allocation-less
way to call a C function using a D slice.
The function internally allocates a buffer if needed, but frees it on exit.

Note:
The argument to `dg` is `scope`. To keep the data around after `dg` exits,
one has to copy it.

Params:
src = Slice to use to call the C function
dg  = Delegate to call afterwards

Returns:
The return value of `dg`
*/
auto toCStringThen(alias dg)(const(char)[] src) nothrow
{
    import dmd.root.rmem : mem;

    // Room for the content plus the terminating '\0'
    const len = src.length + 1;
    char[512] small = void;
    // Short inputs use the stack buffer, longer ones a heap allocation
    scope ptr = (src.length < (small.length - 1))
        ? small[0 .. len]
        : (cast(char*)mem.xmalloc(len))[0 .. len];
    scope (exit)
    {
        // Only release the buffer when it did not come from the stack
        if (&ptr[0] != &small[0])
            mem.xfree(&ptr[0]);
    }
    ptr[0 .. src.length] = src[];
    ptr[src.length] = '\0';
    return dg(ptr);
}

unittest
{
    assert("Hello world".toCStringThen!((v) => v == "Hello world\0"));
    assert("Hello world\0".toCStringThen!((v) => v == "Hello world\0\0"));
    assert(null.toCStringThen!((v) => v == "\0"));
}

/**
 * Strips one leading line terminator of the given string.
 *
 * The following are what the Unicode standard considers as line terminators:
 *
 * | Name                | D Escape Sequence | Unicode Code Point |
 * |---------------------|-------------------|--------------------|
 * | Line feed           | `\n`              | `U+000A`           |
 * | Line tabulation     | `\v`              | `U+000B`           |
 * | Form feed           | `\f`              | `U+000C`           |
 * | Carriage return     | `\r`              | `U+000D`           |
 * | Next line           |                   | `U+0085`           |
 * | Line separator      |                   | `U+2028`           |
 * | Paragraph separator |                   | `U+2029`           |
 *
 * This function will also strip `\r\n`.
 *
 * Params:
 *  str = the string to strip
 *
 * Returns:
 *  `str` without its leading line terminator, or `str` unchanged if it
 *  does not start with one
 */
string stripLeadingLineTerminator(string str) pure nothrow @nogc @safe
{
    // UTF-8 encodings of the multi-byte line terminators
    enum nextLine = "\xC2\x85";
    enum lineSeparator = "\xE2\x80\xA8";
    enum paragraphSeparator = "\xE2\x80\xA9";

    // Both separators share one `case` below, so they must have equal length
    static assert(lineSeparator.length == paragraphSeparator.length);

    if (str.length == 0)
        return str;

    switch (str[0])
    {
        case '\r':
        {
            if (str.length >= 2 && str[1] == '\n')
                return str[2 .. $];
            // A lone `\r` is handled like the other single-byte terminators
            goto case;
        }
        case '\v', '\f', '\n': return str[1 .. $];

        case nextLine[0]:
        {
            if (str.length >= 2 && str[0 .. 2] == nextLine)
                return str[2 .. $];

            return str;
        }

        case lineSeparator[0]:
        {
            if (str.length >= lineSeparator.length)
            {
                const prefix = str[0 .. lineSeparator.length];

                if (prefix == lineSeparator || prefix == paragraphSeparator)
                    return str[lineSeparator.length .. $];
            }

            return str;
        }

        default: return str;
    }
}

unittest
{
    assert("".stripLeadingLineTerminator == "");
    assert("foo".stripLeadingLineTerminator == "foo");
    assert("\xC2foo".stripLeadingLineTerminator == "\xC2foo");
    assert("\xE2foo".stripLeadingLineTerminator == "\xE2foo");
    assert("\nfoo".stripLeadingLineTerminator == "foo");
    assert("\vfoo".stripLeadingLineTerminator == "foo");
    assert("\ffoo".stripLeadingLineTerminator == "foo");
    assert("\rfoo".stripLeadingLineTerminator == "foo");
    assert("\u0085foo".stripLeadingLineTerminator == "foo");
    assert("\u2028foo".stripLeadingLineTerminator == "foo");
    assert("\u2029foo".stripLeadingLineTerminator == "foo");
    assert("\n\rfoo".stripLeadingLineTerminator == "\rfoo");
    assert("\r\nfoo".stripLeadingLineTerminator == "foo");
}

/**
 * A string comparison function that orders strings the same way as `strcmp`.
 *
 * Only the sign of the result is meaningful: at compile time the result is
 * `-1`, `0` or `1`, while at run time a non-zero result of `memcmp` is
 * returned as is.
 *
 * Note: Strings are compared based on their ASCII values, no UTF-8 decoding.
 *
 * Some C functions (e.g. `qsort`) require an `int` result for comparison.
 * See_Also: Druntime's `core.internal.string`
 *
 * Params:
 *  s1 = left-hand side of the comparison
 *  s2 = right-hand side of the comparison
 *
 * Returns:
 *  A negative value if `s1` sorts before `s2`, `0` if both are equal,
 *  a positive value otherwise
 */
int dstrcmp()( scope const char[] s1, scope const char[] s2 ) @trusted
{
    // Compare the common prefix first, the lengths break ties afterwards
    immutable len = s1.length <= s2.length ? s1.length : s2.length;
    if (__ctfe)
    {
        // `memcmp` is not usable during CTFE, so compare element by element
        foreach (const u; 0 .. len)
        {
            if (s1[u] != s2[u])
                return s1[u] > s2[u] ? 1 : -1;
        }
    }
    else
    {
        import core.stdc.string : memcmp;

        const ret = memcmp( s1.ptr, s2.ptr, len );
        if ( ret )
            return ret;
    }
    return s1.length < s2.length ? -1 : (s1.length > s2.length);
}

//
unittest
{
    // Only the sign is checked: `memcmp` does not promise to return -1 / 1
    assert(dstrcmp("Fraise", "Fraise") == 0);
    assert(dstrcmp("Baguette", "Croissant") < 0);
    assert(dstrcmp("Croissant", "Baguette") > 0);

    static assert(dstrcmp("Baguette", "Croissant") < 0);

    // With an identical common prefix, the shorter string sorts first
    assert(dstrcmp("Bag", "Baguette") < 0);
    assert(dstrcmp("Baguette", "Bag") > 0);

    // UTF-8 decoding for the CT variant
    assert(dstrcmp("안녕하세요!", "안녕하세요!") == 0);
    static assert(dstrcmp("안녕하세요!", "안녕하세요!") == 0);
}

/**
 * Infers the length `N` of a string literal and coerces its type to a static
 * array with length `N + 1`. Returns the string with a null character appended
 * to the end.
 *
 * Params:
 *  literal = string literal
 *
 * Returns:
 *  A static array holding the `N` characters of `literal` followed by `'\0'`
 *
 * Notes:
 *  - LDC produces quite optimal code for short strings:
 *    - https://d.godbolt.org/z/M69Z1g
 *    - https://gist.github.com/PetarKirov/338e4ab9292b6b2b311a3070572a07fb (backup URL)
 */
char[N + 1] toStaticArray(size_t N)(scope const(char)[N] literal)
{
    // Every element is assigned below, so default initialization is skipped
    char[N+1] result = void;
    result[0..N] = literal[0..N];
    result[N] = 0;
    return result;
}

///
@safe pure nothrow @nogc
unittest
{
    auto m = "123".toStaticArray;
    const c = "123".toStaticArray;
    immutable i = "123".toStaticArray;
    enum e = "123".toStaticArray;

    assert(m == "123\0");
    assert(c == "123\0");
    assert(i == "123\0");
    static assert(e == "123\0");

    const empty = "".toStaticArray;
    static assert(empty.length == 1);
    static assert(empty[0] == '\0');
}

/**
 * Checks if C string `p` starts with `needle`.
 * Params:
 *     p = the C string to check, must not be `null`
 *     needle = the string to look for, must have a non-`null` pointer and
 *              must not contain `'\0'`
 * Returns:
 *    `true` if `p` starts with `needle`
 */
@system pure nothrow @nogc
bool startsWith(scope const(char)* p, scope const(char)[] needle)
in { assert(p && needle.ptr); }
do
{
    foreach (const c; needle)
    {
        // `c` is never '\0', so reaching the terminator of `p` is a mismatch
        // and the loop returns before reading past the end of `p`
        assert(c);
        if (c != *p)
            return false;
        ++p;
    }
    return true;
}

///
@system pure nothrow @nogc
unittest
{
    const buf = "123".toStaticArray;
    const ptr = &buf[0];
    assert(ptr.startsWith(""));
    assert(ptr.startsWith("1"));
    assert(ptr.startsWith("12"));
    assert(ptr.startsWith("123"));
    assert(!ptr.startsWith("1234"));
}