1 /** 2 * A specialized associative array with string keys stored in a variable length structure. 3 * 4 * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved 5 * Authors: Walter Bright, http://www.digitalmars.com 6 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 7 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/stringtable.d, root/_stringtable.d) 8 * Documentation: https://dlang.org/phobos/dmd_root_stringtable.html 9 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/stringtable.d 10 */ 11 12 module dmd.root.stringtable; 13 14 import core.stdc..string; 15 import dmd.root.rmem, dmd.root.hash; 16 17 private enum POOL_BITS = 12; 18 private enum POOL_SIZE = (1U << POOL_BITS); 19 20 /* 21 Returns the smallest integer power of 2 larger than val. 22 if val > 2^^63 on 64-bit targets or val > 2^^31 on 32-bit targets it enters an 23 endless loop because of overflow. 24 */ 25 private size_t nextpow2(size_t val) @nogc nothrow pure @safe 26 { 27 size_t res = 1; 28 while (res < val) 29 res <<= 1; 30 return res; 31 } 32 33 unittest 34 { 35 assert(nextpow2(0) == 1); 36 assert(nextpow2(0xFFFF) == (1 << 16)); 37 assert(nextpow2(size_t.max / 2) == size_t.max / 2 + 1); 38 // note: nextpow2((1UL << 63) + 1) results in an endless loop 39 } 40 41 private enum loadFactorNumerator = 8; 42 private enum loadFactorDenominator = 10; // for a load factor of 0.8 43 44 private struct StringEntry 45 { 46 uint hash; 47 uint vptr; 48 } 49 50 // StringValue is a variable-length structure. It has neither proper c'tors nor a 51 // factory method because the only thing which should be creating these is StringTable. 52 struct StringValue(T) 53 { 54 T value; //T is/should typically be a pointer or a slice 55 private size_t length; 56 57 char* lstring() @nogc nothrow pure return 58 { 59 return cast(char*)(&this + 1); 60 } 61 62 size_t len() const @nogc nothrow pure @safe 63 { 64 return length; 65 } 66 67 const(char)* toDchars() const @nogc nothrow pure return 68 { 69 return cast(const(char)*)(&this + 1); 70 } 71 72 /// Returns: The content of this entry as a D slice 73 inout(char)[] toString() inout @nogc nothrow pure 74 { 75 return (cast(inout(char)*)(&this + 1))[0 .. length]; 76 } 77 } 78 79 struct StringTable(T) 80 { 81 private: 82 StringEntry[] table; 83 ubyte*[] pools; 84 size_t nfill; 85 size_t count; 86 size_t countTrigger; // amount which will trigger growing the table 87 88 public: 89 void _init(size_t size = 0) nothrow pure 90 { 91 size = nextpow2((size * loadFactorDenominator) / loadFactorNumerator); 92 if (size < 32) 93 size = 32; 94 table = (cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof))[0 .. size]; 95 countTrigger = (table.length * loadFactorNumerator) / loadFactorDenominator; 96 pools = null; 97 nfill = 0; 98 count = 0; 99 } 100 101 void reset(size_t size = 0) nothrow pure 102 { 103 freeMem(); 104 _init(size); 105 } 106 107 ~this() nothrow pure 108 { 109 freeMem(); 110 } 111 112 /** 113 Looks up the given string in the string table and returns its associated 114 value. 115 116 Params: 117 s = the string to look up 118 length = the length of $(D_PARAM s) 119 str = the string to look up 120 121 Returns: the string's associated value, or `null` if the string doesn't 122 exist in the string table 123 */ 124 inout(StringValue!T)* lookup(const(char)[] str) inout @nogc nothrow pure 125 { 126 const(size_t) hash = calcHash(str); 127 const(size_t) i = findSlot(hash, str); 128 // printf("lookup %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: null); 129 return getValue(table[i].vptr); 130 } 131 132 /// ditto 133 inout(StringValue!T)* lookup(const(char)* s, size_t length) inout @nogc nothrow pure 134 { 135 return lookup(s[0 .. length]); 136 } 137 138 /** 139 Inserts the given string and the given associated value into the string 140 table. 141 142 Params: 143 s = the string to insert 144 length = the length of $(D_PARAM s) 145 ptrvalue = the value to associate with the inserted string 146 str = the string to insert 147 value = the value to associate with the inserted string 148 149 Returns: the newly inserted value, or `null` if the string table already 150 contains the string 151 */ 152 StringValue!(T)* insert(const(char)[] str, T value) nothrow pure 153 { 154 const(size_t) hash = calcHash(str); 155 size_t i = findSlot(hash, str); 156 if (table[i].vptr) 157 return null; // already in table 158 if (++count > countTrigger) 159 { 160 grow(); 161 i = findSlot(hash, str); 162 } 163 table[i].hash = hash; 164 table[i].vptr = allocValue(str, value); 165 // printf("insert %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL); 166 return getValue(table[i].vptr); 167 } 168 169 /// ditto 170 StringValue!(T)* insert(const(char)* s, size_t length, T value) nothrow pure 171 { 172 return insert(s[0 .. length], value); 173 } 174 175 StringValue!(T)* update(const(char)[] str) nothrow pure 176 { 177 const(size_t) hash = calcHash(str); 178 size_t i = findSlot(hash, str); 179 if (!table[i].vptr) 180 { 181 if (++count > countTrigger) 182 { 183 grow(); 184 i = findSlot(hash, str); 185 } 186 table[i].hash = hash; 187 table[i].vptr = allocValue(str, T.init); 188 } 189 // printf("update %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL); 190 return getValue(table[i].vptr); 191 } 192 193 StringValue!(T)* update(const(char)* s, size_t length) nothrow pure 194 { 195 return update(s[0 .. length]); 196 } 197 198 /******************************** 199 * Walk the contents of the string table, 200 * calling fp for each entry. 201 * Params: 202 * fp = function to call. Returns !=0 to stop 203 * Returns: 204 * last return value of fp call 205 */ 206 int apply(int function(const(StringValue!T)*) nothrow fp) nothrow 207 { 208 foreach (const se; table) 209 { 210 if (!se.vptr) 211 continue; 212 const sv = getValue(se.vptr); 213 int result = (*fp)(sv); 214 if (result) 215 return result; 216 } 217 return 0; 218 } 219 220 /// ditto 221 extern(D) int opApply(scope int delegate(const(StringValue!T)*) nothrow dg) nothrow 222 { 223 foreach (const se; table) 224 { 225 if (!se.vptr) 226 continue; 227 const sv = getValue(se.vptr); 228 int result = dg(sv); 229 if (result) 230 return result; 231 } 232 return 0; 233 } 234 235 private: 236 /// Free all memory in use by this StringTable 237 void freeMem() nothrow pure 238 { 239 foreach (pool; pools) 240 mem.xfree(pool); 241 mem.xfree(table.ptr); 242 mem.xfree(pools.ptr); 243 table = null; 244 pools = null; 245 } 246 247 uint allocValue(const(char)[] str, T value) nothrow pure 248 { 249 const(size_t) nbytes = (StringValue!T).sizeof + str.length + 1; 250 if (!pools.length || nfill + nbytes > POOL_SIZE) 251 { 252 pools = (cast(ubyte**) mem.xrealloc(pools.ptr, (pools.length + 1) * (pools[0]).sizeof))[0 .. pools.length + 1]; 253 pools[$-1] = cast(ubyte*) mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE); 254 if (mem.isGCEnabled) 255 memset(pools[$ - 1], 0xff, POOL_SIZE); // 0xff less likely to produce GC pointer 256 nfill = 0; 257 } 258 StringValue!(T)* sv = cast(StringValue!(T)*)&pools[$ - 1][nfill]; 259 sv.value = value; 260 sv.length = str.length; 261 .memcpy(sv.lstring(), str.ptr, str.length); 262 sv.lstring()[str.length] = 0; 263 const(uint) vptr = cast(uint)(pools.length << POOL_BITS | nfill); 264 nfill += nbytes + (-nbytes & 7); // align to 8 bytes 265 return vptr; 266 } 267 268 inout(StringValue!T)* getValue(uint vptr) inout @nogc nothrow pure 269 { 270 if (!vptr) 271 return null; 272 const(size_t) idx = (vptr >> POOL_BITS) - 1; 273 const(size_t) off = vptr & POOL_SIZE - 1; 274 return cast(inout(StringValue!T)*)&pools[idx][off]; 275 } 276 277 size_t findSlot(hash_t hash, const(char)[] str) const @nogc nothrow pure 278 { 279 // quadratic probing using triangular numbers 280 // http://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774 281 for (size_t i = hash & (table.length - 1), j = 1;; ++j) 282 { 283 const(StringValue!T)* sv; 284 auto vptr = table[i].vptr; 285 if (!vptr || table[i].hash == hash && (sv = getValue(vptr)).length == str.length && .memcmp(str.ptr, sv.toDchars(), str.length) == 0) 286 return i; 287 i = (i + j) & (table.length - 1); 288 } 289 } 290 291 void grow() nothrow pure 292 { 293 const odim = table.length; 294 auto otab = table; 295 const ndim = table.length * 2; 296 countTrigger = (ndim * loadFactorNumerator) / loadFactorDenominator; 297 table = (cast(StringEntry*)mem.xcalloc_noscan(ndim, (table[0]).sizeof))[0 .. ndim]; 298 foreach (const se; otab[0 .. odim]) 299 { 300 if (!se.vptr) 301 continue; 302 const sv = getValue(se.vptr); 303 table[findSlot(se.hash, sv.toString())] = se; 304 } 305 mem.xfree(otab.ptr); 306 } 307 } 308 309 nothrow unittest 310 { 311 StringTable!(const(char)*) tab; 312 tab._init(10); 313 314 // construct two strings with the same text, but a different pointer 315 const(char)[6] fooBuffer = "foofoo"; 316 const(char)[] foo = fooBuffer[0 .. 3]; 317 const(char)[] fooAltPtr = fooBuffer[3 .. 6]; 318 319 assert(foo.ptr != fooAltPtr.ptr); 320 321 // first insertion returns value 322 assert(tab.insert(foo, foo.ptr).value == foo.ptr); 323 324 // subsequent insertion of same string return null 325 assert(tab.insert(foo.ptr, foo.length, foo.ptr) == null); 326 assert(tab.insert(fooAltPtr, foo.ptr) == null); 327 328 const lookup = tab.lookup("foo"); 329 assert(lookup.value == foo.ptr); 330 assert(lookup.len == 3); 331 assert(lookup.toString() == "foo"); 332 333 assert(tab.lookup("bar") == null); 334 tab.update("bar".ptr, "bar".length); 335 assert(tab.lookup("bar").value == null); 336 337 tab.reset(0); 338 assert(tab.lookup("foo".ptr, "foo".length) == null); 339 //tab.insert("bar"); 340 } 341 342 nothrow unittest 343 { 344 StringTable!(void*) tab; 345 tab._init(100); 346 347 enum testCount = 2000; 348 349 char[2 * testCount] buf; 350 351 foreach(i; 0 .. testCount) 352 { 353 buf[i * 2 + 0] = cast(char) (i % 256); 354 buf[i * 2 + 1] = cast(char) (i / 256); 355 auto toInsert = cast(const(char)[]) buf[i * 2 .. i * 2 + 2]; 356 tab.insert(toInsert, cast(void*) i); 357 } 358 359 foreach(i; 0 .. testCount) 360 { 361 auto toLookup = cast(const(char)[]) buf[i * 2 .. i * 2 + 2]; 362 assert(tab.lookup(toLookup).value == cast(void*) i); 363 } 364 } 365 366 nothrow unittest 367 { 368 StringTable!(int) tab; 369 tab._init(10); 370 tab.insert("foo", 4); 371 tab.insert("bar", 6); 372 373 static int resultFp = 0; 374 int resultDg = 0; 375 static bool returnImmediately = false; 376 377 int function(const(StringValue!int)*) nothrow applyFunc = (const(StringValue!int)* s) 378 { 379 resultFp += s.value; 380 return returnImmediately; 381 }; 382 383 scope int delegate(const(StringValue!int)*) nothrow applyDeleg = (const(StringValue!int)* s) 384 { 385 resultDg += s.value; 386 return returnImmediately; 387 }; 388 389 tab.apply(applyFunc); 390 tab.opApply(applyDeleg); 391 392 assert(resultDg == 10); 393 assert(resultFp == 10); 394 395 returnImmediately = true; 396 397 tab.apply(applyFunc); 398 tab.opApply(applyDeleg); 399 400 // Order of string table iteration is not specified, either foo or bar could 401 // have been visited first. 402 assert(resultDg == 14 || resultDg == 16); 403 assert(resultFp == 14 || resultFp == 16); 404 }