1 /**
2  * A specialized associative array with string keys stored in a variable length structure.
3  *
4  * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5  * Authors:   Walter Bright, http://www.digitalmars.com
6  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/stringtable.d, root/_stringtable.d)
8  * Documentation:  https://dlang.org/phobos/dmd_root_stringtable.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/stringtable.d
10  */
11 
12 module dmd.root.stringtable;
13 
14 import core.stdc..string;
15 import dmd.root.rmem, dmd.root.hash;
16 
17 private enum POOL_BITS = 12;
18 private enum POOL_SIZE = (1U << POOL_BITS);
19 
20 /*
21 Returns the smallest integer power of 2 larger than val.
22 if val > 2^^63 on 64-bit targets or val > 2^^31 on 32-bit targets it enters an
23 endless loop because of overflow.
24 */
25 private size_t nextpow2(size_t val) @nogc nothrow pure @safe
26 {
27     size_t res = 1;
28     while (res < val)
29         res <<= 1;
30     return res;
31 }
32 
33 unittest
34 {
35     assert(nextpow2(0) == 1);
36     assert(nextpow2(0xFFFF) == (1 << 16));
37     assert(nextpow2(size_t.max / 2) == size_t.max / 2 + 1);
38     // note: nextpow2((1UL << 63) + 1) results in an endless loop
39 }
40 
41 private enum loadFactorNumerator = 8;
42 private enum loadFactorDenominator = 10;        // for a load factor of 0.8
43 
44 private struct StringEntry
45 {
46     uint hash;
47     uint vptr;
48 }
49 
50 // StringValue is a variable-length structure. It has neither proper c'tors nor a
51 // factory method because the only thing which should be creating these is StringTable.
52 struct StringValue(T)
53 {
54     T value; //T is/should typically be a pointer or a slice
55     private size_t length;
56 
57     char* lstring() @nogc nothrow pure return
58     {
59         return cast(char*)(&this + 1);
60     }
61 
62     size_t len() const @nogc nothrow pure @safe
63     {
64         return length;
65     }
66 
67     const(char)* toDchars() const @nogc nothrow pure return
68     {
69         return cast(const(char)*)(&this + 1);
70     }
71 
72     /// Returns: The content of this entry as a D slice
73     inout(char)[] toString() inout @nogc nothrow pure
74     {
75         return (cast(inout(char)*)(&this + 1))[0 .. length];
76     }
77 }
78 
79 struct StringTable(T)
80 {
81 private:
82     StringEntry[] table;
83     ubyte*[] pools;
84     size_t nfill;
85     size_t count;
86     size_t countTrigger;   // amount which will trigger growing the table
87 
88 public:
89     void _init(size_t size = 0) nothrow pure
90     {
91         size = nextpow2((size * loadFactorDenominator) / loadFactorNumerator);
92         if (size < 32)
93             size = 32;
94         table = (cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof))[0 .. size];
95         countTrigger = (table.length * loadFactorNumerator) / loadFactorDenominator;
96         pools = null;
97         nfill = 0;
98         count = 0;
99     }
100 
101     void reset(size_t size = 0) nothrow pure
102     {
103         freeMem();
104         _init(size);
105     }
106 
107     ~this() nothrow pure
108     {
109         freeMem();
110     }
111 
112     /**
113     Looks up the given string in the string table and returns its associated
114     value.
115 
116     Params:
117      s = the string to look up
118      length = the length of $(D_PARAM s)
119      str = the string to look up
120 
121     Returns: the string's associated value, or `null` if the string doesn't
122      exist in the string table
123     */
124     inout(StringValue!T)* lookup(const(char)[] str) inout @nogc nothrow pure
125     {
126         const(size_t) hash = calcHash(str);
127         const(size_t) i = findSlot(hash, str);
128         // printf("lookup %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: null);
129         return getValue(table[i].vptr);
130     }
131 
132     /// ditto
133     inout(StringValue!T)* lookup(const(char)* s, size_t length) inout @nogc nothrow pure
134     {
135         return lookup(s[0 .. length]);
136     }
137 
138     /**
139     Inserts the given string and the given associated value into the string
140     table.
141 
142     Params:
143      s = the string to insert
144      length = the length of $(D_PARAM s)
145      ptrvalue = the value to associate with the inserted string
146      str = the string to insert
147      value = the value to associate with the inserted string
148 
149     Returns: the newly inserted value, or `null` if the string table already
150      contains the string
151     */
152     StringValue!(T)* insert(const(char)[] str, T value) nothrow pure
153     {
154         const(size_t) hash = calcHash(str);
155         size_t i = findSlot(hash, str);
156         if (table[i].vptr)
157             return null; // already in table
158         if (++count > countTrigger)
159         {
160             grow();
161             i = findSlot(hash, str);
162         }
163         table[i].hash = hash;
164         table[i].vptr = allocValue(str, value);
165         // printf("insert %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL);
166         return getValue(table[i].vptr);
167     }
168 
169     /// ditto
170     StringValue!(T)* insert(const(char)* s, size_t length, T value) nothrow pure
171     {
172         return insert(s[0 .. length], value);
173     }
174 
175     StringValue!(T)* update(const(char)[] str) nothrow pure
176     {
177         const(size_t) hash = calcHash(str);
178         size_t i = findSlot(hash, str);
179         if (!table[i].vptr)
180         {
181             if (++count > countTrigger)
182             {
183                 grow();
184                 i = findSlot(hash, str);
185             }
186             table[i].hash = hash;
187             table[i].vptr = allocValue(str, T.init);
188         }
189         // printf("update %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL);
190         return getValue(table[i].vptr);
191     }
192 
193     StringValue!(T)* update(const(char)* s, size_t length) nothrow pure
194     {
195         return update(s[0 .. length]);
196     }
197 
198     /********************************
199      * Walk the contents of the string table,
200      * calling fp for each entry.
201      * Params:
202      *      fp = function to call. Returns !=0 to stop
203      * Returns:
204      *      last return value of fp call
205      */
206     int apply(int function(const(StringValue!T)*) nothrow fp) nothrow
207     {
208         foreach (const se; table)
209         {
210             if (!se.vptr)
211                 continue;
212             const sv = getValue(se.vptr);
213             int result = (*fp)(sv);
214             if (result)
215                 return result;
216         }
217         return 0;
218     }
219 
220     /// ditto
221     extern(D) int opApply(scope int delegate(const(StringValue!T)*) nothrow dg) nothrow
222     {
223         foreach (const se; table)
224         {
225             if (!se.vptr)
226                 continue;
227             const sv = getValue(se.vptr);
228             int result = dg(sv);
229             if (result)
230                 return result;
231         }
232         return 0;
233     }
234 
235 private:
236     /// Free all memory in use by this StringTable
237     void freeMem() nothrow pure
238     {
239         foreach (pool; pools)
240             mem.xfree(pool);
241         mem.xfree(table.ptr);
242         mem.xfree(pools.ptr);
243         table = null;
244         pools = null;
245     }
246 
247     uint allocValue(const(char)[] str, T value) nothrow pure
248     {
249         const(size_t) nbytes = (StringValue!T).sizeof + str.length + 1;
250         if (!pools.length || nfill + nbytes > POOL_SIZE)
251         {
252             pools = (cast(ubyte**) mem.xrealloc(pools.ptr, (pools.length + 1) * (pools[0]).sizeof))[0 .. pools.length + 1];
253             pools[$-1] = cast(ubyte*) mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE);
254             if (mem.isGCEnabled)
255                 memset(pools[$ - 1], 0xff, POOL_SIZE); // 0xff less likely to produce GC pointer
256             nfill = 0;
257         }
258         StringValue!(T)* sv = cast(StringValue!(T)*)&pools[$ - 1][nfill];
259         sv.value = value;
260         sv.length = str.length;
261         .memcpy(sv.lstring(), str.ptr, str.length);
262         sv.lstring()[str.length] = 0;
263         const(uint) vptr = cast(uint)(pools.length << POOL_BITS | nfill);
264         nfill += nbytes + (-nbytes & 7); // align to 8 bytes
265         return vptr;
266     }
267 
268     inout(StringValue!T)* getValue(uint vptr) inout @nogc nothrow pure
269     {
270         if (!vptr)
271             return null;
272         const(size_t) idx = (vptr >> POOL_BITS) - 1;
273         const(size_t) off = vptr & POOL_SIZE - 1;
274         return cast(inout(StringValue!T)*)&pools[idx][off];
275     }
276 
277     size_t findSlot(hash_t hash, const(char)[] str) const @nogc nothrow pure
278     {
279         // quadratic probing using triangular numbers
280         // http://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774
281         for (size_t i = hash & (table.length - 1), j = 1;; ++j)
282         {
283             const(StringValue!T)* sv;
284             auto vptr = table[i].vptr;
285             if (!vptr || table[i].hash == hash && (sv = getValue(vptr)).length == str.length && .memcmp(str.ptr, sv.toDchars(), str.length) == 0)
286                 return i;
287             i = (i + j) & (table.length - 1);
288         }
289     }
290 
291     void grow() nothrow pure
292     {
293         const odim = table.length;
294         auto otab = table;
295         const ndim = table.length * 2;
296         countTrigger = (ndim * loadFactorNumerator) / loadFactorDenominator;
297         table = (cast(StringEntry*)mem.xcalloc_noscan(ndim, (table[0]).sizeof))[0 .. ndim];
298         foreach (const se; otab[0 .. odim])
299         {
300             if (!se.vptr)
301                 continue;
302             const sv = getValue(se.vptr);
303             table[findSlot(se.hash, sv.toString())] = se;
304         }
305         mem.xfree(otab.ptr);
306     }
307 }
308 
309 nothrow unittest
310 {
311     StringTable!(const(char)*) tab;
312     tab._init(10);
313 
314     // construct two strings with the same text, but a different pointer
315     const(char)[6] fooBuffer = "foofoo";
316     const(char)[] foo = fooBuffer[0 .. 3];
317     const(char)[] fooAltPtr = fooBuffer[3 .. 6];
318 
319     assert(foo.ptr != fooAltPtr.ptr);
320 
321     // first insertion returns value
322     assert(tab.insert(foo, foo.ptr).value == foo.ptr);
323 
324     // subsequent insertion of same string return null
325     assert(tab.insert(foo.ptr, foo.length, foo.ptr) == null);
326     assert(tab.insert(fooAltPtr, foo.ptr) == null);
327 
328     const lookup = tab.lookup("foo");
329     assert(lookup.value == foo.ptr);
330     assert(lookup.len == 3);
331     assert(lookup.toString() == "foo");
332 
333     assert(tab.lookup("bar") == null);
334     tab.update("bar".ptr, "bar".length);
335     assert(tab.lookup("bar").value == null);
336 
337     tab.reset(0);
338     assert(tab.lookup("foo".ptr, "foo".length) == null);
339     //tab.insert("bar");
340 }
341 
342 nothrow unittest
343 {
344     StringTable!(void*) tab;
345     tab._init(100);
346 
347     enum testCount = 2000;
348 
349     char[2 * testCount] buf;
350 
351     foreach(i; 0 .. testCount)
352     {
353         buf[i * 2 + 0] = cast(char) (i % 256);
354         buf[i * 2 + 1] = cast(char) (i / 256);
355         auto toInsert = cast(const(char)[]) buf[i * 2 .. i * 2 + 2];
356         tab.insert(toInsert, cast(void*) i);
357     }
358 
359     foreach(i; 0 .. testCount)
360     {
361         auto toLookup = cast(const(char)[]) buf[i * 2 .. i * 2 + 2];
362         assert(tab.lookup(toLookup).value == cast(void*) i);
363     }
364 }
365 
366 nothrow unittest
367 {
368     StringTable!(int) tab;
369     tab._init(10);
370     tab.insert("foo",  4);
371     tab.insert("bar",  6);
372 
373     static int resultFp = 0;
374     int resultDg = 0;
375     static bool returnImmediately = false;
376 
377     int function(const(StringValue!int)*) nothrow applyFunc = (const(StringValue!int)* s)
378     {
379         resultFp += s.value;
380         return returnImmediately;
381     };
382 
383     scope int delegate(const(StringValue!int)*) nothrow applyDeleg = (const(StringValue!int)* s)
384     {
385         resultDg += s.value;
386         return returnImmediately;
387     };
388 
389     tab.apply(applyFunc);
390     tab.opApply(applyDeleg);
391 
392     assert(resultDg == 10);
393     assert(resultFp == 10);
394 
395     returnImmediately = true;
396 
397     tab.apply(applyFunc);
398     tab.opApply(applyDeleg);
399 
400     // Order of string table iteration is not specified, either foo or bar could
401     // have been visited first.
402     assert(resultDg == 14 || resultDg == 16);
403     assert(resultFp == 14 || resultFp == 16);
404 }