1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 2015-2021 by The D Language Foundation, All Rights Reserved
6  * Authors:     Rainer Schuetze
7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/dvarstats.d, backend/dvarstats.d)
9  */
10 
11 module dmd.backend.dvarstats;
12 
13 /******************************************
14  * support for lexical scope of local variables
15  */
16 
17 import core.stdc.string;
18 import core.stdc.stdlib;
19 
20 import dmd.backend.cc;
21 import dmd.backend.cdef;
22 import dmd.backend.global;
23 import dmd.backend.code;
24 import dmd.backend.symtab;
25 import dmd.backend.barray;
26 
27 extern (C++):
28 
29 nothrow:
30 
31 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
32 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);
33 
34 version (all) // free function version
35 {
36     import dmd.backend.dvarstats;
37 
38     void varStats_writeSymbolTable(ref symtab_t symtab,
39             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
40             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
41     {
42         varStats.writeSymbolTable(symtab, fnWriteVar, fnEndArgs, fnBeginBlock, fnEndBlock);
43     }
44 
45     void varStats_startFunction()
46     {
47         varStats.startFunction();
48     }
49 
50     void varStats_recordLineOffset(Srcpos src, targ_size_t off)
51     {
52         varStats.recordLineOffset(src, off);
53     }
54 
55     __gshared VarStatistics varStats;
56 }
57 
58 
59 // estimate of variable life time
60 struct LifeTime
61 {
62     Symbol* sym;
63     int offCreate;  // variable created before this code offset
64     int offDestroy; // variable destroyed after this code offset
65 }
66 
67 struct LineOffset
68 {
69     targ_size_t offset;
70     uint linnum;
71     uint diffNextOffset;
72 }
73 
74 struct VarStatistics
75 {
76 private:
77 nothrow:
78     Barray!LifeTime lifeTimes;
79 
80     // symbol table sorted by offset of variable creation
81     symtab_t sortedSymtab;
82     Barray!SYMIDX nextSym;      // next symbol with identifier with same hash, same size as sortedSymtab
83     int uniquecnt;        // number of variables that have unique name and don't need lexical scope
84 
85     // line number records for the current function
86     Barray!LineOffset lineOffsets;
87     const(char)* srcfile;  // only one file supported, no inline
88 
89 public void startFunction()
90 {
91     lineOffsets.setLength(0);
92     srcfile = null;
93 }
94 
95 // figure if can we can add a lexical scope for the variable
96 // (this should exclude variables from inlined functions as there is
97 //  no support for gathering stats from different files)
98 private bool isLexicalScopeVar(Symbol* sa)
99 {
100     if (sa.lnoscopestart <= 0 || sa.lnoscopestart > sa.lnoscopeend)
101         return false;
102 
103     // is it inside the function? Unfortunately we cannot verify the source file in case of inlining
104     if (sa.lnoscopestart < funcsym_p.Sfunc.Fstartline.Slinnum)
105         return false;
106     if (sa.lnoscopeend > funcsym_p.Sfunc.Fendline.Slinnum)
107         return false;
108 
109     return true;
110 }
111 
112 // compare function to sort symbols by line offsets of their creation
113 private extern (C) static int cmpLifeTime(scope const void* p1, scope const void* p2)
114 {
115     const LifeTime* lt1 = cast(const(LifeTime)*)p1;
116     const LifeTime* lt2 = cast(const(LifeTime)*)p2;
117 
118     return lt1.offCreate - lt2.offCreate;
119 }
120 
121 // a parent scope contains the creation offset of the child scope
122 private static extern(D) SYMIDX isParentScope(ref Barray!LifeTime lifetimes, SYMIDX parent, SYMIDX si)
123 {
124     if (parent == SYMIDX.max) // full function
125         return true;
126     return lifetimes[parent].offCreate <= lifetimes[si].offCreate &&
127            lifetimes[parent].offDestroy > lifetimes[si].offCreate;
128 }
129 
130 // find a symbol that includes the creation of the given symbol as part of its life time
131 private static extern(D) SYMIDX findParentScope(ref Barray!LifeTime lifetimes, SYMIDX si)
132 {
133     if (si != SYMIDX.max)
134     {
135         for(SYMIDX sj = si; sj; --sj)
136             if(isParentScope(lifetimes, sj - 1, si))
137                return sj;
138     }
139     return SYMIDX.max;
140 }
141 
142 private static int getHash(const(char)* s)
143 {
144     int hash = 0;
145     for (; *s; s++)
146         hash = hash * 11 + *s;
147     return hash;
148 }
149 
150 private bool hashSymbolIdentifiers(ref symtab_t symtab)
151 {
152     // build circular-linked lists of symbols with same identifier hash
153     bool hashCollisions = false;
154     SYMIDX[256] firstSym = SYMIDX.max;
155     for (SYMIDX si = 0; si < symtab.length; si++)
156     {
157         Symbol* sa = symtab[si];
158         int hash = getHash(sa.Sident.ptr) & 255;
159         SYMIDX first = firstSym[hash];
160         if (first == SYMIDX.max)
161         {
162             // connect full circle, so we don't have to recalculate the hash
163             nextSym[si] = si;
164             firstSym[hash] = si;
165         }
166         else
167         {
168             // insert after first entry
169             nextSym[si] = nextSym[first];
170             nextSym[first] = si;
171             hashCollisions = true;
172         }
173     }
174     return hashCollisions;
175 }
176 
177 private bool hasUniqueIdentifier(ref symtab_t symtab, SYMIDX si)
178 {
179     Symbol* sa = symtab[si];
180     for (SYMIDX sj = nextSym[si]; sj != si; sj = nextSym[sj])
181         if (strcmp(sa.Sident.ptr, symtab[sj].Sident.ptr) == 0)
182             return false;
183     return true;
184 }
185 
186 // gather statistics about creation and destructions of variables that are
187 //  used by the current function
188 private symtab_t* calcLexicalScope(return ref symtab_t symtab) return
189 {
190     // make a copy of the symbol table
191     // - arguments should be kept at the very beginning
192     // - variables with unique name come first (will be emitted with full function scope)
193     // - variables with duplicate names are added with ascending code offset
194     nextSym.setLength(symtab.length);
195     if (sortedSymtab.symmax < symtab.length)
196     {
197         sortedSymtab.tab = cast(Symbol**) util_realloc(sortedSymtab.tab, symtab.length, (Symbol*).sizeof);
198         sortedSymtab.symmax = symtab.length;
199     }
200     sortedSymtab.length = symtab.length;
201 
202     if (!hashSymbolIdentifiers(symtab))
203     {
204         // without any collisions, there are no duplicate symbol names, so bail out early
205         uniquecnt = cast(int)symtab.length;
206         return &symtab;
207     }
208 
209     SYMIDX argcnt;
210     for (argcnt = 0; argcnt < symtab.length; argcnt++)
211     {
212         Symbol* sa = symtab[argcnt];
213         if (sa.Sclass != SCparameter && sa.Sclass != SCregpar && sa.Sclass != SCfastpar && sa.Sclass != SCshadowreg)
214             break;
215         sortedSymtab[argcnt] = sa;
216     }
217 
218     // find symbols with identical names, only these need lexical scope
219     uniquecnt = cast(int)argcnt;
220     SYMIDX dupcnt = 0;
221     for (SYMIDX sj, si = argcnt; si < symtab.length; si++)
222     {
223         Symbol* sa = symtab[si];
224         if (!isLexicalScopeVar(sa) || hasUniqueIdentifier(symtab, si))
225             sortedSymtab[uniquecnt++] = sa;
226         else
227             sortedSymtab[symtab.length - 1 - dupcnt++] = sa; // fill from the top
228     }
229     sortedSymtab.length = symtab.length;
230     if(dupcnt == 0)
231         return &symtab;
232 
233     sortLineOffsets();
234 
235     // precalc the lexical blocks to emit so that identically named symbols don't overlap
236     lifeTimes.setLength(dupcnt);
237 
238     for (SYMIDX si = 0; si < dupcnt; si++)
239     {
240         lifeTimes[si].sym = sortedSymtab[uniquecnt + si];
241         lifeTimes[si].offCreate = cast(int)getLineOffset(lifeTimes[si].sym.lnoscopestart);
242         lifeTimes[si].offDestroy = cast(int)getLineOffset(lifeTimes[si].sym.lnoscopeend);
243     }
244     qsort(lifeTimes[].ptr, dupcnt, (lifeTimes[0]).sizeof, &cmpLifeTime);
245 
246     // ensure that an inner block does not extend beyond the end of a parent block
247     for (SYMIDX si = 0; si < dupcnt; si++)
248     {
249         SYMIDX sj = findParentScope(lifeTimes, si);
250         if(sj != SYMIDX.max && lifeTimes[si].offDestroy > lifeTimes[sj].offDestroy)
251             lifeTimes[si].offDestroy = lifeTimes[sj].offDestroy;
252     }
253 
254     // extend life time to the creation of the next symbol that is not contained in the parent scope
255     // or that has the same name
256     for (SYMIDX sj, si = 0; si < dupcnt; si++)
257     {
258         SYMIDX parent = findParentScope(lifeTimes, si);
259 
260         for (sj = si + 1; sj < dupcnt; sj++)
261             if(!isParentScope(lifeTimes, parent, sj))
262                 break;
263             else if (strcmp(lifeTimes[si].sym.Sident.ptr, lifeTimes[sj].sym.Sident.ptr) == 0)
264                 break;
265 
266         lifeTimes[si].offDestroy = cast(int)(sj < dupcnt ? lifeTimes[sj].offCreate : retoffset + retsize); // function length
267     }
268 
269     // store duplicate symbols back with new ordering
270     for (SYMIDX si = 0; si < dupcnt; si++)
271         sortedSymtab[uniquecnt + si] = lifeTimes[si].sym;
272 
273     return &sortedSymtab;
274 }
275 
276 public void writeSymbolTable(ref symtab_t symtab,
277             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
278             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
279 {
280     auto symtab2 = calcLexicalScope(symtab);
281 
282     int openBlocks = 0;
283     int lastOffset = 0;
284 
285     // Write local symbol table
286     bool endarg = false;
287     for (SYMIDX si = 0; si < symtab2.length; si++)
288     {
289         Symbol *sa = (*symtab2)[si];
290         if (endarg == false &&
291             sa.Sclass != SCparameter &&
292             sa.Sclass != SCfastpar &&
293             sa.Sclass != SCregpar &&
294             sa.Sclass != SCshadowreg)
295         {
296             if(fnEndArgs)
297                 (*fnEndArgs)();
298             endarg = true;
299         }
300         if (si >= uniquecnt)
301         {
302             int off = lifeTimes[si - uniquecnt].offCreate;
303             // close scopes that end before the creation of this symbol
304             for (SYMIDX sj = si; sj > uniquecnt; --sj)
305             {
306                 if (lastOffset < lifeTimes[sj - 1 - uniquecnt].offDestroy && lifeTimes[sj - 1 - uniquecnt].offDestroy <= off)
307                 {
308                     assert(openBlocks > 0);
309                     if(fnEndBlock)
310                         (*fnEndBlock)();
311                     openBlocks--;
312                 }
313             }
314             int len = lifeTimes[si - uniquecnt].offDestroy - off;
315             // don't emit a block for length 0, it isn't captured by the close condition above
316             if (len > 0)
317             {
318                 if(fnBeginBlock)
319                     (*fnBeginBlock)(off, len);
320                 openBlocks++;
321             }
322             lastOffset = off;
323         }
324         (*fnWriteVar)(sa);
325     }
326 
327     while (openBlocks > 0)
328     {
329         if(fnEndBlock)
330             (*fnEndBlock)();
331         openBlocks--;
332     }
333 }
334 
335 // compare function to sort line offsets ascending by line (and offset on identical line)
336 private extern (C) static int cmpLineOffsets(scope const void* off1, scope const void* off2)
337 {
338     const LineOffset* loff1 = cast(const(LineOffset)*)off1;
339     const LineOffset* loff2 = cast(const(LineOffset)*)off2;
340 
341     if (loff1.linnum == loff2.linnum)
342         return cast(int)(loff1.offset - loff2.offset);
343     return loff1.linnum - loff2.linnum;
344 }
345 
346 private void sortLineOffsets()
347 {
348     if (lineOffsets.length == 0)
349         return;
350 
351     // remember the offset to the next recorded offset on another line
352     for (int i = 1; i < lineOffsets.length; i++)
353         lineOffsets[i-1].diffNextOffset = cast(uint)(lineOffsets[i].offset - lineOffsets[i-1].offset);
354     lineOffsets[lineOffsets.length - 1].diffNextOffset = cast(uint)(retoffset + retsize - lineOffsets[lineOffsets.length - 1].offset);
355 
356     // sort line records and remove duplicate lines preferring smaller offsets
357     qsort(lineOffsets[].ptr, lineOffsets.length, (lineOffsets[0]).sizeof, &cmpLineOffsets);
358     int j = 0;
359     for (int i = 1; i < lineOffsets.length; i++)
360         if (lineOffsets[i].linnum > lineOffsets[j].linnum)
361             lineOffsets[++j] = lineOffsets[i];
362     lineOffsets.setLength(j + 1);
363 }
364 
365 private targ_size_t getLineOffset(int linnum)
366 {
367     int idx = findLineIndex(linnum);
368     if (idx >= lineOffsets.length || lineOffsets[idx].linnum < linnum)
369         return retoffset + retsize; // function length
370     if (idx > 0 && lineOffsets[idx].linnum != linnum)
371         // for inexact line numbers, use the offset following the previous line
372         return lineOffsets[idx-1].offset + lineOffsets[idx-1].diffNextOffset;
373     return lineOffsets[idx].offset;
374 }
375 
376 // return the first record index in the lineOffsets array with linnum >= line
377 private int findLineIndex(uint line)
378 {
379     int low = 0;
380     int high = cast(int)lineOffsets.length;
381     while (low < high)
382     {
383         int mid = (low + high) >> 1;
384         int ln = lineOffsets[mid].linnum;
385         if (line < ln)
386             high = mid;
387         else if (line > ln)
388             low = mid + 1;
389         else
390             return mid;
391     }
392     return low;
393 }
394 
395 public void recordLineOffset(Srcpos src, targ_size_t off)
396 {
397     version (MARS)
398         const sfilename = src.Sfilename;
399     else
400         const sfilename = srcpos_name(src);
401 
402     // only record line numbers from one file, symbol info does not include source file
403     if (!sfilename || !src.Slinnum)
404         return;
405     if (!srcfile)
406         srcfile = sfilename;
407     if (srcfile != sfilename && strcmp(srcfile, sfilename) != 0)
408         return;
409 
410     // assume ascending code offsets generated during codegen, ignore any other
411     //  (e.g. there is an additional line number emitted at the end of the function
412     //   or multiple line numbers at the same offset)
413     if (lineOffsets.length > 0 && lineOffsets[lineOffsets.length-1].offset >= off)
414         return;
415 
416     if (lineOffsets.length > 0 && lineOffsets[lineOffsets.length-1].linnum == src.Slinnum)
417     {
418         // optimize common case: new offset on same line
419         return;
420     }
421     // don't care for lineOffsets being ordered now, that is taken care of later (calcLexicalScope)
422     LineOffset* linoff = lineOffsets.push();
423     linoff.linnum = src.Slinnum;
424     linoff.offset = off;
425     linoff.diffNextOffset = 0;
426 }
427 
428 }