1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 2015-2020 by The D Language Foundation, All Rights Reserved
6  * Authors:     Rainer Schuetze
7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/dvarstats.d, backend/dvarstats.d)
9  */
10 
11 module dmd.backend.dvarstats;
12 
13 /******************************************
14  * support for lexical scope of local variables
15  */
16 
17 import core.stdc.string;
18 import core.stdc.stdlib;
19 
20 import dmd.backend.cc;
21 import dmd.backend.cdef;
22 import dmd.backend.global;
23 import dmd.backend.code;
24 
25 extern (C++):
26 
27 nothrow:
28 
29 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
30 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);
31 
32 version (all) // free function version
33 {
34     import dmd.backend.dvarstats;
35 
36     void varStats_writeSymbolTable(symtab_t* symtab,
37             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
38             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
39     {
40         varStats.writeSymbolTable(symtab, fnWriteVar, fnEndArgs, fnBeginBlock, fnEndBlock);
41     }
42 
43     void varStats_startFunction()
44     {
45         varStats.startFunction();
46     }
47 
48     void varStats_recordLineOffset(Srcpos src, targ_size_t off)
49     {
50         varStats.recordLineOffset(src, off);
51     }
52 
53     __gshared VarStatistics varStats;
54 }
55 
56 
57 // estimate of variable life time
58 struct LifeTime
59 {
60     Symbol* sym;
61     int offCreate;  // variable created before this code offset
62     int offDestroy; // variable destroyed after this code offset
63 }
64 
65 struct LineOffset
66 {
67     targ_size_t offset;
68     uint linnum;
69     uint diffNextOffset;
70 }
71 
72 struct VarStatistics
73 {
74 private:
75 nothrow:
76     LifeTime[] lifeTimes;
77     int cntUsedLifeTimes;
78 
79     // symbol table sorted by offset of variable creation
80     symtab_t sortedSymtab;
81     SYMIDX* nextSym;      // next symbol with identifier with same hash, same size as sortedSymtab
82     int uniquecnt;        // number of variables that have unique name and don't need lexical scope
83 
84     // line number records for the current function
85     LineOffset[] lineOffsets;
86     int cntUsedLineOffsets;
87     const(char)* srcfile;  // only one file supported, no inline
88 
89 public void startFunction()
90 {
91     cntUsedLineOffsets = 0;
92     srcfile = null;
93 }
94 
95 // figure if can we can add a lexical scope for the variable
96 // (this should exclude variables from inlined functions as there is
97 //  no support for gathering stats from different files)
98 private bool isLexicalScopeVar(Symbol* sa)
99 {
100     if (sa.lnoscopestart <= 0 || sa.lnoscopestart > sa.lnoscopeend)
101         return false;
102 
103     // is it inside the function? Unfortunately we cannot verify the source file in case of inlining
104     if (sa.lnoscopestart < funcsym_p.Sfunc.Fstartline.Slinnum)
105         return false;
106     if (sa.lnoscopeend > funcsym_p.Sfunc.Fendline.Slinnum)
107         return false;
108 
109     return true;
110 }
111 
112 // compare function to sort symbols by line offsets of their creation
113 private extern (C) static int cmpLifeTime(scope const void* p1, scope const void* p2)
114 {
115     const LifeTime* lt1 = cast(const(LifeTime)*)p1;
116     const LifeTime* lt2 = cast(const(LifeTime)*)p2;
117 
118     return lt1.offCreate - lt2.offCreate;
119 }
120 
121 // a parent scope contains the creation offset of the child scope
122 private static extern(D) SYMIDX isParentScope(LifeTime[] lifetimes, SYMIDX parent, SYMIDX si)
123 {
124     if(parent < 0) // full function
125         return true;
126     return lifetimes[parent].offCreate <= lifetimes[si].offCreate &&
127            lifetimes[parent].offDestroy > lifetimes[si].offCreate;
128 }
129 
130 // find a symbol that includes the creation of the given symbol as part of its life time
131 private static extern(D) SYMIDX findParentScope(LifeTime[] lifetimes, SYMIDX si)
132 {
133     for(SYMIDX sj = si - 1; sj >= 0; --sj)
134         if(isParentScope(lifetimes, sj, si))
135            return sj;
136     return -1;
137 }
138 
139 private static int getHash(const(char)* s)
140 {
141     int hash = 0;
142     for (; *s; s++)
143         hash = hash * 11 + *s;
144     return hash;
145 }
146 
147 private bool hashSymbolIdentifiers(symtab_t* symtab)
148 {
149     // build circular-linked lists of symbols with same identifier hash
150     bool hashCollisions = false;
151     SYMIDX[256] firstSym = void;
152     memset(firstSym.ptr, -1, (firstSym).sizeof);
153     for (SYMIDX si = 0; si < symtab.top; si++)
154     {
155         Symbol* sa = symtab.tab[si];
156         int hash = getHash(sa.Sident.ptr) & 255;
157         SYMIDX first = firstSym[hash];
158         if (first == -1)
159         {
160             // connect full circle, so we don't have to recalculate the hash
161             nextSym[si] = si;
162             firstSym[hash] = si;
163         }
164         else
165         {
166             // insert after first entry
167             nextSym[si] = nextSym[first];
168             nextSym[first] = si;
169             hashCollisions = true;
170         }
171     }
172     return hashCollisions;
173 }
174 
175 private bool hasUniqueIdentifier(symtab_t* symtab, SYMIDX si)
176 {
177     Symbol* sa = symtab.tab[si];
178     for (SYMIDX sj = nextSym[si]; sj != si; sj = nextSym[sj])
179         if (strcmp(sa.Sident.ptr, symtab.tab[sj].Sident.ptr) == 0)
180             return false;
181     return true;
182 }
183 
184 // gather statistics about creation and destructions of variables that are
185 //  used by the current function
186 private symtab_t* calcLexicalScope(symtab_t* symtab) return
187 {
188     // make a copy of the symbol table
189     // - arguments should be kept at the very beginning
190     // - variables with unique name come first (will be emitted with full function scope)
191     // - variables with duplicate names are added with ascending code offset
192     if (sortedSymtab.symmax < symtab.top)
193     {
194         nextSym = cast(int*)util_realloc(nextSym, symtab.top, (*nextSym).sizeof);
195         sortedSymtab.tab = cast(Symbol**) util_realloc(sortedSymtab.tab, symtab.top, (Symbol*).sizeof);
196         sortedSymtab.symmax = symtab.top;
197     }
198 
199     if (!hashSymbolIdentifiers(symtab))
200     {
201         // without any collisions, there are no duplicate symbol names, so bail out early
202         uniquecnt = symtab.top;
203         return symtab;
204     }
205 
206     SYMIDX argcnt;
207     for (argcnt = 0; argcnt < symtab.top; argcnt++)
208     {
209         Symbol* sa = symtab.tab[argcnt];
210         if (sa.Sclass != SCparameter && sa.Sclass != SCregpar && sa.Sclass != SCfastpar && sa.Sclass != SCshadowreg)
211             break;
212         sortedSymtab.tab[argcnt] = sa;
213     }
214 
215     // find symbols with identical names, only these need lexical scope
216     uniquecnt = argcnt;
217     SYMIDX dupcnt = 0;
218     for (SYMIDX sj, si = argcnt; si < symtab.top; si++)
219     {
220         Symbol* sa = symtab.tab[si];
221         if (!isLexicalScopeVar(sa) || hasUniqueIdentifier(symtab, si))
222             sortedSymtab.tab[uniquecnt++] = sa;
223         else
224             sortedSymtab.tab[symtab.top - 1 - dupcnt++] = sa; // fill from the top
225     }
226     sortedSymtab.top = symtab.top;
227     if(dupcnt == 0)
228         return symtab;
229 
230     sortLineOffsets();
231 
232     // precalc the lexical blocks to emit so that identically named symbols don't overlap
233     if (lifeTimes.length < dupcnt)
234         lifeTimes = (cast(LifeTime*) util_realloc(lifeTimes.ptr, dupcnt, (LifeTime).sizeof))[0 .. dupcnt];
235 
236     for (SYMIDX si = 0; si < dupcnt; si++)
237     {
238         lifeTimes[si].sym = sortedSymtab.tab[uniquecnt + si];
239         lifeTimes[si].offCreate = cast(int)getLineOffset(lifeTimes[si].sym.lnoscopestart);
240         lifeTimes[si].offDestroy = cast(int)getLineOffset(lifeTimes[si].sym.lnoscopeend);
241     }
242     cntUsedLifeTimes = dupcnt;
243     qsort(lifeTimes.ptr, dupcnt, (lifeTimes[0]).sizeof, &cmpLifeTime);
244 
245     // ensure that an inner block does not extend beyond the end of a parent block
246     for (SYMIDX si = 0; si < dupcnt; si++)
247     {
248         SYMIDX sj = findParentScope(lifeTimes, si);
249         if(sj >= 0 && lifeTimes[si].offDestroy > lifeTimes[sj].offDestroy)
250             lifeTimes[si].offDestroy = lifeTimes[sj].offDestroy;
251     }
252 
253     // extend life time to the creation of the next symbol that is not contained in the parent scope
254     // or that has the same name
255     for (SYMIDX sj, si = 0; si < dupcnt; si++)
256     {
257         SYMIDX parent = findParentScope(lifeTimes, si);
258 
259         for (sj = si + 1; sj < dupcnt; sj++)
260             if(!isParentScope(lifeTimes, parent, sj))
261                 break;
262             else if (strcmp(lifeTimes[si].sym.Sident.ptr, lifeTimes[sj].sym.Sident.ptr) == 0)
263                 break;
264 
265         lifeTimes[si].offDestroy = cast(int)(sj < dupcnt ? lifeTimes[sj].offCreate : retoffset + retsize); // function length
266     }
267 
268     // store duplicate symbols back with new ordering
269     for (SYMIDX si = 0; si < dupcnt; si++)
270         sortedSymtab.tab[uniquecnt + si] = lifeTimes[si].sym;
271 
272     return &sortedSymtab;
273 }
274 
275 public void writeSymbolTable(symtab_t* symtab,
276             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
277             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
278 {
279     symtab = calcLexicalScope(symtab);
280 
281     int openBlocks = 0;
282     int lastOffset = 0;
283 
284     // Write local symbol table
285     bool endarg = false;
286     for (SYMIDX si = 0; si < symtab.top; si++)
287     {
288         Symbol *sa = symtab.tab[si];
289         if (endarg == false &&
290             sa.Sclass != SCparameter &&
291             sa.Sclass != SCfastpar &&
292             sa.Sclass != SCregpar &&
293             sa.Sclass != SCshadowreg)
294         {
295             if(fnEndArgs)
296                 (*fnEndArgs)();
297             endarg = true;
298         }
299         if (si >= uniquecnt)
300         {
301             int off = lifeTimes[si - uniquecnt].offCreate;
302             // close scopes that end before the creation of this symbol
303             for (SYMIDX sj = si - 1; sj >= uniquecnt; --sj)
304             {
305                 if (lastOffset < lifeTimes[sj - uniquecnt].offDestroy && lifeTimes[sj - uniquecnt].offDestroy <= off)
306                 {
307                     assert(openBlocks > 0);
308                     if(fnEndBlock)
309                         (*fnEndBlock)();
310                     openBlocks--;
311                 }
312             }
313             int len = lifeTimes[si - uniquecnt].offDestroy - off;
314             // don't emit a block for length 0, it isn't captured by the close condition above
315             if (len > 0)
316             {
317                 if(fnBeginBlock)
318                     (*fnBeginBlock)(off, len);
319                 openBlocks++;
320             }
321             lastOffset = off;
322         }
323         (*fnWriteVar)(sa);
324     }
325 
326     while (openBlocks > 0)
327     {
328         if(fnEndBlock)
329             (*fnEndBlock)();
330         openBlocks--;
331     }
332 }
333 
334 // compare function to sort line offsets ascending by line (and offset on identical line)
335 private extern (C) static int cmpLineOffsets(scope const void* off1, scope const void* off2)
336 {
337     const LineOffset* loff1 = cast(const(LineOffset)*)off1;
338     const LineOffset* loff2 = cast(const(LineOffset)*)off2;
339 
340     if (loff1.linnum == loff2.linnum)
341         return cast(int)(loff1.offset - loff2.offset);
342     return loff1.linnum - loff2.linnum;
343 }
344 
345 private void sortLineOffsets()
346 {
347     if (cntUsedLineOffsets == 0)
348         return;
349 
350     // remember the offset to the next recorded offset on another line
351     for (int i = 1; i < cntUsedLineOffsets; i++)
352         lineOffsets[i-1].diffNextOffset = cast(uint)(lineOffsets[i].offset - lineOffsets[i-1].offset);
353     lineOffsets[cntUsedLineOffsets - 1].diffNextOffset = cast(uint)(retoffset + retsize - lineOffsets[cntUsedLineOffsets - 1].offset);
354 
355     // sort line records and remove duplicate lines preferring smaller offsets
356     qsort(lineOffsets.ptr, cntUsedLineOffsets, (lineOffsets[0]).sizeof, &cmpLineOffsets);
357     int j = 0;
358     for (int i = 1; i < cntUsedLineOffsets; i++)
359         if (lineOffsets[i].linnum > lineOffsets[j].linnum)
360             lineOffsets[++j] = lineOffsets[i];
361     cntUsedLineOffsets = j + 1;
362 }
363 
364 private targ_size_t getLineOffset(int linnum)
365 {
366     int idx = findLineIndex(linnum);
367     if (idx >= cntUsedLineOffsets || lineOffsets[idx].linnum < linnum)
368         return retoffset + retsize; // function length
369     if (idx > 0 && lineOffsets[idx].linnum != linnum)
370         // for inexact line numbers, use the offset following the previous line
371         return lineOffsets[idx-1].offset + lineOffsets[idx-1].diffNextOffset;
372     return lineOffsets[idx].offset;
373 }
374 
375 // return the first record index in the lineOffsets array with linnum >= line
376 private int findLineIndex(uint line)
377 {
378     int low = 0;
379     int high = cntUsedLineOffsets;
380     while (low < high)
381     {
382         int mid = (low + high) >> 1;
383         int ln = lineOffsets[mid].linnum;
384         if (line < ln)
385             high = mid;
386         else if (line > ln)
387             low = mid + 1;
388         else
389             return mid;
390     }
391     return low;
392 }
393 
394 public void recordLineOffset(Srcpos src, targ_size_t off)
395 {
396     version (MARS)
397         const sfilename = src.Sfilename;
398     else
399         const sfilename = srcpos_name(src);
400 
401     // only record line numbers from one file, symbol info does not include source file
402     if (!sfilename || !src.Slinnum)
403         return;
404     if (!srcfile)
405         srcfile = sfilename;
406     if (srcfile != sfilename && strcmp(srcfile, sfilename) != 0)
407         return;
408 
409     // assume ascending code offsets generated during codegen, ignore any other
410     //  (e.g. there is an additional line number emitted at the end of the function
411     //   or multiple line numbers at the same offset)
412     if (cntUsedLineOffsets > 0 && lineOffsets[cntUsedLineOffsets-1].offset >= off)
413         return;
414 
415     if (cntUsedLineOffsets > 0 && lineOffsets[cntUsedLineOffsets-1].linnum == src.Slinnum)
416     {
417         // optimize common case: new offset on same line
418         return;
419     }
420     // don't care for lineOffsets being ordered now, that is taken care of later (calcLexicalScope)
421     if (lineOffsets.length <= cntUsedLineOffsets)
422     {
423         const newSize = 2 * cntUsedLineOffsets + 16;
424         lineOffsets = (cast(LineOffset*) util_realloc(lineOffsets.ptr, newSize, (lineOffsets[0]).sizeof))[0 .. newSize];
425     }
426     lineOffsets[cntUsedLineOffsets].linnum = src.Slinnum;
427     lineOffsets[cntUsedLineOffsets].offset = off;
428     cntUsedLineOffsets++;
429 }
430 
431 }