1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 2015-2020 by The D Language Foundation, All Rights Reserved
6  * Authors:     Rainer Schuetze
7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/dvarstats.d, backend/dvarstats.d)
9  */
10 
11 module dmd.backend.dvarstats;
12 
13 /******************************************
14  * support for lexical scope of local variables
15  */
16 
17 import core.stdc.string;
18 import core.stdc.stdlib;
19 
20 import dmd.backend.cc;
21 import dmd.backend.cdef;
22 import dmd.backend.global;
23 import dmd.backend.code;
24 import dmd.backend.symtab;
25 
26 extern (C++):
27 
28 nothrow:
29 
30 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
31 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);
32 
33 version (all) // free function version
34 {
35     import dmd.backend.dvarstats;
36 
37     void varStats_writeSymbolTable(ref symtab_t symtab,
38             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
39             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
40     {
41         varStats.writeSymbolTable(symtab, fnWriteVar, fnEndArgs, fnBeginBlock, fnEndBlock);
42     }
43 
44     void varStats_startFunction()
45     {
46         varStats.startFunction();
47     }
48 
49     void varStats_recordLineOffset(Srcpos src, targ_size_t off)
50     {
51         varStats.recordLineOffset(src, off);
52     }
53 
54     __gshared VarStatistics varStats;
55 }
56 
57 
58 // estimate of variable life time
59 struct LifeTime
60 {
61     Symbol* sym;
62     int offCreate;  // variable created before this code offset
63     int offDestroy; // variable destroyed after this code offset
64 }
65 
66 struct LineOffset
67 {
68     targ_size_t offset;
69     uint linnum;
70     uint diffNextOffset;
71 }
72 
73 struct VarStatistics
74 {
75 private:
76 nothrow:
77     LifeTime[] lifeTimes;
78     int cntUsedLifeTimes;
79 
80     // symbol table sorted by offset of variable creation
81     symtab_t sortedSymtab;
82     SYMIDX* nextSym;      // next symbol with identifier with same hash, same size as sortedSymtab
83     int uniquecnt;        // number of variables that have unique name and don't need lexical scope
84 
85     // line number records for the current function
86     LineOffset[] lineOffsets;
87     int cntUsedLineOffsets;
88     const(char)* srcfile;  // only one file supported, no inline
89 
90 public void startFunction()
91 {
92     cntUsedLineOffsets = 0;
93     srcfile = null;
94 }
95 
96 // figure if can we can add a lexical scope for the variable
97 // (this should exclude variables from inlined functions as there is
98 //  no support for gathering stats from different files)
99 private bool isLexicalScopeVar(Symbol* sa)
100 {
101     if (sa.lnoscopestart <= 0 || sa.lnoscopestart > sa.lnoscopeend)
102         return false;
103 
104     // is it inside the function? Unfortunately we cannot verify the source file in case of inlining
105     if (sa.lnoscopestart < funcsym_p.Sfunc.Fstartline.Slinnum)
106         return false;
107     if (sa.lnoscopeend > funcsym_p.Sfunc.Fendline.Slinnum)
108         return false;
109 
110     return true;
111 }
112 
113 // compare function to sort symbols by line offsets of their creation
114 private extern (C) static int cmpLifeTime(scope const void* p1, scope const void* p2)
115 {
116     const LifeTime* lt1 = cast(const(LifeTime)*)p1;
117     const LifeTime* lt2 = cast(const(LifeTime)*)p2;
118 
119     return lt1.offCreate - lt2.offCreate;
120 }
121 
122 // a parent scope contains the creation offset of the child scope
123 private static extern(D) SYMIDX isParentScope(LifeTime[] lifetimes, SYMIDX parent, SYMIDX si)
124 {
125     if (parent == SYMIDX.max) // full function
126         return true;
127     return lifetimes[parent].offCreate <= lifetimes[si].offCreate &&
128            lifetimes[parent].offDestroy > lifetimes[si].offCreate;
129 }
130 
131 // find a symbol that includes the creation of the given symbol as part of its life time
132 private static extern(D) SYMIDX findParentScope(LifeTime[] lifetimes, SYMIDX si)
133 {
134     if (si != SYMIDX.max)
135     {
136         for(SYMIDX sj = si; sj; --sj)
137             if(isParentScope(lifetimes, sj - 1, si))
138                return sj;
139     }
140     return SYMIDX.max;
141 }
142 
143 private static int getHash(const(char)* s)
144 {
145     int hash = 0;
146     for (; *s; s++)
147         hash = hash * 11 + *s;
148     return hash;
149 }
150 
151 private bool hashSymbolIdentifiers(ref symtab_t symtab)
152 {
153     // build circular-linked lists of symbols with same identifier hash
154     bool hashCollisions = false;
155     SYMIDX[256] firstSym = SYMIDX.max;
156     for (SYMIDX si = 0; si < symtab.length; si++)
157     {
158         Symbol* sa = symtab[si];
159         int hash = getHash(sa.Sident.ptr) & 255;
160         SYMIDX first = firstSym[hash];
161         if (first == SYMIDX.max)
162         {
163             // connect full circle, so we don't have to recalculate the hash
164             nextSym[si] = si;
165             firstSym[hash] = si;
166         }
167         else
168         {
169             // insert after first entry
170             nextSym[si] = nextSym[first];
171             nextSym[first] = si;
172             hashCollisions = true;
173         }
174     }
175     return hashCollisions;
176 }
177 
178 private bool hasUniqueIdentifier(ref symtab_t symtab, SYMIDX si)
179 {
180     Symbol* sa = symtab.tab[si];
181     for (SYMIDX sj = nextSym[si]; sj != si; sj = nextSym[sj])
182         if (strcmp(sa.Sident.ptr, symtab.tab[sj].Sident.ptr) == 0)
183             return false;
184     return true;
185 }
186 
187 // gather statistics about creation and destructions of variables that are
188 //  used by the current function
189 private symtab_t* calcLexicalScope(return ref symtab_t symtab) return
190 {
191     // make a copy of the symbol table
192     // - arguments should be kept at the very beginning
193     // - variables with unique name come first (will be emitted with full function scope)
194     // - variables with duplicate names are added with ascending code offset
195     if (sortedSymtab.symmax < symtab.length)
196     {
197         nextSym = cast(SYMIDX*)util_realloc(nextSym, symtab.length, (*nextSym).sizeof);
198         sortedSymtab.tab = cast(Symbol**) util_realloc(sortedSymtab.tab, symtab.length, (Symbol*).sizeof);
199         sortedSymtab.symmax = symtab.length;
200     }
201     sortedSymtab.length = symtab.length;
202 
203     if (!hashSymbolIdentifiers(symtab))
204     {
205         // without any collisions, there are no duplicate symbol names, so bail out early
206         uniquecnt = cast(int)symtab.length;
207         return &symtab;
208     }
209 
210     SYMIDX argcnt;
211     for (argcnt = 0; argcnt < symtab.length; argcnt++)
212     {
213         Symbol* sa = symtab.tab[argcnt];
214         if (sa.Sclass != SCparameter && sa.Sclass != SCregpar && sa.Sclass != SCfastpar && sa.Sclass != SCshadowreg)
215             break;
216         sortedSymtab.tab[argcnt] = sa;
217     }
218 
219     // find symbols with identical names, only these need lexical scope
220     uniquecnt = cast(int)argcnt;
221     SYMIDX dupcnt = 0;
222     for (SYMIDX sj, si = argcnt; si < symtab.length; si++)
223     {
224         Symbol* sa = symtab.tab[si];
225         if (!isLexicalScopeVar(sa) || hasUniqueIdentifier(symtab, si))
226             sortedSymtab.tab[uniquecnt++] = sa;
227         else
228             sortedSymtab.tab[symtab.length - 1 - dupcnt++] = sa; // fill from the top
229     }
230     sortedSymtab.length = symtab.length;
231     if(dupcnt == 0)
232         return &symtab;
233 
234     sortLineOffsets();
235 
236     // precalc the lexical blocks to emit so that identically named symbols don't overlap
237     if (lifeTimes.length < dupcnt)
238         lifeTimes = (cast(LifeTime*) util_realloc(lifeTimes.ptr, dupcnt, (LifeTime).sizeof))[0 .. dupcnt];
239 
240     for (SYMIDX si = 0; si < dupcnt; si++)
241     {
242         lifeTimes[si].sym = sortedSymtab[uniquecnt + si];
243         lifeTimes[si].offCreate = cast(int)getLineOffset(lifeTimes[si].sym.lnoscopestart);
244         lifeTimes[si].offDestroy = cast(int)getLineOffset(lifeTimes[si].sym.lnoscopeend);
245     }
246     cntUsedLifeTimes = cast(int)dupcnt;
247     qsort(lifeTimes.ptr, dupcnt, (lifeTimes[0]).sizeof, &cmpLifeTime);
248 
249     // ensure that an inner block does not extend beyond the end of a parent block
250     for (SYMIDX si = 0; si < dupcnt; si++)
251     {
252         SYMIDX sj = findParentScope(lifeTimes, si);
253         if(sj != SYMIDX.max && lifeTimes[si].offDestroy > lifeTimes[sj].offDestroy)
254             lifeTimes[si].offDestroy = lifeTimes[sj].offDestroy;
255     }
256 
257     // extend life time to the creation of the next symbol that is not contained in the parent scope
258     // or that has the same name
259     for (SYMIDX sj, si = 0; si < dupcnt; si++)
260     {
261         SYMIDX parent = findParentScope(lifeTimes, si);
262 
263         for (sj = si + 1; sj < dupcnt; sj++)
264             if(!isParentScope(lifeTimes, parent, sj))
265                 break;
266             else if (strcmp(lifeTimes[si].sym.Sident.ptr, lifeTimes[sj].sym.Sident.ptr) == 0)
267                 break;
268 
269         lifeTimes[si].offDestroy = cast(int)(sj < dupcnt ? lifeTimes[sj].offCreate : retoffset + retsize); // function length
270     }
271 
272     // store duplicate symbols back with new ordering
273     for (SYMIDX si = 0; si < dupcnt; si++)
274         sortedSymtab[uniquecnt + si] = lifeTimes[si].sym;
275 
276     return &sortedSymtab;
277 }
278 
279 public void writeSymbolTable(ref symtab_t symtab,
280             void function(Symbol*) nothrow fnWriteVar, void function() nothrow fnEndArgs,
281             void function(int off,int len) nothrow fnBeginBlock, void function() nothrow fnEndBlock)
282 {
283     auto symtab2 = calcLexicalScope(symtab);
284 
285     int openBlocks = 0;
286     int lastOffset = 0;
287 
288     // Write local symbol table
289     bool endarg = false;
290     for (SYMIDX si = 0; si < symtab2.length; si++)
291     {
292         Symbol *sa = (*symtab2)[si];
293         if (endarg == false &&
294             sa.Sclass != SCparameter &&
295             sa.Sclass != SCfastpar &&
296             sa.Sclass != SCregpar &&
297             sa.Sclass != SCshadowreg)
298         {
299             if(fnEndArgs)
300                 (*fnEndArgs)();
301             endarg = true;
302         }
303         if (si >= uniquecnt)
304         {
305             int off = lifeTimes[si - uniquecnt].offCreate;
306             // close scopes that end before the creation of this symbol
307             for (SYMIDX sj = si; sj > uniquecnt; --sj)
308             {
309                 if (lastOffset < lifeTimes[sj - 1 - uniquecnt].offDestroy && lifeTimes[sj - 1 - uniquecnt].offDestroy <= off)
310                 {
311                     assert(openBlocks > 0);
312                     if(fnEndBlock)
313                         (*fnEndBlock)();
314                     openBlocks--;
315                 }
316             }
317             int len = lifeTimes[si - uniquecnt].offDestroy - off;
318             // don't emit a block for length 0, it isn't captured by the close condition above
319             if (len > 0)
320             {
321                 if(fnBeginBlock)
322                     (*fnBeginBlock)(off, len);
323                 openBlocks++;
324             }
325             lastOffset = off;
326         }
327         (*fnWriteVar)(sa);
328     }
329 
330     while (openBlocks > 0)
331     {
332         if(fnEndBlock)
333             (*fnEndBlock)();
334         openBlocks--;
335     }
336 }
337 
338 // compare function to sort line offsets ascending by line (and offset on identical line)
339 private extern (C) static int cmpLineOffsets(scope const void* off1, scope const void* off2)
340 {
341     const LineOffset* loff1 = cast(const(LineOffset)*)off1;
342     const LineOffset* loff2 = cast(const(LineOffset)*)off2;
343 
344     if (loff1.linnum == loff2.linnum)
345         return cast(int)(loff1.offset - loff2.offset);
346     return loff1.linnum - loff2.linnum;
347 }
348 
349 private void sortLineOffsets()
350 {
351     if (cntUsedLineOffsets == 0)
352         return;
353 
354     // remember the offset to the next recorded offset on another line
355     for (int i = 1; i < cntUsedLineOffsets; i++)
356         lineOffsets[i-1].diffNextOffset = cast(uint)(lineOffsets[i].offset - lineOffsets[i-1].offset);
357     lineOffsets[cntUsedLineOffsets - 1].diffNextOffset = cast(uint)(retoffset + retsize - lineOffsets[cntUsedLineOffsets - 1].offset);
358 
359     // sort line records and remove duplicate lines preferring smaller offsets
360     qsort(lineOffsets.ptr, cntUsedLineOffsets, (lineOffsets[0]).sizeof, &cmpLineOffsets);
361     int j = 0;
362     for (int i = 1; i < cntUsedLineOffsets; i++)
363         if (lineOffsets[i].linnum > lineOffsets[j].linnum)
364             lineOffsets[++j] = lineOffsets[i];
365     cntUsedLineOffsets = j + 1;
366 }
367 
368 private targ_size_t getLineOffset(int linnum)
369 {
370     int idx = findLineIndex(linnum);
371     if (idx >= cntUsedLineOffsets || lineOffsets[idx].linnum < linnum)
372         return retoffset + retsize; // function length
373     if (idx > 0 && lineOffsets[idx].linnum != linnum)
374         // for inexact line numbers, use the offset following the previous line
375         return lineOffsets[idx-1].offset + lineOffsets[idx-1].diffNextOffset;
376     return lineOffsets[idx].offset;
377 }
378 
379 // return the first record index in the lineOffsets array with linnum >= line
380 private int findLineIndex(uint line)
381 {
382     int low = 0;
383     int high = cntUsedLineOffsets;
384     while (low < high)
385     {
386         int mid = (low + high) >> 1;
387         int ln = lineOffsets[mid].linnum;
388         if (line < ln)
389             high = mid;
390         else if (line > ln)
391             low = mid + 1;
392         else
393             return mid;
394     }
395     return low;
396 }
397 
398 public void recordLineOffset(Srcpos src, targ_size_t off)
399 {
400     version (MARS)
401         const sfilename = src.Sfilename;
402     else
403         const sfilename = srcpos_name(src);
404 
405     // only record line numbers from one file, symbol info does not include source file
406     if (!sfilename || !src.Slinnum)
407         return;
408     if (!srcfile)
409         srcfile = sfilename;
410     if (srcfile != sfilename && strcmp(srcfile, sfilename) != 0)
411         return;
412 
413     // assume ascending code offsets generated during codegen, ignore any other
414     //  (e.g. there is an additional line number emitted at the end of the function
415     //   or multiple line numbers at the same offset)
416     if (cntUsedLineOffsets > 0 && lineOffsets[cntUsedLineOffsets-1].offset >= off)
417         return;
418 
419     if (cntUsedLineOffsets > 0 && lineOffsets[cntUsedLineOffsets-1].linnum == src.Slinnum)
420     {
421         // optimize common case: new offset on same line
422         return;
423     }
424     // don't care for lineOffsets being ordered now, that is taken care of later (calcLexicalScope)
425     if (lineOffsets.length <= cntUsedLineOffsets)
426     {
427         const newSize = 2 * cntUsedLineOffsets + 16;
428         lineOffsets = (cast(LineOffset*) util_realloc(lineOffsets.ptr, newSize, (lineOffsets[0]).sizeof))[0 .. newSize];
429     }
430     lineOffsets[cntUsedLineOffsets].linnum = src.Slinnum;
431     lineOffsets[cntUsedLineOffsets].offset = off;
432     cntUsedLineOffsets++;
433 }
434 
435 }