1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 2016-2021 by The D Language Foundation, All Rights Reserved
6  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gsroa.c, backend/gsroa.d)
9  */
10 
11 module dmd.backend.gsroa;
12 
13 version (SCPP)
14     version = COMPILE;
15 version (MARS)
16     version = COMPILE;
17 
18 version (COMPILE)
19 {
20 
21 import core.stdc.stdio;
22 import core.stdc.stdlib;
23 import core.stdc.string;
24 import core.stdc.time;
25 
26 import dmd.backend.cc;
27 import dmd.backend.cdef;
28 import dmd.backend.code_x86;
29 import dmd.backend.oper;
30 import dmd.backend.global;
31 import dmd.backend.el;
32 import dmd.backend.symtab;
33 import dmd.backend.ty;
34 import dmd.backend.type;
35 
36 import dmd.backend.dlist;
37 import dmd.backend.dvec;
38 
39 extern (C++):
40 
41 nothrow:
42 
43 int REGSIZE();
44 
45 alias SLICESIZE = REGSIZE;  // slices are all register-sized
46 enum MAXSLICES = 2;         // max # of pieces we can slice an aggregate into
47 
48 /* This 'slices' a two register wide aggregate into two separate register-sized variables,
49  * enabling much better enregistering.
50  * SROA (Scalar Replacement Of Aggregates) is the common term for this.
51  */
52 
53 struct SymInfo
54 {
55     bool canSlice;
56     bool accessSlice;   // if Symbol was accessed as a slice
57     tym_t[MAXSLICES] ty; // type of each slice
58     SYMIDX si0;          // index of first slice, the rest follow sequentially
59 }
60 
61 /********************************
62  * Gather information about slice-able variables by scanning e.
63  * Params:
64  *      symtab = symbol table
65  *      e = expression to scan
66  *      sia = where to put gathered information
67  */
68 extern (D) private void sliceStructs_Gather(ref const symtab_t symtab, SymInfo[] sia, const(elem)* e)
69 {
70     while (1)
71     {
72         switch (e.Eoper)
73         {
74             case OPvar:
75             {
76                 const si = e.EV.Vsym.Ssymnum;
77                 if (si != SYMIDX.max && sia[si].canSlice)
78                 {
79                     assert(si < symtab.length);
80                     const n = nthSlice(e);
81                     const sz = getSize(e);
82                     if (sz == 2 * SLICESIZE && !tyfv(e.Ety) &&
83                         tybasic(e.Ety) != TYldouble && tybasic(e.Ety) != TYildouble)
84                     {
85                         // Rewritten as OPpair later
86                     }
87                     else if (n != NOTSLICE)
88                     {
89                         if (!sia[si].accessSlice)
90                         {
91                             /* [1] default as pointer type
92                              */
93                             foreach (ref ty; sia[si].ty)
94                                 ty = TYnptr;
95 
96                             const s = e.EV.Vsym;
97                             const t = s.Stype;
98                             if (tybasic(t.Tty) == TYstruct)
99                             {
100                                 if (const targ1 = t.Ttag.Sstruct.Sarg1type)
101                                     if (const targ2 = t.Ttag.Sstruct.Sarg2type)
102                                     {
103                                         sia[si].ty[0] = targ1.Tty;
104                                         sia[si].ty[1] = targ2.Tty;
105                                     }
106                             }
107                         }
108                         sia[si].accessSlice = true;
109                         if (sz == SLICESIZE)
110                             sia[si].ty[n] = tybasic(e.Ety);
111                     }
112                     else
113                     {
114                         sia[si].canSlice = false;
115                     }
116                 }
117                 return;
118             }
119 
120             default:
121                 if (OTassign(e.Eoper))
122                 {
123                     if (OTbinary(e.Eoper))
124                         sliceStructs_Gather(symtab, sia, e.EV.E2);
125 
126                     // Assignment to a whole var will disallow SROA
127                     if (e.EV.E1.Eoper == OPvar)
128                     {
129                         const e1 = e.EV.E1;
130                         const si = e1.EV.Vsym.Ssymnum;
131                         if (si != SYMIDX.max && sia[si].canSlice)
132                         {
133                             assert(si < symtab.length);
134                             if (nthSlice(e1) == NOTSLICE)
135                             {
136                                 sia[si].canSlice = false;
137                             }
138                             // Disable SROA on OSX32 (because XMM registers?)
139                             // https://issues.dlang.org/show_bug.cgi?id=15206
140                             // https://github.com/dlang/dmd/pull/8034
141                             else if (!(config.exe & EX_OSX))
142                             {
143                                 sliceStructs_Gather(symtab, sia, e.EV.E1);
144                             }
145                         }
146                         return;
147                     }
148                     e = e.EV.E1;
149                     break;
150                 }
151                 if (OTunary(e.Eoper))
152                 {
153                     e = e.EV.E1;
154                     break;
155                 }
156                 if (OTbinary(e.Eoper))
157                 {
158                     sliceStructs_Gather(symtab, sia, e.EV.E2);
159                     e = e.EV.E1;
160                     break;
161                 }
162                 return;
163         }
164     }
165 }
166 
167 /***********************************
168  * Rewrite expression tree e based on info in sia[].
169  * Params:
170  *      symtab = symbol table
171  *      sia = slicing info
172  *      e = expression tree to rewrite in place
173  */
174 extern (D) private void sliceStructs_Replace(ref symtab_t symtab, const SymInfo[] sia, elem *e)
175 {
176     while (1)
177     {
178         switch (e.Eoper)
179         {
180             case OPvar:
181             {
182                 Symbol *s = e.EV.Vsym;
183                 const si = s.Ssymnum;
184                 //printf("e: %d %d\n", si, sia[si].canSlice);
185                 //elem_print(e);
186                 if (si != SYMIDX.max && sia[si].canSlice)
187                 {
188                     const n = nthSlice(e);
189                     if (getSize(e) == 2 * SLICESIZE)
190                     {
191                         // Rewrite e as (si0 OPpair si0+1)
192                         elem *e1 = el_calloc();
193                         el_copy(e1, e);
194                         e1.Ety = sia[si].ty[0];
195 
196                         elem *e2 = el_calloc();
197                         el_copy(e2, e);
198                         Symbol *s1 = symtab[sia[si].si0 + 1]; // +1 for second slice
199                         e2.Ety = sia[si].ty[1];
200                         e2.EV.Vsym = s1;
201                         e2.EV.Voffset = 0;
202 
203                         e.Eoper = OPpair;
204                         e.EV.E1 = e1;
205                         e.EV.E2 = e2;
206 
207                         if (tycomplex(e.Ety))
208                         {
209                             /* Ensure complex OPpair operands are floating point types
210                              * because [1] may have defaulted them to a pointer type.
211                              * https://issues.dlang.org/show_bug.cgi?id=18936
212                              */
213                             tym_t tyop;
214                             switch (tybasic(e.Ety))
215                             {
216                                 case TYcfloat:   tyop = TYfloat;   break;
217                                 case TYcdouble:  tyop = TYdouble;  break;
218                                 case TYcldouble: tyop = TYldouble; break;
219                                 default:
220                                     assert(0);
221                             }
222                             if (!tyfloating(e1.Ety))
223                                 e1.Ety = tyop;
224                             if (!tyfloating(e2.Ety))
225                                 e2.Ety = tyop;
226                         }
227                     }
228                     else if (n == 0)  // the first slice of the symbol is the same as the original
229                     {
230                     }
231                     else // the nth slice
232                     {
233                         e.EV.Vsym = symtab[sia[si].si0 + n];
234                         e.EV.Voffset -= n * SLICESIZE;
235                         //printf("replaced with:\n");
236                         //elem_print(e);
237                     }
238                 }
239                 return;
240             }
241 
242             default:
243                 if (OTunary(e.Eoper))
244                 {
245                     e = e.EV.E1;
246                     break;
247                 }
248                 if (OTbinary(e.Eoper))
249                 {
250                     sliceStructs_Replace(symtab, sia, e.EV.E2);
251                     e = e.EV.E1;
252                     break;
253                 }
254                 return;
255         }
256     }
257 }
258 
259 void sliceStructs(ref symtab_t symtab, block* startblock)
260 {
261     if (debugc) printf("sliceStructs() %s\n", funcsym_p.Sident.ptr);
262     const sia_length = symtab.length;
263     /* 3 is because it is used for two arrays, sia[] and sia2[].
264      * sia2[] can grow to twice the size of sia[], as symbols can get split into two.
265      */
266     debug
267         enum tmp_length = 3;
268     else
269         enum tmp_length = 6;
270     SymInfo[tmp_length] tmp = void;
271     SymInfo* sip;
272     if (sia_length <= tmp.length / 3)
273         sip = tmp.ptr;
274     else
275     {
276         sip = cast(SymInfo *)malloc(3 * sia_length * SymInfo.sizeof);
277         assert(sip);
278     }
279     SymInfo[] sia = sip[0 .. sia_length];
280     SymInfo[] sia2 = sip[sia_length .. sia_length * 3];
281 
282     if (0) foreach (si; 0 .. symtab.length)
283     {
284         Symbol *s = symtab[si];
285         printf("[%d]: %p %d %s\n", cast(int)si, s, cast(int)type_size(s.Stype), s.Sident.ptr);
286     }
287 
288     bool anySlice = false;
289     foreach (si; 0 .. symtab.length)
290     {
291         Symbol *s = symtab[si];
292         //printf("slice1: %s\n", s.Sident.ptr);
293 
294         if (!(s.Sflags & SFLunambig))   // if somebody took the address of s
295         {
296             sia[si].canSlice = false;
297             continue;
298         }
299 
300         const sz = type_size(s.Stype);
301         if (sz != 2 * SLICESIZE ||
302             tyfv(s.Stype.Tty) || tybasic(s.Stype.Tty) == TYhptr)    // because there is no TYseg
303         {
304             sia[si].canSlice = false;
305             continue;
306         }
307 
308         switch (s.Sclass)
309         {
310             case SCfastpar:
311             case SCregister:
312             case SCauto:
313             case SCshadowreg:
314             case SCparameter:
315                 anySlice = true;
316                 sia[si].canSlice = true;
317                 sia[si].accessSlice = false;
318                 // We can't slice whole XMM registers
319                 if (tyxmmreg(s.Stype.Tty) &&
320                     isXMMreg(s.Spreg) && s.Spreg2 == NOREG)
321                 {
322                     sia[si].canSlice = false;
323                 }
324                 break;
325 
326             case SCstack:
327             case SCpseudo:
328             case SCstatic:
329             case SCbprel:
330                 sia[si].canSlice = false;
331                 break;
332 
333             default:
334                 symbol_print(s);
335                 assert(0);
336         }
337     }
338 
339     if (!anySlice)
340         goto Ldone;
341 
342     foreach (b; BlockRange(startblock))
343     {
344         if (b.BC == BCasm)
345             goto Ldone;
346         if (b.Belem)
347             sliceStructs_Gather(symtab, sia, b.Belem);
348     }
349 
350     {   // scope needed because of goto skipping declarations
351         bool any = false;
352         int n = 0;              // the number of symbols added
353         foreach (si; 0 .. sia_length)
354         {
355             sia2[si + n].canSlice = false;
356             if (sia[si].canSlice)
357             {
358                 // If never did access it as a slice, don't slice
359                 if (!sia[si].accessSlice)
360                 {
361                     sia[si].canSlice = false;
362                     continue;
363                 }
364 
365                 /* Split slice-able symbol sold into two symbols,
366                  * (sold,snew) in adjacent slots in the symbol table.
367                  */
368                 Symbol *sold = symtab[si + n];
369 
370                 const idlen = 2 + strlen(sold.Sident.ptr) + 2;
371                 char *id = cast(char *)malloc(idlen + 1);
372                 assert(id);
373                 sprintf(id, "__%s_%d", sold.Sident.ptr, SLICESIZE);
374                 if (debugc) printf("creating slice symbol %s\n", id);
375                 Symbol *snew = symbol_calloc(id, cast(uint)idlen);
376                 free(id);
377                 snew.Sclass = sold.Sclass;
378                 snew.Sfl = sold.Sfl;
379                 snew.Sflags = sold.Sflags;
380                 if (snew.Sclass == SCfastpar || snew.Sclass == SCshadowreg)
381                 {
382                     snew.Spreg = sold.Spreg2;
383                     snew.Spreg2 = NOREG;
384                     sold.Spreg2 = NOREG;
385                 }
386                 type_free(sold.Stype);
387                 sold.Stype = type_fake(sia[si].ty[0]);
388                 sold.Stype.Tcount++;
389                 snew.Stype = type_fake(sia[si].ty[1]);
390                 snew.Stype.Tcount++;
391 
392                 // insert snew into symtab[si + n + 1]
393                 symbol_insert(symtab, snew, si + n + 1);
394 
395                 sia2[si + n].canSlice = true;
396                 sia2[si + n].si0 = si + n;
397                 sia2[si + n].ty[] = sia[si].ty[];
398                 ++n;
399                 any = true;
400             }
401         }
402         if (!any)
403             goto Ldone;
404     }
405 
406     foreach (si; 0 .. symtab.length)
407     {
408         Symbol *s = symtab[si];
409         assert(s.Ssymnum == si);
410     }
411 
412     foreach (b; BlockRange(startblock))
413     {
414         if (b.Belem)
415             sliceStructs_Replace(symtab, sia2, b.Belem);
416     }
417 
418 Ldone:
419     if (sip != tmp.ptr)
420         free(sip);
421 }
422 
423 
424 /*************************************
425  * Determine if `e` is a slice.
426  * Params:
427  *      e = elem that may be a slice
428  * Returns:
429  *      slice number if it is, NOTSLICE if not
430  */
431 enum NOTSLICE = -1;
432 int nthSlice(const(elem)* e)
433 {
434     const sz = tysize(e.Ety); // not getSize(e) because type_fake(TYstruct) doesn't work
435     if (sz == -1)
436         return NOTSLICE;
437     const sliceSize = SLICESIZE;
438 
439     /* See if e fits in a slice
440      */
441     const lwr = e.EV.Voffset;
442     const upr = lwr + sz;
443     if (0 <= lwr && upr <= sliceSize)
444         return 0;
445     if (sliceSize < lwr && upr <= sliceSize * 2)
446         return 1;
447 
448     return NOTSLICE;
449 }
450 
451 /******************************************
452  * Get size of an elem e.
453  */
454 private int getSize(const(elem)* e)
455 {
456     int sz = tysize(e.Ety);
457     if (sz == -1 && e.ET && (tybasic(e.Ety) == TYstruct || tybasic(e.Ety) == TYarray))
458         sz = cast(int)type_size(e.ET);
459     return sz;
460 }
461 
462 }