13 module dmd.backend.cgcod;
15 version = FRAMEPTR;
17 version (SCPP)
18     version = COMPILE;
19 version (MARS)
20     version = COMPILE;
22 version (COMPILE)
23 {
25 import core.stdc.stdio;
26 import core.stdc.stdlib;
27 import core.stdc.string;
29 import dmd.backend.backend;
30 import dmd.backend.cc;
31 import dmd.backend.cdef;
32 import dmd.backend.code;
33 import dmd.backend.cgcse;
34 import dmd.backend.code_x86;
35 import dmd.backend.codebuilder;
36 import dmd.backend.dlist;
37 import dmd.backend.dvec;
38 import dmd.backend.melf;
39 import dmd.backend.mem;
40 import dmd.backend.el;
41 import dmd.backend.exh;
42 import dmd.backend.global;
43 import dmd.backend.obj;
44 import dmd.backend.oper;
45 import dmd.backend.outbuf;
46 import dmd.backend.rtlsym;
47 import dmd.backend.symtab;
48 import dmd.backend.ty;
49 import dmd.backend.type;
50 import dmd.backend.xmm;
52 import dmd.backend.barray;
54 version (SCPP)
55 {
56     import parser;
57     import precomp;
58 }
60 extern (C++):
62 nothrow:
64 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
65 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);
67 version (MARS)
68     enum MARS = true;
69 else
70     enum MARS = false;
72 void dwarf_except_gentables(Funcsym *sfunc, uint startoffset, uint retoffset);
73 int REGSIZE();
/**
 * Build a single-bit mask.
 * Params:
 *      m = bit number to set
 * Returns:
 *      value with only bit `m` set
 */
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
__gshared
{
bool floatreg;                  // true if a floating register is required

int hasframe;                   // nonzero if this function has a stack frame
bool enforcealign;              // true if stack alignment is being enforced
targ_size_t spoff;
targ_size_t Foff;               // BP offset of the floating register
targ_size_t CSoff;              // offset of the common subexpressions
targ_size_t NDPoff;             // offset of the saved 8087 registers
targ_size_t pushoff;            // offset of the saved registers
bool pushoffuse;                // true if pushoff is in use
int BPoff;                      // offset from BP
int EBPtoESP;                   // added to an EBP offset to get the ESP offset
LocalSection Para;              // section holding the function parameters
LocalSection Auto;              // section holding automatics and registers
LocalSection Fast;              // section holding fastpar symbols
LocalSection EEStack;           // offset of SCstack variables from ESP
LocalSection Alloca;            // data for the alloca() temporary

REGSAVE regsave;

CGstate cgstate;                // state of the code generator

/************************************
 * Number of bytes that SP is beyond BP.
 */

uint stackpush;

int stackchanged;               // set to nonzero on any use of the stack
                                // other than accessing parameters. Used
                                // to see if we can address parameters
                                // with ESP rather than EBP.
int refparam;                   // nonzero if we referenced any parameters
int reflocal;                   // nonzero if we referenced any locals
bool anyiasm;                   // true if there is any inline assembler
char calledafunc;               // nonzero if we called a function
char needframe;                 // if true, then we will need the frame
                                // pointer (BP for the 8088)
char gotref;                    // nonzero if the GOTsym was referenced
uint usednteh;                  // if nonzero, NT exception handling was used
bool calledFinally;             // true if a BC_finally block was called

// Register contents
con_t regcon;

int pass;                       // PASSxxxx

private Symbol* retsym;         // set to the symbol that should be placed in
                                // register AX

/****************************
 * Register masks.
 */

regm_t msavereg;                // Mask of registers that we would like to save;
                                // they are temporaries (set by scodelem())
regm_t mfuncreg;                // Mask of registers preserved by a function

regm_t allregs;                 // ALLREGS optionally including mBP

int dfoidx;                     // which block we are in

targ_size_t funcoffset;         // offset of start of function
targ_size_t prolog_allocoffset; // offset past adj of stack allocation
targ_size_t startoffset;        // size of function entry code
targ_size_t retoffset;          // offset from start of func to ret code
targ_size_t retsize;            // size of function return

private regm_t lastretregs;
private regm_t last2retregs;
private regm_t last3retregs;
private regm_t last4retregs;
private regm_t last5retregs;

}
/*********************************
 * Generate code for a function.
 *
 * Code is generated for every block, possibly several times: after a pass
 * that is not PASSfinal (and when there is no inline assembler) register
 * assignment is attempted, the generated code is discarded, and the
 * function is regenerated from the `tryagain` label. Once the final pass
 * is done the prolog/epilogs are attached, block offsets are assigned,
 * jumps are shortened, and the code, switch tables and exception handling
 * tables are written out.
 *
 * Note at the end of this routine mfuncreg will contain the mask
 * of registers not affected by the function. Some minor optimization
 * possibilities are here.
 * Params:
 *      sfunc = function to generate code for; must be the current
 *              function (funcsym_p) and live in the current code segment
 */

void codgen(Symbol *sfunc)
{
    bool flag;                  // set when a jump optimization pass saved bytes
    block *btry;                // only used by the SCPP exception handling code

    // Register usage. If a bit is on, the corresponding register is live
    // in that basic block.

    //printf("codgen('%s')\n",funcsym_p.Sident.ptr);
    assert(sfunc == funcsym_p);
    assert(cseg == funcsym_p.Sseg);

    cgreg_init();
    CSE.initialize();
    tym_t functy = tybasic(sfunc.ty());
    cod3_initregs();
    allregs = ALLREGS;
    pass = PASSinitial;
    Alloca.init();
    anyiasm = 0;

    if (config.ehmethod == EHmethod.EH_DWARF)
    {
        /* The dwarf unwinder relies on the function epilog to exist
         */
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCexit)
                b.BC = BCret;
        }
    }

tryagain:
    debug
    if (debugr)
        printf("------------------ PASS%s -----------------\n",
            (pass == PASSinitial) ? "init".ptr : ((pass == PASSreg) ? "reg".ptr : "final".ptr));

    lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0;

    // if no parameters, assume we don't need a stack frame
    needframe = 0;
    enforcealign = false;
    gotref = 0;
    stackchanged = 0;
    stackpush = 0;
    refparam = 0;
    calledafunc = 0;
    retsym = null;

    cgstate.stackclean = 1;
    cgstate.funcarg.init();
    cgstate.funcargtos = ~0;
    cgstate.accessedTLS = false;

    regsave.reset();
    memset(global87.stack.ptr,0,global87.stack.sizeof);

    calledFinally = false;
    usednteh = 0;

    static if (MARS && TARGET_WINDOS)
    {
        if (sfunc.Sfunc.Fflags3 & Fjmonitor)
            usednteh |= NTEHjmonitor;
    }
    else version (SCPP)
    {
        if (CPP)
        {
            if (config.exe == EX_WIN32 &&
                (sfunc.Stype.Tflags & TFemptyexc || sfunc.Stype.Texcspec))
                usednteh |= NTEHexcspec;
            except_reset();
        }
    }

    // Set on a trial basis, turning it off if anything might throw
    sfunc.Sfunc.Fflags3 |= Fnothrow;

    floatreg = false;
    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    CSE.start();
    memset(&regcon,0,regcon.sizeof);
    regcon.cse.mval = regcon.cse.mops = 0;      // no common subs yet
    msavereg = 0;
    uint nretblocks = 0;
    mfuncreg = fregsaved;               // so we can see which are used
                                        // (bit is cleared each time
                                        //  we use one)
    for (block* b = startblock; b; b = b.Bnext)
    {
        memset(&b.Bregcon,0,b.Bregcon.sizeof);       // Clear out values in registers
        if (b.Belem)
            resetEcomsub(b.Belem);     // reset all the Ecomsubs
        if (b.BC == BCasm)
            anyiasm = 1;                // we have inline assembler
        if (b.BC == BCret || b.BC == BCretexp)
            nretblocks++;
    }

    if (!config.fulltypes || (config.flags4 & CFG4optimized))
    {
        // Collect the registers that carry incoming parameters, excluding
        // the ones whose parameter has already been assigned to a register
        regm_t noparams = 0;
        foreach (i; 0 .. globsym.length)
        {
            Symbol *s = globsym[i];
            s.Sflags &= ~SFLread;
            switch (s.Sclass)
            {
                case SCfastpar:
                case SCshadowreg:
                    regcon.params |= s.Spregm();
                    goto case SCparameter;

                case SCparameter:
                    if (s.Sfl == FLreg)
                        noparams |= s.Sregm;
                    break;

                default:
                    break;
            }
        }
        regcon.params &= ~noparams;
    }

    if (config.flags4 & CFG4optimized)
    {
        if (nretblocks == 0 &&                  // if no return blocks in function
            !(sfunc.ty() & mTYnaked))      // naked functions may have hidden ways of returning
            sfunc.Sflags |= SFLexit;       // mark function as never returning

        assert(dfo);

        cgreg_reset();
        for (dfoidx = 0; dfoidx < dfo.length; dfoidx++)
        {
            regcon.used = msavereg | regcon.cse.mval;   // registers already in use
            block* b = dfo[dfoidx];
            blcodgen(b);                        // gen code in depth-first order
            //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used));
            cgreg_used(dfoidx, b.Bregcon.used); // gather register used information
        }
    }
    else
    {
        pass = PASSfinal;
        for (block* b = startblock; b; b = b.Bnext)
            blcodgen(b);                // generate the code for each block
    }
    regcon.immed.mval = 0;
    assert(!regcon.cse.mops);           // should have all been used

    // See which variables we can put into registers
    if (pass != PASSfinal &&
        !anyiasm)                               // possible LEA or LES opcodes
    {
        allregs |= cod3_useBP();                // see if we can use EBP

        // If pic code, but EBX was never needed
        if (!(allregs & mask(PICREG)) && !gotref)
        {
            allregs |= mask(PICREG);            // EBX can now be used
            cgreg_assign(retsym);
            pass = PASSreg;
        }
        else if (cgreg_assign(retsym))          // if we found some registers
            pass = PASSreg;
        else
            pass = PASSfinal;

        // Throw away the code from this pass and generate it again
        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
        goto tryagain;
    }
    cgreg_term();

    version (SCPP)
    {
        if (CPP)
            cgcod_eh();
    }

    // See if we need to enforce a particular stack alignment
    foreach (i; 0 .. globsym.length)
    {
        Symbol *s = globsym[i];

        if (Symbol_Sisdead(s, anyiasm))
            continue;

        switch (s.Sclass)
        {
            case SCregister:
            case SCauto:
            case SCfastpar:
                if (s.Sfl == FLreg)
                    break;

                const sz = type_alignsize(s.Stype);
                if (sz > STACKALIGN && (I64 || config.exe == EX_OSX))
                {
                    STACKALIGN = sz;
                    enforcealign = true;
                }
                break;

            default:
                break;
        }
    }

    stackoffsets(1);            // compute addresses of stack variables
    cod5_prol_epi();            // see where to place prolog/epilog
    CSE.finish();               // compute addresses and sizes of CSE saves

    if (configv.addlinenumbers)
        objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg));

    // Otherwise, jmp's to startblock will execute the prolog again
    assert(!startblock.Bpred);

    CodeBuilder cdbprolog; cdbprolog.ctor();
    prolog(cdbprolog);           // gen function start code
    code *cprolog = cdbprolog.finish();
    if (cprolog)
        pinholeopt(cprolog,null);       // optimize

    funcoffset = Offset(sfunc.Sseg);
    targ_size_t coffset = Offset(sfunc.Sseg);

    if (eecontext.EEelem)
        genEEcode();

    for (block* b = startblock; b; b = b.Bnext)
    {
        // We couldn't do this before because localsize was unknown
        switch (b.BC)
        {
            case BCret:
                if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked))
                {
                    CodeBuilder cdb; cdb.ctor();
                    cdb.append(b.Bcode);
                    cdb.genlinnum(b.Bsrcpos);
                    b.Bcode = cdb.finish();
                }
                goto case BCretexp;

            case BCretexp:
                epilog(b);
                break;

            default:
                if (b.Bflags & BFLepilog)
                    epilog(b);
                break;
        }
        assignaddr(b);                  // assign addresses
        pinholeopt(b.Bcode,b);         // do pinhole optimization
        if (b.Bflags & BFLprolog)      // do function prolog
        {
            startoffset = coffset + calcblksize(cprolog) - funcoffset;
            b.Bcode = cat(cprolog,b.Bcode);
        }
        cgsched_block(b);
        b.Bsize = calcblksize(b.Bcode);       // calculate block size
        if (b.Balign)
        {
            targ_size_t u = b.Balign - 1;
            coffset = (coffset + u) & ~u;
        }
        b.Boffset = coffset;           /* offset of this block         */
        coffset += b.Bsize;            /* offset of following block    */
    }

    debug
    debugw && printf("code addr complete\n");

    // Do jump optimization
    do
    {
        flag = false;
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.Bflags & BFLjmpoptdone)      /* if no more jmp opts for this blk */
                continue;
            int i = branch(b,0);            // see if jmp => jmp short
            if (i)                          // if any bytes saved
            {   targ_size_t offset;

                // This block shrank, so recompute the offsets of all blocks after it
                b.Bsize -= i;
                offset = b.Boffset + b.Bsize;
                for (block* bn = b.Bnext; bn; bn = bn.Bnext)
                {
                    if (bn.Balign)
                    {   targ_size_t u = bn.Balign - 1;

                        offset = (offset + u) & ~u;
                    }
                    bn.Boffset = offset;
                    offset += bn.Bsize;
                }
                coffset = offset;
                flag = true;
            }
        }
        if (!I16 && !(config.flags4 & CFG4optimized))
            break;                      // use the long conditional jmps
    } while (flag);                     // loop till no more bytes saved

    debug
    debugw && printf("code jump optimization complete\n");

    version (MARS)
    {
        if (usednteh & NTEH_try)
        {
            // Do this before code is emitted because we patch some instructions
            nteh_filltables();
        }
    }

    // Compute starting offset for switch tables
    targ_size_t swoffset;
    int jmpseg = -1;
    if (config.flags & CFGromable)
    {
        jmpseg = 0;
        swoffset = coffset;
    }

    // Emit the generated code
    if (eecontext.EEcompile == 1)
    {
        codout(sfunc.Sseg,eecontext.EEcode);
        code_free(eecontext.EEcode);
        version (SCPP)
        {
            el_free(eecontext.EEelem);
        }
    }
    else
    {
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCjmptab || b.BC == BCswitch)
            {
                if (jmpseg == -1)
                {
                    jmpseg = objmod.jmpTableSegment(sfunc);
                    swoffset = Offset(jmpseg);
                }
                swoffset = _align(0,swoffset);
                b.Btableoffset = swoffset;     /* offset of sw tab */
                swoffset += b.Btablesize;
            }
            jmpaddr(b.Bcode);          /* assign jump addresses        */

            debug
            if (debugc)
            {
                printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n",
                    cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg));
                if (b.Bcode)
                    printf( "First opcode of block is: %0x\n", b.Bcode.Iop );
            }

            if (b.Balign)
            {   uint u = b.Balign;
                uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1);

                cod3_align_bytes(sfunc.Sseg, nalign);
            }
            // The offsets computed above must agree with what was actually emitted
            assert(b.Boffset == Offset(sfunc.Sseg));

            version (SCPP)
            {
                if (CPP && !(config.exe == EX_WIN32))
                {
                    //printf("b = %p, index = %d\n",b,b.Bindex);
                    //except_index_set(b.Bindex);

                    if (btry != b.Btry)
                    {
                        btry = b.Btry;
                        except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
                    }
                    if (b.BC == BCtry)
                    {
                        btry = b;
                        except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
                    }
                }
            }

            codout(sfunc.Sseg,b.Bcode);   // output code
        }
        if (coffset != Offset(sfunc.Sseg))
        {
            debug
            printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg));

            assert(0);
        }
        sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset;    // size of function

        static if (NTEXCEPTIONS || MARS)
        {
            /* Exactly one of the two branches below is taken: either this is
             * a MARS build, or NTEXCEPTIONS is set (otherwise the enclosing
             * static if would not have been entered).
             */
            version (MARS)
                const nteh = usednteh & NTEH_try;
            else static if (NTEXCEPTIONS)
                const nteh = usednteh & NTEHcpp;
            if (nteh)
            {
                assert(!(config.flags & CFGromable));
                //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset);
                objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset);
            }
        }

        // Write out switch tables, and compute retoffset from the last block
        for (block* b = startblock; b; b = b.Bnext)
        {
            switch (b.BC)
            {
                case BCjmptab:              /* if jump table                */
                    outjmptab(b);           /* write out jump table         */
                    goto Ldefault;

                case BCswitch:
                    outswitab(b);           /* write out switch table       */
                    goto Ldefault;

                case BCret:
                case BCretexp:
                    /* Compute offset to return code from start of function */
                    retoffset = b.Boffset + b.Bsize - retsize - funcoffset;
                    version (MARS)
                    {
                        /* Add 3 bytes to retoffset in case we have an exception
                         * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT
                         * IT FIXES THE PROBLEM HERE AS WELL.
                         */
                        if (usednteh & NTEH_try)
                            retoffset += 3;
                    }
                    break;

                default:
                Ldefault:
                    retoffset = b.Boffset + b.Bsize - funcoffset;
                    break;
            }
        }
        if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked))
            /* put line number at end of function on the
               start of the last instruction
             */
            /* Instead, try offset to cleanup code  */
            if (retoffset < sfunc.Ssize)
                objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset);

        static if (TARGET_WINDOS && MARS)
        {
            if (config.exe == EX_WIN64)
                win64_pdata(sfunc);
        }

        static if (MARS)
        {
            if (usednteh & NTEH_try)
            {
                // The function's code has already been written out above;
                // now generate the NT exception handling tables for it
                nteh_gentables(sfunc);
            }
            if (usednteh & (EHtry | EHcleanup) &&   // saw BCtry or BC_try or OPddtor
                config.ehmethod == EHmethod.EH_DM)
            {
                except_gentables();
            }
            if (config.ehmethod == EHmethod.EH_DWARF)
            {
                // Fstartblock is only set for the duration of the table generation
                sfunc.Sfunc.Fstartblock = startblock;
                dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset);
                sfunc.Sfunc.Fstartblock = null;
            }
        }

        version (SCPP)
        {
            // Write out frame handler
            if (NTEXCEPTIONS && usednteh & NTEHcpp)
            {
                nteh_framehandler(sfunc, except_gentables());
            }
            else
            {
                if (NTEXCEPTIONS && usednteh & NTEH_try)
                {
                    nteh_gentables(sfunc);
                }
                else
                {
                    if (CPP)
                        except_gentables();
                }
            }
        }

        // The code has been emitted, so the per-block code lists can go
        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
    }

    // Mask of regs saved
    // BUG: do interrupt functions save BP?
    sfunc.Sregsaved = (functy == TYifunc) ? cast(regm_t) mBP : (mfuncreg | fregsaved);

    debug
    if (global87.stackused != 0)
      printf("stackused = %d\n",global87.stackused);

    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    global87.save.dtor();       // clean up ndp save array
}
/*********************************************
 * Align a section on the stack.
 * Params:
 *      base = negative offset of section from frame pointer
 *      alignment = alignment to use; it is capped at STACKALIGN, and
 *                  0 leaves base untouched
 *      bias = difference between where frame pointer points and the
 *             STACKALIGNed part of the stack
 * Returns:
 *      base, revised downward so it is aligned
 */
targ_size_t alignsection(targ_size_t base, uint alignment, int bias)
{
    assert(cast(int)base <= 0);

    // Never align more strictly than STACKALIGN
    const uint boundary = (alignment > STACKALIGN) ? STACKALIGN : alignment;
    if (boundary == 0)
        return base;

    // Distance of the section from the aligned part of the stack
    const int distance = cast(int)(-base + bias);
    assert(distance >= 0);

    // Amount by which that distance overshoots the previous boundary
    const int excess = distance & (boundary - 1);
    if (excess == 0)
        return base;

    // Move base further down to the next boundary
    return base - (boundary - excess);
}
730 /*******************************
731  * Generate code for a function start.
732  * Input:
733  *      Offset(cseg)         address of start of code
734  *      Auto.alignment
735  * Output:
736  *      Offset(cseg)         adjusted for size of code generated
737  *      EBPtoESP
738  *      hasframe
739  *      BPoff
740  */
741 void prolog(ref CodeBuilder cdb)
742 {
743     bool enter;
745     //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident.ptr, needframe, Auto.alignment);
746     debug debugw && printf("funcstart()\n");
747     regcon.immed.mval = 0;                      /* no values in registers yet   */
748     version (FRAMEPTR)
749         EBPtoESP = 0;
750     else
751         EBPtoESP = -REGSIZE;
752     hasframe = 0;
753     bool pushds = false;
754     BPoff = 0;
755     bool pushalloc = false;
756     tym_t tyf = funcsym_p.ty();
757     tym_t tym = tybasic(tyf);
758     const farfunc = tyfarfunc(tym) != 0;
760     // Special Intel 64 bit ABI prolog setup for variadic functions
761     Symbol *sv64 = null;                        // set to __va_argsave
762     if (I64 && variadic(funcsym_p.Stype))
763     {
764         /* The Intel 64 bit ABI scheme.
765          * abi_sysV_amd64.pdf
766          * Load arguments passed in registers into the varargs save area
767          * so they can be accessed by va_arg().
768          */
769         /* Look for __va_argsave
770          */
771         for (SYMIDX si = 0; si < globsym.length; si++)
772         {
773             Symbol *s = globsym[si];
774             if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0)
775             {
776                 if (!(s.Sflags & SFLdead))
777                     sv64 = s;
778                 break;
779             }
780         }
781     }
783     if (config.flags & CFGalwaysframe ||
784         funcsym_p.Sfunc.Fflags3 & Ffakeeh ||
785         /* The exception stack unwinding mechanism relies on the EBP chain being intact,
786          * so need frame if function can possibly throw
787          */
788         !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) ||
789         cgstate.accessedTLS ||
790         sv64
791        )
792         needframe = 1;
794     CodeBuilder cdbx; cdbx.ctor();
796 Lagain:
797     spoff = 0;
798     char guessneedframe = needframe;
799     int cfa_offset = 0;
800 //    if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)))
801 //      usednteh |= NTEHpassthru;
803     /* Compute BP offsets for variables on stack.
804      * The organization is:
805      *  Para.size    parameters
806      * -------- stack is aligned to STACKALIGN
807      *          seg of return addr      (if far function)
808      *          IP of return addr
809      *  BP.    caller's BP
810      *          DS                      (if Windows prolog/epilog)
811      *          exception handling context symbol
812      *  Fast.size fastpar
813      *  Auto.size    autos and regs
814      *  regsave.off  any saved registers
815      *  Foff    floating register
816      *  Alloca.size  alloca temporary
817      *  CSoff   common subs
818      *  NDPoff  any 8087 saved registers
819      *          monitor context record
820      *          any saved registers
821      */
823     if (tym == TYifunc)
824         Para.size = 26; // how is this number derived?
825     else
826     {
827         version (FRAMEPTR)
828         {
829             Para.size = ((farfunc ? 2 : 1) + needframe) * REGSIZE;
830             if (needframe)
831                 EBPtoESP = -REGSIZE;
832         }
833         else
834             Para.size = ((farfunc ? 2 : 1) + 1) * REGSIZE;
835     }
837     /* The real reason for the FAST section is because the implementation of contracts
838      * requires a consistent stack frame location for the 'this' pointer. But if varying
839      * stuff in Auto.offset causes different alignment for that section, the entire block can
840      * shift around, causing a crash in the contracts.
841      * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their
842      * own FAST section, which is never aligned at a size bigger than REGSIZE, and so
843      * its alignment never shifts around.
844      * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned
845      * individually rather than as a group.
846      */
847     Fast.size = 0;
848     static if (NTEXCEPTIONS == 2)
849     {
850         Fast.size -= nteh_contextsym_size();
851         version (MARS)
852         {
853             static if (TARGET_WINDOS)
854             {
855                 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0)
856                     Fast.size -= 5 * 4;
857             }
858         }
859     }
861     /* Despite what the comment above says, aligning Fast section to size greater
862      * than REGSIZE does not break contract implementation. Fast.offset and
863      * Fast.alignment must be the same for the overriding and
864      * the overridden function, since they have the same parameters. Fast.size
865      * must be the same because otherwise, contract inheritance wouldn't work
866      * even if we didn't align Fast section to size greater than REGSIZE. Therefore,
867      * the only way aligning the section could cause problems with contract
868      * inheritance is if bias (declared below) differed for the overridden
869      * and the overriding function.
870      *
871      * Bias depends on Para.size and needframe. The value of Para.size depends on
872      * whether the function is an interrupt handler and whether it is a farfunc.
873      * DMD does not have _interrupt attribute and D does not make a distinction
874      * between near and far functions, so Para.size should always be 2 * REGSIZE
875      * for D.
876      *
877      * The value of needframe depends on a global setting that is only set
878      * during backend's initialization and on function flag Ffakeeh. On Windows,
879      * that flag is always set for virtual functions, for which contracts are
880      * defined and on other platforms, it is never set. Because of that
881      * the value of neadframe should always be the same for the overridden
882      * and the overriding function, and so bias should be the same too.
883      */
885 version (FRAMEPTR)
886     int bias = enforcealign ? 0 : cast(int)(Para.size);
887 else
888     int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE));
890     if (Fast.alignment < REGSIZE)
891         Fast.alignment = REGSIZE;
893     Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias);
895     if (Auto.alignment < REGSIZE)
896         Auto.alignment = REGSIZE;       // necessary because localsize must be REGSIZE aligned
897     Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias);
899     regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias);
900     //printf("regsave.off = x%x, size = x%x, alignment = %x\n",
901         //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment);
903     if (floatreg)
904     {
905         uint floatregsize = config.fpxmmregs || I32 ? 16 : DOUBLESIZE;
906         Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias);
907         //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize);
908     }
909     else
910         Foff = regsave.off;
912     Alloca.alignment = REGSIZE;
913     Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias);
915     CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias);
916     //printf("CSoff = x%x, size = x%x, alignment = %x\n",
917         //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment);
919     NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias);
921     regm_t topush = fregsaved & ~mfuncreg;          // mask of registers that need saving
922     pushoffuse = false;
923     pushoff = NDPoff;
924     /* We don't keep track of all the pushes and pops in a function. Hence,
925      * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder
926      * won't be setting ESP correctly. With pushoffuse, the registers are restored
927      * from EBP, which is kept track of properly.
928      */
929     if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64))
930     {
931         /* Instead of pushing the registers onto the stack one by one,
932          * allocate space in the stack frame and copy/restore them there.
933          */
934         int xmmtopush = numbitsset(topush & XMMREGS);   // XMM regs take 16 bytes
935         int gptopush = numbitsset(topush) - xmmtopush;  // general purpose registers to save
936         if (NDPoff || xmmtopush || cgstate.funcarg.size)
937         {
938             pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16),
939                     xmmtopush ? STACKALIGN : REGSIZE, bias);
940             pushoffuse = true;          // tell others we're using this strategy
941         }
942     }
944     //printf("Fast.size = x%x, Auto.size = x%x\n", (int)Fast.size, (int)Auto.size);
946     cgstate.funcarg.alignment = STACKALIGN;
947     /* If the function doesn't need the extra alignment, don't do it.
948      * Can expand on this by allowing for locals that don't need extra alignment
949      * and calling functions that don't need it.
950      */
951     if (pushoff == 0 && !calledafunc && config.fpxmmregs && (I32 || I64))
952     {
953         cgstate.funcarg.alignment = I64 ? 8 : 4;
954     }
956     //printf("pushoff = %d, size = %d, alignment = %d, bias = %d\n", cast(int)pushoff, cast(int)cgstate.funcarg.size, cast(int)cgstate.funcarg.alignment, cast(int)bias);
957     cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias);
959     localsize = -cgstate.funcarg.offset;
961     //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n",
962         //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize);
963     assert(cast(targ_ptrdiff_t)localsize >= 0);
965     // Keep the stack aligned by 8 for any subsequent function calls
966     if (!I16 && calledafunc &&
967         (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
968     {
969         int npush = numbitsset(topush);            // number of registers that need saving
970         npush += numbitsset(topush & XMMREGS);     // XMM regs take 16 bytes, so count them twice
971         if (pushoffuse)
972             npush = 0;
974         //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n",
975                //npush, Para.size, needframe, localsize);
977         int sz = cast(int)(localsize + npush * REGSIZE);
978         if (!enforcealign)
979         {
980             version (FRAMEPTR)
981                 sz += Para.size;
982             else
983                 sz += Para.size + (needframe ? 0 : -REGSIZE);
984         }
985         if (sz & (STACKALIGN - 1))
986             localsize += STACKALIGN - (sz & (STACKALIGN - 1));
987     }
988     cgstate.funcarg.offset = -localsize;
990     //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n",
991         //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize);
993     uint xlocalsize = cast(uint)localsize;    // amount to subtract from ESP to make room for locals
995     if (tyf & mTYnaked)                 // if no prolog/epilog for function
996     {
997         hasframe = 1;
998         return;
999     }
1001     if (tym == TYifunc)
1002     {
1003         prolog_ifunc(cdbx,&tyf);
1004         hasframe = 1;
1005         cdb.append(cdbx);
1006         goto Lcont;
1007     }
1009     /* Determine if we need BP set up   */
1010     if (enforcealign)
1011     {
1012         // we need BP to reset the stack before return
1013         // otherwise the return address is lost
1014         needframe = 1;
1016     }
1017     else if (config.flags & CFGalwaysframe)
1018         needframe = 1;
1019     else
1020     {
1021         if (localsize)
1022         {
1023             if (I16 ||
1024                 !(config.flags4 & CFG4speed) ||
1025                 config.target_cpu < TARGET_Pentium ||
1026                 farfunc ||
1027                 config.flags & CFGstack ||
1028                 xlocalsize >= 0x1000 ||
1029                 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
1030                 anyiasm ||
1031                 Alloca.size
1032                )
1033                 needframe = 1;
1034         }
1035         if (refparam && (anyiasm || I16))
1036             needframe = 1;
1037     }
1039     if (needframe)
1040     {
1041         assert(mfuncreg & mBP);         // shouldn't have used mBP
1043         if (!guessneedframe)            // if guessed wrong
1044             goto Lagain;
1045     }
1047     if (I16 && config.wflags & WFwindows && farfunc)
1048     {
1049         prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds);
1050         enter = false;                  // don't use ENTER instruction
1051         hasframe = 1;                   // we have a stack frame
1052     }
1053     else if (needframe)                 // if variables or parameters
1054     {
1055         prolog_frame(cdbx, farfunc, xlocalsize, enter, cfa_offset);
1056         hasframe = 1;
1057     }
1059     /* Align the stack if necessary */
1060     prolog_stackalign(cdbx);
1062     /* Subtract from stack pointer the size of the local stack frame
1063      */
1064     if (config.flags & CFGstack)        // if stack overflow check
1065     {
1066         prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
1067         if (Alloca.size)
1068             prolog_setupalloca(cdbx);
1069     }
1070     else if (needframe)                      /* if variables or parameters   */
1071     {
1072         if (xlocalsize)                 /* if any stack offset          */
1073         {
1074             prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
1075             if (Alloca.size)
1076                 prolog_setupalloca(cdbx);
1077         }
1078         else
1079             assert(Alloca.size == 0);
1080     }
1081     else if (xlocalsize)
1082     {
1083         assert(I32 || I64);
1084         prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc);
1085         version (FRAMEPTR) { } else
1086             BPoff += REGSIZE;
1087     }
1088     else
1089         assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor));
1090     EBPtoESP += xlocalsize;
1091     if (hasframe)
1092         EBPtoESP += REGSIZE;
1094     /* Win64 unwind needs the amount of code generated so far
1095      */
1096     if (config.exe == EX_WIN64)
1097     {
1098         code *c = cdbx.peek();
1099         pinholeopt(c, null);
1100         prolog_allocoffset = calcblksize(c);
1101     }
1103     version (SCPP)
1104     {
1105         /*  The idea is to generate trace for all functions if -Nc is not thrown.
1106          *  If -Nc is thrown, generate trace only for global COMDATs, because those
1107          *  are relevant to the FUNCTIONS statement in the linker .DEF file.
1108          *  This same logic should be in epilog().
1109          */
1110         if (config.flags & CFGtrace &&
1111             (!(config.flags4 & CFG4allcomdat) ||
1112              funcsym_p.Sclass == SCcomdat ||
1113              funcsym_p.Sclass == SCglobal ||
1114              (config.flags2 & CFG2comdat && SymInline(funcsym_p))
1115             )
1116            )
1117         {
1118             uint spalign = 0;
1119             int sz = cast(int)localsize;
1120             if (!enforcealign)
1121             {
1122                 version (FRAMEPTR)
1123                     sz += Para.size;
1124                 else
1125                     sz += Para.size + (needframe ? 0 : -REGSIZE);
1126             }
1127             if (STACKALIGN >= 16 && (sz & (STACKALIGN - 1)))
1128                 spalign = STACKALIGN - (sz & (STACKALIGN - 1));
1130             if (spalign)
1131             {   /* This could be avoided by moving the function call to after the
1132                  * registers are saved. But I don't remember why the call is here
1133                  * and not there.
1134                  */
1135                 cod3_stackadj(cdbx, spalign);
1136             }
1138             uint regsaved;
1139             prolog_trace(cdbx, farfunc, &regsaved);
1141             if (spalign)
1142                 cod3_stackadj(cdbx, -spalign);
1143             useregs((ALLREGS | mBP | mES) & ~regsaved);
1144         }
1145     }
1147     version (MARS)
1148     {
1149         if (usednteh & NTEHjmonitor)
1150         {   Symbol *sthis;
1152             for (SYMIDX si = 0; 1; si++)
1153             {   assert(si < globsym.length);
1154                 sthis = globsym[si];
1155                 if (strcmp(sthis.Sident.ptr,"this".ptr) == 0)
1156                     break;
1157             }
1158             nteh_monitor_prolog(cdbx,sthis);
1159             EBPtoESP += 3 * 4;
1160         }
1161     }
1163     cdb.append(cdbx);
1164     prolog_saveregs(cdb, topush, cfa_offset);
1166 Lcont:
1168     if (config.exe == EX_WIN64)
1169     {
1170         if (variadic(funcsym_p.Stype))
1171             prolog_gen_win64_varargs(cdb);
1172         regm_t namedargs;
1173         prolog_loadparams(cdb, tyf, pushalloc, namedargs);
1174         return;
1175     }
1177     prolog_ifunc2(cdb, tyf, tym, pushds);
1179     static if (NTEXCEPTIONS == 2)
1180     {
1181         if (usednteh & NTEH_except)
1182             nteh_setsp(cdb, 0x89);            // MOV __context[EBP].esp,ESP
1183     }
1185     // Load register parameters off of the stack. Do not use
1186     // assignaddr(), as it will replace the stack reference with
1187     // the register!
1188     regm_t namedargs;
1189     prolog_loadparams(cdb, tyf, pushalloc, namedargs);
1191     if (sv64)
1192         prolog_genvarargs(cdb, sv64, namedargs);
1194     /* Alignment checks
1195      */
1196     //assert(Auto.alignment <= STACKALIGN);
1197     //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0);
1198 }
/************************************
 * Comparison callback handed to qsort() when ordering auto symbols.
 *
 * Symbols are ordered first by alignment size (as reported by
 * Symbol_Salignsize()), larger alignment first, and then by Sweight,
 * smaller weight first.
 *
 * Params:
 *      ps1 = pointer to the first  Symbol* element of the array being sorted
 *      ps2 = pointer to the second Symbol* element of the array being sorted
 * Returns:
 *      < 0     s1 goes farther from frame pointer
 *      > 0     s1 goes nearer the frame pointer
 *      = 0     no difference
 */

extern (C) int
 autosort_cmp(scope const void *ps1, scope const void *ps2)
{
    Symbol *lhs = *cast(Symbol **)ps1;
    Symbol *rhs = *cast(Symbol **)ps2;

    /* Primary key: alignment size. The largest alignment goes furthest
     * away from the frame pointer, so those symbols get allocated first.
     */
    const uint lhsAlign = Symbol_Salignsize(lhs);
    const uint rhsAlign = Symbol_Salignsize(rhs);
    if (lhsAlign != rhsAlign)
        return (lhsAlign > rhsAlign) ? -1 : 1;

    /* Secondary key: Sweight. Variables with higher Sweight are moved nearer
     * the frame pointer because the addressing mode is fewer bytes. Grouping
     * high Sweight variables together may also put them in the same cache.
     */
    if (lhs.Sweight < rhs.Sweight)
        return -1;
    if (lhs.Sweight > rhs.Sweight)
        return 1;

    /* More:
     * 1. put static arrays nearest the frame pointer, so buffer overflows
     *    can't change other variable contents
     * 2. Do the coloring at the byte level to minimize stack usage
     */
    return 0;
}
/******************************
 * Compute offsets for remaining tmp, automatic and register variables
 * that did not make it into registers.
 *
 * Re-initializes the Para, Fast, Auto and EEStack sections, then walks
 * globsym and assigns Soffset to every symbol that needs stack storage,
 * growing the matching section's offset (and alignment, for Fast and Auto).
 * When flags is non-zero and CFG4optimized is set, autos are collected
 * first, sorted with autosort_cmp(), and allowed to share a slot with an
 * earlier auto whose live range (Srange) is disjoint.
 *
 * Params:
 *      flags = 0: do estimate only
 *              1: final
 */
void stackoffsets(int flags)
{
    //printf("stackoffsets() %s\n", funcsym_p.Sident);

    Para.init();        // parameter offset
    Fast.init();        // SCfastpar offset
    Auto.init();        // automatic & register offset
    EEStack.init();     // for SCstack's

    // Set if doing optimization of auto layout
    bool doAutoOpt = flags && config.flags4 & CFG4optimized;

    // Put autos in another array so we can do optimizations on the stack layout
    Symbol*[10] autotmp;
    Symbol **autos = null;
    if (doAutoOpt)
    {
        // Small symbol tables use the on-stack buffer, larger ones the heap
        if (globsym.length <= autotmp.length)
            autos = autotmp.ptr;
        else
        {   autos = cast(Symbol **)malloc(globsym.length * (*autos).sizeof);
            assert(autos);
        }
    }
    size_t autosi = 0;  // number used in autos[]

    for (int si = 0; si < globsym.length; si++)
    {   Symbol *s = globsym[si];

        /* Don't allocate space for dead or zero size parameters
         */
        switch (s.Sclass)
        {
            case SCfastpar:
                if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh))
                    goto Ldefault;   // don't need consistent stack frame
                break;

            case SCparameter:
                if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty)))
                {
                    Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary
                    s.Soffset = Para.offset;
                    continue;
                }
                break;          // allocate even if it's dead

            case SCshadowreg:
                break;          // allocate even if it's dead

            default:
            Ldefault:
                if (Symbol_Sisdead(s, anyiasm))
                    continue;       // don't allocate space
                break;
        }

        targ_size_t sz = type_size(s.Stype);
        if (sz == 0)
            sz++;               // can't handle 0 length structs

        uint alignsize = Symbol_Salignsize(s);
        if (alignsize > STACKALIGN)
            alignsize = STACKALIGN;         // no point if the stack is less aligned

        //printf("symbol '%s', size = x%lx, alignsize = %d, read = %x\n",s.Sident,(long)sz, (int)alignsize, s.Sflags & SFLread);
        assert(cast(int)sz >= 0);

        switch (s.Sclass)
        {
            case SCfastpar:
                /* Get these
                 * right next to the stack frame pointer, EBP.
                 * Needed so we can call nested contract functions
                 * frequire and fensure.
                 */
                if (s.Sfl == FLreg)        // if allocated in register
                    continue;
                /* Needed because storing fastpar's on the stack in prolog()
                 * does the entire register
                 */
                if (sz < REGSIZE)
                    sz = REGSIZE;

                Fast.offset = _align(sz,Fast.offset);
                s.Soffset = Fast.offset;
                Fast.offset += sz;
                //printf("fastpar '%s' sz = %d, fast offset =  x%x, %p\n",s.Sident,(int)sz,(int)s.Soffset, s);

                if (alignsize > Fast.alignment)
                    Fast.alignment = alignsize;
                break;

            case SCregister:
            case SCauto:
                if (s.Sfl == FLreg)        // if allocated in register
                    break;

                if (doAutoOpt)
                {   autos[autosi++] = s;    // deal with later
                    break;
                }

                Auto.offset = _align(sz,Auto.offset);
                s.Soffset = Auto.offset;
                Auto.offset += sz;
                //printf("auto    '%s' sz = %d, auto offset =  x%lx\n",s.Sident,sz,(long)s.Soffset);

                if (alignsize > Auto.alignment)
                    Auto.alignment = alignsize;
                break;

            case SCstack:
                EEStack.offset = _align(sz,EEStack.offset);
                s.Soffset = EEStack.offset;
                //printf("EEStack.offset =  x%lx\n",(long)s.Soffset);
                EEStack.offset += sz;
                break;

            case SCshadowreg:
            case SCparameter:
                if (config.exe == EX_WIN64)
                {
                    // Every parameter takes one 8 byte slot here
                    assert((Para.offset & 7) == 0);
                    s.Soffset = Para.offset;
                    Para.offset += 8;
                    break;
                }
                /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general,
                 * but are 4 byte aligned on the OSX 32 stack.
                 */
                Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */
                if (alignsize >= 16 &&
                    (I64 || (config.exe == EX_OSX &&
                         (tyaggregate(s.ty()) || tyvector(s.ty())))))
                    Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1);
                s.Soffset = Para.offset;
                //printf("%s param offset =  x%lx, alignsize = %d\n",s.Sident,(long)s.Soffset, (int)alignsize);
                Para.offset += (s.Sflags & SFLdouble)
                            ? type_size(tstypes[TYdouble])   // float passed as double
                            : type_size(s.Stype);
                break;

            case SCpseudo:
            case SCstatic:
            case SCbprel:
                break;          // no stack slot is assigned for these classes
            default:
                symbol_print(s);
                assert(0);
        }
    }

    if (autosi)
    {
        qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp);

        // Bit i is set when autos[i] owns a slot that later autos may share
        vec_t tbl = vec_calloc(autosi);

        for (size_t si = 0; si < autosi; si++)
        {
            Symbol *s = autos[si];

            targ_size_t sz = type_size(s.Stype);
            if (sz == 0)
                sz++;               // can't handle 0 length structs

            uint alignsize = Symbol_Salignsize(s);
            if (alignsize > STACKALIGN)
                alignsize = STACKALIGN;         // no point if the stack is less aligned

            /* See if we can share storage with another variable
             * if their live ranges do not overlap.
             */
            if (// Don't share because could stomp on variables
                // used in finally blocks
                !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) &&
                s.Srange && !(s.Sflags & SFLspill))
            {
                for (size_t i = 0; i < si; i++)
                {
                    if (!vec_testbit(i,tbl))
                        continue;
                    Symbol *sp = autos[i];
//printf("auto    s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange));
                    if (vec_disjoint(s.Srange,sp.Srange) &&
                        !(sp.Soffset & (alignsize - 1)) &&
                        sz <= type_size(sp.Stype))
                    {
                        // The slot is now live over the union of both ranges
                        vec_or(sp.Srange,sp.Srange,s.Srange);
                        //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident);
                        s.Soffset = sp.Soffset;
                        goto L2;
                    }
                }
            }
            Auto.offset = _align(sz,Auto.offset);
            s.Soffset = Auto.offset;
            //printf("auto    '%s' sz = %d, auto offset =  x%lx\n",s.Sident,sz,(long)s.Soffset);
            Auto.offset += sz;
            if (s.Srange && !(s.Sflags & SFLspill))
                vec_setbit(si,tbl);

            if (alignsize > Auto.alignment)
                Auto.alignment = alignsize;
        L2: { }
        }

        vec_free(tbl);
    }

    /* Release the heap buffer regardless of whether any autos were collected;
     * doing this only when autosi != 0 leaked the allocation for functions
     * with more than autotmp.length symbols but no deferred autos.
     * autos is null when doAutoOpt is false, and free(null) is a no-op.
     */
    if (autos != autotmp.ptr)
        free(autos);
}
/****************************
 * Generate code for a block.
 *
 * Seeds the global register state (regcon) from the block's predecessors,
 * sets up which variables live in registers for this block, lets
 * outblkexitcode() build the code, stores the result in bl.Bcode, and finally
 * records the resulting register state and reference flags on the block.
 *
 * Params:
 *      bl = block to generate code for
 */

private void blcodgen(block *bl)
{
    regm_t mfuncregsave = mfuncreg;

    //dbg_printf("blcodgen(%p)\n",bl);

    /* Determine existing immediate values in registers by ANDing
        together the values from all the predecessors of b.
     */
    assert(bl.Bregcon.immed.mval == 0);
    regcon.immed.mval = 0;      // assume no previous contents in registers
//    regcon.cse.mval = 0;
    foreach (predLink; ListRange(bl.Bpred))
    {
        block *pred = list_block(predLink);

        if (predLink != bl.Bpred)
        {
            // Any further predecessor: keep only what it agrees on
            regcon.params &= pred.Bregcon.params;
            regcon.immed.mval &= pred.Bregcon.immed.mval;
            if (regcon.immed.mval != 0)
            {
                // Actual values must match, too
                for (int r = 0; r < REGMAX; r++)
                {
                    if (regcon.immed.value[r] != pred.Bregcon.immed.value[r])
                        regcon.immed.mval &= ~mask(r);
                }
            }
        }
        else
        {
            // First predecessor: start from its state
            regcon.immed = pred.Bregcon.immed;
            regcon.params = pred.Bregcon.params;
//          regcon.cse = pred.Bregcon.cse;
        }
    }
    regcon.cse.mops &= regcon.cse.mval;

    // Set regcon.mvar according to what variables are in registers for this block
    CodeBuilder cdb; cdb.ctor();
    regcon.mvar = 0;
    regcon.mpvar = 0;
    regcon.indexregs = 1;
    int anyspill = 0;           // 1 + index of the last spilled symbol live in this block
    char *sflsave = null;       // Sfl of each symbol on entry, restored at the end
    if (config.flags4 & CFG4optimized)
    {
        CodeBuilder cdbload; cdbload.ctor();
        CodeBuilder cdbstore; cdbstore.ctor();

        sflsave = cast(char *) alloca(globsym.length * char.sizeof);
        for (SYMIDX symi = 0; symi < globsym.length; symi++)
        {
            Symbol *sym = globsym[symi];

            sflsave[symi] = sym.Sfl;

            // The body of this test is disabled; the condition is still evaluated
            if (regParamInPreg(sym) &&
                regcon.params & sym.Spregm() &&
                vec_testbit(dfoidx,sym.Srange))
            {
//                regcon.used |= sym.Spregm();
            }

            if (sym.Sfl == FLreg)
            {
                if (vec_testbit(dfoidx,sym.Srange))
                {
                    const sregm = sym.Sregm;
                    regcon.mvar |= sregm;
                    if (sym.Sclass == SCfastpar || sym.Sclass == SCshadowreg)
                        regcon.mpvar |= sregm;
                }
            }
            else if ((sym.Sflags & SFLspill) && vec_testbit(dfoidx,sym.Srange))
            {
                anyspill = cast(int)(symi + 1);
                cgreg_spillreg_prolog(bl,sym,cdbstore,cdbload);
                if (vec_testbit(dfoidx,sym.Slvreg))
                {
                    // The symbol is in its register for this block; whatever
                    // that register was recorded as holding is no longer valid
                    sym.Sfl = FLreg;
                    const sregm = sym.Sregm;
                    regcon.mvar |= sregm;
                    regcon.cse.mval &= ~sregm;
                    regcon.immed.mval &= ~sregm;
                    regcon.params &= ~sregm;
                    if (sym.Sclass == SCfastpar || sym.Sclass == SCshadowreg)
                        regcon.mpvar |= sregm;
                }
            }
        }

        // Save any CSEs in use whose register is no longer marked valid
        const regm_t lostCse = regcon.cse.mops & ~regcon.cse.mval;
        if (lostCse != 0)
            cse_save(cdb, lostCse);

        cdb.append(cdbstore);
        cdb.append(cdbload);
        mfuncreg &= ~regcon.mvar;               // use these registers
        regcon.used |= regcon.mvar;

        // Determine if we have more than 1 uncommitted index register
        regcon.indexregs = IDXREGS & ~regcon.mvar;
        regcon.indexregs &= regcon.indexregs - 1;
    }

    /* This doesn't work when calling the BC_finally function,
     * as it is one block calling another.
     */
    //regsave.idx = 0;

    reflocal = 0;
    int savedRefparam = refparam;
    refparam = 0;
    assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops);

    outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave);
    bl.Bcode = cdb.finish();

    // undo block register assignments
    for (int k = 0; k < anyspill; k++)
        globsym[k].Sfl = sflsave[k];

    // Record what this block referenced, then fold back the saved refparam
    if (reflocal)
        bl.Bflags |= BFLreflocal;
    if (refparam)
        bl.Bflags |= BFLrefparam;
    refparam |= savedRefparam;

    // Publish the register state at the end of this block
    bl.Bregcon.immed = regcon.immed;
    bl.Bregcon.cse = regcon.cse;
    bl.Bregcon.used = regcon.used;
    bl.Bregcon.params = regcon.params;

    debug
    {
        if (debugw)
            printf("code gen complete\n");
    }
}
/*****************************************
 * Add in exception handling code.
 *
 * Only compiled for SCPP. Does nothing unless the function uses EHtry or
 * EHcleanup. Otherwise it assigns a starting (Bindex) and ending (Bendindex)
 * exception index to every block, processes the ESCAPE pseudo-ops found in
 * each block's code, and on EX_WIN32 inserts the code produced by
 * nteh_gensindex() where the index changes.
 */

version (SCPP)
{

private void cgcod_eh()
{
    list_t markStack;       // indices pushed by ESCmark, popped by ESCrelease
    int idx;
    int tryidx;             // index obtained after pushing the current BCtry block

    if (!(usednteh & (EHtry | EHcleanup)))
        return;

    // Compute Bindex for each block
    for (block *blk = startblock; blk; blk = blk.Bnext)
    {
        blk.Bindex = -1;
        blk.Bflags &= ~BFLvisited;               /* mark as unvisited    */
    }

    block *curTry = null;
    int lastidx = 0;
    startblock.Bindex = 0;
    for (block *b = startblock; b; b = b.Bnext)
    {
        const bool isCatch = (b.BC == BCcatch);
        if (curTry == b.Btry && isCatch)        // if don't need to pop try block
        {
            block *tryBlock = list_block(b.Bpred);    // find corresponding try block
            assert(tryBlock.BC == BCtry);
            b.Bindex = tryBlock.Bindex;
        }
        else if ((curTry != b.Btry && !isCatch) ||
                 !(b.Bflags & BFLvisited))
        {
            b.Bindex = lastidx;
        }
        b.Bflags |= BFLvisited;

        debug
        if (debuge)
        {
            WRBC(b.BC);
            printf(" block (%p) Btry=%p Bindex=%d\n",b,b.Btry,b.Bindex);
        }

        except_index_set(b.Bindex);
        if (curTry != b.Btry)                   // exited previous try block
        {
            except_pop(b,null,curTry);
            curTry = b.Btry;
        }
        if (b.BC == BCtry)
        {
            except_push(b,null,b);
            curTry = b;
            tryidx = except_index_get();

            // Put the index-setting code in front of the block's code
            CodeBuilder cdb; cdb.ctor();
            nteh_gensindex(cdb,tryidx - 1);
            cdb.append(b.Bcode);
            b.Bcode = cdb.finish();
        }

        markStack = null;
        for (code *c = b.Bcode; c; c = code_next(c))
        {
            if ((c.Iop & ESCAPEmask) != ESCAPE)
                continue;

            code *inserted = null;
            switch (c.Iop & 0xFFFF00)
            {
                case ESCctor:
                    //printf("ESCctor\n");
                    except_push(c,c.IEV1.Vtor,null);
                    goto L1;

                case ESCdtor:
                    //printf("ESCdtor\n");
                    except_pop(c,c.IEV1.Vtor,null);
                L1: if (config.exe == EX_WIN32)
                    {
                        // Link the index-setting code in right after c
                        CodeBuilder cdb; cdb.ctor();
                        nteh_gensindex(cdb,except_index_get() - 1);
                        inserted = cdb.finish();
                        inserted.next = code_next(c);
                        c.next = inserted;
                    }
                    break;

                case ESCmark:
                    //printf("ESCmark\n");
                    idx = except_index_get();
                    list_prependdata(&markStack,idx);
                    except_mark();
                    break;

                case ESCrelease:
                    //printf("ESCrelease\n");
                    idx = list_data(markStack);
                    list_pop(&markStack);
                    if (idx != except_index_get())
                    {
                        if (config.exe == EX_WIN32)
                        {
                            CodeBuilder cdb; cdb.ctor();
                            nteh_gensindex(cdb,idx - 1);
                            inserted = cdb.finish();
                            inserted.next = code_next(c);
                            c.next = inserted;
                        }
                        else
                        {   except_pair_append(c,idx - 1);
                            c.Iop = ESCAPE | ESCoffset;
                        }
                    }
                    except_release();
                    break;

                case ESCmark2:
                    //printf("ESCmark2\n");
                    except_mark();
                    break;

                case ESCrelease2:
                    //printf("ESCrelease2\n");
                    except_release();
                    break;

                default:
                    break;
            }
        }
        assert(markStack == null);
        b.Bendindex = except_index_get();

        if (b.BC != BCret && b.BC != BCretexp)
            lastidx = b.Bendindex;

        // Set starting index for each of the successors
        int succNo = 0;
        foreach (succLink; ListRange(b.Bsucc))
        {
            block *succ = list_block(succLink);
            if (b.BC == BCtry)
            {
                if (succNo == 0)                // block after catches
                    succ.Bindex = b.Bendindex;
                else if (succNo == 1)           // 1st catch block
                    succ.Bindex = tryidx;
                else                            // subsequent catch blocks
                    succ.Bindex = b.Bindex;

                debug
                if (debuge)
                {
                    printf(" 1setting %p to %d\n",succ,succ.Bindex);
                }
            }
            else if (!(succ.Bflags & BFLvisited))
            {
                succ.Bindex = b.Bendindex;

                debug
                if (debuge)
                {
                    printf(" 2setting %p to %d\n",succ,succ.Bindex);
                }
            }
            succ.Bflags |= BFLvisited;
            succNo++;
        }
    }

    if (config.exe != EX_WIN32)
        return;

    /* For every non-try block, if any predecessor ends with a different index
     * than the block starts with, prepend index-setting code to the block.
     */
    for (block *b = startblock; b; b = b.Bnext)
    {
        if (/*!b.Bcount ||*/ b.BC == BCtry)
            continue;
        foreach (predLink; ListRange(b.Bpred))
        {
            int predEnd = list_block(predLink).Bendindex;
            if (b.Bindex != predEnd)
            {
                CodeBuilder cdb; cdb.ctor();
                nteh_gensindex(cdb,b.Bindex - 1);
                cdb.append(b.Bcode);
                b.Bcode = cdb.finish();
                break;
            }
        }
    }
}

}
/******************************
 * Count the number of bits set in a register mask.
 * Params:
 *      regm = register mask to examine
 * Returns:
 *      number of 1 bits in regm, 0 if the mask is empty
 */

int numbitsset(regm_t regm)
{
    int count = 0;
    // `regm &= regm - 1` clears the lowest set bit, so the loop body
    // runs exactly once per bit that was set.
    for (; regm != 0; regm &= regm - 1)
        ++count;
    return count;
}
/******************************
 * Given a register mask, find and return the number of the first
 * (lowest numbered) register present in it.
 * Params:
 *      regm = register mask to search
 * Returns:
 *      register number of the least significant set bit
 */

reg_t findreg(regm_t regm)
{
    // Forward to the overload that takes a source location; the location
    // reported on failure is this wrapper's own.
    const int line = __LINE__;
    const(char)* file = __FILE__;
    return findreg(regm, line, file);
}
/******************************
 * Find the lowest numbered register in a register mask.
 * Params:
 *      regm = register mask to search; expected to be non-zero
 *      line = source line to report if the mask is empty
 *      file = source file to report if the mask is empty
 * Returns:
 *      index of the least significant set bit of regm.
 *      If regm is 0 the function does not return: it reports the
 *      location (debug builds only) and asserts.
 */
reg_t findreg(regm_t regm, int line, const(char)* file)
{
    if (regm)
    {
        // Walk up from bit 0 until the lowest set bit is reached
        reg_t bit = 0;
        for (; !(regm & 1); regm >>= 1)
            ++bit;
        return bit;
    }

    // Only an empty mask gets here, so regm still holds the caller's value
    debug
    printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(regm),line,file,funcsym_p.Sident.ptr);
    fflush(stdout);

    assert(0);
}
/***************
 * Free an element, but not its leaves (they are assumed to be freed
 * already).
 * Ecount is deliberately left untouched so that it can still be detected
 * whether the common subexpression has already been evaluated.
 * Once the common subexpression is no longer required, all references
 * to it are eliminated.
 * Params:
 *      e = element to free
 */

void freenode(elem *e)
{
    elem_debug(e);
    //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount);

    // Ecomsub is the remaining usage count; it is decremented on every call
    // and only a call that found it at zero proceeds to the cleanup.
    if (e.Ecomsub-- != 0)
        return;

    if (!e.Ecount)                      // never was a CSE, nothing to forget
        return;

    // Drop the element from every register that is recorded as holding it
    for (uint r = 0; r < regcon.cse.value.length; ++r)
    {
        if (regcon.cse.value[r] != e)
            continue;
        const regm_t keep = ~mask(r);
        regcon.cse.mval &= keep;
        regcon.cse.mops &= keep;
    }
    CSE.remove(e);
}
/*********************************
 * Reset Ecomsub to Ecount for every node of an elem tree, i.e. reverse
 * the effects of freenode().
 * Params:
 *      e = root of the tree to reset
 */

private void resetEcomsub(elem *e)
{
    // Recurse into right operands, iterate down the left spine
    for (;;)
    {
        elem_debug(e);
        e.Ecomsub = e.Ecount;

        const op = e.Eoper;
        if (OTleaf(op))
            return;
        if (OTbinary(op))
            resetEcomsub(e.EV.E2);
        e = e.EV.E1;
    }
}
/*********************************
 * Determine if elem e is a register variable.
 * Params:
 *      e = elem to test; only OPvar and OPrelconst elems can qualify
 *      pregm = if not null and e is a register variable, receives the
 *              mask of registers that make up the variable
 *      preg = if not null and e is a register variable, receives the
 *              least significant register
 * Returns:
 *      true if e is a register variable, false otherwise
 *      (in which case *pregm and *preg are not written)
 */

int isregvar(elem *e,regm_t *pregm,reg_t *preg)
{
    regm_t regm;
    reg_t reg;

    elem_debug(e);
    if (e.Eoper != OPvar && e.Eoper != OPrelconst)
        return false;

    Symbol *s = e.EV.Vsym;
    switch (s.Sfl)
    {
        case FLreg:
            // Touching a register parameter counts as referencing both
            // the parameters and the locals
            if (s.Sclass == SCparameter)
            {
                refparam = true;
                reflocal = true;
            }
            // An offset of REGSIZE selects the most significant register
            reg = e.EV.Voffset == REGSIZE ? s.Sregmsw : s.Sreglsw;
            regm = s.Sregm;
            //assert(tyreg(s.ty()));
static if (0)
{
            // Let's just see if there is a CSE in a reg we can use
            // instead. This helps avoid AGI's.
            if (e.Ecount && e.Ecount != e.Ecomsub)
            {   int i;

                for (i = 0; i < arraysize(regcon.cse.value); i++)
                {
                    if (regcon.cse.value[i] == e)
                    {   reg = i;
                        break;
                    }
                }
            }
}
            // The variable's registers must be a non-empty subset of regcon.mvar
            assert(regm & regcon.mvar && !(regm & ~regcon.mvar));
            break;

        case FLpseudo:
        {
            const uint u = s.Sreglsw;
            version (MARS)
            {
                const regm_t m = mask(u);
                // reject BP,SP,EBP,ESP,or ?H
                if (!(m & ALLREGS) || (u & ~3) == 4)
                    return false;
                reg = u & 7;
                regm = m;
            }
            else
            {
                const regm_t m = pseudomask[u];
                // reject BP,SP,EBP,ESP,or ?H
                if (!(m & ALLREGS) || (u & ~3) == 4)
                    return false;
                reg = pseudoreg[u] & 7;
                regm = m;
            }
            break;
        }

        default:
            return false;
    }

    // It is a register variable: report through whichever outputs were supplied
    if (preg)
        *preg = reg;
    if (pregm)
        *pregm = regm;
    return true;
}
/*********************************
 * Allocate some registers.
 * Params:
 *      cdb = receives any code generated to save regcon.cse.mops
 *            registers on the stack
 *      pretregs = pointer to mask of registers to make selection from;
 *                 on return, *pretregs is the mask of allocated registers
 *      preg = on return, *preg is the register number of the first
 *             allocated register
 *      tym = mask of type we will store in registers
 * Side effects:
 *      msavereg,mfuncreg       retregs bits are cleared.
 *      regcon.cse.mval,regcon.cse.mops updated
 */

void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym)
{
    // Forward to the overload that takes a source location; the location
    // reported in diagnostics is this wrapper's own.
    const int line = __LINE__;
    const(char)* file = __FILE__;
    allocreg(cdb, pretregs, preg, tym, line, file);
}
/*********************************
 * Allocate some registers, with the caller's source location available
 * for diagnostics.
 *
 * If the requested mask lies entirely within the register variables
 * (regcon.mvar), those registers are used as they are. Otherwise candidates
 * are tried in order of preference: registers that are free of msavereg,
 * live CSE values and parameters first, then progressively weaker
 * conditions, and finally any requested register. A selection that collides
 * with a register variable is retried with that register excluded.
 *
 * Params:
 *      cdb = receives any code generated (via getregs) to save CSEs that
 *            live in the chosen registers
 *      pretregs = pointer to mask of registers to select from; on return,
 *                 *pretregs is the mask of allocated registers
 *      preg = on return, *preg is the first allocated register (for a
 *             register pair, the most significant one unless that is ES)
 *      tym = type to be stored in the registers; decides whether one
 *            register or a pair is needed
 *      line = source line of the caller, for diagnostics only
 *      file = source file of the caller, for diagnostics only
 */
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym
        ,int line,const(char)* file)
{
        reg_t reg;

static if (0)
{
        if (pass == PASSfinal)
        {
            printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym ",
                file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval),
                regm_str(msavereg),regm_str(*pretregs));
            WRTYxx(tym);
            dbg_printf("\n");
        }
}
        tym = tybasic(tym);
        uint size = _tysize[tym];
        *pretregs &= mES | allregs | XMMREGS;
        regm_t retregs = *pretregs;

        debug if (retregs == 0)
            printf("allocreg: file %s(%d)\n", file, line);

        if ((retregs & regcon.mvar) == retregs) // if exactly in reg vars
        {
            if (size <= REGSIZE || (retregs & XMMREGS))
            {
                *preg = findreg(retregs);
                assert(retregs == mask(*preg)); /* no more bits are set */
            }
            else if (size <= 2 * REGSIZE)
            {
                *preg = findregmsw(retregs);
                assert(retregs & mLSW);
            }
            else
                assert(0);
            getregs(cdb,retregs);
            return;
        }
        int count = 0;
L1:
        //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        /* Fail instead of hanging if blocked. The counter is bumped outside
         * of any assert condition so that the retry limit is still enforced
         * when assert conditions are not evaluated; assert(0) always halts.
         */
        if (++count >= 20)
            assert(0);
        assert(retregs);
        reg_t msreg = NOREG, lsreg = NOREG;  /* no value assigned yet        */
L3:
        //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        /* Pick the candidate set r, relaxing the constraints one step at a
         * time until something is left.
         */
        regm_t r = retregs & ~(msavereg | regcon.cse.mval | regcon.params);
        if (!r)
        {
            r = retregs & ~(msavereg | regcon.cse.mval);
            if (!r)
            {
                r = retregs & ~(msavereg | regcon.cse.mops);
                if (!r)
                {   r = retregs & ~msavereg;
                    if (!r)
                        r = retregs;
                }
            }
        }

        if (size <= REGSIZE || retregs & XMMREGS)
        {
            // Avoid BP if anything else is available
            if (r & ~mBP)
                r &= ~mBP;

            // If only one index register, prefer to not use LSW registers
            if (!regcon.indexregs && r & ~mLSW)
                r &= ~mLSW;

            if (pass == PASSfinal && r & ~lastretregs && !I16)
            {   // Try not to always allocate the same register,
                // to schedule better

                r &= ~lastretregs;
                if (r & ~last2retregs)
                {
                    r &= ~last2retregs;
                    if (r & ~last3retregs)
                    {
                        r &= ~last3retregs;
                        if (r & ~last4retregs)
                        {
                            r &= ~last4retregs;
//                          if (r & ~last5retregs)
//                              r &= ~last5retregs;
                        }
                    }
                }
                if (r & ~mfuncreg)
                    r &= ~mfuncreg;
            }
            reg = findreg(r);
            retregs = mask(reg);
        }
        else if (size <= 2 * REGSIZE)
        {
            /* Select pair with both regs free. Failing */
            /* that, select pair with one reg free.             */

            if (r & mBP)
            {
                retregs &= ~mBP;
                goto L3;
            }

            if (r & mMSW)
            {
                if (r & mDX)
                    msreg = DX;                 /* prefer to use DX over CX */
                else
                    msreg = findregmsw(r);
                r &= mLSW;                      /* see if there's an LSW also */
                if (r)
                    lsreg = findreg(r);
                else if (lsreg == NOREG)   /* if don't have LSW yet */
                {
                    retregs &= mLSW;
                    goto L3;
                }
            }
            else
            {
                if (I64 && !(r & mLSW))
                {
                    retregs = *pretregs & (mMSW | mLSW);
                    assert(retregs);
                    goto L1;
                }
                lsreg = findreglsw(r);
                if (msreg == NOREG)
                {
                    retregs &= mMSW;
                    assert(retregs);
                    goto L3;
                }
            }
            reg = (msreg == ES) ? lsreg : msreg;
            retregs = mask(msreg) | mask(lsreg);
        }
        else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
        {
            debug
            if (retregs != DOUBLEREGS)
                printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs));

            assert(retregs == DOUBLEREGS);
            reg = AX;
        }
        else
        {
            debug
            {
                WRTYxx(tym);
                printf("\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n",
                    file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym);
            }
            assert(0);
        }
        if (retregs & regcon.mvar)              // if conflict with reg vars
        {
            if (!(size > REGSIZE && *pretregs == (mAX | mDX)))
            {
                retregs = (*pretregs &= ~(retregs & regcon.mvar));
                goto L1;                // try other registers
            }
        }
        *preg = reg;
        *pretregs = retregs;

        //printf("Allocating %s\n",regm_str(retregs));
        // Remember the most recent allocations; consulted above to vary the choice
        last5retregs = last4retregs;
        last4retregs = last3retregs;
        last3retregs = last2retregs;
        last2retregs = lastretregs;
        lastretregs = retregs;
        getregs(cdb, retregs);
}
/*****************************************
 * Allocate a scratch register.
 * Params:
 *      cdb = where to write any generated code to
 *      regm = mask of registers to pick one from
 * Returns:
 *      selected register
 */
reg_t allocScratchReg(ref CodeBuilder cdb, regm_t regm)
{
    // allocreg() narrows the mask it is handed, so give it a private copy
    regm_t candidates = regm;
    reg_t scratch;
    allocreg(cdb, &candidates, &scratch, TYoffset);
    return scratch;
}
/******************************
 * Determine registers that should be destroyed upon arrival
 * to code entry point for exception handling.
 * Returns:
 *      mask of those registers
 */
regm_t lpadregs()
{
    //printf("lpadregs(): allregs=%s, mfuncreg=%s\n", regm_str(allregs), regm_str(mfuncreg));

    // DWARF exception handling: everything in allregs that is not in mfuncreg
    if (config.ehmethod == EHmethod.EH_DWARF)
        return allregs & ~mfuncreg;

    // Other EH methods: the whole register set for the current mode
    return (I32 | I64) ? allregs : (ALLREGS | mES);
}
/*************************
 * Mark registers as used.
 * Params:
 *      regm = mask of registers being used
 */

void useregs(regm_t regm)
{
    //printf("useregs(x%x) %s\n", regm, regm_str(regm));
    const regm_t keep = ~regm;

    mfuncreg &= keep;
    regcon.used |= regm;                // registers used in this block

    if (regm & regcon.mpvar)            // if modified a fastpar register variable
        regcon.params = 0;              // toss them all out
    else
        regcon.params &= keep;
}
/*************************
 * We are going to use the registers in mask r.
 * Generate any code necessary to save any regs.
 * Params:
 *      cdb = receives the code that saves displaced common subexpressions
 *      r = mask of registers about to be used
 */

void getregs(ref CodeBuilder cdb, regm_t r)
{
    //printf("getregs(x%x) %s\n", r, regm_str(r));

    // Common subs that must be saved; sampled before any state is updated
    const regm_t toSave = r & regcon.cse.mops;

    useregs(r);

    // The registers are about to be destroyed: forget what they held
    const regm_t keep = ~r;
    regcon.cse.mval &= keep;
    msavereg &= keep;
    regcon.immed.mval &= keep;

    if (toSave)
        cse_save(cdb, toSave);
}
/*************************
 * We are going to use the registers in mask r.
 * Same as getregs(), but assert if code is needed to be generated.
 * Params:
 *      r = mask of registers about to be used; must not overlap
 *          regcon.cse.mops
 */
void getregsNoSave(regm_t r)
{
    //printf("getregsNoSave(x%x) %s\n", r, regm_str(r));

    // No common sub that would need saving may live in these registers
    assert(!(r & regcon.cse.mops));

    useregs(r);

    // The registers are about to be destroyed: forget what they held
    const regm_t keep = ~r;
    regcon.cse.mval &= keep;
    msavereg &= keep;
    regcon.immed.mval &= keep;
}
/*****************************************
 * Copy registers in cse.mops into memory.
 * Params:
 *      cdb = receives the generated store instructions
 *      ms = mask of registers to save; must be a subset of regcon.cse.mops.
 *           These bits are removed from regcon.cse.mops.
 */

private void cse_save(ref CodeBuilder cdb, regm_t ms)
{
    assert((ms & regcon.cse.mops) == ms);
    regcon.cse.mops &= ~ms;

    /* Skip CSEs that are already saved */
    for (regm_t regm = 1; regm < mask(NUMREGS); regm <<= 1)
    {
        if (!(regm & ms))
            continue;

        const e = regcon.cse.value[findreg(regm)];
        const sz = tysize(e.Ety);
        foreach (const ref cse; CSE.filter(e))
        {
            // An existing entry covers this register if the value fits in
            // one register, or the entry is for the same half of a pair,
            // or the entry is for exactly this register of a 4*REGSIZE value
            const covered =
                sz <= REGSIZE ||
                (sz <= 2 * REGSIZE &&
                    ((regm & mMSW && cse.regm & mMSW) ||
                     (regm & mLSW && cse.regm & mLSW))) ||
                (sz == 4 * REGSIZE && regm == cse.regm);
            if (!covered)
                continue;

            ms &= ~regm;
            if (!ms)
                return;                 // nothing left to save
            break;
        }
    }

    /* Create an entry for each register that remains */
    while (ms)
    {
        auto entry = CSE.add();
        reg_t reg = findreg(ms);          /* the register to save         */
        const regm_t bit = mask(reg);
        entry.e = regcon.cse.value[reg];
        entry.regm = bit;

        ms &= ~bit;                 /* turn off reg bit in ms       */

        if (!cse_simple(&entry.csimple, entry.e))
        {
            // Has to be stored to memory
            CSE.updateSizeAndAlign(entry.e);
            gen_storecse(cdb, entry.e.Ety, reg, entry.slot);
            reflocal = true;
        }
        else
        {
            // If we can simply reload the CSE, we don't need to save it
            entry.flags |= CSEsimple;
        }
    }
}
/******************************************
 * Getregs without marking immediate register values as gone.
 * Params:
 *      cdb = receives any code generated by getregs()
 *      r = mask of registers about to be used
 */

void getregs_imm(ref CodeBuilder cdb, regm_t r)
{
    // getregs() clears bits of regcon.immed.mval; put the old value back afterwards
    const regm_t immedBefore = regcon.immed.mval;
    getregs(cdb, r);
    regcon.immed.mval = immedBefore;
}
/******************************************
 * Flush all CSE's out of registers and into memory.
 * Params:
 *      cdb = receives the generated code
 *      do87 = !=0 means save 87 registers too
 */

void cse_flush(ref CodeBuilder cdb, int do87)
{
    //dbg_printf("cse_flush()\n");

    // save any CSEs to memory
    cse_save(cdb, regcon.cse.mops);

    // save any 8087 temporaries
    if (do87 != 0)
        save87(cdb);
}
/*************************
 * Common subexpressions exist in registers. Note this in regcon.cse.mval.
 * Params:
 *      e = the subexpression
 *      regm = mask of registers holding it
 *      opsflag = if != 0 then regcon.cse.mops gets set too
 * Returns:
 *      false   not saved as a CSE
 *      true    saved as a CSE
 */

bool cssave(elem *e,regm_t regm,uint opsflag)
{
    // Only elems that are CSEs with uses still outstanding are recorded
    /*if (e.Ecount && e.Ecount == e.Ecomsub)*/
    if (!(e.Ecount && e.Ecomsub))
        return false;

    if (!opsflag && pass != PASSfinal && (I32 || I64))
        return false;

    //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag);
    regm &= mBP | ALLREGS | mES | XMMREGS;    /* just to be sure              */

    /* Note: an earlier restriction, which declined to register CSEs that
     * are register variables and not operator nodes
     * (opsflag || !(regm & regcon.mvar)), is disabled.
     */

    bool saved = false;
    for (uint i = 0; regm; i++)
    {
        const regm_t mi = mask(i);
        if (!(regm & mi))
            continue;
        regm &= ~mi;

        // If we don't need this CSE, and the register already
        // holds a CSE that we do need, don't mark the new one
        if (regcon.cse.mval & mi && regcon.cse.value[i] != e &&
            !opsflag && regcon.cse.mops & mi)
            continue;

        regcon.cse.mval |= mi;
        if (opsflag)
            regcon.cse.mops |= mi;
        //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[i],e);
        regcon.cse.value[i] = e;
        saved = true;
    }
    return saved;
}
/*************************************
 * Determine if a computation should be done into a register.
 * Params:
 *      e = elem about to be evaluated
 * Returns:
 *      true if it should be evaluated into a register
 */

bool evalinregister(elem *e)
{
    if (config.exe == EX_WIN64 && e.Eoper == OPrelconst)
        return true;

    // Not a CSE, therefore there is no need to evaluate it in a register
    if (e.Ecount == 0)
        return false;

    // Operators are always in register
    if (!OTleaf(e.Eoper))
        return true;

    // Need to rethink this code if float or double can be CSE'd
    uint sz = tysize(e.Ety);

    if (e.Ecount == e.Ecomsub)
    {
        // Elem is a CSE that needs to be generated
        if (!((I32 || I64) &&
              //pass == PASSfinal && // bug 8987
              sz <= REGSIZE))
            return false;

        regm_t m = allregs & ~regcon.mvar;
        if (sz == 1)
            m &= BYTEREGS;

        // Need to be at least 3 registers available, as
        // addressing modes can use up 2.
        if (!(m & (m - 1)))             // fewer than two registers
            return false;
        m &= m - 1;                     // discard the lowest register
        return (m & (m - 1)) != 0;      // at least two more remain
    }

    /* Elem is now a CSE that might have been generated. If so, and
     * it's in a register already, the computation should be done
     * using that register.
     */
    regm_t emask = 0;                   // valid registers currently holding e
    for (uint i = 0; i < regcon.cse.value.length; i++)
    {
        if (regcon.cse.value[i] == e)
            emask |= mask(i) & regcon.cse.mval;
    }

    if (sz <= REGSIZE)
        return emask != 0;              // the CSE is in a register
    if (sz <= 2 * REGSIZE)
        return (emask & mMSW) && (emask & mLSW);    // both halves are in registers
    return true;                        // cop-out for now
}
/*******************************************************
 * Return mask of scratch registers.
 * Returns:
 *      0 unless in the final pass; in the final pass, the registers of
 *      allregs that are not in any of regcon.mvar, regcon.mpvar,
 *      regcon.cse.mval, regcon.immed.mval, regcon.params or mfuncreg
 */

regm_t getscratch()
{
    if (pass != PASSfinal)
        return 0;

    const regm_t unavailable =
        regcon.mvar | regcon.mpvar | regcon.cse.mval |
        regcon.immed.mval | regcon.params | mfuncreg;
    return allregs & ~unavailable;
}
2493 /******************************
2494  * Evaluate an elem that is a common subexp that has been encountered
2495  * before.
2496  * Look first to see if it is already in a register.
2497  */
2499 private void comsub(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2500 {
2501     tym_t tym;
2502     regm_t regm,emask;
2503     reg_t reg;
2504     uint byte_,sz;
2506     //printf("comsub(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
2507     elem_debug(e);
2509     debug
2510     {
2511         if (e.Ecomsub > e.Ecount)
2512             elem_print(e);
2513     }
2515     assert(e.Ecomsub <= e.Ecount);
2517     if (*pretregs == 0)        // no possible side effects anyway
2518     {
2519         return;
2520     }
2522     /* First construct a mask, emask, of all the registers that
2523      * have the right contents.
2524      */
2525     emask = 0;
2526     for (uint i = 0; i < regcon.cse.value.length; i++)
2527     {
2528         //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]);
2529         if (regcon.cse.value[i] == e)   // if contents are right
2530                 emask |= mask(i);       // turn on bit for reg
2531     }
2532     emask &= regcon.cse.mval;                     // make sure all bits are valid
2534     if (emask & XMMREGS && *pretregs == mPSW)
2535         { }
2536     else if (tyxmmreg(e.Ety) && config.fpxmmregs)
2537     {
2538         if (*pretregs & (mST0 | mST01))
2539         {
2540             regm_t retregs = *pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1;
2541             comsub(cdb, e, &retregs);
2542             fixresult(cdb,e,retregs,pretregs);
2543             return;
2544         }
2545     }
2546     else if (tyfloating(e.Ety) && config.inline8087)
2547     {
2548         comsub87(cdb,e,pretregs);
2549         return;
2550     }
2553     /* create mask of CSEs */
2554     regm_t csemask = CSE.mask(e);
2555     csemask &= ~emask;            // stuff already in registers
2557     debug if (debugw)
2558     {
2559         printf("comsub(e=%p): *pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n",
2560                 e,regm_str(*pretregs),regm_str(emask),regm_str(csemask),
2561                 regm_str(regcon.cse.mval),regm_str(regcon.mvar));
2562         if (regcon.cse.mval & 1)
2563             elem_print(regcon.cse.value[0]);
2564     }
2566     tym = tybasic(e.Ety);
2567     sz = _tysize[tym];
2568     byte_ = sz == 1;
2570     if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register
2571     {
2572         /* First see if it is already in a correct register     */
2574         regm = emask & *pretregs;
2575         if (regm == 0)
2576             regm = emask;               /* try any other register       */
2577         if (regm)                       /* if it's in a register        */
2578         {
2579             if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (*pretregs & regcon.mvar) == *pretregs)
2580             {
2581                 regm = mask(findreg(regm));
2582                 fixresult(cdb,e,regm,pretregs);
2583                 return;
2584             }
2585         }
2587         if (OTleaf(e.Eoper))                  /* if not op or func            */
2588             goto reload;                      /* reload data                  */
2590         foreach (ref cse; CSE.filter(e))
2591         {
2592             regm_t retregs;
2594             if (cse.flags & CSEsimple)
2595             {
2596                 retregs = *pretregs;
2597                 if (byte_ && !(retregs & BYTEREGS))
2598                     retregs = BYTEREGS;
2599                 else if (!(retregs & allregs))
2600                     retregs = allregs;
2601                 allocreg(cdb,&retregs,&reg,tym);
2602                 code *cr = &cse.csimple;
2603                 cr.setReg(reg);
2604                 if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1)
2605                     cr.Irex |= REX;
2606                 cdb.gen(cr);
2607                 goto L10;
2608             }
2609             else
2610             {
2611                 reflocal = true;
2612                 cse.flags |= CSEload;
2613                 if (*pretregs == mPSW)  // if result in CCs only
2614                 {
2615                     if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety)))
2616                     {
2617                         retregs = XMMREGS;
2618                         allocreg(cdb,&retregs,&reg,tym);
2619                         gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2620                         regcon.cse.mval |= mask(reg); // cs is in a reg
2621                         regcon.cse.value[reg] = e;
2622                         fixresult(cdb,e,retregs,pretregs);
2623                     }
2624                     else
2625                     {
2626                         // CMP cs[BP],0
2627                         gen_testcse(cdb, cse.e.Ety, sz, cse.slot);
2628                     }
2629                 }
2630                 else
2631                 {
2632                     retregs = *pretregs;
2633                     if (byte_ && !(retregs & BYTEREGS))
2634                         retregs = BYTEREGS;
2635                     allocreg(cdb,&retregs,&reg,tym);
2636                     gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2637                 L10:
2638                     regcon.cse.mval |= mask(reg); // cs is in a reg
2639                     regcon.cse.value[reg] = e;
2640                     fixresult(cdb,e,retregs,pretregs);
2641                 }
2642             }
2643             return;
2644         }
2646         debug
2647         {
2648             printf("couldn't find cse e = %p, pass = %d\n",e,pass);
2649             elem_print(e);
2650         }
2651         assert(0);                      /* should have found it         */
2652     }
2653     else                                  /* reg pair is req'd            */
2654     if (sz <= 2 * REGSIZE)
2655     {
2656         reg_t msreg,lsreg;
2658         /* see if we have both  */
2659         if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP)))
2660         {                               /* we don't have both           */
2661             debug if (!OTleaf(e.Eoper))
2662             {
2663                 printf("e = %p, op = x%x, emask = %s, csemask = %s\n",
2664                     e,e.Eoper,regm_str(emask),regm_str(csemask));
2665                 //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW);
2666                 elem_print(e);
2667             }
2669             assert(OTleaf(e.Eoper));        /* must have both for operators */
2670             goto reload;
2671         }
2673         /* Look for right vals in any regs      */
2674         regm = *pretregs & mMSW;
2675         if (emask & regm)
2676             msreg = findreg(emask & regm);
2677         else if (emask & mMSW)
2678             msreg = findregmsw(emask);
2679         else                    /* reload from cse array        */
2680         {
2681             if (!regm)
2682                 regm = mMSW & ALLREGS;
2683             allocreg(cdb,&regm,&msreg,TYint);
2684             loadcse(cdb,e,msreg,mMSW);
2685         }
2687         regm = *pretregs & (mLSW | mBP);
2688         if (emask & regm)
2689             lsreg = findreg(emask & regm);
2690         else if (emask & (mLSW | mBP))
2691             lsreg = findreglsw(emask);
2692         else
2693         {
2694             if (!regm)
2695                 regm = mLSW;
2696             allocreg(cdb,&regm,&lsreg,TYint);
2697             loadcse(cdb,e,lsreg,mLSW | mBP);
2698         }
2700         regm = mask(msreg) | mask(lsreg);       /* mask of result       */
2701         fixresult(cdb,e,regm,pretregs);
2702         return;
2703     }
2704     else if (tym == TYdouble || tym == TYdouble_alias)    // double
2705     {
2706         assert(I16);
2707         if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16)
2708         {
2709             static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d
2710             for (reg = 0; reg != NOREG; reg = dblreg[reg])
2711             {
2712                 assert(cast(int) reg >= 0 && reg <= 7);
2713                 if (mask(reg) & csemask)
2714                     loadcse(cdb,e,reg,mask(reg));
2715             }
2716             regm = DOUBLEREGS_16;
2717             fixresult(cdb,e,regm,pretregs);
2718             return;
2719         }
2720         if (OTleaf(e.Eoper)) goto reload;
2722         debug
2723         printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask));
2725         assert(0);
2726     }
2727     else
2728     {
2729         debug
2730         printf("e = %p, tym = x%x\n",e,tym);
2732         assert(0);
2733     }
2735 reload:                                 /* reload result from memory    */
2736     switch (e.Eoper)
2737     {
2738         case OPrelconst:
2739             cdrelconst(cdb,e,pretregs);
2740             break;
2743 {
2744         case OPgot:
2745             cdgot(cdb,e,pretregs);
2746             break;
2747 }
2748         default:
2749             if (*pretregs == mPSW &&
2750                 config.fpxmmregs &&
2751                 (tyxmmreg(tym) || tysimd(tym)))
2752             {
2753                 regm_t retregs = XMMREGS | mPSW;
2754                 loaddata(cdb,e,&retregs);
2755                 cssave(e,retregs,false);
2756                 return;
2757             }
2758             loaddata(cdb,e,pretregs);
2759             break;
2760     }
2761     cssave(e,*pretregs,false);
2762 }
/*****************************
 * Reload a common subexpression from its save slot into a register.
 *
 * Scans the entries produced by CSE.filter(e) and uses the first one whose
 * register mask intersects `regm`. That entry is flagged CSEload, `e` is
 * recorded in regcon.cse as the value of `reg`, and the load itself is
 * emitted with gen_loadcse(). If no entry matches, the function asserts.
 *
 * Params:
 *      cdb  = code builder passed on to getregs() and gen_loadcse()
 *      e    = common subexpression to reload
 *      reg  = destination register
 *      regm = mask an entry's regm must overlap to be selected
 *
 * NOTE(review): `reg` is recorded in regcon.cse before getregs() is called
 * on it; getregs() is not visible here, so confirm it leaves that record
 * in the intended state.
 */
private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm)
{
    const regm_t destmask = mask(reg);

    foreach (ref entry; CSE.filter(e))
    {
        //printf("CSE[%d] = %p, regm = %s\n", i, entry.e, regm_str(entry.regm));
        if ((entry.regm & regm) == 0)
            continue;                   // this entry does not cover the requested part

        reflocal = true;
        entry.flags |= CSEload;         // remember that the slot was read back
        regcon.cse.value[reg] = e;
        regcon.cse.mval |= destmask;
        getregs(cdb, destmask);
        gen_loadcse(cdb, entry.e.Ety, reg, entry.slot);
        return;
    }

    // Nothing matched: dump diagnostics in debug builds, then fail hard.
    debug
    {
        printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm));
        elem_print(e);
    }
    assert(0);
}
/***************************
 * Invoke the code generator registered in the cdxxx[] jump table for `op`.
 *
 * Unlike codelem(), the operator is supplied by the caller rather than read
 * from e.Eoper, and `op` is not range-checked here.
 *
 * Params:
 *      cdb      = code builder handed to the selected generator
 *      e        = elem handed to the selected generator
 *      pretregs = in: mask of possible registers to return result in
 *                     Note:   longs are in AX,BX or CX,DX or SI,DI
 *                             doubles are AX,BX,CX,DX only
 *                 out: mask of registers result is returned in
 *      op       = index into cdxxx[]
 */
void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op)
{
    auto generator = cdxxx[op];
    generator(cdb, e, pretregs);
}
2813 // jump table
     // Maps each operator code to the function that generates code for it.
     // The array has OPMAX entries and is indexed by operator in callcdxxx()
     // and codelem(). Several operators share one generator (for example
     // OPadd, OPand, OPmin, OPxor and OPor all use cdorth).
     // Many operators are routed to cderr (defined elsewhere; presumably it
     // reports an operator that should not reach code generation - confirm).
     // NOTE(review): with indexed initializers, any operator not listed here
     // keeps the element type's default value (null) - confirm that none of
     // those can be dispatched.
2814 private extern (C++) __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx =
2815 [
2816     OPunde:    &cderr,
2817     OPadd:     &cdorth,
2818     OPmul:     &cdmul,
2819     OPand:     &cdorth,
2820     OPmin:     &cdorth,
2821     OPnot:     &cdnot,
2822     OPcom:     &cdcom,
2823     OPcond:    &cdcond,
2824     OPcomma:   &cdcomma,
2825     OPremquo:  &cddiv,
2826     OPdiv:     &cddiv,
2827     OPmod:     &cddiv,
2828     OPxor:     &cdorth,
2829     OPstring:  &cderr,
2830     OPrelconst: &cdrelconst,
2831     OPinp:     &cdport,
2832     OPoutp:    &cdport,
2833     OPasm:     &cdasm,
2834     OPinfo:    &cdinfo,
2835     OPdctor:   &cddctor,
2836     OPddtor:   &cdddtor,
2837     OPctor:    &cdctor,
2838     OPdtor:    &cddtor,
2839     OPmark:    &cdmark,
2840     OPvoid:    &cdvoid,
2841     OPhalt:    &cdhalt,
2842     OPnullptr: &cderr,
2843     OPpair:    &cdpair,
2844     OPrpair:   &cdpair,
     // logical, shift and unary operators
2846     OPor:      &cdorth,
2847     OPoror:    &cdloglog,
2848     OPandand:  &cdloglog,
2849     OProl:     &cdshift,
2850     OPror:     &cdshift,
2851     OPshl:     &cdshift,
2852     OPshr:     &cdshift,
2853     OPashr:    &cdshift,
2854     OPbit:     &cderr,
2855     OPind:     &cdind,
2856     OPaddr:    &cderr,
2857     OPneg:     &cdneg,
2858     OPuadd:    &cderr,
2859     OPabs:     &cdabs,
2860     OPtoprec:  &cdtoprec,
2861     OPsqrt:    &cdneg,
2862     OPsin:     &cdneg,
2863     OPcos:     &cdneg,
2864     OPscale:   &cdscale,
2865     OPyl2x:    &cdscale,
2866     OPyl2xp1:  &cdscale,
2867     OPcmpxchg:     &cdcmpxchg,
2868     OPrint:    &cdneg,
2869     OPrndtol:  &cdrndtol,
2870     OPstrlen:  &cdstrlen,
2871     OPstrcpy:  &cdstrcpy,
2872     OPmemcpy:  &cdmemcpy,
2873     OPmemset:  &cdmemset,
2874     OPstrcat:  &cderr,
2875     OPstrcmp:  &cdstrcmp,
2876     OPmemcmp:  &cdmemcmp,
2877     OPsetjmp:  &cdsetjmp,
2878     OPnegass:  &cdaddass,
2879     OPpreinc:  &cderr,
2880     OPpredec:  &cderr,
2881     OPstreq:   &cdstreq,
2882     OPpostinc: &cdpost,
2883     OPpostdec: &cdpost,
2884     OPeq:      &cdeq,
2885     OPaddass:  &cdaddass,
2886     OPminass:  &cdaddass,
2887     OPmulass:  &cdmulass,
2888     OPdivass:  &cddivass,
2889     OPmodass:  &cddivass,
2890     OPshrass:  &cdshass,
2891     OPashrass: &cdshass,
2892     OPshlass:  &cdshass,
2893     OPandass:  &cdaddass,
2894     OPxorass:  &cdaddass,
2895     OPorass:   &cdaddass,
     // comparisons: all handled by cdcmp
2897     OPle:      &cdcmp,
2898     OPgt:      &cdcmp,
2899     OPlt:      &cdcmp,
2900     OPge:      &cdcmp,
2901     OPeqeq:    &cdcmp,
2902     OPne:      &cdcmp,
2904     OPunord:   &cdcmp,
2905     OPlg:      &cdcmp,
2906     OPleg:     &cdcmp,
2907     OPule:     &cdcmp,
2908     OPul:      &cdcmp,
2909     OPuge:     &cdcmp,
2910     OPug:      &cdcmp,
2911     OPue:      &cdcmp,
2912     OPngt:     &cdcmp,
2913     OPnge:     &cdcmp,
2914     OPnlt:     &cdcmp,
2915     OPnle:     &cdcmp,
2916     OPord:     &cdcmp,
2917     OPnlg:     &cdcmp,
2918     OPnleg:    &cdcmp,
2919     OPnule:    &cdcmp,
2920     OPnul:     &cdcmp,
2921     OPnuge:    &cdcmp,
2922     OPnug:     &cdcmp,
2923     OPnue:     &cdcmp,
     // pointer and numeric conversions
2925     OPvp_fp:   &cdcnvt,
2926     OPcvp_fp:  &cdcnvt,
2927     OPoffset:  &cdlngsht,
2928     OPnp_fp:   &cdshtlng,
2929     OPnp_f16p: &cdfar16,
2930     OPf16p_np: &cdfar16,
2932     OPs16_32:  &cdshtlng,
2933     OPu16_32:  &cdshtlng,
2934     OPd_s32:   &cdcnvt,
2935     OPb_8:     &cdcnvt,
2936     OPs32_d:   &cdcnvt,
2937     OPd_s16:   &cdcnvt,
2938     OPs16_d:   &cdcnvt,
2939     OPd_u16:   &cdcnvt,
2940     OPu16_d:   &cdcnvt,
2941     OPd_u32:   &cdcnvt,
2942     OPu32_d:   &cdcnvt,
2943     OP32_16:   &cdlngsht,
2944     OPd_f:     &cdcnvt,
2945     OPf_d:     &cdcnvt,
2946     OPd_ld:    &cdcnvt,
2947     OPld_d:    &cdcnvt,
2948     OPc_r:     &cdconvt87,
2949     OPc_i:     &cdconvt87,
2950     OPu8_16:   &cdbyteint,
2951     OPs8_16:   &cdbyteint,
2952     OP16_8:    &cdlngsht,
2953     OPu32_64:  &cdshtlng,
2954     OPs32_64:  &cdshtlng,
2955     OP64_32:   &cdlngsht,
2956     OPu64_128: &cdshtlng,
2957     OPs64_128: &cdshtlng,
2958     OP128_64:  &cdlngsht,
2959     OPmsw:     &cdmsw,
2961     OPd_s64:   &cdcnvt,
2962     OPs64_d:   &cdcnvt,
2963     OPd_u64:   &cdcnvt,
2964     OPu64_d:   &cdcnvt,
2965     OPld_u64:  &cdcnvt,
2966     OPparam:   &cderr,
2967     OPsizeof:  &cderr,
2968     OParrow:   &cderr,
2969     OParrowstar: &cderr,
2970     OPcolon:   &cderr,
2971     OPcolon2:  &cderr,
2972     OPbool:    &cdnot,
2973     OPcall:    &cdfunc,
2974     OPucall:   &cdfunc,
2975     OPcallns:  &cdfunc,
2976     OPucallns: &cdfunc,
2977     OPstrpar:  &cderr,
2978     OPstrctor: &cderr,
2979     OPstrthis: &cdstrthis,
     // OPconst and OPvar are handled directly in codelem()'s switch,
     // which never consults the table for them
2980     OPconst:   &cderr,
2981     OPvar:     &cderr,
2982     OPnew:     &cderr,
2983     OPanew:    &cderr,
2984     OPdelete:  &cderr,
2985     OPadelete: &cderr,
2986     OPbrack:   &cderr,
2987     OPframeptr: &cdframeptr,
2988     OPgot:     &cdgot,
     // bit scan / bit test operators
2990     OPbsf:     &cdbscan,
2991     OPbsr:     &cdbscan,
2992     OPbtst:    &cdbtst,
2993     OPbt:      &cdbt,
2994     OPbtc:     &cdbt,
2995     OPbtr:     &cdbt,
2996     OPbts:     &cdbt,
2998     OPbswap:   &cdbswap,
2999     OPpopcnt:  &cdpopcnt,
3000     OPvector:  &cdvector,
3001     OPvecsto:  &cdvecsto,
3002     OPvecfill: &cdvecfill,
3003     OPva_start: &cderr,
3004     OPprefetch: &cdprefetch,
3005 ];
/***************************
 * Generate code for elem `e`, returning its result in registers chosen
 * from *pretregs.
 *
 * Elems flagged as common subexpressions (e.Ecount && e.Ecount != e.Ecomsub)
 * are handed to comsub(). Everything else is dispatched on e.Eoper:
 * OPrelconst goes to cdrelconst(), OPvar/OPconst go to loaddata(), and all
 * other operators go through the cdxxx[] jump table. Except on the comsub()
 * path, the result is then recorded with cssave().
 *
 * Params:
 *      cdb       = code builder passed to the selected generator
 *      e         = elem to generate code for; must not be null
 *      pretregs  = in: mask of possible registers to return result in;
 *                  out: mask of registers result is returned in
 *      constflag = bit 1: user of result will not modify the registers
 *                         returned in *pretregs
 *                  bit 2: freenode(e) is not called
 */
void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag)
{
    Symbol *s;

    // Validate e before the debug trace below dereferences it.
    assert(e);

    debug if (debugw)
    {
        printf("+codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs));
        WROP(e.Eoper);
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
        printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
    }

    elem_debug(e);

    // Consistency check: every bit of regcon.cse.mops must also be set in
    // regcon.cse.mval.
    if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
    {
        debug
        {
            printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs));
            elem_print(e);
            printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                    regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
            printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
        }
        assert(0);
    }

    // If the caller may modify the result, keep it out of register variables,
    // provided at least one other candidate register remains.
    if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar)
        *pretregs &= ~regcon.mvar;                      /* can't use register vars */

    uint op = e.Eoper;
    if (e.Ecount && e.Ecount != e.Ecomsub)     // if common subexp
    {
        comsub(cdb,e,pretregs);
        goto L1;
    }

    if (configv.addlinenumbers && e.Esrcpos.Slinnum)
        cdb.genlinnum(e.Esrcpos);

    switch (op)
    {
        default:
            // cdxxx[] has OPMAX entries, so valid indices stop at OPMAX - 1.
            // Checked here so that both table lookups below are covered.
            assert(op < OPMAX);

            if (e.Ecount)                          /* if common subexp     */
            {
                /* if no return value       */
                if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0)
                {
                    if (*pretregs & (mST0 | mST01))
                    {
                        //printf("generate ST0 comsub for:\n");
                        //elem_print(e);

                        // Compute into XMM register(s), save that as the CSE,
                        // then let fixresult() move it to where the caller asked.
                        regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1;
                        (*cdxxx[op])(cdb,e,&retregs);
                        cssave(e,retregs,!OTleaf(op));
                        fixresult(cdb, e, retregs, pretregs);
                        goto L1;
                    }
                    // Force a register result, picked by the elem's type, so
                    // there is something for cssave() to record.
                    if (tysize(e.Ety) == 1)
                        *pretregs |= BYTEREGS;
                    else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs)
                        *pretregs |= XMMREGS;
                    else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias)
                        *pretregs |= DOUBLEREGS;
                    else
                        *pretregs |= ALLREGS;       /* make one             */
                }

                /* BUG: For CSEs, make sure we have both an MSW             */
                /* and an LSW specified in *pretregs                        */
            }
            (*cdxxx[op])(cdb,e,pretregs);
            break;

        case OPrelconst:
            cdrelconst(cdb,e,pretregs);
            break;

        case OPvar:
            // A register variable that already sits in acceptable registers:
            // narrow *pretregs to exactly that register (plus mPSW if asked).
            if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg &&
                (s.Sregm & *pretregs) == s.Sregm)
            {
                if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE)
                    *pretregs &= mPSW | (s.Sregm & mLSW);
                else
                    *pretregs &= mPSW | s.Sregm;
            }
            goto case OPconst;

        case OPconst:
            // No result requested, but the value is referenced often (or is
            // volatile): ask for a register of a class suited to the type.
            if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile))
            {
                switch (tybasic(e.Ety))
                {
                    case TYbool:
                    case TYchar:
                    case TYschar:
                    case TYuchar:
                        *pretregs |= BYTEREGS;
                        break;

                    case TYnref:
                    case TYnptr:
                    case TYsptr:
                    case TYcptr:
                    case TYfgPtr:
                    case TYimmutPtr:
                    case TYsharePtr:
                    case TYrestrictPtr:
                        *pretregs |= I16 ? IDXREGS : ALLREGS;
                        break;

                    case TYshort:
                    case TYushort:
                    case TYint:
                    case TYuint:
                    case TYlong:
                    case TYulong:
                    case TYllong:
                    case TYullong:
                    case TYcent:
                    case TYucent:
                    case TYfptr:
                    case TYhptr:
                    case TYvptr:
                        *pretregs |= ALLREGS;
                        break;

                    default:
                        break;
                }
            }
            loaddata(cdb,e,pretregs);
            break;
    }
    cssave(e,*pretregs,!OTleaf(op));
L1:
    if (!(constflag & 2))
        freenode(e);

    debug if (debugw)
    {
        printf("-codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs));
        WROP(op);
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
    }
}
3160 /*******************************
3161  * Same as codelem(), but do not destroy the registers in keepmsk.
3162  * Use scratch registers as much as possible, then use stack.
      * On the general path the code appended to cdb is, in order: the
      * register saves, the code generated for e, then the register restores.
3163  * Input:
      *      cdb             code builder the generated code is appended to
      *      e               elem to generate code for
      *      pretregs        in/out register mask, passed through to codelem()
      *      keepmsk         mask of registers that must not be destroyed
3164  *      constflag       true if user of result will not modify the
3165  *                      registers returned in *pretregs.
3166  */
3168 void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag)
3169 {
3170     regm_t touse;
3172     debug if (debugw)
3173         printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
3174                 e,regm_str(*pretregs),regm_str(keepmsk),constflag);
3176     elem_debug(e);
         // Shortcut: the caller will not modify the result and e is a register
         // variable already in acceptable registers, so codelem() is not called;
         // the variable's register(s) are handed straight to fixresult().
3177     if (constflag)
3178     {
3179         regm_t regm;
3180         reg_t reg;
3182         if (isregvar(e,&regm,&reg) &&           // if e is a register variable
3183             (regm & *pretregs) == regm &&       // in one of the right regs
3184             e.EV.Voffset == 0
3185            )
3186         {
3187             uint sz1 = tysize(e.Ety);
3188             uint sz2 = tysize(e.EV.Vsym.Stype.Tty);
                 // e is no wider than a register but the variable is wider:
                 // restrict the result to the mLSW / XMM part of the mask.
3189             if (sz1 <= REGSIZE && sz2 > REGSIZE)
3190                 regm &= mLSW | XMMREGS;
3191             fixresult(cdb,e,regm,pretregs);
3192             cssave(e,regm,0);
3193             freenode(e);
3195             debug if (debugw)
3196                 printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
3197                         e,regm_str(*pretregs),regm_str(keepmsk),constflag);
3199             return;
3200         }
3201     }
         // General path. Snapshot the global state that codelem() may change,
         // generate e's code into the private builder cdbx, then work out which
         // registers need saving around it.
         // overlap = keepmsk bits that were already in msavereg on entry; they
         // stay set when msavereg is restored near the end of this function.
3202     regm_t overlap = msavereg & keepmsk;
3203     msavereg |= keepmsk;          /* add to mask of regs to save          */
3204     regm_t oldregcon = regcon.cse.mval;
3205     regm_t oldregimmed = regcon.immed.mval;
3206     regm_t oldmfuncreg = mfuncreg;       /* remember old one                     */
3207     mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar;
3208     uint stackpushsave = stackpush;
3209     char calledafuncsave = calledafunc;
3210     calledafunc = 0;
3211     CodeBuilder cdbx; cdbx.ctor();
3212     codelem(cdbx,e,pretregs,constflag);    // generate code for the elem
         // NOTE(review): msavereg was OR-ed with keepmsk above, so tosave is
         // nonzero only if codelem() cleared some of those bits - presumably
         // that happens when a register gets destroyed; confirm against getregs().
3214     regm_t tosave = keepmsk & ~msavereg; /* registers to save                    */
3215     if (tosave)
3216     {
3217         cgstate.stackclean++;
3218         genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg);
3219         cgstate.stackclean--;
3220     }
3222     /* Assert that no new CSEs are generated that are not reflected       */
3223     /* in mfuncreg.                                                       */
3224     debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0)
3225         printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n",
3226                 regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar));
3228     assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0);
3230     /* bugzilla 3521
3231      * The problem is:
3232      *    reg op (reg = exp)
3233      * where reg must be preserved (in keepregs) while the expression to be evaluated
3234      * must change it.
3235      * The only solution is to make this variable not a register.
3236      */
3237     if (regcon.mvar & tosave)
3238     {
3239         //elem_print(e);
3240         //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave));
3241         cgreg_unregister(regcon.mvar & tosave);
3242     }
3244     /* which registers can we use to save other registers in? */
3245     if (config.flags4 & CFG4space ||              // if optimize for space
3246         config.target_cpu >= TARGET_80486)        // PUSH/POP ops are 1 cycle
3247         touse = 0;                              // PUSH/POP pairs are always shorter
3248     else
3249     {
3250         touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval);
3251         /* Don't use registers we'll have to save/restore               */
3252         touse &= ~(fregsaved & oldmfuncreg);
3253         /* Don't use registers that have constant values in them, since
3254            the code generated might have used the value.
3255          */
3256         touse &= ~oldregimmed;
3257     }
         // cdbs1 collects the save code; cs2 collects the restore code.
3259     CodeBuilder cdbs1; cdbs1.ctor();
3260     code *cs2 = null;
3261     int adjesp = 0;
         // Walk register numbers upward until every bit of tosave is handled.
3263     for (uint i = 0; tosave; i++)
3264     {
3265         regm_t mi = mask(i);
3267         assert(i < REGMAX);
3268         if (mi & tosave)        /* i = register to save                 */
3269         {
3270             if (touse)          /* if any scratch registers             */
3271             {
                     // Only registers 0..7 are considered as scratch here.
3272                 uint j;
3273                 for (j = 0; j < 8; j++)
3274                 {
3275                     regm_t mj = mask(j);
3277                     if (touse & mj)
3278                     {
                             // Save i into scratch j; the matching restore is
                             // prepended to cs2, so restores run in reverse
                             // order of the saves.
3279                         genmovreg(cdbs1,j,i);
3280                         cs2 = cat(genmovreg(i,j),cs2);
3281                         touse &= ~mj;
3282                         mfuncreg &= ~mj;
3283                         regcon.used |= mj;
3284                         break;
3285                     }
3286                 }
3287                 assert(j < 8);
3288             }
3289             else                        // else use memory
3290             {
                     // gensaverestore() emits the save into cdbs1 and the
                     // restore into cdby; a nonzero size is accumulated in adjesp.
3291                 CodeBuilder cdby; cdby.ctor();
3292                 uint size = gensaverestore(mask(i), cdbs1, cdby);
3293                 cs2 = cat(cdby.finish(),cs2);
3294                 if (size)
3295                 {
3296                     stackchanged = 1;
3297                     adjesp += size;
3298                 }
3299             }
3300             getregs(cdbx,mi);
3301             tosave &= ~mi;
3302         }
3303     }
3304     CodeBuilder cdbs2; cdbs2.ctor();
3305     if (adjesp)
3306     {
3307         // If this is done an odd number of times, it
3308         // will throw off the 8 byte stack alignment.
3309         // We should *only* worry about this if a function
3310         // was called in the code generation by codelem().
             // sz = bytes that round adjesp up to a multiple of STACKALIGN
             // (assumes STACKALIGN is a power of two - TODO confirm).
3311         int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1);
3312         if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
3313         {
3314             regm_t mval_save = regcon.immed.mval;
3315             regcon.immed.mval = 0;      // prevent reghasvalue() optimizations
3316                                         // because c hasn't been executed yet
3317             cod3_stackadj(cdbs1, sz);
3318             regcon.immed.mval = mval_save;
3319             cdbs1.genadjesp(sz);
3321             cod3_stackadj(cdbs2, -sz);
3322             cdbs2.genadjesp(-sz);
3323         }
3324         cdbs2.append(cs2);
3327         cdbs1.genadjesp(adjesp);
3328         cdbs2.genadjesp(-adjesp);
3329     }
3330     else
3331         cdbs2.append(cs2);
         // Restore/merge the global state snapshotted before codelem().
3333     calledafunc |= calledafuncsave;
3334     msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save   */
3335     mfuncreg &= oldmfuncreg;        /* update original                    */
3337     debug if (debugw)
3338         printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
3339                 e,regm_str(*pretregs),regm_str(keepmsk),constflag);
         // Emit in execution order: saves, code for e, restores.
3341     cdb.append(cdbs1);
3342     cdb.append(cdbx);
3343     cdb.append(cdbs2);
3344     return;
3345 }
/*********************************************
 * Turn register mask into a string suitable for printing.
 *
 * A few well-known masks (0, ALLREGS, BYTEREGS, allregs, XMMREGS) are
 * returned as fixed names. Any other mask is rendered as the regstring[]
 * names of its set bits (bit numbers 0..31) joined with '|'; bits left over
 * after that are appended in hex as "x..".
 *
 * Params:
 *      rm = register mask to format
 * Returns:
 *      A 0-terminated string that the caller must NOT free. It is either a
 *      string literal or one of NUM rotating static buffers, so a formatted
 *      result stays valid only until NUM further masks have been formatted.
 *      That is enough for several regm_str() calls inside one printf().
 *
 * The formatted text used to be returned through strdup(); because other
 * paths return string literals no caller could ever free the result, so
 * every formatted call leaked. The rotating buffer is now returned directly.
 */
const(char)* regm_str(regm_t rm)
{
    enum NUM = 10;          // number of rotating result buffers
    enum SMAX = 128;        // longest string a buffer can hold
    __gshared char[SMAX + 1][NUM] str;
    __gshared int i;        // index of the next buffer to hand out

    if (rm == 0)
        return "0";
    if (rm == ALLREGS)
        return "ALLREGS";
    if (rm == BYTEREGS)
        return "BYTEREGS";
    if (rm == allregs)
        return "allregs";
    if (rm == XMMREGS)
        return "XMMREGS";

    // Claim the next buffer and advance the ring.
    char *p = str[i].ptr;
    if (++i == NUM)
        i = 0;
    *p = 0;

    for (size_t j = 0; j < 32; j++)
    {
        if (mask(cast(uint)j) & rm)
        {
            strcat(p,regstring[j]);
            rm &= ~mask(cast(uint)j);
            if (rm)                     // more bits follow: add a separator
                strcat(p,"|");
        }
    }

    // Whatever is still set is appended in hex.
    if (rm)
    {   char *s = p + strlen(p);
        sprintf(s,"x%02x",rm);
    }
    assert(strlen(p) <= SMAX);
    return p;
}
/*********************************
 * Generate code for the left operands of a chain of comma-expressions.
 *
 * Starting at *pe, each OPcomma elem has its left operand (E1) evaluated
 * with no result registers requested, the OPcomma node itself is released
 * with freenode(), and the walk continues with the right operand (E2).
 * A line number record is emitted for every elem visited, including the
 * final non-comma one, when line numbers are enabled.
 *
 * Params:
 *      cdb = code builder that receives the generated code
 *      pe  = in: root of the expression;
 *            out: first elem down the right side that is not an OPcomma
 */
void docommas(ref CodeBuilder cdb,elem **pe)
{
    const uint pushedOnEntry = stackpush;
    const int cleanOnEntry = cgstate.stackclean;
    cgstate.stackclean = 0;

    elem* cur = *pe;
    for (;;)
    {
        if (configv.addlinenumbers && cur.Esrcpos.Slinnum)
        {
            cdb.genlinnum(cur.Esrcpos);
            //cur.Esrcpos.Slinnum = 0;               // don't do it twice
        }

        if (cur.Eoper != OPcomma)
            break;

        // Evaluate the left side purely for its side effects.
        regm_t noresult = 0;
        codelem(cdb, cur.EV.E1, &noresult, true);

        // Step to the right operand before releasing the comma node.
        elem* comma = cur;
        cur = comma.EV.E2;
        freenode(comma);
    }
    *pe = cur;

    assert(cgstate.stackclean == 0);
    cgstate.stackclean = cleanOnEntry;
    genstackclean(cdb, stackpush - pushedOnEntry, 0);
}
/**************************
 * Merge a saved register-contents snapshot into the global regcon.
 *
 * For every register whose recorded cse value differs between regcon and
 * *pregconsave, the register's bit is cleared in regcon.cse.mval; the same
 * is done for the immed values and regcon.immed.mval. Afterwards:
 *   - regcon.used gains the registers used in the snapshot,
 *   - cse.mval, immed.mval and params are intersected with the snapshot's,
 *   - cse.mops is limited to what remains in cse.mval.
 *
 * Params:
 *      pregconsave = snapshot to compare against (not modified here)
 */
void andregcon(con_t *pregconsave)
{
    // All ones except the bit of the register being examined; the zero bit
    // moves up by one position per iteration.
    regm_t clearbit = ~1;

    foreach (r; 0 .. REGMAX)
    {
        const bool cseDiffers   = pregconsave.cse.value[r] != regcon.cse.value[r];
        const bool immedDiffers = pregconsave.immed.value[r] != regcon.immed.value[r];

        if (cseDiffers)
            regcon.cse.mval &= clearbit;
        if (immedDiffers)
            regcon.immed.mval &= clearbit;

        clearbit <<= 1;
        clearbit |= 1;
    }

    //printf("regcon.cse.mval = %s, regconsave.mval = %s ",regm_str(regcon.cse.mval),regm_str(pregconsave.cse.mval));
    regcon.used       |= pregconsave.used;
    regcon.cse.mval   &= pregconsave.cse.mval;
    regcon.immed.mval &= pregconsave.immed.mval;
    regcon.params     &= pregconsave.params;
    //printf("regcon.cse.mval&regcon.cse.mops = %s, regcon.cse.mops = %s\n",regm_str(regcon.cse.mval & regcon.cse.mops), regm_str(regcon.cse.mops));

    // Must come last: depends on the final value of cse.mval.
    regcon.cse.mops &= regcon.cse.mval;
}
3452 }