1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1985-1998 by Symantec
6  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcod.d, backend/cgcod.d)
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcod.d
11  */
12 
13 module dmd.backend.cgcod;
14 
15 version = FRAMEPTR;
16 
17 version (SCPP)
18     version = COMPILE;
19 version (MARS)
20     version = COMPILE;
21 
22 version (COMPILE)
23 {
24 
25 import core.stdc.stdio;
26 import core.stdc.stdlib;
27 import core.stdc.string;
28 
29 import dmd.backend.backend;
30 import dmd.backend.cc;
31 import dmd.backend.cdef;
32 import dmd.backend.code;
33 import dmd.backend.cgcse;
34 import dmd.backend.code_x86;
35 import dmd.backend.codebuilder;
36 import dmd.backend.dlist;
37 import dmd.backend.dvec;
38 import dmd.backend.melf;
39 import dmd.backend.mem;
40 import dmd.backend.el;
41 import dmd.backend.exh;
42 import dmd.backend.global;
43 import dmd.backend.obj;
44 import dmd.backend.oper;
45 import dmd.backend.outbuf;
46 import dmd.backend.rtlsym;
47 import dmd.backend.ty;
48 import dmd.backend.type;
49 import dmd.backend.xmm;
50 
51 import dmd.backend.barray;
52 
53 version (SCPP)
54 {
55     import parser;
56     import precomp;
57 }
58 
59 extern (C++):
60 
61 nothrow:
62 
63 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
64 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);
65 
66 version (MARS)
67     enum MARS = true;
68 else
69     enum MARS = false;
70 
71 void dwarf_except_gentables(Funcsym *sfunc, uint startoffset, uint retoffset);
72 int REGSIZE();
73 
/**
 * Build a single-bit mask.
 * Params:
 *      m = bit number
 * Returns:
 *      value with only bit `m` set
 */
private extern (D) uint mask(uint m)
{
    const uint bit = 1 << m;
    return bit;
}
75 
76 
__gshared
{
    // true if floating register is required
    bool floatreg;

    // !=0 if this function has a stack frame
    int hasframe;
    // enforced stack alignment
    bool enforcealign;
    targ_size_t spoff;
    // BP offset of floating register
    targ_size_t Foff;
    // offset of common sub expressions
    targ_size_t CSoff;
    // offset of saved 8087 registers
    targ_size_t NDPoff;
    // offset of saved registers
    targ_size_t pushoff;
    // using pushoff
    bool pushoffuse;
    // offset from BP
    int BPoff;
    // add to EBP offset to get ESP offset
    int EBPtoESP;
    // section of function parameters
    LocalSection Para;
    // section of automatics and registers
    LocalSection Auto;
    // section of fastpar
    LocalSection Fast;
    // offset of SCstack variables from ESP
    LocalSection EEStack;
    // data for alloca() temporary
    LocalSection Alloca;

    REGSAVE regsave;

    // state of code generator
    CGstate cgstate;

    regm_t BYTEREGS = BYTEREGS_INIT;
    regm_t ALLREGS = ALLREGS_INIT;

    // # of bytes that SP is beyond BP.
    uint stackpush;

    // set to !=0 if any use of the stack other than accessing parameters.
    // Used to see if we can address parameters with ESP rather than EBP.
    int stackchanged;
    // !=0 if we referenced any parameters
    int refparam;
    // !=0 if we referenced any locals
    int reflocal;
    // true if any inline assembler
    bool anyiasm;
    // !=0 if we called a function
    char calledafunc;
    // if true, then we will need the frame pointer (BP for the 8088)
    char needframe;
    // !=0 if the GOTsym was referenced
    char gotref;
    // if !=0, then used NT exception handling
    uint usednteh;
    // true if called a BC_finally block
    bool calledFinally;

    // Register contents
    con_t regcon;

    // PASSxxxx
    int pass;

    // set to symbol that should be placed in register AX
    private Symbol *retsym;

    /*
     * Register masks.
     */

    // Mask of registers that we would like to save;
    // they are temporaries (set by scodelem())
    regm_t msavereg;
    // Mask of registers preserved by a function
    regm_t mfuncreg;

    // ALLREGS optionally including mBP
    regm_t allregs;

    // which block we are in
    int dfoidx;

    // offset of start of function
    targ_size_t funcoffset;
    // offset past adj of stack allocation
    targ_size_t prolog_allocoffset;
    // size of function entry code
    targ_size_t startoffset;
    // offset from start of func to ret code
    targ_size_t retoffset;
    // size of function return
    targ_size_t retsize;

    private regm_t lastretregs;
    private regm_t last2retregs;
    private regm_t last3retregs;
    private regm_t last4retregs;
    private regm_t last5retregs;
}
155 
/*********************************
 * Generate code for a function.
 *
 * Runs blcodgen() over the function's blocks, repeating from `tryagain`
 * for as long as `pass` has not reached PASSfinal (each retry frees the
 * previously generated block code first). It then computes stack offsets,
 * builds the prolog and epilogs, assigns block offsets, shortens jumps,
 * and finally emits the code, the switch tables and the exception
 * handling tables.
 *
 * Note at the end of this routine mfuncreg will contain the mask
 * of registers not affected by the function. Some minor optimization
 * possibilities are here.
 * Params:
 *      sfunc = function to generate code for; must be the current funcsym_p
 */

void codgen(Symbol *sfunc)
{
    bool flag;
    block *btry;        // only consulted in the SCPP exception-pair code below

    // Register usage. If a bit is on, the corresponding register is live
    // in that basic block.

    //printf("codgen('%s')\n",funcsym_p.Sident.ptr);
    assert(sfunc == funcsym_p);
    assert(cseg == funcsym_p.Sseg);

    cgreg_init();
    CSE.initialize();
    tym_t functy = tybasic(sfunc.ty());
    cod3_initregs();
    allregs = ALLREGS;
    pass = PASSinitial;
    Alloca.init();
    anyiasm = false;

    if (config.ehmethod == EHmethod.EH_DWARF)
    {
        /* The dwarf unwinder relies on the function epilog to exist
         */
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCexit)
                b.BC = BCret;
        }
    }

tryagain:
    debug
    if (debugr)
        printf("------------------ PASS%s -----------------\n",
            (pass == PASSinitial) ? "init".ptr : ((pass == PASSreg) ? "reg".ptr : "final".ptr));

    lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0;

    // if no parameters, assume we don't need a stack frame
    needframe = 0;
    enforcealign = false;
    gotref = 0;
    stackchanged = 0;
    stackpush = 0;
    refparam = 0;
    calledafunc = 0;
    retsym = null;

    cgstate.stackclean = 1;
    cgstate.funcarg.init();
    cgstate.funcargtos = ~0;
    cgstate.accessedTLS = false;
    STACKALIGN = TARGET_STACKALIGN;

    regsave.reset();
    memset(global87.stack.ptr,0,global87.stack.sizeof);

    calledFinally = false;
    usednteh = 0;

    static if (MARS && TARGET_WINDOS)
    {
        if (sfunc.Sfunc.Fflags3 & Fjmonitor)
            usednteh |= NTEHjmonitor;
    }
    else version (SCPP)
    {
        if (CPP)
        {
            if (config.exe == EX_WIN32 &&
                (sfunc.Stype.Tflags & TFemptyexc || sfunc.Stype.Texcspec))
                usednteh |= NTEHexcspec;
            except_reset();
        }
    }

    // Set on a trial basis, turning it off if anything might throw
    sfunc.Sfunc.Fflags3 |= Fnothrow;

    floatreg = false;
    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    CSE.start();
    memset(&regcon,0,regcon.sizeof);
    regcon.cse.mval = regcon.cse.mops = 0;      // no common subs yet
    msavereg = 0;
    uint nretblocks = 0;
    mfuncreg = fregsaved;               // so we can see which are used
                                        // (bit is cleared each time
                                        //  we use one)
    for (block* b = startblock; b; b = b.Bnext)
    {
        memset(&b.Bregcon,0,b.Bregcon.sizeof);       // Clear out values in registers
        if (b.Belem)
            resetEcomsub(b.Belem);     // reset all the Ecomsubs
        if (b.BC == BCasm)
            anyiasm = true;             // we have inline assembler
        if (b.BC == BCret || b.BC == BCretexp)
            nretblocks++;
    }

    if (!config.fulltypes || (config.flags4 & CFG4optimized))
    {
        // Collect the registers holding fastpar/shadowreg parameters, minus
        // those of parameters that have been assigned to a register (FLreg)
        regm_t noparams = 0;
        foreach (i; 0 .. globsym.top)
        {
            Symbol *s = globsym.tab[i];
            s.Sflags &= ~SFLread;
            switch (s.Sclass)
            {
                case SCfastpar:
                case SCshadowreg:
                    regcon.params |= s.Spregm();
                    goto case SCparameter;

                case SCparameter:
                    if (s.Sfl == FLreg)
                        noparams |= s.Sregm;
                    break;

                default:
                    break;
            }
        }
        regcon.params &= ~noparams;
    }

    if (config.flags4 & CFG4optimized)
    {
        if (nretblocks == 0 &&                  // if no return blocks in function
            !(sfunc.ty() & mTYnaked))      // naked functions may have hidden ways of returning
            sfunc.Sflags |= SFLexit;       // mark function as never returning

        assert(dfo);

        cgreg_reset();
        for (dfoidx = 0; dfoidx < dfo.length; dfoidx++)
        {
            regcon.used = msavereg | regcon.cse.mval;   // registers already in use
            block* b = dfo[dfoidx];
            blcodgen(b);                        // gen code in depth-first order
            //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used));
            cgreg_used(dfoidx, b.Bregcon.used); // gather register used information
        }
    }
    else
    {
        pass = PASSfinal;
        for (block* b = startblock; b; b = b.Bnext)
            blcodgen(b);                // generate the code for each block
    }
    regcon.immed.mval = 0;
    assert(!regcon.cse.mops);           // should have all been used

    // See which variables we can put into registers
    if (pass != PASSfinal &&
        !anyiasm)                               // possible LEA or LES opcodes
    {
        allregs |= cod3_useBP();                // see if we can use EBP

        // If pic code, but EBX was never needed
        if (!(allregs & mask(PICREG)) && !gotref)
        {
            allregs |= mask(PICREG);            // EBX can now be used
            cgreg_assign(retsym);
            pass = PASSreg;
        }
        else if (cgreg_assign(retsym))          // if we found some registers
            pass = PASSreg;
        else
            pass = PASSfinal;

        // Throw away the code generated by this pass and start over
        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
        goto tryagain;
    }
    cgreg_term();

    version (SCPP)
    {
        if (CPP)
            cgcod_eh();
    }

    // See if we need to enforce a particular stack alignment
    foreach (i; 0 .. globsym.top)
    {
        Symbol *s = globsym.tab[i];

        if (Symbol_Sisdead(s, anyiasm))
            continue;

        switch (s.Sclass)
        {
            case SCregister:
            case SCauto:
            case SCfastpar:
                if (s.Sfl == FLreg)
                    break;

                const sz = type_alignsize(s.Stype);
                if (sz > STACKALIGN && (I64 || config.exe == EX_OSX))
                {
                    STACKALIGN = sz;
                    enforcealign = true;
                }
                break;

            default:
                break;
        }
    }

    stackoffsets(1);            // compute addresses of stack variables
    cod5_prol_epi();            // see where to place prolog/epilog
    CSE.finish();               // compute addresses and sizes of CSE saves

    if (configv.addlinenumbers)
        objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg));

    // Otherwise, jmp's to startblock will execute the prolog again
    assert(!startblock.Bpred);

    CodeBuilder cdbprolog; cdbprolog.ctor();
    prolog(cdbprolog);           // gen function start code
    code *cprolog = cdbprolog.finish();
    if (cprolog)
        pinholeopt(cprolog,null);       // optimize

    funcoffset = Offset(sfunc.Sseg);
    targ_size_t coffset = Offset(sfunc.Sseg);

    if (eecontext.EEelem)
        genEEcode();

    for (block* b = startblock; b; b = b.Bnext)
    {
        // We couldn't do this before because localsize was unknown
        switch (b.BC)
        {
            case BCret:
                if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked))
                {
                    CodeBuilder cdb; cdb.ctor();
                    cdb.append(b.Bcode);
                    cdb.genlinnum(b.Bsrcpos);
                    b.Bcode = cdb.finish();
                }
                goto case BCretexp;

            case BCretexp:
                epilog(b);
                break;

            default:
                if (b.Bflags & BFLepilog)
                    epilog(b);
                break;
        }
        assignaddr(b);                  // assign addresses
        pinholeopt(b.Bcode,b);         // do pinhole optimization
        if (b.Bflags & BFLprolog)      // do function prolog
        {
            startoffset = coffset + calcblksize(cprolog) - funcoffset;
            b.Bcode = cat(cprolog,b.Bcode);
        }
        cgsched_block(b);
        b.Bsize = calcblksize(b.Bcode);       // calculate block size
        if (b.Balign)
        {
            // round coffset up to a multiple of Balign
            targ_size_t u = b.Balign - 1;
            coffset = (coffset + u) & ~u;
        }
        b.Boffset = coffset;           /* offset of this block         */
        coffset += b.Bsize;            /* offset of following block    */
    }

    debug
    debugw && printf("code addr complete\n");

    // Do jump optimization
    do
    {
        flag = false;
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.Bflags & BFLjmpoptdone)      /* if no more jmp opts for this blk */
                continue;
            int i = branch(b,0);            // see if jmp => jmp short
            if (i)                          // if any bytes saved
            {
                b.Bsize -= i;

                // This block shrank, so recompute the offset of every block after it
                targ_size_t offset = b.Boffset + b.Bsize;
                for (block* bn = b.Bnext; bn; bn = bn.Bnext)
                {
                    if (bn.Balign)
                    {
                        targ_size_t u = bn.Balign - 1;
                        offset = (offset + u) & ~u;
                    }
                    bn.Boffset = offset;
                    offset += bn.Bsize;
                }
                coffset = offset;
                flag = true;
            }
        }
        if (!I16 && !(config.flags4 & CFG4optimized))
            break;                      // use the long conditional jmps
    } while (flag);                     // loop till no more bytes saved

    debug
    debugw && printf("code jump optimization complete\n");

    version (MARS)
    {
        if (usednteh & NTEH_try)
        {
            // Do this before code is emitted because we patch some instructions
            nteh_filltables();
        }
    }

    // Compute starting offset for switch tables
    targ_size_t swoffset;
    int jmpseg = -1;
    if (config.flags & CFGromable)
    {
        jmpseg = 0;
        swoffset = coffset;
    }

    // Emit the generated code
    if (eecontext.EEcompile == 1)
    {
        codout(sfunc.Sseg,eecontext.EEcode);
        code_free(eecontext.EEcode);
        version (SCPP)
        {
            el_free(eecontext.EEelem);
        }
    }
    else
    {
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCjmptab || b.BC == BCswitch)
            {
                if (jmpseg == -1)
                {
                    jmpseg = objmod.jmpTableSegment(sfunc);
                    swoffset = Offset(jmpseg);
                }
                swoffset = _align(0,swoffset);
                b.Btableoffset = swoffset;     /* offset of sw tab */
                swoffset += b.Btablesize;
            }
            jmpaddr(b.Bcode);          /* assign jump addresses        */

            debug
            if (debugc)
            {
                printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n",
                    cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg));
                if (b.Bcode)
                    printf( "First opcode of block is: %0x\n", b.Bcode.Iop );
            }

            if (b.Balign)
            {
                // number of padding bytes needed to reach the next Balign boundary
                uint u = b.Balign;
                uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1);

                cod3_align_bytes(sfunc.Sseg, nalign);
            }
            assert(b.Boffset == Offset(sfunc.Sseg));

            version (SCPP)
            {
                if (CPP && !(config.exe == EX_WIN32))
                {
                    //printf("b = %p, index = %d\n",b,b.Bindex);
                    //except_index_set(b.Bindex);

                    if (btry != b.Btry)
                    {
                        btry = b.Btry;
                        except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
                    }
                    if (b.BC == BCtry)
                    {
                        btry = b;
                        except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
                    }
                }
            }

            codout(sfunc.Sseg,b.Bcode);   // output code
        }

        // The computed size must agree with what was actually emitted
        if (coffset != Offset(sfunc.Sseg))
        {
            debug
            printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg));

            assert(0);
        }
        sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset;    // size of function

        static if (NTEXCEPTIONS || MARS)
        {
            version (MARS)
                const nteh = usednteh & NTEH_try;
            else static if (NTEXCEPTIONS)
                const nteh = usednteh & NTEHcpp;
            else
                enum nteh = true;
            if (nteh)
            {
                assert(!(config.flags & CFGromable));
                //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset);
                objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset);
            }
        }

        // Write out switch tables, and leave retoffset computed from the last block
        for (block* b = startblock; b; b = b.Bnext)
        {
            switch (b.BC)
            {
                case BCjmptab:              /* if jump table                */
                    outjmptab(b);           /* write out jump table         */
                    goto default;

                case BCswitch:
                    outswitab(b);           /* write out switch table       */
                    goto default;

                case BCret:
                case BCretexp:
                    /* Compute offset to return code from start of function */
                    retoffset = b.Boffset + b.Bsize - retsize - funcoffset;
                    version (MARS)
                    {
                        /* Add 3 bytes to retoffset in case we have an exception
                         * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT
                         * IT FIXES THE PROBLEM HERE AS WELL.
                         */
                        if (usednteh & NTEH_try)
                            retoffset += 3;
                    }
                    break;

                default:
                    retoffset = b.Boffset + b.Bsize - funcoffset;
                    break;
            }
        }
        if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked))
            /* put line number at end of function on the
               start of the last instruction
             */
            /* Instead, try offset to cleanup code  */
            if (retoffset < sfunc.Ssize)
                objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset);

        static if (TARGET_WINDOS && MARS)
        {
            if (config.exe == EX_WIN64)
                win64_pdata(sfunc);
        }

        static if (MARS)
        {
            if (usednteh & NTEH_try)
            {
                // The function's code has already been written out by codout() above
                nteh_gentables(sfunc);
            }
            if (usednteh & EHtry &&             // saw BCtry or BC_try (test EHcleanup too?)
                config.ehmethod == EHmethod.EH_DM)
            {
                except_gentables();
            }
            if (config.ehmethod == EHmethod.EH_DWARF)
            {
                // Fstartblock is set only for the duration of the call
                sfunc.Sfunc.Fstartblock = startblock;
                dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset);
                sfunc.Sfunc.Fstartblock = null;
            }
        }

        version (SCPP)
        {
            // Write out frame handler
            if (NTEXCEPTIONS && usednteh & NTEHcpp)
            {
                nteh_framehandler(sfunc, except_gentables());
            }
            else
            {
                if (NTEXCEPTIONS && usednteh & NTEH_try)
                {
                    nteh_gentables(sfunc);
                }
                else
                {
                    if (CPP)
                        except_gentables();
                }
            }
        }

        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
    }

    // Mask of regs saved
    // BUG: do interrupt functions save BP?
    sfunc.Sregsaved = (functy == TYifunc) ? cast(regm_t) mBP : (mfuncreg | fregsaved);

    debug
    if (global87.stackused != 0)
      printf("stackused = %d\n",global87.stackused);

    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    global87.save.__dtor();       // clean up ndp save array
}
703 
/*********************************************
 * Align sections on the stack.
 * Params:
 *      base = negative offset of section from frame pointer
 *      alignment = alignment to use; clamped to STACKALIGN, and 0 means
 *                  no adjustment is made
 *      bias = difference between where frame pointer points and the STACKALIGNed
 *             part of the stack
 * Returns:
 *      base revised downward so it is aligned
 */
targ_size_t alignsection(targ_size_t base, uint alignment, int bias)
{
    assert(cast(int)base <= 0);

    // Never use an alignment larger than STACKALIGN
    if (STACKALIGN < alignment)
        alignment = STACKALIGN;

    // Nothing to do when no alignment is requested
    if (!alignment)
        return base;

    // Distance of the section from the aligned part of the stack
    int excess = cast(int)(-base + bias);
    assert(excess >= 0);

    // Keep only the part that sticks out past an alignment boundary
    excess &= (alignment - 1);
    if (excess)
        base -= alignment - excess;     // move down to the next boundary

    return base;
}
728 
729 /*******************************
730  * Generate code for a function start.
731  * Input:
732  *      Offset(cseg)         address of start of code
733  *      Auto.alignment
734  * Output:
735  *      Offset(cseg)         adjusted for size of code generated
736  *      EBPtoESP
737  *      hasframe
738  *      BPoff
739  */
740 void prolog(ref CodeBuilder cdb)
741 {
742     bool enter;
743 
744     //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident, needframe, Auto.alignment);
745     debug debugw && printf("funcstart()\n");
746     regcon.immed.mval = 0;                      /* no values in registers yet   */
747     version (FRAMEPTR)
748         EBPtoESP = 0;
749     else
750         EBPtoESP = -REGSIZE;
751     hasframe = 0;
752     bool pushds = false;
753     BPoff = 0;
754     bool pushalloc = false;
755     tym_t tyf = funcsym_p.ty();
756     tym_t tym = tybasic(tyf);
757     uint farfunc = tyfarfunc(tym);
758 
759     // Special Intel 64 bit ABI prolog setup for variadic functions
760     Symbol *sv64 = null;                        // set to __va_argsave
761     if (I64 && variadic(funcsym_p.Stype))
762     {
763         /* The Intel 64 bit ABI scheme.
764          * abi_sysV_amd64.pdf
765          * Load arguments passed in registers into the varargs save area
766          * so they can be accessed by va_arg().
767          */
768         /* Look for __va_argsave
769          */
770         for (SYMIDX si = 0; si < globsym.top; si++)
771         {
772             Symbol *s = globsym.tab[si];
773             if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0)
774             {
775                 if (!(s.Sflags & SFLdead))
776                     sv64 = s;
777                 break;
778             }
779         }
780     }
781 
782     if (config.flags & CFGalwaysframe ||
783         funcsym_p.Sfunc.Fflags3 & Ffakeeh ||
784         /* The exception stack unwinding mechanism relies on the EBP chain being intact,
785          * so need frame if function can possibly throw
786          */
787         !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) ||
788         cgstate.accessedTLS ||
789         sv64
790        )
791         needframe = 1;
792 
793     CodeBuilder cdbx; cdbx.ctor();
794 
795 Lagain:
796     spoff = 0;
797     char guessneedframe = needframe;
798     int cfa_offset = 0;
799 //    if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)))
800 //      usednteh |= NTEHpassthru;
801 
802     /* Compute BP offsets for variables on stack.
803      * The organization is:
804      *  Para.size    parameters
805      * -------- stack is aligned to STACKALIGN
806      *          seg of return addr      (if far function)
807      *          IP of return addr
     *  BP->    caller's BP
809      *          DS                      (if Windows prolog/epilog)
810      *          exception handling context symbol
811      *  Fast.size fastpar
812      *  Auto.size    autos and regs
813      *  regsave.off  any saved registers
814      *  Foff    floating register
815      *  Alloca.size  alloca temporary
816      *  CSoff   common subs
817      *  NDPoff  any 8087 saved registers
818      *          monitor context record
819      *          any saved registers
820      */
821 
822     if (tym == TYifunc)
823         Para.size = 26; // how is this number derived?
824     else
825     {
826         version (FRAMEPTR)
827         {
828             Para.size = ((farfunc ? 2 : 1) + needframe) * REGSIZE;
829             if (needframe)
830                 EBPtoESP = -REGSIZE;
831         }
832         else
833             Para.size = ((farfunc ? 2 : 1) + 1) * REGSIZE;
834     }
835 
836     /* The real reason for the FAST section is because the implementation of contracts
837      * requires a consistent stack frame location for the 'this' pointer. But if varying
838      * stuff in Auto.offset causes different alignment for that section, the entire block can
839      * shift around, causing a crash in the contracts.
840      * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their
841      * own FAST section, which is never aligned at a size bigger than REGSIZE, and so
842      * its alignment never shifts around.
843      * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned
844      * individually rather than as a group.
845      */
846     Fast.size = 0;
847     static if (NTEXCEPTIONS == 2)
848     {
849         Fast.size -= nteh_contextsym_size();
850         version (MARS)
851         {
852             static if (TARGET_WINDOS)
853             {
854                 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0)
855                     Fast.size -= 5 * 4;
856             }
857         }
858     }
859 
860     /* Despite what the comment above says, aligning Fast section to size greater
861      * than REGSIZE does not break contract implementation. Fast.offset and
862      * Fast.alignment must be the same for the overriding and
863      * the overridden function, since they have the same parameters. Fast.size
864      * must be the same because otherwise, contract inheritance wouldn't work
865      * even if we didn't align Fast section to size greater than REGSIZE. Therefore,
866      * the only way aligning the section could cause problems with contract
867      * inheritance is if bias (declared below) differed for the overridden
868      * and the overriding function.
869      *
870      * Bias depends on Para.size and needframe. The value of Para.size depends on
871      * whether the function is an interrupt handler and whether it is a farfunc.
872      * DMD does not have _interrupt attribute and D does not make a distinction
873      * between near and far functions, so Para.size should always be 2 * REGSIZE
874      * for D.
875      *
876      * The value of needframe depends on a global setting that is only set
877      * during backend's initialization and on function flag Ffakeeh. On Windows,
878      * that flag is always set for virtual functions, for which contracts are
879      * defined and on other platforms, it is never set. Because of that
     * the value of needframe should always be the same for the overridden
881      * and the overriding function, and so bias should be the same too.
882      */
883 
884 version (FRAMEPTR)
885     int bias = enforcealign ? 0 : cast(int)(Para.size);
886 else
887     int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE));
888 
889     if (Fast.alignment < REGSIZE)
890         Fast.alignment = REGSIZE;
891 
892     Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias);
893 
894     if (Auto.alignment < REGSIZE)
895         Auto.alignment = REGSIZE;       // necessary because localsize must be REGSIZE aligned
896     Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias);
897 
898     regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias);
899     //printf("regsave.off = x%x, size = x%x, alignment = %x\n",
900         //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment);
901 
902     if (floatreg)
903     {
904         uint floatregsize = config.fpxmmregs || I32 ? 16 : DOUBLESIZE;
905         Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias);
906         //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize);
907     }
908     else
909         Foff = regsave.off;
910 
911     Alloca.alignment = REGSIZE;
912     Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias);
913 
914     CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias);
915     //printf("CSoff = x%x, size = x%x, alignment = %x\n",
916         //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment);
917 
918     NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias);
919 
920     regm_t topush = fregsaved & ~mfuncreg;          // mask of registers that need saving
921     pushoffuse = false;
922     pushoff = NDPoff;
923     /* We don't keep track of all the pushes and pops in a function. Hence,
924      * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder
925      * won't be setting ESP correctly. With pushoffuse, the registers are restored
926      * from EBP, which is kept track of properly.
927      */
928     if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64))
929     {
930         /* Instead of pushing the registers onto the stack one by one,
931          * allocate space in the stack frame and copy/restore them there.
932          */
933         int xmmtopush = numbitsset(topush & XMMREGS);   // XMM regs take 16 bytes
934         int gptopush = numbitsset(topush) - xmmtopush;  // general purpose registers to save
935         if (NDPoff || xmmtopush || cgstate.funcarg.size)
936         {
937             pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16),
938                     xmmtopush ? STACKALIGN : REGSIZE, bias);
939             pushoffuse = true;          // tell others we're using this strategy
940         }
941     }
942 
943     //printf("Fast.size = x%x, Auto.size = x%x\n", (int)Fast.size, (int)Auto.size);
944 
945     cgstate.funcarg.alignment = cgstate.funcarg.size ? STACKALIGN : REGSIZE;
946     cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias);
947 
948     localsize = -cgstate.funcarg.offset;
949 
950     //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n",
951         //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize);
952     assert(cast(targ_ptrdiff_t)localsize >= 0);
953 
954     // Keep the stack aligned by 8 for any subsequent function calls
955     if (!I16 && calledafunc &&
956         (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
957     {
958         int npush = numbitsset(topush);            // number of registers that need saving
959         npush += numbitsset(topush & XMMREGS);     // XMM regs take 16 bytes, so count them twice
960         if (pushoffuse)
961             npush = 0;
962 
963         //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n",
964                //npush, Para.size, needframe, localsize);
965 
966         int sz = cast(int)(localsize + npush * REGSIZE);
967         if (!enforcealign)
968         {
969             version (FRAMEPTR)
970                 sz += Para.size;
971             else
972                 sz += Para.size + (needframe ? 0 : -REGSIZE);
973         }
974         if (sz & (STACKALIGN - 1))
975             localsize += STACKALIGN - (sz & (STACKALIGN - 1));
976     }
977     cgstate.funcarg.offset = -localsize;
978 
979     //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n",
980         //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize);
981 
982     uint xlocalsize = cast(uint)localsize;    // amount to subtract from ESP to make room for locals
983 
984     if (tyf & mTYnaked)                 // if no prolog/epilog for function
985     {
986         hasframe = 1;
987         return;
988     }
989 
990     if (tym == TYifunc)
991     {
992         prolog_ifunc(cdbx,&tyf);
993         hasframe = 1;
994         cdb.append(cdbx);
995         goto Lcont;
996     }
997 
998     /* Determine if we need BP set up   */
999     if (enforcealign)
1000     {
1001         // we need BP to reset the stack before return
1002         // otherwise the return address is lost
1003         needframe = 1;
1004 
1005     }
1006     else if (config.flags & CFGalwaysframe)
1007         needframe = 1;
1008     else
1009     {
1010         if (localsize)
1011         {
1012             if (I16 ||
1013                 !(config.flags4 & CFG4speed) ||
1014                 config.target_cpu < TARGET_Pentium ||
1015                 farfunc ||
1016                 config.flags & CFGstack ||
1017                 xlocalsize >= 0x1000 ||
1018                 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
1019                 anyiasm ||
1020                 Alloca.size
1021                )
1022                 needframe = 1;
1023         }
1024         if (refparam && (anyiasm || I16))
1025             needframe = 1;
1026     }
1027 
1028     if (needframe)
1029     {
1030         assert(mfuncreg & mBP);         // shouldn't have used mBP
1031 
1032         if (!guessneedframe)            // if guessed wrong
1033             goto Lagain;
1034     }
1035 
1036     if (I16 && config.wflags & WFwindows && farfunc)
1037     {
1038         prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds);
1039         enter = false;                  // don't use ENTER instruction
1040         hasframe = 1;                   // we have a stack frame
1041     }
1042     else if (needframe)                 // if variables or parameters
1043     {
1044         prolog_frame(cdbx, farfunc, &xlocalsize, &enter, &cfa_offset);
1045         hasframe = 1;
1046     }
1047 
1048     /* Align the stack if necessary */
1049     prolog_stackalign(cdbx);
1050 
1051     /* Subtract from stack pointer the size of the local stack frame
1052      */
1053     if (config.flags & CFGstack)        // if stack overflow check
1054     {
1055         prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
1056         if (Alloca.size)
1057             prolog_setupalloca(cdbx);
1058     }
1059     else if (needframe)                      /* if variables or parameters   */
1060     {
1061         if (xlocalsize)                 /* if any stack offset          */
1062         {
1063             prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
1064             if (Alloca.size)
1065                 prolog_setupalloca(cdbx);
1066         }
1067         else
1068             assert(Alloca.size == 0);
1069     }
1070     else if (xlocalsize)
1071     {
1072         assert(I32 || I64);
1073         prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc);
1074         version (FRAMEPTR) { } else
1075             BPoff += REGSIZE;
1076     }
1077     else
1078         assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor));
1079     EBPtoESP += xlocalsize;
1080     if (hasframe)
1081         EBPtoESP += REGSIZE;
1082 
1083     /* Win64 unwind needs the amount of code generated so far
1084      */
1085     if (config.exe == EX_WIN64)
1086     {
1087         code *c = cdbx.peek();
1088         pinholeopt(c, null);
1089         prolog_allocoffset = calcblksize(c);
1090     }
1091 
1092     version (SCPP)
1093     {
1094         /*  The idea is to generate trace for all functions if -Nc is not thrown.
1095          *  If -Nc is thrown, generate trace only for global COMDATs, because those
1096          *  are relevant to the FUNCTIONS statement in the linker .DEF file.
1097          *  This same logic should be in epilog().
1098          */
1099         if (config.flags & CFGtrace &&
1100             (!(config.flags4 & CFG4allcomdat) ||
1101              funcsym_p.Sclass == SCcomdat ||
1102              funcsym_p.Sclass == SCglobal ||
1103              (config.flags2 & CFG2comdat && SymInline(funcsym_p))
1104             )
1105            )
1106         {
1107             uint spalign = 0;
1108             int sz = cast(int)localsize;
1109             if (!enforcealign)
1110             {
1111                 version (FRAMEPTR)
1112                     sz += Para.size;
1113                 else
1114                     sz += Para.size + (needframe ? 0 : -REGSIZE);
1115             }
1116             if (STACKALIGN >= 16 && (sz & (STACKALIGN - 1)))
1117                 spalign = STACKALIGN - (sz & (STACKALIGN - 1));
1118 
1119             if (spalign)
1120             {   /* This could be avoided by moving the function call to after the
1121                  * registers are saved. But I don't remember why the call is here
1122                  * and not there.
1123                  */
1124                 cod3_stackadj(cdbx, spalign);
1125             }
1126 
1127             uint regsaved;
1128             prolog_trace(cdbx, farfunc != 0, &regsaved);
1129 
1130             if (spalign)
1131                 cod3_stackadj(cdbx, -spalign);
1132             useregs((ALLREGS | mBP | mES) & ~regsaved);
1133         }
1134     }
1135 
1136     version (MARS)
1137     {
1138         if (usednteh & NTEHjmonitor)
1139         {   Symbol *sthis;
1140 
1141             for (SYMIDX si = 0; 1; si++)
1142             {   assert(si < globsym.top);
1143                 sthis = globsym.tab[si];
1144                 if (strcmp(sthis.Sident.ptr,"this".ptr) == 0)
1145                     break;
1146             }
1147             nteh_monitor_prolog(cdbx,sthis);
1148             EBPtoESP += 3 * 4;
1149         }
1150     }
1151 
1152     cdb.append(cdbx);
1153     prolog_saveregs(cdb, topush, cfa_offset);
1154 
1155 Lcont:
1156 
1157     if (config.exe == EX_WIN64)
1158     {
1159         if (variadic(funcsym_p.Stype))
1160             prolog_gen_win64_varargs(cdb);
1161         regm_t namedargs;
1162         prolog_loadparams(cdb, tyf, pushalloc, namedargs);
1163         return;
1164     }
1165 
1166     prolog_ifunc2(cdb, tyf, tym, pushds);
1167 
1168     static if (NTEXCEPTIONS == 2)
1169     {
1170         if (usednteh & NTEH_except)
1171             nteh_setsp(cdb, 0x89);            // MOV __context[EBP].esp,ESP
1172     }
1173 
1174     // Load register parameters off of the stack. Do not use
1175     // assignaddr(), as it will replace the stack reference with
1176     // the register!
1177     regm_t namedargs;
1178     prolog_loadparams(cdb, tyf, pushalloc, namedargs);
1179 
1180     if (sv64)
1181         prolog_genvarargs(cdb, sv64, namedargs);
1182 
1183     /* Alignment checks
1184      */
1185     //assert(Auto.alignment <= STACKALIGN);
1186     //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0);
1187 }
1188 
1189 /************************************
1190  * Predicate for sorting auto symbols for qsort().
1191  * Returns:
1192  *      < 0     s1 goes farther from frame pointer
1193  *      > 0     s1 goes nearer the frame pointer
1194  *      = 0     no difference
1195  */
1196 
1197 extern (C) int
1198  autosort_cmp(scope const void *ps1, scope const void *ps2)
1199 {
1200     Symbol *s1 = *cast(Symbol **)ps1;
1201     Symbol *s2 = *cast(Symbol **)ps2;
1202 
1203     /* Largest align size goes furthest away from frame pointer,
1204      * so they get allocated first.
1205      */
1206     uint alignsize1 = Symbol_Salignsize(s1);
1207     uint alignsize2 = Symbol_Salignsize(s2);
1208     if (alignsize1 < alignsize2)
1209         return 1;
1210     else if (alignsize1 > alignsize2)
1211         return -1;
1212 
1213     /* move variables nearer the frame pointer that have higher Sweights
1214      * because addressing mode is fewer bytes. Grouping together high Sweight
1215      * variables also may put them in the same cache
1216      */
1217     if (s1.Sweight < s2.Sweight)
1218         return -1;
1219     else if (s1.Sweight > s2.Sweight)
1220         return 1;
1221 
1222     /* More:
1223      * 1. put static arrays nearest the frame pointer, so buffer overflows
1224      *    can't change other variable contents
1225      * 2. Do the coloring at the byte level to minimize stack usage
1226      */
1227     return 0;
1228 }
1229 
1230 /******************************
1231  * Compute offsets for remaining tmp, automatic and register variables
1232  * that did not make it into registers.
1233  * Input:
1234  *      flags   0: do estimate only
1235  *              1: final
1236  */
1237 void stackoffsets(int flags)
1238 {
1239     //printf("stackoffsets() %s\n", funcsym_p.Sident);
1240 
1241     Para.init();        // parameter offset
1242     Fast.init();        // SCfastpar offset
1243     Auto.init();        // automatic & register offset
1244     EEStack.init();     // for SCstack's
1245 
1246     // Set if doing optimization of auto layout
1247     bool doAutoOpt = flags && config.flags4 & CFG4optimized;
1248 
1249     // Put autos in another array so we can do optimizations on the stack layout
1250     Symbol*[10] autotmp;
1251     Symbol **autos = null;
1252     if (doAutoOpt)
1253     {
1254         if (globsym.top <= autotmp.length)
1255             autos = autotmp.ptr;
1256         else
1257         {   autos = cast(Symbol **)malloc(globsym.top * (*autos).sizeof);
1258             assert(autos);
1259         }
1260     }
1261     size_t autosi = 0;  // number used in autos[]
1262 
1263     for (int si = 0; si < globsym.top; si++)
1264     {   Symbol *s = globsym.tab[si];
1265 
1266         /* Don't allocate space for dead or zero size parameters
1267          */
1268         switch (s.Sclass)
1269         {
1270             case SCfastpar:
1271                 if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh))
1272                     goto Ldefault;   // don't need consistent stack frame
1273                 break;
1274 
1275             case SCparameter:
1276                 if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty)))
1277                 {
1278                     Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary
1279                     s.Soffset = Para.offset;
1280                     continue;
1281                 }
1282                 break;          // allocate even if it's dead
1283 
1284             case SCshadowreg:
1285                 break;          // allocate even if it's dead
1286 
1287             default:
1288             Ldefault:
1289                 if (Symbol_Sisdead(s, anyiasm))
1290                     continue;       // don't allocate space
1291                 break;
1292         }
1293 
1294         targ_size_t sz = type_size(s.Stype);
1295         if (sz == 0)
1296             sz++;               // can't handle 0 length structs
1297 
1298         uint alignsize = Symbol_Salignsize(s);
1299         if (alignsize > STACKALIGN)
1300             alignsize = STACKALIGN;         // no point if the stack is less aligned
1301 
1302         //printf("symbol '%s', size = x%lx, alignsize = %d, read = %x\n",s.Sident,(long)sz, (int)alignsize, s.Sflags & SFLread);
1303         assert(cast(int)sz >= 0);
1304 
1305         switch (s.Sclass)
1306         {
1307             case SCfastpar:
1308                 /* Get these
1309                  * right next to the stack frame pointer, EBP.
1310                  * Needed so we can call nested contract functions
1311                  * frequire and fensure.
1312                  */
1313                 if (s.Sfl == FLreg)        // if allocated in register
1314                     continue;
1315                 /* Needed because storing fastpar's on the stack in prolog()
1316                  * does the entire register
1317                  */
1318                 if (sz < REGSIZE)
1319                     sz = REGSIZE;
1320 
1321                 Fast.offset = _align(sz,Fast.offset);
1322                 s.Soffset = Fast.offset;
1323                 Fast.offset += sz;
1324                 //printf("fastpar '%s' sz = %d, fast offset =  x%x, %p\n",s.Sident,(int)sz,(int)s.Soffset, s);
1325 
1326                 if (alignsize > Fast.alignment)
1327                     Fast.alignment = alignsize;
1328                 break;
1329 
1330             case SCregister:
1331             case SCauto:
1332                 if (s.Sfl == FLreg)        // if allocated in register
1333                     break;
1334 
1335                 if (doAutoOpt)
1336                 {   autos[autosi++] = s;    // deal with later
1337                     break;
1338                 }
1339 
1340                 Auto.offset = _align(sz,Auto.offset);
1341                 s.Soffset = Auto.offset;
1342                 Auto.offset += sz;
1343                 //printf("auto    '%s' sz = %d, auto offset =  x%lx\n",s.Sident,sz,(long)s.Soffset);
1344 
1345                 if (alignsize > Auto.alignment)
1346                     Auto.alignment = alignsize;
1347                 break;
1348 
1349             case SCstack:
1350                 EEStack.offset = _align(sz,EEStack.offset);
1351                 s.Soffset = EEStack.offset;
1352                 //printf("EEStack.offset =  x%lx\n",(long)s.Soffset);
1353                 EEStack.offset += sz;
1354                 break;
1355 
1356             case SCshadowreg:
1357             case SCparameter:
1358                 if (config.exe == EX_WIN64)
1359                 {
1360                     assert((Para.offset & 7) == 0);
1361                     s.Soffset = Para.offset;
1362                     Para.offset += 8;
1363                     break;
1364                 }
1365                 /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general,
1366                  * but are 4 byte aligned on the OSX 32 stack.
1367                  */
1368                 Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */
1369                 if (alignsize >= 16 &&
1370                     (I64 || (config.exe == EX_OSX &&
1371                          (tyaggregate(s.ty()) || tyvector(s.ty())))))
1372                     Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1);
1373                 s.Soffset = Para.offset;
1374                 //printf("%s param offset =  x%lx, alignsize = %d\n",s.Sident,(long)s.Soffset, (int)alignsize);
1375                 Para.offset += (s.Sflags & SFLdouble)
1376                             ? type_size(tstypes[TYdouble])   // float passed as double
1377                             : type_size(s.Stype);
1378                 break;
1379 
1380             case SCpseudo:
1381             case SCstatic:
1382             case SCbprel:
1383                 break;
1384             default:
1385                 symbol_print(s);
1386                 assert(0);
1387         }
1388     }
1389 
1390     if (autosi)
1391     {
1392         qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp);
1393 
1394         vec_t tbl = vec_calloc(autosi);
1395 
1396         for (size_t si = 0; si < autosi; si++)
1397         {
1398             Symbol *s = autos[si];
1399 
1400             targ_size_t sz = type_size(s.Stype);
1401             if (sz == 0)
1402                 sz++;               // can't handle 0 length structs
1403 
1404             uint alignsize = Symbol_Salignsize(s);
1405             if (alignsize > STACKALIGN)
1406                 alignsize = STACKALIGN;         // no point if the stack is less aligned
1407 
1408             /* See if we can share storage with another variable
1409              * if their live ranges do not overlap.
1410              */
1411             if (// Don't share because could stomp on variables
1412                 // used in finally blocks
1413                 !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) &&
1414                 s.Srange && !(s.Sflags & SFLspill))
1415             {
1416                 for (size_t i = 0; i < si; i++)
1417                 {
1418                     if (!vec_testbit(i,tbl))
1419                         continue;
1420                     Symbol *sp = autos[i];
1421 //printf("auto    s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange));
1422                     if (vec_disjoint(s.Srange,sp.Srange) &&
1423                         !(sp.Soffset & (alignsize - 1)) &&
1424                         sz <= type_size(sp.Stype))
1425                     {
1426                         vec_or(sp.Srange,sp.Srange,s.Srange);
1427                         //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident);
1428                         s.Soffset = sp.Soffset;
1429                         goto L2;
1430                     }
1431                 }
1432             }
1433             Auto.offset = _align(sz,Auto.offset);
1434             s.Soffset = Auto.offset;
1435             //printf("auto    '%s' sz = %d, auto offset =  x%lx\n",s.Sident,sz,(long)s.Soffset);
1436             Auto.offset += sz;
1437             if (s.Srange && !(s.Sflags & SFLspill))
1438                 vec_setbit(si,tbl);
1439 
1440             if (alignsize > Auto.alignment)
1441                 Auto.alignment = alignsize;
1442         L2: { }
1443         }
1444 
1445         vec_free(tbl);
1446 
1447         if (autos != autotmp.ptr)
1448             free(autos);
1449     }
1450 }
1451 
1452 /****************************
1453  * Generate code for a block.
1454  */
1455 
1456 private void blcodgen(block *bl)
1457 {
1458     regm_t mfuncregsave = mfuncreg;
1459 
1460     //dbg_printf("blcodgen(%p)\n",bl);
1461 
1462     /* Determine existing immediate values in registers by ANDing
1463         together the values from all the predecessors of b.
1464      */
1465     assert(bl.Bregcon.immed.mval == 0);
1466     regcon.immed.mval = 0;      // assume no previous contents in registers
1467 //    regcon.cse.mval = 0;
1468     foreach (bpl; ListRange(bl.Bpred))
1469     {
1470         block *bp = list_block(bpl);
1471 
1472         if (bpl == bl.Bpred)
1473         {   regcon.immed = bp.Bregcon.immed;
1474             regcon.params = bp.Bregcon.params;
1475 //          regcon.cse = bp.Bregcon.cse;
1476         }
1477         else
1478         {
1479             int i;
1480 
1481             regcon.params &= bp.Bregcon.params;
1482             if ((regcon.immed.mval &= bp.Bregcon.immed.mval) != 0)
1483                 // Actual values must match, too
1484                 for (i = 0; i < REGMAX; i++)
1485                 {
1486                     if (regcon.immed.value[i] != bp.Bregcon.immed.value[i])
1487                         regcon.immed.mval &= ~mask(i);
1488                 }
1489         }
1490     }
1491     regcon.cse.mops &= regcon.cse.mval;
1492 
1493     // Set regcon.mvar according to what variables are in registers for this block
1494     CodeBuilder cdb; cdb.ctor();
1495     regcon.mvar = 0;
1496     regcon.mpvar = 0;
1497     regcon.indexregs = 1;
1498     int anyspill = 0;
1499     char *sflsave = null;
1500     if (config.flags4 & CFG4optimized)
1501     {
1502         CodeBuilder cdbload; cdbload.ctor();
1503         CodeBuilder cdbstore; cdbstore.ctor();
1504 
1505         sflsave = cast(char *) alloca(globsym.top * char.sizeof);
1506         for (SYMIDX i = 0; i < globsym.top; i++)
1507         {
1508             Symbol *s = globsym.tab[i];
1509 
1510             sflsave[i] = s.Sfl;
1511             if (regParamInPreg(s) &&
1512                 regcon.params & s.Spregm() &&
1513                 vec_testbit(dfoidx,s.Srange))
1514             {
1515 //                regcon.used |= s.Spregm();
1516             }
1517 
1518             if (s.Sfl == FLreg)
1519             {
1520                 if (vec_testbit(dfoidx,s.Srange))
1521                 {
1522                     regcon.mvar |= s.Sregm;
1523                     if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
1524                         regcon.mpvar |= s.Sregm;
1525                 }
1526             }
1527             else if (s.Sflags & SFLspill)
1528             {
1529                 if (vec_testbit(dfoidx,s.Srange))
1530                 {
1531                     anyspill = i + 1;
1532                     cgreg_spillreg_prolog(bl,s,cdbstore,cdbload);
1533                     if (vec_testbit(dfoidx,s.Slvreg))
1534                     {
1535                         s.Sfl = FLreg;
1536                         regcon.mvar |= s.Sregm;
1537                         regcon.cse.mval &= ~s.Sregm;
1538                         regcon.immed.mval &= ~s.Sregm;
1539                         regcon.params &= ~s.Sregm;
1540                         if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
1541                             regcon.mpvar |= s.Sregm;
1542                     }
1543                 }
1544             }
1545         }
1546         if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
1547         {
1548             cse_save(cdb,regcon.cse.mops & ~regcon.cse.mval);
1549         }
1550         cdb.append(cdbstore);
1551         cdb.append(cdbload);
1552         mfuncreg &= ~regcon.mvar;               // use these registers
1553         regcon.used |= regcon.mvar;
1554 
1555         // Determine if we have more than 1 uncommitted index register
1556         regcon.indexregs = IDXREGS & ~regcon.mvar;
1557         regcon.indexregs &= regcon.indexregs - 1;
1558     }
1559 
1560     /* This doesn't work when calling the BC_finally function,
1561      * as it is one block calling another.
1562      */
1563     //regsave.idx = 0;
1564 
1565     reflocal = 0;
1566     int refparamsave = refparam;
1567     refparam = 0;
1568     assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops);
1569 
1570     outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave);
1571     bl.Bcode = cdb.finish();
1572 
1573     for (int i = 0; i < anyspill; i++)
1574     {
1575         Symbol *s = globsym.tab[i];
1576         s.Sfl = sflsave[i];    // undo block register assignments
1577     }
1578 
1579     if (reflocal)
1580         bl.Bflags |= BFLreflocal;
1581     if (refparam)
1582         bl.Bflags |= BFLrefparam;
1583     refparam |= refparamsave;
1584     bl.Bregcon.immed = regcon.immed;
1585     bl.Bregcon.cse = regcon.cse;
1586     bl.Bregcon.used = regcon.used;
1587     bl.Bregcon.params = regcon.params;
1588 
1589     debug
1590     debugw && printf("code gen complete\n");
1591 }
1592 
1593 /*****************************************
1594  * Add in exception handling code.
1595  */
1596 
1597 version (SCPP)
1598 {
1599 
1600 private void cgcod_eh()
1601 {
1602     list_t stack;
1603     int idx;
1604     int tryidx;
1605 
1606     if (!(usednteh & (EHtry | EHcleanup)))
1607         return;
1608 
1609     // Compute Bindex for each block
1610     for (block *b = startblock; b; b = b.Bnext)
1611     {
1612         b.Bindex = -1;
1613         b.Bflags &= ~BFLvisited;               /* mark as unvisited    */
1614     }
1615     block *btry = null;
1616     int lastidx = 0;
1617     startblock.Bindex = 0;
1618     for (block *b = startblock; b; b = b.Bnext)
1619     {
1620         if (btry == b.Btry && b.BC == BCcatch)  // if don't need to pop try block
1621         {
1622             block *br = list_block(b.Bpred);          // find corresponding try block
1623             assert(br.BC == BCtry);
1624             b.Bindex = br.Bindex;
1625         }
1626         else if (btry != b.Btry && b.BC != BCcatch ||
1627                  !(b.Bflags & BFLvisited))
1628             b.Bindex = lastidx;
1629         b.Bflags |= BFLvisited;
1630 
1631         debug
1632         if (debuge)
1633         {
1634             WRBC(b.BC);
1635             printf(" block (%p) Btry=%p Bindex=%d\n",b,b.Btry,b.Bindex);
1636         }
1637 
1638         except_index_set(b.Bindex);
1639         if (btry != b.Btry)                    // exited previous try block
1640         {
1641             except_pop(b,null,btry);
1642             btry = b.Btry;
1643         }
1644         if (b.BC == BCtry)
1645         {
1646             except_push(b,null,b);
1647             btry = b;
1648             tryidx = except_index_get();
1649             CodeBuilder cdb; cdb.ctor();
1650             nteh_gensindex(cdb,tryidx - 1);
1651             cdb.append(b.Bcode);
1652             b.Bcode = cdb.finish();
1653         }
1654 
1655         stack = null;
1656         for (code *c = b.Bcode; c; c = code_next(c))
1657         {
1658             if ((c.Iop & ESCAPEmask) == ESCAPE)
1659             {
1660                 code *c1 = null;
1661                 switch (c.Iop & 0xFFFF00)
1662                 {
1663                     case ESCctor:
1664                         //printf("ESCctor\n");
1665                         except_push(c,c.IEV1.Vtor,null);
1666                         goto L1;
1667 
1668                     case ESCdtor:
1669                         //printf("ESCdtor\n");
1670                         except_pop(c,c.IEV1.Vtor,null);
1671                     L1: if (config.exe == EX_WIN32)
1672                         {
1673                             CodeBuilder cdb; cdb.ctor();
1674                             nteh_gensindex(cdb,except_index_get() - 1);
1675                             c1 = cdb.finish();
1676                             c1.next = code_next(c);
1677                             c.next = c1;
1678                         }
1679                         break;
1680 
1681                     case ESCmark:
1682                         //printf("ESCmark\n");
1683                         idx = except_index_get();
1684                         list_prependdata(&stack,idx);
1685                         except_mark();
1686                         break;
1687 
1688                     case ESCrelease:
1689                         //printf("ESCrelease\n");
1690                         version (SCPP)
1691                         {
1692                             idx = list_data(stack);
1693                             list_pop(&stack);
1694                             if (idx != except_index_get())
1695                             {
1696                                 if (config.exe == EX_WIN32)
1697                                 {
1698                                     CodeBuilder cdb; cdb.ctor();
1699                                     nteh_gensindex(cdb,idx - 1);
1700                                     c1 = cdb.finish();
1701                                     c1.next = code_next(c);
1702                                     c.next = c1;
1703                                 }
1704                                 else
1705                                 {   except_pair_append(c,idx - 1);
1706                                     c.Iop = ESCAPE | ESCoffset;
1707                                 }
1708                             }
1709                             except_release();
1710                         }
1711                         break;
1712 
1713                     case ESCmark2:
1714                         //printf("ESCmark2\n");
1715                         except_mark();
1716                         break;
1717 
1718                     case ESCrelease2:
1719                         //printf("ESCrelease2\n");
1720                         version (SCPP)
1721                         {
1722                             except_release();
1723                         }
1724                         break;
1725 
1726                     default:
1727                         break;
1728                 }
1729             }
1730         }
1731         assert(stack == null);
1732         b.Bendindex = except_index_get();
1733 
1734         if (b.BC != BCret && b.BC != BCretexp)
1735             lastidx = b.Bendindex;
1736 
1737         // Set starting index for each of the successors
1738         int i = 0;
1739         foreach (bl; ListRange(b.Bsucc))
1740         {
1741             block *bs = list_block(bl);
1742             if (b.BC == BCtry)
1743             {
1744                 switch (i)
1745                 {
1746                     case 0:                             // block after catches
1747                         bs.Bindex = b.Bendindex;
1748                         break;
1749 
1750                     case 1:                             // 1st catch block
1751                         bs.Bindex = tryidx;
1752                         break;
1753 
1754                     default:                            // subsequent catch blocks
1755                         bs.Bindex = b.Bindex;
1756                         break;
1757                 }
1758 
1759                 debug
1760                 if (debuge)
1761                 {
1762                     printf(" 1setting %p to %d\n",bs,bs.Bindex);
1763                 }
1764             }
1765             else if (!(bs.Bflags & BFLvisited))
1766             {
1767                 bs.Bindex = b.Bendindex;
1768 
1769                 debug
1770                 if (debuge)
1771                 {
1772                     printf(" 2setting %p to %d\n",bs,bs.Bindex);
1773                 }
1774             }
1775             bs.Bflags |= BFLvisited;
1776             i++;
1777         }
1778     }
1779 
1780     if (config.exe == EX_WIN32)
1781         for (block *b = startblock; b; b = b.Bnext)
1782         {
1783             if (/*!b.Bcount ||*/ b.BC == BCtry)
1784                 continue;
1785             foreach (bl; ListRange(b.Bpred))
1786             {
1787                 int pi = list_block(bl).Bendindex;
1788                 if (b.Bindex != pi)
1789                 {
1790                     CodeBuilder cdb; cdb.ctor();
1791                     nteh_gensindex(cdb,b.Bindex - 1);
1792                     cdb.append(b.Bcode);
1793                     b.Bcode = cdb.finish();
1794                     break;
1795                 }
1796             }
1797         }
1798 }
1799 
1800 }
1801 
1802 /******************************
1803  * Count the number of bits set in a register mask.
1804  */
1805 
int numbitsset(regm_t regm)
{
    int count = 0;
    // `regm & (regm - 1)` clears the lowest set bit, so the loop body
    // runs exactly once per bit that was set on entry.
    for (; regm != 0; regm &= regm - 1)
        ++count;
    return count;
}
1815 
1816 /******************************
1817  * Given a register mask, find and return the number
1818  * of the first register that fits.
1819  */
1820 
reg_t findreg(regm_t regm)
{
    // Convenience overload: forwards to the three-argument findreg().
    // __LINE__ and __FILE__ are written in this body, so they expand to
    // this wrapper's own location, not to the location of its caller.
    return findreg(regm, __LINE__, __FILE__);
}
1825 
reg_t findreg(regm_t regm, int line, const(char)* file)
{
    // Remember the incoming mask for the diagnostic printed on failure.
    debug
    regm_t regmsave = regm;

    // Walk the mask upward from bit 0. Whenever the low nibble is empty,
    // four register numbers are skipped at once.
    for (reg_t regnum = 0; ; )
    {
        if ((regm & 0xF) == 0)
        {
            regm >>= 4;
            regnum += 4;
            if (regm == 0)
                break;                  // nothing left: empty mask
        }
        if (regm & 1)
            return regnum;              // lowest set bit found
        regm >>= 1;
        ++regnum;
    }

    // Only reached when no bit was set in the mask.
    debug
    printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(regmsave),line,file,funcsym_p.Sident.ptr);
    fflush(stdout);

    assert(0);
}
1854 
1855 /***************
 * Free element (but not its leaves! (assume they are already freed))
1857  * Don't decrement Ecount! This is so we can detect if the common subexp
1858  * has already been evaluated.
1859  * If common subexpression is not required anymore, eliminate
1860  * references to it.
1861  */
1862 
void freenode(elem *e)
{
    elem_debug(e);
    //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount);

    // Consume one use. The counter is decremented unconditionally (even
    // when it was already zero); only a previously non-zero count means
    // there is nothing further to do.
    const hadPendingUses = e.Ecomsub != 0;
    e.Ecomsub--;
    if (hadPendingUses)
        return;

    // Not a CSE: no register bookkeeping to undo.
    if (!e.Ecount)
        return;

    // Drop every register that is still recorded as holding this elem.
    foreach (regnum; 0 .. regcon.cse.value.length)
    {
        if (regcon.cse.value[regnum] != e)
            continue;
        const keep = ~mask(cast(uint)regnum);
        regcon.cse.mval &= keep;
        regcon.cse.mops &= keep;
    }
    CSE.remove(e);
}
1881 
1882 /*********************************
1883  * Reset Ecomsub for all elem nodes, i.e. reverse the effects of freenode().
1884  */
1885 
private void resetEcomsub(elem *e)
{
    // Recurse into right operands, iterate down the left spine.
    for (;;)
    {
        elem_debug(e);
        e.Ecomsub = e.Ecount;

        const oper = e.Eoper;
        if (OTleaf(oper))
            return;                     // no children to visit

        if (OTbinary(oper))
            resetEcomsub(e.EV.E2);
        e = e.EV.E1;
    }
}
1903 
1904 /*********************************
1905  * Determine if elem e is a register variable.
1906  * If so:
1907  *      *pregm = mask of registers that make up the variable
1908  *      *preg = the least significant register
1909  *      returns true
1910  * Else
1911  *      returns false
1912  */
1913 
int isregvar(elem *e,regm_t *pregm,reg_t *preg)
{
    regm_t varmask;     // registers making up the variable
    reg_t varreg;       // register reported through *preg

    elem_debug(e);

    // Only variable references and relconsts can name a register variable.
    if (e.Eoper != OPvar && e.Eoper != OPrelconst)
        return false;

    Symbol *sym = e.EV.Vsym;
    switch (sym.Sfl)
    {
        case FLreg:
        {
            if (sym.Sclass == SCparameter)
            {
                refparam = true;
                reflocal = true;
            }
            // An offset of REGSIZE selects the most significant register,
            // any other offset the least significant one.
            if (e.EV.Voffset == REGSIZE)
                varreg = sym.Sregmsw;
            else
                varreg = sym.Sreglsw;
            varmask = sym.Sregm;
            //assert(tyreg(sym.ty()));
static if (0)
{
            // Let's just see if there is a CSE in a reg we can use
            // instead. This helps avoid AGI's.
            if (e.Ecount && e.Ecount != e.Ecomsub)
            {   int i;

                for (i = 0; i < arraysize(regcon.cse.value); i++)
                {
                    if (regcon.cse.value[i] == e)
                    {   varreg = i;
                        break;
                    }
                }
            }
}
            // The symbol's registers must all be register variables.
            assert(varmask & regcon.mvar && !(varmask & ~regcon.mvar));
            break;
        }

        case FLpseudo:
        {
            uint pseudo = sym.Sreglsw;
            regm_t pmask;
            version (MARS)
                pmask = mask(pseudo);
            else
                pmask = pseudomask[pseudo];

            // Reject BP,SP,EBP,ESP and the ?H registers
            if (!(pmask & ALLREGS) || (pseudo & ~3) == 4)
                return false;

            version (MARS)
                varreg = pseudo & 7;
            else
                varreg = pseudoreg[pseudo] & 7;
            varmask = pmask;
            break;
        }

        default:
            return false;
    }

    // Report the result through whichever out-pointers were supplied.
    if (preg)
        *preg = varreg;
    if (pregm)
        *pregm = varmask;
    return true;
}
1993 
1994 /*********************************
1995  * Allocate some registers.
1996  * Input:
1997  *      pretregs        Pointer to mask of registers to make selection from.
1998  *      tym             Mask of type we will store in registers.
1999  * Output:
2000  *      *pretregs       Mask of allocated registers.
2001  *      *preg           Register number of first allocated register.
2002  *      msavereg,mfuncreg       retregs bits are cleared.
2003  *      regcon.cse.mval,regcon.cse.mops updated
 * Returns:
 *      nothing; any code needed to save regcon.cse.mops registers to memory
 *      is appended to cdb.
2007  */
2008 
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym)
{
    // Convenience overload: forwards to the six-argument allocreg().
    // __LINE__ and __FILE__ are written in this body, so the diagnostics of
    // the worker report this wrapper's location rather than the caller's.
    allocreg(cdb, pretregs, preg, tym, __LINE__, __FILE__);
}
2013 
/* Worker behind the four-argument allocreg() overload.
 *
 * Picks a register (or register pair) out of *pretregs for a value of type
 * tym, stores the chosen mask back into *pretregs and the chosen register
 * number into *preg, and finishes by calling getregs() on the selection.
 *
 * Params:
 *      cdb      = code builder handed on to getregs()
 *      pretregs = in: candidate registers; out: mask actually allocated
 *      preg     = out: allocated register (for pairs see the `reg = ...`
 *                 assignment in the pair branch)
 *      tym      = type of the value; only tybasic(tym) is used
 *      line     = line number used only in diagnostic output
 *      file     = file name used only in diagnostic output
 */
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym
        ,int line,const(char)* file)
{
        reg_t reg;

// Disabled tracing of every allocation made in the final pass.
static if (0)
{
        if (pass == PASSfinal)
        {
            printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym ",
                file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval),
                regm_str(msavereg),regm_str(*pretregs));
            WRTYxx(tym);
            dbg_printf("\n");
        }
}
        tym = tybasic(tym);
        uint size = _tysize[tym];
        // Restrict the caller's candidates to ES, allregs and the XMM registers.
        *pretregs &= mES | allregs | XMMREGS;
        regm_t retregs = *pretregs;

        debug if (retregs == 0)
            printf("allocreg: file %s(%d)\n", file, line);

        // Every candidate is a register variable: hand the request back
        // unchanged instead of searching for a scratch register.
        if ((retregs & regcon.mvar) == retregs) // if exactly in reg vars
        {
            if (size <= REGSIZE || (retregs & XMMREGS))
            {
                *preg = findreg(retregs);
                assert(retregs == mask(*preg)); /* no more bits are set */
            }
            else if (size <= 2 * REGSIZE)
            {
                *preg = findregmsw(retregs);
                assert(retregs & mLSW);
            }
            else
                assert(0);
            getregs(cdb,retregs);
            return;
        }
        int count = 0;
L1:
        // Retry entry point: re-entered after the candidate set was changed.
        // NOTE(review): the increment of `count` lives inside the assert
        // expression, so the retry limit only exists in builds that
        // evaluate asserts.
        //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        assert(++count < 20);           /* fail instead of hanging if blocked */
        assert(retregs);
        reg_t msreg = NOREG, lsreg = NOREG;  /* no value assigned yet        */
L3:
        // Build the preferred subset r of retregs by relaxing the exclusions
        // step by step until something remains:
        //   1. not in msavereg, not holding a CSE value, not in regcon.params
        //   2. not in msavereg, not holding a CSE value
        //   3. not in msavereg, not in regcon.cse.mops
        //   4. not in msavereg
        //   5. anything in retregs
        //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        regm_t r = retregs & ~(msavereg | regcon.cse.mval | regcon.params);
        if (!r)
        {
            r = retregs & ~(msavereg | regcon.cse.mval);
            if (!r)
            {
                r = retregs & ~(msavereg | regcon.cse.mops);
                if (!r)
                {   r = retregs & ~msavereg;
                    if (!r)
                        r = retregs;
                }
            }
        }

        if (size <= REGSIZE || retregs & XMMREGS)
        {
            // Single register. Each filter below is applied only when it
            // leaves at least one candidate.

            // Avoid BP when something else is available.
            if (r & ~mBP)
                r &= ~mBP;

            // If only one index register, prefer to not use LSW registers
            if (!regcon.indexregs && r & ~mLSW)
                r &= ~mLSW;

            if (pass == PASSfinal && r & ~lastretregs && !I16)
            {   // Try not to always allocate the same register,
                // to schedule better

                // Step away from the most recent allocations, newest first,
                // stopping as soon as a step would leave no candidate.
                r &= ~lastretregs;
                if (r & ~last2retregs)
                {
                    r &= ~last2retregs;
                    if (r & ~last3retregs)
                    {
                        r &= ~last3retregs;
                        if (r & ~last4retregs)
                        {
                            r &= ~last4retregs;
//                          if (r & ~last5retregs)
//                              r &= ~last5retregs;
                        }
                    }
                }
                // Prefer registers whose mfuncreg bit is already cleared.
                if (r & ~mfuncreg)
                    r &= ~mfuncreg;
            }
            reg = findreg(r);
            retregs = mask(reg);
        }
        else if (size <= 2 * REGSIZE)
        {
            /* Select pair with both regs free. Failing */
            /* that, select pair with one reg free.             */

            // BP is not used as half of a pair: drop it and recompute r.
            if (r & mBP)
            {
                retregs &= ~mBP;
                goto L3;
            }

            if (r & mMSW)
            {
                if (r & mDX)
                    msreg = DX;                 /* prefer to use DX over CX */
                else
                    msreg = findregmsw(r);
                r &= mLSW;                      /* see if there's an LSW also */
                if (r)
                    lsreg = findreg(r);
                else if (lsreg == NOREG)   /* if don't have LSW yet */
                {
                    // Keep the chosen msreg and search again among the
                    // LSW candidates only.
                    retregs &= mLSW;
                    goto L3;
                }
            }
            else
            {
                if (I64 && !(r & mLSW))
                {
                    // r contains neither an MSW nor an LSW register:
                    // start over with the pair registers of *pretregs.
                    retregs = *pretregs & (mMSW | mLSW);
                    assert(retregs);
                    goto L1;
                }
                lsreg = findreglsw(r);
                if (msreg == NOREG)
                {
                    // Keep the chosen lsreg and search again among the
                    // MSW candidates only.
                    retregs &= mMSW;
                    assert(retregs);
                    goto L3;
                }
            }
            // Report the MSW register, except that the LSW register is
            // reported when the MSW register is ES.
            reg = (msreg == ES) ? lsreg : msreg;
            retregs = mask(msreg) | mask(lsreg);
        }
        else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
        {
            // 16-bit double: only the fixed DOUBLEREGS set is accepted.
            debug
            if (retregs != DOUBLEREGS)
                printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs));

            assert(retregs == DOUBLEREGS);
            reg = AX;
        }
        else
        {
            // No allocation strategy for this size: report and abort.
            // NOTE(review): *preg is printed here although this function
            // has not assigned it yet on this path.
            debug
            {
                WRTYxx(tym);
                printf("\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n",
                    file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym);
            }
            assert(0);
        }
        if (retregs & regcon.mvar)              // if conflict with reg vars
        {
            // Remove the conflicting register variables from the candidates
            // and retry, unless the request is larger than one register and
            // was exactly AX|DX.
            if (!(size > REGSIZE && *pretregs == (mAX | mDX)))
            {
                retregs = (*pretregs &= ~(retregs & regcon.mvar));
                goto L1;                // try other registers
            }
        }
        *preg = reg;
        *pretregs = retregs;

        // Shift the allocation history consulted by the final-pass rotation above.
        //printf("Allocating %s\n",regm_str(retregs));
        last5retregs = last4retregs;
        last4retregs = last3retregs;
        last3retregs = last2retregs;
        last2retregs = lastretregs;
        lastretregs = retregs;
        // Mark the registers used; getregs() saves any CSE they still hold.
        getregs(cdb, retregs);
}
2195 
2196 /******************************
2197  * Determine registers that should be destroyed upon arrival
2198  * to code entry point for exception handling.
2199  */
regm_t lpadregs()
{
    //printf("lpadregs(): allregs=%s, mfuncreg=%s\n", regm_str(allregs), regm_str(mfuncreg));

    // DWARF EH: the allocatable registers minus those still set in mfuncreg.
    if (config.ehmethod == EHmethod.EH_DWARF)
        return allregs & ~mfuncreg;

    // Otherwise: all of allregs for 32/64 bit code, else ALLREGS plus ES.
    return (I32 | I64) ? allregs : (ALLREGS | mES);
}
2210 
2211 
2212 /*************************
2213  * Mark registers as used.
2214  */
2215 
void useregs(regm_t regm)
{
    //printf("useregs(x%x) %s\n", regm, regm_str(regm));
    const regm_t untouched = ~regm;

    mfuncreg &= untouched;
    regcon.used |= regm;                // registers used in this block

    // Touching any fastpar register variable discards all of
    // regcon.params; otherwise only the registers in regm are removed.
    if (regm & regcon.mpvar)
        regcon.params = 0;
    else
        regcon.params &= untouched;
}
2225 
2226 /*************************
2227  * We are going to use the registers in mask r.
2228  * Generate any code necessary to save any regs.
2229  */
2230 
void getregs(ref CodeBuilder cdb, regm_t r)
{
    //printf("getregs(x%x) %s\n", r, regm_str(r));

    // CSEs in regcon.cse.mops that live in the requested registers must
    // be saved; capture them before any bookkeeping is updated.
    const regm_t mustSave = regcon.cse.mops & r;

    useregs(r);

    // The requested registers no longer hold CSE values or immediates,
    // and are removed from msavereg.
    const regm_t keep = ~r;
    regcon.cse.mval &= keep;
    msavereg &= keep;
    regcon.immed.mval &= keep;

    if (mustSave != 0)
        cse_save(cdb, mustSave);
}
2242 
2243 /*************************
2244  * We are going to use the registers in mask r.
2245  * Same as getregs(), but assert if code is needed to be generated.
2246  */
void getregsNoSave(regm_t r)
{
    //printf("getregsNoSave(x%x) %s\n", r, regm_str(r));

    // No register in r may hold a CSE that would have to be saved.
    assert((regcon.cse.mops & r) == 0);

    useregs(r);

    // Same bookkeeping as getregs(): drop CSE values and immediates held
    // in r, and remove r from msavereg.
    const regm_t keep = ~r;
    regcon.cse.mval &= keep;
    msavereg &= keep;
    regcon.immed.mval &= keep;
}
2256 
2257 /*****************************************
2258  * Copy registers in cse.mops into memory.
2259  */
2260 
private void cse_save(ref CodeBuilder cdb, regm_t ms)
{
    // ms must be a subset of regcon.cse.mops; those bits are cleared from
    // mops up front, before any store is generated.
    assert((ms & regcon.cse.mops) == ms);
    regcon.cse.mops &= ~ms;

    /* Skip CSEs that are already saved */
    // Visit every single-register mask below mask(NUMREGS).
    for (regm_t regm = 1; regm < mask(NUMREGS); regm <<= 1)
    {
        if (regm & ms)
        {
            // Elem currently recorded for this register, and its size.
            const e = regcon.cse.value[findreg(regm)];
            const sz = tysize(e.Ety);
            // presumably CSE.filter(e) yields the existing CSE entries
            // recorded for e -- confirm against the CSE implementation
            foreach (const ref cse; CSE.filter(e))
            {
                // An entry counts as covering this register when the value
                // fits in one register, or when it is a pair and both this
                // register and the entry are on the same (MSW or LSW)
                // side, or when it is 4*REGSIZE and the entry names
                // exactly this register.
                if (sz <= REGSIZE ||
                    sz <= 2 * REGSIZE &&
                        (regm & mMSW && cse.regm & mMSW ||
                         regm & mLSW && cse.regm & mLSW) ||
                    sz == 4 * REGSIZE && regm == cse.regm
                   )
                {
                    // Covered: drop it from the work list; stop entirely
                    // once nothing is left to save.
                    ms &= ~regm;
                    if (!ms)
                        return;
                    break;
                }
            }
        }
    }

    // Create a new CSE entry for each remaining register, lowest first.
    while (ms)
    {
        auto cse = CSE.add();
        reg_t reg = findreg(ms);          /* the register to save         */
        cse.e = regcon.cse.value[reg];
        cse.regm = mask(reg);

        ms &= ~mask(reg);           /* turn off reg bit in ms       */

        // If we can simply reload the CSE, we don't need to save it
        if (cse_simple(&cse.csimple, cse.e))
            cse.flags |= CSEsimple;
        else
        {
            // Otherwise emit a store of the register into the entry's slot
            // and set reflocal.
            CSE.updateSizeAndAlign(cse.e);
            gen_storecse(cdb, cse.e.Ety, reg, cse.slot);
            reflocal = true;
        }
    }
}
2311 
2312 /******************************************
2313  * Getregs without marking immediate register values as gone.
2314  */
2315 
void getregs_imm(ref CodeBuilder cdb, regm_t r)
{
    // getregs() clears r out of regcon.immed.mval; snapshot the mask
    // beforehand and put it back afterwards.
    const regm_t immedBefore = regcon.immed.mval;
    getregs(cdb, r);
    regcon.immed.mval = immedBefore;
}
2322 
2323 /******************************************
2324  * Flush all CSE's out of registers and into memory.
2325  * Input:
2326  *      do87    !=0 means save 87 registers too
2327  */
2328 
void cse_flush(ref CodeBuilder cdb, int do87)
{
    //dbg_printf("cse_flush()\n");

    // Save every CSE currently recorded in regcon.cse.mops to memory.
    const regm_t pending = regcon.cse.mops;
    cse_save(cdb, pending);

    // Optionally save the 8087 temporaries as well.
    if (do87 != 0)
        save87(cdb);
}
2336 
2337 /*************************
2338  * Common subexpressions exist in registers. Note this in regcon.cse.mval.
2339  * Input:
2340  *      e       the subexpression
2341  *      regm    mask of registers holding it
2342  *      opsflag if != 0 then regcon.cse.mops gets set too
2343  * Returns:
2344  *      false   not saved as a CSE
2345  *      true    saved as a CSE
2346  */
2347 
bool cssave(elem *e,regm_t regm,uint opsflag)
{
    // Only a CSE that still has pending uses is worth recording.
    /*if (e.Ecount && e.Ecount == e.Ecomsub)*/
    if (!(e.Ecount && e.Ecomsub))
        return false;

    if (!opsflag && pass != PASSfinal && (I32 || I64))
        return false;

    //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag);
    regm &= mBP | ALLREGS | mES | XMMREGS;    /* just to be sure              */

    /+
        Historical, disabled condition:
        Do not register CSEs if they are register variables and
        are not operator nodes. This forces the register allocation
        to go through allocreg(), which will prevent using register
        variables for scratch.
            if (opsflag || !(regm & regcon.mvar))
    +/

    bool recorded = false;
    for (uint regnum = 0; regm != 0; ++regnum)
    {
        const regm_t bit = mask(regnum);
        if (!(regm & bit))
            continue;
        regm &= ~bit;

        // If we don't need this CSE, and the register already
        // holds a CSE that we do need, don't mark the new one
        if (regcon.cse.mval & bit && regcon.cse.value[regnum] != e &&
            !opsflag && regcon.cse.mops & bit)
            continue;

        regcon.cse.mval |= bit;
        if (opsflag)
            regcon.cse.mops |= bit;
        //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[regnum],e);
        regcon.cse.value[regnum] = e;
        recorded = true;
    }
    return recorded;
}
2392 
2393 /*************************************
2394  * Determine if a computation should be done into a register.
2395  */
2396 
bool evalinregister(elem *e)
{
    if (config.exe == EX_WIN64 && e.Eoper == OPrelconst)
        return true;

    // Not a CSE: no need to evaluate it in a register.
    if (!e.Ecount)
        return false;

    // Operators are always evaluated in a register.
    if (!OTleaf(e.Eoper))
        return true;

    // Need to rethink this code if float or double can be CSE'd
    const uint sz = tysize(e.Ety);

    if (e.Ecomsub == e.Ecount)
    {
        // A CSE that still has to be generated.
        // (a `pass == PASSfinal` condition was dropped here, bug 8987)
        if (!(I32 || I64) || sz > REGSIZE)
            return false;

        regm_t avail = allregs & ~regcon.mvar;
        if (sz == 1)
            avail &= BYTEREGS;

        // Need to be at least 3 registers available, as addressing modes
        // can use up 2. Clearing the lowest set bit must still leave two
        // or more bits behind.
        const regm_t rest = avail & (avail - 1);
        return (rest & (rest - 1)) != 0;
    }

    // The CSE may already have been generated. If a register still holds
    // it, the computation should be done using that register.
    regm_t holders = 0;
    for (uint regnum = 0; regnum < regcon.cse.value.length; regnum++)
    {
        if (regcon.cse.value[regnum] == e)
            holders |= mask(regnum);
    }
    holders &= regcon.cse.mval;         // mask of available CSEs

    if (sz <= REGSIZE)
        return holders != 0;            // the CSE is in a register
    if (sz <= 2 * REGSIZE)
        return (holders & mMSW) && (holders & mLSW);
    return true;                        // cop-out for now
}
2449 
2450 /*******************************************************
2451  * Return mask of scratch registers.
2452  */
2453 
regm_t getscratch()
{
    // Scratch registers are only reported during the final pass.
    if (pass != PASSfinal)
        return 0;

    // Everything in allregs that is not claimed by one of these masks.
    const regm_t busy = regcon.mvar | regcon.mpvar | regcon.cse.mval |
                        regcon.immed.mval | regcon.params | mfuncreg;
    return allregs & ~busy;
}
2464 
2465 /******************************
2466  * Evaluate an elem that is a common subexp that has been encountered
2467  * before.
2468  * Look first to see if it is already in a register.
2469  */
2470 
2471 private void comsub(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2472 {
2473     tym_t tym;
2474     regm_t regm,emask;
2475     reg_t reg;
2476     uint byte_,sz;
2477 
2478     //printf("comsub(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
2479     elem_debug(e);
2480 
2481     debug
2482     {
2483         if (e.Ecomsub > e.Ecount)
2484             elem_print(e);
2485     }
2486 
2487     assert(e.Ecomsub <= e.Ecount);
2488 
2489     if (*pretregs == 0)        // no possible side effects anyway
2490     {
2491         return;
2492     }
2493 
2494     /* First construct a mask, emask, of all the registers that
2495      * have the right contents.
2496      */
2497     emask = 0;
2498     for (uint i = 0; i < regcon.cse.value.length; i++)
2499     {
2500         //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]);
2501         if (regcon.cse.value[i] == e)   // if contents are right
2502                 emask |= mask(i);       // turn on bit for reg
2503     }
2504     emask &= regcon.cse.mval;                     // make sure all bits are valid
2505 
2506     if (emask & XMMREGS && *pretregs == mPSW)
2507         { }
2508     else if (tyxmmreg(e.Ety) && config.fpxmmregs)
2509     {
2510         if (*pretregs & (mST0 | mST01))
2511         {
2512             regm_t retregs = *pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1;
2513             comsub(cdb, e, &retregs);
2514             fixresult(cdb,e,retregs,pretregs);
2515             return;
2516         }
2517     }
2518     else if (tyfloating(e.Ety) && config.inline8087)
2519     {
2520         comsub87(cdb,e,pretregs);
2521         return;
2522     }
2523 
2524 
2525     /* create mask of CSEs */
2526     regm_t csemask = CSE.mask(e);
2527     csemask &= ~emask;            // stuff already in registers
2528 
2529     debug if (debugw)
2530     {
2531         printf("comsub(e=%p): *pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n",
2532                 e,regm_str(*pretregs),regm_str(emask),regm_str(csemask),
2533                 regm_str(regcon.cse.mval),regm_str(regcon.mvar));
2534         if (regcon.cse.mval & 1)
2535             elem_print(regcon.cse.value[0]);
2536     }
2537 
2538     tym = tybasic(e.Ety);
2539     sz = _tysize[tym];
2540     byte_ = sz == 1;
2541 
2542     if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register
2543     {
2544         /* First see if it is already in a correct register     */
2545 
2546         regm = emask & *pretregs;
2547         if (regm == 0)
2548             regm = emask;               /* try any other register       */
2549         if (regm)                       /* if it's in a register        */
2550         {
2551             if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (*pretregs & regcon.mvar) == *pretregs)
2552             {
2553                 regm = mask(findreg(regm));
2554                 fixresult(cdb,e,regm,pretregs);
2555                 return;
2556             }
2557         }
2558 
2559         if (OTleaf(e.Eoper))                  /* if not op or func            */
2560             goto reload;                      /* reload data                  */
2561 
2562         foreach (ref cse; CSE.filter(e))
2563         {
2564             regm_t retregs;
2565 
2566             if (cse.flags & CSEsimple)
2567             {
2568                 retregs = *pretregs;
2569                 if (byte_ && !(retregs & BYTEREGS))
2570                     retregs = BYTEREGS;
2571                 else if (!(retregs & allregs))
2572                     retregs = allregs;
2573                 allocreg(cdb,&retregs,&reg,tym);
2574                 code *cr = &cse.csimple;
2575                 cr.setReg(reg);
2576                 if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1)
2577                     cr.Irex |= REX;
2578                 cdb.gen(cr);
2579                 goto L10;
2580             }
2581             else
2582             {
2583                 reflocal = true;
2584                 cse.flags |= CSEload;
2585                 if (*pretregs == mPSW)  // if result in CCs only
2586                 {
2587                     if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety)))
2588                     {
2589                         retregs = XMMREGS;
2590                         allocreg(cdb,&retregs,&reg,tym);
2591                         gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2592                         regcon.cse.mval |= mask(reg); // cs is in a reg
2593                         regcon.cse.value[reg] = e;
2594                         fixresult(cdb,e,retregs,pretregs);
2595                     }
2596                     else
2597                     {
2598                         // CMP cs[BP],0
2599                         gen_testcse(cdb, cse.e.Ety, sz, cse.slot);
2600                     }
2601                 }
2602                 else
2603                 {
2604                     retregs = *pretregs;
2605                     if (byte_ && !(retregs & BYTEREGS))
2606                         retregs = BYTEREGS;
2607                     allocreg(cdb,&retregs,&reg,tym);
2608                     gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2609                 L10:
2610                     regcon.cse.mval |= mask(reg); // cs is in a reg
2611                     regcon.cse.value[reg] = e;
2612                     fixresult(cdb,e,retregs,pretregs);
2613                 }
2614             }
2615             return;
2616         }
2617 
2618         debug
2619         {
2620             printf("couldn't find cse e = %p, pass = %d\n",e,pass);
2621             elem_print(e);
2622         }
2623         assert(0);                      /* should have found it         */
2624     }
2625     else                                  /* reg pair is req'd            */
2626     if (sz <= 2 * REGSIZE)
2627     {
2628         reg_t msreg,lsreg;
2629 
2630         /* see if we have both  */
2631         if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP)))
2632         {                               /* we don't have both           */
2633             debug if (!OTleaf(e.Eoper))
2634             {
2635                 printf("e = %p, op = x%x, emask = %s, csemask = %s\n",
2636                     e,e.Eoper,regm_str(emask),regm_str(csemask));
2637                 //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW);
2638                 elem_print(e);
2639             }
2640 
2641             assert(OTleaf(e.Eoper));        /* must have both for operators */
2642             goto reload;
2643         }
2644 
2645         /* Look for right vals in any regs      */
2646         regm = *pretregs & mMSW;
2647         if (emask & regm)
2648             msreg = findreg(emask & regm);
2649         else if (emask & mMSW)
2650             msreg = findregmsw(emask);
2651         else                    /* reload from cse array        */
2652         {
2653             if (!regm)
2654                 regm = mMSW & ALLREGS;
2655             allocreg(cdb,&regm,&msreg,TYint);
2656             loadcse(cdb,e,msreg,mMSW);
2657         }
2658 
2659         regm = *pretregs & (mLSW | mBP);
2660         if (emask & regm)
2661             lsreg = findreg(emask & regm);
2662         else if (emask & (mLSW | mBP))
2663             lsreg = findreglsw(emask);
2664         else
2665         {
2666             if (!regm)
2667                 regm = mLSW;
2668             allocreg(cdb,&regm,&lsreg,TYint);
2669             loadcse(cdb,e,lsreg,mLSW | mBP);
2670         }
2671 
2672         regm = mask(msreg) | mask(lsreg);       /* mask of result       */
2673         fixresult(cdb,e,regm,pretregs);
2674         return;
2675     }
2676     else if (tym == TYdouble || tym == TYdouble_alias)    // double
2677     {
2678         assert(I16);
2679         if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16)
2680         {
2681             static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d
2682             for (reg = 0; reg != NOREG; reg = dblreg[reg])
2683             {
2684                 assert(cast(int) reg >= 0 && reg <= 7);
2685                 if (mask(reg) & csemask)
2686                     loadcse(cdb,e,reg,mask(reg));
2687             }
2688             regm = DOUBLEREGS_16;
2689             fixresult(cdb,e,regm,pretregs);
2690             return;
2691         }
2692         if (OTleaf(e.Eoper)) goto reload;
2693 
2694         debug
2695         printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask));
2696 
2697         assert(0);
2698     }
2699     else
2700     {
2701         debug
2702         printf("e = %p, tym = x%x\n",e,tym);
2703 
2704         assert(0);
2705     }
2706 
2707 reload:                                 /* reload result from memory    */
2708     switch (e.Eoper)
2709     {
2710         case OPrelconst:
2711             cdrelconst(cdb,e,pretregs);
2712             break;
2713 
2714 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
2715 {
2716         case OPgot:
2717             cdgot(cdb,e,pretregs);
2718             break;
2719 }
2720         default:
2721             if (*pretregs == mPSW &&
2722                 config.fpxmmregs &&
2723                 (tyxmmreg(tym) || tysimd(tym)))
2724             {
2725                 regm_t retregs = XMMREGS | mPSW;
2726                 loaddata(cdb,e,&retregs);
2727                 cssave(e,retregs,false);
2728                 return;
2729             }
2730             loaddata(cdb,e,pretregs);
2731             break;
2732     }
2733     cssave(e,*pretregs,false);
2734 }
2735 
2736 
2737 /*****************************
2738  * Load reg from cse save area on stack.
2739  */
2740 
2741 private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm)
2742 {
2743     foreach (ref cse; CSE.filter(e))
2744     {
2745         //printf("CSE[%d] = %p, regm = %s\n", i, cse.e, regm_str(cse.regm));
2746         if (cse.regm & regm)
2747         {
2748             reflocal = true;
2749             cse.flags |= CSEload;    /* it was loaded        */
2750             regcon.cse.value[reg] = e;
2751             regcon.cse.mval |= mask(reg);
2752             getregs(cdb,mask(reg));
2753             gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2754             return;
2755         }
2756     }
2757     debug
2758     {
2759         printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm));
2760         elem_print(e);
2761     }
2762     assert(0);
2763 }
2764 
2765 /***************************
2766  * Generate code sequence for an elem.
2767  * Input:
2768  *      pretregs =      mask of possible registers to return result in
2769  *                      Note:   longs are in AX,BX or CX,DX or SI,DI
2770  *                              doubles are AX,BX,CX,DX only
2771  *      constflag =     1 for user of result will not modify the
2772  *                      registers returned in *pretregs.
2773  *                      2 for freenode() not called.
2774  * Output:
2775  *      *pretregs       mask of registers result is returned in
 *      cdb             generated code sequence is appended to this builder
2778  */
2779 
void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op)
{
    // Fetch the generator registered for `op` in the cdxxx table and run it.
    auto generator = cdxxx[op];
    generator(cdb, e, pretregs);
}
2784 
// Jump table: maps each operator (OPxxx index) to the function that generates
// code for an elem with that operator. Every entry has the signature
// (ref CodeBuilder, elem*, regm_t*), i.e. destination builder, the elem, and
// the in/out mask of result registers.
// The table has OPMAX slots, so valid indices are 0 .. OPMAX-1.
// Many operators are bound to cderr; presumably these are operators that are
// not expected to reach this dispatch (TODO confirm against cderr).
// Note that codelem() handles OPvar, OPconst and OPrelconst in its own switch
// cases rather than going through this table.
private extern (C++) __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx =
[
    OPunde:    &cderr,
    OPadd:     &cdorth,
    OPmul:     &cdmul,
    OPand:     &cdorth,
    OPmin:     &cdorth,
    OPnot:     &cdnot,
    OPcom:     &cdcom,
    OPcond:    &cdcond,
    OPcomma:   &cdcomma,
    OPremquo:  &cdmul,
    OPdiv:     &cdmul,
    OPmod:     &cdmul,
    OPxor:     &cdorth,
    OPstring:  &cderr,
    OPrelconst: &cdrelconst,
    OPinp:     &cdport,
    OPoutp:    &cdport,
    OPasm:     &cdasm,
    OPinfo:    &cdinfo,
    OPdctor:   &cddctor,
    OPddtor:   &cdddtor,
    OPctor:    &cdctor,
    OPdtor:    &cddtor,
    OPmark:    &cdmark,
    OPvoid:    &cdvoid,
    OPhalt:    &cdhalt,
    OPnullptr: &cderr,
    OPpair:    &cdpair,
    OPrpair:   &cdpair,

    OPor:      &cdorth,
    OPoror:    &cdloglog,
    OPandand:  &cdloglog,
    OProl:     &cdshift,
    OPror:     &cdshift,
    OPshl:     &cdshift,
    OPshr:     &cdshift,
    OPashr:    &cdshift,
    OPbit:     &cderr,
    OPind:     &cdind,
    OPaddr:    &cderr,
    OPneg:     &cdneg,
    OPuadd:    &cderr,
    OPabs:     &cdabs,
    OPsqrt:    &cdneg,
    OPsin:     &cdneg,
    OPcos:     &cdneg,
    OPscale:   &cdscale,
    OPyl2x:    &cdscale,
    OPyl2xp1:  &cdscale,
    OPcmpxchg:     &cdcmpxchg,
    OPrint:    &cdneg,
    OPrndtol:  &cdrndtol,
    OPstrlen:  &cdstrlen,
    OPstrcpy:  &cdstrcpy,
    OPmemcpy:  &cdmemcpy,
    OPmemset:  &cdmemset,
    OPstrcat:  &cderr,
    OPstrcmp:  &cdstrcmp,
    OPmemcmp:  &cdmemcmp,
    OPsetjmp:  &cdsetjmp,
    OPnegass:  &cdaddass,
    OPpreinc:  &cderr,
    OPpredec:  &cderr,
    OPstreq:   &cdstreq,
    OPpostinc: &cdpost,
    OPpostdec: &cdpost,
    OPeq:      &cdeq,
    OPaddass:  &cdaddass,
    OPminass:  &cdaddass,
    OPmulass:  &cdmulass,
    OPdivass:  &cdmulass,
    OPmodass:  &cdmulass,
    OPshrass:  &cdshass,
    OPashrass: &cdshass,
    OPshlass:  &cdshass,
    OPandass:  &cdaddass,
    OPxorass:  &cdaddass,
    OPorass:   &cdaddass,

    OPle:      &cdcmp,
    OPgt:      &cdcmp,
    OPlt:      &cdcmp,
    OPge:      &cdcmp,
    OPeqeq:    &cdcmp,
    OPne:      &cdcmp,

    OPunord:   &cdcmp,
    OPlg:      &cdcmp,
    OPleg:     &cdcmp,
    OPule:     &cdcmp,
    OPul:      &cdcmp,
    OPuge:     &cdcmp,
    OPug:      &cdcmp,
    OPue:      &cdcmp,
    OPngt:     &cdcmp,
    OPnge:     &cdcmp,
    OPnlt:     &cdcmp,
    OPnle:     &cdcmp,
    OPord:     &cdcmp,
    OPnlg:     &cdcmp,
    OPnleg:    &cdcmp,
    OPnule:    &cdcmp,
    OPnul:     &cdcmp,
    OPnuge:    &cdcmp,
    OPnug:     &cdcmp,
    OPnue:     &cdcmp,

    OPvp_fp:   &cdcnvt,
    OPcvp_fp:  &cdcnvt,
    OPoffset:  &cdlngsht,
    OPnp_fp:   &cdshtlng,
    OPnp_f16p: &cdfar16,
    OPf16p_np: &cdfar16,

    OPs16_32:  &cdshtlng,
    OPu16_32:  &cdshtlng,
    OPd_s32:   &cdcnvt,
    OPb_8:     &cdcnvt,
    OPs32_d:   &cdcnvt,
    OPd_s16:   &cdcnvt,
    OPs16_d:   &cdcnvt,
    OPd_u16:   &cdcnvt,
    OPu16_d:   &cdcnvt,
    OPd_u32:   &cdcnvt,
    OPu32_d:   &cdcnvt,
    OP32_16:   &cdlngsht,
    OPd_f:     &cdcnvt,
    OPf_d:     &cdcnvt,
    OPd_ld:    &cdcnvt,
    OPld_d:    &cdcnvt,
    OPc_r:     &cdconvt87,
    OPc_i:     &cdconvt87,
    OPu8_16:   &cdbyteint,
    OPs8_16:   &cdbyteint,
    OP16_8:    &cdlngsht,
    OPu32_64:  &cdshtlng,
    OPs32_64:  &cdshtlng,
    OP64_32:   &cdlngsht,
    OPu64_128: &cdshtlng,
    OPs64_128: &cdshtlng,
    OP128_64:  &cdlngsht,
    OPmsw:     &cdmsw,

    OPd_s64:   &cdcnvt,
    OPs64_d:   &cdcnvt,
    OPd_u64:   &cdcnvt,
    OPu64_d:   &cdcnvt,
    OPld_u64:  &cdcnvt,
    OPparam:   &cderr,
    OPsizeof:  &cderr,
    OParrow:   &cderr,
    OParrowstar: &cderr,
    OPcolon:   &cderr,
    OPcolon2:  &cderr,
    OPbool:    &cdnot,
    OPcall:    &cdfunc,
    OPucall:   &cdfunc,
    OPcallns:  &cdfunc,
    OPucallns: &cdfunc,
    OPstrpar:  &cderr,
    OPstrctor: &cderr,
    OPstrthis: &cdstrthis,
    OPconst:   &cderr,
    OPvar:     &cderr,
    OPnew:     &cderr,
    OPanew:    &cderr,
    OPdelete:  &cderr,
    OPadelete: &cderr,
    OPbrack:   &cderr,
    OPframeptr: &cdframeptr,
    OPgot:     &cdgot,

    OPbsf:     &cdbscan,
    OPbsr:     &cdbscan,
    OPbtst:    &cdbtst,
    OPbt:      &cdbt,
    OPbtc:     &cdbt,
    OPbtr:     &cdbt,
    OPbts:     &cdbt,

    OPbswap:   &cdbswap,
    OPpopcnt:  &cdpopcnt,
    OPvector:  &cdvector,
    OPvecsto:  &cdvecsto,
    OPvecfill: &cdvecfill,
    OPva_start: &cderr,
    OPprefetch: &cdprefetch,
];
2977 
2978 
/***************************
 * Generate code for elem `e`, appending it to `cdb`.
 *
 * Common subexpressions that have already been evaluated (Ecount set and
 * different from Ecomsub) are fetched through comsub(). OPvar, OPconst and
 * OPrelconst are handled directly; every other operator is dispatched
 * through the cdxxx table.
 * Params:
 *      cdb       = code builder that receives the generated code
 *      e         = elem to generate code for, must not be null
 *      pretregs  = in: mask of registers the result may be returned in;
 *                  out: mask of registers the result was returned in
 *      constflag = bit 1: user of the result will not modify the registers
 *                  returned in *pretregs;
 *                  bit 2: do not call freenode(e) on completion
 */
void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag)
{
    Symbol *s;

    // Check first: the debug trace below already dereferences e.
    assert(e);

    debug if (debugw)
    {
        printf("+codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs));
        WROP(e.Eoper);
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
        printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
    }

    elem_debug(e);

    // Invariant: every register in cse.mops must also be in cse.mval.
    if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
    {
        debug
        {
            printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs));
            elem_print(e);
            printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                    regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
            printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
        }
        assert(0);
    }

    // If the caller may modify the result and some requested register is not
    // a register variable, exclude the register variables from the request.
    if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar)
        *pretregs &= ~regcon.mvar;                      /* can't use register vars */

    uint op = e.Eoper;
    if (e.Ecount && e.Ecount != e.Ecomsub)     // if common subexp
    {
        comsub(cdb,e,pretregs);
        goto L1;
    }

    if (configv.addlinenumbers && e.Esrcpos.Slinnum)
        cdb.genlinnum(e.Esrcpos);

    switch (op)
    {
        default:
            // cdxxx has exactly OPMAX entries, so the last valid index is
            // OPMAX - 1. Check before either use of the table below.
            assert(op < OPMAX);

            if (e.Ecount)                          /* if common subexp     */
            {
                /* if no return value       */
                if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0)
                {
                    if (*pretregs & (mST0 | mST01))
                    {
                        //printf("generate ST0 comsub for:\n");
                        //elem_print(e);

                        // Evaluate into XMM registers, save that as the CSE,
                        // then move the result to where the caller asked.
                        regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1;
                        (*cdxxx[op])(cdb,e,&retregs);
                        cssave(e,retregs,!OTleaf(op));
                        fixresult(cdb, e, retregs, pretregs);
                        goto L1;
                    }
                    // Force a register result so the CSE can be saved,
                    // choosing the register class from the elem's type.
                    if (tysize(e.Ety) == 1)
                        *pretregs |= BYTEREGS;
                    else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs)
                        *pretregs |= XMMREGS;
                    else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias)
                        *pretregs |= DOUBLEREGS;
                    else
                        *pretregs |= ALLREGS;       /* make one             */
                }

                /* BUG: For CSEs, make sure we have both an MSW             */
                /* and an LSW specified in *pretregs                        */
            }
            (*cdxxx[op])(cdb,e,pretregs);
            break;

        case OPrelconst:
            cdrelconst(cdb,e,pretregs);
            break;

        case OPvar:
            // Read-only use of a register variable whose registers are all
            // acceptable: narrow the request to the variable's own registers.
            if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg &&
                (s.Sregm & *pretregs) == s.Sregm)
            {
                if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE)
                    *pretregs &= mPSW | (s.Sregm & mLSW);
                else
                    *pretregs &= mPSW | s.Sregm;
            }
            goto case OPconst;

        case OPconst:
            // No result requested, but the value is volatile or used as a CSE
            // at least 3 times: request a register class suited to the type.
            if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile))
            {
                switch (tybasic(e.Ety))
                {
                    case TYbool:
                    case TYchar:
                    case TYschar:
                    case TYuchar:
                        *pretregs |= BYTEREGS;
                        break;

                    case TYnref:
                    case TYnptr:
                    case TYsptr:
                    case TYcptr:
                    case TYfgPtr:
                    case TYimmutPtr:
                    case TYsharePtr:
                    case TYrestrictPtr:
                        *pretregs |= I16 ? IDXREGS : ALLREGS;
                        break;

                    case TYshort:
                    case TYushort:
                    case TYint:
                    case TYuint:
                    case TYlong:
                    case TYulong:
                    case TYllong:
                    case TYullong:
                    case TYcent:
                    case TYucent:
                    case TYfptr:
                    case TYhptr:
                    case TYvptr:
                        *pretregs |= ALLREGS;
                        break;

                    default:
                        break;
                }
            }
            loaddata(cdb,e,pretregs);
            break;
    }
    cssave(e,*pretregs,!OTleaf(op));
L1:
    if (!(constflag & 2))
        freenode(e);

    debug if (debugw)
    {
        printf("-codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs));
        WROP(op);
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
    }
}
3130 
3131 /*******************************
3132  * Same as codelem(), but do not destroy the registers in keepmsk.
3133  * Use scratch registers as much as possible, then use stack.
3134  * Input:
3135  *      constflag       true if user of result will not modify the
3136  *                      registers returned in *pretregs.
3137  */
3138 
3139 void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag)
3140 {
3141     regm_t touse;
3142 
3143     debug if (debugw)
3144         printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
3145                 e,regm_str(*pretregs),regm_str(keepmsk),constflag);
3146 
3147     elem_debug(e);
3148     if (constflag)
3149     {
3150         regm_t regm;
3151         reg_t reg;
3152 
3153         if (isregvar(e,&regm,&reg) &&           // if e is a register variable
3154             (regm & *pretregs) == regm &&       // in one of the right regs
3155             e.EV.Voffset == 0
3156            )
3157         {
3158             uint sz1 = tysize(e.Ety);
3159             uint sz2 = tysize(e.EV.Vsym.Stype.Tty);
3160             if (sz1 <= REGSIZE && sz2 > REGSIZE)
3161                 regm &= mLSW | XMMREGS;
3162             fixresult(cdb,e,regm,pretregs);
3163             cssave(e,regm,0);
3164             freenode(e);
3165 
3166             debug if (debugw)
3167                 printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
3168                         e,regm_str(*pretregs),regm_str(keepmsk),constflag);
3169 
3170             return;
3171         }
3172     }
3173     regm_t overlap = msavereg & keepmsk;
3174     msavereg |= keepmsk;          /* add to mask of regs to save          */
3175     regm_t oldregcon = regcon.cse.mval;
3176     regm_t oldregimmed = regcon.immed.mval;
3177     regm_t oldmfuncreg = mfuncreg;       /* remember old one                     */
3178     mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar;
3179     uint stackpushsave = stackpush;
3180     char calledafuncsave = calledafunc;
3181     calledafunc = 0;
3182     CodeBuilder cdbx; cdbx.ctor();
3183     codelem(cdbx,e,pretregs,constflag);    // generate code for the elem
3184 
3185     regm_t tosave = keepmsk & ~msavereg; /* registers to save                    */
3186     if (tosave)
3187     {
3188         cgstate.stackclean++;
3189         genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg);
3190         cgstate.stackclean--;
3191     }
3192 
3193     /* Assert that no new CSEs are generated that are not reflected       */
3194     /* in mfuncreg.                                                       */
3195     debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0)
3196         printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n",
3197                 regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar));
3198 
3199     assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0);
3200 
3201     /* bugzilla 3521
3202      * The problem is:
3203      *    reg op (reg = exp)
3204      * where reg must be preserved (in keepregs) while the expression to be evaluated
3205      * must change it.
3206      * The only solution is to make this variable not a register.
3207      */
3208     if (regcon.mvar & tosave)
3209     {
3210         //elem_print(e);
3211         //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave));
3212         cgreg_unregister(regcon.mvar & tosave);
3213     }
3214 
3215     /* which registers can we use to save other registers in? */
3216     if (config.flags4 & CFG4space ||              // if optimize for space
3217         config.target_cpu >= TARGET_80486)        // PUSH/POP ops are 1 cycle
3218         touse = 0;                              // PUSH/POP pairs are always shorter
3219     else
3220     {
3221         touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval);
3222         /* Don't use registers we'll have to save/restore               */
3223         touse &= ~(fregsaved & oldmfuncreg);
3224         /* Don't use registers that have constant values in them, since
3225            the code generated might have used the value.
3226          */
3227         touse &= ~oldregimmed;
3228     }
3229 
3230     CodeBuilder cdbs1; cdbs1.ctor();
3231     code *cs2 = null;
3232     int adjesp = 0;
3233 
3234     for (uint i = 0; tosave; i++)
3235     {
3236         regm_t mi = mask(i);
3237 
3238         assert(i < REGMAX);
3239         if (mi & tosave)        /* i = register to save                 */
3240         {
3241             if (touse)          /* if any scratch registers             */
3242             {
3243                 uint j;
3244                 for (j = 0; j < 8; j++)
3245                 {
3246                     regm_t mj = mask(j);
3247 
3248                     if (touse & mj)
3249                     {
3250                         genmovreg(cdbs1,j,i);
3251                         cs2 = cat(genmovreg(i,j),cs2);
3252                         touse &= ~mj;
3253                         mfuncreg &= ~mj;
3254                         regcon.used |= mj;
3255                         break;
3256                     }
3257                 }
3258                 assert(j < 8);
3259             }
3260             else                        // else use memory
3261             {
3262                 CodeBuilder cdby; cdby.ctor();
3263                 uint size = gensaverestore(mask(i), cdbs1, cdby);
3264                 cs2 = cat(cdby.finish(),cs2);
3265                 if (size)
3266                 {
3267                     stackchanged = 1;
3268                     adjesp += size;
3269                 }
3270             }
3271             getregs(cdbx,mi);
3272             tosave &= ~mi;
3273         }
3274     }
3275     CodeBuilder cdbs2; cdbs2.ctor();
3276     if (adjesp)
3277     {
3278         // If this is done an odd number of times, it
3279         // will throw off the 8 byte stack alignment.
3280         // We should *only* worry about this if a function
3281         // was called in the code generation by codelem().
3282         int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1);
3283         if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
3284         {
3285             regm_t mval_save = regcon.immed.mval;
3286             regcon.immed.mval = 0;      // prevent reghasvalue() optimizations
3287                                         // because c hasn't been executed yet
3288             cod3_stackadj(cdbs1, sz);
3289             regcon.immed.mval = mval_save;
3290             cdbs1.genadjesp(sz);
3291 
3292             cod3_stackadj(cdbs2, -sz);
3293             cdbs2.genadjesp(-sz);
3294         }
3295         cdbs2.append(cs2);
3296 
3297 
3298         cdbs1.genadjesp(adjesp);
3299         cdbs2.genadjesp(-adjesp);
3300     }
3301     else
3302         cdbs2.append(cs2);
3303 
3304     calledafunc |= calledafuncsave;
3305     msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save   */
3306     mfuncreg &= oldmfuncreg;        /* update original                    */
3307 
3308     debug if (debugw)
3309         printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
3310                 e,regm_str(*pretregs),regm_str(keepmsk),constflag);
3311 
3312     cdb.append(cdbs1);
3313     cdb.append(cdbx);
3314     cdb.append(cdbs2);
3315     return;
3316 }
3317 
3318 /*********************************************
3319  * Turn register mask into a string suitable for printing.
3320  */
3321 
3322 const(char)* regm_str(regm_t rm)
3323 {
3324     enum NUM = 10;
3325     enum SMAX = 128;
3326     __gshared char[SMAX + 1][NUM] str;
3327     __gshared int i;
3328 
3329     if (rm == 0)
3330         return "0";
3331     if (rm == ALLREGS)
3332         return "ALLREGS";
3333     if (rm == BYTEREGS)
3334         return "BYTEREGS";
3335     if (rm == allregs)
3336         return "allregs";
3337     if (rm == XMMREGS)
3338         return "XMMREGS";
3339     char *p = str[i].ptr;
3340     if (++i == NUM)
3341         i = 0;
3342     *p = 0;
3343     for (size_t j = 0; j < 32; j++)
3344     {
3345         if (mask(cast(uint)j) & rm)
3346         {
3347             strcat(p,regstring[j]);
3348             rm &= ~mask(cast(uint)j);
3349             if (rm)
3350                 strcat(p,"|");
3351         }
3352     }
3353     if (rm)
3354     {   char *s = p + strlen(p);
3355         sprintf(s,"x%02x",rm);
3356     }
3357     assert(strlen(p) <= SMAX);
3358     return strdup(p);
3359 }
3360 
3361 /*********************************
3362  * Scan down comma-expressions.
3363  * Output:
3364  *      *pe = first elem down right side that is not an OPcomma
3365  * Returns:
3366  *      code generated for left branches of comma-expressions
3367  */
3368 
3369 void docommas(ref CodeBuilder cdb,elem **pe)
3370 {
3371     uint stackpushsave = stackpush;
3372     int stackcleansave = cgstate.stackclean;
3373     cgstate.stackclean = 0;
3374     elem* e = *pe;
3375     while (1)
3376     {
3377         if (configv.addlinenumbers && e.Esrcpos.Slinnum)
3378         {
3379             cdb.genlinnum(e.Esrcpos);
3380             //e.Esrcpos.Slinnum = 0;               // don't do it twice
3381         }
3382         if (e.Eoper != OPcomma)
3383             break;
3384         regm_t retregs = 0;
3385         codelem(cdb,e.EV.E1,&retregs,true);
3386         elem* eold = e;
3387         e = e.EV.E2;
3388         freenode(eold);
3389     }
3390     *pe = e;
3391     assert(cgstate.stackclean == 0);
3392     cgstate.stackclean = stackcleansave;
3393     genstackclean(cdb,stackpush - stackpushsave,0);
3394 }
3395 
3396 /**************************
3397  * For elems in regcon that don't match regconsave,
3398  * clear the corresponding bit in regcon.cse.mval.
3399  * Do same for regcon.immed.
3400  */
3401 
3402 void andregcon(con_t *pregconsave)
3403 {
3404     regm_t m = ~1;
3405     for (int i = 0; i < REGMAX; i++)
3406     {
3407         if (pregconsave.cse.value[i] != regcon.cse.value[i])
3408             regcon.cse.mval &= m;
3409         if (pregconsave.immed.value[i] != regcon.immed.value[i])
3410             regcon.immed.mval &= m;
3411         m <<= 1;
3412         m |= 1;
3413     }
3414     //printf("regcon.cse.mval = %s, regconsave.mval = %s ",regm_str(regcon.cse.mval),regm_str(pregconsave.cse.mval));
3415     regcon.used |= pregconsave.used;
3416     regcon.cse.mval &= pregconsave.cse.mval;
3417     regcon.immed.mval &= pregconsave.immed.mval;
3418     regcon.params &= pregconsave.params;
3419     //printf("regcon.cse.mval&regcon.cse.mops = %s, regcon.cse.mops = %s\n",regm_str(regcon.cse.mval & regcon.cse.mops), regm_str(regcon.cse.mops));
3420     regcon.cse.mops &= regcon.cse.mval;
3421 }
3422 
3423 }