1 /**
2  * Top level code for the code generator.
3  *
4  * Copyright:   Copyright (C) 1985-1998 by Symantec
5  *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
6  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcod.d, backend/cgcod.d)
9  * Documentation:  https://dlang.org/phobos/dmd_backend_cgcod.html
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcod.d
11  */
12 
13 module dmd.backend.cgcod;
14 
15 version = FRAMEPTR;             // selects the version (FRAMEPTR) branches of the frame-offset computations in prolog()
16 
17 version (SCPP)
18     version = COMPILE;          // COMPILE is set when building with either SCPP or MARS;
19 version (MARS)
20     version = COMPILE;          // the remainder of this module is wrapped in version (COMPILE)
21 
22 version (COMPILE)
23 {
24 
25 import core.stdc.stdio;
26 import core.stdc.stdlib;
27 import core.stdc.string;
28 
29 import dmd.backend.backend;
30 import dmd.backend.cc;
31 import dmd.backend.cdef;
32 import dmd.backend.code;
33 import dmd.backend.cgcse;
34 import dmd.backend.code_x86;
35 import dmd.backend.codebuilder;
36 import dmd.backend.dlist;
37 import dmd.backend.dvec;
38 import dmd.backend.melf;
39 import dmd.backend.mem;
40 import dmd.backend.el;
41 import dmd.backend.exh;
42 import dmd.backend.global;
43 import dmd.backend.obj;
44 import dmd.backend.oper;
45 import dmd.backend.outbuf;
46 import dmd.backend.rtlsym;
47 import dmd.backend.symtab;
48 import dmd.backend.ty;
49 import dmd.backend.type;
50 import dmd.backend.xmm;
51 
52 import dmd.backend.barray;
53 
54 version (SCPP)
55 {
56     import parser;
57     import precomp;
58 }
59 
60 extern (C++):
61 
62 nothrow:
63 
64 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*); // type of the comparison callback accepted by qsort below
65 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar); // local declaration; presumably the C library qsort, re-declared so the callback is nothrow
66 
67 version (MARS)
68     enum MARS = true;           // compile-time boolean mirroring version (MARS), for use in `static if`
69 else
70     enum MARS = false;
71 
72 void dwarf_except_gentables(Funcsym *sfunc, uint startoffset, uint retoffset); // defined elsewhere; called from codgen() when config.ehmethod is EH_DWARF
73 int REGSIZE();                  // defined elsewhere
74 
/// Returns: a value with only bit number `m` set (the result of `1 << m`).
private extern (D) uint mask(uint m)
{
    const uint bit = 1 << m;
    return bit;
}
76 
77 
78 __gshared
79 {
80 bool floatreg;                  // true if floating register is required (cleared each pass in codgen(); makes prolog() reserve the Foff slot)
81 
82 int hasframe;                   // !=0 if this function has a stack frame
83 bool enforcealign;              // enforced stack alignment (set in codgen() when a stack variable's alignment exceeds STACKALIGN)
84 targ_size_t spoff;              // NOTE(review): undocumented here; reset to 0 at Lagain in prolog()
85 targ_size_t Foff;               // BP offset of floating register
86 targ_size_t CSoff;              // offset of common sub expressions
87 targ_size_t NDPoff;             // offset of saved 8087 registers
88 targ_size_t pushoff;            // offset of saved registers
89 bool pushoffuse;                // using pushoff
90 int BPoff;                      // offset from BP
91 int EBPtoESP;                   // add to EBP offset to get ESP offset
92 LocalSection Para;              // section of function parameters
93 LocalSection Auto;              // section of automatics and registers
94 LocalSection Fast;              // section of fastpar
95 LocalSection EEStack;           // offset of SCstack variables from ESP
96 LocalSection Alloca;            // data for alloca() temporary
97 
98 REGSAVE regsave;                // bookkeeping for saved registers; reset at the start of every codgen() pass
99 
100 CGstate cgstate;                // state of code generator
101 
102 regm_t BYTEREGS = BYTEREGS_INIT; // mutable register mask, initialized from BYTEREGS_INIT
103 regm_t ALLREGS = ALLREGS_INIT;  // mutable register mask, initialized from ALLREGS_INIT; codgen() seeds allregs from it
104 
105 
106 /************************************
107  * # of bytes that SP is beyond BP.
108  */
109 
110 uint stackpush;
111 
112 int stackchanged;               /* set to !=0 if any use of the stack
113                                    other than accessing parameters. Used
114                                    to see if we can address parameters
115                                    with ESP rather than EBP.
116                                  */
117 int refparam;           // !=0 if we referenced any parameters
118 int reflocal;           // !=0 if we referenced any locals
119 bool anyiasm;           // true if any inline assembler (codgen() sets it when it sees a BCasm block)
120 char calledafunc;       // !=0 if we called a function
121 char needframe;         // if true, then we will need the frame
122                         // pointer (BP for the 8088)
123 char gotref;            // !=0 if the GOTsym was referenced
124 uint usednteh;              // if !=0, then used NT exception handling (bit flags such as NTEHjmonitor, NTEH_try)
125 bool calledFinally;     // true if called a BC_finally block
126 
127 /* Register contents    */
128 con_t regcon;
129 
130 int pass;                       // current code generation pass: PASSinitial, PASSreg or PASSfinal
131 
132 private Symbol *retsym;          // set to symbol that should be placed in
133                                 // register AX
134 
135 /****************************
136  * Register masks.
137  */
138 
139 regm_t msavereg;        // Mask of registers that we would like to save.
140                         // they are temporaries (set by scodelem())
141 regm_t mfuncreg;        // Mask of registers preserved by a function
142 
143 regm_t allregs;                // ALLREGS optionally including mBP
144 
145 int dfoidx;                     /* which block we are in (index into dfo[]) */
146 
147 targ_size_t     funcoffset;     // offset of start of function
148 targ_size_t     prolog_allocoffset;     // offset past adj of stack allocation
149 targ_size_t     startoffset;    // size of function entry code
150 targ_size_t     retoffset;      /* offset from start of func to ret code */
151 targ_size_t     retsize;        /* size of function return              */
152 
153 private regm_t lastretregs,last2retregs,last3retregs,last4retregs,last5retregs; // all cleared to 0 at the start of every codgen() pass
154 
155 }
156 
157 /*********************************
158  * Generate code for a function: generate code for its blocks (repeating passes while registers can still be assigned), build the prolog and epilogs, assign block offsets, shorten jumps, emit the code, then write the switch and exception tables.
159  * Note at the end of this routine mfuncreg will contain the mask
160  * of registers not affected by the function. Some minor optimization
161  * possibilities are here.
162  * Params:
163  *      sfunc = function to generate code for; must be funcsym_p, and cseg must be its segment (both asserted)
164  */
165 
166 void codgen(Symbol *sfunc)
167 {
168     bool flag;                  // scratch flag: "any bytes saved" during jump optimization, reused in the switch-table loop
169     block *btry;                // try block currently in effect while emitting (only used in the SCPP path)
170 
171     // Register usage. If a bit is on, the corresponding register is live
172     // in that basic block.
173 
174     //printf("codgen('%s')\n",funcsym_p.Sident.ptr);
175     assert(sfunc == funcsym_p);
176     assert(cseg == funcsym_p.Sseg);
177 
178     cgreg_init();
179     CSE.initialize();
180     tym_t functy = tybasic(sfunc.ty());
181     cod3_initregs();
182     allregs = ALLREGS;
183     pass = PASSinitial;
184     Alloca.init();
185     anyiasm = 0;
186 
187     if (config.ehmethod == EHmethod.EH_DWARF)
188     {
189         /* The dwarf unwinder relies on the function epilog to exist
190          */
191         for (block* b = startblock; b; b = b.Bnext)
192         {
193             if (b.BC == BCexit)
194                 b.BC = BCret;
195         }
196     }
197 
198 tryagain:                       // start of one code generation pass; all per-pass state is reset below
199     debug
200     if (debugr)
201         printf("------------------ PASS%s -----------------\n",
202             (pass == PASSinitial) ? "init".ptr : ((pass == PASSreg) ? "reg".ptr : "final".ptr));
203 
204     lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0;
205 
206     // if no parameters, assume we don't need a stack frame
207     needframe = 0;
208     enforcealign = false;
209     gotref = 0;
210     stackchanged = 0;
211     stackpush = 0;
212     refparam = 0;
213     calledafunc = 0;
214     retsym = null;
215 
216     cgstate.stackclean = 1;
217     cgstate.funcarg.init();
218     cgstate.funcargtos = ~0;
219     cgstate.accessedTLS = false;
220     STACKALIGN = TARGET_STACKALIGN;
221 
222     regsave.reset();
223     memset(global87.stack.ptr,0,global87.stack.sizeof);
224 
225     calledFinally = false;
226     usednteh = 0;
227 
228     static if (MARS)
229     {
230         if (sfunc.Sfunc.Fflags3 & Fjmonitor &&
231             config.exe & EX_windos)
232             usednteh |= NTEHjmonitor;
233     }
234     else version (SCPP)
235     {
236         if (CPP)
237         {
238             if (config.exe == EX_WIN32 &&
239                 (sfunc.Stype.Tflags & TFemptyexc || sfunc.Stype.Texcspec))
240                 usednteh |= NTEHexcspec;
241             except_reset();
242         }
243     }
244 
245     // Set on a trial basis, turning it off if anything might throw
246     sfunc.Sfunc.Fflags3 |= Fnothrow;
247 
248     floatreg = false;
249     assert(global87.stackused == 0);             /* nobody in 8087 stack         */
250 
251     CSE.start();
252     memset(&regcon,0,regcon.sizeof);
253     regcon.cse.mval = regcon.cse.mops = 0;      // no common subs yet
254     msavereg = 0;
255     uint nretblocks = 0;                // number of BCret / BCretexp blocks
256     mfuncreg = fregsaved;               // so we can see which are used
257                                         // (bit is cleared each time
258                                         //  we use one)
259     for (block* b = startblock; b; b = b.Bnext)
260     {
261         memset(&b.Bregcon,0,b.Bregcon.sizeof);       // Clear out values in registers
262         if (b.Belem)
263             resetEcomsub(b.Belem);     // reset all the Ecomsubs
264         if (b.BC == BCasm)
265             anyiasm = 1;                // we have inline assembler
266         if (b.BC == BCret || b.BC == BCretexp)
267             nretblocks++;
268     }
269 
270     if (!config.fulltypes || (config.flags4 & CFG4optimized)) // gather the registers of SCfastpar/SCshadowreg symbols into regcon.params
271     {
272         regm_t noparams = 0;            // registers of parameters that already have Sfl == FLreg
273         for (int i = 0; i < globsym.length; i++)
274         {
275             Symbol *s = globsym[i];
276             s.Sflags &= ~SFLread;
277             switch (s.Sclass)
278             {
279                 case SCfastpar:
280                 case SCshadowreg:
281                     regcon.params |= s.Spregm();
282                     goto case SCparameter;
283 
284                 case SCparameter:
285                     if (s.Sfl == FLreg)
286                         noparams |= s.Sregm;
287                     break;
288 
289                 default:
290                     break;
291             }
292         }
293         regcon.params &= ~noparams;
294     }
295 
296     if (config.flags4 & CFG4optimized)
297     {
298         if (nretblocks == 0 &&                  // if no return blocks in function
299             !(sfunc.ty() & mTYnaked))      // naked functions may have hidden ways of returning
300             sfunc.Sflags |= SFLexit;       // mark function as never returning
301 
302         assert(dfo);
303 
304         cgreg_reset();
305         for (dfoidx = 0; dfoidx < dfo.length; dfoidx++)
306         {
307             regcon.used = msavereg | regcon.cse.mval;   // registers already in use
308             block* b = dfo[dfoidx];
309             blcodgen(b);                        // gen code in depth-first order
310             //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used));
311             cgreg_used(dfoidx, b.Bregcon.used); // gather register used information
312         }
313     }
314     else
315     {
316         pass = PASSfinal;               // not optimizing: a single pass over the blocks in list order
317         for (block* b = startblock; b; b = b.Bnext)
318             blcodgen(b);                // generate the code for each block
319     }
320     regcon.immed.mval = 0;
321     assert(!regcon.cse.mops);           // should have all been used
322 
323     // See which variables we can put into registers
324     if (pass != PASSfinal &&
325         !anyiasm)                               // possible LEA or LES opcodes
326     {
327         allregs |= cod3_useBP();                // see if we can use EBP
328 
329         // If pic code, but EBX was never needed
330         if (!(allregs & mask(PICREG)) && !gotref)
331         {
332             allregs |= mask(PICREG);            // EBX can now be used
333             cgreg_assign(retsym);               // result ignored: a PASSreg pass follows regardless
334             pass = PASSreg;
335         }
336         else if (cgreg_assign(retsym))          // if we found some registers
337             pass = PASSreg;
338         else
339             pass = PASSfinal;
340         for (block* b = startblock; b; b = b.Bnext) // discard the code generated by this pass
341         {
342             code_free(b.Bcode);
343             b.Bcode = null;
344         }
345         goto tryagain;
346     }
347     cgreg_term();
348 
349     version (SCPP)
350     {
351         if (CPP)
352             cgcod_eh();
353     }
354 
355     // See if we need to enforce a particular stack alignment
356     foreach (i; 0 .. globsym.length)
357     {
358         Symbol *s = globsym[i];
359 
360         if (Symbol_Sisdead(s, anyiasm))
361             continue;
362 
363         switch (s.Sclass)
364         {
365             case SCregister:
366             case SCauto:
367             case SCfastpar:
368                 if (s.Sfl == FLreg)     // FLreg symbols are skipped: they do not affect the stack alignment decision
369                     break;
370 
371                 const sz = type_alignsize(s.Stype);
372                 if (sz > STACKALIGN && (I64 || config.exe == EX_OSX))
373                 {
374                     STACKALIGN = sz;
375                     enforcealign = true;
376                 }
377                 break;
378 
379             default:
380                 break;
381         }
382     }
383 
384     stackoffsets(globsym, false);  // compute final offsets of stack variables
385     cod5_prol_epi();            // see where to place prolog/epilog
386     CSE.finish();               // compute addresses and sizes of CSE saves
387 
388     if (configv.addlinenumbers)
389         objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg));
390 
391     // Otherwise, jmp's to startblock will execute the prolog again
392     assert(!startblock.Bpred);
393 
394     CodeBuilder cdbprolog; cdbprolog.ctor();
395     prolog(cdbprolog);           // gen function start code
396     code *cprolog = cdbprolog.finish();
397     if (cprolog)
398         pinholeopt(cprolog,null);       // optimize
399 
400     funcoffset = Offset(sfunc.Sseg);
401     targ_size_t coffset = Offset(sfunc.Sseg);   // running offset at which the next block will be placed
402 
403     if (eecontext.EEelem)
404         genEEcode();
405 
406     for (block* b = startblock; b; b = b.Bnext)
407     {
408         // We couldn't do this before because localsize was unknown
409         switch (b.BC)
410         {
411             case BCret:
412                 if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked))
413                 {
414                     CodeBuilder cdb; cdb.ctor();
415                     cdb.append(b.Bcode);
416                     cdb.genlinnum(b.Bsrcpos);
417                     b.Bcode = cdb.finish();
418                 }
419                 goto case BCretexp;
420 
421             case BCretexp:
422                 epilog(b);
423                 break;
424 
425             default:
426                 if (b.Bflags & BFLepilog)
427                     epilog(b);
428                 break;
429         }
430         assignaddr(b);                  // assign addresses
431         pinholeopt(b.Bcode,b);         // do pinhole optimization
432         if (b.Bflags & BFLprolog)      // do function prolog
433         {
434             startoffset = coffset + calcblksize(cprolog) - funcoffset;
435             b.Bcode = cat(cprolog,b.Bcode);
436         }
437         cgsched_block(b);
438         b.Bsize = calcblksize(b.Bcode);       // calculate block size
439         if (b.Balign)
440         {
441             targ_size_t u = b.Balign - 1;
442             coffset = (coffset + u) & ~u;       // round coffset up (mask arithmetic; presumably Balign is a power of 2)
443         }
444         b.Boffset = coffset;           /* offset of this block         */
445         coffset += b.Bsize;            /* offset of following block    */
446     }
447 
448     debug
449     debugw && printf("code addr complete\n");
450 
451     // Do jump optimization
452     do
453     {
454         flag = false;
455         for (block* b = startblock; b; b = b.Bnext)
456         {
457             if (b.Bflags & BFLjmpoptdone)      /* if no more jmp opts for this blk */
458                 continue;
459             int i = branch(b,0);            // see if jmp => jmp short
460             if (i)                          // if any bytes saved
461             {   targ_size_t offset;
462 
463                 b.Bsize -= i;
464                 offset = b.Boffset + b.Bsize;
465                 for (block* bn = b.Bnext; bn; bn = bn.Bnext) // recompute the offsets of all following blocks
466                 {
467                     if (bn.Balign)
468                     {   targ_size_t u = bn.Balign - 1;
469 
470                         offset = (offset + u) & ~u;
471                     }
472                     bn.Boffset = offset;
473                     offset += bn.Bsize;
474                 }
475                 coffset = offset;
476                 flag = true;
477             }
478         }
479         if (!I16 && !(config.flags4 & CFG4optimized))
480             break;                      // use the long conditional jmps
481     } while (flag);                     // loop till no more bytes saved
482 
483     debug
484     debugw && printf("code jump optimization complete\n");
485 
486     version (MARS)
487     {
488         if (usednteh & NTEH_try)
489         {
490             // Do this before code is emitted because we patch some instructions
491             nteh_filltables();
492         }
493     }
494 
495     // Compute starting offset for switch tables
496     targ_size_t swoffset;
497     int jmpseg = -1;                    // segment for switch tables; -1 means not chosen yet
498     if (config.flags & CFGromable)
499     {
500         jmpseg = 0;
501         swoffset = coffset;
502     }
503 
504     // Emit the generated code
505     if (eecontext.EEcompile == 1)
506     {
507         codout(sfunc.Sseg,eecontext.EEcode);
508         code_free(eecontext.EEcode);
509         version (SCPP)
510         {
511             el_free(eecontext.EEelem);
512         }
513     }
514     else
515     {
516         for (block* b = startblock; b; b = b.Bnext)
517         {
518             if (b.BC == BCjmptab || b.BC == BCswitch)
519             {
520                 if (jmpseg == -1)
521                 {
522                     jmpseg = objmod.jmpTableSegment(sfunc);
523                     swoffset = Offset(jmpseg);
524                 }
525                 swoffset = _align(0,swoffset);
526                 b.Btableoffset = swoffset;     /* offset of sw tab */
527                 swoffset += b.Btablesize;
528             }
529             jmpaddr(b.Bcode);          /* assign jump addresses        */
530 
531             debug
532             if (debugc)
533             {
534                 printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n",
535                     cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg));
536                 if (b.Bcode)
537                     printf( "First opcode of block is: %0x\n", b.Bcode.Iop );
538             }
539 
540             if (b.Balign)
541             {   uint u = b.Balign;
542                 uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1); // number of padding bytes to emit before this block
543 
544                 cod3_align_bytes(sfunc.Sseg, nalign);
545             }
546             assert(b.Boffset == Offset(sfunc.Sseg)); // the segment offset must agree with the offset computed for this block
547 
548             version (SCPP)
549             {
550                 if (CPP && !(config.exe == EX_WIN32))
551                 {
552                     //printf("b = %p, index = %d\n",b,b.Bindex);
553                     //except_index_set(b.Bindex);
554 
555                     if (btry != b.Btry)
556                     {
557                         btry = b.Btry;
558                         except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
559                     }
560                     if (b.BC == BCtry)
561                     {
562                         btry = b;
563                         except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
564                     }
565                 }
566             }
567 
568             codout(sfunc.Sseg,b.Bcode);   // output code
569         }
570         if (coffset != Offset(sfunc.Sseg)) // predicted end offset must match what was actually emitted
571         {
572             debug
573             printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg));
574 
575             assert(0);
576         }
577         sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset;    // size of function
578 
579         static if (NTEXCEPTIONS || MARS)
580         {
581             version (MARS)
582                 const nteh = usednteh & NTEH_try;
583             else static if (NTEXCEPTIONS)
584                 const nteh = usednteh & NTEHcpp;
585             else
586                 enum nteh = true;
587             if (nteh)
588             {
589                 assert(!(config.flags & CFGromable));
590                 //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset);
591                 objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset);
592             }
593         }
594 
595         // Write out switch tables
596         flag = false;                       // true if last active block was a ret; NOTE(review): set below but not read again in this function
597         for (block* b = startblock; b; b = b.Bnext)
598         {
599             switch (b.BC)
600             {
601                 case BCjmptab:              /* if jump table                */
602                     outjmptab(b);           /* write out jump table         */
603                     goto Ldefault;
604 
605                 case BCswitch:
606                     outswitab(b);           /* write out switch table       */
607                     goto Ldefault;
608 
609                 case BCret:
610                 case BCretexp:
611                     /* Compute offset to return code from start of function */
612                     retoffset = b.Boffset + b.Bsize - retsize - funcoffset;
613                     version (MARS)
614                     {
615                         /* Add 3 bytes to retoffset in case we have an exception
616                          * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT
617                          * IT FIXES THE PROBLEM HERE AS WELL.
618                          */
619                         if (usednteh & NTEH_try)
620                             retoffset += 3;
621                     }
622                     flag = true;
623                     break;
624 
625                 default:
626                 Ldefault:
627                     retoffset = b.Boffset + b.Bsize - funcoffset; // offset of the end of this block from the start of the function
628                     break;
629             }
630         }
631         if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked))
632             /* put line number at end of function on the
633                start of the last instruction
634              */
635             /* Instead, try offset to cleanup code  */
636             if (retoffset < sfunc.Ssize)
637                 objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset);
638 
639         static if (MARS)
640         {
641             if (config.exe == EX_WIN64)
642                 win64_pdata(sfunc);
643         }
644 
645         static if (MARS)
646         {
647             if (usednteh & NTEH_try)
648             {
649                 // Do this before code is emitted because we patch some instructions (NOTE(review): codout() has already run above; this comment looks copied from the nteh_filltables() call - confirm)
650                 nteh_gentables(sfunc);
651             }
652             if (usednteh & (EHtry | EHcleanup) &&   // saw BCtry or BC_try or OPddtor
653                 config.ehmethod == EHmethod.EH_DM)
654             {
655                 except_gentables();
656             }
657             if (config.ehmethod == EHmethod.EH_DWARF)
658             {
659                 sfunc.Sfunc.Fstartblock = startblock; // set only for the duration of the call below, then cleared
660                 dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset);
661                 sfunc.Sfunc.Fstartblock = null;
662             }
663         }
664 
665         version (SCPP)
666         {
667             // Write out frame handler
668             if (NTEXCEPTIONS && usednteh & NTEHcpp)
669             {
670                 nteh_framehandler(sfunc, except_gentables());
671             }
672             else
673             {
674                 if (NTEXCEPTIONS && usednteh & NTEH_try)
675                 {
676                     nteh_gentables(sfunc);
677                 }
678                 else
679                 {
680                     if (CPP)
681                         except_gentables();
682                 }
683             }
684         }
685 
686         for (block* b = startblock; b; b = b.Bnext) // the generated code is no longer needed once emitted
687         {
688             code_free(b.Bcode);
689             b.Bcode = null;
690         }
691     }
692 
693     // Mask of regs saved
694     // BUG: do interrupt functions save BP?
695     sfunc.Sregsaved = (functy == TYifunc) ? cast(regm_t) mBP : (mfuncreg | fregsaved);
696 
697     debug
698     if (global87.stackused != 0)
699       printf("stackused = %d\n",global87.stackused);
700 
701     assert(global87.stackused == 0);             /* nobody in 8087 stack         */
702 
703     global87.save.dtor();       // clean up ndp save array
704 }
705 
/*********************************************
 * Move a stack section's offset downward until the section is aligned.
 * Params:
 *  base      = offset of the section from the frame pointer; must be <= 0
 *              when viewed as an int (asserted)
 *  alignment = requested alignment, clamped to STACKALIGN; 0 means "do not align".
 *              The mask arithmetic presumably expects a power of 2.
 *  bias      = difference between where the frame pointer points and the
 *              STACKALIGNed part of the stack
 * Returns:
 *  base, lowered as needed so that the low bits of (-base + bias) selected
 *  by (alignment - 1) are zero
 */
targ_size_t alignsection(targ_size_t base, uint alignment, int bias)
{
    assert(cast(int)base <= 0);

    // Never align more strictly than the stack itself is aligned
    const uint limit = alignment > STACKALIGN ? STACKALIGN : alignment;
    if (!limit)
        return base;

    const int distance = cast(int)(-base + bias);
    assert(distance >= 0);

    // Bytes by which the section overshoots the previous aligned boundary
    const int excess = distance & (limit - 1);
    return excess ? base - (limit - excess) : base;
}
730 
731 /*******************************
732  * Generate code for a function start.
733  * Input:
734  *      Offset(cseg)         address of start of code
735  *      Auto.alignment
736  * Output:
737  *      Offset(cseg)         adjusted for size of code generated
738  *      EBPtoESP
739  *      hasframe
740  *      BPoff
741  */
742 void prolog(ref CodeBuilder cdb)
743 {
744     bool enter;
745 
746     //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident.ptr, needframe, Auto.alignment);
747     debug debugw && printf("funcstart()\n");
748     regcon.immed.mval = 0;                      /* no values in registers yet   */
749     version (FRAMEPTR)
750         EBPtoESP = 0;
751     else
752         EBPtoESP = -REGSIZE;
753     hasframe = 0;
754     bool pushds = false;
755     BPoff = 0;
756     bool pushalloc = false;
757     tym_t tyf = funcsym_p.ty();
758     tym_t tym = tybasic(tyf);
759     const farfunc = tyfarfunc(tym) != 0;
760 
761     // Special Intel 64 bit ABI prolog setup for variadic functions
762     Symbol *sv64 = null;                        // set to __va_argsave
763     if (I64 && variadic(funcsym_p.Stype))
764     {
765         /* The Intel 64 bit ABI scheme.
766          * abi_sysV_amd64.pdf
767          * Load arguments passed in registers into the varargs save area
768          * so they can be accessed by va_arg().
769          */
770         /* Look for __va_argsave
771          */
772         for (SYMIDX si = 0; si < globsym.length; si++)
773         {
774             Symbol *s = globsym[si];
775             if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0)
776             {
777                 if (!(s.Sflags & SFLdead))
778                     sv64 = s;
779                 break;
780             }
781         }
782     }
783 
784     if (config.flags & CFGalwaysframe ||
785         funcsym_p.Sfunc.Fflags3 & Ffakeeh ||
786         /* The exception stack unwinding mechanism relies on the EBP chain being intact,
787          * so need frame if function can possibly throw
788          */
789         !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) ||
790         cgstate.accessedTLS ||
791         sv64
792        )
793         needframe = 1;
794 
795     CodeBuilder cdbx; cdbx.ctor();
796 
797 Lagain:
798     spoff = 0;
799     char guessneedframe = needframe;
800     int cfa_offset = 0;
801 //    if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)))
802 //      usednteh |= NTEHpassthru;
803 
804     /* Compute BP offsets for variables on stack.
805      * The organization is:
806      *  Para.size    parameters
807      * -------- stack is aligned to STACKALIGN
808      *          seg of return addr      (if far function)
809      *          IP of return addr
810      *  BP.    caller's BP
811      *          DS                      (if Windows prolog/epilog)
812      *          exception handling context symbol
813      *  Fast.size fastpar
814      *  Auto.size    autos and regs
815      *  regsave.off  any saved registers
816      *  Foff    floating register
817      *  Alloca.size  alloca temporary
818      *  CSoff   common subs
819      *  NDPoff  any 8087 saved registers
820      *          monitor context record
821      *          any saved registers
822      */
823 
824     if (tym == TYifunc)
825         Para.size = 26; // how is this number derived?
826     else
827     {
828         version (FRAMEPTR)
829         {
830             Para.size = ((farfunc ? 2 : 1) + needframe) * REGSIZE;
831             if (needframe)
832                 EBPtoESP = -REGSIZE;
833         }
834         else
835             Para.size = ((farfunc ? 2 : 1) + 1) * REGSIZE;
836     }
837 
838     /* The real reason for the FAST section is because the implementation of contracts
839      * requires a consistent stack frame location for the 'this' pointer. But if varying
840      * stuff in Auto.offset causes different alignment for that section, the entire block can
841      * shift around, causing a crash in the contracts.
842      * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their
843      * own FAST section, which is never aligned at a size bigger than REGSIZE, and so
844      * its alignment never shifts around.
845      * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned
846      * individually rather than as a group.
847      */
848     Fast.size = 0;
849     static if (NTEXCEPTIONS == 2)
850     {
851         Fast.size -= nteh_contextsym_size();
852         version (MARS)
853         {
854             if (config.exe & EX_windos)
855             {
856                 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0)
857                     Fast.size -= 5 * 4;
858             }
859         }
860     }
861 
862     /* Despite what the comment above says, aligning Fast section to size greater
863      * than REGSIZE does not break contract implementation. Fast.offset and
864      * Fast.alignment must be the same for the overriding and
865      * the overridden function, since they have the same parameters. Fast.size
866      * must be the same because otherwise, contract inheritance wouldn't work
867      * even if we didn't align Fast section to size greater than REGSIZE. Therefore,
868      * the only way aligning the section could cause problems with contract
869      * inheritance is if bias (declared below) differed for the overridden
870      * and the overriding function.
871      *
872      * Bias depends on Para.size and needframe. The value of Para.size depends on
873      * whether the function is an interrupt handler and whether it is a farfunc.
874      * DMD does not have _interrupt attribute and D does not make a distinction
875      * between near and far functions, so Para.size should always be 2 * REGSIZE
876      * for D.
877      *
878      * The value of needframe depends on a global setting that is only set
879      * during backend's initialization and on function flag Ffakeeh. On Windows,
880      * that flag is always set for virtual functions, for which contracts are
881      * defined and on other platforms, it is never set. Because of that
     * the value of needframe should always be the same for the overridden
883      * and the overriding function, and so bias should be the same too.
884      */
885 
886 version (FRAMEPTR)
887     int bias = enforcealign ? 0 : cast(int)(Para.size);
888 else
889     int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE));
890 
891     if (Fast.alignment < REGSIZE)
892         Fast.alignment = REGSIZE;
893 
894     Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias);
895 
896     if (Auto.alignment < REGSIZE)
897         Auto.alignment = REGSIZE;       // necessary because localsize must be REGSIZE aligned
898     Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias);
899 
900     regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias);
901     //printf("regsave.off = x%x, size = x%x, alignment = %x\n",
902         //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment);
903 
904     if (floatreg)
905     {
906         uint floatregsize = config.fpxmmregs || I32 ? 16 : DOUBLESIZE;
907         Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias);
908         //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize);
909     }
910     else
911         Foff = regsave.off;
912 
913     Alloca.alignment = REGSIZE;
914     Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias);
915 
916     CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias);
917     //printf("CSoff = x%x, size = x%x, alignment = %x\n",
918         //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment);
919 
920     NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias);
921 
922     regm_t topush = fregsaved & ~mfuncreg;          // mask of registers that need saving
923     pushoffuse = false;
924     pushoff = NDPoff;
925     /* We don't keep track of all the pushes and pops in a function. Hence,
926      * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder
927      * won't be setting ESP correctly. With pushoffuse, the registers are restored
928      * from EBP, which is kept track of properly.
929      */
930     if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64))
931     {
932         /* Instead of pushing the registers onto the stack one by one,
933          * allocate space in the stack frame and copy/restore them there.
934          */
935         int xmmtopush = numbitsset(topush & XMMREGS);   // XMM regs take 16 bytes
936         int gptopush = numbitsset(topush) - xmmtopush;  // general purpose registers to save
937         if (NDPoff || xmmtopush || cgstate.funcarg.size)
938         {
939             pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16),
940                     xmmtopush ? STACKALIGN : REGSIZE, bias);
941             pushoffuse = true;          // tell others we're using this strategy
942         }
943     }
944 
945     //printf("Fast.size = x%x, Auto.size = x%x\n", (int)Fast.size, (int)Auto.size);
946 
947     cgstate.funcarg.alignment = STACKALIGN;
948     /* If the function doesn't need the extra alignment, don't do it.
949      * Can expand on this by allowing for locals that don't need extra alignment
950      * and calling functions that don't need it.
951      */
952     if (pushoff == 0 && !calledafunc && config.fpxmmregs && (I32 || I64))
953     {
954         cgstate.funcarg.alignment = I64 ? 8 : 4;
955     }
956 
957     //printf("pushoff = %d, size = %d, alignment = %d, bias = %d\n", cast(int)pushoff, cast(int)cgstate.funcarg.size, cast(int)cgstate.funcarg.alignment, cast(int)bias);
958     cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias);
959 
960     localsize = -cgstate.funcarg.offset;
961 
962     //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n",
963         //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize);
964     assert(cast(targ_ptrdiff_t)localsize >= 0);
965 
966     // Keep the stack aligned by 8 for any subsequent function calls
967     if (!I16 && calledafunc &&
968         (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
969     {
970         int npush = numbitsset(topush);            // number of registers that need saving
971         npush += numbitsset(topush & XMMREGS);     // XMM regs take 16 bytes, so count them twice
972         if (pushoffuse)
973             npush = 0;
974 
975         //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n",
976                //npush, Para.size, needframe, localsize);
977 
978         int sz = cast(int)(localsize + npush * REGSIZE);
979         if (!enforcealign)
980         {
981             version (FRAMEPTR)
982                 sz += Para.size;
983             else
984                 sz += Para.size + (needframe ? 0 : -REGSIZE);
985         }
986         if (sz & (STACKALIGN - 1))
987             localsize += STACKALIGN - (sz & (STACKALIGN - 1));
988     }
989     cgstate.funcarg.offset = -localsize;
990 
991     //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n",
992         //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize);
993 
994     uint xlocalsize = cast(uint)localsize;    // amount to subtract from ESP to make room for locals
995 
996     if (tyf & mTYnaked)                 // if no prolog/epilog for function
997     {
998         hasframe = 1;
999         return;
1000     }
1001 
1002     if (tym == TYifunc)
1003     {
1004         prolog_ifunc(cdbx,&tyf);
1005         hasframe = 1;
1006         cdb.append(cdbx);
1007         goto Lcont;
1008     }
1009 
1010     /* Determine if we need BP set up   */
1011     if (enforcealign)
1012     {
1013         // we need BP to reset the stack before return
1014         // otherwise the return address is lost
1015         needframe = 1;
1016 
1017     }
1018     else if (config.flags & CFGalwaysframe)
1019         needframe = 1;
1020     else
1021     {
1022         if (localsize)
1023         {
1024             if (I16 ||
1025                 !(config.flags4 & CFG4speed) ||
1026                 config.target_cpu < TARGET_Pentium ||
1027                 farfunc ||
1028                 config.flags & CFGstack ||
1029                 xlocalsize >= 0x1000 ||
1030                 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
1031                 anyiasm ||
1032                 Alloca.size
1033                )
1034                 needframe = 1;
1035         }
1036         if (refparam && (anyiasm || I16))
1037             needframe = 1;
1038     }
1039 
1040     if (needframe)
1041     {
1042         assert(mfuncreg & mBP);         // shouldn't have used mBP
1043 
1044         if (!guessneedframe)            // if guessed wrong
1045             goto Lagain;
1046     }
1047 
1048     if (I16 && config.wflags & WFwindows && farfunc)
1049     {
1050         prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds);
1051         enter = false;                  // don't use ENTER instruction
1052         hasframe = 1;                   // we have a stack frame
1053     }
1054     else if (needframe)                 // if variables or parameters
1055     {
1056         prolog_frame(cdbx, farfunc, xlocalsize, enter, cfa_offset);
1057         hasframe = 1;
1058     }
1059 
1060     /* Align the stack if necessary */
1061     prolog_stackalign(cdbx);
1062 
1063     /* Subtract from stack pointer the size of the local stack frame
1064      */
1065     if (config.flags & CFGstack)        // if stack overflow check
1066     {
1067         prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
1068         if (Alloca.size)
1069             prolog_setupalloca(cdbx);
1070     }
1071     else if (needframe)                      /* if variables or parameters   */
1072     {
1073         if (xlocalsize)                 /* if any stack offset          */
1074         {
1075             prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
1076             if (Alloca.size)
1077                 prolog_setupalloca(cdbx);
1078         }
1079         else
1080             assert(Alloca.size == 0);
1081     }
1082     else if (xlocalsize)
1083     {
1084         assert(I32 || I64);
1085         prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc);
1086         version (FRAMEPTR) { } else
1087             BPoff += REGSIZE;
1088     }
1089     else
1090         assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor));
1091     EBPtoESP += xlocalsize;
1092     if (hasframe)
1093         EBPtoESP += REGSIZE;
1094 
1095     /* Win64 unwind needs the amount of code generated so far
1096      */
1097     if (config.exe == EX_WIN64)
1098     {
1099         code *c = cdbx.peek();
1100         pinholeopt(c, null);
1101         prolog_allocoffset = calcblksize(c);
1102     }
1103 
1104     version (SCPP)
1105     {
1106         /*  The idea is to generate trace for all functions if -Nc is not thrown.
1107          *  If -Nc is thrown, generate trace only for global COMDATs, because those
1108          *  are relevant to the FUNCTIONS statement in the linker .DEF file.
1109          *  This same logic should be in epilog().
1110          */
1111         if (config.flags & CFGtrace &&
1112             (!(config.flags4 & CFG4allcomdat) ||
1113              funcsym_p.Sclass == SCcomdat ||
1114              funcsym_p.Sclass == SCglobal ||
1115              (config.flags2 & CFG2comdat && SymInline(funcsym_p))
1116             )
1117            )
1118         {
1119             uint spalign = 0;
1120             int sz = cast(int)localsize;
1121             if (!enforcealign)
1122             {
1123                 version (FRAMEPTR)
1124                     sz += Para.size;
1125                 else
1126                     sz += Para.size + (needframe ? 0 : -REGSIZE);
1127             }
1128             if (STACKALIGN >= 16 && (sz & (STACKALIGN - 1)))
1129                 spalign = STACKALIGN - (sz & (STACKALIGN - 1));
1130 
1131             if (spalign)
1132             {   /* This could be avoided by moving the function call to after the
1133                  * registers are saved. But I don't remember why the call is here
1134                  * and not there.
1135                  */
1136                 cod3_stackadj(cdbx, spalign);
1137             }
1138 
1139             uint regsaved;
1140             prolog_trace(cdbx, farfunc, &regsaved);
1141 
1142             if (spalign)
1143                 cod3_stackadj(cdbx, -spalign);
1144             useregs((ALLREGS | mBP | mES) & ~regsaved);
1145         }
1146     }
1147 
1148     version (MARS)
1149     {
1150         if (usednteh & NTEHjmonitor)
1151         {   Symbol *sthis;
1152 
1153             for (SYMIDX si = 0; 1; si++)
1154             {   assert(si < globsym.length);
1155                 sthis = globsym[si];
1156                 if (strcmp(sthis.Sident.ptr,"this".ptr) == 0)
1157                     break;
1158             }
1159             nteh_monitor_prolog(cdbx,sthis);
1160             EBPtoESP += 3 * 4;
1161         }
1162     }
1163 
1164     cdb.append(cdbx);
1165     prolog_saveregs(cdb, topush, cfa_offset);
1166 
1167 Lcont:
1168 
1169     if (config.exe == EX_WIN64)
1170     {
1171         if (variadic(funcsym_p.Stype))
1172             prolog_gen_win64_varargs(cdb);
1173         regm_t namedargs;
1174         prolog_loadparams(cdb, tyf, pushalloc, namedargs);
1175         return;
1176     }
1177 
1178     prolog_ifunc2(cdb, tyf, tym, pushds);
1179 
1180     static if (NTEXCEPTIONS == 2)
1181     {
1182         if (usednteh & NTEH_except)
1183             nteh_setsp(cdb, 0x89);            // MOV __context[EBP].esp,ESP
1184     }
1185 
1186     // Load register parameters off of the stack. Do not use
1187     // assignaddr(), as it will replace the stack reference with
1188     // the register!
1189     regm_t namedargs;
1190     prolog_loadparams(cdb, tyf, pushalloc, namedargs);
1191 
1192     if (sv64)
1193         prolog_genvarargs(cdb, sv64, namedargs);
1194 
1195     /* Alignment checks
1196      */
1197     //assert(Auto.alignment <= STACKALIGN);
1198     //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0);
1199 }
1200 
1201 /************************************
1202  * Predicate for sorting auto symbols for qsort().
1203  * Returns:
1204  *      < 0     s1 goes farther from frame pointer
1205  *      > 0     s1 goes nearer the frame pointer
1206  *      = 0     no difference
1207  */
1208 
1209 extern (C) int
1210  autosort_cmp(scope const void *ps1, scope const void *ps2)
1211 {
1212     Symbol *s1 = *cast(Symbol **)ps1;
1213     Symbol *s2 = *cast(Symbol **)ps2;
1214 
1215     /* Largest align size goes furthest away from frame pointer,
1216      * so they get allocated first.
1217      */
1218     uint alignsize1 = Symbol_Salignsize(s1);
1219     uint alignsize2 = Symbol_Salignsize(s2);
1220     if (alignsize1 < alignsize2)
1221         return 1;
1222     else if (alignsize1 > alignsize2)
1223         return -1;
1224 
1225     /* move variables nearer the frame pointer that have higher Sweights
1226      * because addressing mode is fewer bytes. Grouping together high Sweight
1227      * variables also may put them in the same cache
1228      */
1229     if (s1.Sweight < s2.Sweight)
1230         return -1;
1231     else if (s1.Sweight > s2.Sweight)
1232         return 1;
1233 
1234     /* More:
1235      * 1. put static arrays nearest the frame pointer, so buffer overflows
1236      *    can't change other variable contents
1237      * 2. Do the coloring at the byte level to minimize stack usage
1238      */
1239     return 0;
1240 }
1241 
1242 /******************************
1243  * Compute stack frame offsets for local variables.
1244  * that did not make it into registers.
1245  * Params:
1246  *      symtab = function's symbol table
1247  *      estimate = true for do estimate only, false for final
1248  */
1249 void stackoffsets(ref symtab_t symtab, bool estimate)
1250 {
1251     //printf("stackoffsets() %s\n", funcsym_p.Sident.ptr);
1252 
1253     Para.init();        // parameter offset
1254     Fast.init();        // SCfastpar offset
1255     Auto.init();        // automatic & register offset
1256     EEStack.init();     // for SCstack's
1257 
1258     // Set if doing optimization of auto layout
1259     bool doAutoOpt = estimate && config.flags4 & CFG4optimized;
1260 
1261     // Put autos in another array so we can do optimizations on the stack layout
1262     Symbol*[10] autotmp = void;
1263     Symbol **autos = null;
1264     if (doAutoOpt)
1265     {
1266         if (symtab.length <= autotmp.length)
1267             autos = autotmp.ptr;
1268         else
1269         {   autos = cast(Symbol **)malloc(symtab.length * (*autos).sizeof);
1270             assert(autos);
1271         }
1272     }
1273     size_t autosi = 0;  // number used in autos[]
1274 
1275     for (int si = 0; si < symtab.length; si++)
1276     {   Symbol *s = symtab[si];
1277 
1278         /* Don't allocate space for dead or zero size parameters
1279          */
1280         switch (s.Sclass)
1281         {
1282             case SCfastpar:
1283                 if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh))
1284                     goto Ldefault;   // don't need consistent stack frame
1285                 break;
1286 
1287             case SCparameter:
1288                 if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty)))
1289                 {
1290                     Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary
1291                     s.Soffset = Para.offset;
1292                     continue;
1293                 }
1294                 break;          // allocate even if it's dead
1295 
1296             case SCshadowreg:
1297                 break;          // allocate even if it's dead
1298 
1299             default:
1300             Ldefault:
1301                 if (Symbol_Sisdead(s, anyiasm))
1302                     continue;       // don't allocate space
1303                 break;
1304         }
1305 
1306         targ_size_t sz = type_size(s.Stype);
1307         if (sz == 0)
1308             sz++;               // can't handle 0 length structs
1309 
1310         uint alignsize = Symbol_Salignsize(s);
1311         if (alignsize > STACKALIGN)
1312             alignsize = STACKALIGN;         // no point if the stack is less aligned
1313 
1314         //printf("symbol '%s', size = %d, alignsize = %d, read = %x\n",s.Sident.ptr, cast(int)sz, cast(int)alignsize, s.Sflags & SFLread);
1315         assert(cast(int)sz >= 0);
1316 
1317         switch (s.Sclass)
1318         {
1319             case SCfastpar:
1320                 /* Get these
1321                  * right next to the stack frame pointer, EBP.
1322                  * Needed so we can call nested contract functions
1323                  * frequire and fensure.
1324                  */
1325                 if (s.Sfl == FLreg)        // if allocated in register
1326                     continue;
1327                 /* Needed because storing fastpar's on the stack in prolog()
1328                  * does the entire register
1329                  */
1330                 if (sz < REGSIZE)
1331                     sz = REGSIZE;
1332 
1333                 Fast.offset = _align(sz,Fast.offset);
1334                 s.Soffset = Fast.offset;
1335                 Fast.offset += sz;
1336                 //printf("fastpar '%s' sz = %d, fast offset =  x%x, %p\n",s.Sident,(int)sz,(int)s.Soffset, s);
1337 
1338                 if (alignsize > Fast.alignment)
1339                     Fast.alignment = alignsize;
1340                 break;
1341 
1342             case SCregister:
1343             case SCauto:
1344                 if (s.Sfl == FLreg)        // if allocated in register
1345                     break;
1346 
1347                 if (doAutoOpt)
1348                 {   autos[autosi++] = s;    // deal with later
1349                     break;
1350                 }
1351 
1352                 Auto.offset = _align(sz,Auto.offset);
1353                 s.Soffset = Auto.offset;
1354                 Auto.offset += sz;
1355                 //printf("auto    '%s' sz = %d, auto offset =  x%lx\n",s.Sident,sz,(long)s.Soffset);
1356 
1357                 if (alignsize > Auto.alignment)
1358                     Auto.alignment = alignsize;
1359                 break;
1360 
1361             case SCstack:
1362                 EEStack.offset = _align(sz,EEStack.offset);
1363                 s.Soffset = EEStack.offset;
1364                 //printf("EEStack.offset =  x%lx\n",(long)s.Soffset);
1365                 EEStack.offset += sz;
1366                 break;
1367 
1368             case SCshadowreg:
1369             case SCparameter:
1370                 if (config.exe == EX_WIN64)
1371                 {
1372                     assert((Para.offset & 7) == 0);
1373                     s.Soffset = Para.offset;
1374                     Para.offset += 8;
1375                     break;
1376                 }
1377                 /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general,
1378                  * but are 4 byte aligned on the OSX 32 stack.
1379                  */
1380                 Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */
1381                 if (alignsize >= 16 &&
1382                     (I64 || (config.exe == EX_OSX &&
1383                          (tyaggregate(s.ty()) || tyvector(s.ty())))))
1384                     Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1);
1385                 s.Soffset = Para.offset;
1386                 //printf("%s param offset =  x%lx, alignsize = %d\n",s.Sident,(long)s.Soffset, (int)alignsize);
1387                 Para.offset += (s.Sflags & SFLdouble)
1388                             ? type_size(tstypes[TYdouble])   // float passed as double
1389                             : type_size(s.Stype);
1390                 break;
1391 
1392             case SCpseudo:
1393             case SCstatic:
1394             case SCbprel:
1395                 break;
1396             default:
1397                 symbol_print(s);
1398                 assert(0);
1399         }
1400     }
1401 
1402     if (autosi)
1403     {
1404         qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp);
1405 
1406         vec_t tbl = vec_calloc(autosi);
1407 
1408         for (size_t si = 0; si < autosi; si++)
1409         {
1410             Symbol *s = autos[si];
1411 
1412             targ_size_t sz = type_size(s.Stype);
1413             if (sz == 0)
1414                 sz++;               // can't handle 0 length structs
1415 
1416             uint alignsize = Symbol_Salignsize(s);
1417             if (alignsize > STACKALIGN)
1418                 alignsize = STACKALIGN;         // no point if the stack is less aligned
1419 
1420             /* See if we can share storage with another variable
1421              * if their live ranges do not overlap.
1422              */
1423             if (// Don't share because could stomp on variables
1424                 // used in finally blocks
1425                 !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) &&
1426                 s.Srange && !(s.Sflags & SFLspill))
1427             {
1428                 for (size_t i = 0; i < si; i++)
1429                 {
1430                     if (!vec_testbit(i,tbl))
1431                         continue;
1432                     Symbol *sp = autos[i];
1433 //printf("auto    s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange));
1434                     if (vec_disjoint(s.Srange,sp.Srange) &&
1435                         !(sp.Soffset & (alignsize - 1)) &&
1436                         sz <= type_size(sp.Stype))
1437                     {
1438                         vec_or(sp.Srange,sp.Srange,s.Srange);
1439                         //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident);
1440                         s.Soffset = sp.Soffset;
1441                         goto L2;
1442                     }
1443                 }
1444             }
1445             Auto.offset = _align(sz,Auto.offset);
1446             s.Soffset = Auto.offset;
1447             //printf("auto    '%s' sz = %d, auto offset =  x%lx\n",s.Sident,sz,(long)s.Soffset);
1448             Auto.offset += sz;
1449             if (s.Srange && !(s.Sflags & SFLspill))
1450                 vec_setbit(si,tbl);
1451 
1452             if (alignsize > Auto.alignment)
1453                 Auto.alignment = alignsize;
1454         L2: { }
1455         }
1456 
1457         vec_free(tbl);
1458 
1459         if (autos != autotmp.ptr)
1460             free(autos);
1461     }
1462 }
1463 
1464 /****************************
1465  * Generate code for a block.
1466  */
1467 
1468 private void blcodgen(block *bl)
1469 {
1470     regm_t mfuncregsave = mfuncreg;
1471 
1472     //dbg_printf("blcodgen(%p)\n",bl);
1473 
1474     /* Determine existing immediate values in registers by ANDing
1475         together the values from all the predecessors of b.
1476      */
1477     assert(bl.Bregcon.immed.mval == 0);
1478     regcon.immed.mval = 0;      // assume no previous contents in registers
1479 //    regcon.cse.mval = 0;
1480     foreach (bpl; ListRange(bl.Bpred))
1481     {
1482         block *bp = list_block(bpl);
1483 
1484         if (bpl == bl.Bpred)
1485         {   regcon.immed = bp.Bregcon.immed;
1486             regcon.params = bp.Bregcon.params;
1487 //          regcon.cse = bp.Bregcon.cse;
1488         }
1489         else
1490         {
1491             int i;
1492 
1493             regcon.params &= bp.Bregcon.params;
1494             if ((regcon.immed.mval &= bp.Bregcon.immed.mval) != 0)
1495                 // Actual values must match, too
1496                 for (i = 0; i < REGMAX; i++)
1497                 {
1498                     if (regcon.immed.value[i] != bp.Bregcon.immed.value[i])
1499                         regcon.immed.mval &= ~mask(i);
1500                 }
1501         }
1502     }
1503     regcon.cse.mops &= regcon.cse.mval;
1504 
1505     // Set regcon.mvar according to what variables are in registers for this block
1506     CodeBuilder cdb; cdb.ctor();
1507     regcon.mvar = 0;
1508     regcon.mpvar = 0;
1509     regcon.indexregs = 1;
1510     int anyspill = 0;
1511     char *sflsave = null;
1512     if (config.flags4 & CFG4optimized)
1513     {
1514         CodeBuilder cdbload; cdbload.ctor();
1515         CodeBuilder cdbstore; cdbstore.ctor();
1516 
1517         sflsave = cast(char *) alloca(globsym.length * char.sizeof);
1518         for (SYMIDX i = 0; i < globsym.length; i++)
1519         {
1520             Symbol *s = globsym[i];
1521 
1522             sflsave[i] = s.Sfl;
1523             if (regParamInPreg(s) &&
1524                 regcon.params & s.Spregm() &&
1525                 vec_testbit(dfoidx,s.Srange))
1526             {
1527 //                regcon.used |= s.Spregm();
1528             }
1529 
1530             if (s.Sfl == FLreg)
1531             {
1532                 if (vec_testbit(dfoidx,s.Srange))
1533                 {
1534                     regcon.mvar |= s.Sregm;
1535                     if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
1536                         regcon.mpvar |= s.Sregm;
1537                 }
1538             }
1539             else if (s.Sflags & SFLspill)
1540             {
1541                 if (vec_testbit(dfoidx,s.Srange))
1542                 {
1543                     anyspill = cast(int)(i + 1);
1544                     cgreg_spillreg_prolog(bl,s,cdbstore,cdbload);
1545                     if (vec_testbit(dfoidx,s.Slvreg))
1546                     {
1547                         s.Sfl = FLreg;
1548                         regcon.mvar |= s.Sregm;
1549                         regcon.cse.mval &= ~s.Sregm;
1550                         regcon.immed.mval &= ~s.Sregm;
1551                         regcon.params &= ~s.Sregm;
1552                         if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
1553                             regcon.mpvar |= s.Sregm;
1554                     }
1555                 }
1556             }
1557         }
1558         if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
1559         {
1560             cse_save(cdb,regcon.cse.mops & ~regcon.cse.mval);
1561         }
1562         cdb.append(cdbstore);
1563         cdb.append(cdbload);
1564         mfuncreg &= ~regcon.mvar;               // use these registers
1565         regcon.used |= regcon.mvar;
1566 
1567         // Determine if we have more than 1 uncommitted index register
1568         regcon.indexregs = IDXREGS & ~regcon.mvar;
1569         regcon.indexregs &= regcon.indexregs - 1;
1570     }
1571 
1572     /* This doesn't work when calling the BC_finally function,
1573      * as it is one block calling another.
1574      */
1575     //regsave.idx = 0;
1576 
1577     reflocal = 0;
1578     int refparamsave = refparam;
1579     refparam = 0;
1580     assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops);
1581 
1582     outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave);
1583     bl.Bcode = cdb.finish();
1584 
1585     for (int i = 0; i < anyspill; i++)
1586     {
1587         Symbol *s = globsym[i];
1588         s.Sfl = sflsave[i];    // undo block register assignments
1589     }
1590 
1591     if (reflocal)
1592         bl.Bflags |= BFLreflocal;
1593     if (refparam)
1594         bl.Bflags |= BFLrefparam;
1595     refparam |= refparamsave;
1596     bl.Bregcon.immed = regcon.immed;
1597     bl.Bregcon.cse = regcon.cse;
1598     bl.Bregcon.used = regcon.used;
1599     bl.Bregcon.params = regcon.params;
1600 
1601     debug
1602     debugw && printf("code gen complete\n");
1603 }
1604 
1605 /*****************************************
1606  * Add in exception handling code.
1607  */
1608 
1609 version (SCPP)
1610 {
1611 
1612 private void cgcod_eh()
1613 {
1614     list_t stack;
1615     int idx;
1616     int tryidx;
1617 
1618     if (!(usednteh & (EHtry | EHcleanup)))
1619         return;
1620 
1621     // Compute Bindex for each block
1622     for (block *b = startblock; b; b = b.Bnext)
1623     {
1624         b.Bindex = -1;
1625         b.Bflags &= ~BFLvisited;               /* mark as unvisited    */
1626     }
1627     block *btry = null;
1628     int lastidx = 0;
1629     startblock.Bindex = 0;
1630     for (block *b = startblock; b; b = b.Bnext)
1631     {
1632         if (btry == b.Btry && b.BC == BCcatch)  // if don't need to pop try block
1633         {
1634             block *br = list_block(b.Bpred);          // find corresponding try block
1635             assert(br.BC == BCtry);
1636             b.Bindex = br.Bindex;
1637         }
1638         else if (btry != b.Btry && b.BC != BCcatch ||
1639                  !(b.Bflags & BFLvisited))
1640             b.Bindex = lastidx;
1641         b.Bflags |= BFLvisited;
1642 
1643         debug
1644         if (debuge)
1645         {
1646             WRBC(b.BC);
1647             printf(" block (%p) Btry=%p Bindex=%d\n",b,b.Btry,b.Bindex);
1648         }
1649 
1650         except_index_set(b.Bindex);
1651         if (btry != b.Btry)                    // exited previous try block
1652         {
1653             except_pop(b,null,btry);
1654             btry = b.Btry;
1655         }
1656         if (b.BC == BCtry)
1657         {
1658             except_push(b,null,b);
1659             btry = b;
1660             tryidx = except_index_get();
1661             CodeBuilder cdb; cdb.ctor();
1662             nteh_gensindex(cdb,tryidx - 1);
1663             cdb.append(b.Bcode);
1664             b.Bcode = cdb.finish();
1665         }
1666 
1667         stack = null;
1668         for (code *c = b.Bcode; c; c = code_next(c))
1669         {
1670             if ((c.Iop & ESCAPEmask) == ESCAPE)
1671             {
1672                 code *c1 = null;
1673                 switch (c.Iop & 0xFFFF00)
1674                 {
1675                     case ESCctor:
1676                         //printf("ESCctor\n");
1677                         except_push(c,c.IEV1.Vtor,null);
1678                         goto L1;
1679 
1680                     case ESCdtor:
1681                         //printf("ESCdtor\n");
1682                         except_pop(c,c.IEV1.Vtor,null);
1683                     L1: if (config.exe == EX_WIN32)
1684                         {
1685                             CodeBuilder cdb; cdb.ctor();
1686                             nteh_gensindex(cdb,except_index_get() - 1);
1687                             c1 = cdb.finish();
1688                             c1.next = code_next(c);
1689                             c.next = c1;
1690                         }
1691                         break;
1692 
1693                     case ESCmark:
1694                         //printf("ESCmark\n");
1695                         idx = except_index_get();
1696                         list_prependdata(&stack,idx);
1697                         except_mark();
1698                         break;
1699 
1700                     case ESCrelease:
1701                         //printf("ESCrelease\n");
1702                         version (SCPP)
1703                         {
1704                             idx = list_data(stack);
1705                             list_pop(&stack);
1706                             if (idx != except_index_get())
1707                             {
1708                                 if (config.exe == EX_WIN32)
1709                                 {
1710                                     CodeBuilder cdb; cdb.ctor();
1711                                     nteh_gensindex(cdb,idx - 1);
1712                                     c1 = cdb.finish();
1713                                     c1.next = code_next(c);
1714                                     c.next = c1;
1715                                 }
1716                                 else
1717                                 {   except_pair_append(c,idx - 1);
1718                                     c.Iop = ESCAPE | ESCoffset;
1719                                 }
1720                             }
1721                             except_release();
1722                         }
1723                         break;
1724 
1725                     case ESCmark2:
1726                         //printf("ESCmark2\n");
1727                         except_mark();
1728                         break;
1729 
1730                     case ESCrelease2:
1731                         //printf("ESCrelease2\n");
1732                         version (SCPP)
1733                         {
1734                             except_release();
1735                         }
1736                         break;
1737 
1738                     default:
1739                         break;
1740                 }
1741             }
1742         }
1743         assert(stack == null);
1744         b.Bendindex = except_index_get();
1745 
1746         if (b.BC != BCret && b.BC != BCretexp)
1747             lastidx = b.Bendindex;
1748 
1749         // Set starting index for each of the successors
1750         int i = 0;
1751         foreach (bl; ListRange(b.Bsucc))
1752         {
1753             block *bs = list_block(bl);
1754             if (b.BC == BCtry)
1755             {
1756                 switch (i)
1757                 {
1758                     case 0:                             // block after catches
1759                         bs.Bindex = b.Bendindex;
1760                         break;
1761 
1762                     case 1:                             // 1st catch block
1763                         bs.Bindex = tryidx;
1764                         break;
1765 
1766                     default:                            // subsequent catch blocks
1767                         bs.Bindex = b.Bindex;
1768                         break;
1769                 }
1770 
1771                 debug
1772                 if (debuge)
1773                 {
1774                     printf(" 1setting %p to %d\n",bs,bs.Bindex);
1775                 }
1776             }
1777             else if (!(bs.Bflags & BFLvisited))
1778             {
1779                 bs.Bindex = b.Bendindex;
1780 
1781                 debug
1782                 if (debuge)
1783                 {
1784                     printf(" 2setting %p to %d\n",bs,bs.Bindex);
1785                 }
1786             }
1787             bs.Bflags |= BFLvisited;
1788             i++;
1789         }
1790     }
1791 
1792     if (config.exe == EX_WIN32)
1793         for (block *b = startblock; b; b = b.Bnext)
1794         {
1795             if (/*!b.Bcount ||*/ b.BC == BCtry)
1796                 continue;
1797             foreach (bl; ListRange(b.Bpred))
1798             {
1799                 int pi = list_block(bl).Bendindex;
1800                 if (b.Bindex != pi)
1801                 {
1802                     CodeBuilder cdb; cdb.ctor();
1803                     nteh_gensindex(cdb,b.Bindex - 1);
1804                     cdb.append(b.Bcode);
1805                     b.Bcode = cdb.finish();
1806                     break;
1807                 }
1808             }
1809         }
1810 }
1811 
1812 }
1813 
1814 /******************************
1815  * Count the number of bits set in a register mask.
1816  */
1817 
int numbitsset(regm_t regm)
{
    // Use the runtime library's population count instead of a hand-rolled
    // clear-lowest-bit loop; popcnt() is overloaded for both 32 and 64 bit
    // unsigned operands and returns the number of 1 bits as an int.
    import core.bitop : popcnt;

    return popcnt(regm);
}
1827 
1828 /******************************
1829  * Given a register mask, find and return the number
1830  * of the first register that fits.
1831  */
1832 
reg_t findreg(regm_t regm)
{
    // Convenience overload: forwards to the (regm, line, file) overload.
    // __LINE__ and __FILE__ are evaluated right here, so the location shown
    // in the failure diagnostic is this wrapper, not the caller.
    return findreg(regm, __LINE__, __FILE__);
}
1837 
reg_t findreg(regm_t regm, int line, const(char)* file)
{
    // Keep the caller's mask around for the failure message (debug builds only).
    debug
    regm_t requested = regm;

    // Scan upward from bit 0 and return the index of the lowest set bit.
    // Whenever the low nibble is empty, four positions are skipped at once.
    reg_t idx = 0;
    while (regm)
    {
        if (!(regm & 0xF))
        {
            regm >>= 4;
            idx += 4;
            continue;
        }
        if (regm & 1)
            return idx;
        regm >>= 1;
        ++idx;
    }

    // No bit was set in the mask: report where we were called from and halt.
    debug
    printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(requested),line,file,funcsym_p.Sident.ptr);
    fflush(stdout);

//    *(char*)0=0;
    assert(0);
}
1866 
1867 /***************
1868  * Free element (but not its leaves! (assume they are already freed))
1869  * Don't decrement Ecount! This is so we can detect if the common subexp
1870  * has already been evaluated.
1871  * If common subexpression is not required anymore, eliminate
1872  * references to it.
1873  */
1874 
void freenode(elem *e)
{
    elem_debug(e);
    //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount);

    // Usage count: test the value it had on entry, but always decrement it.
    const bool stillInUse = e.Ecomsub != 0;
    --e.Ecomsub;
    if (stillInUse)
        return;

    // Only elems that were CSEs have bookkeeping to undo.
    if (!e.Ecount)
        return;

    // Release every register that is recorded as holding this elem.
    foreach (i, held; regcon.cse.value)
    {
        if (held != e)
            continue;
        const regm_t keep = ~mask(cast(uint)i);
        regcon.cse.mval &= keep;
        regcon.cse.mops &= keep;
    }
    CSE.remove(e);
}
1893 
1894 /*********************************
1895  * Reset Ecomsub for all elem nodes, i.e. reverse the effects of freenode().
1896  */
1897 
private void resetEcomsub(elem *e)
{
    // Walk down the E1 chain iteratively; recurse only into E2 of binary nodes.
    for (;; e = e.EV.E1)
    {
        elem_debug(e);
        e.Ecomsub = e.Ecount;           // restore the usage count

        const oper = e.Eoper;
        if (OTleaf(oper))
            return;                     // leaves have no operands to visit
        if (OTbinary(oper))
            resetEcomsub(e.EV.E2);
    }
}
1915 
1916 /*********************************
1917  * Determine if elem e is a register variable.
1918  * If so:
1919  *      *pregm = mask of registers that make up the variable
1920  *      *preg = the least significant register
1921  *      returns true
1922  * Else
1923  *      returns false
1924  */
1925 
int isregvar(elem *e,regm_t *pregm,reg_t *preg)
{
    elem_debug(e);

    // Only variable references and their addresses can name a register variable.
    if (e.Eoper != OPvar && e.Eoper != OPrelconst)
        return false;

    Symbol *sym = e.EV.Vsym;
    regm_t regs;        // mask reported through *pregm
    reg_t lsreg;        // register reported through *preg

    switch (sym.Sfl)
    {
        case FLreg:
            if (sym.Sclass == SCparameter)
            {
                refparam = true;
                reflocal = true;
            }
            // An offset of REGSIZE selects the most significant register.
            lsreg = (e.EV.Voffset == REGSIZE) ? sym.Sregmsw : sym.Sreglsw;
            regs = sym.Sregm;
            //assert(tyreg(sym.ty()));
static if (0)
{
            // Let's just see if there is a CSE in a reg we can use
            // instead. This helps avoid AGI's.
            if (e.Ecount && e.Ecount != e.Ecomsub)
            {   int i;

                for (i = 0; i < arraysize(regcon.cse.value); i++)
                {
                    if (regcon.cse.value[i] == e)
                    {   lsreg = i;
                        break;
                    }
                }
            }
}
            assert(regs & regcon.mvar && !(regs & ~regcon.mvar));
            break;

        case FLpseudo:
        {
            const uint pseudoIdx = sym.Sreglsw;
            version (MARS)
            {
                const regm_t pseudoRegs = mask(pseudoIdx);
                const uint pseudoNum = pseudoIdx;
            }
            else
            {
                const regm_t pseudoRegs = pseudomask[pseudoIdx];
                const uint pseudoNum = pseudoreg[pseudoIdx];
            }
            // Reject BP,SP,EBP,ESP and the ?H registers, and anything outside ALLREGS
            if (!(pseudoRegs & ALLREGS) || (pseudoIdx & ~3) == 4)
                return false;
            lsreg = pseudoNum & 7;
            regs = pseudoRegs;
            break;
        }

        default:
            return false;
    }

    // Common exit for both register cases: fill in whichever outputs were requested.
    if (preg)
        *preg = lsreg;
    if (pregm)
        *pregm = regs;
    return true;
}
2005 
2006 /*********************************
2007  * Allocate some registers.
2008  * Input:
2009  *      pretregs        Pointer to mask of registers to make selection from.
2010  *      tym             Mask of type we will store in registers.
2011  * Output:
2012  *      *pretregs       Mask of allocated registers.
2013  *      *preg           Register number of first allocated register.
2014  *      msavereg,mfuncreg       retregs bits are cleared.
2015  *      regcon.cse.mval,regcon.cse.mops updated
 *      cdb             Receives any code generated to save regcon.cse.mops
 *                      registers on the stack.
2019  */
2020 
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym)
{
    // Convenience overload: forwards to the overload taking (line, file).
    // __LINE__ and __FILE__ are evaluated right here, so the debug output
    // names this wrapper rather than the caller.
    allocreg(cdb, pretregs, preg, tym, __LINE__, __FILE__);
}
2025 
/* Worker overload of allocreg().
 *
 * Picks the register(s) for a value of type tym out of the candidate mask
 * *pretregs, stores the selection back into *pretregs and *preg, records it
 * in the lastretregs history and calls getregs() on it.
 * Params:
 *      cdb      = passed on to getregs()
 *      pretregs = in: candidate registers; out: the registers selected
 *      preg     = out: the register number reported for the selection
 *      tym      = type of the value; only tybasic(tym) is used
 *      line     = source line, printed in debug output only
 *      file     = source file, printed in debug output only
 */
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym
        ,int line,const(char)* file)
{
        reg_t reg;

static if (0)
{
        if (pass == PASSfinal)
        {
            printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym ",
                file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval),
                regm_str(msavereg),regm_str(*pretregs));
            WRTYxx(tym);
            dbg_printf("\n");
        }
}
        tym = tybasic(tym);
        uint size = _tysize[tym];
        // Restrict the caller's candidates to ES, allregs and the XMM registers
        *pretregs &= mES | allregs | XMMREGS;
        regm_t retregs = *pretregs;

        debug if (retregs == 0)
            printf("allocreg: file %s(%d)\n", file, line);

        // Shortcut: every candidate is a register variable, so there is
        // nothing to choose between; *pretregs is left as it is.
        if ((retregs & regcon.mvar) == retregs) // if exactly in reg vars
        {
            if (size <= REGSIZE || (retregs & XMMREGS))
            {
                *preg = findreg(retregs);
                assert(retregs == mask(*preg)); /* no more bits are set */
            }
            else if (size <= 2 * REGSIZE)
            {
                *preg = findregmsw(retregs);
                assert(retregs & mLSW);
            }
            else
                assert(0);
            getregs(cdb,retregs);
            return;
        }
        int count = 0;
        // L1: start a fresh selection attempt from the current retregs
L1:
        //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        assert(++count < 20);           /* fail instead of hanging if blocked */
        assert(retregs);
        reg_t msreg = NOREG, lsreg = NOREG;  /* no value assigned yet        */
        // L3: re-select from a narrowed retregs, keeping any msreg/lsreg found so far
L3:
        //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        // Build the preference mask r by relaxing the exclusions step by step
        // until something is left: first avoid msavereg, cse.mval and params,
        // then allow params, then exclude only msavereg and cse.mops, then only
        // msavereg, and finally accept any candidate.
        regm_t r = retregs & ~(msavereg | regcon.cse.mval | regcon.params);
        if (!r)
        {
            r = retregs & ~(msavereg | regcon.cse.mval);
            if (!r)
            {
                r = retregs & ~(msavereg | regcon.cse.mops);
                if (!r)
                {   r = retregs & ~msavereg;
                    if (!r)
                        r = retregs;
                }
            }
        }

        // Case 1: the value goes in a single register
        if (size <= REGSIZE || retregs & XMMREGS)
        {
            // Avoid BP whenever there is an alternative
            if (r & ~mBP)
                r &= ~mBP;

            // If only one index register, prefer to not use LSW registers
            if (!regcon.indexregs && r & ~mLSW)
                r &= ~mLSW;

            if (pass == PASSfinal && r & ~lastretregs && !I16)
            {   // Try not to always allocate the same register,
                // to schedule better

                // Each step drops an older allocation from r only if that
                // still leaves at least one register to choose from.
                r &= ~lastretregs;
                if (r & ~last2retregs)
                {
                    r &= ~last2retregs;
                    if (r & ~last3retregs)
                    {
                        r &= ~last3retregs;
                        if (r & ~last4retregs)
                        {
                            r &= ~last4retregs;
//                          if (r & ~last5retregs)
//                              r &= ~last5retregs;
                        }
                    }
                }
                // Likewise steer away from the registers in mfuncreg when possible
                if (r & ~mfuncreg)
                    r &= ~mfuncreg;
            }
            reg = findreg(r);
            retregs = mask(reg);
        }
        // Case 2: the value needs a most/least significant register pair
        else if (size <= 2 * REGSIZE)
        {
            /* Select pair with both regs free. Failing */
            /* that, select pair with one reg free.             */

            // BP is never used in a pair: drop it and select again
            if (r & mBP)
            {
                retregs &= ~mBP;
                goto L3;
            }

            if (r & mMSW)
            {
                if (r & mDX)
                    msreg = DX;                 /* prefer to use DX over CX */
                else
                    msreg = findregmsw(r);
                r &= mLSW;                      /* see if there's an LSW also */
                if (r)
                    lsreg = findreg(r);
                else if (lsreg == NOREG)   /* if don't have LSW yet */
                {
                    // Keep msreg and go back to pick an LSW register
                    retregs &= mLSW;
                    goto L3;
                }
            }
            else
            {
                // r has no MSW register. In 64 bit mode, if it has no LSW
                // register either, restart from the caller's MSW/LSW candidates.
                if (I64 && !(r & mLSW))
                {
                    retregs = *pretregs & (mMSW | mLSW);
                    assert(retregs);
                    goto L1;
                }
                lsreg = findreglsw(r);
                if (msreg == NOREG)
                {
                    // Keep lsreg and go back to pick an MSW register
                    retregs &= mMSW;
                    assert(retregs);
                    goto L3;
                }
            }
            // *preg gets the MSW register, unless that is ES, in which case
            // the LSW register is reported instead
            reg = (msreg == ES) ? lsreg : msreg;
            retregs = mask(msreg) | mask(lsreg);
        }
        // Case 3: 16 bit code holding a double; the mask must be exactly DOUBLEREGS
        else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
        {
            debug
            if (retregs != DOUBLEREGS)
                printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs));

            assert(retregs == DOUBLEREGS);
            reg = AX;
        }
        // Anything else is not supported
        else
        {
            debug
            {
                WRTYxx(tym);
                printf("\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n",
                    file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym);
            }
            assert(0);
        }
        // The selection overlaps register variables: remove those registers
        // from the candidates and start over. The one exception is a
        // multi-register request whose candidates are exactly AX|DX.
        if (retregs & regcon.mvar)              // if conflict with reg vars
        {
            if (!(size > REGSIZE && *pretregs == (mAX | mDX)))
            {
                retregs = (*pretregs &= ~(retregs & regcon.mvar));
                goto L1;                // try other registers
            }
        }
        *preg = reg;
        *pretregs = retregs;

        //printf("Allocating %s\n",regm_str(retregs));
        // Shift the allocation history by one; it is consulted above to vary
        // the register chosen in the final pass
        last5retregs = last4retregs;
        last4retregs = last3retregs;
        last3retregs = last2retregs;
        last2retregs = lastretregs;
        lastretregs = retregs;
        getregs(cdb, retregs);
}
2207 
2208 
2209 /*****************************************
2210  * Allocate a scratch register.
2211  * Params:
2212  *      cdb = where to write any generated code to
2213  *      regm = mask of registers to pick one from
2214  * Returns:
2215  *      selected register
2216  */
reg_t allocScratchReg(ref CodeBuilder cdb, regm_t regm)
{
    // allocreg() narrows the mask it is handed; work on a local copy and
    // report only the register number it picked.
    regm_t candidates = regm;
    reg_t scratch;
    allocreg(cdb, &candidates, &scratch, TYoffset);
    return scratch;
}
2223 
2224 
2225 /******************************
2226  * Determine registers that should be destroyed upon arrival
2227  * to code entry point for exception handling.
2228  */
regm_t lpadregs()
{
    //printf("lpadregs(): allregs=%s, mfuncreg=%s\n", regm_str(allregs), regm_str(mfuncreg));

    // DWARF exception handling: allregs minus whatever is still in mfuncreg
    if (config.ehmethod == EHmethod.EH_DWARF)
        return allregs & ~mfuncreg;

    // Other EH methods: all of allregs for 32/64 bit code ...
    if (I32 | I64)
        return allregs;

    // ... and ALLREGS plus ES otherwise
    return ALLREGS | mES;
}
2239 
2240 
2241 /*************************
2242  * Mark registers as used.
2243  */
2244 
void useregs(regm_t regm)
{
    //printf("useregs(x%x) %s\n", regm, regm_str(regm));
    const regm_t untouched = ~regm;

    mfuncreg &= untouched;
    regcon.used |= regm;                // registers used in this block

    // Touching a fastpar register variable throws out every entry in
    // regcon.params; otherwise only the registers in regm are removed.
    if (regm & regcon.mpvar)
        regcon.params = 0;
    else
        regcon.params &= untouched;
}
2254 
2255 /*************************
2256  * We are going to use the registers in mask r.
2257  * Generate any code necessary to save any regs.
2258  */
2259 
void getregs(ref CodeBuilder cdb, regm_t r)
{
    //printf("getregs(x%x) %s\n", r, regm_str(r));

    // Common subexpressions in cse.mops that live in r and must be saved
    const regm_t toSave = regcon.cse.mops & r;

    useregs(r);

    // The registers in r are about to be destroyed: forget what they held
    const regm_t survivors = ~r;
    regcon.cse.mval &= survivors;
    msavereg &= survivors;
    regcon.immed.mval &= survivors;

    if (toSave)
        cse_save(cdb, toSave);
}
2271 
2272 /*************************
2273  * We are going to use the registers in mask r.
2274  * Same as getregs(), but assert if code is needed to be generated.
2275  */
void getregsNoSave(regm_t r)
{
    //printf("getregsNoSave(x%x) %s\n", r, regm_str(r));

    // No common subexpression in cse.mops may live in r: nothing can be saved here
    assert((r & regcon.cse.mops) == 0);

    useregs(r);

    // The registers in r are about to be destroyed: forget what they held
    const regm_t survivors = ~r;
    regcon.cse.mval &= survivors;
    msavereg &= survivors;
    regcon.immed.mval &= survivors;
}
2285 
2286 /*****************************************
2287  * Copy registers in cse.mops into memory.
2288  */
2289 
private void cse_save(ref CodeBuilder cdb, regm_t ms)
{
    // Every register to be saved must currently be in cse.mops;
    // clear those bits from cse.mops straight away.
    assert((ms & regcon.cse.mops) == ms);
    regcon.cse.mops &= ~ms;

    /* Pass 1: drop from ms each register whose CSE already has a
     * matching entry in the CSE table.
     */
    for (regm_t bit = 1; bit < mask(NUMREGS); bit <<= 1)
    {
        if (!(ms & bit))
            continue;

        const e = regcon.cse.value[findreg(bit)];
        const sz = tysize(e.Ety);
        foreach (const ref cse; CSE.filter(e))
        {
            const bool oneReg   = sz <= REGSIZE;
            const bool sameHalf = sz <= 2 * REGSIZE &&
                                  (((bit & mMSW) && (cse.regm & mMSW)) ||
                                   ((bit & mLSW) && (cse.regm & mLSW)));
            const bool sameQuad = sz == 4 * REGSIZE && bit == cse.regm;
            if (!(oneReg || sameHalf || sameQuad))
                continue;

            ms &= ~bit;
            if (!ms)
                return;                 // nothing left to save
            break;
        }
    }

    /* Pass 2: create a CSE entry for each remaining register, lowest first */
    while (ms)
    {
        auto cse = CSE.add();
        const reg_t reg = findreg(ms);          /* the register to save         */
        const regm_t regBit = mask(reg);
        cse.e = regcon.cse.value[reg];
        cse.regm = regBit;

        ms &= ~regBit;                          /* turn off reg bit in ms       */

        // If we can simply reload the CSE, we don't need to save it
        if (cse_simple(&cse.csimple, cse.e))
        {
            cse.flags |= CSEsimple;
            continue;
        }

        // Otherwise emit a store of the register into the entry's slot
        CSE.updateSizeAndAlign(cse.e);
        gen_storecse(cdb, cse.e.Ety, reg, cse.slot);
        reflocal = true;
    }
}
2340 
2341 /******************************************
2342  * Getregs without marking immediate register values as gone.
2343  */
2344 
void getregs_imm(ref CodeBuilder cdb, regm_t r)
{
    // getregs() clears r from regcon.immed.mval; remember the mask and
    // put it back afterwards so immediate values stay recorded.
    const regm_t immedBefore = regcon.immed.mval;
    getregs(cdb, r);
    regcon.immed.mval = immedBefore;
}
2351 
2352 /******************************************
2353  * Flush all CSE's out of registers and into memory.
2354  * Input:
2355  *      do87    !=0 means save 87 registers too
2356  */
2357 
void cse_flush(ref CodeBuilder cdb, int do87)
{
    //dbg_printf("cse_flush()\n");

    // Save every CSE currently in cse.mops to memory
    cse_save(cdb, regcon.cse.mops);

    if (do87 == 0)
        return;
    save87(cdb);                        // save any 8087 temporaries
}
2365 
2366 /*************************
2367  * Common subexpressions exist in registers. Note this in regcon.cse.mval.
2368  * Input:
2369  *      e       the subexpression
2370  *      regm    mask of registers holding it
2371  *      opsflag if != 0 then regcon.cse.mops gets set too
2372  * Returns:
2373  *      false   not saved as a CSE
2374  *      true    saved as a CSE
2375  */
2376 
bool cssave(elem *e,regm_t regm,uint opsflag)
{
    /*if (e.Ecount && e.Ecount == e.Ecomsub)*/
    // Only elems with both a non-zero Ecount and a non-zero Ecomsub are recorded
    if (!e.Ecount || !e.Ecomsub)
        return false;

    if (!opsflag && pass != PASSfinal && (I32 || I64))
        return false;

    //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag);
    regm &= mBP | ALLREGS | mES | XMMREGS;    /* just to be sure              */

/+
    /* Do not register CSEs if they are register variables and      */
    /* are not operator nodes. This forces the register allocation  */
    /* to go through allocreg(), which will prevent using register  */
    /* variables for scratch.                                       */
    if (opsflag || !(regm & regcon.mvar))
+/

    bool recorded = false;

    // Visit the registers in regm from the lowest number upward
    for (uint r = 0; regm; ++r)
    {
        const regm_t bit = mask(r);
        if (!(regm & bit))
            continue;
        regm &= ~bit;

        // If we don't need this CSE, and the register already
        // holds a CSE that we do need, don't mark the new one
        const bool holdsOther = (regcon.cse.mval & bit) && regcon.cse.value[r] != e;
        if (holdsOther && !opsflag && (regcon.cse.mops & bit))
            continue;

        regcon.cse.mval |= bit;
        if (opsflag)
            regcon.cse.mops |= bit;
        //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[r],e);
        regcon.cse.value[r] = e;
        recorded = true;
    }
    return recorded;
}
2421 
2422 /*************************************
2423  * Determine if a computation should be done into a register.
2424  */
2425 
bool evalinregister(elem *e)
{
    if (config.exe == EX_WIN64 && e.Eoper == OPrelconst)
        return true;

    // Not a CSE: no need to evaluate it in a register
    if (e.Ecount == 0)
        return false;

    // Operators are always evaluated in a register
    if (!OTleaf(e.Eoper))
        return true;

    // Need to rethink this code if float or double can be CSE'd
    const uint sz = tysize(e.Ety);

    if (e.Ecount == e.Ecomsub)          // a CSE that still needs to be generated
    {
        // Only for 32/64 bit code and values that fit in one register
        // (not restricted to PASSfinal, see bug 8987)
        if (!(I32 || I64) || sz > REGSIZE)
            return false;

        regm_t avail = allregs & ~regcon.mvar;
        if (sz == 1)
            avail &= BYTEREGS;

        // Need to be at least 3 registers available, as addressing modes
        // can use up 2: discard the lowest available register and require
        // that two or more are still left.
        const regm_t rest = avail & (avail - 1);
        return (rest & (rest - 1)) != 0;
    }

    /* Elem is now a CSE that might have been generated. If so, and */
    /* it's in a register already, the computation should be done   */
    /* using that register.                                         */
    regm_t holders = 0;
    foreach (i, held; regcon.cse.value)
    {
        if (held == e)
            holders |= mask(cast(uint)i);
    }
    holders &= regcon.cse.mval;         // mask of available CSEs

    if (sz <= REGSIZE)
        return holders != 0;            /* the CSE is in a register     */
    if (sz <= 2 * REGSIZE)
        return (holders & mMSW) && (holders & mLSW);    // need both halves
    return true;                        /* cop-out for now              */
}
2478 
2479 /*******************************************************
2480  * Return mask of scratch registers.
2481  */
2482 
regm_t getscratch()
{
    // Scratch registers are only reported during the final pass
    if (pass != PASSfinal)
        return 0;

    // Everything in allregs that is not claimed by any of these masks
    const regm_t claimed = regcon.mvar | regcon.mpvar | regcon.cse.mval |
                           regcon.immed.mval | regcon.params | mfuncreg;
    return allregs & ~claimed;
}
2493 
2494 /******************************
2495  * Evaluate an elem that is a common subexp that has been encountered
2496  * before.
2497  * Look first to see if it is already in a register.
2498  */
2499 
2500 private void comsub(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2501 {
2502     tym_t tym;
2503     regm_t regm,emask;
2504     reg_t reg;
2505     uint byte_,sz;
2506 
2507     //printf("comsub(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
2508     elem_debug(e);
2509 
2510     debug
2511     {
2512         if (e.Ecomsub > e.Ecount)
2513             elem_print(e);
2514     }
2515 
2516     assert(e.Ecomsub <= e.Ecount);
2517 
2518     if (*pretregs == 0)        // no possible side effects anyway
2519     {
2520         return;
2521     }
2522 
2523     /* First construct a mask, emask, of all the registers that
2524      * have the right contents.
2525      */
2526     emask = 0;
2527     for (uint i = 0; i < regcon.cse.value.length; i++)
2528     {
2529         //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]);
2530         if (regcon.cse.value[i] == e)   // if contents are right
2531                 emask |= mask(i);       // turn on bit for reg
2532     }
2533     emask &= regcon.cse.mval;                     // make sure all bits are valid
2534 
2535     if (emask & XMMREGS && *pretregs == mPSW)
2536         { }
2537     else if (tyxmmreg(e.Ety) && config.fpxmmregs)
2538     {
2539         if (*pretregs & (mST0 | mST01))
2540         {
2541             regm_t retregs = *pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1;
2542             comsub(cdb, e, &retregs);
2543             fixresult(cdb,e,retregs,pretregs);
2544             return;
2545         }
2546     }
2547     else if (tyfloating(e.Ety) && config.inline8087)
2548     {
2549         comsub87(cdb,e,pretregs);
2550         return;
2551     }
2552 
2553 
2554     /* create mask of CSEs */
2555     regm_t csemask = CSE.mask(e);
2556     csemask &= ~emask;            // stuff already in registers
2557 
2558     debug if (debugw)
2559     {
2560         printf("comsub(e=%p): *pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n",
2561                 e,regm_str(*pretregs),regm_str(emask),regm_str(csemask),
2562                 regm_str(regcon.cse.mval),regm_str(regcon.mvar));
2563         if (regcon.cse.mval & 1)
2564             elem_print(regcon.cse.value[0]);
2565     }
2566 
2567     tym = tybasic(e.Ety);
2568     sz = _tysize[tym];
2569     byte_ = sz == 1;
2570 
2571     if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register
2572     {
2573         /* First see if it is already in a correct register     */
2574 
2575         regm = emask & *pretregs;
2576         if (regm == 0)
2577             regm = emask;               /* try any other register       */
2578         if (regm)                       /* if it's in a register        */
2579         {
2580             if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (*pretregs & regcon.mvar) == *pretregs)
2581             {
2582                 regm = mask(findreg(regm));
2583                 fixresult(cdb,e,regm,pretregs);
2584                 return;
2585             }
2586         }
2587 
2588         if (OTleaf(e.Eoper))                  /* if not op or func            */
2589             goto reload;                      /* reload data                  */
2590 
2591         foreach (ref cse; CSE.filter(e))
2592         {
2593             regm_t retregs;
2594 
2595             if (cse.flags & CSEsimple)
2596             {
2597                 retregs = *pretregs;
2598                 if (byte_ && !(retregs & BYTEREGS))
2599                     retregs = BYTEREGS;
2600                 else if (!(retregs & allregs))
2601                     retregs = allregs;
2602                 allocreg(cdb,&retregs,&reg,tym);
2603                 code *cr = &cse.csimple;
2604                 cr.setReg(reg);
2605                 if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1)
2606                     cr.Irex |= REX;
2607                 cdb.gen(cr);
2608                 goto L10;
2609             }
2610             else
2611             {
2612                 reflocal = true;
2613                 cse.flags |= CSEload;
2614                 if (*pretregs == mPSW)  // if result in CCs only
2615                 {
2616                     if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety)))
2617                     {
2618                         retregs = XMMREGS;
2619                         allocreg(cdb,&retregs,&reg,tym);
2620                         gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2621                         regcon.cse.mval |= mask(reg); // cs is in a reg
2622                         regcon.cse.value[reg] = e;
2623                         fixresult(cdb,e,retregs,pretregs);
2624                     }
2625                     else
2626                     {
2627                         // CMP cs[BP],0
2628                         gen_testcse(cdb, cse.e.Ety, sz, cse.slot);
2629                     }
2630                 }
2631                 else
2632                 {
2633                     retregs = *pretregs;
2634                     if (byte_ && !(retregs & BYTEREGS))
2635                         retregs = BYTEREGS;
2636                     allocreg(cdb,&retregs,&reg,tym);
2637                     gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2638                 L10:
2639                     regcon.cse.mval |= mask(reg); // cs is in a reg
2640                     regcon.cse.value[reg] = e;
2641                     fixresult(cdb,e,retregs,pretregs);
2642                 }
2643             }
2644             return;
2645         }
2646 
2647         debug
2648         {
2649             printf("couldn't find cse e = %p, pass = %d\n",e,pass);
2650             elem_print(e);
2651         }
2652         assert(0);                      /* should have found it         */
2653     }
2654     else                                  /* reg pair is req'd            */
2655     if (sz <= 2 * REGSIZE)
2656     {
2657         reg_t msreg,lsreg;
2658 
2659         /* see if we have both  */
2660         if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP)))
2661         {                               /* we don't have both           */
2662             debug if (!OTleaf(e.Eoper))
2663             {
2664                 printf("e = %p, op = x%x, emask = %s, csemask = %s\n",
2665                     e,e.Eoper,regm_str(emask),regm_str(csemask));
2666                 //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW);
2667                 elem_print(e);
2668             }
2669 
2670             assert(OTleaf(e.Eoper));        /* must have both for operators */
2671             goto reload;
2672         }
2673 
2674         /* Look for right vals in any regs      */
2675         regm = *pretregs & mMSW;
2676         if (emask & regm)
2677             msreg = findreg(emask & regm);
2678         else if (emask & mMSW)
2679             msreg = findregmsw(emask);
2680         else                    /* reload from cse array        */
2681         {
2682             if (!regm)
2683                 regm = mMSW & ALLREGS;
2684             allocreg(cdb,&regm,&msreg,TYint);
2685             loadcse(cdb,e,msreg,mMSW);
2686         }
2687 
2688         regm = *pretregs & (mLSW | mBP);
2689         if (emask & regm)
2690             lsreg = findreg(emask & regm);
2691         else if (emask & (mLSW | mBP))
2692             lsreg = findreglsw(emask);
2693         else
2694         {
2695             if (!regm)
2696                 regm = mLSW;
2697             allocreg(cdb,&regm,&lsreg,TYint);
2698             loadcse(cdb,e,lsreg,mLSW | mBP);
2699         }
2700 
2701         regm = mask(msreg) | mask(lsreg);       /* mask of result       */
2702         fixresult(cdb,e,regm,pretregs);
2703         return;
2704     }
2705     else if (tym == TYdouble || tym == TYdouble_alias)    // double
2706     {
2707         assert(I16);
2708         if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16)
2709         {
2710             static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d
2711             for (reg = 0; reg != NOREG; reg = dblreg[reg])
2712             {
2713                 assert(cast(int) reg >= 0 && reg <= 7);
2714                 if (mask(reg) & csemask)
2715                     loadcse(cdb,e,reg,mask(reg));
2716             }
2717             regm = DOUBLEREGS_16;
2718             fixresult(cdb,e,regm,pretregs);
2719             return;
2720         }
2721         if (OTleaf(e.Eoper)) goto reload;
2722 
2723         debug
2724         printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask));
2725 
2726         assert(0);
2727     }
2728     else
2729     {
2730         debug
2731         printf("e = %p, tym = x%x\n",e,tym);
2732 
2733         assert(0);
2734     }
2735 
2736 reload:                                 /* reload result from memory    */
2737     switch (e.Eoper)
2738     {
2739         case OPrelconst:
2740             cdrelconst(cdb,e,pretregs);
2741             break;
2742 
2743         case OPgot:
2744             if (config.exe & EX_posix)
2745             {
2746                 cdgot(cdb,e,pretregs);
2747                 break;
2748             }
2749             goto default;
2750 
2751         default:
2752             if (*pretregs == mPSW &&
2753                 config.fpxmmregs &&
2754                 (tyxmmreg(tym) || tysimd(tym)))
2755             {
2756                 regm_t retregs = XMMREGS | mPSW;
2757                 loaddata(cdb,e,&retregs);
2758                 cssave(e,retregs,false);
2759                 return;
2760             }
2761             loaddata(cdb,e,pretregs);
2762             break;
2763     }
2764     cssave(e,*pretregs,false);
2765 }
2766 
2767 
/*****************************
 * Reload one register of a common subexpression from its CSE save slot.
 *
 * Scans the CSE entries recorded for `e` and uses the first one whose
 * saved register mask intersects `regm`. It is a fatal error (assert(0))
 * if no such entry exists.
 * Params:
 *      cdb  = code builder receiving the generated load
 *      e    = common subexpression to reload
 *      reg  = register the value is loaded into
 *      regm = mask selecting which saved part of `e` is wanted
 */

private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm)
{
    foreach (ref cse; CSE.filter(e))
    {
        //printf("CSE = %p, regm = %s\n", cse.e, regm_str(cse.regm));
        if ((cse.regm & regm) == 0)
            continue;                   // entry does not cover the wanted part

        const regm_t regbit = mask(reg);

        reflocal = true;
        cse.flags |= CSEload;           // note that this slot was read back
        // The statement order below is deliberate: bookkeeping first,
        // then getregs(), then the actual load.
        regcon.cse.value[reg] = e;
        regcon.cse.mval |= regbit;
        getregs(cdb,regbit);
        gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
        return;
    }

    // No matching entry: dump diagnostics in debug builds, then abort.
    debug
    {
        printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm));
        elem_print(e);
    }
    assert(0);
}
2795 
/***************************
 * Generate code sequence for an elem.
 * (This comment describes codelem(), which is defined after the cdxxx[]
 * jump table below.)
 * Input:
 *      pretregs =      mask of possible registers to return result in
 *                      Note:   longs are in AX,BX or CX,DX or SI,DI
 *                              doubles are AX,BX,CX,DX only
 *      constflag =     1 for user of result will not modify the
 *                      registers returned in *pretregs.
 *                      2 for freenode() not called.
 * Output:
 *      *pretregs       mask of registers result is returned in
 *      cdb             the generated code sequence is appended to it
 */
2810 
/// Invoke the cdxxx[] code generator registered for operator `op`,
/// forwarding `cdb`, `e` and `pretregs` unchanged.
void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op)
{
    auto handler = cdxxx[op];
    handler(cdb,e,pretregs);
}
2815 
// Jump table of code generators, indexed by operator (OPER value).
// callcdxxx() and codelem() dispatch through it as cdxxx[op].
// Every entry is written as `index: value`, so the order of the lines below
// is not significant; operators that are not listed keep the array's
// default (null) entry.
// NOTE(review): the &cderr entries presumably mark operators that must never
// be dispatched through this table (codelem() handles OPvar, OPconst and
// OPrelconst itself before indexing it) - confirm against cderr().
private extern (C++) __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx =
[
    OPunde:    &cderr,
    OPadd:     &cdorth,
    OPmul:     &cdmul,
    OPand:     &cdorth,
    OPmin:     &cdorth,
    OPnot:     &cdnot,
    OPcom:     &cdcom,
    OPcond:    &cdcond,
    OPcomma:   &cdcomma,
    OPremquo:  &cddiv,
    OPdiv:     &cddiv,
    OPmod:     &cddiv,
    OPxor:     &cdorth,
    OPstring:  &cderr,
    OPrelconst: &cdrelconst,
    OPinp:     &cdport,
    OPoutp:    &cdport,
    OPasm:     &cdasm,
    OPinfo:    &cdinfo,
    OPdctor:   &cddctor,
    OPddtor:   &cdddtor,
    OPctor:    &cdctor,
    OPdtor:    &cddtor,
    OPmark:    &cdmark,
    OPvoid:    &cdvoid,
    OPhalt:    &cdhalt,
    OPnullptr: &cderr,
    OPpair:    &cdpair,
    OPrpair:   &cdpair,

    OPor:      &cdorth,
    OPoror:    &cdloglog,
    OPandand:  &cdloglog,
    OProl:     &cdshift,
    OPror:     &cdshift,
    OPshl:     &cdshift,
    OPshr:     &cdshift,
    OPashr:    &cdshift,
    OPbit:     &cderr,
    OPind:     &cdind,
    OPaddr:    &cderr,
    OPneg:     &cdneg,
    OPuadd:    &cderr,
    OPabs:     &cdabs,
    OPtoprec:  &cdtoprec,
    OPsqrt:    &cdneg,
    OPsin:     &cdneg,
    OPcos:     &cdneg,
    OPscale:   &cdscale,
    OPyl2x:    &cdscale,
    OPyl2xp1:  &cdscale,
    OPcmpxchg:     &cdcmpxchg,
    OPrint:    &cdneg,
    OPrndtol:  &cdrndtol,
    OPstrlen:  &cdstrlen,
    OPstrcpy:  &cdstrcpy,
    OPmemcpy:  &cdmemcpy,
    OPmemset:  &cdmemset,
    OPstrcat:  &cderr,
    OPstrcmp:  &cdstrcmp,
    OPmemcmp:  &cdmemcmp,
    OPsetjmp:  &cdsetjmp,
    OPnegass:  &cdaddass,
    OPpreinc:  &cderr,
    OPpredec:  &cderr,
    OPstreq:   &cdstreq,
    OPpostinc: &cdpost,
    OPpostdec: &cdpost,
    OPeq:      &cdeq,
    OPaddass:  &cdaddass,
    OPminass:  &cdaddass,
    OPmulass:  &cdmulass,
    OPdivass:  &cddivass,
    OPmodass:  &cddivass,
    OPshrass:  &cdshass,
    OPashrass: &cdshass,
    OPshlass:  &cdshass,
    OPandass:  &cdaddass,
    OPxorass:  &cdaddass,
    OPorass:   &cdaddass,

    OPle:      &cdcmp,
    OPgt:      &cdcmp,
    OPlt:      &cdcmp,
    OPge:      &cdcmp,
    OPeqeq:    &cdcmp,
    OPne:      &cdcmp,

    OPunord:   &cdcmp,
    OPlg:      &cdcmp,
    OPleg:     &cdcmp,
    OPule:     &cdcmp,
    OPul:      &cdcmp,
    OPuge:     &cdcmp,
    OPug:      &cdcmp,
    OPue:      &cdcmp,
    OPngt:     &cdcmp,
    OPnge:     &cdcmp,
    OPnlt:     &cdcmp,
    OPnle:     &cdcmp,
    OPord:     &cdcmp,
    OPnlg:     &cdcmp,
    OPnleg:    &cdcmp,
    OPnule:    &cdcmp,
    OPnul:     &cdcmp,
    OPnuge:    &cdcmp,
    OPnug:     &cdcmp,
    OPnue:     &cdcmp,

    OPvp_fp:   &cdcnvt,
    OPcvp_fp:  &cdcnvt,
    OPoffset:  &cdlngsht,
    OPnp_fp:   &cdshtlng,
    OPnp_f16p: &cdfar16,
    OPf16p_np: &cdfar16,

    OPs16_32:  &cdshtlng,
    OPu16_32:  &cdshtlng,
    OPd_s32:   &cdcnvt,
    OPb_8:     &cdcnvt,
    OPs32_d:   &cdcnvt,
    OPd_s16:   &cdcnvt,
    OPs16_d:   &cdcnvt,
    OPd_u16:   &cdcnvt,
    OPu16_d:   &cdcnvt,
    OPd_u32:   &cdcnvt,
    OPu32_d:   &cdcnvt,
    OP32_16:   &cdlngsht,
    OPd_f:     &cdcnvt,
    OPf_d:     &cdcnvt,
    OPd_ld:    &cdcnvt,
    OPld_d:    &cdcnvt,
    OPc_r:     &cdconvt87,
    OPc_i:     &cdconvt87,
    OPu8_16:   &cdbyteint,
    OPs8_16:   &cdbyteint,
    OP16_8:    &cdlngsht,
    OPu32_64:  &cdshtlng,
    OPs32_64:  &cdshtlng,
    OP64_32:   &cdlngsht,
    OPu64_128: &cdshtlng,
    OPs64_128: &cdshtlng,
    OP128_64:  &cdlngsht,
    OPmsw:     &cdmsw,

    OPd_s64:   &cdcnvt,
    OPs64_d:   &cdcnvt,
    OPd_u64:   &cdcnvt,
    OPu64_d:   &cdcnvt,
    OPld_u64:  &cdcnvt,
    OPparam:   &cderr,
    OPsizeof:  &cderr,
    OParrow:   &cderr,
    OParrowstar: &cderr,
    OPcolon:   &cderr,
    OPcolon2:  &cderr,
    OPbool:    &cdnot,
    OPcall:    &cdfunc,
    OPucall:   &cdfunc,
    OPcallns:  &cdfunc,
    OPucallns: &cdfunc,
    OPstrpar:  &cderr,
    OPstrctor: &cderr,
    OPstrthis: &cdstrthis,
    OPconst:   &cderr,
    OPvar:     &cderr,
    OPnew:     &cderr,
    OPanew:    &cderr,
    OPdelete:  &cderr,
    OPadelete: &cderr,
    OPbrack:   &cderr,
    OPframeptr: &cdframeptr,
    OPgot:     &cdgot,

    OPbsf:     &cdbscan,
    OPbsr:     &cdbscan,
    OPbtst:    &cdbtst,
    OPbt:      &cdbt,
    OPbtc:     &cdbt,
    OPbtr:     &cdbt,
    OPbts:     &cdbt,

    OPbswap:   &cdbswap,
    OPpopcnt:  &cdpopcnt,
    OPvector:  &cdvector,
    OPvecsto:  &cdvecsto,
    OPvecfill: &cdvecfill,
    OPva_start: &cderr,
    OPprefetch: &cdprefetch,
];
3009 
3010 
/***************************
 * Generate code for elem `e`, appending it to `cdb`.
 *
 * A common subexpression (e.Ecount != 0 and e.Ecount != e.Ecomsub) is handed
 * to comsub(). Otherwise OPvar/OPconst are loaded with loaddata(),
 * OPrelconst goes to cdrelconst(), and every other operator is dispatched
 * through the cdxxx[] jump table.
 * Params:
 *      cdb       = code builder receiving the generated code
 *      e         = elem to generate code for (must not be null)
 *      pretregs  = on entry, mask of registers the result may be returned in;
 *                  on exit, mask of registers the result is actually in
 *      constflag = bit 1: the user of the result will not modify the
 *                         registers returned in *pretregs
 *                  bit 2: do not call freenode(e)
 */
void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag)
{
    Symbol *s;

    debug if (debugw)
    {
        printf("+codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs));
        WROP(e.Eoper);
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
        printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
    }

    assert(e);
    elem_debug(e);

    // Invariant: every register in regcon.cse.mops must also be in regcon.cse.mval
    if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
    {
        debug
        {
            printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs));
            elem_print(e);
            printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                    regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
            printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
        }
        assert(0);
    }

    // If the caller may modify the result, steer away from register variables,
    // but only when that still leaves some register to choose from.
    if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar)
        *pretregs &= ~regcon.mvar;                      /* can't use register vars */

    uint op = e.Eoper;
    if (e.Ecount && e.Ecount != e.Ecomsub)     // if common subexp
    {
        comsub(cdb,e,pretregs);
        goto L1;
    }

    if (configv.addlinenumbers && e.Esrcpos.Slinnum)
        cdb.genlinnum(e.Esrcpos);

    switch (op)
    {
        default:
            // cdxxx[] has OPMAX entries, so the last valid index is OPMAX - 1.
            // Check before either of the two table lookups below.
            assert(op < OPMAX);
            if (e.Ecount)                          /* if common subexp     */
            {
                /* if no return value       */
                if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0)
                {
                    if (*pretregs & (mST0 | mST01))
                    {
                        //printf("generate ST0 comsub for:\n");
                        //elem_print(e);

                        // Evaluate into XMM0 (or XMM0|XMM1 for mST01), record
                        // that as the CSE, then move to what the caller asked for.
                        regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1;
                        (*cdxxx[op])(cdb,e,&retregs);
                        cssave(e,retregs,!OTleaf(op));
                        fixresult(cdb, e, retregs, pretregs);
                        goto L1;
                    }
                    // Force a register result so the CSE value lives somewhere
                    if (tysize(e.Ety) == 1)
                        *pretregs |= BYTEREGS;
                    else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs)
                        *pretregs |= XMMREGS;
                    else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias)
                        *pretregs |= DOUBLEREGS;
                    else
                        *pretregs |= ALLREGS;       /* make one             */
                }

                /* BUG: For CSEs, make sure we have both an MSW             */
                /* and an LSW specified in *pretregs                        */
            }
            (*cdxxx[op])(cdb,e,pretregs);
            break;

        case OPrelconst:
            cdrelconst(cdb,e,pretregs);
            break;

        case OPvar:
            // A register variable whose registers are all acceptable and whose
            // user won't modify the result: narrow *pretregs to those registers.
            if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg &&
                (s.Sregm & *pretregs) == s.Sregm)
            {
                if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE)
                    *pretregs &= mPSW | (s.Sregm & mLSW);
                else
                    *pretregs &= mPSW | s.Sregm;
            }
            goto case OPconst;

        case OPconst:
            // No result requested, but the leaf is volatile or referenced at
            // least 3 times: pick a register class by type so it gets loaded.
            if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile))
            {
                switch (tybasic(e.Ety))
                {
                    case TYbool:
                    case TYchar:
                    case TYschar:
                    case TYuchar:
                        *pretregs |= BYTEREGS;
                        break;

                    case TYnref:
                    case TYnptr:
                    case TYsptr:
                    case TYcptr:
                    case TYfgPtr:
                    case TYimmutPtr:
                    case TYsharePtr:
                    case TYrestrictPtr:
                        *pretregs |= I16 ? IDXREGS : ALLREGS;
                        break;

                    case TYshort:
                    case TYushort:
                    case TYint:
                    case TYuint:
                    case TYlong:
                    case TYulong:
                    case TYllong:
                    case TYullong:
                    case TYcent:
                    case TYucent:
                    case TYfptr:
                    case TYhptr:
                    case TYvptr:
                        *pretregs |= ALLREGS;
                        break;

                    default:
                        break;
                }
            }
            loaddata(cdb,e,pretregs);
            break;
    }
    cssave(e,*pretregs,!OTleaf(op));
L1:
    if (!(constflag & 2))
        freenode(e);

    debug if (debugw)
    {
        printf("-codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs));
        WROP(op);
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
    }
}
3162 
/*******************************
 * Same as codelem(), but do not destroy the registers in keepmsk.
 * Use scratch registers as much as possible, then use stack.
 *
 * The emitted code is laid out as: save code (cdbs1), the code for `e`
 * (cdbx), then restore code (cdbs2).
 * Input:
 *      cdb             code builder the three pieces are appended to
 *      e               elem to generate code for
 *      pretregs        in: mask of acceptable result registers,
 *                      out: mask of registers the result is in
 *      keepmsk         mask of registers whose contents must survive
 *      constflag       true if user of result will not modify the
 *                      registers returned in *pretregs.
 */

void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag)
{
    regm_t touse;

    debug if (debugw)
        printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    elem_debug(e);
    if (constflag)
    {
        regm_t regm;
        reg_t reg;

        // Shortcut: e already sits in an acceptable register, so only
        // fixresult() is needed and nothing has to be saved.
        if (isregvar(e,&regm,&reg) &&           // if e is a register variable
            (regm & *pretregs) == regm &&       // in one of the right regs
            e.EV.Voffset == 0
           )
        {
            uint sz1 = tysize(e.Ety);
            uint sz2 = tysize(e.EV.Vsym.Stype.Tty);
            // Reading a register-sized piece of a larger variable: keep only
            // the LSW (or XMM) part of its register mask.
            if (sz1 <= REGSIZE && sz2 > REGSIZE)
                regm &= mLSW | XMMREGS;
            fixresult(cdb,e,regm,pretregs);
            cssave(e,regm,0);
            freenode(e);

            debug if (debugw)
                printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                        e,regm_str(*pretregs),regm_str(keepmsk),constflag);

            return;
        }
    }
    // Remember which keepmsk bits were already in msavereg so they can be
    // left set when keepmsk is removed again at the end.
    regm_t overlap = msavereg & keepmsk;
    msavereg |= keepmsk;          /* add to mask of regs to save          */
    regm_t oldregcon = regcon.cse.mval;
    regm_t oldregimmed = regcon.immed.mval;
    regm_t oldmfuncreg = mfuncreg;       /* remember old one                     */
    mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar;
    uint stackpushsave = stackpush;
    char calledafuncsave = calledafunc;
    calledafunc = 0;
    // Generate into a separate builder so the save code can be emitted in
    // front of it once we know which registers were touched.
    CodeBuilder cdbx; cdbx.ctor();
    codelem(cdbx,e,pretregs,constflag);    // generate code for the elem

    // Registers in keepmsk whose msavereg bit is no longer set after codelem().
    // NOTE(review): presumably getregs() clears msavereg bits for registers it
    // hands out - confirm.
    regm_t tosave = keepmsk & ~msavereg; /* registers to save                    */
    if (tosave)
    {
        cgstate.stackclean++;
        genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg);
        cgstate.stackclean--;
    }

    /* Assert that no new CSEs are generated that are not reflected       */
    /* in mfuncreg.                                                       */
    debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0)
        printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n",
                regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar));

    assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0);

    /* bugzilla 3521
     * The problem is:
     *    reg op (reg = exp)
     * where reg must be preserved (in keepregs) while the expression to be evaluated
     * must change it.
     * The only solution is to make this variable not a register.
     */
    if (regcon.mvar & tosave)
    {
        //elem_print(e);
        //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave));
        cgreg_unregister(regcon.mvar & tosave);
    }

    /* which registers can we use to save other registers in? */
    if (config.flags4 & CFG4space ||              // if optimize for space
        config.target_cpu >= TARGET_80486)        // PUSH/POP ops are 1 cycle
        touse = 0;                              // PUSH/POP pairs are always shorter
    else
    {
        touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval);
        /* Don't use registers we'll have to save/restore               */
        touse &= ~(fregsaved & oldmfuncreg);
        /* Don't use registers that have constant values in them, since
           the code generated might have used the value.
         */
        touse &= ~oldregimmed;
    }

    CodeBuilder cdbs1; cdbs1.ctor();    // save code, emitted before cdbx
    code *cs2 = null;                   // restore code, built by prepending
    int adjesp = 0;                     // total size reported by gensaverestore()

    // Walk the register numbers upward until every bit of tosave is handled.
    for (uint i = 0; tosave; i++)
    {
        regm_t mi = mask(i);

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save                 */
        {
            if (touse)          /* if any scratch registers             */
            {
                // Take the lowest-numbered scratch register among 0..7
                uint j;
                for (j = 0; j < 8; j++)
                {
                    regm_t mj = mask(j);

                    if (touse & mj)
                    {
                        genmovreg(cdbs1,j,i);
                        // prepend, so restores run in reverse order of saves
                        cs2 = cat(genmovreg(i,j),cs2);
                        touse &= ~mj;
                        mfuncreg &= ~mj;
                        regcon.used |= mj;
                        break;
                    }
                }
                assert(j < 8);
            }
            else                        // else use memory
            {
                CodeBuilder cdby; cdby.ctor();
                uint size = gensaverestore(mask(i), cdbs1, cdby);
                cs2 = cat(cdby.finish(),cs2);
                if (size)
                {
                    stackchanged = 1;
                    adjesp += size;
                }
            }
            getregs(cdbx,mi);
            tosave &= ~mi;
        }
    }
    CodeBuilder cdbs2; cdbs2.ctor();
    if (adjesp)
    {
        // If this is done an odd number of times, it
        // will throw off the 8 byte stack alignment.
        // We should *only* worry about this if a function
        // was called in the code generation by codelem().
        // sz = padding that rounds adjesp up to a multiple of STACKALIGN
        int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1);
        if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
        {
            regm_t mval_save = regcon.immed.mval;
            regcon.immed.mval = 0;      // prevent reghasvalue() optimizations
                                        // because c hasn't been executed yet
            cod3_stackadj(cdbs1, sz);
            regcon.immed.mval = mval_save;
            cdbs1.genadjesp(sz);

            // undo the padding first in the restore sequence
            cod3_stackadj(cdbs2, -sz);
            cdbs2.genadjesp(-sz);
        }
        cdbs2.append(cs2);


        cdbs1.genadjesp(adjesp);
        cdbs2.genadjesp(-adjesp);
    }
    else
        cdbs2.append(cs2);

    // Restore the global bookkeeping that was overridden for this elem
    calledafunc |= calledafuncsave;
    msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save   */
    mfuncreg &= oldmfuncreg;        /* update original                    */

    debug if (debugw)
        printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    // Final layout: saves, then the elem's code, then restores
    cdb.append(cdbs1);
    cdb.append(cdbx);
    cdb.append(cdbs2);
    return;
}
3349 
/*********************************************
 * Turn register mask into a string suitable for printing.
 *
 * Well-known masks (0, ALLREGS, BYTEREGS, allregs, XMMREGS) are returned as
 * string literals. Any other mask is formatted as register names joined by
 * '|' into one of NUM statically allocated buffers that are used in
 * rotation, so up to NUM results may be alive at once (e.g. several
 * regm_str() arguments to a single printf()). The caller must not free or
 * modify the result.
 * Params:
 *      rm = register mask to format
 * Returns:
 *      0-terminated string, valid until NUM further non-literal results
 *      have been produced
 */

const(char)* regm_str(regm_t rm)
{
    enum NUM = 10;
    enum SMAX = 128;
    __gshared char[SMAX + 1][NUM] str;
    __gshared int i;

    if (rm == 0)
        return "0";
    if (rm == ALLREGS)
        return "ALLREGS";
    if (rm == BYTEREGS)
        return "BYTEREGS";
    if (rm == allregs)
        return "allregs";
    if (rm == XMMREGS)
        return "XMMREGS";

    // Claim the next buffer of the ring
    char *p = str[i].ptr;
    if (++i == NUM)
        i = 0;
    *p = 0;
    for (size_t j = 0; j < 32; j++)
    {
        if (mask(cast(uint)j) & rm)
        {
            strcat(p,regstring[j]);
            rm &= ~mask(cast(uint)j);
            if (rm)                     // more bits follow: add a separator
                strcat(p,"|");
        }
    }
    if (rm)                             // bits without a name: print as hex
    {   char *s = p + strlen(p);
        sprintf(s,"x%02x",rm);
    }
    assert(strlen(p) <= SMAX);

    // Return the ring buffer itself. Duplicating it with strdup() would leak
    // on every call, because callers never (and, given the literal returns
    // above, cannot safely) free the result.
    return p;
}
3392 
/*********************************
 * Scan down comma-expressions.
 *
 * While *pe is an OPcomma, generates code for its left operand (no result
 * requested), frees the comma node and descends into its right operand.
 * Line number information is emitted for every elem visited, including the
 * one the scan stops at.
 * Params:
 *      cdb = code builder receiving the code for the left operands
 *      pe  = in: expression to scan;
 *            out: first elem down the right side that is not an OPcomma
 */

void docommas(ref CodeBuilder cdb,elem **pe)
{
    const uint pushedBefore = stackpush;
    const int cleanBefore = cgstate.stackclean;
    cgstate.stackclean = 0;

    elem* cur = *pe;
    for (;;)
    {
        // (Slinnum is deliberately left untouched after emitting.)
        if (configv.addlinenumbers && cur.Esrcpos.Slinnum)
            cdb.genlinnum(cur.Esrcpos);

        if (cur.Eoper != OPcomma)
            break;

        regm_t noresult = 0;            // left operand's value is discarded
        codelem(cdb,cur.EV.E1,&noresult,true);

        elem* comma = cur;
        cur = cur.EV.E2;
        freenode(comma);
    }
    *pe = cur;

    assert(cgstate.stackclean == 0);
    cgstate.stackclean = cleanBefore;
    genstackclean(cdb,stackpush - pushedBefore,0);
}
3427 
/**************************
 * Merge a saved register-contents state into the current one.
 *
 * For each register whose recorded cse value differs between regcon and
 * *pregconsave, clear its bit in regcon.cse.mval; likewise for the immed
 * values and regcon.immed.mval. Then intersect the cse/immed masks and
 * params with the saved ones, accumulate `used`, and trim regcon.cse.mops
 * to the registers still present in regcon.cse.mval.
 * Params:
 *      pregconsave = saved state to merge with
 */

void andregcon(con_t *pregconsave)
{
    // `keep` has every bit set except the bit of the register being examined
    regm_t keep = ~1;
    foreach (r; 0 .. REGMAX)
    {
        if (regcon.cse.value[r] != pregconsave.cse.value[r])
            regcon.cse.mval &= keep;
        if (regcon.immed.value[r] != pregconsave.immed.value[r])
            regcon.immed.mval &= keep;
        keep = (keep << 1) | 1;         // move the cleared bit up by one
    }
    //printf("regcon.cse.mval = %s, regconsave.mval = %s ",regm_str(regcon.cse.mval),regm_str(pregconsave.cse.mval));

    regcon.cse.mval &= pregconsave.cse.mval;
    //printf("regcon.cse.mval&regcon.cse.mops = %s, regcon.cse.mops = %s\n",regm_str(regcon.cse.mval & regcon.cse.mops), regm_str(regcon.cse.mops));
    regcon.cse.mops &= regcon.cse.mval;  // must follow the mval update above

    regcon.immed.mval &= pregconsave.immed.mval;
    regcon.params &= pregconsave.params;
    regcon.used |= pregconsave.used;
}
3454 
3455 }