1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1984-1998 by Symantec
6  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod1.d, backend/cod1.d)
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod1.d
11  */
12 
13 module dmd.backend.cod1;
14 
15 version (SCPP)
16     version = COMPILE;
17 version (MARS)
18     version = COMPILE;
19 
20 version (COMPILE)
21 {
22 
23 import core.stdc.stdio;
24 import core.stdc.stdlib;
25 import core.stdc.string;
26 
27 import dmd.backend.backend;
28 import dmd.backend.cc;
29 import dmd.backend.cdef;
30 import dmd.backend.code;
31 import dmd.backend.code_x86;
32 import dmd.backend.codebuilder;
33 import dmd.backend.mem;
34 import dmd.backend.el;
35 import dmd.backend.exh;
36 import dmd.backend.global;
37 import dmd.backend.obj;
38 import dmd.backend.oper;
39 import dmd.backend.rtlsym;
40 import dmd.backend.ty;
41 import dmd.backend.type;
42 import dmd.backend.xmm;
43 
44 extern (C++):
45 
46 nothrow:
47 
48 int REGSIZE();
49 
50 extern __gshared CGstate cgstate;
51 extern __gshared ubyte[FLMAX] segfl;
52 extern __gshared bool[FLMAX] stackfl;
53 
/// Returns: a mask with only bit number `m` set.
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
55 
/// Emit opcode 0x09 (x86 `OR r/m,reg`) through genregs(); note that
/// `t` and `f` are handed to genregs() in swapped order (`f` first).
private void genorreg(ref CodeBuilder c, uint t, uint f)
{
    genregs(c, 0x09, f, t);
}
57 
/* Tables converting an index register number to an r/m field.
 * Entries are ordered AX CX DX BX SP BP SI DI.
 * A value of -1 is stored where the table supplies no r/m value.
 */
private __gshared const byte[8] regtorm32 =
[
     0,         // AX
     1,         // CX
     2,         // DX
     3,         // BX
    -1,         // SP
     5,         // BP
     6,         // SI
     7,         // DI
];

__gshared const byte[8] regtorm =
[
    -1,         // AX
    -1,         // CX
    -1,         // DX
     7,         // BX
    -1,         // SP
     6,         // BP
     4,         // SI
     5,         // DI
];
62 
63 targ_size_t paramsize(elem *e, tym_t tyf);
64 //void funccall(ref CodeBuilder cdb,elem *e,uint numpara,uint numalign,
65 //        regm_t *pretregs,regm_t keepmsk, bool usefuncarg);
66 
/*********************************
 * Decide whether parameter `s` should stay in the register it arrived
 * in, rather than having a register assigned to it by the register
 * allocator.
 * Params:
 *      s = parameter Symbol
 * Returns:
 *      `true` if `s` is a register parameter (SCfastpar or SCshadowreg)
 *      that is to be left in the register it came in
 */
bool regParamInPreg(Symbol* s)
{
    //printf("regParamInPreg %s\n", s.Sident.ptr);
    if (s.Sclass != SCfastpar && s.Sclass != SCshadowreg)
        return false;                   // not a register parameter

    // Only when optimizing AND `s` is flagged GTregcand is the
    // parameter not left in its incoming register.
    const optimizing   = (config.flags4 & CFG4optimized) != 0;
    const regCandidate = (s.Sflags & GTregcand) != 0;
    return !(optimizing && regCandidate);
}
82 
83 
/**************************
 * Determine if e is a 32 bit scaled index addressing mode, i.e. (after
 * skipping any leading comma operators) a non-CSE'd left shift by a
 * constant no larger than 3.
 * Must not be called when generating 16 bit code.
 * Returns:
 *      0       not a scaled index addressing mode
 *      !=0     the value for ss in the SIB byte
 */

int isscaledindex(elem *e)
{
    assert(!I16);

    // Only the right-most operand of a comma chain matters
    while (e.Eoper == OPcomma)
        e = e.EV.E2;

    if (e.Eoper != OPshl || e.Ecount)
        return 0;                       // not a shift, or it is a common subexpression
    if (e.EV.E2.Eoper != OPconst)
        return 0;                       // shift count is not a constant

    const targ_uns shiftCount = e.EV.E2.EV.Vuns;
    if (shiftCount > 3)
        return 0;                       // larger than any ss value allowed here
    return cast(int)shiftCount;
}
106 
/*********************************************
 * Generate code for an elem for which isscaledindex(e) returned a
 * non-zero result.
 * Params:
 *      cdb = generated code is appended here
 *      e = comma chain ending in an OPshl elem
 *      pidxregs = register mask passed to scodelem() for the shift's
 *                 left operand (the value to be scaled)
 *      keepmsk = mask of registers passed through to scodelem() as the
 *                ones to keep
 */

/*private*/ void cdisscaledindex(ref CodeBuilder cdb,elem *e,regm_t *pidxregs,regm_t keepmsk)
{
    // Walk down the comma chain, generating each left operand with an
    // empty result-register mask
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
        regm_t noresult = 0;
        scodelem(cdb, e.EV.E1, &noresult, keepmsk, true);
        freenode(e);
    }

    assert(e.Eoper == OPshl);

    // Load index register with result of e.EV.E1; the constant shift
    // count in e.EV.E2 generates no code and is simply released
    scodelem(cdb, e.EV.E1, pidxregs, keepmsk, true);
    freenode(e.EV.E2);
    freenode(e);
}
126 
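/***********************************
 * Flags, table entry type and table used by ssindex() to decide whether
 * a multiply by a constant can be done with one or two LEA instructions.
 * ssindex() returns:
 *      0       can't do it
 *      !=0     index of the matching entry in ssindex_array[]
 */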
132 
/// Flag bits kept in Ssindex.ssflags
enum
{
    SSFLnobp    = 1 << 0,     /// can't have EBP in relconst
    SSFLnobase1 = 1 << 1,     /// no base register for first LEA
    SSFLnobase  = 1 << 2,     /// no base register
    SSFLlea     = 1 << 3,     /// can do it in one LEA
}
140 
/// One entry of ssindex_array[]: a constant multiplier together with the
/// data needed to realize it.
struct Ssindex
{
    /// the multiplier this entry matches (compared by ssindex())
    targ_uns product;

    // ss1, ss2: NOTE(review): presumably the SIB scale (ss) values for the
    //           first and second LEA - confirm against the users of the table
    // ssflags:  SSFLxxxx
    ubyte ss1, ss2, ssflags;
}
148 
/// Table searched by ssindex(); fields are product, ss1, ss2, ssflags.
private __gshared const Ssindex[21] ssindex_array =
[
    Ssindex( 0, 0, 0),                              // [0] is a place holder

    // flagged SSFLlea: can do it in one LEA
    Ssindex( 3, 1, 0, SSFLnobp | SSFLlea),
    Ssindex( 5, 2, 0, SSFLnobp | SSFLlea),
    Ssindex( 9, 3, 0, SSFLnobp | SSFLlea),

    // flagged SSFLnobase: no base register
    Ssindex( 6, 1, 1, SSFLnobase),
    Ssindex(12, 1, 2, SSFLnobase),
    Ssindex(24, 1, 3, SSFLnobase),
    Ssindex(10, 2, 1, SSFLnobase),
    Ssindex(20, 2, 2, SSFLnobase),
    Ssindex(40, 2, 3, SSFLnobase),
    Ssindex(18, 3, 1, SSFLnobase),
    Ssindex(36, 3, 2, SSFLnobase),
    Ssindex(72, 3, 3, SSFLnobase),

    // flagged SSFLnobp: can't have EBP in relconst
    Ssindex(15, 2, 1, SSFLnobp),
    Ssindex(25, 2, 2, SSFLnobp),
    Ssindex(27, 3, 1, SSFLnobp),
    Ssindex(45, 3, 2, SSFLnobp),
    Ssindex(81, 3, 3, SSFLnobp),

    // flagged SSFLnobase1 | SSFLnobase: no base register for either LEA
    Ssindex(16, 3, 1, SSFLnobase1 | SSFLnobase),
    Ssindex(32, 3, 2, SSFLnobase1 | SSFLnobase),
    Ssindex(64, 3, 3, SSFLnobase1 | SSFLnobase),
];
177 
/***********************************
 * Look up a constant multiplier in ssindex_array[].
 * Params:
 *      op = operator; for OPshl, `product` is a shift count and the
 *           multiplier looked up is 2 to that power, otherwise `product`
 *           is the multiplier itself
 *      product = multiplier, or shift count if op is OPshl
 * Returns:
 *      0       no matching entry (can't do it)
 *      !=0     index of the matching entry in ssindex_array[]
 */
int ssindex(OPER op,targ_uns product)
{
    if (op == OPshl)
    {
        /* A shift count of at least the operand width cannot be turned
         * into a multiplier with `1 << product`: the result of such a
         * shift is not defined, and on x86 the count wraps so that e.g.
         * a shift by 36 would masquerade as a multiply by 16.
         */
        if (product >= targ_uns.sizeof * 8)
            return 0;
        product = cast(targ_uns)1 << product;
    }

    // Entry [0] is a place holder, so the search starts at 1
    foreach (i; 1 .. ssindex_array.length)
    {
        if (ssindex_array[i].product == product)
            return cast(int)i;
    }
    return 0;
}
189 
/***************************************
 * Build an EA of the form disp[base][index*scale].
 * Fills in the Irm, Isib and Irex fields of `c`, sets IFL1 to FLconst and
 * stores the displacement in IEV1.Vuns.
 * Input:
 *      c       struct to fill in
 *      base    base register (-1 if none)
 *      index   index register (-1 if none); must not be SP
 *      scale   scale factor - 1,2,4,8 (must be 1 for 16 bit code)
 *      disp    displacement
 */

void buildEA(code *c,int base,int index,int scale,targ_size_t disp)
{
    ubyte modrm;
    ubyte sibByte = 0;
    ubyte rexBits = 0;

    if (I16)
    {
        // First subscript is base + 1, second is index + 1, each ordered
        // -1 AX CX DX BX SP BP SI DI
        // 0x09 marks a combination that is rejected by the assert below
        static immutable ubyte[9][9] EA16rm =
        [
            [   0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85,   ],      // -1
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // AX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // CX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // DX
            [   0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81,   ],      // BX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // SP
            [   0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83,   ],      // BP
            [   0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09,   ],      // SI
            [   0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09,   ]       // DI
        ];

        assert(scale == 1);
        modrm = EA16rm[base + 1][index + 1];
        assert(modrm != 9);
    }
    else
    {
        assert(index != SP);

        // Translate the scale factor into the ss field of the SIB byte
        uint ss;
        switch (scale)
        {
            case 1:     ss = 0; break;
            case 2:     ss = 1; break;
            case 4:     ss = 2; break;
            case 8:     ss = 3; break;
            default:    assert(0);
        }

        const bool haveBase  = base  != -1;
        const bool haveIndex = index != -1;

        if (haveBase && haveIndex)
        {
            // disp[base][index*scale]
            modrm   = modregrm(2, 0, 4);
            sibByte = modregrm(ss,index & 7,base & 7);
            if (index & 8)
                rexBits |= REX_X;
            if (base & 8)
                rexBits |= REX_B;
        }
        else if (haveBase)
        {
            // disp[base]
            if (base == SP)
            {
                // SP as a base needs a SIB byte
                modrm   = modregrm(2, 0, 4);
                sibByte = modregrm(0, 4, SP);
            }
            else
            {
                modrm = modregrm(2, 0, base & 7);
                if (base & 8)
                {
                    rexBits |= REX_B;
                    if (base == R12)
                    {
                        // R12 gets the same SIB treatment as SP
                        modrm   = modregrm(2, 0, 4);
                        sibByte = modregrm(0, 4, 4);
                    }
                }
            }
        }
        else if (haveIndex)
        {
            // disp[index*scale], no base
            modrm   = modregrm(0,0,4);
            sibByte = modregrm(ss,index & 7,5);
            if (index & 8)
                rexBits |= REX_X;
        }
        else
        {
            // displacement only
            modrm = modregrm(0,0,5);
        }
    }

    c.Irm = modrm;
    c.Isib = sibByte;
    c.Irex = rexBits;
    c.IFL1 = FLconst;
    c.IEV1.Vuns = cast(targ_uns)disp;
}
287 
/*********************************************
 * Build REX, modregrm and sib bytes.
 * Params:
 *      mod = mod field
 *      reg = reg field (bit 3 selects REX_R when not 16 bit code)
 *      rm = r/m field (bit 3 selects REX_B when not 16 bit code)
 * Returns:
 *      packed value: modregrm byte in bits 0..7, SIB byte in bits 8..15,
 *      REX bits shifted left by 16
 */

uint buildModregrm(int mod, int reg, int rm)
{
    if (I16)
        return modregrm(mod, reg, rm);

    uint packed;
    if (mod != 3 && (rm & 7) == SP)
    {
        // r/m of SP with a memory mode is expressed through a SIB byte
        packed = (modregrm(0,4,SP) << 8) | modregrm(mod,reg & 7,4);
    }
    else
        packed = modregrm(mod,reg & 7,rm & 7);

    if (reg & 8)
        packed |= REX_R << 16;
    if (rm & 8)
        packed |= REX_B << 16;
    return packed;
}
310 
/****************************************
 * Generate code for eecontext.
 * The code generated for eecontext.EEelem, followed by an INT 3, is
 * stored in eecontext.EEcode. eecontext.EEin is incremented for the
 * duration of the call.
 */

void genEEcode()
{
    CodeBuilder cdb;
    cdb.ctor();

    eecontext.EEin++;
    regcon.immed.mval = 0;

    assert(EEStack.offset >= REGSIZE);
    const int stackAdjust = cast(int)(EEStack.offset - REGSIZE);
    cod3_stackadj(cdb, stackAdjust);
    cdb.gen1(0x50 + SI);                      // PUSH ESI
    cdb.genadjesp(cast(int)EEStack.offset);

    regm_t resultRegs = 0;                    //regmask(eecontext.EEelem.Ety);
    gencodelem(cdb, eecontext.EEelem, &resultRegs, false);

    code *generated = cdb.finish();
    assignaddrc(generated);
    pinholeopt(generated,null);
    jmpaddr(generated);

    eecontext.EEcode = gen1(generated, 0xCC); // INT 3
    eecontext.EEin--;
}
335 
336 
/********************************************
 * Gen a save/restore sequence for mask of registers.
 * Registers are saved in ascending register number order and restored
 * in the reverse order.
 * Params:
 *      regm = mask of registers to save
 *      cdbsave = save code appended here
 *      cdbrestore = restore code appended here
 * Returns:
 *      amount of stack consumed
 */

uint gensaverestore(regm_t regm,ref CodeBuilder cdbsave,ref CodeBuilder cdbrestore)
{
    //printf("gensaverestore(%s)\n", regm_str(regm));
    regm &= mBP | mES | ALLREGS | XMMREGS | mST0 | mST01;
    if (!regm)
        return 0;

    uint stackused = 0;

    // restore[r] holds the restore code for register r, or null
    code *[regm.sizeof * 8] restore;

    // Walk the mask from bit 0 upwards; when the loop ends, r is one past
    // the highest register that was set in regm
    reg_t r = 0;
    for (; regm; regm >>= 1, r++)
    {
        if (!(regm & 1))
        {
            restore[r] = null;
            continue;
        }

        code *pop;
        if (r == ES && I16)
        {
            stackused += REGSIZE;
            cdbsave.gen1(0x06);                     // PUSH ES
            pop = gen1(null, 0x07);                 // POP  ES
        }
        else if (r == ST0 || r == ST01)
        {
            CodeBuilder cdbx87;
            cdbx87.ctor();
            gensaverestore87(1 << r, cdbsave, cdbx87);
            pop = cdbx87.finish();
        }
        else if (r >= XMM0 || I64 || cgstate.funcarg.size)
        {
            // Goes through regsave rather than PUSH/POP, so stackused
            // is not changed here
            uint idx;
            regsave.save(cdbsave, r, &idx);
            CodeBuilder cdbreg;
            cdbreg.ctor();
            regsave.restore(cdbreg, r, idx);
            pop = cdbreg.finish();
        }
        else
        {
            stackused += REGSIZE;
            cdbsave.gen1(0x50 + (r & 7));           // PUSH r
            pop = gen1(null, 0x58 + (r & 7));       // POP  r
            if (r & 8)
            {
                code_orrex(cdbsave.last(), REX_B);
                code_orrex(pop, REX_B);
            }
        }
        restore[r] = pop;
    }

    // Emit the restores from the highest register down to the lowest
    while (r)
    {
        if (code *c = restore[--r])
            cdbrestore.append(c);
    }

    return stackused;
}
413 
414 
/****************************************
 * Clean parameters off stack.
 * Nothing is done unless numpara is non-zero and either
 * cgstate.stackclean is set or STACKALIGN >= 16.
 * Input:
 *      cdb             generated code is appended here
 *      numpara         amount to adjust stack pointer
 *      keepmsk         mask of registers to not destroy
 */

void genstackclean(ref CodeBuilder cdb,uint numpara,regm_t keepmsk)
{
    //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean);
    if (!numpara || !(cgstate.stackclean || STACKALIGN >= 16))
        return;

/+
    Disabled alternative, kept for reference:

        if (0 &&                                // won't work if operand of scodelem
            numpara == stackpush &&             // if this is all those pushed
            needframe &&                        // and there will be a BP
            !config.windows &&
            !(regcon.mvar & fregsaved)          // and no registers will be pushed
        )
            genregs(cdb,0x89,BP,SP);  // MOV SP,BP
        else
+/

    // When exactly one register's worth is to be removed and CFG4space
    // is set, look for an already-used scratch register to POP into
    regm_t scratchm = (numpara == REGSIZE && config.flags4 & CFG4space)
        ? ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar
        : 0;

    if (scratchm)
    {
        reg_t scratch;
        allocreg(cdb, &scratchm, &scratch, TYint);
        cdb.gen1(0x58 + scratch);         // POP scratch
    }
    else
        cod3_stackadj(cdb, -numpara);

    stackpush -= numpara;
    cdb.genadjesp(-numpara);
}
458 
/*********************************
 * Generate code for a logical expression.
 * cgstate.stackclean is incremented on entry and decremented again on
 * every exit path.
 * Input:
 *      cdb     generated code is appended here
 *      e       elem
 *      jcond
 *         bit 1 if true then goto jump address if e
 *               if false then goto jump address if !e
 *         2    don't call save87(); when this bit is clear, cse_flush()
 *              is called before the jump is generated
 *      fltarg   FLcode or FLblock, flavor of target if e evaluates to jcond
 *      targ    either code or block pointer to destination
 */

void logexp(ref CodeBuilder cdb, elem *e, int jcond, uint fltarg, code *targ)
{
    //printf("logexp(e = %p, jcond = %d)\n", e, jcond);
    int no87 = (jcond & 2) == 0;
    docommas(cdb, &e);             // scan down commas
    cgstate.stackclean++;

    if (!OTleaf(e.Eoper) && !e.Ecount)     // if operator and not common sub
    {
        switch (e.Eoper)
        {
            case OPoror:
            {
                con_t regconsave;
                if (jcond & 1)
                {
                    // jump to targ as soon as either operand is true
                    logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                }
                else
                {
                    // a true E1 skips over the test of E2
                    code *cnop = gennop(null);
                    logexp(cdb, e.EV.E1, jcond | 1, FLcode, cnop);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                    cdb.append(cnop);
                }
                andregcon(&regconsave);
                freenode(e);
                cgstate.stackclean--;
                return;
            }

            case OPandand:
            {
                con_t regconsave;
                if (jcond & 1)
                {
                    // a false E1 skips over the test of E2
                    code *cnop = gennop(null);    // a dummy target address
                    logexp(cdb, e.EV.E1, jcond & ~1, FLcode, cnop);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                    cdb.append(cnop);
                }
                else
                {
                    // jump to targ as soon as either operand is false
                    logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                }
                andregcon(&regconsave);
                freenode(e);
                cgstate.stackclean--;
                return;
            }

            case OPnot:
                jcond ^= 1;                 // invert the sense of the jump
                goto case OPbool;

            case OPbool:
            case OPs8_16:
            case OPu8_16:
            case OPs16_32:
            case OPu16_32:
            case OPs32_64:
            case OPu32_64:
            case OPu32_d:
            case OPd_ld:
                // these are handled by testing the operand directly
                logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                freenode(e);
                cgstate.stackclean--;
                return;

            case OPcond:
            {
                code *cnop2 = gennop(null);   // addresses of start of leaves
                code *cnop = gennop(null);
                logexp(cdb, e.EV.E1, false, FLcode, cnop2);   // eval condition
                con_t regconold = regcon;
                logexp(cdb, e.EV.E2.EV.E1, jcond, fltarg, targ);
                genjmp(cdb, JMP, FLcode, cast(block *) cnop); // skip second leaf

                con_t regconsave = regcon;
                regcon = regconold;

                cdb.append(cnop2);
                logexp(cdb, e.EV.E2.EV.E2, jcond, fltarg, targ);
                andregcon(&regconold);
                andregcon(&regconsave);
                freenode(e.EV.E2);
                freenode(e);
                cdb.append(cnop);
                cgstate.stackclean--;
                return;
            }

            default:
                break;
        }
    }

    /* Special code for signed long compare.
     * Not necessary for I64 until we do cents.
     */
    if (OTrel2(e.Eoper) &&               // if < <= >= >
        !e.Ecount &&
        ( (I16 && tybasic(e.EV.E1.Ety) == TYlong  && tybasic(e.EV.E2.Ety) == TYlong) ||
          (I32 && tybasic(e.EV.E1.Ety) == TYllong && tybasic(e.EV.E2.Ety) == TYllong))
       )
    {
        longcmp(cdb, e, jcond != 0, fltarg, targ);
        cgstate.stackclean--;
        return;
    }

    regm_t retregs = mPSW;                // return result in flags
    opcode_t op = jmpopcode(e);           // get jump opcode
    if (!(jcond & 1))
        op ^= 0x101;                      // toggle jump condition(s)
    codelem(cdb, e, &retregs, true);         // evaluate elem
    if (no87)
        cse_flush(cdb,no87);              // flush CSE's to memory
    genjmp(cdb, op, fltarg, cast(block *) targ); // generate jmp instruction
    cgstate.stackclean--;
}
599 
/******************************
 * Routine to aid in setting things up for gen().
 * Look for common subexpression.
 * Can handle indirection operators, but not if they're common subs.
 * The completed instruction is appended to `cdb`; nothing is returned.
 * Input:
 *      cdb ->  generated code is appended here
 *      e ->    elem where we get some of the data from
 *      cs ->   partially filled code to add; its Iflags, Irex and Iop
 *              fields are overwritten here
 *      op =    opcode
 *      reg =   reg field of (mod reg r/m)
 *      offset = data to be added to Voffset field
 *      keepmsk = mask of registers we must not destroy
 *      desmsk  = mask of registers destroyed by executing the instruction
 */

void loadea(ref CodeBuilder cdb,elem *e,code *cs,uint op,uint reg,targ_size_t offset,
            regm_t keepmsk,regm_t desmsk)
{
    debug
    if (debugw)
        printf("loadea: e=%p cs=%p op=x%x reg=%s offset=%lld keepmsk=%s desmsk=%s\n",
               e, cs, op, regstring[reg], cast(ulong)offset, regm_str(keepmsk), regm_str(desmsk));
    assert(e);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.Iop = op;
    tym_t tym = e.Ety;
    int sz = tysize(tym);

    /* Determine if location we want to get is in a register. If so,      */
    /* substitute the register for the EA.                                */
    /* Note that operators don't go through this. CSE'd operators are     */
    /* picked up by comsub().                                             */
    if (e.Ecount &&                      /* if cse                       */
        e.Ecount != e.Ecomsub &&        /* and cse was generated        */
        op != LEA && op != 0xC4 &&        /* and not an LEA or LES        */
        (op != 0xFF || reg != 3) &&       /* and not CALLF MEM16          */
        (op & 0xFFF8) != 0xD8)            // and not 8087 opcode
    {
        assert(OTleaf(e.Eoper));                /* can't handle this            */
        regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs
        if (op == 0xFF && reg == 6)
            rm &= ~XMMREGS;             // can't PUSH an XMM register
        if (sz > REGSIZE)               // value is in 2 or 4 registers
        {
            if (I16 && sz == 8)     // value is in 4 registers
            {
                static immutable regm_t[4] rmask = [ mDX,mCX,mBX,mAX ];
                rm &= rmask[cast(size_t)(offset >> 1)];
            }
            else if (offset)
                rm &= mMSW;             /* only high words      */
            else
                rm &= mLSW;             /* only low words       */
        }
        // Search the candidate registers for one that holds e
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm)
            {
                if (regcon.cse.value[i] == e && // if register has elem
                    /* watch out for a CWD destroying DX        */
                   !(i == DX && op == 0xF7 && desmsk & mDX))
                {
                    /* if ES, then it can only be a load    */
                    if (i == ES)
                    {
                        if (op != 0x8B)
                            break;      // not a load
                        cs.Iop = 0x8C; /* MOV reg,ES   */
                        cs.Irm = modregrm(3, 0, reg & 7);
                        if (reg & 8)
                            code_orrex(cs, REX_B);
                    }
                    else    // XXX reg,i
                    {
                        cs.Irm = modregrm(3, reg & 7, i & 7);
                        if (reg & 8)
                            cs.Irex |= REX_R;
                        if (i & 8)
                            cs.Irex |= REX_B;
                        if (sz == 1 && I64 && (i >= 4 || reg >= 4))
                            cs.Irex |= REX;
                        if (I64 && (sz == 8 || sz == 16))
                            cs.Irex |= REX_W;
                    }
                    goto L2;            // EA is a register; skip the memory operand setup
                }
                rm &= ~mask(i);
            }
        }
    }

    // Not available in a register: build a memory operand for e
    getlvalue(cdb, cs, e, keepmsk);
    if (offset == REGSIZE)
        getlvalue_msw(cs);
    else
        cs.IEV1.Voffset += offset;
    if (I64)
    {
        if (reg >= 4 && sz == 1)               // if byte register
            // Can only address those 8 bit registers if a REX byte is present
            cs.Irex |= REX;
        if ((op & 0xFFFFFFF8) == 0xD8)
            cs.Irex &= ~REX_W;                 // not needed for x87 ops
        if (mask(reg) & XMMREGS &&
            (op == LODSD || op == STOSD))
            cs.Irex &= ~REX_W;                 // not needed for xmm ops
    }
    code_newreg(cs, reg);                         // OR in reg field
    if (!I16)
    {
        if (reg == 6 && op == 0xFF ||             /* don't PUSH a word    */
            op == MOVZXw || op == MOVSXw ||       /* MOVZX/MOVSX          */
            (op & 0xFFF8) == 0xD8 ||              /* 8087 instructions    */
            op == LEA)                            /* LEA                  */
        {
            cs.Iflags &= ~CFopsize;
            if (reg == 6 && op == 0xFF)         // if PUSH
                cs.Irex &= ~REX_W;             // REX is ignored for PUSH anyway
        }
    }
    else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT())
        cs.Iflags |= CFwait;
L2:
    getregs(cdb, desmsk);                  // save any regs we destroy

    /* KLUDGE! fix up DX for divide instructions */
    if (op == 0xF7 && desmsk == (mAX|mDX))        /* if we need to fix DX */
    {
        if (reg == 7)                           /* if IDIV              */
        {
            cdb.gen1(0x99);                     // CWD
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (reg == 6)                      // if DIV
            genregs(cdb, 0x33, DX, DX);        // XOR DX,DX
    }

    // Eliminate MOV reg,reg
    if ((cs.Iop & ~3) == 0x88 &&
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        uint r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == reg)
            cs.Iop = NOP;
    }

    // Eliminate MOV xmmreg,xmmreg
    if ((cs.Iop & ~(LODSD ^ STOSS)) == LODSD &&    // detect LODSD, LODSS, STOSD, STOSS
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        reg_t r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == (reg - XMM0))
            cs.Iop = NOP;
    }

    cdb.gen(cs);
}
766 
767 
/**************************
 * Get addressing mode.
 * Params:
 *      idxregs = mask containing the index register
 * Returns:
 *      packed value: modregrm byte in bits 0..7, SIB byte in bits 8..15,
 *      REX bits shifted left by 16 (this is how setaddrmode() unpacks it)
 */

uint getaddrmode(regm_t idxregs)
{
    if (I16)
    {
        // Only BX, DI and SI are accepted, tested in that order
        if (idxregs & mBX)
            return modregrm(2,0,7);     /* [BX] */
        if (idxregs & mDI)
            return modregrm(2,0,5);     /* [DI] */
        if (idxregs & mSI)
            return modregrm(2,0,4);     /* [SI] */
        assert(0);
    }

    const reg = findreg(idxregs & (ALLREGS | mBP));
    if (reg == R12)
    {
        // R12 is encoded with a SIB byte
        return (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4);
    }
    return modregrmx(2,0,reg);
}
795 
/**************************
 * Store the addressing mode computed by getaddrmode(idxregs) into `c`.
 * Irm and Isib are replaced; in Irex only REX_B is cleared before the
 * REX bits of the new mode are OR'ed in.
 */
void setaddrmode(code *c, regm_t idxregs)
{
    const uint packed = getaddrmode(idxregs);

    c.Irex &= ~REX_B;
    c.Irex |= packed >> 16;             // REX bits
    c.Isib = (packed >> 8) & 0xFF;      // SIB byte
    c.Irm = packed & 0xFF;              // modregrm byte
}
804 
/**********************************************
 * Adjust the EA in `c` to refer to the most significant word.
 * For an FLreg operand the r/m register is replaced with the symbol's
 * Sregmsw (updating REX_B to match); otherwise REGSIZE is added to the
 * offset.
 */

void getlvalue_msw(code *c)
{
    if (c.IFL1 != FLreg)
    {
        c.IEV1.Voffset += REGSIZE;
        return;
    }

    const msw = c.IEV1.Vsym.Sregmsw;
    c.Irm = (c.Irm & ~7) | (msw & 7);

    // REX_B carries bit 3 of the register number
    if (msw & 8)
        c.Irex |= REX_B;
    else
        c.Irex &= ~REX_B;
}
822 
/**********************************************
 * Adjust the EA in `c` to refer to the least significant word.
 * For an FLreg operand the r/m register is replaced with the symbol's
 * Sreglsw (updating REX_B to match); otherwise REGSIZE is subtracted
 * from the offset.
 */

void getlvalue_lsw(code *c)
{
    if (c.IFL1 != FLreg)
    {
        c.IEV1.Voffset -= REGSIZE;
        return;
    }

    const lsw = c.IEV1.Vsym.Sreglsw;
    c.Irm = (c.Irm & ~7) | (lsw & 7);

    // REX_B carries bit 3 of the register number
    if (lsw & 8)
        c.Irex |= REX_B;
    else
        c.Irex &= ~REX_B;
}
840 
841 /******************
842  * Compute addressing mode.
843  * Generate & return sequence of code (if any).
844  * Return in cs the info on it.
845  * Input:
846  *      pcs ->  where to store data about addressing mode
847  *      e ->    the lvalue elem
848  *      keepmsk mask of registers we must not destroy or use
849  *              if (keepmsk & RMstore), this will be only a store operation
850  *              into the lvalue
851  *              if (keepmsk & RMload), this will be a read operation only
852  */
853 
854 void getlvalue(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk)
855 {
856     uint fl, f, opsave;
857     elem* e1, e11, e12;
858     bool e1isadd, e1free;
859     reg_t reg;
860     tym_t e1ty;
861     Symbol* s;
862 
863     //printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk));
864     //elem_print(e);
865     assert(e);
866     elem_debug(e);
867     if (e.Eoper == OPvar || e.Eoper == OPrelconst)
868     {
869         s = e.EV.Vsym;
870         fl = s.Sfl;
871         if (tyfloating(s.ty()))
872             objmod.fltused();
873     }
874     else
875         fl = FLoper;
876     pcs.IFL1 = cast(ubyte)fl;
877     pcs.Iflags = CFoff;                  /* only want offsets            */
878     pcs.Irex = 0;
879     pcs.IEV1.Voffset = 0;
880 
881     tym_t ty = e.Ety;
882     uint sz = tysize(ty);
883     if (tyfloating(ty))
884         objmod.fltused();
885     if (I64 && (sz == 8 || sz == 16) && !tyvector(ty))
886         pcs.Irex |= REX_W;
887     if (!I16 && sz == SHORTSIZE)
888         pcs.Iflags |= CFopsize;
889     if (ty & mTYvolatile)
890         pcs.Iflags |= CFvolatile;
891 
892     switch (fl)
893     {
894         case FLoper:
895             debug
896             if (debugw) printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk));
897 
898             switch (e.Eoper)
899             {
900                 case OPadd:                 // this way when we want to do LEA
901                     e1 = e;
902                     e1free = false;
903                     e1isadd = true;
904                     break;
905 
906                 case OPind:
907                 case OPpostinc:             // when doing (*p++ = ...)
908                 case OPpostdec:             // when doing (*p-- = ...)
909                 case OPbt:
910                 case OPbtc:
911                 case OPbtr:
912                 case OPbts:
913                 case OPvecfill:
914                     e1 = e.EV.E1;
915                     e1free = true;
916                     e1isadd = e1.Eoper == OPadd;
917                     break;
918 
919                 default:
920                     printf("function: %s\n", funcsym_p.Sident.ptr);
921                     elem_print(e);
922                     assert(0);
923             }
924             e1ty = tybasic(e1.Ety);
925             if (e1isadd)
926             {
927                 e12 = e1.EV.E2;
928                 e11 = e1.EV.E1;
929             }
930 
931             /* First see if we can replace *(e+&v) with
932              *      MOV     idxreg,e
933              *      EA =    [ES:] &v+idxreg
934              */
935             f = FLconst;
936 
937             /* Is address of `s` relative to RIP ?
938              */
939             static bool relativeToRIP(Symbol* s)
940             {
941                 if (!I64)
942                     return false;
943                 if (config.exe == EX_WIN64)
944                     return true;
945                 if (config.flags3 & CFG3pie)
946                 {
947                     if (s.Sfl == FLtlsdata || s.ty() & mTYthread)
948                     {
949                         if (s.Sclass == SCglobal || s.Sclass == SCstatic || s.Sclass == SClocstat)
950                             return false;
951                     }
952                     return true;
953                 }
954                 else
955                     return (config.flags3 & CFG3pic) != 0;
956             }
957 
958             if (e1isadd &&
959                 ((e12.Eoper == OPrelconst &&
960                   !relativeToRIP(e12.EV.Vsym) &&
961                   (f = el_fl(e12)) != FLfardata
962                  ) ||
963                  (e12.Eoper == OPconst && !I16 && !e1.Ecount && (!I64 || el_signx32(e12)))) &&
964                 e1.Ecount == e1.Ecomsub &&
965                 (!e1.Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) &&
966                 tysize(e11.Ety) == REGSIZE
967                )
968             {
969                 uint t;            /* component of r/m field */
970                 int ss;
971                 int ssi;
972 
973                 if (e12.Eoper == OPrelconst)
974                     f = el_fl(e12);
975                 /*assert(datafl[f]);*/              /* what if addr of func? */
976                 if (!I16)
977                 {   /* Any register can be an index register        */
978                     regm_t idxregs = allregs & ~keepmsk;
979                     assert(idxregs);
980 
981                     /* See if e1.EV.E1 can be a scaled index  */
982                     ss = isscaledindex(e11);
983                     if (ss)
984                     {
985                         /* Load index register with result of e11.EV.E1       */
986                         cdisscaledindex(cdb, e11, &idxregs, keepmsk);
987                         reg = findreg(idxregs);
988                         {
989                             t = stackfl[f] ? 2 : 0;
990                             pcs.Irm = modregrm(t, 0, 4);
991                             pcs.Isib = modregrm(ss, reg & 7, 5);
992                             if (reg & 8)
993                                 pcs.Irex |= REX_X;
994                         }
995                     }
996                     else if ((e11.Eoper == OPmul || e11.Eoper == OPshl) &&
997                              !e11.Ecount &&
998                              e11.EV.E2.Eoper == OPconst &&
999                              (ssi = ssindex(e11.Eoper, e11.EV.E2.EV.Vuns)) != 0
1000                             )
1001                     {
1002                         regm_t scratchm;
1003 
1004                         char ssflags = ssindex_array[ssi].ssflags;
1005                         if (ssflags & SSFLnobp && stackfl[f])
1006                             goto L6;
1007 
1008                         // Load index register with result of e11.EV.E1
1009                         scodelem(cdb, e11.EV.E1, &idxregs, keepmsk, true);
1010                         reg = findreg(idxregs);
1011 
1012                         int ss1 = ssindex_array[ssi].ss1;
1013                         if (ssflags & SSFLlea)
1014                         {
1015                             assert(!stackfl[f]);
1016                             pcs.Irm = modregrm(2,0,4);
1017                             pcs.Isib = modregrm(ss1, reg & 7, reg & 7);
1018                             if (reg & 8)
1019                                 pcs.Irex |= REX_X | REX_B;
1020                         }
1021                         else
1022                         {
1023                             int rbase;
1024                             reg_t r;
1025 
1026                             scratchm = ALLREGS & ~keepmsk;
1027                             allocreg(cdb, &scratchm, &r, TYint);
1028 
1029                             if (ssflags & SSFLnobase1)
1030                             {
1031                                 t = 0;
1032                                 rbase = 5;
1033                             }
1034                             else
1035                             {
1036                                 t = 0;
1037                                 rbase = reg;
1038                                 if (rbase == BP || rbase == R13)
1039                                 {
1040                                     static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1];
1041 
1042                                     // IMUL r,BP,imm32
1043                                     cdb.genc2(0x69, modregxrmx(3, r, rbase), imm32[ss1]);
1044                                     goto L7;
1045                                 }
1046                             }
1047 
1048                             cdb.gen2sib(LEA, modregxrm(t, r, 4), modregrm(ss1, reg & 7 ,rbase & 7));
1049                             if (reg & 8)
1050                                 code_orrex(cdb.last(), REX_X);
1051                             if (rbase & 8)
1052                                 code_orrex(cdb.last(), REX_B);
1053                             if (I64)
1054                                 code_orrex(cdb.last(), REX_W);
1055 
1056                             if (ssflags & SSFLnobase1)
1057                             {
1058                                 cdb.last().IFL1 = FLconst;
1059                                 cdb.last().IEV1.Vuns = 0;
1060                             }
1061                         L7:
1062                             if (ssflags & SSFLnobase)
1063                             {
1064                                 t = stackfl[f] ? 2 : 0;
1065                                 rbase = 5;
1066                             }
1067                             else
1068                             {
1069                                 t = 2;
1070                                 rbase = r;
1071                                 assert(rbase != BP);
1072                             }
1073                             pcs.Irm = modregrm(t, 0, 4);
1074                             pcs.Isib = modregrm(ssindex_array[ssi].ss2, r & 7, rbase & 7);
1075                             if (r & 8)
1076                                 pcs.Irex |= REX_X;
1077                             if (rbase & 8)
1078                                 pcs.Irex |= REX_B;
1079                         }
1080                         freenode(e11.EV.E2);
1081                         freenode(e11);
1082                     }
1083                     else
1084                     {
1085                      L6:
1086                         /* Load index register with result of e11   */
1087                         scodelem(cdb, e11, &idxregs, keepmsk, true);
1088                         setaddrmode(pcs, idxregs);
1089                         if (stackfl[f])             /* if we need [EBP] too */
1090                         {
1091                             uint idx = pcs.Irm & 7;
1092                             if (pcs.Irex & REX_B)
1093                                 pcs.Irex = (pcs.Irex & ~REX_B) | REX_X;
1094                             pcs.Isib = modregrm(0, idx, BP);
1095                             pcs.Irm = modregrm(2, 0, 4);
1096                         }
1097                     }
1098                 }
1099                 else
1100                 {
1101                     regm_t idxregs = IDXREGS & ~keepmsk;   /* only these can be index regs */
1102                     assert(idxregs);
1103                     if (stackfl[f])                 /* if stack data type   */
1104                     {
1105                         idxregs &= mSI | mDI;       /* BX can't index off stack */
1106                         if (!idxregs) goto L1;      /* index regs aren't avail */
1107                         t = 6;                      /* [BP+SI+disp]         */
1108                     }
1109                     else
1110                         t = 0;                      /* [SI + disp]          */
1111                     scodelem(cdb, e11, &idxregs, keepmsk, true); // load idx reg
1112                     pcs.Irm = cast(ubyte)(getaddrmode(idxregs) ^ t);
1113                 }
1114                 if (f == FLpara)
1115                     refparam = true;
1116                 else if (f == FLauto || f == FLbprel || f == FLfltreg || f == FLfast)
1117                     reflocal = true;
1118                 else if (f == FLcsdata || tybasic(e12.Ety) == TYcptr)
1119                     pcs.Iflags |= CFcs;
1120                 else
1121                     assert(f != FLreg);
1122                 pcs.IFL1 = cast(ubyte)f;
1123                 if (f != FLconst)
1124                     pcs.IEV1.Vsym = e12.EV.Vsym;
1125                 pcs.IEV1.Voffset = e12.EV.Voffset; /* += ??? */
1126 
1127                 /* If e1 is a CSE, we must generate an addressing mode      */
1128                 /* but also leave EA in registers so others can use it      */
1129                 if (e1.Ecount)
1130                 {
1131                     uint flagsave;
1132 
1133                     regm_t idxregs = IDXREGS & ~keepmsk;
1134                     allocreg(cdb, &idxregs, &reg, TYoffset);
1135 
1136                     /* If desired result is a far pointer, we'll have       */
1137                     /* to load another register with the segment of v       */
1138                     if (e1ty == TYfptr)
1139                     {
1140                         reg_t msreg;
1141 
1142                         idxregs |= mMSW & ALLREGS & ~keepmsk;
1143                         allocreg(cdb, &idxregs, &msreg, TYfptr);
1144                         msreg = findregmsw(idxregs);
1145                                                     /* MOV msreg,segreg     */
1146                         genregs(cdb, 0x8C, segfl[f], msreg);
1147                     }
1148                     opsave = pcs.Iop;
1149                     flagsave = pcs.Iflags;
1150                     ubyte rexsave = pcs.Irex;
1151                     pcs.Iop = LEA;
1152                     code_newreg(pcs, reg);
1153                     if (!I16)
1154                         pcs.Iflags &= ~CFopsize;
1155                     if (I64)
1156                         pcs.Irex |= REX_W;
1157                     cdb.gen(pcs);                 // LEA idxreg,EA
1158                     cssave(e1,idxregs,true);
1159                     if (!I16)
1160                     {
1161                         pcs.Iflags = flagsave;
1162                         pcs.Irex = rexsave;
1163                     }
1164                     if (stackfl[f] && (config.wflags & WFssneds))   // if pointer into stack
1165                         pcs.Iflags |= CFss;        // add SS: override
1166                     pcs.Iop = opsave;
1167                     pcs.IFL1 = FLoffset;
1168                     pcs.IEV1.Vuns = 0;
1169                     setaddrmode(pcs, idxregs);
1170                 }
1171                 freenode(e12);
1172                 if (e1free)
1173                     freenode(e1);
1174                 goto Lptr;
1175             }
1176 
1177             L1:
1178 
1179             /* The rest of the cases could be a far pointer */
1180 
1181             regm_t idxregs;
1182             idxregs = (I16 ? IDXREGS : allregs) & ~keepmsk; // only these can be index regs
1183             assert(idxregs);
1184             if (!I16 &&
1185                 (sz == REGSIZE || (I64 && sz == 4)) &&
1186                 keepmsk & RMstore)
1187                 idxregs |= regcon.mvar;
1188 
1189             switch (e1ty)
1190             {
1191                 case TYfptr:                        /* if far pointer       */
1192                 case TYhptr:
1193                     idxregs = (mES | IDXREGS) & ~keepmsk;   // need segment too
1194                     assert(idxregs & mES);
1195                     pcs.Iflags |= CFes;            /* ES segment override  */
1196                     break;
1197 
1198                 case TYsptr:                        /* if pointer to stack  */
1199                     if (config.wflags & WFssneds)   // if SS != DS
1200                         pcs.Iflags |= CFss;        /* then need SS: override */
1201                     break;
1202 
1203                 case TYfgPtr:
1204                     if (I32)
1205                         pcs.Iflags |= CFgs;
1206                     else if (I64)
1207                         pcs.Iflags |= CFfs;
1208                     else
1209                         assert(0);
1210                     break;
1211 
1212                 case TYcptr:                        /* if pointer to code   */
1213                     pcs.Iflags |= CFcs;            /* then need CS: override */
1214                     break;
1215 
1216                 default:
1217                     break;
1218             }
1219             pcs.IFL1 = FLoffset;
1220             pcs.IEV1.Vuns = 0;
1221 
1222             /* see if we can replace *(e+c) with
1223              *      MOV     idxreg,e
1224              *      [MOV    ES,segment]
1225              *      EA =    [ES:]c[idxreg]
1226              */
1227             if (e1isadd && e12.Eoper == OPconst &&
1228                 (!I64 || el_signx32(e12)) &&
1229                 (tysize(e12.Ety) == REGSIZE || (I64 && tysize(e12.Ety) == 4)) &&
1230                 (!e1.Ecount || !e1free)
1231                )
1232             {
1233                 int ss;
1234 
1235                 pcs.IEV1.Vuns = e12.EV.Vuns;
1236                 freenode(e12);
1237                 if (e1free) freenode(e1);
1238                 if (!I16 && e11.Eoper == OPadd && !e11.Ecount &&
1239                     tysize(e11.Ety) == REGSIZE)
1240                 {
1241                     e12 = e11.EV.E2;
1242                     e11 = e11.EV.E1;
1243                     e1 = e1.EV.E1;
1244                     e1free = true;
1245                     goto L4;
1246                 }
1247                 if (!I16 && (ss = isscaledindex(e11)) != 0)
1248                 {   // (v * scale) + const
1249                     cdisscaledindex(cdb, e11, &idxregs, keepmsk);
1250                     reg = findreg(idxregs);
1251                     pcs.Irm = modregrm(0, 0, 4);
1252                     pcs.Isib = modregrm(ss, reg & 7, 5);
1253                     if (reg & 8)
1254                         pcs.Irex |= REX_X;
1255                 }
1256                 else
1257                 {
1258                     scodelem(cdb, e11, &idxregs, keepmsk, true); // load index reg
1259                     setaddrmode(pcs, idxregs);
1260                 }
1261                 goto Lptr;
1262             }
1263 
1264             /* Look for *(v1 + v2)
1265              *      EA = [v1][v2]
1266              */
1267 
1268             if (!I16 && e1isadd && (!e1.Ecount || !e1free) &&
1269                 (_tysize[e1ty] == REGSIZE || (I64 && _tysize[e1ty] == 4)))
1270             {
1271             L4:
1272                 regm_t idxregs2;
1273                 uint base, index;
1274 
1275                 // Look for *(v1 + v2 << scale)
1276                 int ss = isscaledindex(e12);
1277                 if (ss)
1278                 {
1279                     scodelem(cdb, e11, &idxregs, keepmsk, true);
1280                     idxregs2 = allregs & ~(idxregs | keepmsk);
1281                     cdisscaledindex(cdb, e12, &idxregs2, keepmsk | idxregs);
1282                 }
1283 
1284                 // Look for *(v1 << scale + v2)
1285                 else if ((ss = isscaledindex(e11)) != 0)
1286                 {
1287                     idxregs2 = idxregs;
1288                     cdisscaledindex(cdb, e11, &idxregs2, keepmsk);
1289                     idxregs = allregs & ~(idxregs2 | keepmsk);
1290                     scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true);
1291                 }
1292                 // Look for *(((v1 << scale) + c1) + v2)
1293                 else if (e11.Eoper == OPadd && !e11.Ecount &&
1294                          e11.EV.E2.Eoper == OPconst &&
1295                          (ss = isscaledindex(e11.EV.E1)) != 0
1296                         )
1297                 {
1298                     pcs.IEV1.Vuns = e11.EV.E2.EV.Vuns;
1299                     idxregs2 = idxregs;
1300                     cdisscaledindex(cdb, e11.EV.E1, &idxregs2, keepmsk);
1301                     idxregs = allregs & ~(idxregs2 | keepmsk);
1302                     scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true);
1303                     freenode(e11.EV.E2);
1304                     freenode(e11);
1305                 }
1306                 else
1307                 {
1308                     scodelem(cdb, e11, &idxregs, keepmsk, true);
1309                     idxregs2 = allregs & ~(idxregs | keepmsk);
1310                     scodelem(cdb, e12, &idxregs2, keepmsk | idxregs, true);
1311                 }
1312                 base = findreg(idxregs);
1313                 index = findreg(idxregs2);
1314                 pcs.Irm  = modregrm(2, 0, 4);
1315                 pcs.Isib = modregrm(ss, index & 7, base & 7);
1316                 if (index & 8)
1317                     pcs.Irex |= REX_X;
1318                 if (base & 8)
1319                     pcs.Irex |= REX_B;
1320                 if (e1free)
1321                     freenode(e1);
1322 
1323                 goto Lptr;
1324             }
1325 
1326             /* give up and replace *e1 with
1327              *      MOV     idxreg,e
1328              *      EA =    0[idxreg]
1329              * pinholeopt() will usually correct the 0, we need it in case
1330              * we have a pointer to a long and need an offset to the second
1331              * word.
1332              */
1333 
1334             assert(e1free);
1335             scodelem(cdb, e1, &idxregs, keepmsk, true);  // load index register
1336             setaddrmode(pcs, idxregs);
1337         Lptr:
1338             if (config.flags3 & CFG3ptrchk)
1339                 cod3_ptrchk(cdb, pcs, keepmsk);        // validate pointer code
1340             break;
1341 
1342         case FLdatseg:
1343             assert(0);
1344         static if (0)
1345         {
1346             pcs.Irm = modregrm(0, 0, BPRM);
1347             pcs.IEVpointer1 = e.EVpointer;
1348             break;
1349         }
1350 
1351         case FLfltreg:
1352             reflocal = true;
1353             pcs.Irm = modregrm(2, 0, BPRM);
1354             pcs.IEV1.Vint = 0;
1355             break;
1356 
1357         case FLreg:
1358             goto L2;
1359 
1360         case FLpara:
1361             if (s.Sclass == SCshadowreg)
1362                 goto case FLfast;
1363         Lpara:
1364             refparam = true;
1365             pcs.Irm = modregrm(2, 0, BPRM);
1366             goto L2;
1367 
1368         case FLauto:
1369         case FLfast:
1370             if (regParamInPreg(s))
1371             {
1372                 regm_t pregm = s.Spregm();
1373                 /* See if the parameter is still hanging about in a register,
                 * and so we can load from that register instead.
1375                  */
1376                 if (regcon.params & pregm /*&& s.Spreg2 == NOREG && !(pregm & XMMREGS)*/)
1377                 {
1378                     if (keepmsk & RMload && !anyiasm)
1379                     {
1380                         auto voffset = e.EV.Voffset;
1381                         if (sz <= REGSIZE)
1382                         {
1383                             const reg_t preg = (voffset >= REGSIZE) ? s.Spreg2 : s.Spreg;
1384                             if (voffset >= REGSIZE)
1385                                 voffset -= REGSIZE;
1386 
1387                             /* preg could be NOREG if it's a variadic function and we're
1388                              * in Win64 shadow regs and we're offsetting to get to the start
1389                              * of the variadic args.
1390                              */
1391                             if (preg != NOREG && regcon.params & mask(preg))
1392                             {
1393                                 //printf("sz %d, preg %s, Voffset %d\n", cast(int)sz, regm_str(mask(preg)), cast(int)voffset);
1394                                 if (mask(preg) & XMMREGS && sz != REGSIZE)
1395                                 {
1396                                     /* The following fails with this from std.math on Linux64:
1397                                         void main()
1398                                         {
1399                                             alias T = float;
1400                                             T x = T.infinity;
1401                                             T e = T.infinity;
1402                                             int eptr;
1403                                             T v = frexp(x, eptr);
1404                                             assert(isIdentical(e, v));
1405                                         }
1406                                      */
1407                                 }
1408                                 else if (voffset == 0)
1409                                 {
1410                                     pcs.Irm = modregrm(3, 0, preg & 7);
1411                                     if (preg & 8)
1412                                         pcs.Irex |= REX_B;
1413                                     if (I64 && sz == 1 && preg >= 4)
1414                                         pcs.Irex |= REX;
1415                                     regcon.used |= mask(preg);
1416                                     break;
1417                                 }
1418                                 else if (voffset == 1 && sz == 1 && preg < 4)
1419                                 {
1420                                     pcs.Irm = modregrm(3, 0, 4 | preg); // use H register
1421                                     regcon.used |= mask(preg);
1422                                     break;
1423                                 }
1424                             }
1425                         }
1426                     }
1427                     else
1428                         regcon.params &= ~pregm;
1429                 }
1430             }
1431             if (s.Sclass == SCshadowreg)
1432                 goto Lpara;
1433             goto case FLbprel;
1434 
1435         case FLbprel:
1436             reflocal = true;
1437             pcs.Irm = modregrm(2, 0, BPRM);
1438             goto L2;
1439 
1440         case FLextern:
1441             if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0)
1442             {
1443                 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1444                 {
1445                     assert(0);
1446                 }
1447                 else static if (TARGET_WINDOS)
1448                 {
1449                     if (I64)
1450                     {   // GS:[88]
1451                         pcs.Irm = modregrm(0, 0, 4);
1452                         pcs.Isib = modregrm(0, 4, 5);  // don't use [RIP] addressing
1453                         pcs.IFL1 = FLconst;
1454                         pcs.IEV1.Vuns = 88;
1455                         pcs.Iflags = CFgs;
1456                         pcs.Irex |= REX_W;
1457                         break;
1458                     }
1459                     else
1460                     {
1461                         pcs.Iflags |= CFfs;    // add FS: override
1462                     }
1463                 }
1464             }
1465             if (s.ty() & mTYcs && cast(bool) LARGECODE)
1466                 goto Lfardata;
1467             goto L3;
1468 
1469         case FLdata:
1470         case FLudata:
1471         case FLcsdata:
1472         case FLgot:
1473         case FLgotoff:
1474     static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1475     {
1476         case FLtlsdata:
1477     }
1478         L3:
1479             pcs.Irm = modregrm(0, 0, BPRM);
1480         L2:
1481             if (fl == FLreg)
1482             {
1483                 //printf("test: FLreg, %s %d regcon.mvar = %s\n",
1484                 // s.Sident.ptr, cast(int)e.EV.Voffset, regm_str(regcon.mvar));
1485                 if (!(s.Sregm & regcon.mvar))
1486                     symbol_print(s);
1487                 assert(s.Sregm & regcon.mvar);
1488 
1489                 /* Attempting to paint a float as an integer or an integer as a float
                 * will cause serious problems since the EA is loaded separately from
1491                  * the opcode. The only way to deal with this is to prevent enregistering
1492                  * such variables.
1493                  */
1494                 if (tyxmmreg(ty) && !(s.Sregm & XMMREGS) ||
1495                     !tyxmmreg(ty) && (s.Sregm & XMMREGS))
1496                     cgreg_unregister(s.Sregm);
1497 
1498                 if (
1499                     s.Sclass == SCregpar ||
1500                     s.Sclass == SCparameter)
1501                 {   refparam = true;
1502                     reflocal = true;        // kludge to set up prolog
1503                 }
1504                 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7);
1505                 if (s.Sreglsw & 8)
1506                     pcs.Irex |= REX_B;
1507                 if (e.EV.Voffset == REGSIZE && sz == REGSIZE)
1508                 {
1509                     pcs.Irm = modregrm(3, 0, s.Sregmsw & 7);
1510                     if (s.Sregmsw & 8)
1511                         pcs.Irex |= REX_B;
1512                     else
1513                         pcs.Irex &= ~REX_B;
1514                 }
1515                 else if (e.EV.Voffset == 1 && sz == 1)
1516                 {
1517                     assert(s.Sregm & BYTEREGS);
1518                     assert(s.Sreglsw < 4);
1519                     pcs.Irm |= 4;                  // use 2nd byte of register
1520                 }
1521                 else
1522                 {
1523                     assert(!e.EV.Voffset);
1524                     if (I64 && sz == 1 && s.Sreglsw >= 4)
1525                         pcs.Irex |= REX;
1526                 }
1527             }
1528             else if (s.ty() & mTYcs && !(fl == FLextern && LARGECODE))
1529             {
1530                 pcs.Iflags |= CFcs | CFoff;
1531             }
1532             if (config.flags3 & CFG3pic &&
1533                 (fl == FLtlsdata || s.ty() & mTYthread))
1534             {
1535                 if (I32)
1536                 {
1537                     if (config.flags3 & CFG3pie)
1538                     {
1539                         pcs.Iflags |= CFgs;
1540                     }
1541                 }
1542                 else if (I64)
1543                 {
1544                     if (config.flags3 & CFG3pie &&
1545                         (s.Sclass == SCglobal || s.Sclass == SCstatic || s.Sclass == SClocstat))
1546                     {
1547                         pcs.Iflags |= CFfs;
1548                         pcs.Irm = modregrm(0, 0, 4);
1549                         pcs.Isib = modregrm(0, 4, 5);  // don't use [RIP] addressing
1550                     }
1551                     else
1552                     {
1553                         pcs.Iflags |= CFopsize;
1554                         pcs.Irex = 0x48;
1555                     }
1556                 }
1557             }
1558             pcs.IEV1.Vsym = s;
1559             pcs.IEV1.Voffset = e.EV.Voffset;
1560             if (sz == 1)
1561             {   /* Don't use SI or DI for this variable     */
1562                 s.Sflags |= GTbyte;
1563                 if (I64 ? e.EV.Voffset > 0 : e.EV.Voffset > 1)
1564                 {
1565                     debug if (debugr) printf("'%s' not reg cand due to byte offset\n", s.Sident.ptr);
1566                     s.Sflags &= ~GTregcand;
1567                 }
1568             }
1569             else if (e.EV.Voffset || sz > tysize(s.Stype.Tty))
1570             {
1571                 debug if (debugr) printf("'%s' not reg cand due to offset or size\n", s.Sident.ptr);
1572                 s.Sflags &= ~GTregcand;
1573             }
1574 
1575             if (config.fpxmmregs && tyfloating(s.ty()) && !tyfloating(ty))
1576             {
1577                 debug if (debugr) printf("'%s' not reg cand due to mix float and int\n", s.Sident.ptr);
1578                 // Can't successfully mix XMM register variables accessed as integers
1579                 s.Sflags &= ~GTregcand;
1580             }
1581 
1582             if (!(keepmsk & RMstore))               // if not store only
1583                 s.Sflags |= SFLread;               // assume we are doing a read
1584             break;
1585 
1586         case FLpseudo:
1587             version (MARS)
1588             {
1589                 {
1590                     getregs(cdb, mask(s.Sreglsw));
1591                     pcs.Irm = modregrm(3, 0, s.Sreglsw & 7);
1592                     if (s.Sreglsw & 8)
1593                         pcs.Irex |= REX_B;
1594                     if (e.EV.Voffset == 1 && sz == 1)
1595                     {   assert(s.Sregm & BYTEREGS);
1596                         assert(s.Sreglsw < 4);
1597                         pcs.Irm |= 4;                  // use 2nd byte of register
1598                     }
1599                     else
1600                     {   assert(!e.EV.Voffset);
1601                         if (I64 && sz == 1 && s.Sreglsw >= 4)
1602                             pcs.Irex |= REX;
1603                     }
1604                     break;
1605                 }
1606             }
1607             else
1608             {
1609                 {
1610                     uint u = s.Sreglsw;
1611                     getregs(cdb, pseudomask[u]);
1612                     pcs.Irm = modregrm(3, 0, pseudoreg[u] & 7);
1613                     break;
1614                 }
1615             }
1616 
1617         case FLfardata:
1618         case FLfunc:                                /* reading from code seg */
1619             if (config.exe & EX_flat)
1620                 goto L3;
1621         Lfardata:
1622         {
1623             regm_t regm = ALLREGS & ~keepmsk;       // need scratch register
1624             allocreg(cdb, &regm, &reg, TYint);
1625             getregs(cdb,mES);
1626             // MOV mreg,seg of symbol
1627             cdb.gencs(0xB8 + reg, 0, FLextern, s);
1628             cdb.last().Iflags = CFseg;
1629             cdb.gen2(0x8E, modregrmx(3, 0, reg));     // MOV ES,reg
1630             pcs.Iflags |= CFes | CFoff;            /* ES segment override  */
1631             goto L3;
1632         }
1633 
1634         case FLstack:
1635             assert(!I16);
1636             pcs.Irm = modregrm(2, 0, 4);
1637             pcs.Isib = modregrm(0, 4, SP);
1638             pcs.IEV1.Vsym = s;
1639             pcs.IEV1.Voffset = e.EV.Voffset;
1640             break;
1641 
1642         default:
1643             WRFL(cast(FL)fl);
1644             symbol_print(s);
1645             assert(0);
1646     }
1647 }
1648 
/*****************************
 * Given an opcode and EA already set up in *pcs, emit that instruction
 * once for each general purpose register holding a piece of a
 * floating point value. Never used for 64 bit code.
 *
 * Register / offset pairing (offsets are relative to the incoming
 * pcs.IEV1.Voffset, emitted in the order listed):
 *      32 bit, float:  AX at +0
 *      32 bit, other:  DX at +REGSIZE, AX at +0
 *      16 bit, float:  DX at +2, AX at +0
 *      16 bit, other:  AX at +6, BX at +4, CX at +2, DX at +0
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      pcs = instruction template; its Irm is rewritten through NEWREG
 *            for every emitted instruction, and IEV1.Voffset is adjusted
 *            temporarily but is back at its original value on return
 *      tym = either TYdouble or TYfloat; after tybasic() anything that
 *            is not TYfloat takes the double path
 */

void fltregs(ref CodeBuilder cdb, code* pcs, tym_t tym)
{
    assert(!I64);
    const bool isSingle = tybasic(tym) == TYfloat;

    if (!I32)
    {
        // 16 bit code: one instruction per 2 byte word, highest offset first
        if (isSingle)
        {
            getregs(cdb, FLOATREGS_16);

            pcs.IEV1.Voffset += 2;
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);

            pcs.IEV1.Voffset -= 2;
            NEWREG(pcs.Irm, AX);
            cdb.gen(pcs);
            return;
        }

        getregs(cdb, DOUBLEREGS_16);

        pcs.IEV1.Voffset += 6;
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, BX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, CX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;          // Voffset is now back at its entry value
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);
        return;
    }

    // 32 bit code
    if (isSingle)
    {
        getregs(cdb, mAX);
    }
    else
    {
        getregs(cdb, mAX | mDX);

        // upper half goes through DX
        pcs.IEV1.Voffset += REGSIZE;
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);
        pcs.IEV1.Voffset -= REGSIZE;
    }

    // lower half (or the whole float) goes through AX
    NEWREG(pcs.Irm, AX);
    cdb.gen(pcs);
}
1699 
1700 
/*****************************
 * Given a result in registers, test it for true or false, i.e. generate
 * code that sets the CPU flags according to whether the value is zero.
 * Will fail if TYfptr and the reg is ES!
 * If saveflag is true, preserve the contents of the
 * registers.
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      regm     = mask of the register(s) holding the value; must include at
 *                 least one of XMMREGS, mBP or ALLREGS (asserted)
 *      tym      = type of the value; only its tybasic() part is used
 *      saveflag = if non-zero, the value registers must not be modified, so a
 *                 scratch register is allocated for the destructive forms
 */

void tstresult(ref CodeBuilder cdb, regm_t regm, tym_t tym, uint saveflag)
{
    reg_t scrreg;                      // scratch register
    regm_t scrregm;

    //if (!(regm & (mBP | ALLREGS)))
        //printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n",
            //regm_str(regm),tym,saveflag);

    assert(regm & (XMMREGS | mBP | ALLREGS));
    tym = tybasic(tym);
    reg_t reg = findreg(regm);
    uint sz = _tysize[tym];

    // Byte sized value: a single non-destructive byte TEST
    if (sz == 1)
    {
        assert(regm & BYTEREGS);
        genregs(cdb, 0x84, reg, reg);        // TEST regL,regL
        // In 64 bit mode register numbers 4..7 need a REX prefix to address
        // their low byte (without REX those encodings mean AH/CH/DH/BH)
        if (I64 && reg >= 4)
            code_orrex(cdb.last(), REX);
        return;
    }

    // Value lives in XMM register(s): compare against a zeroed scratch XMM register
    if (regm & XMMREGS)
    {
        reg_t xreg;
        regm_t xregs = XMMREGS & ~regm;     // scratch must not be one of the value registers
        allocreg(cdb,&xregs, &xreg, TYdouble);
        opcode_t op = 0;
        // 0x66 prefix turns the XORPS/UCOMISS opcodes below into their
        // double precision forms
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;
        cdb.gen2(op | 0x0F57, modregrm(3, xreg-XMM0, xreg-XMM0));      // XORPS xreg,xreg
        cdb.gen2(op | 0x0F2E, modregrm(3, xreg-XMM0, reg-XMM0));    // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {   code *cnop = gennop(null);
            // Complex value: the other register of the pair is only compared
            // if the first compare came out equal and ordered; otherwise the
            // flags from the first compare are the result
            genjmp(cdb, JNE, FLcode, cast(block *) cnop); // JNE     L1
            genjmp(cdb,  JP, FLcode, cast(block *) cnop); // JP      L1
            reg = findreg(regm & ~mask(reg));
            cdb.gen2(op | 0x0F2E, modregrm(3, xreg-XMM0, reg-XMM0));        // UCOMISS xreg,reg
            cdb.append(cnop);
        }
        return;
    }

    // Value fits in one general purpose register
    if (sz <= REGSIZE)
    {
        if (!I16)
        {
            if (tym == TYfloat)
            {
                // Float held in an integer register: shift the top bit out
                // rather than TEST (presumably so the sign bit, e.g. of -0.0,
                // does not affect the zero test). This destroys the register,
                // so work on a copy when the value must be preserved.
                if (saveflag)
                {
                    scrregm = allregs & ~regm;              // possible scratch regs
                    allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
                    genmovreg(cdb, scrreg, reg);  // MOV scrreg,msreg
                    reg = scrreg;
                }
                getregs(cdb, mask(reg));
                cdb.gen2(0xD1, modregrmx(3, 4, reg)); // SHL reg,1
                return;
            }
            gentstreg(cdb,reg);                 // TEST reg,reg
            if (sz == SHORTSIZE)
                cdb.last().Iflags |= CFopsize;             // 16 bit operands
            else if (sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
            gentstreg(cdb, reg);                 // TEST reg,reg
        return;
    }

    // Value is spread over several registers: OR the pieces together so the
    // flags of the last OR reflect the whole value.
    if (saveflag || tyfv(tym))
    {
        // Non-destructive form: accumulate into a scratch register.
        // Also entered via goto from the destructive form below when the
        // most significant register is a register variable.
    L1:
        scrregm = ALLREGS & ~regm;              // possible scratch regs
        allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
        if (I32 || sz == REGSIZE * 2)
        {
            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                    genregs(cdb, MOVZXw, scrreg, reg); // MOVZX scrreg,msreg
                else
                {
                    genmovreg(cdb, scrreg, reg);      // MOV scrreg,msreg
                    if (tym == TYdouble || tym == TYdouble_alias)
                        cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
                }
            }
            else
            {
                genmovreg(cdb, scrreg, reg);  // MOV scrreg,msreg
                if (tym == TYfloat)
                    cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
            }
            reg = findreglsw(regm);
            genorreg(cdb, scrreg, reg);           // OR scrreg,lsreg
        }
        else if (sz == 8)
        {
            // !I32
            // 8 byte value in AX,BX,CX,DX; AX is the piece that gets the
            // SHL for doubles
            genmovreg(cdb, scrreg, AX);           // MOV scrreg,AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1 ,modregrm(3, 4, scrreg));         // SHL scrreg,1
            genorreg(cdb, scrreg, BX);            // OR scrreg,BX
            genorreg(cdb, scrreg, CX);            // OR scrreg,CX
            genorreg(cdb, scrreg, DX);            // OR scrreg,DX
        }
        else
            assert(0);
    }
    else
    {
        // Destructive form: accumulate directly into the most significant
        // register of the value
        if (I32 || sz == REGSIZE * 2)
        {
            // can't test ES:LSW for 0
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            if (regcon.mvar & mask(reg))        // if register variable
                goto L1;                        // don't trash it
            getregs(cdb, mask(reg));            // we're going to trash reg
            if (tyfloating(tym) && sz == 2 * _tysize[TYint])
                cdb.gen2(0xD1, modregrm(3 ,4, reg));   // SHL reg,1
            genorreg(cdb, reg, findreglsw(regm));     // OR reg,reg+1
            if (I64)
                code_orrex(cdb.last(), REX_W);
       }
        else if (sz == 8)
        {   assert(regm == DOUBLEREGS_16);
            getregs(cdb,mAX);                  // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1, modregrm(3, 4, AX));       // SHL AX,1
            genorreg(cdb, AX, BX);          // OR AX,BX
            genorreg(cdb, AX, CX);          // OR AX,CX
            genorreg(cdb, AX, DX);          // OR AX,DX
        }
        else
            assert(0);
    }
    // Only the multi-register paths get here (the cases above return early):
    // flag the final OR with CFpsw
    code_orflag(cdb.last(),CFpsw);
}
1851 
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * If *pretregs is 0 nothing is generated. If *pretregs asks for registers
 * (or the stack / x87 stack) other than where the value currently is, code is
 * generated to move it there. If *pretregs includes mPSW, the flags are set
 * from the value, normally via tstresult().
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = expression the result belongs to; supplies the type and is
 *                 handed to cssave()
 *      retregs  = where the result currently is (must be non-zero)
 *      pretregs = in: where the caller wants the result;
 *                 out: updated to where the result actually ended up
 */

void fixresult(ref CodeBuilder cdb, elem *e, regm_t retregs, regm_t *pretregs)
{
    //printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs));
    if (*pretregs == 0) return;           // if don't want result
    assert(e && retregs);                 // need something to work with
    regm_t forccs = *pretregs & mPSW;     // non-zero if result wanted in flags
    regm_t forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
    tym_t tym = tybasic(e.Ety);

    // Structs are handled as an integer or pointer type of matching shape
    if (tym == TYstruct)
    {
        if (e.Eoper == OPpair || e.Eoper == OPrpair)
        {
            if (I64)
                tym = TYucent;
            else
                tym = TYullong;
        }
        else
            // Hack to support cdstreq()
            tym = (forregs & mMSW) ? TYfptr : TYnptr;
    }
    int sz = _tysize[tym];

    if (sz == 1)
    {
        assert(retregs & BYTEREGS);
        const reg = findreg(retregs);
        // Byte at offset 1 of a register variable: test the high byte
        // register (reg | 4) here, and skip the generic flag test at the end
        if (e.Eoper == OPvar &&
            e.EV.Voffset == 1 &&
            e.EV.Vsym.Sfl == FLreg)
        {
            assert(reg < 4);
            if (forccs)
                cdb.gen2(0x84, modregrm(3, reg | 4, reg | 4));   // TEST regH,regH
            forccs = 0;
        }
    }

    reg_t reg,rreg;
    if ((retregs & forregs) == retregs)   // if already in right registers
        *pretregs = retregs;
    else if (forregs)             // if return the result in registers
    {
        // Anything involving the x87 stack is delegated
        if ((forregs | retregs) & (mST01 | mST0))
        {
            fixresult87(cdb, e, retregs, pretregs);
            return;
        }
        uint opsflag = false;
        if (I16 && sz == 8)
        {
            // 16 bit code, 8 byte value: only transfers between
            // DOUBLEREGS_16 and the stack are supported
            if (forregs & mSTACK)
            {
                assert(retregs == DOUBLEREGS_16);
                // Push floating regs
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);
                stackpush += DOUBLESIZE;
            }
            else if (retregs & mSTACK)
            {
                assert(forregs == DOUBLEREGS_16);
                // Pop floating regs
                getregs(cdb,forregs);
                cdb.gen1(0x58 + DX);
                cdb.gen1(0x58 + CX);
                cdb.gen1(0x58 + BX);
                cdb.gen1(0x58 + AX);
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS_16; // for tstresult() below
            }
            else
            {
                // The diagnostic is debug-only, but the assert must always be
                // compiled in: `debug` applies to exactly one statement, so the
                // two must not be joined by a comma operator.
                debug printf("retregs = %s, forregs = %s\n", regm_str(retregs), regm_str(forregs));
                assert(0);
            }
            if (!OTleaf(e.Eoper))
                opsflag = true;
        }
        else
        {
            allocreg(cdb, pretregs, &rreg, tym);  // allocate return regs
            if (retregs & XMMREGS)
            {
                // Source is an XMM register: go through the floatreg memory slot
                reg = findreg(retregs & XMMREGS);
                // MOVSD floatreg, XMM?
                cdb.genxmmreg(xmmstore(tym), reg, 0, tym);
                if (mask(rreg) & XMMREGS)
                    // MOVSD XMM?, floatreg
                    cdb.genxmmreg(xmmload(tym), rreg, 0, tym);
                else
                {
                    // MOV rreg,floatreg
                    cdb.genfltreg(0x8B,rreg,0);
                    if (sz == 8)
                    {
                        if (I32)
                        {
                            // second half goes into the MSW register
                            rreg = findregmsw(*pretregs);
                            cdb.genfltreg(0x8B, rreg,4);
                        }
                        else
                            code_orrex(cdb.last(),REX_W);
                    }
                }
            }
            else if (forregs & XMMREGS)
            {
                // General purpose register(s) -> XMM register
                reg = findreg(retregs & (mBP | ALLREGS));
                switch (sz)
                {
                    case 4:
                        cdb.gen2(LODD, modregxrmx(3, rreg - XMM0, reg)); // MOVD xmm,reg
                        break;

                    case 8:
                        if (I32)
                        {
                            // store both halves to floatreg, then load all 8 bytes
                            cdb.genfltreg(0x89, reg, 0);
                            reg = findregmsw(retregs);
                            cdb.genfltreg(0x89, reg, 4);
                            cdb.genxmmreg(xmmload(tym), rreg, 0, tym); // MOVQ xmm,mem
                        }
                        else
                        {
                            cdb.gen2(LODD /* [sic!] */, modregxrmx(3, rreg - XMM0, reg));
                            code_orrex(cdb.last(), REX_W); // MOVQ xmm,reg
                        }
                        break;

                    default:
                        assert(false);
                }
                checkSetVex(cdb.last(), tym);
            }
            else if (sz > REGSIZE)
            {
                // Register pair -> register pair
                uint msreg = findregmsw(retregs);
                uint lsreg = findreglsw(retregs);
                uint msrreg = findregmsw(*pretregs);
                uint lsrreg = findreglsw(*pretregs);

                genmovreg(cdb, msrreg, msreg); // MOV msrreg,msreg
                genmovreg(cdb, lsrreg, lsreg); // MOV lsrreg,lsreg
            }
            else
            {
                // Single general purpose register -> single register
                assert(!(retregs & XMMREGS));
                assert(!(forregs & XMMREGS));
                reg = findreg(retregs & (mBP | ALLREGS));
                if (I64 && sz <= 4)
                    genregs(cdb, 0x89, reg, rreg);  // only move 32 bits, and zero the top 32 bits
                else
                    genmovreg(cdb, rreg, reg);    // MOV rreg,reg
            }
        }
        cssave(e,retregs | *pretregs,opsflag);
        // Commented out due to Bugzilla 8840
        //forregs = 0;    // don't care about result in reg cuz real result is in rreg
        retregs = *pretregs & ~mPSW;
    }
    if (forccs)                           // if return result in flags
    {
        if (retregs & (mST01 | mST0))
            fixresult87(cdb, e, retregs, pretregs);
        else
            // forregs doubles as tstresult()'s saveflag: non-zero when the
            // value is also wanted in registers and so must be preserved
            tstresult(cdb, retregs, tym, forregs);
    }
}
2030 
/*******************************
 * Extra information about each CLIB runtime library function.
 * Each member is a distinct bit; they are OR'd together in ClibInfo.flags.
 */

enum
{
    INF32         = 1,      /// if 32 bit only
    INFfloat      = 2,      /// if this is floating point
    INFwkdone     = 4,      /// if weak extern is already done
    INF64         = 8,      /// if 64 bit only
    INFpushebx    = 0x10,   /// push EBX before load_localgot()
    INFpusheabcdx = 0x20,   /// pass EAX/EBX/ECX/EDX on stack, callee does ret 16
}
2044 
/// Calling details for one CLIB runtime library function
struct ClibInfo
{
    regm_t retregs16;   /// registers that 16 bit result is returned in
    regm_t retregs32;   /// registers that 32 bit result is returned in
    ubyte pop;          /// # of bytes popped off of stack upon return
    ubyte flags;        /// INFxxx
    byte push87;        /// # of pushes onto the 8087 stack
    byte pop87;         /// # of pops off of the 8087 stack
}
2054 
// true if initialized. getClibInfo() checks this: while it is false, the
// already created CLIB symbols have Sxtrnnum/Stypidx zeroed and INFwkdone
// cleared, after which it is set to true.
__gshared int clib_inited = false;
2056 
/*******************************
 * Create the symbol for a CLIB runtime library function.
 * The symbol is allocated with symbol_calloc() and set up as an external
 * function (SCextern, FLfunc) of type tsclib.
 * Params:
 *      name    = name of the function
 *      desregs = mask of registers not preserved by the function (presumably
 *                "destroyed registers"); every other register among
 *                mBP | mES | ALLREGS is recorded in Sregsaved
 * Returns:
 *      the newly created symbol
 */
Symbol* symboly(const(char)* name, regm_t desregs)
{
    Symbol* sym = symbol_calloc(name);

    // registers the function leaves intact
    sym.Sregsaved = (mBP | mES | ALLREGS) & ~desregs;

    sym.Ssymnum = 0;
    sym.Sfl     = FLfunc;
    sym.Sclass  = SCextern;
    sym.Stype   = tsclib;
    return sym;
}
2067 
2068 void getClibInfo(uint clib, Symbol** ps, ClibInfo** pinfo)
2069 {
2070     __gshared Symbol*[CLIB.MAX] clibsyms;
2071     __gshared ClibInfo[CLIB.MAX] clibinfo;
2072 
2073     if (!clib_inited)
2074     {
2075         for (size_t i = 0; i < CLIB.MAX; ++i)
2076         {
2077             Symbol* s = clibsyms[i];
2078             if (s)
2079             {
2080                 s.Sxtrnnum = 0;
2081                 s.Stypidx = 0;
2082                 clibinfo[i].flags &= ~INFwkdone;
2083             }
2084         }
2085         clib_inited = true;
2086     }
2087 
2088     const uint ex_unix = (EX_LINUX   | EX_LINUX64   |
2089                           EX_OSX     | EX_OSX64     |
2090                           EX_FREEBSD | EX_FREEBSD64 |
2091                           EX_OPENBSD | EX_OPENBSD64 |
2092                           EX_DRAGONFLYBSD64 |
2093                           EX_SOLARIS | EX_SOLARIS64);
2094 
2095     ClibInfo* cinfo = &clibinfo[clib];
2096     Symbol* s = clibsyms[clib];
2097     if (!s)
2098     {
2099 
2100         switch (clib)
2101         {
2102             case CLIB.lcmp:
2103                 {
2104                     const(char)* name = (config.exe & ex_unix) ? "__LCMP__" : "_LCMP@";
2105                     s = symboly(name, 0);
2106                 }
2107                 break;
2108 
2109             case CLIB.lmul:
2110                 {
2111                     const(char)* name = (config.exe & ex_unix) ? "__LMUL__" : "_LMUL@";
2112                     s = symboly(name, mAX|mCX|mDX);
2113                     cinfo.retregs16 = mDX|mAX;
2114                     cinfo.retregs32 = mDX|mAX;
2115                 }
2116                 break;
2117 
2118             case CLIB.ldiv:
2119                 cinfo.retregs16 = mDX|mAX;
2120                 if (config.exe & (EX_LINUX | EX_FREEBSD))
2121                 {
2122                     s = symboly("__divdi3", mAX|mBX|mCX|mDX);
2123                     cinfo.flags = INFpushebx;
2124                     cinfo.retregs32 = mDX|mAX;
2125                 }
2126                 else if (config.exe & (EX_OPENBSD | EX_SOLARIS))
2127                 {
2128                     s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
2129                     cinfo.flags = INFpushebx;
2130                     cinfo.retregs32 = mDX|mAX;
2131                 }
2132                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2133                 {
2134                     s = symboly("_alldiv", mAX|mBX|mCX|mDX);
2135                     cinfo.flags = INFpusheabcdx;
2136                     cinfo.retregs32 = mDX|mAX;
2137                 }
2138                 else
2139                 {
2140                     const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@";
2141                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2142                     cinfo.retregs32 = mDX|mAX;
2143                 }
2144                 break;
2145 
2146             case CLIB.lmod:
2147                 cinfo.retregs16 = mCX|mBX;
2148                 if (config.exe & (EX_LINUX | EX_FREEBSD))
2149                 {
2150                     s = symboly("__moddi3", mAX|mBX|mCX|mDX);
2151                     cinfo.flags = INFpushebx;
2152                     cinfo.retregs32 = mDX|mAX;
2153                 }
2154                 else if (config.exe & (EX_OPENBSD | EX_SOLARIS))
2155                 {
2156                     s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
2157                     cinfo.flags = INFpushebx;
2158                     cinfo.retregs32 = mCX|mBX;
2159                 }
2160                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2161                 {
2162                     s = symboly("_allrem", mAX|mBX|mCX|mDX);
2163                     cinfo.flags = INFpusheabcdx;
2164                     cinfo.retregs32 = mAX|mDX;
2165                 }
2166                 else
2167                 {
2168                     const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@";
2169                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2170                     cinfo.retregs32 = mCX|mBX;
2171                 }
2172                 break;
2173 
2174             case CLIB.uldiv:
2175                 cinfo.retregs16 = mDX|mAX;
2176                 if (config.exe & (EX_LINUX | EX_FREEBSD))
2177                 {
2178                     s = symboly("__udivdi3", mAX|mBX|mCX|mDX);
2179                     cinfo.flags = INFpushebx;
2180                     cinfo.retregs32 = mDX|mAX;
2181                 }
2182                 else if (config.exe & (EX_OPENBSD | EX_SOLARIS))
2183                 {
2184                     s = symboly("__ULDIV2__", mAX|mBX|mCX|mDX);
2185                     cinfo.flags = INFpushebx;
2186                     cinfo.retregs32 = mDX|mAX;
2187                 }
2188                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2189                 {
2190                     s = symboly("_aulldiv", mAX|mBX|mCX|mDX);
2191                     cinfo.flags = INFpusheabcdx;
2192                     cinfo.retregs32 = mDX|mAX;
2193                 }
2194                 else
2195                 {
2196                     const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@";
2197                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2198                     cinfo.retregs32 = mDX|mAX;
2199                 }
2200                 break;
2201 
2202             case CLIB.ulmod:
2203                 cinfo.retregs16 = mCX|mBX;
2204                 if (config.exe & (EX_LINUX | EX_FREEBSD))
2205                 {
2206                     s = symboly("__umoddi3", mAX|mBX|mCX|mDX);
2207                     cinfo.flags = INFpushebx;
2208                     cinfo.retregs32 = mDX|mAX;
2209                 }
2210                 else if (config.exe & (EX_OPENBSD | EX_SOLARIS))
2211                 {
2212                     s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
2213                     cinfo.flags = INFpushebx;
2214                     cinfo.retregs32 = mCX|mBX;
2215                 }
2216                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2217                 {
2218                     s = symboly("_aullrem", mAX|mBX|mCX|mDX);
2219                     cinfo.flags = INFpusheabcdx;
2220                     cinfo.retregs32 = mAX|mDX;
2221                 }
2222                 else
2223                 {
2224                     const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@";
2225                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2226                     cinfo.retregs32 = mCX|mBX;
2227                 }
2228                 break;
2229 
2230             // This section is only for Windows and DOS (i.e. machines without the x87 FPU)
2231             case CLIB.dmul:
2232                 s = symboly("_DMUL@",mAX|mBX|mCX|mDX);
2233                 cinfo.retregs16 = DOUBLEREGS_16;
2234                 cinfo.retregs32 = DOUBLEREGS_32;
2235                 cinfo.pop = 8;
2236                 cinfo.flags = INFfloat;
2237                 cinfo.push87 = 1;
2238                 cinfo.pop87 = 1;
2239                 break;
2240 
2241             case CLIB.ddiv:
2242                 s = symboly("_DDIV@",mAX|mBX|mCX|mDX);
2243                 cinfo.retregs16 = DOUBLEREGS_16;
2244                 cinfo.retregs32 = DOUBLEREGS_32;
2245                 cinfo.pop = 8;
2246                 cinfo.flags = INFfloat;
2247                 cinfo.push87 = 1;
2248                 cinfo.pop87 = 1;
2249                 break;
2250 
2251             case CLIB.dtst0:
2252                 s = symboly("_DTST0@",0);
2253                 cinfo.flags = INFfloat;
2254                 break;
2255 
2256             case CLIB.dtst0exc:
2257                 s = symboly("_DTST0EXC@",0);
2258                 cinfo.flags = INFfloat;
2259                 break;
2260 
2261             case CLIB.dcmp:
2262                 s = symboly("_DCMP@",0);
2263                 cinfo.pop = 8;
2264                 cinfo.flags = INFfloat;
2265                 cinfo.push87 = 1;
2266                 cinfo.pop87 = 1;
2267                 break;
2268 
2269             case CLIB.dcmpexc:
2270                 s = symboly("_DCMPEXC@",0);
2271                 cinfo.pop = 8;
2272                 cinfo.flags = INFfloat;
2273                 cinfo.push87 = 1;
2274                 cinfo.pop87 = 1;
2275                 break;
2276 
2277             case CLIB.dneg:
2278                 s = symboly("_DNEG@",I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2279                 cinfo.retregs16 = DOUBLEREGS_16;
2280                 cinfo.retregs32 = DOUBLEREGS_32;
2281                 cinfo.flags = INFfloat;
2282                 break;
2283 
2284             case CLIB.dadd:
2285                 s = symboly("_DADD@",mAX|mBX|mCX|mDX);
2286                 cinfo.retregs16 = DOUBLEREGS_16;
2287                 cinfo.retregs32 = DOUBLEREGS_32;
2288                 cinfo.pop = 8;
2289                 cinfo.flags = INFfloat;
2290                 cinfo.push87 = 1;
2291                 cinfo.pop87 = 1;
2292                 break;
2293 
2294             case CLIB.dsub:
2295                 s = symboly("_DSUB@",mAX|mBX|mCX|mDX);
2296                 cinfo.retregs16 = DOUBLEREGS_16;
2297                 cinfo.retregs32 = DOUBLEREGS_32;
2298                 cinfo.pop = 8;
2299                 cinfo.flags = INFfloat;
2300                 cinfo.push87 = 1;
2301                 cinfo.pop87 = 1;
2302                 break;
2303 
2304             case CLIB.fmul:
2305                 s = symboly("_FMUL@",mAX|mBX|mCX|mDX);
2306                 cinfo.retregs16 = FLOATREGS_16;
2307                 cinfo.retregs32 = FLOATREGS_32;
2308                 cinfo.flags = INFfloat;
2309                 cinfo.push87 = 1;
2310                 cinfo.pop87 = 1;
2311                 break;
2312 
2313             case CLIB.fdiv:
2314                 s = symboly("_FDIV@",mAX|mBX|mCX|mDX);
2315                 cinfo.retregs16 = FLOATREGS_16;
2316                 cinfo.retregs32 = FLOATREGS_32;
2317                 cinfo.flags = INFfloat;
2318                 cinfo.push87 = 1;
2319                 cinfo.pop87 = 1;
2320                 break;
2321 
2322             case CLIB.ftst0:
2323                 s = symboly("_FTST0@",0);
2324                 cinfo.flags = INFfloat;
2325                 break;
2326 
2327             case CLIB.ftst0exc:
2328                 s = symboly("_FTST0EXC@",0);
2329                 cinfo.flags = INFfloat;
2330                 break;
2331 
2332             case CLIB.fcmp:
2333                 s = symboly("_FCMP@",0);
2334                 cinfo.flags = INFfloat;
2335                 cinfo.push87 = 1;
2336                 cinfo.pop87 = 1;
2337                 break;
2338 
2339             case CLIB.fcmpexc:
2340                 s = symboly("_FCMPEXC@",0);
2341                 cinfo.flags = INFfloat;
2342                 cinfo.push87 = 1;
2343                 cinfo.pop87 = 1;
2344                 break;
2345 
2346             case CLIB.fneg:
2347                 s = symboly("_FNEG@",I16 ? FLOATREGS_16 : FLOATREGS_32);
2348                 cinfo.retregs16 = FLOATREGS_16;
2349                 cinfo.retregs32 = FLOATREGS_32;
2350                 cinfo.flags = INFfloat;
2351                 break;
2352 
2353             case CLIB.fadd:
2354                 s = symboly("_FADD@",mAX|mBX|mCX|mDX);
2355                 cinfo.retregs16 = FLOATREGS_16;
2356                 cinfo.retregs32 = FLOATREGS_32;
2357                 cinfo.flags = INFfloat;
2358                 cinfo.push87 = 1;
2359                 cinfo.pop87 = 1;
2360                 break;
2361 
2362             case CLIB.fsub:
2363                 s = symboly("_FSUB@",mAX|mBX|mCX|mDX);
2364                 cinfo.retregs16 = FLOATREGS_16;
2365                 cinfo.retregs32 = FLOATREGS_32;
2366                 cinfo.flags = INFfloat;
2367                 cinfo.push87 = 1;
2368                 cinfo.pop87 = 1;
2369                 break;
2370 
2371             case CLIB.dbllng:
2372             {
2373                 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG" : "_DBLLNG@";
2374                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2375                 cinfo.retregs16 = mDX | mAX;
2376                 cinfo.retregs32 = mAX;
2377                 cinfo.flags = INFfloat;
2378                 cinfo.push87 = 1;
2379                 cinfo.pop87 = 1;
2380                 break;
2381             }
2382 
2383             case CLIB.lngdbl:
2384             {
2385                 const(char)* name = (config.exe & ex_unix) ? "__LNGDBL" : "_LNGDBL@";
2386                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2387                 cinfo.retregs16 = DOUBLEREGS_16;
2388                 cinfo.retregs32 = DOUBLEREGS_32;
2389                 cinfo.flags = INFfloat;
2390                 cinfo.push87 = 1;
2391                 cinfo.pop87 = 1;
2392                 break;
2393             }
2394 
2395             case CLIB.dblint:
2396             {
2397                 const(char)* name = (config.exe & ex_unix) ? "__DBLINT" : "_DBLINT@";
2398                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2399                 cinfo.retregs16 = mAX;
2400                 cinfo.retregs32 = mAX;
2401                 cinfo.flags = INFfloat;
2402                 cinfo.push87 = 1;
2403                 cinfo.pop87 = 1;
2404                 break;
2405             }
2406 
2407             case CLIB.intdbl:
2408             {
2409                 const(char)* name = (config.exe & ex_unix) ? "__INTDBL" : "_INTDBL@";
2410                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2411                 cinfo.retregs16 = DOUBLEREGS_16;
2412                 cinfo.retregs32 = DOUBLEREGS_32;
2413                 cinfo.flags = INFfloat;
2414                 cinfo.push87 = 1;
2415                 cinfo.pop87 = 1;
2416                 break;
2417             }
2418 
2419             case CLIB.dbluns:
2420             {
2421                 const(char)* name = (config.exe & ex_unix) ? "__DBLUNS" : "_DBLUNS@";
2422                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2423                 cinfo.retregs16 = mAX;
2424                 cinfo.retregs32 = mAX;
2425                 cinfo.flags = INFfloat;
2426                 cinfo.push87 = 1;
2427                 cinfo.pop87 = 1;
2428                 break;
2429             }
2430 
2431             case CLIB.unsdbl:
2432                 // Y(DOUBLEREGS_32,"__UNSDBL"),         // CLIB.unsdbl
2433                 // Y(DOUBLEREGS_16,"_UNSDBL@"),
2434                 // {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _UNSDBL@     unsdbl
2435             {
2436                 const(char)* name = (config.exe & ex_unix) ? "__UNSDBL" : "_UNSDBL@";
2437                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2438                 cinfo.retregs16 = DOUBLEREGS_16;
2439                 cinfo.retregs32 = DOUBLEREGS_32;
2440                 cinfo.flags = INFfloat;
2441                 cinfo.push87 = 1;
2442                 cinfo.pop87 = 1;
2443                 break;
2444             }
2445 
2446             case CLIB.dblulng:
2447             {
2448                 const(char)* name = (config.exe & ex_unix) ? "__DBLULNG" : "_DBLULNG@";
2449                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2450                 cinfo.retregs16 = mDX|mAX;
2451                 cinfo.retregs32 = mAX;
2452                 cinfo.flags = (config.exe & ex_unix) ? INFfloat | INF32 : INFfloat;
2453                 cinfo.push87 = (config.exe & ex_unix) ? 0 : 1;
2454                 cinfo.pop87 = 1;
2455                 break;
2456             }
2457 
2458             case CLIB.ulngdbl:
2459             {
2460                 const(char)* name = (config.exe & ex_unix) ? "__ULNGDBL@" : "_ULNGDBL@";
2461                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2462                 cinfo.retregs16 = DOUBLEREGS_16;
2463                 cinfo.retregs32 = DOUBLEREGS_32;
2464                 cinfo.flags = INFfloat;
2465                 cinfo.push87 = 1;
2466                 cinfo.pop87 = 1;
2467                 break;
2468             }
2469 
2470             case CLIB.dblflt:
2471             {
2472                 const(char)* name = (config.exe & ex_unix) ? "__DBLFLT" : "_DBLFLT@";
2473                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2474                 cinfo.retregs16 = FLOATREGS_16;
2475                 cinfo.retregs32 = FLOATREGS_32;
2476                 cinfo.flags = INFfloat;
2477                 cinfo.push87 = 1;
2478                 cinfo.pop87 = 1;
2479                 break;
2480             }
2481 
2482             case CLIB.fltdbl:
2483             {
2484                 const(char)* name = (config.exe & ex_unix) ? "__FLTDBL" : "_FLTDBL@";
2485                 s = symboly(name, I16 ? ALLREGS : DOUBLEREGS_32);
2486                 cinfo.retregs16 = DOUBLEREGS_16;
2487                 cinfo.retregs32 = DOUBLEREGS_32;
2488                 cinfo.flags = INFfloat;
2489                 cinfo.push87 = 1;
2490                 cinfo.pop87 = 1;
2491                 break;
2492             }
2493 
2494             case CLIB.dblllng:
2495             {
2496                 const(char)* name = (config.exe & ex_unix) ? "__DBLLLNG" : "_DBLLLNG@";
2497                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2498                 cinfo.retregs16 = DOUBLEREGS_16;
2499                 cinfo.retregs32 = mDX|mAX;
2500                 cinfo.flags = INFfloat;
2501                 cinfo.push87 = 1;
2502                 cinfo.pop87 = 1;
2503                 break;
2504             }
2505 
2506             case CLIB.llngdbl:
2507             {
2508                 const(char)* name = (config.exe & ex_unix) ? "__LLNGDBL" : "_LLNGDBL@";
2509                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2510                 cinfo.retregs16 = DOUBLEREGS_16;
2511                 cinfo.retregs32 = DOUBLEREGS_32;
2512                 cinfo.flags = INFfloat;
2513                 cinfo.push87 = 1;
2514                 cinfo.pop87 = 1;
2515                 break;
2516             }
2517 
2518             case CLIB.dblullng:
2519             {
2520                 if (config.exe == EX_WIN64)
2521                 {
2522                     s = symboly("__DBLULLNG", DOUBLEREGS_32);
2523                     cinfo.retregs32 = mAX;
2524                     cinfo.flags = INFfloat;
2525                     cinfo.push87 = 2;
2526                     cinfo.pop87 = 2;
2527                 }
2528                 else
2529                 {
2530                     const(char)* name = (config.exe & ex_unix) ? "__DBLULLNG" : "_DBLULLNG@";
2531                     s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2532                     cinfo.retregs16 = DOUBLEREGS_16;
2533                     cinfo.retregs32 = I64 ? mAX : mDX|mAX;
2534                     cinfo.flags = INFfloat;
2535                     cinfo.push87 = (config.exe & ex_unix) ? 2 : 1;
2536                     cinfo.pop87 = (config.exe & ex_unix) ? 2 : 1;
2537                 }
2538                 break;
2539             }
2540 
2541             case CLIB.ullngdbl:
2542             {
2543                 if (config.exe == EX_WIN64)
2544                 {
2545                     s = symboly("__ULLNGDBL", DOUBLEREGS_32);
2546                     cinfo.retregs32 = mAX;
2547                     cinfo.flags = INFfloat;
2548                     cinfo.push87 = 1;
2549                     cinfo.pop87 = 1;
2550                 }
2551                 else
2552                 {
2553                     const(char)* name = (config.exe & ex_unix) ? "__ULLNGDBL" : "_ULLNGDBL@";
2554                     s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2555                     cinfo.retregs16 = DOUBLEREGS_16;
2556                     cinfo.retregs32 = I64 ? mAX : DOUBLEREGS_32;
2557                     cinfo.flags = INFfloat;
2558                     cinfo.push87 = 1;
2559                     cinfo.pop87 = 1;
2560                 }
2561                 break;
2562             }
2563 
2564             case CLIB.dtst:
2565             {
2566                 const(char)* name = (config.exe & ex_unix) ? "__DTST" : "_DTST@";
2567                 s = symboly(name, 0);
2568                 cinfo.flags = INFfloat;
2569                 break;
2570             }
2571 
2572             case CLIB.vptrfptr:
2573             {
2574                 const(char)* name = (config.exe & ex_unix) ? "__HTOFPTR" : "_HTOFPTR@";
2575                 s = symboly(name, mES|mBX);
2576                 cinfo.retregs16 = mES|mBX;
2577                 cinfo.retregs32 = mES|mBX;
2578                 break;
2579             }
2580 
2581             case CLIB.cvptrfptr:
2582             {
2583                 const(char)* name = (config.exe & ex_unix) ? "__HCTOFPTR" : "_HCTOFPTR@";
2584                 s = symboly(name, mES|mBX);
2585                 cinfo.retregs16 = mES|mBX;
2586                 cinfo.retregs32 = mES|mBX;
2587                 break;
2588             }
2589 
2590             case CLIB._87topsw:
2591             {
2592                 const(char)* name = (config.exe & ex_unix) ? "__87TOPSW" : "_87TOPSW@";
2593                 s = symboly(name, 0);
2594                 cinfo.flags = INFfloat;
2595                 break;
2596             }
2597 
2598             case CLIB.fltto87:
2599             {
2600                 const(char)* name = (config.exe & ex_unix) ? "__FLTTO87" : "_FLTTO87@";
2601                 s = symboly(name, mST0);
2602                 cinfo.retregs16 = mST0;
2603                 cinfo.retregs32 = mST0;
2604                 cinfo.flags = INFfloat;
2605                 cinfo.push87 = 1;
2606                 break;
2607             }
2608 
2609             case CLIB.dblto87:
2610             {
2611                 const(char)* name = (config.exe & ex_unix) ? "__DBLTO87" : "_DBLTO87@";
2612                 s = symboly(name, mST0);
2613                 cinfo.retregs16 = mST0;
2614                 cinfo.retregs32 = mST0;
2615                 cinfo.flags = INFfloat;
2616                 cinfo.push87 = 1;
2617                 break;
2618             }
2619 
2620             case CLIB.dblint87:
2621             {
2622                 const(char)* name = (config.exe & ex_unix) ? "__DBLINT87" : "_DBLINT87@";
2623                 s = symboly(name, mST0|mAX);
2624                 cinfo.retregs16 = mAX;
2625                 cinfo.retregs32 = mAX;
2626                 cinfo.flags = INFfloat;
2627                 break;
2628             }
2629 
2630             case CLIB.dbllng87:
2631             {
2632                 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG87" : "_DBLLNG87@";
2633                 s = symboly(name, mST0|mAX|mDX);
2634                 cinfo.retregs16 = mDX|mAX;
2635                 cinfo.retregs32 = mAX;
2636                 cinfo.flags = INFfloat;
2637                 break;
2638             }
2639 
2640             case CLIB.ftst:
2641             {
2642                 const(char)* name = (config.exe & ex_unix) ? "__FTST" : "_FTST@";
2643                 s = symboly(name, 0);
2644                 cinfo.flags = INFfloat;
2645                 break;
2646             }
2647 
2648             case CLIB.fcompp:
2649             {
2650                 const(char)* name = (config.exe & ex_unix) ? "__FCOMPP" : "_FCOMPP@";
2651                 s = symboly(name, 0);
2652                 cinfo.retregs16 = mPSW;
2653                 cinfo.retregs32 = mPSW;
2654                 cinfo.flags = INFfloat;
2655                 cinfo.pop87 = 2;
2656                 break;
2657             }
2658 
2659             case CLIB.ftest:
2660             {
2661                 const(char)* name = (config.exe & ex_unix) ? "__FTEST" : "_FTEST@";
2662                 s = symboly(name, 0);
2663                 cinfo.retregs16 = mPSW;
2664                 cinfo.retregs32 = mPSW;
2665                 cinfo.flags = INFfloat;
2666                 break;
2667             }
2668 
2669             case CLIB.ftest0:
2670             {
2671                 const(char)* name = (config.exe & ex_unix) ? "__FTEST0" : "_FTEST0@";
2672                 s = symboly(name, 0);
2673                 cinfo.retregs16 = mPSW;
2674                 cinfo.retregs32 = mPSW;
2675                 cinfo.flags = INFfloat;
2676                 break;
2677             }
2678 
2679             case CLIB.fdiv87:
2680             {
2681                 const(char)* name = (config.exe & ex_unix) ? "__FDIVP" : "_FDIVP";
2682                 s = symboly(name, mST0|mAX|mBX|mCX|mDX);
2683                 cinfo.retregs16 = mST0;
2684                 cinfo.retregs32 = mST0;
2685                 cinfo.flags = INFfloat;
2686                 cinfo.push87 = 1;
2687                 cinfo.pop87 = 1;
2688                 break;
2689             }
2690 
2691             // Complex numbers
2692             case CLIB.cmul:
2693             {
2694                 s = symboly("_Cmul", mST0|mST01);
2695                 cinfo.retregs16 = mST01;
2696                 cinfo.retregs32 = mST01;
2697                 cinfo.flags = INF32|INFfloat;
2698                 cinfo.push87 = 3;
2699                 cinfo.pop87 = 5;
2700                 break;
2701             }
2702 
2703             case CLIB.cdiv:
2704             {
2705                 s = symboly("_Cdiv", mAX|mCX|mDX|mST0|mST01);
2706                 cinfo.retregs16 = mST01;
2707                 cinfo.retregs32 = mST01;
2708                 cinfo.flags = INF32|INFfloat;
2709                 cinfo.push87 = 0;
2710                 cinfo.pop87 = 2;
2711                 break;
2712             }
2713 
2714             case CLIB.ccmp:
2715             {
2716                 s = symboly("_Ccmp", mAX|mST0|mST01);
2717                 cinfo.retregs16 = mPSW;
2718                 cinfo.retregs32 = mPSW;
2719                 cinfo.flags = INF32|INFfloat;
2720                 cinfo.push87 = 0;
2721                 cinfo.pop87 = 4;
2722                 break;
2723             }
2724 
2725             case CLIB.u64_ldbl:
2726             {
2727                 const(char)* name = (config.exe & ex_unix) ? "__U64_LDBL" : "_U64_LDBL";
2728                 s = symboly(name, mST0);
2729                 cinfo.retregs16 = mST0;
2730                 cinfo.retregs32 = mST0;
2731                 cinfo.flags = INF32|INF64|INFfloat;
2732                 cinfo.push87 = 2;
2733                 cinfo.pop87 = 1;
2734                 break;
2735             }
2736 
2737             case CLIB.ld_u64:
2738             {
2739                 const(char)* name = (config.exe & ex_unix) ? (config.objfmt == OBJ_ELF ||
2740                                                              config.objfmt == OBJ_MACH ?
2741                                                                 "__LDBLULLNG" : "___LDBLULLNG")
2742                                                           : "__LDBLULLNG";
2743                 s = symboly(name, mST0|mAX|mDX);
2744                 cinfo.retregs16 = 0;
2745                 cinfo.retregs32 = mDX|mAX;
2746                 cinfo.flags = INF32|INF64|INFfloat;
2747                 cinfo.push87 = 1;
2748                 cinfo.pop87 = 2;
2749                 break;
2750             }
2751 
2752             default:
2753                 assert(0);
2754         }
2755         clibsyms[clib] = s;
2756     }
2757 
2758     *ps = s;
2759     *pinfo = cinfo;
2760 }
2761 
2762 /********************************
2763  * Generate code sequence to call C runtime library support routine.
2764  *      clib = CLIB.xxxx
2765  *      keepmask = mask of registers not to destroy. Currently can
2766  *              handle only 1. Should use a temporary rather than
2767  *              push/pop for speed.
2768  */
2769 
void callclib(ref CodeBuilder cdb, elem* e, uint clib, regm_t* pretregs, regm_t keepmask)
{
    //printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask));
    //elem_print(e);

    // Fetch the symbol of the support routine and the record describing
    // its flags, return registers and x87 stack effects.
    Symbol* s;
    ClibInfo* cinfo;
    getClibInfo(clib, &s, &cinfo);

    if (I16)
        assert(!(cinfo.flags & (INF32 | INF64)));

    // Release every register the routine does not preserve, except the
    // ones the caller wants kept.
    const regm_t destroyed = ~s.Sregsaved & (mES | mBP | ALLREGS);
    getregs(cdb, destroyed & ~keepmask);

    // Registers the routine preserves by itself need no explicit saving.
    keepmask &= ~s.Sregsaved;
    const int nsaved = numbitsset(keepmask);

    // Code emitted into cdbrestore is appended to cdb after the call.
    // NOTE(review): presumably gensaverestore() puts the save code into cdb
    // and the matching restore code into cdbrestore - confirm.
    CodeBuilder cdbrestore;
    cdbrestore.ctor();
    gensaverestore(keepmask, cdb, cdbrestore);

    // Account for the routine's effect on the x87 register stack.
    save87regs(cdb, cinfo.push87);
    foreach (_; 0 .. cinfo.push87)
        push87(cdb);

    foreach (_; 0 .. cinfo.pop87)
        pop87();

    const bool inlineLmul = config.target_cpu >= TARGET_80386 && clib == CLIB.lmul && !I32;
    if (inlineLmul)
    {
        // Emit the multiply as raw machine code instead of calling the library
        static immutable ubyte[23] lmul =
        [
            0x66,0xc1,0xe1,0x10,        // shl  ECX,16
            0x8b,0xcb,                  // mov  CX,BX           ;ECX = CX,BX
            0x66,0xc1,0xe0,0x10,        // shl  EAX,16
            0x66,0x0f,0xac,0xd0,0x10,   // shrd EAX,EDX,16      ;EAX = DX,AX
            0x66,0xf7,0xe1,             // mul  ECX
            0x66,0x0f,0xa4,0xc2,0x10,   // shld EDX,EAX,16      ;DX,AX = EAX
        ];

        cdb.genasm(cast(char*)lmul.ptr, lmul.sizeof);
    }
    else
    {
        // Order in which the four general registers are pushed: ECX, EBX, EDX, EAX
        static immutable ubyte[4] pushOrder = [ CX, BX, DX, AX ];

        makeitextern(s);

        const bool pushebx = (cinfo.flags & INFpushebx) != 0;
        const bool pushall = (cinfo.flags & INFpusheabcdx) != 0;

        // nalign collects the number of bytes handed back to cod3_stackadj()
        // (negated) once the call has returned.
        int nalign = 0;
        if (STACKALIGN >= 16)
        {   // Align the stack (assume no args on stack)
            const int npush = (nsaved + pushebx + 4 * pushall) * REGSIZE + stackpush;
            const int misalign = npush & (STACKALIGN - 1);
            if (misalign)
            {
                nalign = STACKALIGN - misalign;
                cod3_stackadj(cdb, nalign);
            }
        }

        if (pushebx)
        {
            const bool pushFour =
                (config.exe & (EX_LINUX | EX_LINUX64 | EX_FREEBSD | EX_FREEBSD64 | EX_DRAGONFLYBSD64)) != 0;
            if (pushFour)
            {
                foreach (r; pushOrder)
                    cdb.gen1(0x50 + r);                          // PUSH ECX / EBX / EDX / EAX
                nalign += 4 * REGSIZE;
            }
            else
            {
                cdb.gen1(0x50 + BX);                             // PUSH EBX
                nalign += REGSIZE;
            }
        }

        if (pushall)
        {
            // These pushes are not added to nalign.
            // NOTE(review): presumably the callee removes them - confirm.
            foreach (r; pushOrder)
                cdb.gen1(0x50 + r);                              // PUSH ECX / EBX / EDX / EAX
        }

        // Note: not for OSX
        /* Pass EBX on the stack instead, this is because EBX is used
         * for shared library function calls
         */
        if ((config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) &&
            (config.flags3 & CFG3pic))
        {
            load_localgot(cdb);     // EBX gets set to this value
        }

        cdb.gencs(LARGECODE ? 0x9A : 0xE8,0,FLfunc,s);  // CALL s
        if (nalign)
            cod3_stackadj(cdb, -nalign);
        calledafunc = 1;

        version (SCPP)
        {
            if (I16 &&                                   // bug in Optlink for weak references
                config.flags3 & CFG3wkfloat &&
                (cinfo.flags & (INFfloat | INFwkdone)) == INFfloat)
            {
                cinfo.flags |= INFwkdone;
                makeitextern(getRtlsym(RTLSYM_INTONLY));
                objmod.wkext(s, getRtlsym(RTLSYM_INTONLY));
            }
        }
    }

    if (I16)
        stackpush -= cinfo.pop;

    // The routine's result lives in a different register set per memory model
    const regm_t retregs = I16 ? cinfo.retregs16 : cinfo.retregs32;
    cdb.append(cdbrestore);
    fixresult(cdb, e, retregs, pretregs);
}
2881 
2882 
2883 /*************************************************
2884  * Helper function for converting OPparam's into array of Parameters.
2885  */
struct Parameter
{
    elem* e;        // expression for this argument (filled in by fillParameters)
    reg_t reg;      // first register assigned by FuncParamRegs_alloc, NOREG if passed on the stack
    reg_t reg2;     // second register assigned by FuncParamRegs_alloc, if any
    uint numalign;  // padding bytes added to numpara before this argument to honor its alignment
}
2887 
2888 //void fillParameters(elem* e, Parameter* parameters, int* pi);
2889 
/*************************************************
 * Flatten a tree of OPparam nodes into parameters[], left subtree first.
 * Params:
 *      e = OPparam tree, or a single argument expression
 *      parameters = array receiving one entry per leaf; only the `e` field is set
 *      pi = in/out index of the next free slot in parameters[]
 * Each OPparam node visited is released with freenode(); the leaves are kept.
 */
void fillParameters(elem* e, Parameter* parameters, int* pi)
{
    if (e.Eoper != OPparam)
    {
        // Leaf: an actual argument expression
        parameters[(*pi)++].e = e;
        return;
    }

    fillParameters(e.EV.E1, parameters, pi);
    fillParameters(e.EV.E2, parameters, pi);
    freenode(e);
}
2904 
2905 /***********************************
2906  * tyf: type of the function
2907  */
FuncParamRegs FuncParamRegs_create(tym_t tyf)
{
    // Builds the register-allocation state for passing arguments to a
    // function of type tyf: which integer and floating point registers are
    // available, and how many of each.

    // 32 bit: a single integer register, only for TYjfunc / TYmfunc
    static immutable ubyte[1] jfuncRegs = [ AX ];
    static immutable ubyte[1] mfuncRegs = [ CX ];

    // 64 bit, EX_WIN64
    static immutable ubyte[4] win64IntRegs = [ CX,DX,R8,R9 ];
    static immutable ubyte[4] win64XmmRegs = [ XMM0, XMM1, XMM2, XMM3 ];

    // 64 bit, everything else
    static immutable ubyte[6] other64IntRegs = [ DI,SI,DX,CX,R8,R9 ];
    static immutable ubyte[8] other64XmmRegs = [ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 ];

    FuncParamRegs fpr;
    fpr.tyf = tyf;
    fpr.numintegerregs = 0;
    fpr.numfloatregs = 0;

    if (I16)
        return fpr;             // nothing is passed in registers

    if (I32)
    {
        if (tyf == TYjfunc)
        {
            fpr.argregs = &jfuncRegs[0];
            fpr.numintegerregs = jfuncRegs.length;
        }
        else if (tyf == TYmfunc)
        {
            fpr.argregs = &mfuncRegs[0];
            fpr.numintegerregs = mfuncRegs.length;
        }
        return fpr;
    }

    if (!I64)
        assert(0);

    if (config.exe == EX_WIN64)
    {
        fpr.argregs = &win64IntRegs[0];
        fpr.numintegerregs = win64IntRegs.length;

        fpr.floatregs = &win64XmmRegs[0];
        fpr.numfloatregs = win64XmmRegs.length;
    }
    else
    {
        fpr.argregs = &other64IntRegs[0];
        fpr.numintegerregs = other64IntRegs.length;

        fpr.floatregs = &other64XmmRegs[0];
        fpr.numfloatregs = other64XmmRegs.length;
    }
    return fpr;
}
2961 
2962 /*****************************************
2963  * Allocate parameter of type t and ty to registers *preg1 and *preg2.
2964  * Params:
2965  *      t = type, valid only if ty is TYstruct or TYarray
2966  * Returns:
2967  *      false       not allocated to any register
2968  *      true        *preg1, *preg2 set to allocated register pair
2969  */
2970 
2971 //bool type_jparam2(type* t, tym_t ty);
2972 
/*****************************************
 * Decide whether a value of type ty may be passed in a general purpose register.
 * Params:
 *      t = type, consulted only when ty is TYstruct or TYarray
 *      ty = type mask; only its basic type is examined
 * Returns:
 *      false for floating types;
 *      for structs and arrays, true if the size is no larger than a near
 *      pointer and (except on EX_WIN64) is exactly 1, 2, 4 or 8 bytes;
 *      for all other types, true if the size is no larger than a near pointer
 */
private bool type_jparam2(type* t, tym_t ty)
{
    const tym_t tyb = tybasic(ty);

    if (tyfloating(tyb))
        return false;

    // Scalars: only the size matters
    if (tyb != TYstruct && tyb != TYarray)
        return tysize(tyb) <= _tysize[TYnptr];

    // Aggregates: the size comes from the type itself
    type_debug(t);
    const targ_size_t sz = type_size(t);
    if (sz > _tysize[TYnptr])
        return false;
    return config.exe == EX_WIN64 || sz == 1 || sz == 2 || sz == 4 || sz == 8;
}
2990 
/*****************************************
 * Try to assign the next function argument to one or two registers.
 * Params:
 *      fpr = allocation state; its argument index and register counters are updated
 *      t = type of the argument, needed when ty is a struct or array
 *      ty = type mask of the argument
 *      preg1 = receives the first register, or NOREG
 *      preg2 = receives the second register, or NOREG
 * Returns:
 *      0 if the argument is not passed in registers (*preg1 is then NOREG),
 *      1 if *preg1 (and possibly *preg2) were assigned
 */
int FuncParamRegs_alloc(ref FuncParamRegs fpr, type* t, tym_t ty, reg_t* preg1, reg_t* preg2)
{
    //printf("FuncParamRegs::alloc(ty: TY%sm t: %p)\n", tystring[tybasic(ty)], t);
    //if (t) type_print(t);

    *preg1 = NOREG;
    *preg2 = NOREG;

    // Type of the second half when the argument is split over two registers;
    // TYMAX means there is no second half.
    type* tSecond = null;
    tym_t tySecond = TYMAX;

    // SROA with mixed registers
    if (ty & mTYxmmgpr)
    {
        ty = TYdouble;
        tySecond = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty = TYllong;
        tySecond = TYdouble;
    }

    // Treat array of 1 the same as its element type
    // (Don't put volatile parameters in registers)
    const bool oneElementArray =
        tybasic(ty) == TYarray &&
        tybasic(t.Tty) == TYarray &&
        t.Tdim == 1 &&
        !(t.Tty & mTYvolatile) &&
        type_size(t.Tnext) > 1;
    if (oneElementArray)
    {
        t = t.Tnext;
        ty = t.Tty;
    }

    if (tybasic(ty) == TYstruct && type_zeroSize(t, fpr.tyf))
        return 0;               // don't allocate into registers

    ++fpr.i;                    // count this argument

    // If struct or array, substitute the type(s) it is passed as
    if (tyaggregate(ty))
    {
        assert(t);
        if (config.exe == EX_WIN64)
        {
            /* Structs occupy a general purpose register, regardless of the struct
             * size or the number & types of its fields.
             */
            t = null;
            ty = TYnptr;
        }
        else
        {
            type* targ1, targ2;
            const tym_t aggty = tybasic(t.Tty);
            if (aggty == TYstruct)
            {
                targ1 = t.Ttag.Sstruct.Sarg1type;
                targ2 = t.Ttag.Sstruct.Sarg2type;
            }
            else if (aggty == TYarray)
            {
                if (I64)
                    argtypes(t, targ1, targ2);
            }
            else
                assert(0);

            if (!targ1)
            {
                if (I64 && !targ2)
                    return 0;
            }
            else
            {
                t = targ1;
                ty = t.Tty;
                if (targ2)
                {
                    tSecond = targ2;
                    tySecond = tSecond.Tty;
                }
            }
        }
    }

    // Remembered so a failed second half can roll the counters back
    const int regcntsave = fpr.regcnt;
    const int xmmcntsave = fpr.xmmcnt;

    if (config.exe == EX_WIN64)
    {
        if (tybasic(ty) == TYcfloat)
            ty = TYnptr;                // treat like a struct
    }
    else if (I64)
    {
        const tym_t tyb = tybasic(ty);

        if ((tyb == TYcent || tyb == TYucent) &&
            fpr.numintegerregs - fpr.regcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.argregs[fpr.regcnt];
            *preg2 = fpr.argregs[fpr.regcnt + 1];
            fpr.regcnt += 2;
            return 1;
        }

        if (tyb == TYcdouble &&
            fpr.numfloatregs - fpr.xmmcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.floatregs[fpr.xmmcnt];
            *preg2 = fpr.floatregs[fpr.xmmcnt + 1];
            fpr.xmmcnt += 2;
            return 1;
        }

        if (tyb == TYcfloat &&
            fpr.numfloatregs - fpr.xmmcnt >= 1)
        {
            // Allocate XMM register
            *preg1 = fpr.floatregs[fpr.xmmcnt++];
            return 1;
        }
    }

    // First pass handles (t, ty) into *preg1; the second pass, if there is a
    // second half, handles (tSecond, tySecond) into *preg2.
    reg_t* preg = preg1;
    foreach (j; 0 .. 2)
    {
        if (fpr.regcnt < fpr.numintegerregs &&
            (I64 || (fpr.i == 1 && (fpr.tyf == TYjfunc || fpr.tyf == TYmfunc))) &&
            type_jparam2(t, ty))
        {
            // General purpose register
            *preg = fpr.argregs[fpr.regcnt];
            ++fpr.regcnt;
            if (config.exe == EX_WIN64)
                ++fpr.xmmcnt;           // on EX_WIN64 both counters advance together
        }
        else if (fpr.xmmcnt < fpr.numfloatregs && tyxmmreg(ty))
        {
            // XMM register
            *preg = fpr.floatregs[fpr.xmmcnt];
            if (config.exe == EX_WIN64)
                ++fpr.regcnt;           // on EX_WIN64 both counters advance together
            ++fpr.xmmcnt;
        }
        else
        {
            // Failed to allocate to a register
            if (j == 1)
            {   /* Unwind first preg1 assignment, because it's both or nothing
                 */
                *preg1 = NOREG;
                fpr.regcnt = regcntsave;
                fpr.xmmcnt = xmmcntsave;
            }
            return 0;
        }

        if (tybasic(tySecond) == TYMAX)
            break;                      // no second half
        preg = preg2;
        t = tSecond;
        ty = tySecond;
    }
    return 1;
}
3157 
3158 /***************************************
 * Finds replacement types for register passing of aggregates.
3160  */
void argtypes(type* t, ref type* arg1type, ref type* arg2type)
{
    /* Params:
     *  t = type of the aggregate; if null or not an aggregate, nothing is done
     *      and arg1type/arg2type are left untouched
     *  arg1type = set to the type the first register-sized part is passed as,
     *      or null if the aggregate is not passed in registers
     *  arg2type = set to the type of the second part, or null
     * Only static arrays are handled here; for structs both outputs are
     * set to null (see TODO at the end).
     */
    if (!t) return;

    tym_t ty = t.Tty;

    if (!tyaggregate(ty))
        return;

    arg1type = arg2type = null;

    if (tybasic(ty) == TYarray)
    {
        size_t sz = cast(size_t) type_size(t);
        if (sz == 0)
            return;

        // On 32 bit and EX_WIN64, reject sizes that are not a power of 2
        if ((I32 || config.exe == EX_WIN64) && (sz & (sz - 1)))
            return;

        if (config.exe == EX_WIN64 && sz > REGSIZE)
            return;

        // Default: treat the array as one or two integers of matching size
        if (sz <= 2 * REGSIZE)
        {
            type** argtype = &arg1type;
            size_t argsz = sz < REGSIZE ? sz : REGSIZE;
            foreach (v; 0 .. (sz > REGSIZE) + 1)
            {
                *argtype = argsz == 1 ? tstypes[TYchar]
                         : argsz == 2 ? tstypes[TYshort]
                         : argsz <= 4 ? tstypes[TYlong]
                         : tstypes[TYllong];
                // Set up for the second part; only used when sz > REGSIZE
                argtype = &arg2type;
                argsz = sz - REGSIZE;
            }
        }

        // 64 bit, not EX_WIN64: refine the choice based on the element type
        if (I64 && config.exe != EX_WIN64)
        {
            // Strip nested array levels to reach the innermost element type.
            // The basic type is used from the start so that a qualified inner
            // array is unwrapped the same way as an unqualified one.
            type* tn = t.Tnext;
            tym_t tyn = tybasic(tn.Tty);
            while (tyn == TYarray)
            {
                tn = tn.Tnext;
                assert(tn);
                tyn = tybasic(tn.Tty);
            }

            if (tybasic(tyn) == TYstruct)
            {
                if (type_size(tn) == sz) // array(s) of size 1
                {
                    // The array is exactly one struct: pass it like that struct
                    arg1type = tn.Ttag.Sstruct.Sarg1type;
                    arg2type = tn.Ttag.Sstruct.Sarg2type;
                    return;
                }

                // Otherwise continue with the struct's first argument type, if any
                type* t1 = tn.Ttag.Sstruct.Sarg1type;
                if (t1)
                {
                    tn = t1;
                    tyn = tn.Tty;
                }
            }

            // The array consists of a single element of this type
            if (sz == tysize(tyn))
            {
                if (tysimd(tyn))
                {
                    type* ts = type_fake(tybasic(tyn));
                    ts.Tcount = 1;
                    arg1type = ts;
                    return;
                }
                else if (tybasic(tyn) == TYldouble || tybasic(tyn) == TYildouble)
                {
                    arg1type = tstypes[tybasic(tyn)];
                    return;
                }
            }

            // Floating point elements: pass as float/double parts, 8 bytes per part
            if (sz <= 16)
            {
                if (tyfloating(tyn))
                {
                    arg1type = sz <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                    if (sz > 8)
                        arg2type = (sz - 8) <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                }
            }
        }
    }
    else if (tybasic(ty) == TYstruct)
    {
        // TODO: Move code from `cgelem.d:elstruct()` here
    }
}
3259 
3260 /*******************************
3261  * Generate code sequence for function call.
3262  */
3263 
3264 void cdfunc(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
3265 {
3266     //printf("cdfunc()\n"); elem_print(e);
3267     assert(e);
3268     uint numpara = 0;               // bytes of parameters
3269     uint numalign = 0;              // bytes to align stack before pushing parameters
3270     uint stackpushsave = stackpush;            // so we can compute # of parameters
3271     cgstate.stackclean++;
3272     regm_t keepmsk = 0;
3273     int xmmcnt = 0;
3274     tym_t tyf = tybasic(e.EV.E1.Ety);        // the function type
3275 
3276     // Easier to deal with parameters as an array: parameters[0..np]
3277     int np = OTbinary(e.Eoper) ? el_nparams(e.EV.E2) : 0;
3278     Parameter *parameters = cast(Parameter *)alloca(np * Parameter.sizeof);
3279 
3280     if (np)
3281     {
3282         int n = 0;
3283         fillParameters(e.EV.E2, parameters, &n);
3284         assert(n == np);
3285     }
3286 
3287     Symbol *sf = null;                  // symbol of the function being called
3288     if (e.EV.E1.Eoper == OPvar)
3289         sf = e.EV.E1.EV.Vsym;
3290 
3291     /* Assume called function access statics
3292      */
3293     if (config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_FREEBSD | EX_FREEBSD64) &&
3294         config.flags3 & CFG3pic)
3295         cgstate.accessedTLS = true;
3296 
3297     /* Special handling for call to __tls_get_addr, we must save registers
3298      * before evaluating the parameter, so that the parameter load and call
3299      * are adjacent.
3300      */
3301     if (np == 1 && sf)
3302     {
3303         if (sf == tls_get_addr_sym)
3304             getregs(cdb, ~sf.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
3305     }
3306 
3307     uint stackalign = REGSIZE;
3308     if (tyf == TYf16func)
3309         stackalign = 2;
3310     // Figure out which parameters go in registers.
3311     // Compute numpara, the total bytes pushed on the stack
3312     FuncParamRegs fpr = FuncParamRegs_create(tyf);
3313     for (int i = np; --i >= 0;)
3314     {
3315         elem *ep = parameters[i].e;
3316         uint psize = cast(uint)_align(stackalign, paramsize(ep, tyf));     // align on stack boundary
3317         if (config.exe == EX_WIN64)
3318         {
3319             //printf("[%d] size = %u, numpara = %d ep = %p ", i, psize, numpara, ep); WRTYxx(ep.Ety); printf("\n");
3320             debug
3321             if (psize > REGSIZE) elem_print(e);
3322 
3323             assert(psize <= REGSIZE);
3324             psize = REGSIZE;
3325         }
3326         //printf("[%d] size = %u, numpara = %d ", i, psize, numpara); WRTYxx(ep.Ety); printf("\n");
3327         if (FuncParamRegs_alloc(fpr, ep.ET, ep.Ety, &parameters[i].reg, &parameters[i].reg2))
3328         {
3329             if (config.exe == EX_WIN64)
3330                 numpara += REGSIZE;             // allocate stack space for it anyway
3331             continue;   // goes in register, not stack
3332         }
3333 
3334         // Parameter i goes on the stack
3335         parameters[i].reg = NOREG;
3336         uint alignsize = el_alignsize(ep);
3337         parameters[i].numalign = 0;
3338         if (alignsize > stackalign &&
3339             (I64 || (alignsize >= 16 &&
3340                 (config.exe & (EX_OSX | EX_LINUX) && (tyaggregate(ep.Ety) || tyvector(ep.Ety))))))
3341         {
3342             if (alignsize > STACKALIGN)
3343             {
3344                 STACKALIGN = alignsize;
3345                 enforcealign = true;
3346             }
3347             uint newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1);
3348             parameters[i].numalign = newnumpara - numpara;
3349             numpara = newnumpara;
3350             assert(config.exe != EX_WIN64);
3351         }
3352         numpara += psize;
3353     }
3354 
3355     if (config.exe == EX_WIN64)
3356     {
3357         if (numpara < 4 * REGSIZE)
3358             numpara = 4 * REGSIZE;
3359     }
3360 
3361     //printf("numpara = %d, stackpush = %d\n", numpara, stackpush);
3362     assert((numpara & (REGSIZE - 1)) == 0);
3363     assert((stackpush & (REGSIZE - 1)) == 0);
3364 
3365     /* Should consider reordering the order of evaluation of the parameters
3366      * so that args that go into registers are evaluated after args that get
3367      * pushed. We can reorder args that are constants or relconst's.
3368      */
3369 
3370     /* Determine if we should use cgstate.funcarg for the parameters or push them
3371      */
3372     bool usefuncarg = false;
3373     static if (0)
3374     {
3375         printf("test1 %d %d %d %d %d %d %d %d\n", (config.flags4 & CFG4speed)!=0, !Alloca.size,
3376             !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)),
3377             cast(int)numpara, !stackpush,
3378             (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos),
3379             (!typfunc(tyf) || sf && sf.Sflags & SFLexit), !I16);
3380     }
3381     if (config.flags4 & CFG4speed &&
3382         !Alloca.size &&
3383         /* The cleanup code calls a local function, leaving the return address on
3384          * the top of the stack. If parameters are placed there, the return address
3385          * is stepped on.
3386          * A better solution is turn this off only inside the cleanup code.
3387          */
3388         !usednteh &&
3389         !calledFinally &&
3390         (numpara || config.exe == EX_WIN64) &&
3391         stackpush == 0 &&               // cgstate.funcarg needs to be at top of stack
3392         (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos) &&
3393         (!(typfunc(tyf) || tyf == TYhfunc) || sf && sf.Sflags & SFLexit) &&
3394         !anyiasm && !I16
3395        )
3396     {
3397         for (int i = 0; i < np; i++)
3398         {
3399             elem* ep = parameters[i].e;
3400             int preg = parameters[i].reg;
3401             //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
3402             if (preg == NOREG)
3403             {
3404                 switch (ep.Eoper)
3405                 {
3406                     case OPstrctor:
3407                     case OPstrthis:
3408                     case OPstrpar:
3409                     case OPnp_fp:
3410                         goto Lno;
3411 
3412                     default:
3413                         break;
3414                 }
3415             }
3416         }
3417 
3418         if (numpara > cgstate.funcarg.size)
3419         {   // New high water mark
3420             //printf("increasing size from %d to %d\n", (int)cgstate.funcarg.size, (int)numpara);
3421             cgstate.funcarg.size = numpara;
3422         }
3423         usefuncarg = true;
3424     }
3425   Lno:
3426 
3427     /* Adjust start of the stack so after all args are pushed,
3428      * the stack will be aligned.
3429      */
3430     if (!usefuncarg && STACKALIGN >= 16 && (numpara + stackpush) & (STACKALIGN - 1))
3431     {
3432         numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
3433         cod3_stackadj(cdb, numalign);
3434         cdb.genadjesp(numalign);
3435         stackpush += numalign;
3436         stackpushsave += numalign;
3437     }
3438     assert(stackpush == stackpushsave);
3439     if (config.exe == EX_WIN64)
3440     {
3441         //printf("np = %d, numpara = %d, stackpush = %d\n", np, numpara, stackpush);
3442         assert(numpara == ((np < 4) ? 4 * REGSIZE : np * REGSIZE));
3443 
3444         // Allocate stack space for four entries anyway
3445         // http://msdn.microsoft.com/en-US/library/ew5tede7(v=vs.80)
3446     }
3447 
3448     int[XMM7 + 1] regsaved = void;
3449     memset(regsaved.ptr, -1, regsaved.sizeof);
3450     CodeBuilder cdbrestore;
3451     cdbrestore.ctor();
3452     regm_t saved = 0;
3453     targ_size_t funcargtossave = cgstate.funcargtos;
3454     targ_size_t funcargtos = numpara;
3455     //printf("funcargtos1 = %d\n", cast(int)funcargtos);
3456 
3457     /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9
3458      * float and double parameters go into XMM0..XMM7
3459      * For variadic functions, count of XMM registers used goes in AL
3460      */
3461     for (int i = 0; i < np; i++)
3462     {
3463         elem* ep = parameters[i].e;
3464         int preg = parameters[i].reg;
3465         //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
3466         if (preg == NOREG)
3467         {
3468             /* Push parameter on stack, but keep track of registers used
3469              * in the process. If they interfere with keepmsk, we'll have
3470              * to save/restore them.
3471              */
3472             CodeBuilder cdbsave;
3473             cdbsave.ctor();
3474             regm_t overlap = msavereg & keepmsk;
3475             msavereg |= keepmsk;
3476             CodeBuilder cdbparams;
3477             cdbparams.ctor();
3478             if (usefuncarg)
3479                 movParams(cdbparams, ep, stackalign, cast(uint)funcargtos, tyf);
3480             else
3481                 pushParams(cdbparams,ep,stackalign, tyf);
3482             regm_t tosave = keepmsk & ~msavereg;
3483             msavereg &= ~keepmsk | overlap;
3484 
3485             // tosave is the mask to save and restore
3486             for (reg_t j = 0; tosave; j++)
3487             {
3488                 regm_t mi = mask(j);
3489                 assert(j <= XMM7);
3490                 if (mi & tosave)
3491                 {
3492                     uint idx;
3493                     regsave.save(cdbsave, j, &idx);
3494                     regsave.restore(cdbrestore, j, idx);
3495                     saved |= mi;
3496                     keepmsk &= ~mi;             // don't need to keep these for rest of params
3497                     tosave &= ~mi;
3498                 }
3499             }
3500 
3501             cdb.append(cdbsave);
3502             cdb.append(cdbparams);
3503 
3504             // Alignment for parameter comes after it got pushed
3505             const uint numalignx = parameters[i].numalign;
3506             if (usefuncarg)
3507             {
3508                 funcargtos -= _align(stackalign, paramsize(ep, tyf)) + numalignx;
3509                 cgstate.funcargtos = funcargtos;
3510             }
3511             else if (numalignx)
3512             {
3513                 cod3_stackadj(cdb, numalignx);
3514                 cdb.genadjesp(numalignx);
3515                 stackpush += numalignx;
3516             }
3517         }
3518         else
3519         {
3520             // Goes in register preg, not stack
3521             regm_t retregs = mask(preg);
3522             if (retregs & XMMREGS)
3523                 ++xmmcnt;
3524             int preg2 = parameters[i].reg2;
3525             reg_t mreg,lreg;
3526             if (preg2 != NOREG || tybasic(ep.Ety) == TYcfloat)
3527             {
3528                 assert(ep.Eoper != OPstrthis);
3529                 if (mask(preg2) & XMMREGS)
3530                     ++xmmcnt;
3531                 if (tybasic(ep.Ety) == TYcfloat)
3532                 {
3533                     lreg = ST01;
3534                     mreg = NOREG;
3535                 }
3536                 else if (tyrelax(ep.Ety) == TYcent)
3537                 {
3538                     lreg = mask(preg ) & mLSW ? cast(reg_t)preg  : AX;
3539                     mreg = mask(preg2) & mMSW ? cast(reg_t)preg2 : DX;
3540                 }
3541                 else
3542                 {
3543                     lreg = XMM0;
3544                     mreg = XMM1;
3545                 }
3546                 retregs = (mask(mreg) | mask(lreg)) & ~mask(NOREG);
3547                 CodeBuilder cdbsave;
3548                 cdbsave.ctor();
3549                 if (keepmsk & retregs)
3550                 {
3551                     regm_t tosave = keepmsk & retregs;
3552 
3553                     // tosave is the mask to save and restore
3554                     for (reg_t j = 0; tosave; j++)
3555                     {
3556                         regm_t mi = mask(j);
3557                         assert(j <= XMM7);
3558                         if (mi & tosave)
3559                         {
3560                             uint idx;
3561                             regsave.save(cdbsave, j, &idx);
3562                             regsave.restore(cdbrestore, j, idx);
3563                             saved |= mi;
3564                             keepmsk &= ~mi;             // don't need to keep these for rest of params
3565                             tosave &= ~mi;
3566                         }
3567                     }
3568                 }
3569                 cdb.append(cdbsave);
3570 
3571                 scodelem(cdb, ep, &retregs, keepmsk, false);
3572 
3573                 // Move result [mreg,lreg] into parameter registers from [preg2,preg]
3574                 retregs = 0;
3575                 if (preg != lreg)
3576                     retregs |= mask(preg);
3577                 if (preg2 != mreg)
3578                     retregs |= mask(preg2);
3579                 retregs &= ~mask(NOREG);
3580                 getregs(cdb,retregs);
3581 
3582                 tym_t ty1 = tybasic(ep.Ety);
3583                 tym_t ty2 = ty1;
3584                 if (ep.Ety & mTYgprxmm)
3585                 {
3586                     ty1 = TYllong;
3587                     ty2 = TYdouble;
3588                 }
3589                 else if (ep.Ety & mTYxmmgpr)
3590                 {
3591                     ty1 = TYdouble;
3592                     ty2 = TYllong;
3593                 }
3594                 else if (ty1 == TYstruct)
3595                 {
3596                     type* targ1 = ep.ET.Ttag.Sstruct.Sarg1type;
3597                     type* targ2 = ep.ET.Ttag.Sstruct.Sarg2type;
3598                     if (targ1)
3599                         ty1 = targ1.Tty;
3600                     if (targ2)
3601                         ty2 = targ2.Tty;
3602                 }
3603                 else if (tyrelax(ty1) == TYcent)
3604                     ty1 = ty2 = TYllong;
3605                 else if (tybasic(ty1) == TYcdouble)
3606                     ty1 = ty2 = TYdouble;
3607 
3608                 if (tybasic(ep.Ety) == TYcfloat)
3609                 {
3610                     assert(I64);
3611                     assert(lreg == ST01 && mreg == NOREG);
3612                     // spill
3613                     pop87();
3614                     pop87();
3615                     cdb.genfltreg(0xD9, 3, tysize(TYfloat));
3616                     genfwait(cdb);
3617                     cdb.genfltreg(0xD9, 3, 0);
3618                     genfwait(cdb);
3619                     // reload
3620                     if (config.exe == EX_WIN64)
3621                     {
3622                         cdb.genfltreg(LOD, preg, 0);
3623                         code_orrex(cdb.last(), REX_W);
3624                     }
3625                     else
3626                     {
3627                         assert(mask(preg) & XMMREGS);
3628                         cdb.genxmmreg(xmmload(TYdouble), cast(reg_t) preg, 0, TYdouble);
3629                     }
3630                 }
3631                 else foreach (v; 0 .. 2)
3632                 {
3633                     if (v ^ (preg != mreg))
3634                         genmovreg(cdb, preg, lreg, ty1);
3635                     else
3636                         genmovreg(cdb, preg2, mreg, ty2);
3637                 }
3638 
3639                 retregs = (mask(preg) | mask(preg2)) & ~mask(NOREG);
3640             }
3641             else if (ep.Eoper == OPstrthis)
3642             {
3643                 getregs(cdb,retregs);
3644                 // LEA preg,np[RSP]
3645                 uint delta = stackpush - ep.EV.Vuns;   // stack delta to parameter
3646                 cdb.genc1(LEA,
3647                         (modregrm(0,4,SP) << 8) | modregxrm(2,preg,4), FLconst,delta);
3648                 if (I64)
3649                     code_orrex(cdb.last(), REX_W);
3650             }
3651             else if (ep.Eoper == OPstrpar && config.exe == EX_WIN64 && type_size(ep.ET) == 0)
3652             {
3653                 retregs = 0;
3654                 scodelem(cdb, ep.EV.E1, &retregs, keepmsk, false);
3655                 freenode(ep);
3656             }
3657             else
3658             {
3659                 scodelem(cdb, ep, &retregs, keepmsk, false);
3660             }
3661             keepmsk |= retregs;      // don't change preg when evaluating func address
3662         }
3663     }
3664 
3665     if (config.exe == EX_WIN64)
3666     {   // Allocate stack space for four entries anyway
3667         // http://msdn.microsoft.com/en-US/library/ew5tede7(v=vs.80)
3668         {   uint sz = 4 * REGSIZE;
3669             if (usefuncarg)
3670             {
3671                 funcargtos -= sz;
3672                 cgstate.funcargtos = funcargtos;
3673             }
3674             else
3675             {
3676                 cod3_stackadj(cdb, sz);
3677                 cdb.genadjesp(sz);
3678                 stackpush += sz;
3679             }
3680         }
3681 
3682         /* Variadic functions store XMM parameters into their corresponding GP registers
3683          */
3684         for (int i = 0; i < np; i++)
3685         {
3686             int preg = parameters[i].reg;
3687             regm_t retregs = mask(preg);
3688             if (retregs & XMMREGS)
3689             {
3690                 reg_t reg;
3691                 switch (preg)
3692                 {
3693                     case XMM0: reg = CX; break;
3694                     case XMM1: reg = DX; break;
3695                     case XMM2: reg = R8; break;
3696                     case XMM3: reg = R9; break;
3697 
3698                     default:   assert(0);
3699                 }
3700                 getregs(cdb,mask(reg));
3701                 cdb.gen2(STOD,(REX_W << 16) | modregxrmx(3,preg-XMM0,reg)); // MOVD reg,preg
3702             }
3703         }
3704     }
3705 
3706     // Restore any register parameters we saved
3707     getregs(cdb,saved);
3708     cdb.append(cdbrestore);
3709     keepmsk |= saved;
3710 
3711     // Variadic functions store the number of XMM registers used in AL
3712     if (I64 && config.exe != EX_WIN64 && e.Eflags & EFLAGS_variadic)
3713     {
3714         getregs(cdb,mAX);
3715         movregconst(cdb,AX,xmmcnt,1);
3716         keepmsk |= mAX;
3717     }
3718 
3719     //printf("funcargtos2 = %d\n", (int)funcargtos);
3720     assert(!usefuncarg || (funcargtos == 0 && cgstate.funcargtos == 0));
3721     cgstate.stackclean--;
3722 
3723     debug
3724     if (!usefuncarg && numpara != stackpush - stackpushsave)
3725     {
3726         printf("function %s\n", funcsym_p.Sident.ptr);
3727         printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave);
3728         elem_print(e);
3729     }
3730 
3731     assert(usefuncarg || numpara == stackpush - stackpushsave);
3732 
3733     funccall(cdb,e,numpara,numalign,pretregs,keepmsk,usefuncarg);
3734     cgstate.funcargtos = funcargtossave;
3735 }
3736 
3737 /***********************************
      * Generate code that loads the address of a parameter located on the
      * stack into a register. The address is formed with
      *      LEA reg,delta[ESP]
      * where delta = stackpush - e.EV.Vuns.
      * Params:
      *      cdb      = code builder that receives the generated code
      *      e        = elem being evaluated; its type must be REGSIZE bytes wide
      *      pretregs = requested result registers; the destination is picked
      *                 from *pretregs & allregs and handed to fixresult()
3738  */
3739
3740 void cdstrthis(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
3741 {
3742     assert(tysize(e.Ety) == REGSIZE);
3743     const dst = findreg(*pretregs & allregs);
3744     const dstmask = mask(dst);
3745     getregs(cdb, dstmask);
3746     const uint delta = stackpush - e.EV.Vuns;   // stack delta to parameter
3747     const modrm = (modregrm(0, 4, SP) << 8) | modregxrm(2, dst, 4);
3748     cdb.genc1(LEA, modrm, FLconst, delta);      // LEA dst,delta[ESP]
3749     if (I64) code_orrex(cdb.last(), REX_W);     // 64 bit code needs the REX.W prefix
3750     fixresult(cdb, e, dstmask, pretregs);
3751 }
3752 
3753 /******************************
3754  * Call function. All parameters have already been pushed onto the stack.
      * Generates the call itself (direct, through a register, or through
      * memory), then the stack bookkeeping/cleanup that follows the call, and
      * finally passes the returned value on to fixresult()/fixresult87()/
      * fixresult_complex87().
3755  * Params:
      *      cdb        = code builder that receives the generated code
3756  *      e          = function call
3757  *      numpara    = size in bytes of all the parameters
3758  *      numalign   = amount the stack was aligned by before the parameters were pushed
3759  *      pretregs   = where return value goes
3760  *      keepmsk    = registers to not change when evaluating the function address
3761  *      usefuncarg = using cgstate.funcarg, so no need to adjust stack after func return
3762  */
3763
3764 private void funccall(ref CodeBuilder cdb, elem* e, uint numpara, uint numalign,
3765                       regm_t* pretregs,regm_t keepmsk, bool usefuncarg)
3766 {
3767     //printf("%s ", funcsym_p.Sident.ptr);
3768     //printf("funccall(e = %p, *pretregs = %s, numpara = %d, numalign = %d, usefuncarg=%d)\n",e,regm_str(*pretregs),numpara,numalign,usefuncarg);
3769     calledafunc = 1;
3770     // Determine if we need frame for function prolog/epilog
3771
3772     static if (TARGET_WINDOS)
3773     {
3774         if (config.memmodel == Vmodel)
3775         {
3776             if (tyfarfunc(funcsym_p.ty()))
3777                 needframe = true;
3778         }
3779     }
3780
3781     code cs;
3782     regm_t retregs;
3783     Symbol* s;
3784
3785     elem* e1 = e.EV.E1;
3786     tym_t tym1 = tybasic(e1.Ety);
         // Nonzero when the callee's type is a far function or TYifunc
3787     char farfunc = tyfarfunc(tym1) || tym1 == TYifunc;
3788
         // The call sequence is collected in cdbe and appended to cdb further
         // down. save87() and load_localgot() below emit straight into cdb, so
         // whatever they generate ends up ahead of everything in cdbe.
3789     CodeBuilder cdbe;
3790     cdbe.ctor();
3791
3792     if (e1.Eoper == OPvar)
3793     {   // Call function directly
3794
3795         if (!tyfunc(tym1))
3796             WRTYxx(tym1);
3797         assert(tyfunc(tym1));
3798         s = e1.EV.Vsym;
3799         if (s.Sflags & SFLexit)
3800         { }
3801         else if (s != tls_get_addr_sym)
3802             save87(cdb);               // assume 8087 regs are all trashed
3803
3804         // Function calls may throw Errors, unless marked that they don't
             // (a call to the function being compiled itself also clears the flag)
3805         if (s == funcsym_p || !s.Sfunc || !(s.Sfunc.Fflags3 & Fnothrow))
3806             funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;
3807
3808         if (s.Sflags & SFLexit)
3809         {
3810             // Function doesn't return, so don't worry about registers
3811             // it may use
3812         }
3813         else if (!tyfunc(s.ty()) || !(config.flags4 & CFG4optimized))
3814             // so we can replace func at runtime
3815             getregs(cdbe,~fregsaved & (mBP | ALLREGS | mES | XMMREGS));
3816         else
3817             getregs(cdbe,~s.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
             // A callee named "alloca" is replaced by the runtime library symbol
             // RTLSYM_ALLOCA, and the address of FLallocatmp is passed to it in
             // CX (DX for Win64).
3818         if (strcmp(s.Sident.ptr, "alloca") == 0)
3819         {
3820             s = getRtlsym(RTLSYM_ALLOCA);
3821             makeitextern(s);
3822             int areg = CX;
3823             if (config.exe == EX_WIN64)
3824                 areg = DX;
3825             getregs(cdbe, mask(areg));
3826             cdbe.genc(LEA, modregrm(2, areg, BPRM), FLallocatmp, 0, 0, 0);  // LEA areg,&localsize[BP]
3827             if (I64)
3828                 code_orrex(cdbe.last(), REX_W);
3829             Alloca.size = REGSIZE;
3830         }
3831         if (sytab[s.Sclass] & SCSS)    // if function is on stack (!)
3832         {
                 // Materialize the function's address in registers outside of
                 // keepmsk, then call through them.
3833             retregs = allregs & ~keepmsk;
3834             s.Sflags &= ~GTregcand;
3835             s.Sflags |= SFLread;
3836             cdrelconst(cdbe,e1,&retregs);
3837             if (farfunc)
3838             {
3839                 const reg = findregmsw(retregs);
3840                 const lsreg = findreglsw(retregs);
3841                 floatreg = true;                // use float register
3842                 reflocal = true;
3843                 cdbe.genc1(0x89,                 // MOV floatreg+2,reg
3844                         modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
3845                 cdbe.genc1(0x89,                 // MOV floatreg,lsreg
3846                         modregrm(2, lsreg, BPRM), FLfltreg, 0);
3847                 if (tym1 == TYifunc)
3848                     cdbe.gen1(0x9C);             // PUSHF
3849                 cdbe.genc1(0xFF,                 // CALL [floatreg]
3850                         modregrm(2, 3, BPRM), FLfltreg, 0);
3851             }
3852             else
3853             {
3854                 const reg = findreg(retregs);
3855                 cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
3856                 if (I64)
3857                     code_orrex(cdbe.last(), REX_W);
3858             }
3859         }
3860         else
3861         {
3862             int fl = FLfunc;
3863             if (!tyfunc(s.ty()))
3864                 fl = el_fl(e1);
3865             if (tym1 == TYifunc)
3866                 cdbe.gen1(0x9C);                             // PUSHF
3867             static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
3868             {
3869                 assert(!farfunc);
3870                 if (s != tls_get_addr_sym)
3871                 {
3872                     //printf("call %s\n", s.Sident.ptr);
3873                     load_localgot(cdb);
3874                     cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
3875                 }
3876                 else if (I64)
3877                 {
3878                     /* Prepend 66 66 48 so GNU linker has patch room
3879                      */
3880                     assert(!farfunc);
3881                     cdbe.gen1(0x66);
3882                     cdbe.gen1(0x66);
3883                     cdbe.gencs(0xE8, 0, fl, s);      // CALL extern
3884                     cdbe.last().Irex = REX | REX_W;
3885                 }
3886                 else
3887                     cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
3888             }
3889             else
3890             {
3891                 cdbe.gencs(farfunc ? 0x9A : 0xE8,0,fl,s);    // CALL extern
3892             }
                 // Far calls are flagged CFseg|CFoff, near calls CFselfrel|CFoff
3893             code_orflag(cdbe.last(), farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff));
3894         }
3895     }
3896     else
3897     {   // Call function via pointer
3898
3899         // Function calls may throw Errors
3900         funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;
3901
             // Dump diagnostics before the assert below fires
3902         if (e1.Eoper != OPind) { WRFL(cast(FL)el_fl(e1)); WROP(e1.Eoper); }
3903         save87(cdb);                   // assume 8087 regs are all trashed
3904         assert(e1.Eoper == OPind);
3905         elem *e11 = e1.EV.E1;
3906         tym_t e11ty = tybasic(e11.Ety);
3907         assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr)));
3908         load_localgot(cdb);
3909         static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
3910         {
                 // 32 bit PIC code: BX must survive the evaluation of the function address
3911             if (config.flags3 & CFG3pic && I32)
3912                 keepmsk |= mBX;
3913         }
3914
3915         /* Mask of registers destroyed by the function call
3916          */
3917         regm_t desmsk = (mBP | ALLREGS | mES | XMMREGS) & ~fregsaved;
3918
3919         // if we can't use loadea()
3920         if ((!OTleaf(e11.Eoper) || e11.Eoper == OPconst) &&
3921             (e11.Eoper != OPind || e11.Ecount))
3922         {
                 // Evaluate the function pointer into registers outside of keepmsk
3923             retregs = allregs & ~keepmsk;
3924             cgstate.stackclean++;
3925             scodelem(cdbe,e11,&retregs,keepmsk,true);
3926             cgstate.stackclean--;
3927             // Kill registers destroyed by an arbitrary function call
3928             getregs(cdbe,desmsk);
3929             if (e11ty == TYfptr)
3930             {
3931                 const reg = findregmsw(retregs);
3932                 const lsreg = findreglsw(retregs);
3933                 floatreg = true;                // use float register
3934                 reflocal = true;
3935                 cdbe.genc1(0x89,                 // MOV floatreg+2,reg
3936                         modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
3937                 cdbe.genc1(0x89,                 // MOV floatreg,lsreg
3938                         modregrm(2, lsreg, BPRM), FLfltreg, 0);
3939                 if (tym1 == TYifunc)
3940                     cdbe.gen1(0x9C);             // PUSHF
3941                 cdbe.genc1(0xFF,                 // CALL [floatreg]
3942                         modregrm(2, 3, BPRM), FLfltreg, 0);
3943             }
3944             else
3945             {
3946                 const reg = findreg(retregs);
3947                 cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
3948                 if (I64)
3949                     code_orrex(cdbe.last(), REX_W);
3950             }
3951         }
3952         else
3953         {
                 // NOTE(review): unlike the other PUSHF sites, this one is emitted
                 // into cdb rather than cdbe, so it lands ahead of all the code
                 // collected in cdbe -- confirm this is intended.
3954             if (tym1 == TYifunc)
3955                 cdb.gen1(0x9C);                 // PUSHF
3956                                                 // CALL [function]
3957             cs.Iflags = 0;
3958             cgstate.stackclean++;
3959             loadea(cdbe, e11, &cs, 0xFF, farfunc ? 3 : 2, 0, keepmsk, desmsk);
3960             cgstate.stackclean--;
3961             freenode(e11);
3962         }
3963         s = null;                      // no callee symbol; the SFLexit test below checks for this
3964     }
3965     cdb.append(cdbe);
3966     freenode(e1);
3967
3968     /* See if we will need the frame pointer.
3969        Calculate it here so we can possibly use BP to fix the stack.
3970      */
     // The following block is compiled out
3971 static if (0)
3972 {
3973     if (!needframe)
3974     {
3975         // If there is a register available for this basic block
3976         if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used))
3977         { }
3978         else
3979         {
3980             for (SYMIDX si = 0; si < globsym.length; si++)
3981             {
3982                 Symbol* s = globsym[si];
3983
3984                 if (s.Sflags & GTregcand && type_size(s.Stype) != 0)
3985                 {
3986                     if (config.flags4 & CFG4optimized)
3987                     {   // If symbol is live in this basic block and
3988                         // isn't already in a register
3989                         if (s.Srange && vec_testbit(dfoidx, s.Srange) &&
3990                             s.Sfl != FLreg)
3991                         {   // Then symbol must be allocated on stack
3992                             needframe = true;
3993                             break;
3994                         }
3995                     }
3996                     else
3997                     {   if (mfuncreg == 0)      // if no registers left
3998                         {   needframe = true;
3999                             break;
4000                         }
4001                     }
4002                 }
4003             }
4004         }
4005     }
4006 }
4007
         // Registers the return value arrives in. reg1/reg2 are only filled in
         // by allocretregs(); on the Win64 path they remain NOREG.
4008     reg_t reg1 = NOREG, reg2 = NOREG;
4009
4010     if (config.exe == EX_WIN64) // Win64 is currently broken
4011         retregs = regmask(e.Ety, tym1);
4012     else
4013         retregs = allocretregs(e.Ety, e.ET, tym1, &reg1, &reg2);
4014
4015     assert(retregs || !*pretregs);
4016
4017     if (!usefuncarg)
4018     {
4019         // If stack needs cleanup
4020         if  (s && s.Sflags & SFLexit)
4021         {
4022             if (config.fulltypes && TARGET_WINDOS)
4023             {
4024                 // the stack walker evaluates the return address, not a byte of the
4025                 // call instruction, so ensure there is an instruction byte after
4026                 // the call that still has the same line number information
4027                 cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3);
4028             }
4029             /* Function never returns, so don't need to generate stack
4030              * cleanup code. But still need to log the stack cleanup
4031              * as if it did return.
4032              */
4033             cdb.genadjesp(-(numpara + numalign));
4034             stackpush -= numpara + numalign;
4035         }
4036         else if ((OTbinary(e.Eoper) || config.exe == EX_WIN64) &&
4037             (!typfunc(tym1) || config.exe == EX_WIN64))
4038         {
                 // The caller removes the arguments and the alignment padding
4039             if (tym1 == TYhfunc)
4040             {   // Hidden parameter is popped off by the callee
4041                 cdb.genadjesp(-REGSIZE);
4042                 stackpush -= REGSIZE;
4043                 if (numpara + numalign > REGSIZE)
4044                     genstackclean(cdb, numpara + numalign - REGSIZE, retregs);
4045             }
4046             else
4047                 genstackclean(cdb, numpara + numalign, retregs);
4048         }
4049         else
4050         {
4051             cdb.genadjesp(-numpara);  // popped off by the callee's 'RET numpara'
4052             stackpush -= numpara;
4053             if (numalign)               // callee doesn't know about alignment adjustment
4054                 genstackclean(cdb,numalign,retregs);
4055         }
4056     }
4057
4058     /* Special handling for functions which return a floating point
4059        value in the top of the 8087 stack.
4060      */
4061
4062     if (retregs & mST0)
4063     {
4064         cdb.genadjfpu(1);
4065         if (*pretregs)                  // if we want the result
4066         {
4067             //assert(global87.stackused == 0);
4068             push87(cdb);                // one item on 8087 stack
4069             fixresult87(cdb,e,retregs,pretregs);
4070             return;
4071         }
4072         else
4073             // Pop unused result off 8087 stack
4074             cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
4075     }
4076     else if (retregs & mST01)
4077     {
4078         cdb.genadjfpu(2);
4079         if (*pretregs)                  // if we want the result
4080         {
4081             assert(global87.stackused == 0);
4082             push87(cdb);
4083             push87(cdb);                // two items on 8087 stack
4084             fixresult_complex87(cdb, e, retregs, pretregs);
4085             return;
4086         }
4087         else
4088         {
4089             // Pop unused result off 8087 stack
4090             cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
4091             cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
4092         }
4093     }
4094
4095     /* Special handling for functions that return one part
4096        in XMM0 and the other part in AX
4097      */
4098     if (*pretregs && retregs)
4099     {
4100         if (reg1 == NOREG || reg2 == NOREG)
4101         {}
             // Taken when exactly one of reg1/reg2 is an XMM register
4102         else if ((0 == (mask(reg1) & XMMREGS)) ^ (0 == (mask(reg2) & XMMREGS)))
4103         {
4104             reg_t lreg, mreg;
4105             if (mask(reg1) & XMMREGS)
4106             {
4107                 lreg = XMM0;
4108                 mreg = XMM1;
4109             }
4110             else
4111             {
4112                 lreg = mask(reg1) & mLSW ? reg1 : AX;
4113                 mreg = mask(reg2) & mMSW ? reg2 : DX;
4114             }
                 // Two moves; which one is emitted first depends on whether
                 // reg2 is the same register as lreg
4115             for (int v = 0; v < 2; v++)
4116             {
4117                 if (v ^ (reg2 != lreg))
4118                     genmovreg(cdb,lreg,reg1);
4119                 else
4120                     genmovreg(cdb,mreg,reg2);
4121             }
4122             retregs = mask(lreg) | mask(mreg);
4123         }
4124     }
4125
4126     /* Special handling for functions which return complex float in XMM0 or RAX. */
4127
4128     if (I64
4129         && config.exe != EX_WIN64 // broken
4130         && *pretregs && tybasic(e.Ety) == TYcfloat)
4131     {
4132         assert(reg2 == NOREG);
4133         // spill
             // NOTE(review): the EX_WIN64 arm below cannot be reached while the
             // enclosing condition excludes EX_WIN64
4134         if (config.exe == EX_WIN64)
4135         {
4136             assert(reg1 == AX);
4137             cdb.genfltreg(STO, reg1, 0);
4138             code_orrex(cdb.last(), REX_W);
4139         }
4140         else
4141         {
4142             assert(reg1 == XMM0);
4143             cdb.genxmmreg(xmmstore(TYdouble), reg1, 0, TYdouble);
4144         }
4145         // reload real
4146         push87(cdb);
4147         cdb.genfltreg(0xD9, 0, 0);
4148         genfwait(cdb);
4149         // reload imaginary
4150         push87(cdb);
4151         cdb.genfltreg(0xD9, 0, tysize(TYfloat));
4152         genfwait(cdb);
4153
4154         retregs = mST01;
4155     }
4156
4157     fixresult(cdb, e, retregs, pretregs);
4158 }
4159 
4160 /***************************
4161  * Determine size of argument e that will be pushed.
4162  */
4163 
4164 targ_size_t paramsize(elem* e, tym_t tyf)
4165 {
4166     assert(e.Eoper != OPparam);
4167     targ_size_t szb;
4168     tym_t tym = tybasic(e.Ety);
4169     if (tyscalar(tym))
4170         szb = size(tym);
4171     else if (tym == TYstruct || tym == TYarray)
4172         szb = type_parameterSize(e.ET, tyf);
4173     else
4174     {
4175         WRTYxx(tym);
4176         assert(0);
4177     }
4178     return szb;
4179 }
4180 
4181 /***************************
4182  * Generate code to move argument e on the stack.
4183  */
4184 
4185 private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funcargtos, tym_t tyf)
4186 {
4187     //printf("movParams(e = %p, stackalign = %d, funcargtos = %d)\n", e, stackalign, funcargtos);
4188     //printf("movParams()\n"); elem_print(e);
4189     assert(!I16);
4190     assert(e && e.Eoper != OPparam);
4191 
4192     tym_t tym = tybasic(e.Ety);
4193     if (tyfloating(tym))
4194         objmod.fltused();
4195 
4196     int grex = I64 ? REX_W << 16 : 0;
4197 
4198     targ_size_t szb = paramsize(e, tyf);          // size before alignment
4199     targ_size_t sz = _align(stackalign, szb);       // size after alignment
4200     assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
4201     assert((sz & (REGSIZE - 1)) == 0);
4202     //printf("szb = %d sz = %d\n", (int)szb, (int)sz);
4203 
4204     code cs;
4205     cs.Iflags = 0;
4206     cs.Irex = 0;
4207     switch (e.Eoper)
4208     {
4209         case OPstrctor:
4210         case OPstrthis:
4211         case OPstrpar:
4212         case OPnp_fp:
4213             assert(0);
4214 
4215         case OPrelconst:
4216         {
4217             int fl;
4218             if (!evalinregister(e) &&
4219                 !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
4220                 ((fl = el_fl(e)) == FLdata || fl == FLudata || fl == FLextern)
4221                )
4222             {
4223                 // MOV -stackoffset[EBP],&variable
4224                 cs.Iop = 0xC7;
4225                 cs.Irm = modregrm(2,0,BPRM);
4226                 if (I64 && sz == 8)
4227                     cs.Irex |= REX_W;
4228                 cs.IFL1 = FLfuncarg;
4229                 cs.IEV1.Voffset = funcargtos - REGSIZE;
4230                 cs.IEV2.Voffset = e.EV.Voffset;
4231                 cs.IFL2 = cast(ubyte)fl;
4232                 cs.IEV2.Vsym = e.EV.Vsym;
4233                 cs.Iflags |= CFoff;
4234                 cdb.gen(&cs);
4235                 return;
4236             }
4237             break;
4238         }
4239 
4240         case OPconst:
4241             if (!evalinregister(e))
4242             {
4243                 cs.Iop = (sz == 1) ? 0xC6 : 0xC7;
4244                 cs.Irm = modregrm(2,0,BPRM);
4245                 cs.IFL1 = FLfuncarg;
4246                 cs.IEV1.Voffset = funcargtos - sz;
4247                 cs.IFL2 = FLconst;
4248                 targ_size_t *p = cast(targ_size_t *) &(e.EV);
4249                 cs.IEV2.Vsize_t = *p;
4250                 if (I64 && tym == TYcldouble)
4251                     // The alignment of EV.Vcldouble is not the same on the compiler
4252                     // as on the target
4253                     goto Lbreak;
4254                 if (I64 && sz >= 8)
4255                 {
4256                     int i = cast(int)sz;
4257                     do
4258                     {
4259                         if (*p >= 0x80000000)
4260                         {   // Use 64 bit register MOV, as the 32 bit one gets sign extended
4261                             // MOV reg,imm64
4262                             // MOV EA,reg
4263                             goto Lbreak;
4264                         }
4265                         p = cast(targ_size_t *)(cast(char *) p + REGSIZE);
4266                         i -= REGSIZE;
4267                     } while (i > 0);
4268                     p = cast(targ_size_t *) &(e.EV);
4269                 }
4270 
4271                 int i = cast(int)sz;
4272                 do
4273                 {   int regsize = REGSIZE;
4274                     regm_t retregs = (sz == 1) ? BYTEREGS : allregs;
4275                     reg_t reg;
4276                     if (reghasvalue(retregs,*p,&reg))
4277                     {
4278                         cs.Iop = (cs.Iop & 1) | 0x88;
4279                         cs.Irm |= modregrm(0, reg & 7, 0); // MOV EA,reg
4280                         if (reg & 8)
4281                             cs.Irex |= REX_R;
4282                         if (I64 && sz == 1 && reg >= 4)
4283                             cs.Irex |= REX;
4284                     }
4285                     if (I64 && sz >= 8)
4286                         cs.Irex |= REX_W;
4287                     cdb.gen(&cs);           // MOV EA,const
4288 
4289                     p = cast(targ_size_t *)(cast(char *) p + regsize);
4290                     cs.Iop = 0xC7;
4291                     cs.Irm &= cast(ubyte)~cast(int)modregrm(0, 7, 0);
4292                     cs.Irex &= ~REX_R;
4293                     cs.IEV1.Voffset += regsize;
4294                     cs.IEV2.Vint = cast(targ_int)*p;
4295                     i -= regsize;
4296                 } while (i > 0);
4297                 return;
4298             }
4299 
4300         Lbreak:
4301             break;
4302 
4303         default:
4304             break;
4305     }
4306     regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
4307     if (tyvector(tym))
4308     {
4309         retregs = XMMREGS;
4310         codelem(cdb, e, &retregs, false);
4311         const op = xmmstore(tym);
4312         const r = findreg(retregs);
4313         cdb.genc1(op, modregxrm(2, r - XMM0, BPRM), FLfuncarg, funcargtos - 16);   // MOV funcarg[EBP],r
4314         checkSetVex(cdb.last(),tym);
4315         return;
4316     }
4317     else if (tyfloating(tym))
4318     {
4319         if (config.inline8087)
4320         {
4321             retregs = tycomplex(tym) ? mST01 : mST0;
4322             codelem(cdb, e, &retregs, false);
4323 
4324             opcode_t op;
4325             uint r;
4326             switch (tym)
4327             {
4328                 case TYfloat:
4329                 case TYifloat:
4330                 case TYcfloat:
4331                     op = 0xD9;
4332                     r = 3;
4333                     break;
4334 
4335                 case TYdouble:
4336                 case TYidouble:
4337                 case TYdouble_alias:
4338                 case TYcdouble:
4339                     op = 0xDD;
4340                     r = 3;
4341                     break;
4342 
4343                 case TYldouble:
4344                 case TYildouble:
4345                 case TYcldouble:
4346                     op = 0xDB;
4347                     r = 7;
4348                     break;
4349 
4350                 default:
4351                     assert(0);
4352             }
4353             if (tycomplex(tym))
4354             {
4355                 // FSTP sz/2[ESP]
4356                 cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz/2);
4357                 pop87();
4358             }
4359             pop87();
4360             cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz);    // FSTP -sz[EBP]
4361             return;
4362         }
4363     }
4364     scodelem(cdb, e, &retregs, 0, true);
4365     if (sz <= REGSIZE)
4366     {
4367         uint r = findreg(retregs);
4368         cdb.genc1(0x89, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);   // MOV -REGSIZE[EBP],r
4369         if (sz == 8)
4370             code_orrex(cdb.last(), REX_W);
4371     }
4372     else if (sz == REGSIZE * 2)
4373     {
4374         uint r = findregmsw(retregs);
4375         cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);    // MOV -REGSIZE[EBP],r
4376         r = findreglsw(retregs);
4377         cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE * 2); // MOV -2*REGSIZE[EBP],r
4378     }
4379     else
4380         assert(0);
4381 }
4382 
4383 
4384 /***************************
4385  * Generate code to push argument e on the stack.
4386  * stackpush is incremented by stackalign for each PUSH.
4387  */
4388 
4389 void pushParams(ref CodeBuilder cdb, elem* e, uint stackalign, tym_t tyf)
4390 {
4391     //printf("params(e = %p, stackalign = %d)\n", e, stackalign);
4392     //printf("params()\n"); elem_print(e);
4393     stackchanged = 1;
4394     assert(e && e.Eoper != OPparam);
4395 
4396     tym_t tym = tybasic(e.Ety);
4397     if (tyfloating(tym))
4398         objmod.fltused();
4399 
4400     int grex = I64 ? REX_W << 16 : 0;
4401 
4402     targ_size_t szb = paramsize(e, tyf);          // size before alignment
4403     targ_size_t sz = _align(stackalign,szb);      // size after alignment
4404     assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
4405     assert((sz & (REGSIZE - 1)) == 0);
4406 
4407     switch (e.Eoper)
4408     {
4409     version (SCPP)
4410     {
4411         case OPstrctor:
4412         {
4413             elem* e1 = e.EV.E1;
4414             docommas(cdb,&e1);              // skip over any comma expressions
4415 
4416             cod3_stackadj(cdb, sz);
4417             stackpush += sz;
4418             cdb.genadjesp(sz);
4419 
4420             // Find OPstrthis and set it to stackpush
4421             exp2_setstrthis(e1, null, stackpush, null);
4422 
4423             regm_t retregs = 0;
4424             codelem(cdb, e1, &retregs, true);
4425             freenode(e);
4426             return;
4427         }
4428         case OPstrthis:
4429             // This is the parameter for the 'this' pointer corresponding to
4430             // OPstrctor. We push a pointer to an object that was already
4431             // allocated on the stack by OPstrctor.
4432         {
4433             regm_t retregs = allregs;
4434             reg_t reg;
4435             allocreg(cdb, &retregs, &reg, TYoffset);
4436             genregs(cdb, 0x89, SP, reg);        // MOV reg,SP
4437             if (I64)
4438                 code_orrex(cdb.last(), REX_W);
4439             uint np = stackpush - e.EV.Vuns;         // stack delta to parameter
4440             cdb.genc2(0x81, grex | modregrmx(3, 0, reg), np); // ADD reg,np
4441             if (sz > REGSIZE)
4442             {
4443                 cdb.gen1(0x16);                     // PUSH SS
4444                 stackpush += REGSIZE;
4445             }
4446             cdb.gen1(0x50 + (reg & 7));             // PUSH reg
4447             if (reg & 8)
4448                 code_orrex(cdb.last(), REX_B);
4449             stackpush += REGSIZE;
4450             cdb.genadjesp(sz);
4451             freenode(e);
4452             return;
4453         }
4454     }
4455 
4456         case OPstrpar:
4457         {
4458             uint rm;
4459 
4460             elem* e1 = e.EV.E1;
4461             if (sz == 0)
4462             {
4463                 docommas(cdb, &e1); // skip over any commas
4464 
4465                 const stackpushsave = stackpush;
4466                 const stackcleansave = cgstate.stackclean;
4467                 cgstate.stackclean = 0;
4468 
4469                 regm_t retregs = 0;
4470                 codelem(cdb,e1,&retregs,true);
4471 
4472                 assert(cgstate.stackclean == 0);
4473                 cgstate.stackclean = stackcleansave;
4474                 genstackclean(cdb,stackpush - stackpushsave,0);
4475 
4476                 freenode(e);
4477                 return;
4478             }
4479             if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1.Eoper == OPvar)
4480             {
4481                 freenode(e);
4482                 e = e1;
4483                 goto L1;
4484             }
4485             docommas(cdb,&e1);             // skip over any commas
4486             code_flags_t seg = 0;          // assume no seg override
4487             regm_t retregs = sz ? IDXREGS : 0;
4488             bool doneoff = false;
4489             uint pushsize = REGSIZE;
4490             uint op16 = 0;
4491             if (!I16 && sz & 2)     // if odd number of words to push
4492             {
4493                 pushsize = 2;
4494                 op16 = 1;
4495             }
4496             else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0)
4497             {
4498                 pushsize = 4;       // push DWORDs at a time
4499                 op16 = 1;
4500             }
4501             uint npushes = cast(uint)(sz / pushsize);
4502             switch (e1.Eoper)
4503             {
4504                 case OPind:
4505                     if (sz)
4506                     {
4507                         switch (tybasic(e1.EV.E1.Ety))
4508                         {
4509                             case TYfptr:
4510                             case TYhptr:
4511                                 seg = CFes;
4512                                 retregs |= mES;
4513                                 break;
4514 
4515                             case TYsptr:
4516                                 if (config.wflags & WFssneds)
4517                                     seg = CFss;
4518                                 break;
4519 
4520                             case TYfgPtr:
4521                                 if (I32)
4522                                      seg = CFgs;
4523                                 else if (I64)
4524                                      seg = CFfs;
4525                                 else
4526                                      assert(0);
4527                                 break;
4528 
4529                             case TYcptr:
4530                                 seg = CFcs;
4531                                 break;
4532 
4533                             default:
4534                                 break;
4535                         }
4536                     }
4537                     codelem(cdb, e1.EV.E1, &retregs, false);
4538                     freenode(e1);
4539                     break;
4540 
4541                 case OPvar:
4542                     /* Symbol is no longer a candidate for a register */
4543                     e1.EV.Vsym.Sflags &= ~GTregcand;
4544 
4545                     if (!e1.Ecount && npushes > 4)
4546                     {
4547                         /* Kludge to point at last word in struct. */
4548                         /* Don't screw up CSEs.                 */
4549                         e1.EV.Voffset += sz - pushsize;
4550                         doneoff = true;
4551                     }
4552                     //if (LARGEDATA) /* if default isn't DS */
4553                     {
4554                         static immutable uint[4] segtocf = [ CFes,CFcs,CFss,0 ];
4555 
4556                         int fl = el_fl(e1);
4557                         if (fl == FLfardata)
4558                         {
4559                             seg = CFes;
4560                             retregs |= mES;
4561                         }
4562                         else
4563                         {
4564                             uint s = segfl[fl];
4565                             assert(s < 4);
4566                             seg = segtocf[s];
4567                             if (seg == CFss && !(config.wflags & WFssneds))
4568                                 seg = 0;
4569                         }
4570                     }
4571                     if (e1.Ety & mTYfar)
4572                     {
4573                         seg = CFes;
4574                         retregs |= mES;
4575                     }
4576                     cdrelconst(cdb, e1, &retregs);
4577                     // Reverse the effect of the previous add
4578                     if (doneoff)
4579                         e1.EV.Voffset -= sz - pushsize;
4580                     freenode(e1);
4581                     break;
4582 
4583                 case OPstreq:
4584                 //case OPcond:
4585                     if (!(config.exe & EX_flat))
4586                     {
4587                         seg = CFes;
4588                         retregs |= mES;
4589                     }
4590                     codelem(cdb, e1, &retregs, false);
4591                     break;
4592 
4593                 case OPpair:
4594                 case OPrpair:
4595                     pushParams(cdb, e1, stackalign, tyf);
4596                     freenode(e);
4597                     return;
4598 
4599                 default:
4600                     elem_print(e1);
4601                     assert(0);
4602             }
4603             reg_t reg = findreglsw(retregs);
4604             rm = I16 ? regtorm[reg] : regtorm32[reg];
4605             if (op16)
4606                 seg |= CFopsize;            // operand size
4607             if (npushes <= 4)
4608             {
4609                 assert(!doneoff);
4610                 for (; npushes > 1; --npushes)
4611                 {
4612                     cdb.genc1(0xFF, buildModregrm(2, 6, rm), FLconst, pushsize * (npushes - 1));  // PUSH [reg]
4613                     code_orflag(cdb.last(),seg);
4614                     cdb.genadjesp(pushsize);
4615                 }
4616                 cdb.gen2(0xFF,buildModregrm(0, 6, rm));     // PUSH [reg]
4617                 cdb.last().Iflags |= seg;
4618                 cdb.genadjesp(pushsize);
4619             }
4620             else if (sz)
4621             {
4622                 getregs_imm(cdb, mCX | retregs);
4623                                                     // MOV CX,sz/2
4624                 movregconst(cdb, CX, npushes, 0);
4625                 if (!doneoff)
4626                 {   // This should be done when
4627                     // reg is loaded. Fix later
4628                                                     // ADD reg,sz-pushsize
4629                     cdb.genc2(0x81, grex | modregrmx(3, 0, reg), sz-pushsize);
4630                 }
4631                 getregs(cdb,mCX);                       // the LOOP decrements it
4632                 cdb.gen2(0xFF, buildModregrm(0, 6, rm));   // PUSH [reg]
4633                 cdb.last().Iflags |= seg | CFtarg2;
4634                 code* c3 = cdb.last();
4635                 cdb.genc2(0x81,grex | buildModregrm(3, 5,reg), pushsize);  // SUB reg,pushsize
4636                 if (I16 || config.flags4 & CFG4space)
4637                     genjmp(cdb,0xE2,FLcode,cast(block *)c3);// LOOP c3
4638                 else
4639                 {
4640                     if (I64)
4641                         cdb.gen2(0xFF, modregrm(3, 1, CX));// DEC CX
4642                     else
4643                         cdb.gen1(0x48 + CX);            // DEC CX
4644                     genjmp(cdb, JNE, FLcode, cast(block *)c3); // JNE c3
4645                 }
4646                 regimmed_set(CX,0);
4647                 cdb.genadjesp(cast(int)sz);
4648             }
4649             stackpush += sz;
4650             freenode(e);
4651             return;
4652         }
4653 
4654         case OPind:
4655             if (!e.Ecount)                         /* if *e1       */
4656             {
4657                 if (sz <= REGSIZE)
4658                 {   // Watch out for single byte quantities being up
4659                     // against the end of a segment or in memory-mapped I/O
4660                     if (!(config.exe & EX_flat) && szb == 1)
4661                         break;
4662                     goto L1;            // can handle it with loadea()
4663                 }
4664 
4665                 // Avoid PUSH MEM on the Pentium when optimizing for speed
4666                 if (config.flags4 & CFG4speed &&
4667                     (config.target_cpu >= TARGET_80486 &&
4668                      config.target_cpu <= TARGET_PentiumMMX) &&
4669                     sz <= 2 * REGSIZE &&
4670                     !tyfloating(tym))
4671                     break;
4672 
4673                 if (tym == TYldouble || tym == TYildouble || tycomplex(tym))
4674                     break;
4675 
4676                 code cs;
4677                 cs.Iflags = 0;
4678                 cs.Irex = 0;
4679                 if (I32)
4680                 {
4681                     assert(sz >= REGSIZE * 2);
4682                     loadea(cdb, e, &cs, 0xFF, 6, sz - REGSIZE, 0, 0); // PUSH EA+4
4683                     cdb.genadjesp(REGSIZE);
4684                     stackpush += REGSIZE;
4685                     sz -= REGSIZE;
4686 
4687                     if (sz > REGSIZE)
4688                     {
4689                         while (sz)
4690                         {
4691                             cs.IEV1.Voffset -= REGSIZE;
4692                             cdb.gen(&cs);                    // PUSH EA+...
4693                             cdb.genadjesp(REGSIZE);
4694                             stackpush += REGSIZE;
4695                             sz -= REGSIZE;
4696                         }
4697                         freenode(e);
4698                         return;
4699                     }
4700                 }
4701                 else
4702                 {
4703                     if (sz == DOUBLESIZE)
4704                     {
4705                         loadea(cdb, e, &cs, 0xFF, 6, DOUBLESIZE - REGSIZE, 0, 0); // PUSH EA+6
4706                         cs.IEV1.Voffset -= REGSIZE;
4707                         cdb.gen(&cs);                    // PUSH EA+4
4708                         cdb.genadjesp(REGSIZE);
4709                         getlvalue_lsw(&cs);
4710                         cdb.gen(&cs);                    // PUSH EA+2
4711                     }
4712                     else /* TYlong */
4713                         loadea(cdb, e, &cs, 0xFF, 6, REGSIZE, 0, 0); // PUSH EA+2
4714                     cdb.genadjesp(REGSIZE);
4715                 }
4716                 stackpush += sz;
4717                 getlvalue_lsw(&cs);
4718                 cdb.gen(&cs);                            // PUSH EA
4719                 cdb.genadjesp(REGSIZE);
4720                 freenode(e);
4721                 return;
4722             }
4723             break;
4724 
4725         case OPnp_fp:
4726             if (!e.Ecount)                         /* if (far *)e1 */
4727             {
4728                 elem* e1 = e.EV.E1;
4729                 tym_t tym1 = tybasic(e1.Ety);
4730                 /* BUG: what about pointers to functions?   */
4731                 int segreg;
4732                 switch (tym1)
4733                 {
4734                     case TYnptr: segreg = 3<<3; break;
4735                     case TYcptr: segreg = 1<<3; break;
4736                     default:     segreg = 2<<3; break;
4737                 }
4738                 if (I32 && stackalign == 2)
4739                     cdb.gen1(0x66);                 // push a word
4740                 cdb.gen1(0x06 + segreg);            // PUSH SEGREG
4741                 if (I32 && stackalign == 2)
4742                     code_orflag(cdb.last(), CFopsize);        // push a word
4743                 cdb.genadjesp(stackalign);
4744                 stackpush += stackalign;
4745                 pushParams(cdb, e1, stackalign, tyf);
4746                 freenode(e);
4747                 return;
4748             }
4749             break;
4750 
4751         case OPrelconst:
4752             static if (TARGET_SEGMENTED)
4753             {
4754                 /* Determine if we can just push the segment register           */
4755                 /* Test size of type rather than TYfptr because of (long)(&v)   */
4756                 Symbol* s = e.EV.Vsym;
4757                 //if (sytab[s.Sclass] & SCSS && !I32)  // if variable is on stack
4758                 //    needframe = true;                 // then we need stack frame
4759                 int fl;
4760                 if (_tysize[tym] == tysize(TYfptr) &&
4761                     (fl = s.Sfl) != FLfardata &&
4762                     /* not a function that CS might not be the segment of       */
4763                     (!((fl == FLfunc || s.ty() & mTYcs) &&
4764                       (s.Sclass == SCcomdat || s.Sclass == SCextern || s.Sclass == SCinline || config.wflags & WFthunk)) ||
4765                      (fl == FLfunc && config.exe == EX_DOSX)
4766                     )
4767                    )
4768                 {
4769                     stackpush += sz;
4770                     cdb.gen1(0x06 +           // PUSH SEGREG
4771                             (((fl == FLfunc || s.ty() & mTYcs) ? 1 : segfl[fl]) << 3));
4772                     cdb.genadjesp(REGSIZE);
4773 
4774                     if (config.target_cpu >= TARGET_80286 && !e.Ecount)
4775                     {
4776                         getoffset(cdb, e, STACK);
4777                         freenode(e);
4778                         return;
4779                     }
4780                     else
4781                     {
4782                         regm_t retregs;
4783                         offsetinreg(cdb, e, &retregs);
4784                         const reg = findreg(retregs);
4785                         genpush(cdb,reg);                    // PUSH reg
4786                         cdb.genadjesp(REGSIZE);
4787                     }
4788                     return;
4789                 }
4790                 if (config.target_cpu >= TARGET_80286 && !e.Ecount)
4791                 {
4792                     stackpush += sz;
4793                     if (_tysize[tym] == tysize(TYfptr))
4794                     {
4795                         // PUSH SEG e
4796                         cdb.gencs(0x68,0,FLextern,s);
4797                         cdb.last().Iflags = CFseg;
4798                         cdb.genadjesp(REGSIZE);
4799                     }
4800                     getoffset(cdb, e, STACK);
4801                     freenode(e);
4802                     return;
4803                 }
4804             }
4805             break;                          /* else must evaluate expression */
4806 
4807         case OPvar:
4808         L1:
4809             if (config.flags4 & CFG4speed &&
4810                      (config.target_cpu >= TARGET_80486 &&
4811                       config.target_cpu <= TARGET_PentiumMMX) &&
4812                      sz <= 2 * REGSIZE &&
4813                      !tyfloating(tym))
4814             {   // Avoid PUSH MEM on the Pentium when optimizing for speed
4815                 break;
4816             }
4817             else if (movOnly(e) || (tyxmmreg(tym) && config.fpxmmregs) || tyvector(tym))
4818                 break;                      // no PUSH MEM
4819             else
4820             {
4821                 int regsize = REGSIZE;
4822                 uint flag = 0;
4823                 if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 &&
4824                     !e.Ecount)
4825                 {
4826                     regsize = 4;
4827                     flag |= CFopsize;
4828                 }
4829                 code cs;
4830                 cs.Iflags = 0;
4831                 cs.Irex = 0;
4832                 loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);    // PUSH EA+sz-2
4833                 code_orflag(cdb.last(), flag);
4834                 cdb.genadjesp(REGSIZE);
4835                 stackpush += sz;
4836                 while (cast(targ_int)(sz -= regsize) > 0)
4837                 {
4838                     loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);
4839                     code_orflag(cdb.last(), flag);
4840                     cdb.genadjesp(REGSIZE);
4841                 }
4842                 freenode(e);
4843                 return;
4844             }
4845 
4846         case OPconst:
4847         {
4848             char pushi = 0;
4849             uint flag = 0;
4850             int regsize = REGSIZE;
4851 
4852             if (tycomplex(tym))
4853                 break;
4854 
4855             if (I64 && tyfloating(tym) && sz > 4 && boolres(e))
4856                 // Can't push 64 bit non-zero args directly
4857                 break;
4858 
4859             if (I32 && szb == 10)           // special case for long double constants
4860             {
4861                 assert(sz == 12);
4862                 targ_int value = e.EV.Vushort8[4]; // pick upper 2 bytes of Vldouble
4863                 stackpush += sz;
4864                 cdb.genadjesp(cast(int)sz);
4865                 for (int i = 0; i < 3; ++i)
4866                 {
4867                     reg_t reg;
4868                     if (reghasvalue(allregs, value, &reg))
4869                         cdb.gen1(0x50 + reg);           // PUSH reg
4870                     else
4871                         cdb.genc2(0x68,0,value);        // PUSH value
4872                     value = e.EV.Vulong4[i ^ 1];       // treat Vldouble as 2 element array of 32 bit uint
4873                 }
4874                 freenode(e);
4875                 return;
4876             }
4877 
4878             assert(I64 || sz <= tysize(TYldouble));
4879             int i = cast(int)sz;
4880             if (!I16 && i == 2)
4881                 flag = CFopsize;
4882 
4883             if (config.target_cpu >= TARGET_80286)
4884     //       && (e.Ecount == 0 || e.Ecount != e.Ecomsub))
4885             {
4886                 pushi = 1;
4887                 if (I16 && config.target_cpu >= TARGET_80386 && i >= 4)
4888                 {
4889                     regsize = 4;
4890                     flag = CFopsize;
4891                 }
4892             }
4893             else if (i == REGSIZE)
4894                 break;
4895 
4896             stackpush += sz;
4897             cdb.genadjesp(cast(int)sz);
4898             targ_uns* pi = &e.EV.Vuns;     // point to start of Vdouble
4899             targ_ushort* ps = cast(targ_ushort *) pi;
4900             targ_ullong* pl = cast(targ_ullong *)pi;
4901             i /= regsize;
4902             do
4903             {
4904                 if (i)                      /* be careful not to go negative */
4905                     i--;
4906 
4907                 targ_size_t value;
4908                 switch (regsize)
4909                 {
4910                     case 2:
4911                         value = ps[i];
4912                         break;
4913 
4914                     case 4:
4915                         if (tym == TYldouble || tym == TYildouble)
4916                             /* The size is 10 bytes, and since we have 2 bytes left over,
4917                              * just read those 2 bytes, not 4.
4918                              * Otherwise we're reading uninitialized data.
4919                              * I.e. read 4 bytes, 4 bytes, then 2 bytes
4920                              */
4921                             value = i == 2 ? ps[4] : pi[i]; // 80 bits
4922                         else
4923                             value = pi[i];
4924                         break;
4925 
4926                     case 8:
4927                         value = cast(targ_size_t)pl[i];
4928                         break;
4929 
4930                     default:
4931                         assert(0);
4932                 }
4933 
4934                 reg_t reg;
4935                 if (pushi)
4936                 {
4937                     if (I64 && regsize == 8 && value != cast(int)value)
4938                     {
4939                         regwithvalue(cdb,allregs,value,&reg,64);
4940                         goto Preg;          // cannot push imm64 unless it is sign extended 32 bit value
4941                     }
4942                     if (regsize == REGSIZE && reghasvalue(allregs,value,&reg))
4943                         goto Preg;
4944                     cdb.genc2((szb == 1) ? 0x6A : 0x68, 0, value); // PUSH value
4945                 }
4946                 else
4947                 {
4948                     regwithvalue(cdb, allregs, value, &reg, 0);
4949                 Preg:
4950                     genpush(cdb,reg);         // PUSH reg
4951                 }
4952                 code_orflag(cdb.last(), flag);              // operand size
4953             } while (i);
4954             freenode(e);
4955             return;
4956         }
4957 
4958         case OPpair:
4959         {
4960             if (e.Ecount)
4961                 break;
4962             const op1 = e.EV.E1.Eoper;
4963             const op2 = e.EV.E2.Eoper;
4964             if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
4965                 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
4966             {
4967                 pushParams(cdb, e.EV.E2, stackalign, tyf);
4968                 pushParams(cdb, e.EV.E1, stackalign, tyf);
4969                 freenode(e);
4970             }
4971             else if (tyfloating(e.EV.E1.Ety) ||
4972                      tyfloating(e.EV.E2.Ety))
4973             {
4974                 // Need special handling because of order of evaluation of e1 and e2
4975                 break;
4976             }
4977             else
4978             {
4979                 regm_t regs = allregs;
4980                 codelem(cdb, e, &regs, false);
4981                 genpush(cdb, findregmsw(regs)); // PUSH msreg
4982                 genpush(cdb, findreglsw(regs)); // PUSH lsreg
4983                 cdb.genadjesp(cast(int)sz);
4984                 stackpush += sz;
4985             }
4986             return;
4987         }
4988 
4989         case OPrpair:
4990         {
4991             if (e.Ecount)
4992                 break;
4993             const op1 = e.EV.E1.Eoper;
4994             const op2 = e.EV.E2.Eoper;
4995             if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
4996                 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
4997             {
4998                 pushParams(cdb, e.EV.E1, stackalign, tyf);
4999                 pushParams(cdb, e.EV.E2, stackalign, tyf);
5000                 freenode(e);
5001             }
5002             else if (tyfloating(e.EV.E1.Ety) ||
5003                      tyfloating(e.EV.E2.Ety))
5004             {
5005                 // Need special handling because of order of evaluation of e1 and e2
5006                 break;
5007             }
5008             else
5009             {
5010                 regm_t regs = allregs;
5011                 codelem(cdb, e, &regs, false);
5012                 genpush(cdb, findregmsw(regs)); // PUSH msreg
5013                 genpush(cdb, findreglsw(regs)); // PUSH lsreg
5014                 cdb.genadjesp(cast(int)sz);
5015                 stackpush += sz;
5016             }
5017             return;
5018         }
5019 
5020         default:
5021             break;
5022     }
5023 
5024     regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
5025     if (tyvector(tym) || (tyxmmreg(tym) && config.fpxmmregs))
5026     {
5027         regm_t retxmm = XMMREGS;
5028         codelem(cdb, e, &retxmm, false);
5029         stackpush += sz;
5030         cdb.genadjesp(cast(int)sz);
5031         cod3_stackadj(cdb, cast(int)sz);
5032         const op = xmmstore(tym);
5033         const r = findreg(retxmm);
5034         cdb.gen2sib(op, modregxrm(0, r - XMM0,4 ), modregrm(0, 4, SP));   // MOV [ESP],r
5035         checkSetVex(cdb.last(),tym);
5036         return;
5037     }
5038     else if (tyfloating(tym))
5039     {
5040         if (config.inline8087)
5041         {
5042             retregs = tycomplex(tym) ? mST01 : mST0;
5043             codelem(cdb, e, &retregs, false);
5044             stackpush += sz;
5045             cdb.genadjesp(cast(int)sz);
5046             cod3_stackadj(cdb, cast(int)sz);
5047             opcode_t op;
5048             uint r;
5049             switch (tym)
5050             {
5051                 case TYfloat:
5052                 case TYifloat:
5053                 case TYcfloat:
5054                     op = 0xD9;
5055                     r = 3;
5056                     break;
5057 
5058                 case TYdouble:
5059                 case TYidouble:
5060                 case TYdouble_alias:
5061                 case TYcdouble:
5062                     op = 0xDD;
5063                     r = 3;
5064                     break;
5065 
5066                 case TYldouble:
5067                 case TYildouble:
5068                 case TYcldouble:
5069                     op = 0xDB;
5070                     r = 7;
5071                     break;
5072 
5073                 default:
5074                     assert(0);
5075             }
5076             if (!I16)
5077             {
5078                 if (tycomplex(tym))
5079                 {
5080                     // FSTP sz/2[ESP]
5081                     cdb.genc1(op, (modregrm(0, 4, SP) << 8) | modregxrm(2, r, 4),FLconst, sz/2);
5082                     pop87();
5083                 }
5084                 pop87();
5085                 cdb.gen2sib(op, modregrm(0, r, 4),modregrm(0, 4, SP));   // FSTP [ESP]
5086             }
5087             else
5088             {
5089                 retregs = IDXREGS;                             // get an index reg
5090                 reg_t reg;
5091                 allocreg(cdb, &retregs, &reg, TYoffset);
5092                 genregs(cdb, 0x89, SP, reg);         // MOV reg,SP
5093                 pop87();
5094                 cdb.gen2(op, modregrm(0, r, regtorm[reg]));       // FSTP [reg]
5095             }
5096             if (LARGEDATA)
5097                 cdb.last().Iflags |= CFss;     // want to store into stack
5098             genfwait(cdb);         // FWAIT
5099             return;
5100         }
5101         else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
5102             retregs = mSTACK;
5103     }
5104     else if (I16 && sz == 8)             // if long long
5105         retregs = mSTACK;
5106 
5107     scodelem(cdb,e,&retregs,0,true);
5108     if (retregs != mSTACK)                // if stackpush not already inc'd
5109         stackpush += sz;
5110     if (sz <= REGSIZE)
5111     {
5112         genpush(cdb,findreg(retregs));        // PUSH reg
5113         cdb.genadjesp(cast(int)REGSIZE);
5114     }
5115     else if (sz == REGSIZE * 2)
5116     {
5117         genpush(cdb,findregmsw(retregs));     // PUSH msreg
5118         genpush(cdb,findreglsw(retregs));     // PUSH lsreg
5119         cdb.genadjesp(cast(int)sz);
5120     }
5121 }
5122 
/*******************************
 * Get the offset portion of e into an index register.
 *
 * If e is a common subexpression whose value is already held in a
 * suitable register, that register is claimed with getregs() and reused.
 * Otherwise a register is allocated from mLSW and getoffset() generates
 * the code that loads the offset into it.
 *
 * Params:
 *      cdb = code builder passed to the code generating helpers
 *      e = elem whose offset is wanted; released with freenode() before returning
 *      pretregs = set to the mask of the register holding the offset
 */

void offsetinreg(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    const regm_t wanted = mLSW;                // want only offset
    bool reused = false;

    if (e.Ecount && e.Ecount != e.Ecomsub)
    {
        // Registers that hold a CSE value, are not in use as operands,
        // and are not assigned to register variables
        regm_t candidates = wanted & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar;

        for (uint r = 0; candidates; ++r)
        {
            const regm_t bit = mask(r);
            if ((bit & candidates) && regcon.cse.value[r] == e)
            {
                // e is already sitting in register r
                *pretregs = bit;
                getregs(cdb, *pretregs);
                reused = true;
                break;
            }
            candidates &= ~bit;
        }
    }

    if (!reused)
    {
        reg_t reg;
        *pretregs = wanted;
        allocreg(cdb, pretregs, &reg, TYoffset);
        getoffset(cdb, e, reg);
    }

    cssave(e, *pretregs, false);
    freenode(e);
}
5154 
/******************************
 * Generate code to load data into registers.
 *
 * Outline of the cases handled, in order:
 *   1. `*pretregs == 0`: nothing is wanted, return immediately.
 *   2. TYstruct: delegated to cdrelconst().
 *   3. Floating types that can go through cloadxmm(), load87() or
 *      cload87(): delegated to those functions.
 *   4. `*pretregs == mPSW`: only the condition flags are wanted, so code
 *      is generated that tests the value (against 0) without keeping it.
 *   5. OPconst: the constant is materialized in the allocated register(s).
 *   6. Anything else: the value is loaded through loadea(), or the
 *      register an incoming parameter still lives in is reused.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = elem to load, must not be null
 *      pretregs = mask of acceptable destinations (registers, and possibly
 *                 mPSW / mSTACK); in cases 5 and 6 it is handed to
 *                 fixresult() together with the registers actually used
 */


void loaddata(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    reg_t nreg;
    reg_t sreg;
    opcode_t op;
    tym_t tym;
    code cs;
    regm_t flags, forregs, regm;

    debug
    {
    //  if (debugw)
    //        printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs));
    //  elem_print(e);
    }

    assert(e);
    elem_debug(e);
    if (*pretregs == 0)
        return;
    tym = tybasic(e.Ety);
    if (tym == TYstruct)
    {
        cdrelconst(cdb,e,pretregs);
        return;
    }
    if (tyfloating(tym))
    {
        objmod.fltused();
        if (config.fpxmmregs &&
            (tym == TYcfloat || tym == TYcdouble) &&
            (*pretregs & (XMMREGS | mPSW))
           )
        {
            cloadxmm(cdb, e, pretregs);
            return;
        }
        else if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                load87(cdb, e, 0, pretregs, null, -1);
                return;
            }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }
        }
    }
    int sz = _tysize[tym];
    cs.Iflags = 0;
    cs.Irex = 0;

    /* ---- Only the flags are wanted ---- */
    if (*pretregs == mPSW)
    {
        Symbol *s;
        regm = allregs;
        if (e.Eoper == OPconst)
        {       /* true:        OR SP,SP        (SP is never 0)         */
                /* false:       CMP SP,SP       (always equal)          */
                genregs(cdb, (boolres(e)) ? 0x09 : 0x39 , SP, SP);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
        }
        else if (e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
            s.Sregm & XMMREGS &&
            (tym == TYfloat || tym == TYifloat || tym == TYdouble || tym ==TYidouble))
        {
            // Scalar float variable living in an XMM register: test it there
            tstresult(cdb,s.Sregm,e.Ety,true);
        }
        else if (sz <= REGSIZE)
        {
            if (!I16 && (tym == TYfloat || tym == TYifloat))
            {
                allocreg(cdb, &regm, &reg, TYoffset);   // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                cdb.gen2(0xD1,modregrmx(3,4,reg));           // SHL reg,1
            }
            else if (I64 && (tym == TYdouble || tym ==TYidouble))
            {
                allocreg(cdb, &regm, &reg, TYoffset);   // get a register
                loadea(cdb, e,&cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                // remove sign bit, so that -0.0 == 0.0
                cdb.gen2(0xD1, modregrmx(3, 4, reg));           // SHL reg,1
                code_orrex(cdb.last(), REX_W);
            }
            else if (TARGET_OSX && e.Eoper == OPvar && movOnly(e))
            {
                // The operand may only be accessed with MOV, so load it
                // and let fixresult() produce the flags
                allocreg(cdb, &regm, &reg, TYoffset);   // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                fixresult(cdb, e, regm, pretregs);
            }
            else
            {   cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                op = (sz == 1) ? 0x80 : 0x81;
                loadea(cdb, e, &cs, op, 7, 0, 0, 0);        // CMP EA,0

                // Convert to TEST instruction if EA is a register
                // (to avoid register contention on Pentium)
                code *c = cdb.last();
                if ((c.Iop & ~1) == 0x38 &&
                    (c.Irm & modregrm(3, 0, 0)) == modregrm(3, 0, 0)
                   )
                {
                    c.Iop = (c.Iop & 1) | 0x84;
                    code_newreg(c, c.Irm & 7);
                    if (c.Irex & REX_B)
                        //c.Irex = (c.Irex & ~REX_B) | REX_R;
                        c.Irex |= REX_R;
                }
            }
        }
        else if (sz < 8)
        {
            // Two-part value: OR the upper part with the lower part
            allocreg(cdb, &regm, &reg, TYoffset);  // get a register
            if (I32)                                    // it's a 48 bit pointer
                loadea(cdb, e, &cs, MOVZXw, reg, REGSIZE, 0, 0); // MOVZX reg,data+4
            else
            {
                loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, 0, 0); // MOV reg,data+2
                if (tym == TYfloat || tym == TYifloat)       // dump sign bit
                    cdb.gen2(0xD1, modregrm(3, 4, reg));        // SHL reg,1
            }
            loadea(cdb,e,&cs,0x0B,reg,0,regm,0);     // OR reg,data
        }
        else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym)))
        {
            // Start with the most significant word, then OR in the
            // remaining words from high to low
            allocreg(cdb, &regm, &reg, TYoffset);       // get a register
            int i = sz - REGSIZE;
            loadea(cdb, e, &cs, 0x8B, reg, i, 0, 0);        // MOV reg,data+6
            if (tyfloating(tym))                             // TYdouble or TYdouble_alias
                cdb.gen2(0xD1, modregrm(3, 4, reg));            // SHL reg,1

            while ((i -= REGSIZE) >= 0)
            {
                loadea(cdb, e, &cs, 0x0B, reg, i, regm, 0); // OR reg,data+i
                code *c = cdb.last();
                if (i == 0)
                    c.Iflags |= CFpsw;                      // need the flags on last OR
            }
        }
        else if (sz == tysize(TYldouble))               // TYldouble
            load87(cdb, e, 0, pretregs, null, -1);
        else
        {
            elem_print(e);
            assert(0);
        }
        return;
    }

    /* not for flags only */
    flags = *pretregs & mPSW;             /* save original                */
    forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS);
    if (*pretregs & mSTACK)
        forregs |= DOUBLEREGS;

    /* ---- Load a constant ---- */
    if (e.Eoper == OPconst)
    {
        targ_size_t value = e.EV.Vint;
        if (sz == 8)
            value = cast(targ_size_t)e.EV.Vullong;

        // Prefer a register that reghasvalue() reports as already holding the value
        if (sz == REGSIZE && reghasvalue(forregs, value, &reg))
            forregs = mask(reg);

        regm_t save = regcon.immed.mval;
        allocreg(cdb, &forregs, &reg, tym);        // allocate registers
        regcon.immed.mval = save;               // KLUDGE!
        if (sz <= REGSIZE)
        {
            // NOTE(review): from here on `flags` also carries the size bits
            // handed to movregconst()/regwithvalue(); presumably 1 = byte,
            // 2 = 16 bit and 64 = 64 bit operand - confirm against movregconst()
            if (sz == 1)
                flags |= 1;
            else if (!I16 && sz == SHORTSIZE &&
                     !(mask(reg) & regcon.mvar) &&
                     !(config.flags4 & CFG4speed)
                    )
                flags |= 2;
            if (sz == 8)
                flags |= 64;
            if (isXMMreg(reg))
            {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                 * in the data segment, because they are x87 opcodes.
                 * Not so efficient. We should at least do a PXOR for 0.
                 */
                reg_t r;
                targ_size_t unsvalue = e.EV.Vuns;
                if (sz == 8)
                    unsvalue = cast(targ_size_t)e.EV.Vullong;
                regwithvalue(cdb,ALLREGS, unsvalue,&r,flags);
                flags = 0;                          // flags are already set
                cdb.genfltreg(0x89, r, 0);            // MOV floatreg,r
                if (sz == 8)
                    code_orrex(cdb.last(), REX_W);
                assert(sz == 4 || sz == 8);         // float or double
                const opmv = xmmload(tym);
                cdb.genxmmreg(opmv, reg, 0, tym);        // MOVSS/MOVSD XMMreg,floatreg
            }
            else
            {
                movregconst(cdb, reg, value, flags);
                flags = 0;                          // flags are already set
            }
        }
        else if (sz < 8)        // far pointers, longs for 16 bit targets
        {
            targ_int msw = I32 ? e.EV.Vseg
                        : (e.EV.Vulong >> 16);
            targ_int lsw = e.EV.Voff;
            regm_t mswflags = 0;
            if (forregs & mES)
            {
                movregconst(cdb, reg, msw, 0); // MOV reg,segment
                genregs(cdb, 0x8E, 0, reg);    // MOV ES,reg
                msw = lsw;                               // MOV reg,offset
            }
            else
            {
                sreg = findreglsw(forregs);
                movregconst(cdb, sreg, lsw, 0);
                reg = findregmsw(forregs);
                /* Decide if we need to set flags when we load msw      */
                // (the condition holds when msw is non-zero, or when both
                // halves are zero)
                if (flags && (msw && msw|lsw || !(msw|lsw)))
                {   mswflags = mPSW;
                    flags = 0;
                }
            }
            movregconst(cdb, reg, msw, mswflags);
        }
        else if (sz == 8)
        {
            if (I32)
            {
                targ_long *p = cast(targ_long *)cast(void*)&e.EV.Vdouble;
                if (isXMMreg(reg))
                {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                     * in the data segment, because they are x87 opcodes.
                     * Not so efficient. We should at least do a PXOR for 0.
                     */
                    reg_t r;
                    regm_t rm = ALLREGS;
                    allocreg(cdb, &rm, &r, TYint);    // allocate scratch register
                    movregconst(cdb, r, p[0], 0);
                    cdb.genfltreg(0x89, r, 0);               // MOV floatreg,r
                    movregconst(cdb, r, p[1], 0);
                    cdb.genfltreg(0x89, r, 4);               // MOV floatreg+4,r

                    const opmv = xmmload(tym);
                    cdb.genxmmreg(opmv, reg, 0, tym);           // MOVSS/MOVSD XMMreg,floatreg
                }
                else
                {
                    movregconst(cdb, findreglsw(forregs) ,p[0], 0);
                    movregconst(cdb, findregmsw(forregs) ,p[1], 0);
                }
            }
            else
            {   targ_short *p = &e.EV.Vshort;  // point to start of Vdouble

                // 8 byte constant in four 16 bit registers:
                // AX = word 3 (most significant), BX = word 2, CX = word 1, DX = word 0
                assert(reg == AX);
                movregconst(cdb, AX, p[3], 0);   // MOV AX,p[3]
                movregconst(cdb, DX, p[0], 0);
                movregconst(cdb, CX, p[1], 0);
                movregconst(cdb, BX, p[2], 0);
            }
        }
        else if (I64 && sz == 16)
        {
            movregconst(cdb, findreglsw(forregs), cast(targ_size_t)e.EV.Vcent.lsw, 64);
            movregconst(cdb, findregmsw(forregs), cast(targ_size_t)e.EV.Vcent.msw, 64);
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
    /* ---- Load a non-constant ---- */
    else
    {
        // See if we can use register that parameter was passed in
        if (regcon.params &&
            regParamInPreg(e.EV.Vsym) &&
            !anyiasm &&   // may have written to the memory for the parameter
            (regcon.params & mask(e.EV.Vsym.Spreg) && e.EV.Voffset == 0 ||
             regcon.params & mask(e.EV.Vsym.Spreg2) && e.EV.Voffset == REGSIZE) &&
            sz <= REGSIZE)                  // make sure no 'paint' to a larger size happened
        {
            reg = e.EV.Voffset ? e.EV.Vsym.Spreg2 : e.EV.Vsym.Spreg;
            forregs = mask(reg);

            if (debugr)
                printf("%s.%d is fastpar and using register %s\n",
                       e.EV.Vsym.Sident.ptr,
                       cast(int)e.EV.Voffset,
                       regm_str(forregs));

            mfuncreg &= ~forregs;
            regcon.used |= forregs;
            fixresult(cdb,e,forregs,pretregs);
            return;
        }

        allocreg(cdb, &forregs, &reg, tym);            // allocate registers

        if (sz == 1)
        {   regm_t nregm;

            debug
            if (!(forregs & BYTEREGS))
            {   elem_print(e);
                    printf("forregs = %s\n", regm_str(forregs));
            }

            opcode_t opmv = 0x8A;                               // byte MOV
            static if (TARGET_OSX)
            {
                if (movOnly(e))
                    opmv = 0x8B;
            }
            assert(forregs & BYTEREGS);
            if (!I16)
            {
                if (config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed &&
                    // Workaround for OSX linker bug:
                    //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                    !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
                   )
                {
//                    opmv = tyuns(tym) ? MOVZXb : MOVSXb;      // MOVZX/MOVSX
                }
                loadea(cdb, e, &cs, opmv, reg, 0, 0, 0);     // MOV regL,data
            }
            else
            {
                nregm = tyuns(tym) ? BYTEREGS : cast(regm_t) mAX;
                if (*pretregs & nregm)
                    nreg = reg;                             // already allocated
                else
                    allocreg(cdb, &nregm, &nreg, tym);
                loadea(cdb, e, &cs, opmv, nreg, 0, 0, 0);    // MOV nregL,data
                if (reg != nreg)
                {
                    genmovreg(cdb, reg, nreg);   // MOV reg,nreg
                    cssave(e, mask(nreg), false);
                }
            }
        }
        else if (forregs & XMMREGS)
        {
            // Can't load from registers directly to XMM regs
            //e.EV.Vsym.Sflags &= ~GTregcand;

            opcode_t opmv = xmmload(tym, xmmIsAligned(e));
            if (e.Eoper == OPvar)
            {
                Symbol *s = e.EV.Vsym;
                if (s.Sfl == FLreg && !(mask(s.Sreglsw) & XMMREGS))
                {   opmv = LODD;          // MOVD/MOVQ
                    /* getlvalue() will unwind this and unregister s; could use a better solution */
                }
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0); // MOVSS/MOVSD reg,data
            checkSetVex(cdb.last(),tym);
        }
        else if (sz <= REGSIZE)
        {
            opcode_t opmv = 0x8B;                     // MOV reg,data
            if (sz == 2 && !I16 && config.target_cpu >= TARGET_PentiumPro &&
                // Workaround for OSX linker bug:
                //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
               )
            {
//                opmv = tyuns(tym) ? MOVZXw : MOVSXw;  // MOVZX/MOVSX
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0);
        }
        else if (sz <= 2 * REGSIZE && forregs & mES)
        {
            loadea(cdb, e, &cs, 0xC4, reg, 0, 0, mES);    // LES data
        }
        else if (sz <= 2 * REGSIZE)
        {
            if (I32 && sz == 8 &&
                (*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                assert(0);
    /+
                /* Note that we allocreg(DOUBLEREGS) needlessly     */
                stackchanged = 1;
                int i = DOUBLESIZE - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
    +/
            }

            // Register pair: load the most significant half first, keeping
            // the whole pair reserved while each effective address is formed
            reg = findregmsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, forregs, 0); // MOV reg,data+2
            if (I32 && sz == REGSIZE + 2)
                cdb.last().Iflags |= CFopsize;                   // seg is 16 bits
            reg = findreglsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, 0, forregs, 0);       // MOV reg,data
        }
        else if (sz >= 8)
        {
            assert(!I32);
            if ((*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                // Note that we allocreg(DOUBLEREGS) needlessly
                stackchanged = 1;
                // Push the value one register-sized piece at a time,
                // highest offset first
                int i = sz - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
            }
            else
            {
                assert(reg == AX);
                // Each load passes, as the mask of registers to keep, every
                // register filled by the loads before it, so forming the
                // next effective address cannot overwrite an earlier piece.
                // The mask for the final load previously read mAX|mCX|mCX,
                // which left BX (holding data+4) unprotected.
                loadea(cdb, e, &cs, 0x8B, AX, 6, 0,           0); // MOV AX,data+6
                loadea(cdb, e, &cs, 0x8B, BX, 4, mAX,         0); // MOV BX,data+4
                loadea(cdb, e, &cs, 0x8B, CX, 2, mAX|mBX,     0); // MOV CX,data+2
                loadea(cdb, e, &cs, 0x8B, DX, 0, mAX|mBX|mCX, 0); // MOV DX,data
            }
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
}
5608 
5609 }