1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1984-1998 by Symantec
6  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod2.d, backend/cod2.d)
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod2.d
11  */
12 
13 module dmd.backend.cod2;
14 
15 version (SCPP)
16     version = COMPILE;
17 version (MARS)
18     version = COMPILE;
19 
20 version (COMPILE)
21 {
22 
23 import core.stdc.stdio;
24 import core.stdc.stdlib;
25 import core.stdc.string;
26 
27 import dmd.backend.backend;
28 import dmd.backend.cc;
29 import dmd.backend.cdef;
30 import dmd.backend.code;
31 import dmd.backend.code_x86;
32 import dmd.backend.codebuilder;
33 import dmd.backend.mem;
34 import dmd.backend.el;
35 import dmd.backend.exh;
36 import dmd.backend.global;
37 import dmd.backend.oper;
38 import dmd.backend.ty;
39 import dmd.backend.type;
40 import dmd.backend.xmm;
41 
42 extern (C++):
43 
44 nothrow:
45 
// Declared here without a body. The code below compares it against type
// sizes taken from _tysize[] (e.g. `sz == 2 * REGSIZE`), so it is used as the
// size of a general purpose register in the same units as _tysize[].
46 int REGSIZE();
47 
// Data declared `extern`, i.e. defined in another module:
//   cgstate - opdouble() increments/decrements cgstate.stackclean around the
//             evaluation of its second operand
//   segfl   - indexed by el_fl(); cdorth() tests an entry against 3
//             ("if not in data segment")
//   stackfl - indexed by el_fl(); cdorth() consults it when deciding whether
//             to use LEA for (e + &var)
48 extern __gshared CGstate cgstate;
49 extern __gshared ubyte[FLMAX] segfl;
50 extern __gshared bool[FLMAX] stackfl;
51 
// Defined in this module; not referenced in the part of the file visible here.
52 __gshared int cdcmp_flag;
53 
/// Returns: a mask with only bit number `m` set.
private extern (D) uint mask(uint m)
{
    return 1u << m;
}
55 
56 // from divcoeff.c
// C-linkage prototypes only; the implementations are not part of this file
// and nothing in the visible portion of the file calls them.
// NOTE(review): judging by the names and the pm/pshpre/pshpost out-parameters
// these presumably compute the multiplier and shift counts for dividing by
// the constant `d` - confirm against divcoeff.c before relying on that.
57 extern (C)
58 {
59     bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
60     bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
61 }
62 
/*******************************
 * Exchange the register numbers stored at `a` and `b`.
 * Params:
 *      a = points to the first register number
 *      b = points to the second register number
 */

private void swap(reg_t *a,reg_t *b)
{
    const reg_t held = *b;
    *b = *a;
    *a = held;
}
73 
74 
/*******************************************
 * Decide whether the effective address of `e` is restricted to MOV.
 *
 * Only OPvar nodes are affected, and only when config.exe has the EX_OSX64
 * bit set and CFG3pic is set in config.flags3.
 * Params:
 *      e = expression node to examine
 * Returns:
 *      true if `e` cannot be used as an EA in anything other than a MOV
 *      instruction.
 */

bool movOnly(const elem *e)
{
    const osx64Pic = (config.exe & EX_OSX64) && (config.flags3 & CFG3pic);
    if (!osx64Pic || e.Eoper != OPvar)
        return false;

    // Fixups for these storage classes can only be done with a MOV
    const sclass = e.EV.Vsym.Sclass;
    return sclass == SCglobal ||
           sclass == SCextern ||
           sclass == SCcomdat ||
           sclass == SCcomdef;
}
91 
/********************************
 * Work out which registers the addressing mode of an instruction uses.
 * The addressing mode is taken from the rm part of the modregrm byte
 * (c.Irm), plus c.Isib and the REX_X / REX_B bits of c.Irex where they apply.
 * Params:
 *      c = instruction to examine
 * Returns:
 *      mask of index registers; 0 when the mod field is 3 (register operand)
 */

regm_t idxregm(const code* c)
{
    const modrm = c.Irm;

    // mod == 3: the operand is a register, so no memory address is formed
    if ((modrm & 0xC0) == 0xC0)
        return 0;

    if (I16)
    {
        // 16 bit addressing: the rm field selects a fixed register combination
        static immutable ubyte[8] idxrm  = [mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX];
        return idxrm[modrm & 7];
    }

    const rexBase = (c.Irex & REX_B) ? 8 : 0;

    // rm != 4: no sib byte, rm (extended by REX_B) is the register
    if ((modrm & 7) != 4)
        return mask((modrm & 7) | rexBase);

    // sib byte present: scaled index register, extended by REX_X
    const sib = c.Isib;
    const rexIndex = (c.Irex & REX_X) ? 8 : 0;
    regm_t used = mask(((sib >> 3) & 7) | rexIndex);

    // base field 5 combined with mod 0 contributes no base register
    const noBase = (sib & 7) == 5 && (modrm & 0xC0) == 0;
    if (!noBase)
        used |= mask((sib & 7) | rexBase);

    return used;
}
130 
131 
static if (TARGET_WINDOS)
{
/***************************
 * Generate code for a binary floating point operation by calling a
 * runtime library routine, unless config.inline8087 is set, in which case
 * the work is handed to orth87().
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = operator node; operands are e.EV.E1 and e.EV.E2
 *      pretregs = requested result registers, forwarded to callclib()/orth87()
 *      clib = library routine for the double version of the operation; it is
 *             offset by (CLIB.fadd - CLIB.dadd) when the left operand is TYfloat
 */

void opdouble(ref CodeBuilder cdb, elem *e,regm_t *pretregs,uint clib)
{
    if (config.inline8087)
    {
        orth87(cdb,e,pretregs);
        return;
    }

    // Pick where each operand has to be placed for the library call
    regm_t retregs1,retregs2;
    if (tybasic(e.EV.E1.Ety) != TYfloat)
    {
        if (!I32)
        {
            retregs1 = mSTACK;
            retregs2 = DOUBLEREGS_16;
        }
        else
        {
            retregs1 = DOUBLEREGS_32;
            retregs2 = DOUBLEREGS2_32;
        }
    }
    else
    {
        clib += CLIB.fadd - CLIB.dadd;    // convert to float operation
        retregs1 = FLOATREGS;
        retregs2 = FLOATREGS2;
    }

    codelem(cdb,e.EV.E1, &retregs1,false);

    // If the first operand ended up on the stack, cgstate.stackclean is
    // raised while the second operand is evaluated.
    const firstOnStack = (retregs1 & mSTACK) != 0;
    if (firstOnStack)
        cgstate.stackclean++;
    scodelem(cdb,e.EV.E2, &retregs2, retregs1 & ~mSTACK, false);
    if (firstOnStack)
        cgstate.stackclean--;

    callclib(cdb, e, clib, pretregs, 0);
}
}
174 
175 /*****************************
176  * Handle operators which are more or less orthogonal
177  * ( + - & | ^ )
178  */
179 
/* cdorth(cdb, e, pretregs)
 *
 * Emits code into `cdb` for the binary operator node `e`. Once the floating
 * point / XMM operand types have been dispatched elsewhere, e.Eoper must be
 * one of OPadd, OPmin, OPor, OPxor or OPand (anything else hits assert(0) in
 * the opcode selection switch).
 *
 * Params:
 *   cdb      = code builder that receives the generated instructions
 *   e        = operator node; its operands are e.EV.E1 and e.EV.E2
 *   pretregs = in/out mask of acceptable result registers, possibly including
 *              mPSW when the flags are wanted. 0 means the value is not
 *              needed: both operands are still evaluated and the function
 *              returns without combining them.
 *
 * Outline:
 *   1. floating point / vector operands go to orthxmm(), orth87() or
 *      (TARGET_WINDOS only) opdouble()
 *   2. a flags-only AND of a variable or indirection becomes a single TEST
 *   3. certain additions are emitted as LEA
 *   4. add/subtract involving a TYhptr operand
 *   5. general case: e1 is evaluated into registers, then combined with e2
 *      taken from a register, an immediate or a memory operand
 */
180 void cdorth(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
181 {
182     //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
183     elem *e1 = e.EV.E1;
184     elem *e2 = e.EV.E2;
185     if (*pretregs == 0)                   // if don't want result
186     {
187         codelem(cdb,e1,pretregs,false); // eval left leaf
188         *pretregs = 0;                          // in case they got set
189         codelem(cdb,e2,pretregs,false);
190         return;
191     }
192 
193     const ty = tybasic(e.Ety);
194     const ty1 = tybasic(e1.Ety);
195 
        // ---- 1. floating point and vector operands are handled elsewhere ----
196     if (tyfloating(ty1))
197     {
            // XMM path: vector types always; otherwise only when XMM registers
            // are enabled, the type fits one, the caller did not ask for
            // mST0/mST01, and the result is not a long double.
198         if (tyvector(ty1) ||
199             config.fpxmmregs && tyxmmreg(ty1) &&
200             !(*pretregs & mST0) &&
201             !(*pretregs & mST01) &&
202             !(ty == TYldouble || ty == TYildouble)  // watch out for shrinkLongDoubleConstantIfPossible()
203            )
204         {
205             orthxmm(cdb,e,pretregs);
206             return;
207         }
208         if (config.inline8087)
209         {
210             orth87(cdb,e,pretregs);
211             return;
212         }
            // Neither XMM nor inline 8087: only TARGET_WINDOS has a library
            // call fallback (any operator other than OPadd maps to CLIB.dsub).
213         static if (TARGET_WINDOS)
214         {
215             opdouble(cdb,e,pretregs,(e.Eoper == OPadd) ? CLIB.dadd
216                                                        : CLIB.dsub);
217             return;
218         }
219         else
220         {
221             assert(0);
222         }
223     }
        // Non-floating types that still live in XMM registers
224     if (tyxmmreg(ty1))
225     {
226         orthxmm(cdb,e,pretregs);
227         return;
228     }
229 
        // op1:  opcode for the single word / least significant word
        // op2:  opcode for the most significant word of a two word operation
        // mode: value for the reg field of the ModRM byte when the 0x81
        //       immediate form is generated
230     opcode_t op1, op2;
231     uint mode;
        // Non-zero while this function is re-entered through the getlvalue()
        // call of the LEA path below; the LEA path is skipped while it is set.
232     __gshared int nest;
233 
234     const ty2 = tybasic(e2.Ety);
235     const e2oper = e2.Eoper;
236     const sz = _tysize[ty];
237     const isbyte = (sz == 1);
        // CFopsize is wanted for SHORTSIZE operands when not generating 16 bit code
238     code_flags_t word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
239     bool test = false;                // assume we destroyed lvalue
240 
241     switch (e.Eoper)
242     {
243         case OPadd:     mode = 0;
244                         op1 = 0x03; op2 = 0x13; break;  /* ADD, ADC     */
245         case OPmin:     mode = 5;
246                         op1 = 0x2B; op2 = 0x1B; break;  /* SUB, SBB     */
247         case OPor:      mode = 1;
248                         op1 = 0x0B; op2 = 0x0B; break;  /* OR , OR      */
249         case OPxor:     mode = 6;
250                         op1 = 0x33; op2 = 0x33; break;  /* XOR, XOR     */
251         case OPand:     mode = 4;
252                         op1 = 0x23; op2 = 0x23;         /* AND, AND     */
253                         if (tyreg(ty1) &&
254                             *pretregs == mPSW)          /* if flags only */
255                         {
256                             test = true;
257                             op1 = 0x85;                 /* TEST         */
258                             mode = 0;
259                         }
260                         break;
261 
262         default:
263             assert(0);
264     }
        // Flip the low opcode bit for 1 byte operands (e.g. 0x03 -> 0x02)
265     op1 ^= isbyte;                                  /* if byte operation    */
266 
267     // Compute numwords, the number of words to operate on.
268     int numwords = 1;
269     if (!I16)
270     {
271         /* Cannot operate on longs and then do a 'paint' to a far       */
272         /* pointer, because far pointers are 48 bits and longs are 32.  */
273         /* Therefore, numwords can never be 2.                          */
274         assert(!(tyfv(ty1) && tyfv(ty2)));
275         if (sz == 2 * REGSIZE)
276         {
277             numwords++;
278         }
279     }
280     else
281     {
282         /* If ty is a TYfptr, but both operands are long, treat the     */
283         /* operation as a long.                                         */
284         if ((tylong(ty1) || ty1 == TYhptr) &&
285             (tylong(ty2) || ty2 == TYhptr))
286             numwords++;
287     }
288 
        // ---- 2. flags-only AND: TEST EA,imm / TEST EA,reg ----
        // Applies when e1 is a variable or a non-CSE indirection no larger
        // than a register and its EA is not restricted to MOV.
289     // Special cases where only flags are set
290     if (test && _tysize[ty1] <= REGSIZE &&
291         (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
292         && !movOnly(e1)
293        )
294     {
295         // Handle the case of (var & const)
296         if (e2.Eoper == OPconst && el_signx32(e2))
297         {
298             code cs = void;
299             cs.Iflags = 0;
300             cs.Irex = 0;
301             getlvalue(cdb,&cs,e1,0);
302             targ_size_t value = e2.EV.Vpointer;
303             if (sz == 2)
304                 value &= 0xFFFF;
305             else if (sz == 4)
306                 value &= 0xFFFFFFFF;
307             reg_t reg;
                // Prefer a register that already holds the constant ...
308             if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,value,&reg))
309             {
310                 code_newreg(&cs, reg);
311                 if (I64 && isbyte && reg >= 4)
312                     cs.Irex |= REX;
313             }
314             else
315             {
                    // ... otherwise use the immediate form (0xF7)
316                 if (sz == 8 && !I64)
317                 {
318                     assert(value == cast(int)value);    // sign extend imm32
319                 }
320                 op1 = 0xF7;
321                 cs.IEV2.Vint = cast(targ_int)value;
322                 cs.IFL2 = FLconst;
323             }
                // NOTE(review): op1 already had its low bit flipped for byte
                // operands above. Flipping it again here yields 0xF6 for the
                // immediate form (op1 was reassigned to 0xF7), but restores
                // 0x85 for the register form - confirm that is intended for
                // 1 byte operands.
324             cs.Iop = op1 ^ isbyte;
325             cs.Iflags |= word | CFpsw;
326             freenode(e1);
327             freenode(e2);
328             cdb.gen(&cs);
329             return;
330         }
331 
332         // Handle (exp & reg)
333         reg_t reg;
334         regm_t retregs;
335         if (isregvar(e2,&retregs,&reg))
336         {
337             code cs = void;
338             cs.Iflags = 0;
339             cs.Irex = 0;
340             getlvalue(cdb,&cs,e1,0);
341             code_newreg(&cs, reg);
342             if (I64 && isbyte && reg >= 4)
343                 cs.Irex |= REX;
                // NOTE(review): same double flip of the low opcode bit as in
                // the (var & const) case above - confirm for 1 byte operands.
344             cs.Iop = op1 ^ isbyte;
345             cs.Iflags |= word | CFpsw;
346             freenode(e1);
347             freenode(e2);
348             cdb.gen(&cs);
349             return;
350         }
351     }
352 
        // Instruction template shared by the remaining paths
353     code cs = void;
354     cs.Iflags = 0;
355     cs.Irex = 0;
356 
        // ---- 3. additions that can be done with LEA ----
357     // Look for possible uses of LEA
358     if (e.Eoper == OPadd &&
359         !(*pretregs & mPSW) &&                // flags aren't set by LEA
360         !nest &&                              // could cause infinite recursion if e.Ecount
361         (sz == REGSIZE || (I64 && sz == 4)))  // far pointers aren't handled
362     {
363         const rex = (sz == 8) ? REX_W : 0;
364 
            // 3a. Let getlvalue() form an addressing mode for the whole
            // expression e, then LEA that address into the result register.
365         // Handle the case of (e + &var)
366         int e1oper = e1.Eoper;
367         if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2.Ecount && stackfl[el_fl(e2)])))
368                 || // LEA costs too much for simple EAs on older CPUs
369             (e2oper == OPconst && (e1.Eoper == OPcall || e1.Eoper == OPcallns) && !(*pretregs & mAX)) ||
370             (!I16 && (isscaledindex(e1) || isscaledindex(e2))) ||
371             (!I16 && e1oper == OPvar && e1.EV.Vsym.Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2.EV.Vsym.Sfl == FLreg))) ||
372             (e2oper == OPconst && e1oper == OPeq && e1.EV.E1.Eoper == OPvar) ||
373             (!I16 && (e2oper == OPrelconst || e2oper == OPconst) && !e1.Ecount &&
374              (e1oper == OPmul || e1oper == OPshl) &&
375              e1.EV.E2.Eoper == OPconst &&
376              ssindex(e1oper,e1.EV.E2.EV.Vuns)
377             ) ||
378             (!I16 && e1.Ecount)
379            )
380         {
                // `nest` is raised around getlvalue() only when e.Ecount is non-zero
381             const inc = e.Ecount != 0;
382             nest += inc;
383             code csx = void;
384             getlvalue(cdb,&csx,e,0);
385             nest -= inc;
386             reg_t regx;
387             allocreg(cdb,pretregs,&regx,ty);
388             csx.Iop = LEA;
389             code_newreg(&csx, regx);
390             cdb.gen(&csx);          // LEA regx,EA
391             if (rex)
392                 code_orrex(cdb.last(), rex);
393             return;
394         }
395 
            // 3b. Two additions folded into LEA reg,disp[index*scale][base].
            // edisp is whichever constant passes el_signx32(); ebase is the
            // other of (e1.E2, e2); e11 is the remaining operand.
396         // Handle the case of ((e + c) + e2)
397         if (!I16 &&
398             e1oper == OPadd &&
399             (e1.EV.E2.Eoper == OPconst && el_signx32(e1.EV.E2) ||
400              e2oper == OPconst && el_signx32(e2)) &&
401             !e1.Ecount
402            )
403         {
404             elem *ebase;
405             elem *edisp;
406             if (e2oper == OPconst && el_signx32(e2))
407             {   edisp = e2;
408                 ebase = e1.EV.E2;
409             }
410             else
411             {   edisp = e1.EV.E2;
412                 ebase = e2;
413             }
414 
415             auto e11 = e1.EV.E1;
416             regm_t retregs = *pretregs & ALLREGS;
417             if (!retregs)
418                 retregs = ALLREGS;
                // ss:  scale field for the first LEA / IMUL
                // ss2: non-zero when a second scaling step is required
419             int ss = 0;
420             int ss2 = 0;
421 
422             // Handle the case of (((e *  c1) + c2) + e2)
423             // Handle the case of (((e << c1) + c2) + e2)
424             if ((e11.Eoper == OPmul || e11.Eoper == OPshl) &&
425                 e11.EV.E2.Eoper == OPconst &&
426                 !e11.Ecount
427                )
428             {
429                 const co1 = cast(targ_size_t)el_tolong(e11.EV.E2);
430                 if (e11.Eoper == OPshl)
431                 {
                        // shift counts 0..3 map directly onto the scale field
432                     if (co1 > 3)
433                         goto L13;
434                     ss = cast(int)co1;
435                 }
436                 else
437                 {
                        // Multipliers of the form (2^ss + 1) * 2^ss2:
                        // the first factor is produced by LEA [r*2^ss][r]
                        // (or IMUL), the second by the scale of the final LEA.
438                     ss2 = 1;
439                     switch (co1)
440                     {
441                         case  6:        ss = 1;                 break;
442                         case 12:        ss = 1; ss2 = 2;        break;
443                         case 24:        ss = 1; ss2 = 3;        break;
444                         case 10:        ss = 2;                 break;
445                         case 20:        ss = 2; ss2 = 2;        break;
446                         case 40:        ss = 2; ss2 = 3;        break;
447                         case 18:        ss = 3;                 break;
448                         case 36:        ss = 3; ss2 = 2;        break;
449                         case 72:        ss = 3; ss2 = 3;        break;
450                         default:
451                             ss2 = 0;
452                             goto L13;
453                     }
454                 }
                    // The multiply/shift is absorbed into the addressing mode;
                    // continue with its left operand.
455                 freenode(e11.EV.E2);
456                 freenode(e11);
457                 e11 = e11.EV.E1;
458               L13:
459                 { }
460             }
461 
462             reg_t reg11;
463             regm_t regm;
                // Use a register variable in place when possible, else evaluate e11
464             if (e11.Eoper == OPvar && isregvar(e11,&regm,&reg11))
465             {
466                 if (tysize(e11.Ety) <= REGSIZE)
467                     retregs = mask(reg11); // only want the LSW
468                 else
469                     retregs = regm;
470                 freenode(e11);
471             }
472             else
473                 codelem(cdb,e11,&retregs,false);
474 
                // Base operand goes into a different register, never BP
475             regm_t rretregs = ALLREGS & ~retregs & ~mBP;
476             scodelem(cdb,ebase,&rretregs,retregs,true);
477             reg_t reg;
478             {
                    // Result register: one of *pretregs not used by the base,
                    // or any other register if that leaves nothing
479                 regm_t sregs = *pretregs & ~rretregs;
480                 if (!sregs)
481                     sregs = ALLREGS & ~rretregs;
482                 allocreg(cdb,&sregs,&reg,ty);
483             }
484 
485             assert((retregs & (retregs - 1)) == 0); // must be only one register
486             assert((rretregs & (rretregs - 1)) == 0); // must be only one register
487 
488             auto  reg1 = findreg(retregs);
489             const reg2 = findreg(rretregs);
490 
491             if (ss2)
492             {
493                 assert(reg != reg2);
                    // When the low 3 bits of reg1 equal BP, IMUL by (2^ss + 1)
                    // is generated in place of the LEA.
494                 if ((reg1 & 7) == BP)
495                 {   static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1];
496 
497                     // IMUL reg,imm32
498                     cdb.genc2(0x69,modregxrmx(3,reg,reg1),imm32[ss]);
499                 }
500                 else
501                 {   // LEA reg,[reg1*ss][reg1]
502                     cdb.gen2sib(LEA,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7));
503                     if (reg1 & 8)
504                         code_orrex(cdb.last(), REX_X | REX_B);
505                 }
506                 if (rex)
507                     code_orrex(cdb.last(), rex);
                    // The partial product now lives in reg; scale it by 2^ss2 below
508                 reg1 = reg;
509                 ss = ss2;                               // use *2 for scale
510             }
511 
512             cs.Iop = LEA;                      // LEA reg,c[reg1*ss][reg2]
513             cs.Irm = modregrm(2,reg & 7,4);
514             cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7);
515             assert(reg2 != BP);
516             cs.Iflags = CFoff;
517             cs.Irex = cast(ubyte)rex;
518             if (reg & 8)
519                 cs.Irex |= REX_R;
520             if (reg1 & 8)
521                 cs.Irex |= REX_X;
522             if (reg2 & 8)
523                 cs.Irex |= REX_B;
524             cs.IFL1 = FLconst;
525             cs.IEV1.Vsize_t = edisp.EV.Vuns;
526 
527             freenode(edisp);
528             freenode(e1);
529             cdb.gen(&cs);
530             fixresult(cdb,e,mask(reg),pretregs);
531             return;
532         }
533     }
534 
        // Registers the result may live in; fall back to any allowed register
        // when the caller named none (e.g. flags only).
535     regm_t posregs = (isbyte) ? BYTEREGS : (mES | ALLREGS | mBP);
536     regm_t retregs = *pretregs & posregs;
537     if (retregs == 0)                   /* if no return regs speced     */
538                                         /* (like if wanted flags only)  */
539         retregs = ALLREGS & posregs;    // give us some
540 
        // ---- 4. huge pointer arithmetic ----
541     if (ty1 == TYhptr || ty2 == TYhptr)
542     {     /* Generate code for add/subtract of huge pointers.
543            No attempt is made to generate very good code.
544          */
              // The most significant word of the result is always kept in DX
545         retregs = (retregs & mLSW) | mDX;
546         regm_t rretregs;
547         if (ty1 == TYhptr)
548         {   // hptr +- long
549             rretregs = mLSW & ~(retregs | regcon.mvar);
550             if (!rretregs)
551                 rretregs = mLSW;
552             rretregs |= mCX;
553             codelem(cdb,e1,&rretregs,0);
554             retregs &= ~rretregs;
555             if (!(retregs & mLSW))
556                 retregs |= mLSW & ~rretregs;
557 
558             scodelem(cdb,e2,&retregs,rretregs,true);
559         }
560         else
561         {   // long + hptr
562             codelem(cdb,e1,&retregs,0);
563             rretregs = (mLSW | mCX) & ~retregs;
564             if (!(rretregs & mLSW))
565                 rretregs |= mLSW;
566             scodelem(cdb,e2,&rretregs,retregs,true);
567         }
568         getregs(cdb,rretregs | retregs);
569         const mreg = DX;
570         const lreg = findreglsw(retregs);
              // Subtraction is done by negating the two word value in
              // mreg:lreg and then adding.
571         if (e.Eoper == OPmin)
572         {   // negate retregs
573             cdb.gen2(0xF7,modregrm(3,3,mreg));     // NEG mreg
574             cdb.gen2(0xF7,modregrm(3,3,lreg));     // NEG lreg
575             code_orflag(cdb.last(),CFpsw);
576             cdb.genc2(0x81,modregrm(3,3,mreg),0);  // SBB mreg,0
577         }
578         const lrreg = findreglsw(rretregs);
579         genregs(cdb,0x03,lreg,lrreg);              // ADD lreg,lrreg
580         code_orflag(cdb.last(),CFpsw);
581         genmovreg(cdb,lrreg,CX);      // MOV lrreg,CX
582         cdb.genc2(0x81,modregrm(3,2,mreg),0);      // ADC mreg,0
583         genshift(cdb);                             // MOV CX,offset __AHSHIFT
584         cdb.gen2(0xD3,modregrm(3,4,mreg));         // SHL mreg,CL
585         genregs(cdb,0x03,mreg,lrreg);              // ADD mreg,MSREG(h)
586         fixresult(cdb,e,retregs,pretregs);
587         return;
588     }
589 
        // ---- 5. general case: evaluate e1 first ----
        // rretregs: registers holding e2 (when it is evaluated into registers)
        // reg:      register the operation is applied to
590     regm_t rretregs;
591     reg_t reg;
592     if (_tysize[ty1] > REGSIZE && numwords == 1)
593     {     /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */
594 
595         debug
596         if (_tysize[ty2] != REGSIZE)
597         {
598             printf("e = %p, e.Eoper = ",e);
599             WROP(e.Eoper);
600             printf(" e1.Ety = ");
601             WRTYxx(ty1);
602             printf(" e2.Ety = ");
603             WRTYxx(ty2);
604             printf("\n");
605             elem_print(e);
606         }
607 
608         assert(_tysize[ty2] == REGSIZE);
609 
610         /* Watch out for the case here where you are going to OP reg,EA */
611         /* and both the reg and EA use ES! Prevent this by forcing      */
612         /* reg into the regular registers.                              */
613         if ((e2oper == OPind ||
614             (e2oper == OPvar && el_fl(e2) == FLfardata)) &&
615             !e2.Ecount)
616         {
617             retregs = ALLREGS;
618         }
619 
620         codelem(cdb,e1,&retregs,test != 0);
621         reg = findreglsw(retregs);      /* reg is the register with the offset*/
622     }
623     else
624     {
625         regm_t regm;
626 
627         /* if (tyword + TYfptr) */
628         if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE)
629         {   retregs = ~*pretregs & ALLREGS;
630 
631             /* if retregs doesn't have any regs in it that aren't reg vars */
632             if ((retregs & ~regcon.mvar) == 0)
633                 retregs |= mAX;
634         }
635         else if (numwords == 2 && retregs & mES)
636             retregs = (retregs | mMSW) & ALLREGS;
637 
638         // Determine if we should swap operands, because
639         //      mov     EAX,x
640         //      add     EAX,reg
641         // is faster than:
642         //      mov     EAX,reg
643         //      add     EAX,x
            // Note that e2oper was captured before this swap and is not updated.
644         else if (e2oper == OPvar &&
645                  e1.Eoper == OPvar &&
646                  e.Eoper != OPmin &&
647                  isregvar(e1,&regm,null) &&
648                  regm != retregs &&
649                  _tysize[ty1] == _tysize[ty2])
650         {
651             elem *es = e1;
652             e1 = e2;
653             e2 = es;
654         }
655         codelem(cdb,e1,&retregs,test != 0);         // eval left leaf
656         reg = findreg(retregs);
657     }
        // rreg: register holding e2 or, when rval is set, its constant value
        // rval: non-zero if reghasvalue() found a register already holding i
        // i:    the immediate operand
658     reg_t rreg;
659     int rval;
660     targ_size_t i;
        // Choose how the second operand is supplied, based on its operator.
        //   L2: evaluate e2 into registers and use OP reg,rreg
        //   L1: use e2 as a memory operand (OP reg,EA)
        //   L3: immediate / register-with-value tail shared by OPrelconst
        //       and single word OPconst
661     switch (e2oper)
662     {
663         case OPind:                                 /* if addressing mode   */
664             if (!e2.Ecount)                         /* if not CSE           */
665                     goto L1;                        /* try OP reg,EA        */
666             goto default;
667 
668         default:                                    /* operator node        */
669         L2:
670             rretregs = ALLREGS & ~retregs;
671             /* Be careful not to do arithmetic on ES        */
672             if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE && *pretregs != mPSW)
673                 rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs;
674             else if (isbyte)
675                 rretregs &= BYTEREGS;
676 
677             scodelem(cdb,e2,&rretregs,retregs,true);       // get rvalue
678             rreg = (_tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs);
679             if (!test)
680                 getregs(cdb,retregs);          // we will trash these regs
681             if (numwords == 1)                              /* ADD reg,rreg */
682             {
683                 /* reverse operands to avoid moving around the segment value */
684                 if (_tysize[ty2] > REGSIZE)
685                 {
686                     getregs(cdb,rretregs);
687                     genregs(cdb,op1,rreg,reg);
688                     retregs = rretregs;     // reverse operands
689                 }
690                 else
691                 {
692                     genregs(cdb,op1,reg,rreg);
693                     if (!I16 && *pretregs & mPSW)
694                         cdb.last().Iflags |= word;
695                 }
696                 if (I64 && sz == 8)
697                     code_orrex(cdb.last(), REX_W);
698                 if (I64 && isbyte && (reg >= 4 || rreg >= 4))
699                     code_orrex(cdb.last(), REX);
700             }
701             else /* numwords == 2 */                /* ADD lsreg,lsrreg     */
702             {
                    // Low words with op1 (flags kept for ADD/SUB so the carry
                    // survives), then high words with op2
703                 reg = findreglsw(retregs);
704                 rreg = findreglsw(rretregs);
705                 genregs(cdb,op1,reg,rreg);
706                 if (e.Eoper == OPadd || e.Eoper == OPmin)
707                     code_orflag(cdb.last(),CFpsw);
708                 reg = findregmsw(retregs);
709                 rreg = findregmsw(rretregs);
710                 if (!(e2oper == OPu16_32 && // if second operand is 0
711                       (op2 == 0x0B || op2 == 0x33)) // and OR or XOR
712                    )
713                     genregs(cdb,op2,reg,rreg);        // ADC msreg,msrreg
714             }
715             break;
716 
717         case OPrelconst:
                // Address constants are only used as an immediate when not
                // I64 PIC / EX_WIN64, the operation is register sized, the
                // symbol's segfl entry is 3, and e2 need not be in a register.
718             if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
719                 goto default;
720             if (sz != REGSIZE)
721                 goto L2;
722             if (segfl[el_fl(e2)] != 3)              /* if not in data segment */
723                 goto L2;
724             if (evalinregister(e2))
725                 goto L2;
726             cs.IEV2.Voffset = e2.EV.Voffset;
727             cs.IEV2.Vsym = e2.EV.Vsym;
728             cs.Iflags |= CFoff;
729             i = 0;                          /* no INC or DEC opcode         */
730             rval = 0;
731             goto L3;
732 
733         case OPconst:
734             if (tyfv(ty2))
735                 goto L2;
736             if (numwords == 1)
737             {
738                 if (!el_signx32(e2))
739                     goto L2;
740                 i = e2.EV.Vpointer;
                    // For 16 bit operands, drop the operand size flag when
                    // flags are not needed and optimizing for speed; the
                    // constant is then limited to its low 16 bits.
741                 if (word)
742                 {
743                     if (!(*pretregs & mPSW) &&
744                         config.flags4 & CFG4speed &&
745                         (e.Eoper == OPor || e.Eoper == OPxor || test ||
746                          (e1.Eoper != OPvar && e1.Eoper != OPind)))
747                     {   word = 0;
748                         i &= 0xFFFF;
749                     }
750                 }
751                 rval = reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&rreg);
752                 cs.IEV2.Vsize_t = i;
753             L3:
754                 if (!test)
755                     getregs(cdb,retregs);          // we will trash these regs
                    // NOTE(review): second flip of op1's low bit (the first is
                    // right after the opcode switch); cs.Iop is flipped once
                    // more just before cdb.gen() below, so the byte form is
                    // selected there. Confirm before touching any of the three.
756                 op1 ^= isbyte;
757                 cs.Iflags |= word;
758                 if (rval)
759                 {   cs.Iop = op1 ^ 2;
                        // a register already holds the constant: it goes into
                        // the reg field in place of the 0x81 sub-opcode
760                     mode = rreg;
761                 }
762                 else
763                     cs.Iop = 0x81;
764                 cs.Irm = modregrm(3,mode&7,reg&7);
765                 if (mode & 8)
766                     cs.Irex |= REX_R;
767                 if (reg & 8)
768                     cs.Irex |= REX_B;
769                 if (I64 && sz == 8)
770                     cs.Irex |= REX_W;
771                 if (I64 && isbyte && (reg >= 4 || (rval && rreg >= 4)))
772                     cs.Irex |= REX;
773                 cs.IFL2 = cast(ubyte)((e2.Eoper == OPconst) ? FLconst : el_fl(e2));
774                 /* Modify instruction for special cases */
775                 switch (e.Eoper)
776                 {
777                     case OPadd:
778                     {
779                         int iop;
780 
781                         if (i == 1)
782                             iop = 0;                    /* INC reg      */
783                         else if (i == -1)
784                             iop = 8;                    /* DEC reg      */
785                         else
786                             break;
                            // One byte 0x40+reg / 0x48+reg form, replaced by
                            // the 0xFF ModRM form for I64 or for byte operands
                            // whose flags are wanted
787                         cs.Iop = (0x40 | iop | reg) ^ isbyte;
788                         if ((isbyte && *pretregs & mPSW) || I64)
789                         {
790                             cs.Irm = cast(ubyte)(modregrm(3,0,reg & 7) | iop);
791                             cs.Iop = 0xFF;
792                         }
793                         break;
794                     }
795 
796                     case OPand:
797                         if (test)
798                             cs.Iop = rval ? op1 : 0xF7; // TEST
799                         break;
800 
801                     default:
802                         break;
803                 }
804                 if (*pretregs & mPSW)
805                     cs.Iflags |= CFpsw;
806                 cs.Iop ^= isbyte;
807                 cdb.gen(&cs);
808                 cs.Iflags &= ~CFpsw;
809             }
810             else if (numwords == 2)
811             {
                    // Two word constant: 0x81 /mode on the low word with the
                    // low half, then the same instruction on the high word
                    // with the high half (reg field OR'ed with 2 for OPadd).
812                 getregs(cdb,retregs);
813                 reg = findregmsw(retregs);
814                 const lsreg = findreglsw(retregs);
815                 cs.Iop = 0x81;
816                 cs.Irm = modregrm(3,mode,lsreg);
817                 cs.IFL2 = FLconst;
818                 const msw = cast(targ_int)MSREG(e2.EV.Vllong);
819                 cs.IEV2.Vint = e2.EV.Vlong;
820                 switch (e.Eoper)
821                 {
822                     case OPadd:
823                     case OPmin:
824                         cs.Iflags |= CFpsw;
825                         break;
826 
827                     default:
828                         break;
829                 }
830                 cdb.gen(&cs);
831                 cs.Iflags &= ~CFpsw;
832 
833                 cs.Irm = cast(ubyte)((cs.Irm & modregrm(3,7,0)) | reg);
834                 cs.IEV2.Vint = msw;
835                 if (e.Eoper == OPadd)
836                     cs.Irm |= modregrm(0,2,0);      /* ADC          */
837                 cdb.gen(&cs);
838             }
839             else
840                 assert(0);
841             freenode(e2);
842             break;
843 
844         case OPvar:
845             if (movOnly(e2))
846                 goto L2;
847         L1:
                // OP reg,EA with e2 as the memory operand
848             if (tyfv(ty2))
849                 goto L2;
850             if (!test)
851                 getregs(cdb,retregs);          // we will trash these regs
852             loadea(cdb,e2,&cs,op1,
853                    ((numwords == 2) ? findreglsw(retregs) : reg),
854                    0,retregs,retregs);
855             if (!I16 && word)
856             {   if (*pretregs & mPSW)
857                     code_orflag(cdb.last(),word);
858                 else
859                     cdb.last().Iflags &= ~cast(int)word;
860             }
861             else if (numwords == 2)
862             {
                    // High word: op2 against the upper half of e2
863                 if (e.Eoper == OPadd || e.Eoper == OPmin)
864                     code_orflag(cdb.last(),CFpsw);
865                 reg = findregmsw(retregs);
866                 if (!OTleaf(e2.Eoper))
867                 {   getlvalue_msw(&cs);
868                     cs.Iop = op2;
869                     NEWREG(cs.Irm,reg);
870                     cdb.gen(&cs);                 // ADC reg,data+2
871                 }
872                 else
873                     loadea(cdb,e2,&cs,op2,reg,REGSIZE,retregs,0);
874             }
875             else if (I64 && sz == 8)
876                 code_orrex(cdb.last(), REX_W);
877             freenode(e2);
878             break;
879     }
880 
        // For register sized results the last instruction generated also sets
        // the flags, so mark it CFpsw and clear mPSW from *pretregs.
881     if (sz <= REGSIZE && *pretregs & mPSW)
882     {
883         /* If the expression is (_tls_array + ...), then the flags are not set
884          * since the linker may rewrite these instructions into something else.
885          */
886         if (I64 && e.Eoper == OPadd && e1.Eoper == OPvar)
887         {
888             const s = e1.EV.Vsym;
                // 10 bytes are compared so the terminating 0 is included:
                // the identifier must be exactly "_tls_array"
889             if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0)
890             {
891                 goto L7;                        // don't assume flags are set
892             }
893         }
894         code_orflag(cdb.last(),CFpsw);
895         *pretregs &= ~mPSW;                    // flags already set
896     L7: { }
897     }
898     fixresult(cdb,e,retregs,pretregs);
899 }
900 
901 
902 /*****************************
903  * Generate code for multiply, divide, modulo and remquo (combined quotient and remainder).
904  * Note that modulo isn't defined for doubles.
905  */
906 
907 void cdmul(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
908 {
909     elem *e1 = e.EV.E1;
910     elem *e2 = e.EV.E2;
911     if (*pretregs == 0)                         // if don't want result
912     {
913         codelem(cdb,e1,pretregs,false);      // eval left leaf
914         *pretregs = 0;                          // in case they got set
915         codelem(cdb,e2,pretregs,false);
916         return;
917     }
918 
919     //printf("cdmul(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
920     const tyml = tybasic(e1.Ety);
921     const ty = tybasic(e.Ety);
922     const oper = e.Eoper;
923 
924     if (tyfloating(tyml))
925     {
926         if (tyvector(tyml) ||
927             config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
928             !(*pretregs & mST0) &&
929             !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
930             !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
931             !(ty == TYllong)  //   or passing to function through integer register
932            )
933         {
934             orthxmm(cdb,e,pretregs);
935             return;
936         }
937         static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
938             orth87(cdb,e,pretregs);
939         else
940             opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);
941 
942         return;
943     }
944 
945     if (tyxmmreg(tyml))
946     {
947         orthxmm(cdb,e,pretregs);
948         return;
949     }
950 
951     uint lib;
952     regm_t keepregs = 0;
953     regm_t resreg;
954     ubyte op;
955     const uns = tyuns(tyml) || tyuns(e2.Ety);  // 1 if uint operation, 0 if not
956 
957     switch (oper)
958     {
959         case OPmul:
960             resreg = mAX;
961             op = 5 - uns;
962             lib = CLIB.lmul;
963             break;
964 
965         case OPdiv:
966             resreg = mAX;
967             op = 7 - uns;
968             lib = uns ? CLIB.uldiv : CLIB.ldiv;
969             if (I32)
970                 keepregs |= mSI | mDI;
971             break;
972 
973         case OPmod:
974             resreg = mDX;
975             op = 7 - uns;
976             lib = uns ? CLIB.ulmod : CLIB.lmod;
977             if (I32)
978                 keepregs |= mSI | mDI;
979             break;
980 
981         case OPremquo:
982             resreg = mDX | mAX;
983             op = 7 - uns;
984             lib = uns ? CLIB.uldiv : CLIB.ldiv;
985             if (I32)
986                 keepregs |= mSI | mDI;
987             break;
988 
989         default:
990             assert(0);
991     }
992 
993     regm_t retregs;
994     regm_t rretregs;
995     const isbyte = tybyte(e.Ety) != 0;
996     const sz = _tysize[tyml];
997     if (sz <= REGSIZE)                  // dedicated regs for mul & div
998     {   retregs = mAX;
999         // pick some other regs
1000         rretregs = isbyte ? BYTEREGS & ~mAX
1001                         : ALLREGS & ~(mAX|mDX);
1002     }
1003     else
1004     {
1005         assert(sz <= 2 * REGSIZE);
1006         retregs = mDX | mAX;
1007         rretregs = mCX | mBX;           // second arg
1008     }
1009 
1010     reg_t rreg;
1011     int pow2;
1012 
1013     const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
1014     const uint grex = rex << 16;
1015     const OPER opunslng = I16 ? OPu16_32 : OPu32_64;
1016 
1017     code cs = void;
1018     cs.Iflags = 0;
1019     cs.Irex = 0;
1020 
1021     switch (e2.Eoper)
1022     {
1023         case OPu16_32:
1024         case OPs16_32:
1025         case OPu32_64:
1026         case OPs32_64:
1027         {
1028             if (sz != 2 * REGSIZE || oper != OPmul || e1.Eoper != e2.Eoper ||
1029                 e1.Ecount || e2.Ecount)
1030                 goto L2;
1031             const ubyte opx = (e2.Eoper == opunslng) ? 4 : 5;
1032             regm_t retregsx = mAX;
1033             codelem(cdb,e1.EV.E1,&retregsx,false);    // eval left leaf
1034             if (e2.EV.E1.Eoper == OPvar ||
1035                 (e2.EV.E1.Eoper == OPind && !e2.EV.E1.Ecount)
1036                )
1037             {
1038                 loadea(cdb,e2.EV.E1,&cs,0xF7,opx,0,mAX,mAX | mDX);
1039             }
1040             else
1041             {
1042                 regm_t rretregsx = ALLREGS & ~mAX;
1043                 scodelem(cdb,e2.EV.E1,&rretregsx,retregs,true); // get rvalue
1044                 getregs(cdb,mAX | mDX);
1045                 const rregx = findreg(rretregsx);
1046                 cdb.gen2(0xF7,grex | modregrmx(3,opx,rregx)); // OP AX,rregx
1047             }
1048             freenode(e.EV.E1);
1049             freenode(e2);
1050             fixresult(cdb,e,mAX | mDX,pretregs);
1051             return;
1052         }
1053 
1054         case OPconst:
1055             auto d = cast(targ_size_t)el_tolong(e2);
1056             bool neg = false;
1057             const e2factor = d;
1058             if (!uns && cast(targ_llong)e2factor < 0)
1059             {   neg = true;
1060                 d = -d;
1061             }
1062 
1063             // Multiply by a constant
1064             if (oper == OPmul && I32 && sz == REGSIZE * 2)
1065             {
1066                 /*  IMUL    EDX,EDX,lsw
1067                     IMUL    reg,EAX,msw
1068                     ADD     reg,EDX
1069                     MOV     EDX,lsw
1070                     MUL     EDX
1071                     ADD     EDX,reg
1072 
1073                     if (msw == 0)
1074                     IMUL    reg,EDX,lsw
1075                     MOV     EDX,lsw
1076                     MUL     EDX
1077                     ADD     EDX,reg
1078                  */
1079                 codelem(cdb,e1,&retregs,false);    // eval left leaf
1080                 regm_t scratch = allregs & ~(mAX | mDX);
1081                 reg_t reg;
1082                 allocreg(cdb,&scratch,&reg,TYint);
1083                 getregs(cdb,mDX | mAX);
1084 
1085                 const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
1086                 const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));
1087 
1088                 if (msw)
1089                 {
1090                     genmulimm(cdb,DX,DX,lsw);
1091                     genmulimm(cdb,reg,AX,msw);
1092                     cdb.gen2(0x03,modregrm(3,reg,DX));
1093                 }
1094                 else
1095                     genmulimm(cdb,reg,DX,lsw);
1096 
1097                 movregconst(cdb,DX,lsw,0);     // MOV EDX,lsw
1098                 getregs(cdb,mDX);
1099                 cdb.gen2(0xF7,modregrm(3,4,DX));            // MUL EDX
1100                 cdb.gen2(0x03,modregrm(3,DX,reg));          // ADD EDX,reg
1101 
1102                 const resregx = mDX | mAX;
1103                 freenode(e2);
1104                 fixresult(cdb,e,resregx,pretregs);
1105                 return;
1106             }
1107 
1108             // Signed divide by a constant
1109             if (oper != OPmul &&
1110                 (d & (d - 1)) &&
1111                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1112                 config.flags4 & CFG4speed && !uns)
1113             {
1114                 /* R1 / 10
1115                  *
1116                  *  MOV     EAX,m
1117                  *  IMUL    R1
1118                  *  MOV     EAX,R1
1119                  *  SAR     EAX,31
1120                  *  SAR     EDX,shpost
1121                  *  SUB     EDX,EAX
1122                  *  IMUL    EAX,EDX,d
1123                  *  SUB     R1,EAX
1124                  *
1125                  * EDX = quotient
1126                  * R1 = remainder
1127                  */
1128                 assert(sz == 4 || sz == 8);
1129 
1130                 ulong m;
1131                 int shpost;
1132                 const int N = sz * 8;
1133                 const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);
1134 
1135                 regm_t regm = allregs & ~(mAX | mDX);
1136                 codelem(cdb,e1,&regm,false);       // eval left leaf
1137                 const reg_t reg = findreg(regm);
1138                 getregs(cdb,regm | mDX | mAX);
1139 
1140                 /* Algorithm 5.2
1141                  * if m>=2**(N-1)
1142                  *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
1143                  * else
1144                  *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
1145                  * if (neg)
1146                  *    q = -q
1147                  */
1148                 const bool mgt = mhighbit || m >= (1UL << (N - 1));
1149                 movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
1150                 cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL R1
1151                 if (mgt)
1152                     cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,R1
1153                 getregsNoSave(mAX);                                     // EAX no longer contains 'm'
1154                 genmovreg(cdb, AX, reg);                   // MOV EAX,R1
1155                 cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
1156                 if (shpost)
1157                     cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
1158                 reg_t r3;
1159                 if (neg && oper == OPdiv)
1160                 {
1161                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
1162                     r3 = AX;
1163                 }
1164                 else
1165                 {
1166                     cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
1167                     r3 = DX;
1168                 }
1169 
1170                 // r3 is quotient
1171                 regm_t resregx;
1172                 switch (oper)
1173                 {   case OPdiv:
1174                         resregx = mask(r3);
1175                         break;
1176 
1177                     case OPmod:
1178                         assert(reg != AX && r3 == DX);
1179                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1180                         {
1181                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
1182                         }
1183                         else
1184                         {
1185                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1186                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1187                             getregsNoSave(mAX);                             // EAX no longer contains 'd'
1188                         }
1189                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1190                         resregx = regm;
1191                         break;
1192 
1193                     case OPremquo:
1194                         assert(reg != AX && r3 == DX);
1195                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1196                         {
1197                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);     // IMUL EAX,EDX,d
1198                         }
1199                         else
1200                         {
1201                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1202                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1203                         }
1204                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1205                         genmovreg(cdb, AX, r3);                // MOV EAX,r3
1206                         if (neg)
1207                             cdb.gen2(0xF7,grex | modregrm(3,3,AX));         // NEG EAX
1208                         genmovreg(cdb, DX, reg);               // MOV EDX,R1
1209                         resregx = mDX | mAX;
1210                         break;
1211 
1212                     default:
1213                         assert(0);
1214                 }
1215                 freenode(e2);
1216                 fixresult(cdb,e,resregx,pretregs);
1217                 return;
1218             }
1219 
1220             // Unsigned divide by a constant
1221             if (oper != OPmul &&
1222                 e2factor > 2 && (e2factor & (e2factor - 1)) &&
1223                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1224                 config.flags4 & CFG4speed && uns)
1225             {
1226                 assert(sz == 4 || sz == 8);
1227 
1228                 reg_t r3;
1229                 regm_t regm;
1230                 reg_t reg;
1231                 ulong m;
1232                 int shpre;
1233                 int shpost;
1234                 if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
1235                 {
1236                     /* t1 = MULUH(m, n)
1237                      * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
1238                      *   MOV   EAX,reg
1239                      *   MOV   EDX,m
1240                      *   MUL   EDX
1241                      *   MOV   EAX,reg
1242                      *   SUB   EAX,EDX
1243                      *   SHR   EAX,1
1244                      *   LEA   R3,[EAX][EDX]
1245                      *   SHR   R3,shpost-1
1246                      */
1247                     assert(shpre == 0);
1248 
1249                     regm = allregs & ~(mAX | mDX);
1250                     codelem(cdb,e1,&regm,false);       // eval left leaf
1251                     reg = findreg(regm);
1252                     getregs(cdb,mAX | mDX);
1253                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1254                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1255                     getregs(cdb,regm | mDX | mAX);
1256                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
1257                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1258                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
1259                     cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
1260                     regm_t regm3 = allregs;
1261                     if (oper == OPmod || oper == OPremquo)
1262                     {
1263                         regm3 &= ~regm;
1264                         if (oper == OPremquo || !el_signx32(e2))
1265                             regm3 &= ~mAX;
1266                     }
1267                     allocreg(cdb,&regm3,&r3,TYint);
1268                     cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
1269                     if (shpost != 1)
1270                         cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
1271                 }
1272                 else
1273                 {
1274                     /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
1275                      *   SHR   EAX,shpre
1276                      *   MOV   reg,m
1277                      *   MUL   reg
1278                      *   SHR   EDX,shpost
1279                      */
1280                     regm = mAX;
1281                     if (oper == OPmod || oper == OPremquo)
1282                         regm = allregs & ~(mAX|mDX);
1283                     codelem(cdb,e1,&regm,false);       // eval left leaf
1284                     reg = findreg(regm);
1285 
1286                     if (reg != AX)
1287                     {
1288                         getregs(cdb,mAX);
1289                         genmovreg(cdb,AX,reg);                 // MOV EAX,reg
1290                     }
1291                     if (shpre)
1292                     {
1293                         getregs(cdb,mAX);
1294                         cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
1295                     }
1296                     getregs(cdb,mDX);
1297                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1298                     getregs(cdb,mDX | mAX);
1299                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
1300                     if (shpost)
1301                         cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
1302                     r3 = DX;
1303                 }
1304 
1305                 switch (oper)
1306                 {   case OPdiv:
1307                         // r3 = quotient
1308                         resreg = mask(r3);
1309                         break;
1310 
1311                     case OPmod:
1312                         /* reg = original value
1313                          * r3  = quotient
1314                          */
1315                         assert(!(regm & mAX));
1316                         if (el_signx32(e2))
1317                         {
1318                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1319                         }
1320                         else
1321                         {
1322                             assert(!(mask(r3) & mAX));
1323                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
1324                             getregs(cdb,mAX);
1325                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1326                         }
1327                         getregs(cdb,regm);
1328                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1329                         resreg = regm;
1330                         break;
1331 
1332                     case OPremquo:
1333                         /* reg = original value
1334                          * r3  = quotient
1335                          */
1336                         assert(!(mask(r3) & (mAX|regm)));
1337                         assert(!(regm & mAX));
1338                         if (el_signx32(e2))
1339                         {
1340                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1341                         }
1342                         else
1343                         {
1344                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0); // MOV EAX,e2factor
1345                             getregs(cdb,mAX);
1346                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1347                         }
1348                         getregs(cdb,regm);
1349                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1350                         genmovreg(cdb, AX, r3);              // MOV EAX,r3
1351                         genmovreg(cdb, DX, reg);             // MOV EDX,reg
1352                         resreg = mDX | mAX;
1353                         break;
1354 
1355                     default:
1356                         assert(0);
1357                 }
1358                 freenode(e2);
1359                 fixresult(cdb,e,resreg,pretregs);
1360                 return;
1361             }
1362 
1363             if (sz > REGSIZE || !el_signx32(e2))
1364                 goto L2;
1365 
1366             if (oper == OPmul && config.target_cpu >= TARGET_80286)
1367             {   reg_t reg;
1368                 int ss;
1369 
1370                 freenode(e2);
1371                 retregs = isbyte ? BYTEREGS : ALLREGS;
1372                 resreg = *pretregs & (ALLREGS | mBP);
1373                 if (!resreg)
1374                     resreg = retregs;
1375 
1376                 if (!I16)
1377                 {   // See if we can use an LEA instruction
1378                     int ss2 = 0;
1379                     int shift;
1380 
1381                     switch (e2factor)
1382                     {
1383                         case 12:    ss = 1; ss2 = 2; goto L4;
1384                         case 24:    ss = 1; ss2 = 3; goto L4;
1385 
1386                         case 6:
1387                         case 3:     ss = 1; goto L4;
1388 
1389                         case 20:    ss = 2; ss2 = 2; goto L4;
1390                         case 40:    ss = 2; ss2 = 3; goto L4;
1391 
1392                         case 10:
1393                         case 5:     ss = 2; goto L4;
1394 
1395                         case 36:    ss = 3; ss2 = 2; goto L4;
1396                         case 72:    ss = 3; ss2 = 3; goto L4;
1397 
1398                         case 18:
1399                         case 9:     ss = 3; goto L4;
1400 
1401                         L4:
1402                         {
1403                         static if (1)
1404                         {
1405                             regm_t regm = isbyte ? BYTEREGS : ALLREGS;
1406                             regm &= ~(mBP | mR13);                  // don't use EBP
1407                             codelem(cdb,e.EV.E1,&regm,true);
1408                             uint r = findreg(regm);
1409 
1410                             if (ss2)
1411                             {   // Don't use EBP
1412                                 resreg &= ~(mBP | mR13);
1413                                 if (!resreg)
1414                                     resreg = retregs;
1415                             }
1416                             allocreg(cdb,&resreg,&reg,tyml);
1417 
1418                             cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1419                                         modregxrmx(ss,r,r));
1420                             assert((r & 7) != BP);
1421                             if (ss2)
1422                             {
1423                                 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1424                                                modregxrm(ss2,reg,5));
1425                                 cdb.last().IFL1 = FLconst;
1426                                 cdb.last().IEV1.Vint = 0;
1427                             }
1428                             else if (!(e2factor & 1))    // if even factor
1429                             {
1430                                 genregs(cdb,0x03,reg,reg); // ADD reg,reg
1431                                 code_orrex(cdb.last(),rex);
1432                             }
1433                             fixresult(cdb,e,resreg,pretregs);
1434                             return;
1435                         }
1436                         else
1437                         {
1438                             // Don't use EBP
1439                             resreg &= ~mBP;
1440                             if (!resreg)
1441                                 resreg = retregs;
1442 
1443                             codelem(cdb,e.EV.E1,&resreg,false);
1444                             reg = findreg(resreg);
1445                             getregs(cdb,resreg);
1446                             cdb.gen2sib(LEA,modregrm(0,reg,4),
1447                                         modregrm(ss,reg,reg));
1448                             if (ss2)
1449                             {
1450                                 cdb.gen2sib(LEA,modregrm(0,reg,4),
1451                                             modregrm(ss2,reg,5));
1452                                 cdb.last().IFL1 = FLconst;
1453                                 cdb.last().IEV1.Vint = 0;
1454                             }
1455                             else if (!(e2factor & 1))    // if even factor
1456                                 genregs(cdb,0x03,reg,reg); // ADD reg,reg
1457                             fixresult(cdb,e,resreg,pretregs);
1458                             return;
1459                         }
1460                         }
1461                         case 37:
1462                         case 74:    shift = 2;
1463                                     goto L5;
1464                         case 13:
1465                         case 26:    shift = 0;
1466                                     goto L5;
1467                         L5:
1468                         {
1469                             // Don't use EBP
1470                             resreg &= ~(mBP | mR13);
1471                             if (!resreg)
1472                                 resreg = retregs;
1473                             allocreg(cdb,&resreg,&reg,TYint);
1474 
1475                             regm_t sregm = (ALLREGS & ~mR13) & ~resreg;
1476                             codelem(cdb,e.EV.E1,&sregm,false);
1477                             uint sreg = findreg(sregm);
1478                             getregs(cdb,resreg | sregm);
1479                             // LEA reg,[sreg * 4][sreg]
1480                             // SHL sreg,shift
1481                             // LEA reg,[sreg * 8][reg]
1482                             assert((sreg & 7) != BP);
1483                             assert((reg & 7) != BP);
1484                             cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1485                                                   modregxrmx(2,sreg,sreg));
1486                             if (shift)
1487                                 cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift);
1488                             cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1489                                                   modregxrmx(3,sreg,reg));
1490                             if (!(e2factor & 1))         // if even factor
1491                             {
1492                                 genregs(cdb,0x03,reg,reg); // ADD reg,reg
1493                                 code_orrex(cdb.last(),rex);
1494                             }
1495                             fixresult(cdb,e,resreg,pretregs);
1496                             return;
1497                         }
1498 
1499                         default:
1500                             break;
1501                     }
1502                 }
1503 
1504                 scodelem(cdb,e.EV.E1,&retregs,0,true);     // eval left leaf
1505                 const regx = findreg(retregs);
1506                 allocreg(cdb,&resreg,&rreg,e.Ety);
1507 
1508                 // IMUL regx,imm16
1509                 cdb.genc2(0x69,grex | modregxrmx(3,rreg,regx),e2factor);
1510                 fixresult(cdb,e,resreg,pretregs);
1511                 return;
1512             }
1513 
1514             // Special code for signed divide or modulo by power of 2
1515             if ((sz == REGSIZE || (I64 && sz == 4)) &&
1516                 (oper == OPdiv || oper == OPmod) && !uns &&
1517                 (pow2 = ispow2(e2factor)) != -1 &&
1518                 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv)
1519                )
1520             {
1521                 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386)
1522                 {
1523                     //     test    eax,eax
1524                     //     jns     L1
1525                     //     add     eax,1
1526                     // L1: sar     eax,1
1527 
1528                     retregs = allregs;
1529                     codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1530                     const reg = findreg(retregs);
1531                     freenode(e2);
1532                     getregs(cdb,retregs);
1533                     gentstreg(cdb,reg);            // TEST reg,reg
1534                     code_orrex(cdb.last(), rex);
1535                     code *cnop = gennop(null);
1536                     genjmp(cdb,JNS,FLcode,cast(block *)cnop);   // JNS cnop
1537                     if (I64)
1538                     {
1539                         cdb.gen2(0xFF,modregrmx(3,0,reg));      // INC reg
1540                         code_orrex(cdb.last(),rex);
1541                     }
1542                     else
1543                         cdb.gen1(0x40 + reg);                   // INC reg
1544                     cdb.append(cnop);
1545                     cdb.gen2(0xD1,grex | modregrmx(3,7,reg));   // SAR reg,1
1546                     resreg = retregs;
1547                     fixresult(cdb,e,resreg,pretregs);
1548                     return;
1549                 }
1550                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1551                 freenode(e2);
1552                 getregs(cdb,mAX | mDX);             // modify these regs
1553                 cdb.gen1(0x99);                             // CWD
1554                 code_orrex(cdb.last(), rex);
1555                 if (pow2 == 1)
1556                 {
1557                     if (oper == OPdiv)
1558                     {
1559                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));  // SUB AX,DX
1560                         cdb.gen2(0xD1,grex | modregrm(3,7,AX));   // SAR AX,1
1561                     }
1562                     else // OPmod
1563                     {
1564                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));   // XOR AX,DX
1565                         cdb.genc2(0x81,grex | modregrm(3,4,AX),1); // AND AX,1
1566                         cdb.gen2(0x03,grex | modregrm(3,DX,AX));   // ADD DX,AX
1567                     }
1568                 }
1569                 else
1570                 {   targ_ulong m;
1571 
1572                     m = (1 << pow2) - 1;
1573                     if (oper == OPdiv)
1574                     {
1575                         cdb.genc2(0x81,grex | modregrm(3,4,DX),m);  // AND DX,m
1576                         cdb.gen2(0x03,grex | modregrm(3,AX,DX));    // ADD AX,DX
1577                         // Be careful not to generate this for 8088
1578                         assert(config.target_cpu >= TARGET_80286);
1579                         cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2
1580                     }
1581                     else // OPmod
1582                     {
1583                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1584                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1585                         cdb.genc2(0x81,grex | modregrm(3,4,AX),m);  // AND AX,mask
1586                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1587                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1588                         resreg = mAX;
1589                     }
1590                 }
1591                 fixresult(cdb,e,resreg,pretregs);
1592                 return;
1593             }
1594             goto L2;
1595 
1596         case OPind:
1597             if (!e2.Ecount)                        // if not CSE
1598                     goto L1;                        // try OP reg,EA
1599             goto L2;
1600 
1601         default:                                    // OPconst and operators
1602         L2:
1603             //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
1604             codelem(cdb,e1,&retregs,false);           // eval left leaf
1605             scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
1606             if (sz <= REGSIZE)
1607             {
1608                 getregs(cdb,mAX | mDX);     // trash these regs
1609                 if (op == 7)                        // signed divide
1610                 {
1611                     cdb.gen1(0x99);                 // CWD
1612                     code_orrex(cdb.last(),rex);
1613                 }
1614                 else if (op == 6)                   // uint divide
1615                 {
1616                     movregconst(cdb,DX,0,(sz == 8) ? 64 : 0);  // MOV DX,0
1617                     getregs(cdb,mDX);
1618                 }
1619                 rreg = findreg(rretregs);
1620                 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,op,rreg)); // OP AX,rreg
1621                 if (I64 && isbyte && rreg >= 4)
1622                     code_orrex(cdb.last(), REX);
1623                 fixresult(cdb,e,resreg,pretregs);
1624             }
1625             else if (sz == 2 * REGSIZE)
1626             {
1627                 if (config.target_cpu >= TARGET_PentiumPro && oper == OPmul)
1628                 {
1629                     /*  IMUL    ECX,EAX
1630                         IMUL    EDX,EBX
1631                         ADD     ECX,EDX
1632                         MUL     EBX
1633                         ADD     EDX,ECX
1634                      */
1635                      getregs(cdb,mAX|mDX|mCX);
1636                      cdb.gen2(0x0FAF,modregrm(3,CX,AX));
1637                      cdb.gen2(0x0FAF,modregrm(3,DX,BX));
1638                      cdb.gen2(0x03,modregrm(3,CX,DX));
1639                      cdb.gen2(0xF7,modregrm(3,4,BX));
1640                      cdb.gen2(0x03,modregrm(3,DX,CX));
1641                      fixresult(cdb,e,mDX|mAX,pretregs);
1642                 }
1643                 else
1644                     callclib(cdb,e,lib,pretregs,keepregs);
1645             }
1646             else
1647                     assert(0);
1648             return;
1649 
1650         case OPvar:
1651         L1:
1652             if (!I16 && sz <= REGSIZE)
1653             {
1654                 if (oper == OPmul && sz > 1)        // no byte version
1655                 {
1656                     // Generate IMUL r32,r/m32
1657                     retregs = *pretregs & (ALLREGS | mBP);
1658                     if (!retregs)
1659                         retregs = ALLREGS;
1660                     codelem(cdb,e1,&retregs,false);        // eval left leaf
1661                     resreg = retregs;
1662                     loadea(cdb,e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs);
1663                     freenode(e2);
1664                     fixresult(cdb,e,resreg,pretregs);
1665                     return;
1666                 }
1667             }
1668             else
1669             {
1670                 if (sz == 2 * REGSIZE)
1671                 {
1672                     if (oper != OPmul || e.EV.E1.Eoper != opunslng ||
1673                         e1.Ecount)
1674                         goto L2;            // have to handle it with codelem()
1675 
1676                     retregs = ALLREGS & ~(mAX | mDX);
1677                     codelem(cdb,e1.EV.E1,&retregs,false);    // eval left leaf
1678                     const reg = findreg(retregs);
1679                     getregs(cdb,mAX);
1680                     genmovreg(cdb,AX,reg);            // MOV AX,reg
1681                     loadea(cdb,e2,&cs,0xF7,4,REGSIZE,mAX | mDX | mskl(reg),mAX | mDX);  // MUL EA+2
1682                     getregs(cdb,retregs);
1683                     cdb.gen1(0x90 + reg);                          // XCHG AX,reg
1684                     getregs(cdb,mAX | mDX);
1685                     if ((cs.Irm & 0xC0) == 0xC0)            // if EA is a register
1686                         loadea(cdb,e2,&cs,0xF7,4,0,mAX | mskl(reg),mAX | mDX); // MUL EA
1687                     else
1688                     {   getlvalue_lsw(&cs);
1689                         cdb.gen(&cs);                       // MUL EA
1690                     }
1691                     cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD DX,reg
1692 
1693                     freenode(e1);
1694                     fixresult(cdb,e,mAX | mDX,pretregs);
1695                     return;
1696                 }
1697                 assert(sz <= REGSIZE);
1698             }
1699 
1700             // loadea() handles CWD or CLR DX for divides
1701             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
1702             loadea(cdb,e2,&cs,0xF7 ^ isbyte,op,0,
1703                    (oper == OPmul) ? mAX : mAX | mDX,
1704                    mAX | mDX);
1705             freenode(e2);
1706             fixresult(cdb,e,resreg,pretregs);
1707             return;
1708     }
1709     assert(0);
1710 }
1711 
1712 
1713 /***************************
1714  * Handle OPnot and OPbool.
1715  * Generate:
1716  *      c:      [evaluate e1]
1717  *      cfalse: [save reg code]
1718  *              clr     reg
1719  *              jmp     cnop
1720  *      ctrue:  [save reg code]
1721  *              clr     reg
1722  *              inc     reg
1723  *      cnop:   nop
1724  */
1725 
1726 void cdnot(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1727 {
         // Generate code for the logical operators OPnot and OPbool applied to e.EV.E1.
         //
         // Params:
         //   cdb      = builder that receives the generated instructions
         //   e        = the OPnot / OPbool node; its operand is e.EV.E1
         //   pretregs = mask of acceptable result locations (registers and/or mPSW);
         //              it is written through on several paths (e.g. mPSW is cleared below)
         //
         // Integer operands are handled by one of three branch-free sequences; anything
         // else (floating point, or integer cases that match none of them) falls through
         // to the generic branch-based sequence at the end of the function.
1728     //printf("cdnot()\n");
1729     reg_t reg;
         // NOTE(review): forflags is declared tym_t but holds (*pretregs & mPSW) plus the value 64
1730     tym_t forflags;
1731     regm_t retregs;
1732     elem *e1 = e.EV.E1;
1733 
         // Nothing wanted, or only the flags wanted: just evaluate the operand with
         // the caller's request and return.
1734     if (*pretregs == 0)
1735         goto L1;
1736     if (*pretregs == mPSW)
1737     {   //assert(e.Eoper != OPnot && e.Eoper != OPbool);*/ /* should've been optimized
1738     L1:
1739         codelem(cdb,e1,pretregs,false);      // evaluate e1 for cc
1740         return;
1741     }
1742 
1743     OPER op = e.Eoper;
1744     uint sz = tysize(e1.Ety);
         // REX.W is needed when the operand is 8 bytes wide in 64-bit mode;
         // grex is the same bit positioned for OR-ing into a gen2()/genc2() rm argument.
1745     uint rex = (I64 && sz == 8) ? REX_W : 0;
1746     uint grex = rex << 16;
1747 
1748     if (!tyfloating(e1.Ety))
1749     {
         // Case 1: operand is a variable no wider than a register - compare it in place
         // using the effective address produced by getlvalue().
1750     if (sz <= REGSIZE && e1.Eoper == OPvar)
1751     {   code cs;
1752 
1753         getlvalue(cdb,&cs,e1,0);
1754         freenode(e1);
1755         if (!I16 && sz == 2)
1756             cs.Iflags |= CFopsize;
1757 
1758         retregs = *pretregs & (ALLREGS | mBP);
             // Case 1a: target is 486 or later and the result is one byte: CMP e1,0 then SETcc.
1759         if (config.target_cpu >= TARGET_80486 &&
1760             tysize(e.Ety) == 1)
1761         {
                 // Use CMP EA,reg (0x39) if some register is already known to hold 0,
                 // otherwise CMP EA,imm (0x81 with reg field 7) with an immediate 0.
1762             if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,0,&reg))
1763             {
1764                 cs.Iop = 0x39;
1765                 if (I64 && (sz == 1) && reg >= 4)
1766                     cs.Irex |= REX;
1767             }
1768             else
1769             {   cs.Iop = 0x81;
1770                 reg = 7;
1771                 cs.IFL2 = FLconst;
1772                 cs.IEV2.Vint = 0;
1773             }
                 // Byte operands use the opcode with the low bit cleared (0x38 / 0x80)
1774             cs.Iop ^= (sz == 1);
1775             code_newreg(&cs,reg);
1776             cdb.gen(&cs);                             // CMP e1,0
1777 
                 // SETcc needs a byte-addressable destination register
1778             retregs &= BYTEREGS;
1779             if (!retregs)
1780                 retregs = BYTEREGS;
1781             allocreg(cdb,&retregs,&reg,TYint);
1782 
1783             const opcode_t iop = (op == OPbool)
1784                 ? 0x0F95    // SETNZ rm8
1785                 : 0x0F94;   // SETZ rm8
1786             cdb.gen2(iop, modregrmx(3,0,reg));
1787             if (reg >= 4)
1788                 code_orrex(cdb.last(), REX);
                 // NOTE(review): presumably the flags left by CMP e1,0 already agree with an
                 // OPbool result, so no extra flag fix-up is requested - confirm against fixresult()
1789             if (op == OPbool)
1790                 *pretregs &= ~mPSW;
1791             goto L4;
1792         }
1793 
             // Case 1b: CMP e1,1 and then share the SBB sequence at L2.
             // Same register-or-immediate choice as above, this time looking for the value 1.
1794         if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,&reg))
1795             cs.Iop = 0x39;
1796         else
1797         {   cs.Iop = 0x81;
1798             reg = 7;
1799             cs.IFL2 = FLconst;
1800             cs.IEV2.Vint = 1;
1801         }
1802         if (I64 && (sz == 1) && reg >= 4)
1803             cs.Irex |= REX;
1804         cs.Iop ^= (sz == 1);
1805         code_newreg(&cs,reg);
1806         cdb.gen(&cs);                         // CMP e1,1
1807 
1808         allocreg(cdb,&retregs,&reg,TYint);
             // CMP e1,1 produces a borrow when e1 == 0, which is the opposite sense of the
             // carry produced by the NEG in case 3, so the operator is exchanged before
             // joining the shared code (the "reg==0 if e1==0" remark at L2 describes the
             // NEG path; on this path the SBB result is inverted and the swap compensates).
1809         op ^= (OPbool ^ OPnot);                 // switch operators
1810         goto L2;
1811     }
         // Case 2: 486 or later, byte-sized result, operand is not a plain variable:
         // evaluate the operand into the flags and materialize the result with SETcc.
1812     else if (config.target_cpu >= TARGET_80486 &&
1813         tysize(e.Ety) == 1)
1814     {
             // jmpopcode() is queried before the operand is evaluated
1815         int jop = jmpopcode(e.EV.E1);
1816         retregs = mPSW;
1817         codelem(cdb,e.EV.E1,&retregs,false);
1818         retregs = *pretregs & BYTEREGS;
1819         if (!retregs)
1820             retregs = BYTEREGS;
1821         allocreg(cdb,&retregs,&reg,TYint);
1822 
             // The SETcc opcode takes its condition from the low 4 bits of the jump opcode;
             // flipping bit 0 selects the opposite condition for OPnot.
1823         int iop = 0x0F90 | (jop & 0x0F);        // SETcc rm8
1824         if (op == OPnot)
1825             iop ^= 1;
1826         cdb.gen2(iop,grex | modregrmx(3,0,reg));
1827         if (reg >= 4)
1828             code_orrex(cdb.last(), REX);
1829         if (op == OPbool)
1830             *pretregs &= ~mPSW;
1831         goto L4;
1832     }
         // Case 3: operand fits in a register: NEG reg / SBB reg,reg / (INC or NEG).
         // Byte operands take this path only on targets older than the PentiumPro.
1833     else if (sz <= REGSIZE &&
1834         // NEG bytereg is too expensive
1835         (sz != 1 || config.target_cpu < TARGET_PentiumPro))
1836     {
1837         retregs = *pretregs & (ALLREGS | mBP);
1838         if (sz == 1 && !(retregs &= BYTEREGS))
1839             retregs = BYTEREGS;
1840         codelem(cdb,e.EV.E1,&retregs,false);
1841         reg = findreg(retregs);
1842         getregs(cdb,retregs);
1843         cdb.gen2(sz == 1 ? 0xF6 : 0xF7,grex | modregrmx(3,3,reg));   // NEG reg
             // CFpsw marks the NEG as setting flags that the following SBB consumes
             // NOTE(review): exact CFpsw semantics are defined outside this chunk
1844         code_orflag(cdb.last(),CFpsw);
1845         if (!I16 && sz == SHORTSIZE)
1846             code_orflag(cdb.last(),CFopsize);
         // L2 is also entered from case 1b (with op already exchanged)
1847     L2:
1848         genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
1849         code_orrex(cdb.last(), rex);
1850         // At this point, reg==0 if e1==0, reg==-1 if e1!=0
1851         if (op == OPnot)
1852         {
                 // The one-byte INC encoding (0x40+reg) is only emitted outside 64-bit mode
1853             if (I64)
1854                 cdb.gen2(0xFF,grex | modregrmx(3,0,reg));    // INC reg
1855             else
1856                 cdb.gen1(0x40 + reg);                        // INC reg
1857         }
1858         else
1859             cdb.gen2(0xF7,grex | modregrmx(3,3,reg));    // NEG reg
1860         if (*pretregs & mPSW)
1861         {   code_orflag(cdb.last(),CFpsw);
1862             *pretregs &= ~mPSW;         // flags are always set anyway
1863         }
         // L4 is also entered from cases 1a and 2 once retregs holds the result register
1864     L4:
1865         fixresult(cdb,e,retregs,pretregs);
1866         return;
1867     }
1868     }
         // Generic sequence (see the layout in the comment above this function):
         // branch on the operand, then load 0 on one path and 1 on the other.
1869     code *cnop = gennop(null);
1870     code *ctrue = gennop(null);
         // Jump to ctrue when the result is to be 1: on a false operand for OPnot,
         // on a true operand for OPbool.
1871     logexp(cdb,e.EV.E1,(op == OPnot) ? false : true,FLcode,ctrue);
1872     forflags = *pretregs & mPSW;
         // NOTE(review): 64 is presumably movregconst()'s "64-bit operand" flag - confirm
1873     if (I64 && sz == 8)
1874         forflags |= 64;
1875     assert(tysize(e.Ety) <= REGSIZE);              // result better be int
         // Allocate the result register into a private builder so that any
         // register-saving code it emits can be replicated on the "true" path.
1876     CodeBuilder cdbfalse;
1877     cdbfalse.ctor();
1878     allocreg(cdbfalse,pretregs,&reg,e.Ety);        // allocate reg for result
1879     code *cfalse = cdbfalse.finish();
1880     CodeBuilder cdbtrue;
1881     cdbtrue.ctor();
1882     cdbtrue.append(ctrue);
1883     for (code *c1 = cfalse; c1; c1 = code_next(c1))
1884         cdbtrue.gen(c1);                                      // duplicate reg save code
1885     CodeBuilder cdbfalse2;
1886     cdbfalse2.ctor();
1887     movregconst(cdbfalse2,reg,0,forflags);                    // mov 0 into reg
1888     regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
1889     movregconst(cdbtrue,reg,1,forflags);                      // mov 1 into reg
1890     regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
1891     genjmp(cdbfalse2,JMP,FLcode,cast(block *) cnop);          // skip over ctrue
         // Final layout: [cfalse] [mov 0; jmp cnop] [ctrue: saves; mov 1] [cnop]
1892     cdb.append(cfalse);
1893     cdb.append(cdbfalse2);
1894     cdb.append(cdbtrue);
1895     cdb.append(cnop);
1896 }
1897 
1898 
1899 /************************
1900  * Complement operator
1901  */
1902 
void cdcom(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Result not wanted: evaluate the operand with the caller's (empty) request.
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tym_t basicTy = tybasic(e.Ety);
    const int size = _tysize[basicTy];
    const bool isByte = (size == 1);
    const uint rexw = (I64 && size == 8) ? REX_W : 0;

    // Byte-sized values are restricted to BYTEREGS.
    const regm_t candidates = isByte ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & candidates;
    if (!retregs)
        retregs = candidates;

    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                   // the operand registers are overwritten

    // Group-3 opcode: 0xF6 for byte operands, 0xF7 otherwise; /2 selects NOT.
    const notOpcode = isByte ? 0xF6 : 0xF7;

    // A value held in a register pair is complemented most-significant half first.
    const firstReg = (size <= REGSIZE) ? findreg(retregs) : findregmsw(retregs);
    genregs(cdb,notOpcode,2,firstReg);      // NOT reg
    code_orrex(cdb.last(), rexw);
    if (I64 && isByte && firstReg >= 4)
        code_orrex(cdb.last(), REX);

    if (size == 2 * REGSIZE)
    {
        const secondReg = findreglsw(retregs);
        genregs(cdb,notOpcode,2,secondReg); // NOT of the least-significant half
    }

    fixresult(cdb,e,retregs,pretregs);
}
1943 
1944 /************************
1945  * Bswap operator
1946  */
1947 
void cdbswap(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Value unused: evaluate the operand with the caller's (empty) request and stop.
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    // Only 4-byte operands are supported by this generator.
    const tym_t basicTy = tybasic(e.Ety);
    assert(_tysize[basicTy] == 4);

    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;

    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                   // the register is modified in place

    const target = findreg(retregs);
    const low3 = target & 7;                // register number bits encoded in the opcode
    cdb.gen2(0x0FC8 + low3,0);              // BSWAP reg
    if (target & 8)                         // registers 8 and up need REX.B
        code_orrex(cdb.last(), REX_B);

    fixresult(cdb,e,retregs,pretregs);
}
1969 
1970 /*************************
1971  * ?: operator
1972  */
1973 
1974 void cdcond(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1975 {
         // Generate code for the conditional expression (e1 ? e21 : e22), where
         // e.EV.E1 is the condition and e.EV.E2 is the node holding both alternatives.
         //
         // Params:
         //   cdb      = builder that receives the generated instructions
         //   e        = the conditional node
         //   pretregs = mask of acceptable result locations; written with the location
         //              actually used on the paths that do not go through fixresult()
         //
         // Three special forms are recognized first, then the general two-branch form:
         //   1. (e ? e : f)            - condition and first alternative are the same node
         //   2. (a rel b) ? c1 : c2    - carry-based compare with two constants, branch-free
         //   3. (e ? c : f)            - first alternative is a constant
         // cgstate.stackclean is incremented on entry and decremented on every return path.
1976     con_t regconold,regconsave;
1977     uint stackpushold,stackpushsave;
         // NOTE(review): ehindexold / ehindexsave are not referenced anywhere in this function
1978     int ehindexold,ehindexsave;
1979     uint sz2;
1980 
1981     /* vars to save state of 8087 */
1982     int stackusedold,stackusedsave;
1983     NDP[global87.stack.length] _8087old;
1984     NDP[global87.stack.length] _8087save;
1985 
1986     //printf("cdcond(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
1987     elem *e1 = e.EV.E1;
1988     elem *e2 = e.EV.E2;
1989     elem *e21 = e2.EV.E1;
1990     elem *e22 = e2.EV.E2;
1991     regm_t psw = *pretregs & mPSW;               /* save PSW bit                 */
         // op1, sz1 and jop are taken from e1 before docommas() below may replace e1
1992     const op1 = e1.Eoper;
1993     uint sz1 = tysize(e1.Ety);
1994     uint rex = (I64 && sz1 == 8) ? REX_W : 0;
1995     uint grex = rex << 16;
1996     uint jop = jmpopcode(e1);
1997 
1998     uint jop1 = jmpopcode(e21);
1999     uint jop2 = jmpopcode(e22);
2000 
2001     docommas(cdb,&e1);
2002     cgstate.stackclean++;
2003 
         // ---- Form 1: (e ? e : f) ----
2004     if (!OTrel(op1) && e1 == e21 &&
2005         sz1 <= REGSIZE && !tyfloating(e1.Ety))
2006     {   // Recognize (e ? e : f)
2007 
2008         code *cnop1 = gennop(null);
             // Evaluate e once, into both a result location and the flags
2009         regm_t retregs = *pretregs | mPSW;
2010         codelem(cdb,e1,&retregs,false);
2011 
2012         cse_flush(cdb,1);                // flush CSEs to memory
             // If the condition holds the value of e is already the result: skip f
2013         genjmp(cdb,jop,FLcode,cast(block *)cnop1);
2014         freenode(e21);
2015 
2016         regconsave = regcon;
2017         stackpushsave = stackpush;
2018 
2019         retregs |= psw;
             // On the fall-through path e was false, so its register is recorded as holding 0
2020         if (retregs & (mBP | ALLREGS))
2021             regimmed_set(findreg(retregs),0);
2022         codelem(cdb,e22,&retregs,false);
2023 
             // Merge the register state of the two paths; f must not change the push depth
2024         andregcon(&regconsave);
2025         assert(stackpushsave == stackpush);
2026 
2027         *pretregs = retregs;
2028         freenode(e2);
2029         cdb.append(cnop1);
2030         cgstate.stackclean--;
2031         return;
2032     }
2033 
         // ---- Form 2: relational condition tested via carry, both alternatives constant ----
         // (tysize(e2.Ety) <= REGSIZE is tested twice; the second test also assigns sz2)
2034     if (OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE &&
2035         !e1.Ecount &&
2036         (jop == JC || jop == JNC) &&
2037         (sz2 = tysize(e2.Ety)) <= REGSIZE &&
2038         e21.Eoper == OPconst &&
2039         e22.Eoper == OPconst
2040        )
2041     {
2042         regm_t retregs;
2043         targ_size_t v1,v2;
2044 
             // Byte results outside 64-bit mode are restricted to BYTEREGS
2045         if (sz2 != 1 || I64)
2046         {
2047             retregs = *pretregs & (ALLREGS | mBP);
2048             if (!retregs)
2049                 retregs = ALLREGS;
2050         }
2051         else
2052         {
2053             retregs = *pretregs & BYTEREGS;
2054             if (!retregs)
2055                 retregs = BYTEREGS;
2056         }
2057 
             // NOTE(review): cdcmp_flag is a module-level flag; presumably it asks the compare
             // generator to leave 0 / -1 (from the carry) in a register - confirm in cdcmp.
             // It is set before the immediate-range checks below and is not reset here when
             // those checks reject the case and control falls through to the later forms.
2058         cdcmp_flag = 1;
2059         v1 = cast(targ_size_t)e21.EV.Vllong;
2060         v2 = cast(targ_size_t)e22.EV.Vllong;
             // For JNC the roles of the two constants are exchanged
2061         if (jop == JNC)
2062         {   v1 = v2;
2063             v2 = cast(targ_size_t)e21.EV.Vllong;
2064         }
2065 
             // Narrow the constants to the result size; byte results use opcode 0x80
2066         opcode_t opcode = 0x81;
2067         switch (sz2)
2068         {   case 1:     opcode--;
2069                         v1 = cast(byte) v1;
2070                         v2 = cast(byte) v2;
2071                         break;
2072 
2073             case 2:     v1 = cast(short) v1;
2074                         v2 = cast(short) v2;
2075                         break;
2076 
2077             case 4:     v1 = cast(int) v1;
2078                         v2 = cast(int) v2;
2079                         break;
2080             default:
2081                         break;
2082         }
2083 
             // The two empty branches reject constants that cannot be encoded as an
             // immediate in 64-bit mode; nothing is emitted and the later forms are tried.
2084         if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1)
2085         {
2086             // only zero-extension from 32-bits is available for 'or'
2087         }
2088         else if (I64 && cast(targ_llong)v2 != cast(targ_llong)cast(targ_long)v2)
2089         {
2090             // only sign-extension from 32-bits is available for 'and'
2091         }
2092         else
2093         {
2094             codelem(cdb,e1,&retregs,false);
2095             const reg = findreg(retregs);
2096 
                 // Assuming reg is all ones or zero: NOT turns (-1, 0) into (0, -1) directly
2097             if (v1 == 0 && v2 == ~cast(targ_size_t)0)
2098             {
2099                 cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg));  // NOT reg
2100                 if (I64 && sz2 == REGSIZE)
2101                     code_orrex(cdb.last(), REX_W);
2102             }
2103             else
2104             {
                     // (reg & (v1 - v2)) + v2 yields v1 when reg is all ones and v2 when reg is 0
2105                 v1 -= v2;
2106                 cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1);   // AND reg,v1-v2
2107                 if (I64 && sz2 == 1 && reg >= 4)
2108                     code_orrex(cdb.last(), REX);
                     // The one-byte INC/DEC encodings are only emitted outside 64-bit mode
2109                 if (v2 == 1 && !I64)
2110                     cdb.gen1(0x40 + reg);                     // INC reg
2111                 else if (v2 == -1L && !I64)
2112                     cdb.gen1(0x48 + reg);                     // DEC reg
2113                 else
2114                 {   cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2);   // ADD reg,v2
2115                     if (I64 && sz2 == 1 && reg >= 4)
2116                         code_orrex(cdb.last(), REX);
2117                 }
2118             }
2119 
2120             freenode(e21);
2121             freenode(e22);
2122             freenode(e2);
2123 
2124             fixresult(cdb,e,retregs,pretregs);
2125             cgstate.stackclean--;
2126             return;
2127         }
2128     }
2129 
         // ---- Form 3: (e ? c : f) with a register result wanted ----
2130     if (op1 != OPcond && op1 != OPandand && op1 != OPoror &&
2131         op1 != OPnot && op1 != OPbool &&
2132         e21.Eoper == OPconst &&
2133         sz1 <= REGSIZE &&
2134         *pretregs & (mBP | ALLREGS) &&
2135         tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety))
2136     {   // Recognize (e ? c : f)
2137 
2138         code *cnop1 = gennop(null);
             // Evaluate the condition into the flags only
2139         regm_t retregs = mPSW;
2140         jop = jmpopcode(e1);            // get jmp condition
2141         codelem(cdb,e1,&retregs,false);
2142 
2143         // Set the register with e21 without affecting the flags
2144         retregs = *pretregs & (ALLREGS | mBP);
2145         if (retregs & ~regcon.mvar)
2146             retregs &= ~regcon.mvar;    // don't disturb register variables
2147         // NOTE: see my email (sign extension bug? possible fix, some questions
2148         reg_t reg;
             // NOTE(review): flag 8 presumably means "do not disturb the flags" and 64 a
             // 64-bit load - confirm against regwithvalue()/movregconst()
2149         regwithvalue(cdb,retregs,cast(targ_size_t)e21.EV.Vllong,&reg,tysize(e21.Ety) == 8 ? 64|8 : 8);
2150         retregs = mask(reg);
2151 
2152         cse_flush(cdb,1);                // flush CSE's to memory
             // Condition holds: the constant already in reg is the result, skip f
2153         genjmp(cdb,jop,FLcode,cast(block *)cnop1);
2154         freenode(e21);
2155 
2156         regconsave = regcon;
2157         stackpushsave = stackpush;
2158 
             // Otherwise evaluate f into that same register
2159         codelem(cdb,e22,&retregs,false);
2160 
2161         andregcon(&regconsave);
2162         assert(stackpushsave == stackpush);
2163 
2164         freenode(e2);
2165         cdb.append(cnop1);
2166         fixresult(cdb,e,retregs,pretregs);
2167         cgstate.stackclean--;
2168         return;
2169     }
2170 
         // ---- General form ----
         // Emitted layout:  [condition, jump to cnop1]  [e21 code]  JMP cnop2
         //                  cnop1: [e22 code]  cnop2:
2171     code *cnop1 = gennop(null);
2172     code *cnop2 = gennop(null);         // dummy target addresses
2173     logexp(cdb,e1,false,FLcode,cnop1);  // evaluate condition
         // Snapshot the register, push-depth and x87 stack state that holds right after the
         // condition, so that the second alternative can be generated from the same state
2174     regconold = regcon;
2175     stackusedold = global87.stackused;
2176     stackpushold = stackpush;
2177     memcpy(_8087old.ptr,global87.stack.ptr,global87.stack.sizeof);
2178     regm_t retregs = *pretregs;
         // First alternative is generated into its own builder (cdb1)
2179     CodeBuilder cdb1;
2180     cdb1.ctor();
         // When flags are wanted and jmpopcode(e21) is not JNE, the value is computed into
         // registers only and fixresult() then produces what *pretregs asks for
2181     if (psw && jop1 != JNE)
2182     {
2183         retregs &= ~mPSW;
2184         if (!retregs)
2185             retregs = ALLREGS;
2186         codelem(cdb1,e21,&retregs,false);
2187         fixresult(cdb1,e21,retregs,pretregs);
2188     }
2189     else
2190         codelem(cdb1,e21,&retregs,false);
2191 
2192     if (CPP && e2.Eoper == OPcolon2)
2193     {
2194         code cs;
2195 
2196         // This is necessary so that any cleanup code on one branch
2197         // is redone on the other branch.
             // The e21 code is bracketed by ESCmark2 / ESCrelease2 escape pseudo-instructions
2198         cs.Iop = ESCAPE | ESCmark2;
2199         cs.Iflags = 0;
2200         cs.Irex = 0;
2201         cdb.gen(&cs);
2202         cdb.append(cdb1);
2203         cs.Iop = ESCAPE | ESCrelease2;
2204         cdb.gen(&cs);
2205     }
2206     else
2207         cdb.append(cdb1);
2208 
         // Remember the state after e21 (the *save variables) and rewind to the
         // post-condition snapshot (the *old variables) before generating e22
2209     regconsave = regcon;
2210     regcon = regconold;
2211 
2212     stackpushsave = stackpush;
2213     stackpush = stackpushold;
2214 
2215     stackusedsave = global87.stackused;
2216     global87.stackused = stackusedold;
2217 
2218     memcpy(_8087save.ptr,global87.stack.ptr,global87.stack.sizeof);
2219     memcpy(global87.stack.ptr,_8087old.ptr,global87.stack.sizeof);
2220 
2221     retregs |= psw;                     // PSW bit may have been trashed
         // Second alternative is generated into cdb2, targeting the registers chosen for e21
2222     CodeBuilder cdb2;
2223     cdb2.ctor();
2224     if (psw && jop2 != JNE)
2225     {
2226         retregs &= ~mPSW;
2227         if (!retregs)
2228             retregs = ALLREGS;
2229         codelem(cdb2,e22,&retregs,false);
2230         fixresult(cdb2,e22,retregs,pretregs);
2231     }
2232     else
2233         codelem(cdb2,e22,&retregs,false);   // use same regs as E1
2234     *pretregs = retregs | psw;
         // Merge the register state with both the post-condition and the post-e21 state
2235     andregcon(&regconold);
2236     andregcon(&regconsave);
         // Both alternatives must leave the x87 stack depth and the push depth identical
2237     assert(global87.stackused == stackusedsave);
2238     assert(stackpush == stackpushsave);
2239     memcpy(global87.stack.ptr,_8087save.ptr,global87.stack.sizeof);
2240     freenode(e2);
2241     genjmp(cdb,JMP,FLcode,cast(block *) cnop2);
2242     cdb.append(cnop1);
2243     cdb.append(cdb2);
2244     cdb.append(cnop2);
         // A result left in ST0 is recorded for the whole conditional via note87()
2245     if (*pretregs & mST0)
2246         note87(e,0,0);
2247 
2248     cgstate.stackclean--;
2249 }
2250 
2251 /*********************
2252  * Comma operator OPcomma
2253  */
2254 
void cdcomma(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // The left operand is evaluated with an empty register request:
    // its value is discarded.
    regm_t discarded = 0;
    codelem(cdb,e.EV.E1,&discarded,false);

    // The right operand is evaluated with the caller's request and
    // provides the value of the whole expression.
    codelem(cdb,e.EV.E2,pretregs,false);
}
2261 
2262 
2263 /*********************************
2264  * Do && and || operators.
2265  * Generate:
2266  *              (evaluate e1 and e2, if true goto cnop1)
2267  *      cnop3:  NOP
2268  *      cg:     [save reg code]         ;if we must preserve reg
2269  *              CLR     reg             ;false result (set Z also)
2270  *              JMP     cnop2
2271  *
2272  *      cnop1:  NOP                     ;if e1 evaluates to true
2273  *              [save reg code]         ;preserve reg
2274  *
2275  *              MOV     reg,1           ;true result
2276  *                  or
2277  *              CLR     reg             ;if return result in flags
2278  *              INC     reg
2279  *
2280  *      cnop2:  NOP                     ;mark end of code
2281  */
2282 
void cdloglog(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Do not assert(*pretregs != mPSW) here. It can be tripped by
     *    if ( (b<=a) ? (c<b || a<=c) : c>=a )
     * The code generated for that is ugly, but the case is too obscure
     * to be worth more effort.
     */

    const bool isOrOr = (e.Eoper == OPoror);

    cgstate.stackclean++;

    // trueLabel heads the "result is true" code, which is collected in trueCode;
    // falseLabel marks the "result is false" code.
    code *trueLabel = gennop(null);
    CodeBuilder trueCode;
    trueCode.ctor();
    trueCode.append(trueLabel);
    code *falseLabel = gennop(null);
    elem *rhs = e.EV.E2;

    // Left operand: || leaves for trueLabel when true, && leaves for falseLabel when false.
    if (isOrOr)
        logexp(cdb,e.EV.E1,1,FLcode,trueLabel);
    else
        logexp(cdb,e.EV.E1,0,FLcode,falseLabel);

    con_t regconBeforeRhs = regcon;
    uint pushBeforeRhs = stackpush;

    // ---- No result wanted: evaluate the right operand and throw the value away ----
    if (!*pretregs)
    {
        const bool rhsNeverReturns = !el_returns(rhs);
        codelem(cdb,rhs,pretregs,false);
        if (rhsNeverReturns)
        {
            // Keep the register state from before the right operand,
            // but remember every register it used.
            regconBeforeRhs.used |= regcon.used;
            regcon = regconBeforeRhs;
        }
        else
        {
            andregcon(&regconBeforeRhs);
        }
        assert(stackpush == pushBeforeRhs);
        cdb.append(falseLabel);
        cdb.append(trueCode);           // eval code, throw away result
        cgstate.stackclean--;
        return;
    }

    code *endLabel = gennop(null);
    uint sz = tysize(e.Ety);

    // ---- Right operand is a call returning bool of the result's size, flags not wanted ----
    // The call's own return value serves as the result when it is reached.
    if (tybasic(rhs.Ety) == TYbool &&
        sz == tysize(rhs.Ety) &&
        !(*pretregs & mPSW) &&
        rhs.Eoper == OPcall)
    {
        codelem(cdb,rhs,pretregs,false);

        andregcon(&regconBeforeRhs);

        // stack depth should not change when evaluating E2
        assert(stackpush == pushBeforeRhs);

        assert(sz <= 4);                                    // result better be int
        regm_t resultRegs = *pretregs & allregs;
        reg_t resultReg;
        allocreg(trueCode,&resultRegs,&resultReg,TYint);    // allocate reg for result
        movregconst(trueCode,resultReg,isOrOr,0);           // 1 for ||, 0 for &&
        regcon.immed.mval &= ~mask(resultReg);              // mark reg as unavail
        *pretregs = resultRegs;

        // falseLabel goes before the jump for ||, after it for &&.
        if (isOrOr)
            cdb.append(falseLabel);
        genjmp(cdb,JMP,FLcode,cast(block *) endLabel);      // JMP endLabel
        if (!isOrOr)
            cdb.append(falseLabel);
        cdb.append(trueCode);
        cdb.append(endLabel);

        cgstate.stackclean--;
        return;
    }

    // ---- General case: load 0 or 1 into a register ----
    logexp(cdb,rhs,1,FLcode,trueLabel);
    andregcon(&regconBeforeRhs);

    // stack depth should not change when evaluating E2
    assert(stackpush == pushBeforeRhs);

    assert(sz <= 4);                                        // result better be int
    regm_t retregs = *pretregs & (ALLREGS | mBP);
    if (!retregs)
        retregs = ALLREGS;                                  // if mPSW only

    // Allocate the result register in a private builder so that the code it
    // produces can be copied onto the "true" path as well.
    CodeBuilder allocCode;
    allocCode.ctor();
    reg_t reg;
    allocreg(allocCode,&retregs,&reg,TYint);                // allocate reg for result
    code *saveCode = allocCode.finish();
    code *insn = saveCode;
    while (insn)                                            // for each instruction
    {
        trueCode.gen(insn);                                 // duplicate it
        insn = code_next(insn);
    }

    const regm_t wantFlags = *pretregs & mPSW;

    CodeBuilder falseCode;
    falseCode.ctor();
    movregconst(falseCode,reg,0,wantFlags);                 // MOV reg,0
    regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
    genjmp(falseCode, JMP,FLcode,cast(block *) endLabel);   // JMP endLabel
    movregconst(trueCode,reg,1,wantFlags);                  // reg = 1
    regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
    *pretregs = retregs;

    // Layout: falseLabel, reg save code, "reg = 0; JMP end", true code, endLabel
    cdb.append(falseLabel);
    cdb.append(saveCode);
    cdb.append(falseCode);
    cdb.append(trueCode);
    cdb.append(endLabel);
    cgstate.stackclean--;
}
2392 
2393 
2394 /*********************
2395  * Generate code for shift left or shift right (OPshl,OPshr,OPashr,OProl,OPror).
2396  */
2397 
2398 void cdshift(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2399 {
2400     reg_t resreg;
2401     uint shiftcnt;
2402     regm_t retregs,rretregs;
2403 
2404     //printf("cdshift()\n");
2405     elem *e1 = e.EV.E1;
2406     if (*pretregs == 0)                   // if don't want result
2407     {
2408         codelem(cdb,e1,pretregs,false); // eval left leaf
2409         *pretregs = 0;                  // in case they got set
2410         codelem(cdb,e.EV.E2,pretregs,false);
2411         return;
2412     }
2413 
2414     tym_t tyml = tybasic(e1.Ety);
2415     int sz = _tysize[tyml];
2416     assert(!tyfloating(tyml));
2417     OPER oper = e.Eoper;
2418     uint grex = ((I64 && sz == 8) ? REX_W : 0) << 16;
2419 
2420 version (SCPP)
2421 {
2422     // Do this until the rest of the compiler does OPshr/OPashr correctly
2423     if (oper == OPshr)
2424         oper = (tyuns(tyml)) ? OPshr : OPashr;
2425 }
2426 
2427     uint s1,s2;
2428     switch (oper)
2429     {
2430         case OPshl:
2431             s1 = 4;                     // SHL
2432             s2 = 2;                     // RCL
2433             break;
2434         case OPshr:
2435             s1 = 5;                     // SHR
2436             s2 = 3;                     // RCR
2437             break;
2438         case OPashr:
2439             s1 = 7;                     // SAR
2440             s2 = 3;                     // RCR
2441             break;
2442         case OProl:
2443             s1 = 0;                     // ROL
2444             break;
2445         case OPror:
2446             s1 = 1;                     // ROR
2447             break;
2448         default:
2449             assert(0);
2450     }
2451 
2452     reg_t sreg = NOREG;                   // guard against using value without assigning to sreg
2453     elem *e2 = e.EV.E2;
2454     regm_t forccs = *pretregs & mPSW;            // if return result in CCs
2455     regm_t forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs
2456     bool e2isconst = false;                    // assume for the moment
2457     uint isbyte = (sz == 1);
2458     switch (e2.Eoper)
2459     {
2460         case OPconst:
2461             e2isconst = true;               // e2 is a constant
2462             shiftcnt = e2.EV.Vint;         // get shift count
2463             if ((!I16 && sz <= REGSIZE) ||
2464                 shiftcnt <= 4 ||            // if sequence of shifts
2465                 (sz == 2 &&
2466                     (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) ||
2467                 (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2468                )
2469             {
2470                 retregs = (forregs) ? forregs
2471                                     : ALLREGS;
2472                 if (isbyte)
2473                 {   retregs &= BYTEREGS;
2474                     if (!retregs)
2475                         retregs = BYTEREGS;
2476                 }
2477                 else if (sz > REGSIZE && sz <= 2 * REGSIZE &&
2478                          !(retregs & mMSW))
2479                     retregs |= mMSW & ALLREGS;
2480                 if (s1 == 7)    // if arithmetic right shift
2481                 {
2482                     if (shiftcnt == 8)
2483                         retregs = mAX;
2484                     else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2485                         retregs = mDX|mAX;
2486                 }
2487 
2488                 if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE &&
2489                     oper == OPshl &&
2490                     !e1.Ecount &&
2491                     (e1.Eoper == OPs16_32 || e1.Eoper == OPu16_32 ||
2492                      e1.Eoper == OPs32_64 || e1.Eoper == OPu32_64)
2493                    )
2494                 {   // Handle (shtlng)s << 16
2495                     regm_t r = retregs & mMSW;
2496                     codelem(cdb,e1.EV.E1,&r,false);      // eval left leaf
2497                     regwithvalue(cdb,retregs & mLSW,0,&resreg,0);
2498                     getregs(cdb,r);
2499                     retregs = r | mask(resreg);
2500                     if (forccs)
2501                     {   sreg = findreg(r);
2502                         gentstreg(cdb,sreg);
2503                         *pretregs &= ~mPSW;             // already set
2504                     }
2505                     freenode(e1);
2506                     freenode(e2);
2507                     break;
2508                 }
2509 
2510                 // See if we should use LEA reg,xxx instead of shift
2511                 if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 &&
2512                     (sz == REGSIZE || (I64 && sz == 4)) &&
2513                     oper == OPshl &&
2514                     e1.Eoper == OPvar &&
2515                     !(*pretregs & mPSW) &&
2516                     config.flags4 & CFG4speed
2517                    )
2518                 {
2519                     reg_t reg;
2520                     regm_t regm;
2521 
2522                     if (isregvar(e1,&regm,&reg) && !(regm & retregs))
2523                     {   code cs;
2524                         allocreg(cdb,&retregs,&resreg,e.Ety);
2525                         buildEA(&cs,-1,reg,1 << shiftcnt,0);
2526                         cs.Iop = LEA;
2527                         code_newreg(&cs,resreg);
2528                         cs.Iflags = 0;
2529                         if (I64 && sz == 8)
2530                             cs.Irex |= REX_W;
2531                         cdb.gen(&cs);             // LEA resreg,[reg * ss]
2532                         freenode(e1);
2533                         freenode(e2);
2534                         break;
2535                     }
2536                 }
2537 
2538                 codelem(cdb,e1,&retregs,false); // eval left leaf
2539                 //assert((retregs & regcon.mvar) == 0);
2540                 getregs(cdb,retregs);          // modify these regs
2541 
2542                 {
2543                     if (sz == 2 * REGSIZE)
2544                     {   resreg = findregmsw(retregs);
2545                         sreg = findreglsw(retregs);
2546                     }
2547                     else
2548                     {   resreg = findreg(retregs);
2549                         sreg = NOREG;              // an invalid value
2550                     }
2551                     if (config.target_cpu >= TARGET_80286 &&
2552                         sz <= REGSIZE)
2553                     {
2554                         // SHL resreg,shiftcnt
2555                         assert(!(sz == 1 && (mask(resreg) & ~BYTEREGS)));
2556                         cdb.genc2(0xC1 ^ isbyte,grex | modregxrmx(3,s1,resreg),shiftcnt);
2557                         if (shiftcnt == 1)
2558                             cdb.last().Iop += 0x10;     // short form of shift
2559                         if (I64 && sz == 1 && resreg >= 4)
2560                             cdb.last().Irex |= REX;
2561                         // See if we need operand size prefix
2562                         if (!I16 && oper != OPshl && sz == 2)
2563                             cdb.last().Iflags |= CFopsize;
2564                         if (forccs)
2565                             cdb.last().Iflags |= CFpsw;         // need flags result
2566                     }
2567                     else if (shiftcnt == 8)
2568                     {   if (!(retregs & BYTEREGS) || resreg >= 4)
2569                         {
2570                             goto L1;
2571                         }
2572 
2573                         if (pass != PASSfinal && (!forregs || forregs & (mSI | mDI)))
2574                         {
2575                             // e1 might get into SI or DI in a later pass,
2576                             // so don't put CX into a register
2577                             getregs(cdb,mCX);
2578                         }
2579 
2580                         assert(sz == 2);
2581                         switch (oper)
2582                         {
2583                             case OPshl:
2584                                 // MOV regH,regL        XOR regL,regL
2585                                 assert(resreg < 4 && !grex);
2586                                 genregs(cdb,0x8A,resreg+4,resreg);
2587                                 genregs(cdb,0x32,resreg,resreg);
2588                                 break;
2589 
2590                             case OPshr:
2591                             case OPashr:
2592                                 // MOV regL,regH
2593                                 genregs(cdb,0x8A,resreg,resreg+4);
2594                                 if (oper == OPashr)
2595                                     cdb.gen1(0x98);           // CBW
2596                                 else
2597                                     genregs(cdb,0x32,resreg+4,resreg+4); // CLR regH
2598                                 break;
2599 
2600                             case OPror:
2601                             case OProl:
2602                                 // XCHG regL,regH
2603                                 genregs(cdb,0x86,resreg+4,resreg);
2604                                 break;
2605 
2606                             default:
2607                                 assert(0);
2608                         }
2609                         if (forccs)
2610                             gentstreg(cdb,resreg);
2611                     }
2612                     else if (shiftcnt == REGSIZE * 8)   // it's an lword
2613                     {
2614                         if (oper == OPshl)
2615                             swap(&resreg, &sreg);
2616                         genmovreg(cdb,sreg,resreg);  // MOV sreg,resreg
2617                         if (oper == OPashr)
2618                             cdb.gen1(0x99);                       // CWD
2619                         else
2620                             movregconst(cdb,resreg,0,0);  // MOV resreg,0
2621                         if (forccs)
2622                         {
2623                             gentstreg(cdb,sreg);
2624                             *pretregs &= mBP | ALLREGS | mES;
2625                         }
2626                     }
2627                     else
2628                     {
2629                         if (oper == OPshl && sz == 2 * REGSIZE)
2630                             swap(&resreg, &sreg);
2631                         while (shiftcnt--)
2632                         {
2633                             cdb.gen2(0xD1 ^ isbyte,modregrm(3,s1,resreg));
2634                             if (sz == 2 * REGSIZE)
2635                             {
2636                                 code_orflag(cdb.last(),CFpsw);
2637                                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
2638                             }
2639                         }
2640                         if (forccs)
2641                             code_orflag(cdb.last(),CFpsw);
2642                     }
2643                     if (sz <= REGSIZE)
2644                         *pretregs &= mBP | ALLREGS;     // flags already set
2645                 }
2646                 freenode(e2);
2647                 break;
2648             }
2649             goto default;
2650 
2651         default:
2652             retregs = forregs & ~mCX;               // CX will be shift count
2653             if (sz <= REGSIZE)
2654             {
2655                 if (forregs & ~regcon.mvar && !(retregs & ~regcon.mvar))
2656                     retregs = ALLREGS & ~mCX;       // need something
2657                 else if (!retregs)
2658                     retregs = ALLREGS & ~mCX;       // need something
2659                 if (sz == 1)
2660                 {   retregs &= mAX|mBX|mDX;
2661                     if (!retregs)
2662                         retregs = mAX|mBX|mDX;
2663                 }
2664             }
2665             else
2666             {
2667                 if (!(retregs & mMSW))
2668                     retregs = ALLREGS & ~mCX;
2669             }
2670             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
2671 
2672             if (sz <= REGSIZE)
2673                 resreg = findreg(retregs);
2674             else
2675             {
2676                 resreg = findregmsw(retregs);
2677                 sreg = findreglsw(retregs);
2678             }
2679         L1:
2680             rretregs = mCX;                 // CX is shift count
2681             if (sz <= REGSIZE)
2682             {
2683                 scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue
2684                 getregs(cdb,retregs);      // trash these regs
2685                 cdb.gen2(0xD3 ^ isbyte,grex | modregrmx(3,s1,resreg)); // Sxx resreg,CX
2686 
2687                 if (!I16 && sz == 2 && (oper == OProl || oper == OPror))
2688                     cdb.last().Iflags |= CFopsize;
2689 
2690                 // Note that a shift by CL does not set the flags if
2691                 // CL == 0. If e2 is a constant, we know it isn't 0
2692                 // (it would have been optimized out).
2693                 if (e2isconst)
2694                     *pretregs &= mBP | ALLREGS; // flags already set with result
2695             }
2696             else if (sz == 2 * REGSIZE &&
2697                      config.target_cpu >= TARGET_80386)
2698             {
2699                 reg_t hreg = resreg;
2700                 reg_t lreg = sreg;
2701                 uint rex = I64 ? (REX_W << 16) : 0;
2702                 if (e2isconst)
2703                 {
2704                     getregs(cdb,retregs);
2705                     if (shiftcnt & (REGSIZE * 8))
2706                     {
2707                         if (oper == OPshr)
2708                         {   //      SHR hreg,shiftcnt
2709                             //      MOV lreg,hreg
2710                             //      XOR hreg,hreg
2711                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt - (REGSIZE * 8));
2712                             genmovreg(cdb,lreg,hreg);
2713                             movregconst(cdb,hreg,0,0);
2714                         }
2715                         else if (oper == OPashr)
2716                         {   //      MOV     lreg,hreg
2717                             //      SAR     hreg,31
2718                             //      SHRD    lreg,hreg,shiftcnt
2719                             genmovreg(cdb,lreg,hreg);
2720                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),(REGSIZE * 8) - 1);
2721                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt - (REGSIZE * 8));
2722                         }
2723                         else
2724                         {   //      SHL lreg,shiftcnt
2725                             //      MOV hreg,lreg
2726                             //      XOR lreg,lreg
2727                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt - (REGSIZE * 8));
2728                             genmovreg(cdb,hreg,lreg);
2729                             movregconst(cdb,lreg,0,0);
2730                         }
2731                     }
2732                     else
2733                     {
2734                         if (oper == OPshr || oper == OPashr)
2735                         {   //      SHRD    lreg,hreg,shiftcnt
2736                             //      SHR/SAR hreg,shiftcnt
2737                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt);
2738                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt);
2739                         }
2740                         else
2741                         {   //      SHLD hreg,lreg,shiftcnt
2742                             //      SHL  lreg,shiftcnt
2743                             cdb.genc2(0x0FA4,rex | modregrm(3,lreg,hreg),shiftcnt);
2744                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt);
2745                         }
2746                     }
2747                     freenode(e2);
2748                 }
2749                 else if (config.target_cpu >= TARGET_80486 && REGSIZE == 2)
2750                 {
2751                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
2752                     getregs(cdb,retregs);          // modify these regs
2753                     if (oper == OPshl)
2754                     {
2755                         /*
2756                             SHLD    hreg,lreg,CL
2757                             SHL     lreg,CL
2758                          */
2759 
2760                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
2761                         cdb.gen2(0xD3,modregrm(3,4,lreg));
2762                     }
2763                     else
2764                     {
2765                         /*
2766                             SHRD    lreg,hreg,CL
2767                             SAR             hreg,CL
2768 
2769                             -- or --
2770 
2771                             SHRD    lreg,hreg,CL
2772                             SHR             hreg,CL
2773                          */
2774                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
2775                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
2776                     }
2777                 }
2778                 else
2779                 {   code* cl1,cl2;
2780 
2781                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
2782                     getregs(cdb,retregs | mCX);     // modify these regs
2783                                                             // TEST CL,0x20
2784                     cdb.genc2(0xF6,modregrm(3,0,CX),REGSIZE * 8);
2785                     cl1 = gennop(null);
2786                     CodeBuilder cdb1;
2787                     cdb1.ctor();
2788                     cdb1.append(cl1);
2789                     if (oper == OPshl)
2790                     {
2791                         /*  TEST    CL,20H
2792                             JNE     L1
2793                             SHLD    hreg,lreg,CL
2794                             SHL     lreg,CL
2795                             JMP     L2
2796                         L1: AND     CL,20H-1
2797                             SHL     lreg,CL
2798                             MOV     hreg,lreg
2799                             XOR     lreg,lreg
2800                         L2: NOP
2801                          */
2802 
2803                         if (REGSIZE == 2)
2804                             cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
2805                         cdb1.gen2(0xD3,modregrm(3,4,lreg));
2806                         genmovreg(cdb1,hreg,lreg);
2807                         genregs(cdb1,0x31,lreg,lreg);
2808 
2809                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
2810                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
2811                         cdb.gen2(0xD3,modregrm(3,4,lreg));
2812                     }
2813                     else
2814                     {   if (oper == OPashr)
2815                         {
2816                             /*  TEST        CL,20H
2817                                 JNE         L1
2818                                 SHRD        lreg,hreg,CL
2819                                 SAR         hreg,CL
2820                                 JMP         L2
2821                             L1: AND         CL,15
2822                                 MOV         lreg,hreg
2823                                 SAR         hreg,31
2824                                 SHRD        lreg,hreg,CL
2825                             L2: NOP
2826                              */
2827 
2828                             if (REGSIZE == 2)
2829                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
2830                             genmovreg(cdb1,lreg,hreg);
2831                             cdb1.genc2(0xC1,modregrm(3,s1,hreg),31);
2832                             cdb1.gen2(0x0FAD,modregrm(3,hreg,lreg));
2833                         }
2834                         else
2835                         {
2836                             /*  TEST        CL,20H
2837                                 JNE         L1
2838                                 SHRD        lreg,hreg,CL
2839                                 SHR         hreg,CL
2840                                 JMP         L2
2841                             L1: AND         CL,15
2842                                 SHR         hreg,CL
2843                                 MOV         lreg,hreg
2844                                 XOR         hreg,hreg
2845                             L2: NOP
2846                              */
2847 
2848                             if (REGSIZE == 2)
2849                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
2850                             cdb1.gen2(0xD3,modregrm(3,5,hreg));
2851                             genmovreg(cdb1,lreg,hreg);
2852                             genregs(cdb1,0x31,hreg,hreg);
2853                         }
2854                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
2855                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
2856                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
2857                     }
2858                     cl2 = gennop(null);
2859                     genjmp(cdb,JMPS,FLcode,cast(block *)cl2);
2860                     cdb.append(cdb1);
2861                     cdb.append(cl2);
2862                 }
2863                 break;
2864             }
2865             else if (sz == 2 * REGSIZE)
2866             {
2867                 scodelem(cdb,e2,&rretregs,retregs,false);
2868                 getregs(cdb,retregs | mCX);
2869                 if (oper == OPshl)
2870                     swap(&resreg, &sreg);
2871                 if (!e2isconst)                   // if not sure shift count != 0
2872                     cdb.genc2(0xE3,0,6);          // JCXZ .+6
2873                 cdb.gen2(0xD1,modregrm(3,s1,resreg));
2874                 code_orflag(cdb.last(),CFtarg2);
2875                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
2876                 cdb.genc2(0xE2,0,cast(targ_uns)-6);          // LOOP .-6
2877                 regimmed_set(CX,0);         // note that now CX == 0
2878             }
2879             else
2880                 assert(0);
2881             break;
2882     }
2883     fixresult(cdb,e,retregs,pretregs);
2884 }
2885 
2886 
2887 /***************************
2888  * Perform a 'star' reference (indirection).
2889  */
2890 
2891 void cdind(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2892 {
2893     regm_t retregs;
2894     reg_t reg;
2895     uint nreg;
2896 
2897     //printf("cdind(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
2898     tym_t tym = tybasic(e.Ety);
2899     if (tyfloating(tym))
2900     {
2901         if (config.inline8087)
2902         {
2903             if (*pretregs & mST0)
2904             {
2905                 cdind87(cdb, e, pretregs);
2906                 return;
2907             }
2908             if (I64 && tym == TYcfloat && *pretregs & (ALLREGS | mBP))
2909             { }
2910             else if (tycomplex(tym))
2911             {
2912                 cload87(cdb, e, pretregs);
2913                 return;
2914             }
2915 
2916             if (*pretregs & mPSW)
2917             {
2918                 cdind87(cdb, e, pretregs);
2919                 return;
2920             }
2921         }
2922     }
2923 
2924     elem *e1 = e.EV.E1;
2925     assert(e1);
2926     switch (tym)
2927     {
2928         case TYstruct:
2929         case TYarray:
2930             // This case should never happen, why is it here?
2931             tym = TYnptr;               // don't confuse allocreg()
2932             if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
2933                     tym = TYfptr;
2934             break;
2935 
2936         default:
2937             break;
2938     }
2939     uint sz = _tysize[tym];
2940     uint isbyte = tybyte(tym) != 0;
2941 
2942     code cs;
2943 
2944      getlvalue(cdb,&cs,e,RMload);          // get addressing mode
2945     //printf("Irex = %02x, Irm = x%02x, Isib = x%02x\n", cs.Irex, cs.Irm, cs.Isib);
2946     //fprintf(stderr,"cd2 :\n"); WRcodlst(c);
2947     if (*pretregs == 0)
2948     {
2949         if (e.Ety & mTYvolatile)               // do the load anyway
2950             *pretregs = regmask(e.Ety, 0);     // load into registers
2951         else
2952             return;
2953     }
2954 
2955     regm_t idxregs = idxregm(&cs);               // mask of index regs used
2956 
2957     if (*pretregs == mPSW)
2958     {
2959         if (!I16 && tym == TYfloat)
2960         {
2961             retregs = ALLREGS & ~idxregs;
2962             allocreg(cdb,&retregs,&reg,TYfloat);
2963             cs.Iop = 0x8B;
2964             code_newreg(&cs,reg);
2965             cdb.gen(&cs);                       // MOV reg,lsw
2966             cdb.gen2(0xD1,modregrmx(3,4,reg));  // SHL reg,1
2967             code_orflag(cdb.last(), CFpsw);
2968         }
2969         else if (sz <= REGSIZE)
2970         {
2971             cs.Iop = 0x81 ^ isbyte;
2972             cs.Irm |= modregrm(0,7,0);
2973             cs.IFL2 = FLconst;
2974             cs.IEV2.Vsize_t = 0;
2975             cdb.gen(&cs);             // CMP [idx],0
2976         }
2977         else if (!I16 && sz == REGSIZE + 2)      // if far pointer
2978         {
2979             retregs = ALLREGS & ~idxregs;
2980             allocreg(cdb,&retregs,&reg,TYint);
2981             cs.Iop = 0x0FB7;
2982             cs.Irm |= modregrm(0,reg,0);
2983             getlvalue_msw(&cs);
2984             cdb.gen(&cs);             // MOVZX reg,msw
2985             goto L4;
2986         }
2987         else if (sz <= 2 * REGSIZE)
2988         {
2989             retregs = ALLREGS & ~idxregs;
2990             allocreg(cdb,&retregs,&reg,TYint);
2991             cs.Iop = 0x8B;
2992             code_newreg(&cs,reg);
2993             getlvalue_msw(&cs);
2994             cdb.gen(&cs);             // MOV reg,msw
2995             if (I32)
2996             {   if (tym == TYdouble || tym == TYdouble_alias)
2997                     cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1
2998             }
2999             else if (tym == TYfloat)
3000                 cdb.gen2(0xD1,modregrm(3,4,reg));    // SHL reg,1
3001         L4:
3002             cs.Iop = 0x0B;
3003             getlvalue_lsw(&cs);
3004             cs.Iflags |= CFpsw;
3005             cdb.gen(&cs);                    // OR reg,lsw
3006         }
3007         else if (!I32 && sz == 8)
3008         {
3009             *pretregs |= DOUBLEREGS_16;     // fake it for now
3010             goto L1;
3011         }
3012         else
3013         {
3014             debug WRTYxx(tym);
3015             assert(0);
3016         }
3017     }
3018     else                                // else return result in reg
3019     {
3020     L1:
3021         retregs = *pretregs;
3022         if (sz == 8 &&
3023             (retregs & (mPSW | mSTACK | ALLREGS | mBP)) == mSTACK)
3024         {   int i;
3025 
3026             // Optimizer should not CSE these, as the result is worse code!
3027             assert(!e.Ecount);
3028 
3029             cs.Iop = 0xFF;
3030             cs.Irm |= modregrm(0,6,0);
3031             cs.IEV1.Voffset += 8 - REGSIZE;
3032             stackchanged = 1;
3033             i = 8 - REGSIZE;
3034             do
3035             {
3036                 cdb.gen(&cs);                         // PUSH EA+i
3037                 cdb.genadjesp(REGSIZE);
3038                 cs.IEV1.Voffset -= REGSIZE;
3039                 stackpush += REGSIZE;
3040                 i -= REGSIZE;
3041             }
3042             while (i >= 0);
3043             goto L3;
3044         }
3045         if (I16 && sz == 8)
3046             retregs = DOUBLEREGS_16;
3047 
3048         // Watch out for loading an lptr from an lptr! We must have
3049         // the offset loaded into a different register.
3050         /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes)
3051                 retregs = ALLREGS;*/
3052 
3053         {
3054             assert(!isbyte || retregs & BYTEREGS);
3055             allocreg(cdb,&retregs,&reg,tym); // alloc registers
3056         }
3057         if (retregs & XMMREGS)
3058         {
3059             assert(sz == 4 || sz == 8 || sz == 16 || sz == 32); // float, double or vector
3060             cs.Iop = xmmload(tym);
3061             cs.Irex &= ~REX_W;
3062             code_newreg(&cs,reg - XMM0);
3063             checkSetVex(&cs,tym);
3064             cdb.gen(&cs);     // MOV reg,[idx]
3065         }
3066         else if (sz <= REGSIZE)
3067         {
3068             cs.Iop = 0x8B;                                  // MOV
3069             if (sz <= 2 && !I16 &&
3070                 config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed)
3071             {
3072                 cs.Iop = tyuns(tym) ? 0x0FB7 : 0x0FBF;      // MOVZX/MOVSX
3073                 cs.Iflags &= ~CFopsize;
3074             }
3075             cs.Iop ^= isbyte;
3076         L2:
3077             code_newreg(&cs,reg);
3078             cdb.gen(&cs);     // MOV reg,[idx]
3079             if (isbyte && reg >= 4)
3080                 code_orrex(cdb.last(), REX);
3081         }
3082         else if ((tym == TYfptr || tym == TYhptr) && retregs & mES)
3083         {
3084             cs.Iop = 0xC4;          // LES reg,[idx]
3085             goto L2;
3086         }
3087         else if (sz <= 2 * REGSIZE)
3088         {   uint lsreg;
3089 
3090             cs.Iop = 0x8B;
3091             // Be careful not to interfere with index registers
3092             if (!I16)
3093             {
3094                 // Can't handle if both result registers are used in
3095                 // the addressing mode.
3096                 if ((retregs & idxregs) == retregs)
3097                 {
3098                     retregs = mMSW & allregs & ~idxregs;
3099                     if (!retregs)
3100                         retregs |= mCX;
3101                     retregs |= mLSW & ~idxregs;
3102 
3103                     // We can run out of registers, so if that's possible,
3104                     // give us *one* of the idxregs
3105                     if ((retregs & ~regcon.mvar & mLSW) == 0)
3106                     {
3107                         regm_t x = idxregs & mLSW;
3108                         if (x)
3109                             retregs |= mask(findreg(x));        // give us one idxreg
3110                     }
3111                     else if ((retregs & ~regcon.mvar & mMSW) == 0)
3112                     {
3113                         regm_t x = idxregs & mMSW;
3114                         if (x)
3115                             retregs |= mask(findreg(x));        // give us one idxreg
3116                     }
3117 
3118                     allocreg(cdb,&retregs,&reg,tym);     // alloc registers
3119                     assert((retregs & idxregs) != retregs);
3120                 }
3121 
3122                 lsreg = findreglsw(retregs);
3123                 if (mask(reg) & idxregs)                // reg is in addr mode
3124                 {
3125                     code_newreg(&cs,lsreg);
3126                     cdb.gen(&cs);                 // MOV lsreg,lsw
3127                     if (sz == REGSIZE + 2)
3128                         cs.Iflags |= CFopsize;
3129                     lsreg = reg;
3130                     getlvalue_msw(&cs);                 // MOV reg,msw
3131                 }
3132                 else
3133                 {
3134                     code_newreg(&cs,reg);
3135                     getlvalue_msw(&cs);
3136                     cdb.gen(&cs);                 // MOV reg,msw
3137                     if (sz == REGSIZE + 2)
3138                         cdb.last().Iflags |= CFopsize;
3139                     getlvalue_lsw(&cs);                 // MOV lsreg,lsw
3140                 }
3141                 NEWREG(cs.Irm,lsreg);
3142                 cdb.gen(&cs);
3143             }
3144             else
3145             {
3146                 // Index registers are always the lsw!
3147                 cs.Irm |= modregrm(0,reg,0);
3148                 getlvalue_msw(&cs);
3149                 cdb.gen(&cs);     // MOV reg,msw
3150                 lsreg = findreglsw(retregs);
3151                 NEWREG(cs.Irm,lsreg);
3152                 getlvalue_lsw(&cs);     // MOV lsreg,lsw
3153                 cdb.gen(&cs);
3154             }
3155         }
3156         else if (I16 && sz == 8)
3157         {
3158             assert(reg == AX);
3159             cs.Iop = 0x8B;
3160             cs.IEV1.Voffset += 6;
3161             cdb.gen(&cs);             // MOV AX,EA+6
3162             cs.Irm |= modregrm(0,CX,0);
3163             cs.IEV1.Voffset -= 4;
3164             cdb.gen(&cs);                    // MOV CX,EA+2
3165             NEWREG(cs.Irm,DX);
3166             cs.IEV1.Voffset -= 2;
3167             cdb.gen(&cs);                    // MOV DX,EA
3168             cs.IEV1.Voffset += 4;
3169             NEWREG(cs.Irm,BX);
3170             cdb.gen(&cs);                    // MOV BX,EA+4
3171         }
3172         else
3173             assert(0);
3174     L3:
3175         fixresult(cdb,e,retregs,pretregs);
3176     }
3177     //fprintf(stderr,"cdafter :\n"); WRcodlst(c);
3178 }
3179 
3180 
3181 
static if (!TARGET_SEGMENTED)
{
// Non-segmented targets: there is never any code to generate.
private code *cod2_setES(tym_t ty) { return null; }
}
else
{
/********************************
 * Build the instruction sequence that copies the segment register implied
 * by pointer type `ty` into ES (PUSH segreg / POP ES).
 * Nothing is generated when the type needs no reload: near pointers while
 * CFG3eseqds is set, stack pointers while CFG3eseqds is set and WFssneds
 * is clear, and every type other than TYnptr, TYcptr and TYsptr
 * (far pointers included).
 * Params:
 *      ty = pointer type; only its tybasic() part is examined
 * Returns:
 *      whatever CodeBuilder.finish() yields for the generated sequence
 */

private code *cod2_setES(tym_t ty)
{
    enum int PUSH_CS = 0x0E;
    enum int PUSH_SS = 0x16;
    enum int PUSH_DS = 0x1E;
    enum int POP_ES  = 0x07;

    // true when ES is not configured to be the same as DS
    const bool esNotDs = !(config.flags3 & CFG3eseqds);

    int segPush;                // opcode of the PUSH <segreg> to emit
    bool loadES;                // whether ES has to be reloaded at all

    switch (tybasic(ty))
    {
        case TYcptr:            // code segment pointers always reload ES
            segPush = PUSH_CS;
            loadES = true;
            break;

        case TYnptr:            // near data pointer: segment is DS
            segPush = PUSH_DS;
            loadES = esNotDs;
            break;

        case TYsptr:            // stack pointer: segment is SS
            segPush = PUSH_SS;
            loadES = (config.wflags & WFssneds) || esNotDs;
            break;

        default:
            segPush = 0;
            loadES = false;
            break;
    }

    CodeBuilder cdb;
    cdb.ctor();
    if (loadES)
    {
        getregs(cdb,mES);       // ES is about to be modified
        cdb.gen1(segPush);      // PUSH CS/DS/SS
        cdb.gen1(POP_ES);       // POP ES
    }
    return cdb.finish();
}
}
3227 
/********************************
 * Generate code for intrinsic strlen().
 * The string pointer is e.EV.E1; the length is computed in CX and
 * handed to fixresult().
 * Params:
 *      cdb = sink for the generated code
 *      e = the strlen elem
 *      pretregs = requested result registers; mPSW is cleared here
 *                 after tagging the final instruction with CFpsw
 */

void cdstrlen(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Emitted sequence (length ends up in CX):
        LES     DI,e1
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX
        DEC     CX
     */

    elem *str = e.EV.E1;
    const tym_t strTy = str.Ety;

    // Pointer goes in DI; when tyreg() says no, ES is requested as well
    regm_t ptrRegs = tyreg(strTy) ? mDI : (mDI | mES);
    codelem(cdb,str,&ptrRegs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(strTy));

    const ubyte rex = I64 ? REX_W : 0;
    const targ_size_t allOnes = -cast(targ_size_t)1;

    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                            // MOV AL,0
    movregconst(cdb,CX,allOnes,I64 ? 64 : 0);           // MOV CX,-1
    getregs(cdb,mDI|mCX);
    cdb.gen1(0xF2);                                     // REPNE
    cdb.gen1(0xAE);                                     // SCASB
    genregs(cdb,0xF7,2,CX);                             // NOT CX
    code_orrex(cdb.last(), rex);

    // DEC CX: the one-byte 0x48+reg form is only used outside 64 bit mode
    if (!I64)
        cdb.gen1(0x48 + CX);                            // DEC CX
    else
        cdb.gen2(0xFF,(rex << 16) | modregrm(3,1,CX));  // DEC reg

    if (*pretregs & mPSW)
    {
        // Mark the DEC with CFpsw and take mPSW out of the request
        cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;
    }
    fixresult(cdb,e,mCX,pretregs);
}
3274 
3275 
/*********************************
 * Generate code for strcmp(s1,s2) intrinsic.
 * s1 is e.EV.E1 and s2 is e.EV.E2. The result is passed to
 * fixresult() in AX; when only the flags are requested the
 * SBB sequence that materializes AX is skipped.
 * Params:
 *      cdb = sink for the generated code
 *      e = the strcmp elem
 *      pretregs = requested result registers; mPSW is cleared on exit
 */

void cdstrcmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Shape of the generated code:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     CX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        PUSH    DS
        MOV     DS,CX
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX                      ;CX = string length of s2
        SUB     DI,CX                   ;point DI back to beginning
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    // s1 into SI, plus CX when tyreg() rejects its type
    elem *s1 = e.EV.E1;
    const tym_t ty1 = s1.Ety;
    regm_t regs1 = tyreg(ty1) ? mSI : (mSI | mCX);
    codelem(cdb,s1,&regs1,false);

    // s2 into DI (plus ES when tyreg() rejects its type), keeping s1's registers
    elem *s2 = e.EV.E2;
    const tym_t ty2 = s2.Ety;
    regm_t regs2 = tyreg(ty2) ? mDI : (mDI | mES);
    scodelem(cdb,s2,&regs2,regs1,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);

    const ubyte rex = I64 ? REX_W : 0;

    // Decide how DS has to be loaded for s1
    bool dsViaStack = false;            // copy another segment register through the stack
    bool dsFromCX = false;              // segment value is already sitting in CX
    int seg;                            // source segment register for dsViaStack
    switch (tybasic(ty1))
    {
        case TYnptr, TYimmutPtr:
            break;

        case TYsptr:
            // SS only if sptr can't use DS segment
            seg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            dsViaStack = true;
            break;

        case TYcptr:
            seg = SEG_CS;
            dsViaStack = true;
            break;

        case TYfptr, TYvptr, TYhptr:
            dsFromCX = true;
            break;

        default:
            assert(0);
    }

    const bool restoreDS = dsViaStack || dsFromCX;
    if (restoreDS)
    {
        cdb.gen1(0x1E);                             // PUSH DS
        if (dsViaStack)
        {
            cdb.gen1(0x06 + (seg << 3));            // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
        }
        else
            cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
    }

    const targ_size_t allOnes = -cast(targ_size_t)1;
    movregconst(cdb,AX,0,0);                        // MOV AX,0
    movregconst(cdb,CX,allOnes,I64 ? 64 : 0);       // MOV CX,-1
    getregs(cdb,mSI|mDI|mCX);
    cdb.gen1(0xF2);                                 // REPNE
    cdb.gen1(0xAE);                                 // SCASB
    genregs(cdb,0xF7,2,CX);                         // NOT CX
    code_orrex(cdb.last(),rex);
    genregs(cdb,0x2B,DI,CX);                        // SUB DI,CX
    code_orrex(cdb.last(),rex);
    cdb.gen1(0xF3);                                 // REPE
    cdb.gen1(0xA6);                                 // CMPSB
    if (restoreDS)
        cdb.gen1(0x1F);                             // POP DS

    code *equalLabel = gennop(null);                // L1: target of the JE below
    const bool flagsOnly = (*pretregs == mPSW);
    if (!flagsOnly)
    {
        genjmp(cdb,JE,FLcode,cast(block *) equalLabel);     // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                    // SBB AX,AX
        code_orrex(cdb.last(),rex);
        cdb.genc2(0x81,(rex << 16) | modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
    }

    *pretregs &= ~mPSW;
    cdb.append(equalLabel);
    fixresult(cdb,e,mAX,pretregs);
}
3382 
/*********************************
 * Generate code for memcmp(s1,s2,n) intrinsic.
 * e.EV.E1 must be an OPparam whose children are s1 and s2;
 * e.EV.E2 is the byte count n. The result is passed to fixresult()
 * in AX; when only the flags are requested the SBB sequence is skipped.
 * Params:
 *      cdb = sink for the generated code
 *      e = the memcmp elem
 *      pretregs = requested result registers; mPSW is cleared on exit
 */

void cdmemcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Shape of the generated code:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     DX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        MOV     CX,n                    ;get number of bytes to compare
        PUSH    DS
        MOV     DS,DX
        XOR     AX,AX
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    elem *ptrs = e.EV.E1;               // OPparam holding (s1, s2)
    assert(ptrs.Eoper == OPparam);

    // Get s1 into DX:SI
    const tym_t ty1 = ptrs.EV.E1.Ety;
    regm_t regs1 = tyreg(ty1) ? mSI : (mSI | mDX);
    codelem(cdb,ptrs.EV.E1,&regs1,false);

    // Get s2 into ES:DI
    const tym_t ty2 = ptrs.EV.E2.Ety;
    regm_t regs2 = tyreg(ty2) ? mDI : (mDI | mES);
    scodelem(cdb,ptrs.EV.E2,&regs2,regs1,false);
    freenode(ptrs);

    // Get nbytes into CX
    regm_t cntRegs = mCX;
    scodelem(cdb,e.EV.E2,&cntRegs,regs2 | regs1,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));

    // Decide how DS has to be loaded for s1
    bool dsViaStack = false;            // copy another segment register through the stack
    bool dsFromDX = false;              // segment value is already sitting in DX
    int seg;                            // source segment register for dsViaStack
    switch (tybasic(ty1))
    {
        case TYnptr, TYimmutPtr:
            break;

        case TYsptr:
            // SS only if sptr can't use DS segment
            seg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            dsViaStack = true;
            break;

        case TYcptr:
            seg = SEG_CS;
            dsViaStack = true;
            break;

        case TYfptr, TYvptr, TYhptr:
            dsFromDX = true;
            break;

        default:
            assert(0);
    }

    const bool restoreDS = dsViaStack || dsFromDX;
    if (restoreDS)
    {
        cdb.gen1(0x1E);                             // PUSH DS
        if (dsViaStack)
        {
            cdb.gen1(0x06 + (seg << 3));            // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
        }
        else
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));   // MOV DS,DX
    }

    static if (1)
    {
        getregs(cdb,mAX);
        cdb.gen2(0x33,modregrm(3,AX,AX));           // XOR AX,AX
        code_orflag(cdb.last(), CFpsw);             // keep flags
    }
    else
    {
        if (*pretregs != mPSW)                      // if not flags only
            regwithvalue(cdb,mAX,0,null,0);         // put 0 in AX
    }

    getregs(cdb,mCX | mSI | mDI);
    cdb.gen1(0xF3);                                 // REPE
    cdb.gen1(0xA6);                                 // CMPSB
    if (restoreDS)
        cdb.gen1(0x1F);                             // POP DS

    const bool flagsOnly = (*pretregs == mPSW);
    if (!flagsOnly)
    {
        code *equalLabel = gennop(null);            // L1: target of the JE below
        genjmp(cdb,JE,FLcode,cast(block *) equalLabel);     // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                    // SBB AX,AX
        cdb.genc2(0x81,modregrm(3,3,AX),cast(targ_uns)-1);  // SBB AX,-1
        cdb.append(equalLabel);
    }

    *pretregs &= ~mPSW;
    fixresult(cdb,e,mAX,pretregs);
}
3496 
/*********************************
 * Generate code for strcpy(s1,s2) intrinsic.
 * s1 (destination) is e.EV.E1 and s2 (source) is e.EV.E2.
 * The length of s2 is found first with REPNE SCASB, then the bytes
 * are moved with REP MOVSB. The result handed to fixresult() is
 * AX (with ES), loaded from DI when a result is requested.
 * Params:
 *      cdb = sink for the generated code
 *      e = the strcpy elem
 *      pretregs = requested result registers
 */

void cdstrcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Shape of the generated code:
        LES     DI,s2                   ;ES:DI = s2
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB                   ;find end of s2
        NOT     CX                      ;CX = strlen(s2) + 1 (for EOS)
        SUB     DI,CX
        MOV     SI,DI
        PUSH    DS
        PUSH    ES
        LES     DI,s1
        POP     DS
        MOV     AX,DI                   ;return value is s1
        REP     MOVSB
        POP     DS
    */

    stackchanged = 1;

    // s2 into DI (plus ES when tyreg() rejects its type)
    const tym_t ty2 = tybasic(e.EV.E2.Ety);
    regm_t regs2 = tyreg(ty2) ? mDI : (mDI | mES);
    const ubyte rex = I64 ? REX_W : 0;
    codelem(cdb,e.EV.E2,&regs2,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));

    // Measure s2 and rewind DI to its start, then make it the source (SI)
    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                    // MOV AL,0
    movregconst(cdb,CX,-1,I64?64:0);            // MOV CX,-1
    getregs(cdb,mAX|mCX|mSI|mDI);
    cdb.gen1(0xF2);                             // REPNE
    cdb.gen1(0xAE);                             // SCASB
    genregs(cdb,0xF7,2,CX);                     // NOT CX
    code_orrex(cdb.last(),rex);
    genregs(cdb,0x2B,DI,CX);                    // SUB DI,CX
    code_orrex(cdb.last(),rex);
    genmovreg(cdb,SI,DI);                       // MOV SI,DI

    // Pick the segment register that DS must take over for the source
    bool swapDS = true;                 // false: DS is left alone
    int srcSeg;                         // segment register pushed for the later POP DS
    switch (ty2)
    {
        case TYnptr, TYimmutPtr:
            swapDS = false;
            break;

        case TYsptr:
            // SS only if sptr can't use DS segment
            srcSeg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            srcSeg = SEG_CS;
            break;

        case TYfptr, TYvptr, TYhptr:
            srcSeg = SEG_ES;
            break;

        default:
            assert(0);
    }
    if (swapDS)
    {
        cdb.gen1(0x1E);                         // PUSH DS
        cdb.gen1(0x06 + (srcSeg << 3));         // PUSH segreg
        cdb.genadjesp(REGSIZE * 2);             // account for the two pushes
    }

    // s1 into DI (plus ES when tyreg() rejects its type), preserving CX and SI
    const tym_t ty1 = tybasic(e.EV.E1.Ety);
    regm_t regs1 = tyreg(ty1) ? mDI : (mDI | mES);
    scodelem(cdb,e.EV.E1,&regs1,mCX|mSI,false);
    getregs(cdb,mAX|mCX|mSI|mDI);

    // Make sure ES contains proper segment value; skipped when both
    // pointers are TYnptr because ES is already same as DS
    const bool esAlreadyDS = (ty2 == TYnptr && ty1 == ty2);
    if (!esAlreadyDS)
        cdb.append(cod2_setES(ty1));

    if (swapDS)
        cdb.gen1(0x1F);                         // POP DS (the pushed segreg)
    if (*pretregs)
        genmovreg(cdb,AX,DI);                   // MOV AX,DI
    cdb.gen1(0xF3);                             // REP
    cdb.gen1(0xA4);                             // MOVSB

    if (swapDS)
    {
        cdb.gen1(0x1F);                         // POP DS (the saved DS)
        cdb.genadjesp(-(REGSIZE * 2));
    }
    fixresult(cdb,e,mAX | mES,pretregs);
}
3603 
/*********************************
 * Generate code for memcpy(s1,s2,n) intrinsic.
 *  OPmemcpy
 *   /   \
 * s1   OPparam
 *       /   \
 *      s2    n
 *
 * The source is loaded into SI (segment in DX), the count into CX and
 * the destination into DI (with ES). When a result is requested, DI is
 * copied to AX before the move and AX (with ES) is handed to fixresult().
 * Params:
 *      cdb = sink for the generated code
 *      e = the OPmemcpy elem
 *      pretregs = requested result registers
 */

void cdmemcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Shape of the generated code:
        MOV     SI,s2
        MOV     DX,s2+2
        MOV     CX,n
        LES     DI,s1
        PUSH    DS
        MOV     DS,DX
        MOV     AX,DI                   ;return value is s1
        REP     MOVSB
        POP     DS
    */

    elem *args = e.EV.E2;               // OPparam holding (s2, n)
    assert(args.Eoper == OPparam);

    // Get s2 into DX:SI
    const tym_t srcTy = args.EV.E1.Ety;
    regm_t srcRegs = tyreg(srcTy) ? mSI : (mSI | mDX);
    codelem(cdb,args.EV.E1,&srcRegs,false);

    // Get nbytes into CX
    regm_t cntRegs = mCX;
    scodelem(cdb,args.EV.E2,&cntRegs,srcRegs,false);
    freenode(args);

    // Get s1 into ES:DI
    const tym_t dstTy = e.EV.E1.Ety;
    regm_t dstRegs = tyreg(dstTy) ? mDI : (mDI | mES);
    scodelem(cdb,e.EV.E1,&dstRegs,srcRegs | cntRegs,false);

    const ubyte rex = I64 ? REX_W : 0;

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(dstTy));

    // Decide how DS has to be loaded for the source
    bool dsViaStack = false;            // copy another segment register through the stack
    bool dsFromDX = false;              // segment value is already sitting in DX
    int seg;                            // source segment register for dsViaStack
    switch (tybasic(srcTy))
    {
        case TYnptr, TYimmutPtr:
            break;

        case TYsptr:
            // SS only if sptr can't use DS segment
            seg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            dsViaStack = true;
            break;

        case TYcptr:
            seg = SEG_CS;
            dsViaStack = true;
            break;

        case TYfptr, TYvptr, TYhptr:
            dsFromDX = true;
            break;

        default:
            assert(0);
    }

    const bool restoreDS = dsViaStack || dsFromDX;
    if (restoreDS)
    {
        cdb.gen1(0x1E);                             // PUSH DS
        if (dsViaStack)
        {
            cdb.gen1(0x06 + (seg << 3));            // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
        }
        else
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));   // MOV DS,DX
    }

    if (*pretregs)                                  // if need return value
    {
        getregs(cdb,mAX);
        genmovreg(cdb,AX,DI);
    }

    if (0 && I32 && config.flags4 & CFG4speed)
    {
        /* Disabled: this is only faster if the memory is dword aligned,
         * if not it is significantly slower than just a rep movsb.
         * NOTE(review): unlike the live path below, this path never
         * emits POP DS when DS was saved - fix before enabling it.
         */
        /*      mov     EDX,ECX
         *      shr     ECX,2
         *      jz      L1
         *      repe    movsd
         * L1:  nop
         *      and     EDX,3
         *      jz      L2
         *      mov     ECX,EDX
         *      repe    movsb
         * L2:  nop
         */
        getregs(cdb,mSI | mDI | mCX | mDX);
        genmovreg(cdb,DX,CX);                               // MOV EDX,ECX
        cdb.genc2(0xC1,modregrm(3,5,CX),2);                 // SHR ECX,2
        code *skipDwords = gennop(null);
        genjmp(cdb, JE, FLcode, cast(block *)skipDwords);   // JZ L1
        cdb.gen1(0xF3);                                     // REPE
        cdb.gen1(0xA5);                                     // MOVSW
        cdb.append(skipDwords);
        cdb.genc2(0x81, modregrm(3,4,DX),3);                // AND EDX,3

        code *skipBytes = gennop(null);
        genjmp(cdb, JE, FLcode, cast(block *)skipBytes);    // JZ L2
        genmovreg(cdb,CX,DX);                               // MOV ECX,EDX
        cdb.gen1(0xF3);                                     // REPE
        cdb.gen1(0xA4);                                     // MOVSB
        cdb.append(skipBytes);
    }
    else
    {
        getregs(cdb,mSI | mDI | mCX);

        // Speed optimization outside 32 bit mode: halve the count, move
        // with 0xA5, then ADC CX,CX so the trailing byte move gets the carry
        const bool wideFirst = !I32 && (config.flags4 & CFG4speed);
        if (wideFirst)
        {
            cdb.gen2(0xD1,(rex << 16) | modregrm(3,5,CX));      // SHR CX,1
            cdb.gen1(0xF3);                                     // REPE
            cdb.gen1(0xA5);                                     // MOVSW
            cdb.gen2(0x11,(rex << 16) | modregrm(3,CX,CX));     // ADC CX,CX
        }
        cdb.gen1(0xF3);                             // REPE
        cdb.gen1(0xA4);                             // MOVSB
        if (restoreDS)
            cdb.gen1(0x1F);                         // POP DS
    }
    fixresult(cdb,e,mES|mAX,pretregs);
}
3747 
3748 
/*********************************
 * Generate code for memset(s,val,n) intrinsic.
 *      (s OPmemset (n OPparam val))
 *
 * Two strategies are used:
 *  1. count and value both constant, count small, not 16 bit code:
 *     a handful of direct MOV stores through the pointer register;
 *  2. otherwise: REP STOS with the count in CX, the value in AX and
 *     the destination in DI (with ES); BX receives a copy of DI when
 *     a result is requested.
 * Params:
 *      cdb = sink for the generated code
 *      e = the OPmemset elem
 *      pretregs = requested result registers
 */

void cdmemset(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdmemset(*pretregs = %s)\n", regm_str(*pretregs));
    elem *args = e.EV.E2;               // OPparam holding (n, val)
    assert(args.Eoper == OPparam);
    elem *countElem = args.EV.E1;
    elem *valElem = args.EV.E2;

    const ubyte rex = I64 ? REX_W : 0;

    // Classify the fill value; a constant one is replicated into every
    // byte of `fill`
    targ_size_t fill;
    bool fillIsConst;
    if (valElem.Eoper == OPconst)
    {
        fill = cast(targ_size_t)el_tolong(valElem) & 0xFF;
        fill |= fill << 8;
        fill |= fill << 16;
        static if (fill.sizeof > 4)
            fill |= fill << 32;
        fillIsConst = true;
    }
    else if (valElem.Eoper == OPstrpar)     // happens if the value is a struct of 0 size
    {
        fill = 0;
        fillIsConst = true;
    }
    else
    {
        fill = 0xDEADBEEF;      // stop annoying false positives that value is not inited
        fillIsConst = false;
    }

    targ_uns numbytes;
    const bool countIsConst = (countElem.Eoper == OPconst);

    /* Strategy 1: straight-line stores
     */
    if (countIsConst)
    {
        const uint repThreshold = REGSIZE * (6 + (REGSIZE == 4));
        numbytes = cast(uint)cast(targ_size_t)el_tolong(countElem);
        if (!I16 &&                     // doesn't work for 16 bits
            fillIsConst &&
            numbytes <= repThreshold)
        {
            regm_t ptrRegs = *pretregs ? *pretregs : ALLREGS;
            codelem(cdb,e.EV.E1,&ptrRegs,false);
            const reg_t ptrReg = findreg(ptrRegs);

            // Sizes 1, 2 and 4 with a true constant need just one
            // store-immediate instruction
            bool stored = false;
            if (valElem.Eoper == OPconst)
            {
                const uint mrm = buildModregrm(0,0,ptrReg);
                switch (numbytes)
                {
                    case 4:                     // MOV [reg],imm32
                        cdb.genc2(0xC7,mrm,fill);
                        stored = true;
                        break;

                    case 2:                     // MOV [reg],imm16
                        cdb.genc2(0xC7,mrm,fill);
                        cdb.last().Iflags = CFopsize;
                        stored = true;
                        break;

                    case 1:                     // MOV [reg],imm8
                        cdb.genc2(0xC6,mrm,fill);
                        stored = true;
                        break;

                    default:
                        break;
                }
            }

            if (!stored)
            {
                // Put the pattern in a byte-addressable register other
                // than the pointer, then store it piece by piece
                reg_t valReg;
                regwithvalue(cdb, BYTEREGS & ~ptrRegs, fill, &valReg, I64 ? 64 : 0);
                freenode(valElem);
                freenode(args);

                targ_uns offset = 0;
                uint modrm = (rex << 16) | buildModregrm(2,valReg,ptrReg);
                for (; numbytes >= REGSIZE; numbytes -= REGSIZE, offset += REGSIZE)
                {                           // MOV dword ptr offset[reg],vreg
                    cdb.gen2(0x89,modrm);
                    cdb.last().IEV1.Voffset = offset;
                    cdb.last().IFL1 = FLconst;
                }

                modrm &= ~(rex << 16);      // remaining stores carry no REX bits
                if (numbytes & 4)
                {                           // MOV dword ptr offset[reg],vreg
                    cdb.gen2(0x89,modrm);
                    cdb.last().IEV1.Voffset = offset;
                    cdb.last().IFL1 = FLconst;
                    offset += 4;
                }
                if (numbytes & 2)
                {                           // MOV word ptr offset[reg],vreg
                    cdb.gen2(0x89,modrm);
                    cdb.last().IEV1.Voffset = offset;
                    cdb.last().IFL1 = FLconst;
                    cdb.last().Iflags = CFopsize;
                    offset += 2;
                }
                if (numbytes & 1)
                {                           // MOV byte ptr offset[reg],vreg
                    cdb.gen2(0x88,modrm);
                    cdb.last().IEV1.Voffset = offset;
                    cdb.last().IFL1 = FLconst;
                    if (I64 && valReg >= 4)
                        cdb.last().Irex |= REX;
                }
            }

            fixresult(cdb,e,ptrRegs,pretregs);
            return;
        }
    }

    /* Strategy 2: REP STOS
     */

    // Get nbytes into CX
    opcode_t stosOp;
    uint tail;                          // bytes left over after the REP STOS
    regm_t cntRegs = mCX;
    if (!I16 && countIsConst && fillIsConst)
    {
        tail = numbytes & (4 - 1);
        const targ_uns numwords = numbytes / 4;     // number of words
        stosOp = 0xAB;                              // moving by words
        getregs(cdb,mCX);
        movregconst(cdb,CX,numwords,I64?64:0);      // # of bytes/words
    }
    else
    {
        tail = 0;
        stosOp = 0xAA;                              // must move by bytes
        codelem(cdb,countElem,&cntRegs,false);
    }

    // Get val into AX
    regm_t valRegs = mAX;
    if (!I16 && fillIsConst)
    {
        regwithvalue(cdb, mAX, fill, null, I64?64:0);
        freenode(valElem);
    }
    else
    {
        scodelem(cdb,valElem,&valRegs,cntRegs,false);

        if (0 && I32)
        {
            // Disabled: replicate AL through EAX
            cdb.gen2(0x8A,modregrm(3,AH,AL));       // MOV AH,AL
            cdb.genc2(0xC1,modregrm(3,4,AX),8);     // SHL EAX,8
            cdb.gen2(0x8A,modregrm(3,AL,AH));       // MOV AL,AH
            cdb.genc2(0xC1,modregrm(3,4,AX),8);     // SHL EAX,8
            cdb.gen2(0x8A,modregrm(3,AL,AH));       // MOV AL,AH
        }
    }
    freenode(args);

    // Get s into ES:DI
    const tym_t dstTy = e.EV.E1.Ety;
    regm_t dstRegs = tyreg(dstTy) ? mDI : (mDI | mES);
    scodelem(cdb,e.EV.E1,&dstRegs,cntRegs | valRegs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(dstTy));

    if (*pretregs)                              // if need return value
    {
        getregs(cdb,mBX);
        genmovreg(cdb,BX,DI);
    }

    getregs(cdb,mDI | mCX);
    if (I16 && config.flags4 & CFG4speed)       // if speed optimization
    {
        getregs(cdb,mAX);
        cdb.gen2(0x8A,modregrm(3,AH,AL));       // MOV AH,AL
        cdb.gen2(0xD1,modregrm(3,5,CX));        // SHR CX,1
        cdb.gen1(0xF3);                         // REP
        cdb.gen1(0xAB);                         // STOSW
        cdb.gen2(0x11,modregrm(3,CX,CX));       // ADC CX,CX
        stosOp = 0xAA;
    }

    cdb.gen1(0xF3);                             // REP
    cdb.gen1(stosOp);                           // STOSD

    // Store the leftover bytes through DI.
    // NOTE(review): tail is at most 3 here, so the `& 4` test looks
    // unreachable - confirm before relying on it.
    const uint tailModrm = buildModregrm(2,AX,DI);
    if (tail & 4)
    {
        cdb.gen2(0x89,tailModrm);
        cdb.last().IFL1 = FLconst;
    }
    if (tail & 2)
    {
        cdb.gen2(0x89,tailModrm);
        cdb.last().Iflags = CFopsize;
        cdb.last().IEV1.Voffset = tail & 4;
        cdb.last().IFL1 = FLconst;
    }
    if (tail & 1)
    {
        cdb.gen2(0x88,tailModrm);
        cdb.last().IEV1.Voffset = tail & ~1;
        cdb.last().IFL1 = FLconst;
    }
    regimmed_set(CX,0);
    fixresult(cdb,e,mES|mBX,pretregs);
}
3965 
3966 
/**********************
 * Do structure assignments.
 * e.EV.E1 is the destination, e.EV.E2 the source and type_size(e.ET)
 * the number of bytes moved. The source address is put in SI (segment
 * in CX when DS has to be switched), the destination in DI (with ES
 * unless EX_flat), and the bytes are moved with MOVS instructions.
 *
 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).
 * Mebbe call cdstreq() for double assignments???
 * Params:
 *      cdb = sink for the generated code
 *      e = the structure assignment elem
 *      pretregs = requested result registers; must not include mPSW
 */

void cdstreq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdstreq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    bool reloadDS = false;                      // DS must be loaded from CX for the copy
    elem *dst = e.EV.E1;
    elem *src = e.EV.E2;
    uint numbytes = cast(uint)type_size(e.ET);  // # of bytes in structure/union
    const ubyte rex = I64 ? REX_W : 0;
    const bool flat = (config.exe & EX_flat) != 0;

    // First, load pointer to rvalue into SI
    regm_t srcregs = mSI;                       // source is DS:SI
    docommas(cdb,&src);
    if (src.Eoper == OPind)                     // if (.. = *p)
    {
        elem *srcPtr = src.EV.E1;
        int seg = SEG_DS;

        switch (tybasic(srcPtr.Ety))
        {
            case TYsptr:
                if (config.wflags & WFssneds)   // if sptr can't use DS segment
                    seg = SEG_SS;
                break;

            case TYcptr:
                if (!flat)
                    seg = SEG_CS;
                break;

            case TYfptr, TYvptr, TYhptr:
                srcregs |= mCX;                 // get segment also
                reloadDS = true;
                break;

            default:
                break;
        }
        codelem(cdb,srcPtr,&srcregs,false);
        freenode(src);
        if (seg != SEG_DS)                      // if not DS
        {
            getregs(cdb,mCX);
            cdb.gen2(0x8C,modregrm(3,seg,CX));  // MOV CX,segreg
            reloadDS = true;
        }
    }
    else if (src.Eoper == OPvar)
    {
        if (src.EV.Vsym.ty() & mTYfar)          // if source is in a far segment
        {
            srcregs |= mCX;                     // get segment also
            reloadDS = true;
        }
        else
        {
            const int seg = segfl[el_fl(src)];
            const bool onSeparateStack = (config.wflags & WFssneds) && seg == SEG_SS;
            if (onSeparateStack ||              // if source is on stack
                seg == SEG_CS)                  // if source is in CS
            {
                reloadDS = true;                // we need to reload DS
                // Load CX with segment
                srcregs |= mCX;
                getregs(cdb,mCX);
                cdb.gen2(0x8C,                  // MOV CX,[SS|CS]
                    modregrm(3,seg,CX));
            }
        }
        cdrelconst(cdb,src,&srcregs);
        freenode(src);
    }
    else
    {
        if (!flat)
        {
            reloadDS = true;
            srcregs |= mCX;
        }
        codelem(cdb,src,&srcregs,false);
    }

    // now get pointer to lvalue (destination) in ES:DI
    regm_t dstregs = flat ? mDI : (mES | mDI);
    if (dst.Eoper == OPind)                     // if (*p = ..)
    {
        elem *dstPtr = dst.EV.E1;
        if (tyreg(dstPtr.Ety))
            dstregs = mDI;
        cdb.append(cod2_setES(dstPtr.Ety));
        scodelem(cdb,dstPtr,&dstregs,srcregs,false);
    }
    else
        cdrelconst(cdb,dst,&dstregs);
    freenode(dst);

    getregs(cdb,(srcregs | dstregs) & (mLSW | mDI));
    if (reloadDS)
    {
        assert(!flat);
        cdb.gen1(0x1E);                         // PUSH DS
        cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
    }

    const uint inlineLimit = REGSIZE * (6 + (REGSIZE == 4));
    if (numbytes <= inlineLimit)
    {
        // Small object: unrolled register-sized moves, then single bytes
        for (uint words = numbytes / REGSIZE; words; --words)
        {
            cdb.gen1(0xA5);                     // MOVSW
            code_orrex(cdb.last(), rex);
        }
        //if (numbytes % REGSIZE)
        //    printf("cdstreq numbytes %d\n",numbytes);
        for (uint rest = numbytes % REGSIZE; rest; --rest)
            cdb.gen1(0xA4);                     // MOVSB
    }
    else
    {
static if (1)
{
        // Large object: REP move of whole registers, then the odd bytes
        const uint oddBytes = numbytes & (REGSIZE - 1);
        const uint words = numbytes / REGSIZE;  // number of words
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,words,0);            // # of bytes/words
        cdb.gen1(0xF3);                         // REP
        if (REGSIZE == 8)
            cdb.gen1(REX | REX_W);
        cdb.gen1(0xA5);                         // REP MOVSD
        regimmed_set(CX,0);                     // note that CX == 0
        for (uint rest = oddBytes; rest; --rest)
        {
            cdb.gen1(0xA4);                     // MOVSB
        }
}
else
{
        uint movs;
        if (numbytes & (REGSIZE - 1))   // if odd
            movs = 0xA4;                // MOVSB
        else
        {
            movs = 0xA5;                // MOVSW
            numbytes /= REGSIZE;        // # of words
        }
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,numbytes,0);   // # of bytes/words
        cdb.gen1(0xF3);                 // REP
        cdb.gen1(movs);
        regimmed_set(CX,0);             // note that CX == 0
}
    }

    if (reloadDS)
        cdb.gen1(0x1F);                         // POP  DS
    assert(!(*pretregs & mPSW));
    if (*pretregs)
    {
        // ES:DI points past what we want, so step DI back by the object size
        cdb.genc2(0x81,(rex << 16) | modregrm(3,5,DI), type_size(e.ET));   // SUB DI,numbytes
        regm_t retregs = mDI;
        if ((*pretregs & mMSW) && !flat)
            retregs |= mES;
        fixresult(cdb,e,retregs,pretregs);
    }
}
4133 
4134 
4135 /**********************
4136  * Get the address of.
4137  * Is also called by cdstreq() to set up pointer to a structure.
4138  */
4139 
4140 void cdrelconst(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4141 {
4142     //printf("cdrelconst(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
4143 
4144     /* The following should not happen, but cgelem.c is a little stupid.
4145      * Assertion can be tripped by func("string" == 0); and similar
4146      * things. Need to add goals to optelem() to fix this completely.
4147      */
4148     //assert((*pretregs & mPSW) == 0);
4149     if (*pretregs & mPSW)
4150     {
4151         *pretregs &= ~mPSW;
4152         gentstreg(cdb,SP);            // SP is never 0
4153         if (I64)
4154             code_orrex(cdb.last(), REX_W);
4155     }
4156     if (!*pretregs)
4157         return;
4158 
4159     assert(e);
4160     tym_t tym = tybasic(e.Ety);
4161     switch (tym)
4162     {
4163         case TYstruct:
4164         case TYarray:
4165         case TYldouble:
4166         case TYildouble:
4167         case TYcldouble:
4168             tym = TYnptr;               // don't confuse allocreg()
4169             if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
4170             {
4171                 tym = TYfptr;
4172             }
4173             break;
4174 
4175         case TYifunc:
4176             tym = TYfptr;
4177             break;
4178 
4179         default:
4180             if (tyfunc(tym))
4181                 tym =
4182                     tyfarfunc(tym) ? TYfptr :
4183                     TYnptr;
4184             break;
4185     }
4186     //assert(tym & typtr);              // don't fail on (int)&a
4187 
4188     SC sclass;
4189     reg_t mreg,            // segment of the address (TYfptrs only)
4190           lreg;            // offset of the address
4191 
4192     allocreg(cdb,pretregs,&lreg,tym);
4193     if (_tysize[tym] > REGSIZE)            // fptr could've been cast to long
4194     {
4195         if (*pretregs & mES)
4196         {
4197             /* Do not allocate CX or SI here, as cdstreq() needs
4198              * them preserved. cdstreq() should use scodelem()
4199              */
4200             regm_t scratch = (mAX|mBX|mDX|mDI) & ~mask(lreg);
4201             allocreg(cdb,&scratch,&mreg,TYint);
4202         }
4203         else
4204         {
4205             mreg = lreg;
4206             lreg = findreglsw(*pretregs);
4207         }
4208 
4209         /* if (get segment of function that isn't necessarily in the
4210          * current segment (i.e. CS doesn't have the right value in it)
4211          */
4212         Symbol *s = e.EV.Vsym;
4213         if (s.Sfl == FLdatseg)
4214         {   assert(0);
4215         }
4216         sclass = cast(SC) s.Sclass;
4217         const ety = tybasic(s.ty());
4218         if ((tyfarfunc(ety) || ety == TYifunc) &&
4219             (sclass == SCextern || ClassInline(sclass) || config.wflags & WFthunk)
4220             || s.Sfl == FLfardata
4221             || (s.ty() & mTYcs && s.Sseg != cseg && (LARGECODE || s.Sclass == SCcomdat))
4222            )
4223         {   // MOV mreg,seg of symbol
4224             cdb.gencs(0xB8 + mreg,0,FLextern,s);
4225             cdb.last().Iflags = CFseg;
4226         }
4227         else
4228         {
4229             const fl = (s.ty() & mTYcs) ? FLcsdata : s.Sfl;
4230             cdb.gen2(0x8C,            // MOV mreg,SEG REGISTER
4231                 modregrm(3,segfl[fl],mreg));
4232         }
4233         if (*pretregs & mES)
4234             cdb.gen2(0x8E,modregrm(3,0,mreg));        // MOV ES,mreg
4235     }
4236     getoffset(cdb,e,lreg);
4237 }
4238 
4239 /*********************************
4240  * Load the offset portion of the address represented by e into
4241  * reg.
4242  */
4243 
4244 void getoffset(ref CodeBuilder cdb,elem *e,reg_t reg)
4245 {
4246     //printf("getoffset(e = %p, reg = %d)\n", e, reg);
4247     code cs = void;
4248     cs.Iflags = 0;
4249     ubyte rex = 0;
4250     cs.Irex = rex;
4251     assert(e.Eoper == OPvar || e.Eoper == OPrelconst);
4252     auto fl = el_fl(e);
4253     switch (fl)
4254     {
4255         case FLdatseg:
4256             cs.IEV2.Vpointer = e.EV.Vpointer;
4257             goto L3;
4258 
4259         case FLfardata:
4260             goto L4;
4261 
4262         case FLtlsdata:
4263     static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
4264     {
4265         {
4266           L5:
4267             if (config.flags3 & CFG3pic)
4268             {
4269                 if (I64)
4270                 {
4271                     /* Generate:
4272                      *   LEA DI,s@TLSGD[RIP]
4273                      */
4274                     //assert(reg == DI);
4275                     code css = void;
4276                     css.Irex = REX | REX_W;
4277                     css.Iop = LEA;
4278                     css.Irm = modregrm(0,reg,5);
4279                     if (reg & 8)
4280                         css.Irex |= REX_R;
4281                     css.Iflags = CFopsize;
4282                     css.IFL1 = cast(ubyte)fl;
4283                     css.IEV1.Vsym = e.EV.Vsym;
4284                     css.IEV1.Voffset = e.EV.Voffset;
4285                     cdb.gen(&css);
4286                 }
4287                 else
4288                 {
4289                     /* Generate:
4290                      *   LEA EAX,s@TLSGD[1*EBX+0]
4291                      */
4292                     assert(reg == AX);
4293                     load_localgot(cdb);
4294                     code css = void;
4295                     css.Iflags = 0;
4296                     css.Iop = LEA;             // LEA
4297                     css.Irex = 0;
4298                     css.Irm = modregrm(0,AX,4);
4299                     css.Isib = modregrm(0,BX,5);
4300                     css.IFL1 = cast(ubyte)fl;
4301                     css.IEV1.Vsym = e.EV.Vsym;
4302                     css.IEV1.Voffset = e.EV.Voffset;
4303                     cdb.gen(&css);
4304                 }
4305                 return;
4306             }
4307             /* Generate:
4308              *      MOV reg,GS:[00000000]
4309              *      ADD reg, offset s@TLS_LE
4310              * for locals, and for globals:
4311              *      MOV reg,GS:[00000000]
4312              *      ADD reg, s@TLS_IE
4313              * note different fixup
4314              */
4315             int stack = 0;
4316             if (reg == STACK)
4317             {   regm_t retregs = ALLREGS;
4318 
4319                 reg_t regx;
4320                 allocreg(cdb,&retregs,&regx,TYoffset);
4321                 reg = findreg(retregs);
4322                 stack = 1;
4323             }
4324 
4325             code css = void;
4326             css.Irex = rex;
4327             css.Iop = 0x8B;
4328             css.Irm = modregrm(0, 0, BPRM);
4329             code_newreg(&css, reg);
4330             css.Iflags = CFgs;
4331             css.IFL1 = FLconst;
4332             css.IEV1.Vuns = 0;
4333             cdb.gen(&css);               // MOV reg,GS:[00000000]
4334 
4335             if (e.EV.Vsym.Sclass == SCstatic || e.EV.Vsym.Sclass == SClocstat)
4336             {   // ADD reg, offset s
4337                 cs.Irex = rex;
4338                 cs.Iop = 0x81;
4339                 cs.Irm = modregrm(3,0,reg & 7);
4340                 if (reg & 8)
4341                     cs.Irex |= REX_B;
4342                 cs.Iflags = CFoff;
4343                 cs.IFL2 = cast(ubyte)fl;
4344                 cs.IEV2.Vsym = e.EV.Vsym;
4345                 cs.IEV2.Voffset = e.EV.Voffset;
4346             }
4347             else
4348             {   // ADD reg, s
4349                 cs.Irex = rex;
4350                 cs.Iop = 0x03;
4351                 cs.Irm = modregrm(0,0,BPRM);
4352                 code_newreg(&cs, reg);
4353                 cs.Iflags = CFoff;
4354                 cs.IFL1 = cast(ubyte)fl;
4355                 cs.IEV1.Vsym = e.EV.Vsym;
4356                 cs.IEV1.Voffset = e.EV.Voffset;
4357             }
4358             cdb.gen(&cs);                // ADD reg, xxxx
4359 
4360             if (stack)
4361             {
4362                 cdb.gen1(0x50 + (reg & 7));      // PUSH reg
4363                 if (reg & 8)
4364                     code_orrex(cdb.last(), REX_B);
4365                 cdb.genadjesp(REGSIZE);
4366                 stackchanged = 1;
4367             }
4368             break;
4369         }
4370     }
4371     else static if (TARGET_WINDOS)
4372     {
4373             if (I64)
4374             {
4375             L5:
4376                 assert(reg != STACK);
4377                 cs.IEV2.Vsym = e.EV.Vsym;
4378                 cs.IEV2.Voffset = e.EV.Voffset;
4379                 cs.Iop = 0xB8 + (reg & 7);      // MOV Ereg,offset s
4380                 if (reg & 8)
4381                     cs.Irex |= REX_B;
4382                 cs.Iflags = CFoff;              // want offset only
4383                 cs.IFL2 = cast(ubyte)fl;
4384                 cdb.gen(&cs);
4385                 break;
4386             }
4387             goto L4;
4388     }
4389     else
4390     {
4391             goto L4;
4392     }
4393 
4394         case FLfunc:
4395             fl = FLextern;                  /* don't want PC relative addresses */
4396             goto L4;
4397 
4398         case FLextern:
4399     static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
4400     {
4401             if (e.EV.Vsym.ty() & mTYthread)
4402                 goto L5;
4403     }
4404     static if (TARGET_WINDOS)
4405     {
4406             if (I64 && e.EV.Vsym.ty() & mTYthread)
4407                 goto L5;
4408     }
4409             goto L4;
4410 
4411         case FLdata:
4412         case FLudata:
4413         case FLgot:
4414         case FLgotoff:
4415         case FLcsdata:
4416         L4:
4417             cs.IEV2.Vsym = e.EV.Vsym;
4418             cs.IEV2.Voffset = e.EV.Voffset;
4419         L3:
4420             if (reg == STACK)
4421             {   stackchanged = 1;
4422                 cs.Iop = 0x68;              /* PUSH immed16                 */
4423                 cdb.genadjesp(REGSIZE);
4424             }
4425             else
4426             {   cs.Iop = 0xB8 + (reg & 7);  // MOV reg,immed16
4427                 if (reg & 8)
4428                     cs.Irex |= REX_B;
4429                 if (I64)
4430                 {   cs.Irex |= REX_W;
4431                     if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
4432                     {   // LEA reg,immed32[RIP]
4433                         cs.Iop = LEA;
4434                         cs.Irm = modregrm(0,reg & 7,5);
4435                         if (reg & 8)
4436                             cs.Irex = (cs.Irex & ~REX_B) | REX_R;
4437                         cs.IFL1 = cast(ubyte)fl;
4438                         cs.IEV1.Vsym = cs.IEV2.Vsym;
4439                         cs.IEV1.Voffset = cs.IEV2.Voffset;
4440                     }
4441                 }
4442             }
4443             cs.Iflags = CFoff;              /* want offset only             */
4444             cs.IFL2 = cast(ubyte)fl;
4445             cdb.gen(&cs);
4446             break;
4447 
4448         case FLreg:
4449             /* Allow this since the tree optimizer puts & in front of       */
4450             /* register doubles.                                            */
4451             goto L2;
4452         case FLauto:
4453         case FLfast:
4454         case FLbprel:
4455         case FLfltreg:
4456             reflocal = true;
4457             goto L2;
4458         case FLpara:
4459             refparam = true;
4460         L2:
4461             if (reg == STACK)
4462             {   regm_t retregs = ALLREGS;
4463 
4464                 reg_t regx;
4465                 allocreg(cdb,&retregs,&regx,TYoffset);
4466                 reg = findreg(retregs);
4467                 loadea(cdb,e,&cs,LEA,reg,0,0,0);    // LEA reg,EA
4468                 if (I64)
4469                     code_orrex(cdb.last(), REX_W);
4470                 cdb.gen1(0x50 + (reg & 7));               // PUSH reg
4471                 if (reg & 8)
4472                     code_orrex(cdb.last(), REX_B);
4473                 cdb.genadjesp(REGSIZE);
4474                 stackchanged = 1;
4475             }
4476             else
4477             {
4478                 loadea(cdb,e,&cs,LEA,reg,0,0,0);   // LEA reg,EA
4479                 if (I64)
4480                     code_orrex(cdb.last(), REX_W);
4481             }
4482             break;
4483 
4484         default:
4485             debug
4486             {
4487                 elem_print(e);
4488                 WRFL(fl);
4489             }
4490             assert(0);
4491     }
4492 }
4493 
4494 
4495 /******************
4496  * OPneg, OPsqrt, OPsin, OPcos, OPrint
4497  */
4498 
4499 void cdneg(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4500 {
4501     //printf("cdneg()\n");
4502     //elem_print(e);
4503     if (*pretregs == 0)
4504     {
4505         codelem(cdb,e.EV.E1,pretregs,false);
4506         return;
4507     }
4508     const tyml = tybasic(e.EV.E1.Ety);
4509     const sz = _tysize[tyml];
4510     if (tyfloating(tyml))
4511     {
4512         if (tycomplex(tyml))
4513         {
4514             neg_complex87(cdb, e, pretregs);
4515             return;
4516         }
4517         if (tyxmmreg(tyml) && e.Eoper == OPneg && *pretregs & XMMREGS)
4518         {
4519             xmmneg(cdb,e,pretregs);
4520             return;
4521         }
4522         if (config.inline8087 &&
4523             ((*pretregs & (ALLREGS | mBP)) == 0 || e.Eoper == OPsqrt || I64))
4524             {
4525                 neg87(cdb,e,pretregs);
4526                 return;
4527             }
4528         regm_t retregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
4529         codelem(cdb,e.EV.E1,&retregs,false);
4530         getregs(cdb,retregs);
4531         if (I32)
4532         {
4533             const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
4534             cdb.genc2(0x81,modregrm(3,6,reg),0x80000000); // XOR EDX,sign bit
4535         }
4536         else
4537         {
4538             const reg = (sz == 8) ? AX : findregmsw(retregs);
4539             cdb.genc2(0x81,modregrm(3,6,reg),0x8000);     // XOR AX,0x8000
4540         }
4541         fixresult(cdb,e,retregs,pretregs);
4542         return;
4543     }
4544 
4545     const uint isbyte = sz == 1;
4546     const possregs = (isbyte) ? BYTEREGS : allregs;
4547     regm_t retregs = *pretregs & possregs;
4548     if (retregs == 0)
4549         retregs = possregs;
4550     codelem(cdb,e.EV.E1,&retregs,false);
4551     getregs(cdb,retregs);                // retregs will be destroyed
4552     if (sz <= REGSIZE)
4553     {
4554         const reg = findreg(retregs);
4555         uint rex = (I64 && sz == 8) ? REX_W : 0;
4556         if (I64 && sz == 1 && reg >= 4)
4557             rex |= REX;
4558         cdb.gen2(0xF7 ^ isbyte,(rex << 16) | modregrmx(3,3,reg));   // NEG reg
4559         if (!I16 && _tysize[tyml] == SHORTSIZE && *pretregs & mPSW)
4560             cdb.last().Iflags |= CFopsize | CFpsw;
4561         *pretregs &= mBP | ALLREGS;             // flags already set
4562     }
4563     else if (sz == 2 * REGSIZE)
4564     {
4565         const msreg = findregmsw(retregs);
4566         cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
4567         const lsreg = findreglsw(retregs);
4568         cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
4569         code_orflag(cdb.last(), CFpsw);           // need flag result of previous NEG
4570         cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
4571     }
4572     else
4573         assert(0);
4574     fixresult(cdb,e,retregs,pretregs);
4575 }
4576 
4577 
4578 /******************
4579  * Absolute value operator
4580  */
4581 
4582 
4583 void cdabs(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
4584 {
4585     //printf("cdabs(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
4586     if (*pretregs == 0)
4587     {
4588         codelem(cdb,e.EV.E1,pretregs,false);
4589         return;
4590     }
4591     const tyml = tybasic(e.EV.E1.Ety);
4592     const sz = _tysize[tyml];
4593     const rex = (I64 && sz == 8) ? REX_W : 0;
4594     if (tyfloating(tyml))
4595     {
4596         if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || I64))
4597         {
4598             neg87(cdb,e,pretregs);
4599             return;
4600         }
4601         regm_t retregs = (!I32 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
4602         codelem(cdb,e.EV.E1,&retregs,false);
4603         getregs(cdb,retregs);
4604         if (I32)
4605         {
4606             const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
4607             cdb.genc2(0x81,modregrm(3,4,reg),0x7FFFFFFF); // AND EDX,~sign bit
4608         }
4609         else
4610         {
4611             const reg = (sz == 8) ? AX : findregmsw(retregs);
4612             cdb.genc2(0x81,modregrm(3,4,reg),0x7FFF);     // AND AX,0x7FFF
4613         }
4614         fixresult(cdb,e,retregs,pretregs);
4615         return;
4616     }
4617 
4618     const uint isbyte = sz == 1;
4619     assert(isbyte == 0);
4620     regm_t possregs = (sz <= REGSIZE) ? cast(regm_t) mAX : allregs;
4621     if (!I16 && sz == REGSIZE)
4622         possregs = allregs;
4623     regm_t retregs = *pretregs & possregs;
4624     if (retregs == 0)
4625         retregs = possregs;
4626     codelem(cdb,e.EV.E1,&retregs,false);
4627     getregs(cdb,retregs);                // retregs will be destroyed
4628     if (sz <= REGSIZE)
4629     {
4630         /*      CWD
4631                 XOR     AX,DX
4632                 SUB     AX,DX
4633            or:
4634                 MOV     r,reg
4635                 SAR     r,63
4636                 XOR     reg,r
4637                 SUB     reg,r
4638          */
4639         reg_t reg;
4640         reg_t r;
4641 
4642         if (!I16 && sz == REGSIZE)
4643         {   regm_t scratch = allregs & ~retregs;
4644             reg = findreg(retregs);
4645             allocreg(cdb,&scratch,&r,TYint);
4646             getregs(cdb,retregs);
4647             genmovreg(cdb,r,reg);                     // MOV r,reg
4648             cdb.genc2(0xC1,modregrmx(3,7,r),REGSIZE * 8 - 1);      // SAR r,31/63
4649             code_orrex(cdb.last(), rex);
4650         }
4651         else
4652         {
4653             reg = AX;
4654             r = DX;
4655             getregs(cdb,mDX);
4656             if (!I16 && sz == SHORTSIZE)
4657                 cdb.gen1(0x98);                         // CWDE
4658             cdb.gen1(0x99);                             // CWD
4659             code_orrex(cdb.last(), rex);
4660         }
4661         cdb.gen2(0x33 ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // XOR reg,r
4662         cdb.gen2(0x2B ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // SUB reg,r
4663         if (!I16 && sz == SHORTSIZE && *pretregs & mPSW)
4664             cdb.last().Iflags |= CFopsize | CFpsw;
4665         if (*pretregs & mPSW)
4666             cdb.last().Iflags |= CFpsw;
4667         *pretregs &= ~mPSW;                     // flags already set
4668     }
4669     else if (sz == 2 * REGSIZE)
4670     {
4671         /*      or      DX,DX
4672                 jns     L2
4673                 neg     DX
4674                 neg     AX
4675                 sbb     DX,0
4676             L2:
4677          */
4678 
4679         code *cnop = gennop(null);
4680         const msreg = findregmsw(retregs);
4681         const lsreg = findreglsw(retregs);
4682         genregs(cdb,0x09,msreg,msreg);            // OR msreg,msreg
4683         genjmp(cdb,JNS,FLcode,cast(block *)cnop);
4684         cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
4685         cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg+1
4686         cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
4687         cdb.append(cnop);
4688     }
4689     else
4690         assert(0);
4691     fixresult(cdb,e,retregs,pretregs);
4692 }
4693 
4694 /**************************
4695  * Post increment and post decrement.
4696  */
4697 
4698 void cdpost(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4699 {
4700     //printf("cdpost(pretregs = %s)\n", regm_str(*pretregs));
4701     code cs = void;
4702     const op = e.Eoper;                      // OPxxxx
4703     if (*pretregs == 0)                        // if nothing to return
4704     {
4705         cdaddass(cdb,e,pretregs);
4706         return;
4707     }
4708     const tym_t tyml = tybasic(e.EV.E1.Ety);
4709     const sz = _tysize[tyml];
4710     elem *e2 = e.EV.E2;
4711     const rex = (I64 && sz == 8) ? REX_W : 0;
4712 
4713     if (tyfloating(tyml))
4714     {
4715         if (config.fpxmmregs && tyxmmreg(tyml) &&
4716             !tycomplex(tyml) // SIMD code is not set up to deal with complex
4717            )
4718         {
4719             xmmpost(cdb,e,pretregs);
4720             return;
4721         }
4722 
4723         if (config.inline8087)
4724         {
4725             post87(cdb,e,pretregs);
4726             return;
4727         }
4728 static if (TARGET_WINDOS)
4729 {
4730         assert(sz <= 8);
4731         getlvalue(cdb,&cs,e.EV.E1,DOUBLEREGS);
4732         freenode(e.EV.E1);
4733         regm_t idxregs = idxregm(&cs);  // mask of index regs used
4734         cs.Iop = 0x8B;                  /* MOV DOUBLEREGS,EA            */
4735         fltregs(cdb,&cs,tyml);
4736         stackchanged = 1;
4737         int stackpushsave = stackpush;
4738         regm_t retregs;
4739         if (sz == 8)
4740         {
4741             if (I32)
4742             {
4743                 cdb.gen1(0x50 + DX);             // PUSH DOUBLEREGS
4744                 cdb.gen1(0x50 + AX);
4745                 stackpush += DOUBLESIZE;
4746                 retregs = DOUBLEREGS2_32;
4747             }
4748             else
4749             {
4750                 cdb.gen1(0x50 + AX);
4751                 cdb.gen1(0x50 + BX);
4752                 cdb.gen1(0x50 + CX);
4753                 cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
4754                 stackpush += DOUBLESIZE + DOUBLESIZE;
4755 
4756                 cdb.gen1(0x50 + AX);
4757                 cdb.gen1(0x50 + BX);
4758                 cdb.gen1(0x50 + CX);
4759                 cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
4760                 retregs = DOUBLEREGS_16;
4761             }
4762         }
4763         else
4764         {
4765             stackpush += FLOATSIZE;     /* so we know something is on   */
4766             if (!I32)
4767                 cdb.gen1(0x50 + DX);
4768             cdb.gen1(0x50 + AX);
4769             retregs = FLOATREGS2;
4770         }
4771         cdb.genadjesp(stackpush - stackpushsave);
4772 
4773         cgstate.stackclean++;
4774         scodelem(cdb,e2,&retregs,idxregs,false);
4775         cgstate.stackclean--;
4776 
4777         if (tyml == TYdouble || tyml == TYdouble_alias)
4778         {
4779             retregs = DOUBLEREGS;
4780             callclib(cdb,e,(op == OPpostinc) ? CLIB.dadd : CLIB.dsub,
4781                     &retregs,idxregs);
4782         }
4783         else /* tyml == TYfloat */
4784         {
4785             retregs = FLOATREGS;
4786             callclib(cdb,e,(op == OPpostinc) ? CLIB.fadd : CLIB.fsub,
4787                     &retregs,idxregs);
4788         }
4789         cs.Iop = 0x89;                  /* MOV EA,DOUBLEREGS            */
4790         fltregs(cdb,&cs,tyml);
4791         stackpushsave = stackpush;
4792         if (tyml == TYdouble || tyml == TYdouble_alias)
4793         {   if (*pretregs == mSTACK)
4794                 retregs = mSTACK;       /* leave result on stack        */
4795             else
4796             {
4797                 if (I32)
4798                 {
4799                     cdb.gen1(0x58 + AX);
4800                     cdb.gen1(0x58 + DX);
4801                 }
4802                 else
4803                 {
4804                     cdb.gen1(0x58 + DX);
4805                     cdb.gen1(0x58 + CX);
4806                     cdb.gen1(0x58 + BX);
4807                     cdb.gen1(0x58 + AX);
4808                 }
4809                 stackpush -= DOUBLESIZE;
4810                 retregs = DOUBLEREGS;
4811             }
4812         }
4813         else
4814         {
4815             cdb.gen1(0x58 + AX);
4816             if (!I32)
4817                 cdb.gen1(0x58 + DX);
4818             stackpush -= FLOATSIZE;
4819             retregs = FLOATREGS;
4820         }
4821         cdb.genadjesp(stackpush - stackpushsave);
4822         fixresult(cdb,e,retregs,pretregs);
4823         return;
4824 }
4825     }
4826     if (tyxmmreg(tyml))
4827     {
4828         xmmpost(cdb,e,pretregs);
4829         return;
4830     }
4831 
4832     assert(e2.Eoper == OPconst);
4833     uint isbyte = (sz == 1);
4834     regm_t possregs = isbyte ? BYTEREGS : allregs;
4835     getlvalue(cdb,&cs,e.EV.E1,0);
4836     freenode(e.EV.E1);
4837     regm_t idxregs = idxregm(&cs);       // mask of index regs used
4838     if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 &&
4839         (!I16 || (idxregs & (mBX | mSI | mDI | mBP))))
4840     {
4841         // Generate:
4842         //      TEST    reg,reg
4843         //      LEA     reg,n[reg]      // don't affect flags
4844         reg_t reg = cs.Irm & 7;
4845         if (cs.Irex & REX_B)
4846             reg |= 8;
4847         cs.Iop = 0x85 ^ isbyte;
4848         code_newreg(&cs, reg);
4849         cs.Iflags |= CFpsw;
4850         cdb.gen(&cs);             // TEST reg,reg
4851 
4852         // If lvalue is a register variable, we must mark it as modified
4853         modEA(cdb,&cs);
4854 
4855         auto n = e2.EV.Vint;
4856         if (op == OPpostdec)
4857             n = -n;
4858         int rm = reg;
4859         if (I16)
4860         {
4861             static immutable byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ]; // copied from cod1.c
4862             rm = regtorm[reg];
4863         }
4864         cdb.genc1(LEA,(rex << 16) | buildModregrm(2,reg,rm),FLconst,n); // LEA reg,n[reg]
4865         return;
4866     }
4867     else if (sz <= REGSIZE || tyfv(tyml))
4868     {
4869         code cs2 = void;
4870 
4871         cs.Iop = 0x8B ^ isbyte;
4872         regm_t retregs = possregs & ~idxregs & *pretregs;
4873         if (!tyfv(tyml))
4874         {
4875             if (retregs == 0)
4876                 retregs = possregs & ~idxregs;
4877         }
4878         else /* tyfv(tyml) */
4879         {
4880             if ((retregs &= mLSW) == 0)
4881                 retregs = mLSW & ~idxregs;
4882             /* Can't use LES if the EA uses ES as a seg override    */
4883             if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
4884             {   cs.Iop = 0xC4;                      /* LES          */
4885                 getregs(cdb,mES);           // allocate ES
4886             }
4887         }
4888         reg_t reg;
4889         allocreg(cdb,&retregs,&reg,TYint);
4890         code_newreg(&cs, reg);
4891         if (sz == 1 && I64 && reg >= 4)
4892             cs.Irex |= REX;
4893         cdb.gen(&cs);                     // MOV reg,EA
4894         cs2 = cs;
4895 
4896         /* If lvalue is a register variable, we must mark it as modified */
4897         modEA(cdb,&cs);
4898 
4899         cs.Iop = 0x81 ^ isbyte;
4900         cs.Irm &= ~cast(int)modregrm(0,7,0);             // reg field = 0
4901         cs.Irex &= ~REX_R;
4902         if (op == OPpostdec)
4903             cs.Irm |= modregrm(0,5,0);  /* SUB                  */
4904         cs.IFL2 = FLconst;
4905         targ_int n = e2.EV.Vint;
4906         cs.IEV2.Vint = n;
4907         if (n == 1)                     /* can use INC or DEC           */
4908         {
4909             cs.Iop |= 0xFE;             /* xFE is dec byte, xFF is word */
4910             if (op == OPpostdec)
4911                 NEWREG(cs.Irm,1);       // DEC EA
4912             else
4913                 NEWREG(cs.Irm,0);       // INC EA
4914         }
4915         else if (n == -1)               // can use INC or DEC
4916         {
4917             cs.Iop |= 0xFE;             // xFE is dec byte, xFF is word
4918             if (op == OPpostinc)
4919                 NEWREG(cs.Irm,1);       // DEC EA
4920             else
4921                 NEWREG(cs.Irm,0);       // INC EA
4922         }
4923 
4924         // For scheduling purposes, we wish to replace:
4925         //      MOV     reg,EA
4926         //      OP      EA
4927         // with:
4928         //      MOV     reg,EA
4929         //      OP      reg
4930         //      MOV     EA,reg
4931         //      ~OP     reg
4932         if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
4933             config.target_cpu >= TARGET_Pentium &&
4934             config.flags4 & CFG4speed)
4935         {
4936             // Replace EA in cs with reg
4937             cs.Irm = (cs.Irm & ~cast(int)modregrm(3,0,7)) | modregrm(3,0,reg & 7);
4938             if (reg & 8)
4939             {   cs.Irex &= ~REX_R;
4940                 cs.Irex |= REX_B;
4941             }
4942             else
4943                 cs.Irex &= ~REX_B;
4944             if (I64 && sz == 1 && reg >= 4)
4945                 cs.Irex |= REX;
4946             cdb.gen(&cs);                        // ADD/SUB reg,const
4947 
4948             // Reverse MOV direction
4949             cs2.Iop ^= 2;
4950             cdb.gen(&cs2);                       // MOV EA,reg
4951 
4952             // Toggle INC <. DEC, ADD <. SUB
4953             cs.Irm ^= (n == 1 || n == -1) ? modregrm(0,1,0) : modregrm(0,5,0);
4954             cdb.gen(&cs);
4955 
4956             if (*pretregs & mPSW)
4957             {   *pretregs &= ~mPSW;              // flags already set
4958                 code_orflag(cdb.last(),CFpsw);
4959             }
4960         }
4961         else
4962             cdb.gen(&cs);                        // ADD/SUB EA,const
4963 
4964         freenode(e2);
4965         if (tyfv(tyml))
4966         {
4967             reg_t preg;
4968 
4969             getlvalue_msw(&cs);
4970             if (*pretregs & mES)
4971             {
4972                 preg = ES;
4973                 /* ES is already loaded if CFes is 0            */
4974                 cs.Iop = ((cs.Iflags & CFSEG) == CFes) ? 0x8E : NOP;
4975                 NEWREG(cs.Irm,0);       /* MOV ES,EA+2          */
4976             }
4977             else
4978             {
4979                 regm_t retregsx = *pretregs & mMSW;
4980                 if (!retregsx)
4981                     retregsx = mMSW;
4982                 allocreg(cdb,&retregsx,&preg,TYint);
4983                 cs.Iop = 0x8B;
4984                 if (I32)
4985                     cs.Iflags |= CFopsize;
4986                 NEWREG(cs.Irm,preg);    /* MOV preg,EA+2        */
4987             }
4988             getregs(cdb,mask(preg));
4989             cdb.gen(&cs);
4990             retregs = mask(reg) | mask(preg);
4991         }
4992         fixresult(cdb,e,retregs,pretregs);
4993         return;
4994     }
4995     else if (tyml == TYhptr)
4996     {
4997         uint rvalue;
4998         reg_t lreg;
4999         reg_t rtmp;
5000         regm_t mtmp;
5001 
5002         rvalue = e2.EV.Vlong;
5003         freenode(e2);
5004 
5005         // If h--, convert to h++
5006         if (e.Eoper == OPpostdec)
5007             rvalue = -rvalue;
5008 
5009         regm_t retregs = mLSW & ~idxregs & *pretregs;
5010         if (!retregs)
5011             retregs = mLSW & ~idxregs;
5012         allocreg(cdb,&retregs,&lreg,TYint);
5013 
5014         // Can't use LES if the EA uses ES as a seg override
5015         if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
5016         {   cs.Iop = 0xC4;
5017             retregs |= mES;
5018             getregs(cdb,mES|mCX);       // allocate ES
5019             cs.Irm |= modregrm(0,lreg,0);
5020             cdb.gen(&cs);                       // LES lreg,EA
5021         }
5022         else
5023         {   cs.Iop = 0x8B;
5024             retregs |= mDX;
5025             getregs(cdb,mDX|mCX);
5026             cs.Irm |= modregrm(0,lreg,0);
5027             cdb.gen(&cs);                       // MOV lreg,EA
5028             NEWREG(cs.Irm,DX);
5029             getlvalue_msw(&cs);
5030             cdb.gen(&cs);                       // MOV DX,EA+2
5031             getlvalue_lsw(&cs);
5032         }
5033 
5034         // Allocate temporary register, rtmp
5035         mtmp = ALLREGS & ~mCX & ~idxregs & ~retregs;
5036         allocreg(cdb,&mtmp,&rtmp,TYint);
5037 
5038         movregconst(cdb,rtmp,rvalue >> 16,0);   // MOV rtmp,e2+2
5039         getregs(cdb,mtmp);
5040         cs.Iop = 0x81;
5041         NEWREG(cs.Irm,0);
5042         cs.IFL2 = FLconst;
5043         cs.IEV2.Vint = rvalue;
5044         cdb.gen(&cs);                           // ADD EA,e2
5045         code_orflag(cdb.last(),CFpsw);
5046         cdb.genc2(0x81,modregrm(3,2,rtmp),0);   // ADC rtmp,0
5047         genshift(cdb);                          // MOV CX,offset __AHSHIFT
5048         cdb.gen2(0xD3,modregrm(3,4,rtmp));      // SHL rtmp,CL
5049         cs.Iop = 0x01;
5050         NEWREG(cs.Irm,rtmp);                    // ADD EA+2,rtmp
5051         getlvalue_msw(&cs);
5052         cdb.gen(&cs);
5053         fixresult(cdb,e,retregs,pretregs);
5054         return;
5055     }
5056     else if (sz == 2 * REGSIZE)
5057     {
5058         regm_t retregs = allregs & ~idxregs & *pretregs;
5059         if ((retregs & mLSW) == 0)
5060                 retregs |= mLSW & ~idxregs;
5061         if ((retregs & mMSW) == 0)
5062                 retregs |= ALLREGS & mMSW;
5063         assert(retregs & mMSW && retregs & mLSW);
5064         reg_t reg;
5065         allocreg(cdb,&retregs,&reg,tyml);
5066         uint sreg = findreglsw(retregs);
5067         cs.Iop = 0x8B;
5068         cs.Irm |= modregrm(0,sreg,0);
5069         cdb.gen(&cs);                   // MOV sreg,EA
5070         NEWREG(cs.Irm,reg);
5071         getlvalue_msw(&cs);
5072         cdb.gen(&cs);                   // MOV reg,EA+2
5073         cs.Iop = 0x81;
5074         cs.Irm &= ~cast(int)modregrm(0,7,0);     /* reg field = 0 for ADD        */
5075         if (op == OPpostdec)
5076             cs.Irm |= modregrm(0,5,0);  /* SUB                          */
5077         getlvalue_lsw(&cs);
5078         cs.IFL2 = FLconst;
5079         cs.IEV2.Vlong = e2.EV.Vlong;
5080         cdb.gen(&cs);                   // ADD/SUB EA,const
5081         code_orflag(cdb.last(),CFpsw);
5082         getlvalue_msw(&cs);
5083         cs.IEV2.Vlong = 0;
5084         if (op == OPpostinc)
5085             cs.Irm ^= modregrm(0,2,0);  /* ADC                          */
5086         else
5087             cs.Irm ^= modregrm(0,6,0);  /* SBB                          */
5088         cs.IEV2.Vlong = cast(targ_long)(e2.EV.Vullong >> (REGSIZE * 8));
5089         cdb.gen(&cs);                   // ADC/SBB EA,0
5090         freenode(e2);
5091         fixresult(cdb,e,retregs,pretregs);
5092         return;
5093     }
5094     else
5095     {
5096         assert(0);
5097     }
5098 }
5099 
5100 
/*******************************************
 * Code generation handler that always fails.
 * In debug builds the elem is dumped with elem_print() first,
 * then the function unconditionally trips assert(0).
 * Params:
 *      cdb = code builder (not used)
 *      e = elem that was dispatched here
 *      pretregs = requested result registers (not used)
 */
void cderr(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        elem_print(e);
    }

    // Handy when tracking down which elem got here:
    //printf("op = %d, %d\n", e.Eoper, OPstring);
    //printf("string = %p, len = %d\n", e.EV.ss.Vstring, e.EV.ss.Vstrlen);
    //printf("string = '%.*s'\n", cast(int)e.EV.ss.Vstrlen, e.EV.ss.Vstring);

    assert(0);
}
5111 
/*******************************************
 * Generate code for an "info" elem: e.EV.E1 selects what kind of
 * bookkeeping is attached (its Eoper is switched on), e.EV.E2 is the
 * expression whose value is delivered through pretregs.
 * Which operators are accepted depends on the MARS / SCPP version;
 * any other operator in E1 is an internal error.
 * Params:
 *      cdb = code builder receiving the generated code
 *      e = the info elem
 *      pretregs = requested result registers, passed on to E2
 */
void cdinfo(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    switch (e.EV.E1.Eoper)
    {
version (MARS)
{
        case OPdctor:
        {
            // Value first, then the ctor marker (which yields no result)
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t noregs = 0;
            codelem(cdb,e.EV.E1,&noregs,false);
            break;
        }
}
version (SCPP)
{
        case OPdtor:
        {
            cdcomma(cdb,e,pretregs);
            break;
        }

        case OPctor:
        {
            // Value first, then the ctor marker (which yields no result)
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t noregs = 0;
            codelem(cdb,e.EV.E1,&noregs,false);
            break;
        }

        case OPmark:
        {
            if (0 && config.exe == EX_WIN32)
            {
                // Disabled variant (note the constant 0 in the condition)
                const startIndex = except_index_get();
                except_mark();
                codelem(cdb,e.EV.E2,pretregs,false);
                if (config.exe == EX_WIN32 && startIndex != except_index_get())
                {
                    usednteh |= NTEHcleanup;
                    nteh_gensindex(cdb,startIndex - 1);
                }
                except_release();
                assert(startIndex == except_index_get());
            }
            else
            {
                // Bracket the code for E2 with ESCmark / ESCrelease pseudo-ops
                code esc = void;
                esc.Iflags = 0;
                esc.Irex = 0;
                esc.Iop = ESCAPE | ESCmark;
                cdb.gen(&esc);

                codelem(cdb,e.EV.E2,pretregs,false);

                esc.Iop = ESCAPE | ESCrelease;
                cdb.gen(&esc);
            }
            freenode(e.EV.E1);
            break;
        }
}
        default:
            assert(0);
    }
}
5165 
/*******************************************
 * D constructor.
 *
 * Emits the pseudo-instruction that marks the start of an EH range,
 * followed by the store of the scope index:
 *
 *      ESCAPE | ESCdctor
 *      MOV     sindex[BP],index
 *
 * Params:
 *      cdb = code builder receiving the generated code
 *      e = the constructor elem, recorded in the marker's IEV1.Vtor
 *      pretregs = requested result registers; must be 0
 */

void cddctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    const bool win32EH = config.ehmethod == EHmethod.EH_WIN32;

    usednteh |= win32EH ? (EHcleanup | NTEHcleanup | NTEH_try)
                        : EHcleanup;
    if (win32EH)
        nteh_usevars();

    assert(*pretregs == 0);

    code marker;
    marker.Iop = ESCAPE | ESCdctor;     // mark start of EH range
    marker.Iflags = 0;
    marker.Irex = 0;
    marker.IFL1 = FLctor;
    marker.IEV1.Vtor = e;
    cdb.gen(&marker);

    // 0 is a placeholder: the actual index will be patched in later
    // by except_fillInEHTable()
    nteh_gensindex(cdb,0);
}
5192 
/*******************************************
 * D destructor.
 *
 * Always emits an ESCAPE | ESCddtor marker for e and marks all
 * registers as destroyed.
 *
 * With DWARF exception handling the marker denotes the end of the EH
 * range and where the landing pad is; the destructor code (e.EV.E1)
 * is then generated inline.
 *
 * Otherwise the destructor code becomes a small local subroutine:
 *
 *          ESCAPE | ESCddtor
 *          MOV     sindex[BP],index
 *          CALL    Ldtor
 *          JMP     L1
 *      Ldtor:
 *          ... e.EV.E1 ...
 *          RET
 *      L1: NOP
 *
 * Params:
 *      cdb = code builder receiving the generated code
 *      e = the destructor elem, recorded in the marker's IEV1.Vtor
 *      pretregs = requested result registers; must be 0
 */

void cdddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    const bool dwarfEH = config.ehmethod == EHmethod.EH_DWARF;

    usednteh |= EHcleanup;
    if (!dwarfEH && config.ehmethod == EHmethod.EH_WIN32)
    {
        usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }

    code marker;
    marker.Iop = ESCAPE | ESCddtor;
    marker.Iflags = 0;
    marker.Irex = 0;
    marker.IFL1 = FLdtor;
    marker.IEV1.Vtor = e;
    cdb.gen(&marker);

    if (!dwarfEH)
    {
        // 0 is a placeholder: the actual index will be patched in later
        // by except_fillInEHTable()
        nteh_gensindex(cdb,0);
    }

    // Mark all registers as destroyed
    getregsNoSave(allregs);

    assert(*pretregs == 0);

    if (dwarfEH)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    // Build the out-of-line destructor body:  ... e.EV.E1 ... ; RET
    CodeBuilder dtorBody;
    dtorBody.ctor();
    codelem(dtorBody,e.EV.E1,pretregs,false);
    dtorBody.gen1(0xC3);                            // RET
    code *dtorEntry = dtorBody.finish();

    // Adjust the stack around the CALL when the stack alignment is 16 or more
    int adjust = 0;
    if (STACKALIGN >= 16)
    {
        adjust = STACKALIGN - REGSIZE;
        cod3_stackadj(cdb, adjust);
    }

    calledafunc = 1;
    genjmp(cdb,0xE8,FLcode,cast(block *)dtorEntry); // CALL Ldtor

    if (adjust)
        cod3_stackadj(cdb, -adjust);

    // Jump over the destructor body to a NOP placed right after it
    code *skipTarget = gennop(null);
    genjmp(cdb,JMP,FLcode,cast(block *)skipTarget);
    cdb.append(dtorBody);
    cdb.append(skipTarget);
}
5276 
5277 
/*******************************************
 * C++ constructor.
 *
 * Only does anything in SCPP builds, where it emits an
 * ESCAPE | ESCctor marker that records e; otherwise it is a no-op.
 * Params:
 *      cdb = code builder receiving the marker
 *      e = the constructor elem, recorded in the marker's IEV1.Vtor
 *      pretregs = requested result registers; must be 0 (SCPP)
 */

void cdctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    version (SCPP)
    {
        usednteh |= (config.exe == EX_WIN32) ? (EHcleanup | NTEHcleanup)
                                             : EHcleanup;
        assert(*pretregs == 0);

        code marker = void;             // every field that is used is set below
        marker.Iflags = 0;
        marker.Irex = 0;
        marker.Iop = ESCAPE | ESCctor;
        marker.IFL1 = FLctor;
        marker.IEV1.Vtor = e;
        cdb.gen(&marker);
    }
}
5300 
/*******************************************
 * C++ destructor.
 *
 * Only does anything in SCPP builds, where it emits an
 * ESCAPE | ESCdtor marker that records e; otherwise it is a no-op.
 * Params:
 *      cdb = code builder receiving the marker
 *      e = the destructor elem, recorded in the marker's IEV1.Vtor
 *      pretregs = requested result registers; must be 0 (SCPP)
 */
void cddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    version (SCPP)
    {
        usednteh |= (config.exe == EX_WIN32) ? (EHcleanup | NTEHcleanup)
                                             : EHcleanup;
        assert(*pretregs == 0);

        code marker = void;             // every field that is used is set below
        marker.Iflags = 0;
        marker.Irex = 0;
        marker.Iop = ESCAPE | ESCdtor;
        marker.IFL1 = FLdtor;
        marker.IEV1.Vtor = e;
        cdb.gen(&marker);
    }
}
5319 
/*******************************************
 * Handler with an empty body: generates no code and
 * touches none of its arguments.
 */
void cdmark(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // intentionally empty
}
5323 
static if (!NTEXCEPTIONS)
{
/*******************************************
 * Stand-in compiled only when NTEXCEPTIONS is false:
 * reaching it is an internal error.
 */
void cdsetjmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(0);      // must never be called in this configuration
}
}
5331 
/*****************************************
 * Generate code for the operand e.EV.E1 with no result requested.
 * Params:
 *      cdb = code builder receiving the generated code
 *      e = elem whose operand E1 is evaluated
 *      pretregs = requested result registers; must be 0
 */

void cdvoid(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);

    elem *operand = e.EV.E1;
    codelem(cdb,operand,pretregs,false);
}
5340 
/*****************************************
 * Emit a single halting instruction: UD2 when the target CPU is
 * TARGET_80286 or later, INT3 otherwise.
 * Params:
 *      cdb = code builder receiving the instruction
 *      e = the halt elem (not used)
 *      pretregs = requested result registers; must be 0
 */

void cdhalt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);

    if (config.target_cpu >= TARGET_80286)
        cdb.gen1(UD2);
    else
        cdb.gen1(INT3);
}
5349 
5350 }