1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1984-1998 by Symantec
6  *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod2.d, backend/cod2.d)
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod2.d
11  */
12 
13 module dmd.backend.cod2;
14 
15 version (SCPP)
16     version = COMPILE;
17 version (MARS)
18     version = COMPILE;
19 
20 version (COMPILE)
21 {
22 
23 import core.stdc.stdio;
24 import core.stdc.stdlib;
25 import core.stdc.string;
26 
27 import dmd.backend.backend;
28 import dmd.backend.cc;
29 import dmd.backend.cdef;
30 import dmd.backend.code;
31 import dmd.backend.code_x86;
32 import dmd.backend.codebuilder;
33 import dmd.backend.mem;
34 import dmd.backend.el;
35 import dmd.backend.exh;
36 import dmd.backend.global;
37 import dmd.backend.oper;
38 import dmd.backend.ty;
39 import dmd.backend.type;
40 import dmd.backend.xmm;
41 
42 extern (C++):
43 
44 nothrow:
45 
46 int REGSIZE();
47 
48 extern __gshared CGstate cgstate;
49 extern __gshared ubyte[FLMAX] segfl;
50 extern __gshared bool[FLMAX] stackfl;
51 
52 __gshared int cdcmp_flag;
53 
54 private extern (D) uint mask(uint m) { return 1u << m; }   // mask with only bit number m set
55 
56 // from divcoeff.c
57 extern (C)
58 {
59     bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
60     bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
61 }
62 
63 /*******************************
64  * Exchange the register numbers stored at `a` and `b`.
65  */
66 
67 private void swap(reg_t *a,reg_t *b)
68 {
69     const reg_t first = *a, second = *b;   // read both before writing either
70     *a = second;
71     *b = first;
72 }
73 
74 
75 /*******************************************
76  * Returns: true if the EA for `e` cannot be used in anything other than a MOV instruction.
77  */
78 
79 bool movOnly(const elem *e)
80 {
81     // The restriction only applies to OPvar operands when generating OSX64 PIC code
82     const picVar = (config.exe & EX_OSX64) && (config.flags3 & CFG3pic) && e.Eoper == OPvar;
83     if (!picVar)
84         return false;
85     switch (e.EV.Vsym.Sclass)   // fixups for these storage classes can only be done with a MOV
86     {
87         case SCglobal, SCextern, SCcomdat, SCcomdef:  return true;
88         default:                                      return false;
89     }
90 }
91 
92 /********************************
93  * Determine the registers an instruction's addressing mode is built from.
94  * The mode is decoded from the rm field of c's modregrm byte (plus sib and REX when used).
95  * Returns:
96  *      mask of index registers, 0 when the mod field is 3
97  */
98 
99 regm_t idxregm(const code* c)
100 {
101     const modrm = c.Irm;
102     if ((modrm & 0xC0) == 0xC0)             // mod is 3: nothing to report
103         return 0;
104 
105     const rmfield = modrm & 7;
106     if (I16)
107     {
108         // 16 bit mode: table lookup by rm
109         static immutable ubyte[8] idxrm  = [mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX];
110         return idxrm[rmfield];
111     }
112 
113     const rexB = (c.Irex & REX_B) ? 8 : 0;  // REX_B adds 8 to the rm / sib base register number
114     if (rmfield != 4)                       // no sib byte
115         return mask(rmfield | rexB);
116 
117     // sib byte: a scaled index register, and usually a base register too
118     const sib = c.Isib;
119     const rexX = (c.Irex & REX_X) ? 8 : 0;  // REX_X adds 8 to the index register number
120     const indexreg = ((sib >> 3) & 7) | rexX;
121     const basereg = (sib & 7) | rexB;
122     regm_t idxm = mask(indexreg);
123 
124     // base field 5 combined with mod 0 adds no base register
125     const nobase = (sib & 7) == 5 && (modrm & 0xC0) == 0;
126     if (!nobase)
127         idxm |= mask(basereg);
128     return idxm;
129 }
130 
131 
132 /***************************
133  * Gen code for a binary floating point operation: inline via orth87(), or as a call to library routine clib.
134  */
135 
136 void opdouble(ref CodeBuilder cdb, elem *e,regm_t *pretregs,uint clib)
137 {
138     if (config.inline8087)
139     {
140         orth87(cdb,e,pretregs);         // inline 8087 code handles everything
141         return;
142     }
143 
144     // Select the register masks requested for the left and right operands
145     regm_t lregs;
146     regm_t rregs;
147     if (tybasic(e.EV.E1.Ety) == TYfloat)
148     {
149         clib += CLIB.fadd - CLIB.dadd;  // convert to float operation
150         lregs = FLOATREGS;
151         rregs = FLOATREGS2;
152     }
153     else if (I32)
154     {
155         lregs = DOUBLEREGS_32;
156         rregs = DOUBLEREGS2_32;
157     }
158     else
159     {
160         lregs = mSTACK;
161         rregs = DOUBLEREGS_16;
162     }
163 
164     codelem(cdb,e.EV.E1,&lregs,false);
165     const onStack = (lregs & mSTACK) != 0;   // mSTACK is set in the left operand's result mask
166     if (onStack) cgstate.stackclean++;
167     scodelem(cdb,e.EV.E2,&rregs,lregs & ~mSTACK,false);
168     if (onStack) cgstate.stackclean--;
169     callclib(cdb,e,clib,pretregs,0);
170 }
171 
172 /*****************************
173  * Generate code for the binary operators OPadd, OPmin, OPor, OPxor and OPand
174  * ( + - | ^ & ), which are more or less orthogonal. *pretregs gives the wanted result registers/flags.
175  */
176 
177 void cdorth(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
178 {
179     //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
180     elem *e1 = e.EV.E1;
181     elem *e2 = e.EV.E2;
182     if (*pretregs == 0)                   // if don't want result
183     {
184         codelem(cdb,e1,pretregs,false); // eval left leaf
185         *pretregs = 0;                          // in case they got set
186         codelem(cdb,e2,pretregs,false);
187         return;
188     }
189 
190     const ty = tybasic(e.Ety);      // type of the result
191     const ty1 = tybasic(e1.Ety);    // type of the left operand
192 
193     if (tyfloating(ty1))            // floating point: dispatch to XMM, x87 or library code
194     {
195         if (tyvector(ty1) ||
196             config.fpxmmregs && tyxmmreg(ty1) &&
197             !(*pretregs & mST0) &&
198             !(*pretregs & mST01) &&
199             !(ty == TYldouble || ty == TYildouble)  // watch out for shrinkLongDoubleConstantIfPossible()
200            )
201         {
202             orthxmm(cdb,e,pretregs);
203             return;
204         }
205         if (config.inline8087)
206         {
207             orth87(cdb,e,pretregs);
208             return;
209         }
210         if (config.exe & EX_windos)     // library call; only OPadd and OPmin are distinguished here
211         {
212             opdouble(cdb,e,pretregs,(e.Eoper == OPadd) ? CLIB.dadd
213                                                        : CLIB.dsub);
214             return;
215         }
216         else
217         {
218             assert(0);                  // no library fallback for other targets
219         }
220     }
221     if (tyxmmreg(ty1))              // remaining (non-floating) types that go in XMM registers
222     {
223         orthxmm(cdb,e,pretregs);
224         return;
225     }
226 
227     opcode_t op1, op2;              // opcode for the low word, and for the high word of a two word operation
228     uint mode;                      // reg field value used with the 0x81 immediate form
229     __gshared int nest;             // nonzero while getlvalue() below is working on a CSE'd e (recursion guard)
230 
231     const ty2 = tybasic(e2.Ety);    // type of the right operand
232     const e2oper = e2.Eoper;
233     const sz = _tysize[ty];         // size of the result type
234     const isbyte = (sz == 1);
235     code_flags_t word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;   // CFopsize for short operands outside 16 bit mode
236     bool test = false;                // assume we destroyed lvalue
237 
238     switch (e.Eoper)
239     {
240         case OPadd:     mode = 0;
241                         op1 = 0x03; op2 = 0x13; break;  /* ADD, ADC     */
242         case OPmin:     mode = 5;
243                         op1 = 0x2B; op2 = 0x1B; break;  /* SUB, SBB     */
244         case OPor:      mode = 1;
245                         op1 = 0x0B; op2 = 0x0B; break;  /* OR , OR      */
246         case OPxor:     mode = 6;
247                         op1 = 0x33; op2 = 0x33; break;  /* XOR, XOR     */
248         case OPand:     mode = 4;
249                         op1 = 0x23; op2 = 0x23;         /* AND, AND     */
250                         if (tyreg(ty1) &&
251                             *pretregs == mPSW)          /* if flags only */
252                         {
253                             test = true;
254                             op1 = 0x85;                 /* TEST         */
255                             mode = 0;
256                         }
257                         break;
258 
259         default:
260             assert(0);
261     }
262     op1 ^= isbyte;                                  /* if byte operation    */
263 
264     // Compute numwords, the number of words to operate on.
265     int numwords = 1;
266     if (!I16)
267     {
268         /* Cannot operate on longs and then do a 'paint' to a far       */
269         /* pointer, because far pointers are 48 bits and longs are 32.  */
270         /* Therefore, numwords can never be 2.                          */
271         assert(!(tyfv(ty1) && tyfv(ty2)));
272         if (sz == 2 * REGSIZE)
273         {
274             numwords++;
275         }
276     }
277     else
278     {
279         /* If ty is a TYfptr, but both operands are long, treat the     */
280         /* operation as a long.                                         */
281         if ((tylong(ty1) || ty1 == TYhptr) &&
282             (tylong(ty2) || ty2 == TYhptr))
283             numwords++;
284     }
285 
286     // Special cases where only flags are set
287     if (test && _tysize[ty1] <= REGSIZE &&
288         (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
289         && !movOnly(e1)
290        )
291     {
292         // Handle the case of (var & const)
293         if (e2.Eoper == OPconst && el_signx32(e2))
294         {
295             code cs = void;
296             cs.Iflags = 0;
297             cs.Irex = 0;
298             getlvalue(cdb,&cs,e1,0);
299             targ_size_t value = e2.EV.Vpointer;
300             if (sz == 2)
301                 value &= 0xFFFF;
302             else if (sz == 4)
303                 value &= 0xFFFFFFFF;
304             reg_t reg;
305             if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,value,&reg))   // constant is already in a register
306             {
307                 code_newreg(&cs, reg);
308                 if (I64 && isbyte && reg >= 4)
309                     cs.Irex |= REX;
310             }
311             else
312             {
313                 if (sz == 8 && !I64)
314                 {
315                     assert(value == cast(int)value);    // sign extend imm32
316                 }
317                 op1 = 0xF7;                             // immediate form; becomes 0xF6 for bytes below
318                 cs.IEV2.Vint = cast(targ_int)value;
319                 cs.IFL2 = FLconst;
320             }
321             cs.Iop = op1 ^ isbyte;  // NOTE(review): in the register case op1 already had isbyte XORed in above, so this yields the non-byte opcode again - confirm intended
322             cs.Iflags |= word | CFpsw;
323             freenode(e1);
324             freenode(e2);
325             cdb.gen(&cs);
326             return;
327         }
328 
329         // Handle (exp & reg)
330         reg_t reg;
331         regm_t retregs;
332         if (isregvar(e2,&retregs,&reg))
333         {
334             code cs = void;
335             cs.Iflags = 0;
336             cs.Irex = 0;
337             getlvalue(cdb,&cs,e1,0);
338             code_newreg(&cs, reg);
339             if (I64 && isbyte && reg >= 4)
340                 cs.Irex |= REX;
341             cs.Iop = op1 ^ isbyte;  // NOTE(review): second XOR with isbyte (the first is right after the switch above) - confirm intended
342             cs.Iflags |= word | CFpsw;
343             freenode(e1);
344             freenode(e2);
345             cdb.gen(&cs);
346             return;
347         }
348     }
349 
350     code cs = void;
351     cs.Iflags = 0;
352     cs.Irex = 0;
353 
354     // Look for possible uses of LEA
355     if (e.Eoper == OPadd &&
356         !(*pretregs & mPSW) &&                // flags aren't set by LEA
357         !nest &&                              // could cause infinite recursion if e.Ecount
358         (sz == REGSIZE || (I64 && sz == 4)))  // far pointers aren't handled
359     {
360         const rex = (sz == 8) ? REX_W : 0;    // REX_W is added to the LEA for 8 byte results
361 
362         // Handle the case of (e + &var)
363         int e1oper = e1.Eoper;
364         if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2.Ecount && stackfl[el_fl(e2)])))
365                 || // LEA costs too much for simple EAs on older CPUs
366             (e2oper == OPconst && (e1.Eoper == OPcall || e1.Eoper == OPcallns) && !(*pretregs & mAX)) ||
367             (!I16 && (isscaledindex(e1) || isscaledindex(e2))) ||
368             (!I16 && e1oper == OPvar && e1.EV.Vsym.Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2.EV.Vsym.Sfl == FLreg))) ||
369             (e2oper == OPconst && e1oper == OPeq && e1.EV.E1.Eoper == OPvar) ||
370             (!I16 && (e2oper == OPrelconst || e2oper == OPconst) && !e1.Ecount &&
371              (e1oper == OPmul || e1oper == OPshl) &&
372              e1.EV.E2.Eoper == OPconst &&
373              ssindex(e1oper,e1.EV.E2.EV.Vuns)
374             ) ||
375             (!I16 && e1.Ecount)
376            )
377         {
378             const inc = e.Ecount != 0;  // only guard against recursion when e is a CSE
379             nest += inc;
380             code csx = void;
381             getlvalue(cdb,&csx,e,0);    // EA for e itself, turned into an LEA below
382             nest -= inc;
383             reg_t regx;
384             allocreg(cdb,pretregs,&regx,ty);
385             csx.Iop = LEA;
386             code_newreg(&csx, regx);
387             cdb.gen(&csx);          // LEA regx,EA
388             if (rex)
389                 code_orrex(cdb.last(), rex);
390             return;
391         }
392 
393         // Handle the case of ((e + c) + e2)
394         if (!I16 &&
395             e1oper == OPadd &&
396             (e1.EV.E2.Eoper == OPconst && el_signx32(e1.EV.E2) ||
397              e2oper == OPconst && el_signx32(e2)) &&
398             !e1.Ecount
399            )
400         {
401             elem *ebase;    // operand evaluated into the base register
402             elem *edisp;    // constant operand used as the displacement
403             if (e2oper == OPconst && el_signx32(e2))
404             {   edisp = e2;
405                 ebase = e1.EV.E2;
406             }
407             else
408             {   edisp = e1.EV.E2;
409                 ebase = e2;
410             }
411 
412             auto e11 = e1.EV.E1;
413             regm_t retregs = *pretregs & ALLREGS;
414             if (!retregs)
415                 retregs = ALLREGS;
416             int ss = 0;     // scale field of the sib byte
417             int ss2 = 0;    // if nonzero, e11 is first multiplied by (2^ss + 1), then ss2 is used as the scale
418 
419             // Handle the case of (((e *  c1) + c2) + e2)
420             // Handle the case of (((e << c1) + c2) + e2)
421             if ((e11.Eoper == OPmul || e11.Eoper == OPshl) &&
422                 e11.EV.E2.Eoper == OPconst &&
423                 !e11.Ecount
424                )
425             {
426                 const co1 = cast(targ_size_t)el_tolong(e11.EV.E2);
427                 if (e11.Eoper == OPshl)
428                 {
429                     if (co1 > 3)
430                         goto L13;
431                     ss = cast(int)co1;
432                 }
433                 else
434                 {
435                     ss2 = 1;
436                     switch (co1)        // multipliers of the form (2^ss + 1) * 2^ss2
437                     {
438                         case  6:        ss = 1;                 break;
439                         case 12:        ss = 1; ss2 = 2;        break;
440                         case 24:        ss = 1; ss2 = 3;        break;
441                         case 10:        ss = 2;                 break;
442                         case 20:        ss = 2; ss2 = 2;        break;
443                         case 40:        ss = 2; ss2 = 3;        break;
444                         case 18:        ss = 3;                 break;
445                         case 36:        ss = 3; ss2 = 2;        break;
446                         case 72:        ss = 3; ss2 = 3;        break;
447                         default:
448                             ss2 = 0;
449                             goto L13;
450                     }
451                 }
452                 freenode(e11.EV.E2);
453                 freenode(e11);
454                 e11 = e11.EV.E1;
455               L13:
456                 { }
457             }
458 
459             reg_t reg11;
460             regm_t regm;
461             if (e11.Eoper == OPvar && isregvar(e11,&regm,&reg11))
462             {
463                 if (tysize(e11.Ety) <= REGSIZE)
464                     retregs = mask(reg11); // only want the LSW
465                 else
466                     retregs = regm;
467                 freenode(e11);
468             }
469             else
470                 codelem(cdb,e11,&retregs,false);
471 
472             regm_t rretregs = ALLREGS & ~retregs & ~mBP;    // ebase goes in some other register, never BP
473             scodelem(cdb,ebase,&rretregs,retregs,true);
474             reg_t reg;
475             {
476                 regm_t sregs = *pretregs & ~rretregs;
477                 if (!sregs)
478                     sregs = ALLREGS & ~rretregs;
479                 allocreg(cdb,&sregs,&reg,ty);
480             }
481 
482             assert((retregs & (retregs - 1)) == 0); // must be only one register
483             assert((rretregs & (rretregs - 1)) == 0); // must be only one register
484 
485             auto  reg1 = findreg(retregs);
486             const reg2 = findreg(rretregs);
487 
488             if (ss2)
489             {
490                 assert(reg != reg2);
491                 if ((reg1 & 7) == BP)
492                 {   static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1];
493 
494                     // IMUL reg,imm32
495                     cdb.genc2(0x69,modregxrmx(3,reg,reg1),imm32[ss]);
496                 }
497                 else
498                 {   // LEA reg,[reg1*ss][reg1]
499                     cdb.gen2sib(LEA,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7));
500                     if (reg1 & 8)
501                         code_orrex(cdb.last(), REX_X | REX_B);
502                 }
503                 if (rex)
504                     code_orrex(cdb.last(), rex);
505                 reg1 = reg;
506                 ss = ss2;                               // use *2 for scale
507             }
508 
509             cs.Iop = LEA;                      // LEA reg,c[reg1*ss][reg2]
510             cs.Irm = modregrm(2,reg & 7,4);
511             cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7);
512             assert(reg2 != BP);
513             cs.Iflags = CFoff;
514             cs.Irex = cast(ubyte)rex;
515             if (reg & 8)
516                 cs.Irex |= REX_R;
517             if (reg1 & 8)
518                 cs.Irex |= REX_X;
519             if (reg2 & 8)
520                 cs.Irex |= REX_B;
521             cs.IFL1 = FLconst;
522             cs.IEV1.Vsize_t = edisp.EV.Vuns;
523 
524             freenode(edisp);
525             freenode(e1);
526             cdb.gen(&cs);
527             fixresult(cdb,e,mask(reg),pretregs);
528             return;
529         }
530     }
531 
532     regm_t posregs = (isbyte) ? BYTEREGS : (mES | ALLREGS | mBP);   // registers the result may be placed in
533     regm_t retregs = *pretregs & posregs;
534     if (retregs == 0)                   /* if no return regs speced     */
535                                         /* (like if wanted flags only)  */
536         retregs = ALLREGS & posregs;    // give us some
537 
538     if (ty1 == TYhptr || ty2 == TYhptr)
539     {     /* Generate code for add/subtract of huge pointers.
540            No attempt is made to generate very good code.
541          */
542         retregs = (retregs & mLSW) | mDX;
543         regm_t rretregs;
544         if (ty1 == TYhptr)
545         {   // hptr +- long
546             rretregs = mLSW & ~(retregs | regcon.mvar);
547             if (!rretregs)
548                 rretregs = mLSW;
549             rretregs |= mCX;
550             codelem(cdb,e1,&rretregs,0);
551             retregs &= ~rretregs;
552             if (!(retregs & mLSW))
553                 retregs |= mLSW & ~rretregs;
554 
555             scodelem(cdb,e2,&retregs,rretregs,true);
556         }
557         else
558         {   // long + hptr
559             codelem(cdb,e1,&retregs,0);
560             rretregs = (mLSW | mCX) & ~retregs;
561             if (!(rretregs & mLSW))
562                 rretregs |= mLSW;
563             scodelem(cdb,e2,&rretregs,retregs,true);
564         }
565         getregs(cdb,rretregs | retregs);
566         const mreg = DX;
567         const lreg = findreglsw(retregs);
568         if (e.Eoper == OPmin)
569         {   // negate retregs
570             cdb.gen2(0xF7,modregrm(3,3,mreg));     // NEG mreg
571             cdb.gen2(0xF7,modregrm(3,3,lreg));     // NEG lreg
572             code_orflag(cdb.last(),CFpsw);
573             cdb.genc2(0x81,modregrm(3,3,mreg),0);  // SBB mreg,0
574         }
575         const lrreg = findreglsw(rretregs);
576         genregs(cdb,0x03,lreg,lrreg);              // ADD lreg,lrreg
577         code_orflag(cdb.last(),CFpsw);
578         genmovreg(cdb,lrreg,CX);      // MOV lrreg,CX
579         cdb.genc2(0x81,modregrm(3,2,mreg),0);      // ADC mreg,0
580         genshift(cdb);                             // MOV CX,offset __AHSHIFT
581         cdb.gen2(0xD3,modregrm(3,4,mreg));         // SHL mreg,CL
582         genregs(cdb,0x03,mreg,lrreg);              // ADD mreg,MSREG(h)
583         fixresult(cdb,e,retregs,pretregs);
584         return;
585     }
586 
587     regm_t rretregs;    // registers holding the right operand
588     reg_t reg;          // register holding (the relevant word of) the left operand
589     if (_tysize[ty1] > REGSIZE && numwords == 1)
590     {     /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */
591 
592         debug
593         if (_tysize[ty2] != REGSIZE)
594         {
595             printf("e = %p, e.Eoper = ",e);
596             WROP(e.Eoper);
597             printf(" e1.Ety = ");
598             WRTYxx(ty1);
599             printf(" e2.Ety = ");
600             WRTYxx(ty2);
601             printf("\n");
602             elem_print(e);
603         }
604 
605         assert(_tysize[ty2] == REGSIZE);
606 
607         /* Watch out for the case here where you are going to OP reg,EA */
608         /* and both the reg and EA use ES! Prevent this by forcing      */
609         /* reg into the regular registers.                              */
610         if ((e2oper == OPind ||
611             (e2oper == OPvar && el_fl(e2) == FLfardata)) &&
612             !e2.Ecount)
613         {
614             retregs = ALLREGS;
615         }
616 
617         codelem(cdb,e1,&retregs,test != 0);
618         reg = findreglsw(retregs);      /* reg is the register with the offset*/
619     }
620     else
621     {
622         regm_t regm;
623 
624         /* if (tyword + TYfptr) */
625         if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE)
626         {   retregs = ~*pretregs & ALLREGS;
627 
628             /* if retregs doesn't have any regs in it that aren't reg vars */
629             if ((retregs & ~regcon.mvar) == 0)
630                 retregs |= mAX;
631         }
632         else if (numwords == 2 && retregs & mES)
633             retregs = (retregs | mMSW) & ALLREGS;
634 
635         // Determine if we should swap operands, because
636         //      mov     EAX,x
637         //      add     EAX,reg
638         // is faster than:
639         //      mov     EAX,reg
640         //      add     EAX,x
641         else if (e2oper == OPvar &&
642                  e1.Eoper == OPvar &&
643                  e.Eoper != OPmin &&
644                  isregvar(e1,&regm,null) &&
645                  regm != retregs &&
646                  _tysize[ty1] == _tysize[ty2])
647         {
648             elem *es = e1;
649             e1 = e2;
650             e2 = es;
651         }
652         codelem(cdb,e1,&retregs,test != 0);         // eval left leaf
653         reg = findreg(retregs);
654     }
655     reg_t rreg;         // register holding the right operand, or already holding the constant when rval is set
656     int rval;           // nonzero if reghasvalue() found the constant in rreg
657     targ_size_t i;      // value of the constant right operand (0 for OPrelconst)
658     switch (e2oper)     // note: e2oper was captured before any operand swap above
659     {
660         case OPind:                                 /* if addressing mode   */
661             if (!e2.Ecount)                         /* if not CSE           */
662                     goto L1;                        /* try OP reg,EA        */
663             goto default;
664 
665         default:                                    /* operator node        */
666         L2:
667             rretregs = ALLREGS & ~retregs;
668             /* Be careful not to do arithmetic on ES        */
669             if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE && *pretregs != mPSW)
670                 rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs;
671             else if (isbyte)
672                 rretregs &= BYTEREGS;
673 
674             scodelem(cdb,e2,&rretregs,retregs,true);       // get rvalue
675             rreg = (_tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs);
676             if (!test)
677                 getregs(cdb,retregs);          // we will trash these regs
678             if (numwords == 1)                              /* ADD reg,rreg */
679             {
680                 /* reverse operands to avoid moving around the segment value */
681                 if (_tysize[ty2] > REGSIZE)
682                 {
683                     getregs(cdb,rretregs);
684                     genregs(cdb,op1,rreg,reg);
685                     retregs = rretregs;     // reverse operands
686                 }
687                 else
688                 {
689                     genregs(cdb,op1,reg,rreg);
690                     if (!I16 && *pretregs & mPSW)
691                         cdb.last().Iflags |= word;
692                 }
693                 if (I64 && sz == 8)
694                     code_orrex(cdb.last(), REX_W);
695                 if (I64 && isbyte && (reg >= 4 || rreg >= 4))
696                     code_orrex(cdb.last(), REX);
697             }
698             else /* numwords == 2 */                /* ADD lsreg,lsrreg     */
699             {
700                 reg = findreglsw(retregs);
701                 rreg = findreglsw(rretregs);
702                 genregs(cdb,op1,reg,rreg);
703                 if (e.Eoper == OPadd || e.Eoper == OPmin)
704                     code_orflag(cdb.last(),CFpsw);
705                 reg = findregmsw(retregs);
706                 rreg = findregmsw(rretregs);
707                 if (!(e2oper == OPu16_32 && // if second operand is 0
708                       (op2 == 0x0B || op2 == 0x33)) // and OR or XOR
709                    )
710                     genregs(cdb,op2,reg,rreg);        // ADC msreg,msrreg
711             }
712             break;
713 
714         case OPrelconst:
715             if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
716                 goto default;
717             if (sz != REGSIZE)
718                 goto L2;
719             if (segfl[el_fl(e2)] != 3)              /* if not in data segment */
720                 goto L2;
721             if (evalinregister(e2))
722                 goto L2;
723             cs.IEV2.Voffset = e2.EV.Voffset;
724             cs.IEV2.Vsym = e2.EV.Vsym;
725             cs.Iflags |= CFoff;
726             i = 0;                          /* no INC or DEC opcode         */
727             rval = 0;
728             goto L3;
729 
730         case OPconst:
731             if (tyfv(ty2))
732                 goto L2;
733             if (numwords == 1)
734             {
735                 if (!el_signx32(e2))
736                     goto L2;
737                 i = e2.EV.Vpointer;
738                 if (word)
739                 {
740                     if (!(*pretregs & mPSW) &&
741                         config.flags4 & CFG4speed &&
742                         (e.Eoper == OPor || e.Eoper == OPxor || test ||
743                          (e1.Eoper != OPvar && e1.Eoper != OPind)))
744                     {   word = 0;
745                         i &= 0xFFFF;
746                     }
747                 }
748                 rval = reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&rreg);
749                 cs.IEV2.Vsize_t = i;
750             L3:
751                 if (!test)
752                     getregs(cdb,retregs);          // we will trash these regs
753                 op1 ^= isbyte;                     // undo the byte adjustment; it is reapplied to cs.Iop below
754                 cs.Iflags |= word;
755                 if (rval)
756                 {   cs.Iop = op1 ^ 2;              // register form with reg in the rm field and rreg in the reg field
757                     mode = rreg;
758                 }
759                 else
760                     cs.Iop = 0x81;                 // immediate form, operation selected by mode
761                 cs.Irm = modregrm(3,mode&7,reg&7);
762                 if (mode & 8)
763                     cs.Irex |= REX_R;
764                 if (reg & 8)
765                     cs.Irex |= REX_B;
766                 if (I64 && sz == 8)
767                     cs.Irex |= REX_W;
768                 if (I64 && isbyte && (reg >= 4 || (rval && rreg >= 4)))
769                     cs.Irex |= REX;
770                 cs.IFL2 = cast(ubyte)((e2.Eoper == OPconst) ? FLconst : el_fl(e2));
771                 /* Modify instruction for special cases */
772                 switch (e.Eoper)
773                 {
774                     case OPadd:
775                     {
776                         int iop;
777 
778                         if (i == 1)
779                             iop = 0;                    /* INC reg      */
780                         else if (i == -1)
781                             iop = 8;                    /* DEC reg      */
782                         else
783                             break;
784                         cs.Iop = (0x40 | iop | reg) ^ isbyte;   // one byte form: 0x40+reg / 0x48+reg
785                         if ((isbyte && *pretregs & mPSW) || I64)
786                         {
787                             cs.Irm = cast(ubyte)(modregrm(3,0,reg & 7) | iop);
788                             cs.Iop = 0xFF;              // use the opcode 0xFF + modregrm form instead
789                         }
790                         break;
791                     }
792 
793                     case OPand:
794                         if (test)
795                             cs.Iop = rval ? op1 : 0xF7; // TEST
796                         break;
797 
798                     default:
799                         break;
800                 }
801                 if (*pretregs & mPSW)
802                     cs.Iflags |= CFpsw;
803                 cs.Iop ^= isbyte;                  // reapply the byte adjustment
804                 cdb.gen(&cs);
805                 cs.Iflags &= ~CFpsw;
806             }
807             else if (numwords == 2)
808             {
809                 getregs(cdb,retregs);
810                 reg = findregmsw(retregs);
811                 const lsreg = findreglsw(retregs);
812                 cs.Iop = 0x81;
813                 cs.Irm = modregrm(3,mode,lsreg);
814                 cs.IFL2 = FLconst;
815                 const msw = cast(targ_int)MSREG(e2.EV.Vllong);   // high word of the constant, used for the second instruction
816                 cs.IEV2.Vint = e2.EV.Vlong;
817                 switch (e.Eoper)
818                 {
819                     case OPadd:
820                     case OPmin:
821                         cs.Iflags |= CFpsw;
822                         break;
823 
824                     default:
825                         break;
826                 }
827                 cdb.gen(&cs);
828                 cs.Iflags &= ~CFpsw;
829 
830                 cs.Irm = cast(ubyte)((cs.Irm & modregrm(3,7,0)) | reg);   // same mod and operation, now on the msw register
831                 cs.IEV2.Vint = msw;
832                 if (e.Eoper == OPadd)
833                     cs.Irm |= modregrm(0,2,0);      /* ADC          */
834                 cdb.gen(&cs);
835             }
836             else
837                 assert(0);
838             freenode(e2);
839             break;
840 
841         case OPvar:
842             if (movOnly(e2))
843                 goto L2;
844         L1:
845             if (tyfv(ty2))
846                 goto L2;
847             if (!test)
848                 getregs(cdb,retregs);          // we will trash these regs
849             loadea(cdb,e2,&cs,op1,
850                    ((numwords == 2) ? findreglsw(retregs) : reg),
851                    0,retregs,retregs);
852             if (!I16 && word)
853             {   if (*pretregs & mPSW)
854                     code_orflag(cdb.last(),word);
855                 else
856                     cdb.last().Iflags &= ~cast(int)word;
857             }
858             else if (numwords == 2)
859             {
860                 if (e.Eoper == OPadd || e.Eoper == OPmin)
861                     code_orflag(cdb.last(),CFpsw);
862                 reg = findregmsw(retregs);
863                 if (!OTleaf(e2.Eoper))
864                 {   getlvalue_msw(&cs);
865                     cs.Iop = op2;
866                     NEWREG(cs.Irm,reg);
867                     cdb.gen(&cs);                 // ADC reg,data+2
868                 }
869                 else
870                     loadea(cdb,e2,&cs,op2,reg,REGSIZE,retregs,0);
871             }
872             else if (I64 && sz == 8)
873                 code_orrex(cdb.last(), REX_W);
874             freenode(e2);
875             break;
876     }
877 
878     if (sz <= REGSIZE && *pretregs & mPSW)
879     {
880         /* If the expression is (_tls_array + ...), then the flags are not set
881          * since the linker may rewrite these instructions into something else.
882          */
883         if (I64 && e.Eoper == OPadd && e1.Eoper == OPvar)
884         {
885             const s = e1.EV.Vsym;
886             if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0)  // 10 bytes: "tls_array" plus its terminating 0
887             {
888                 goto L7;                        // don't assume flags are set
889             }
890         }
891         code_orflag(cdb.last(),CFpsw);
892         *pretregs &= ~mPSW;                    // flags already set
893     L7: { }
894     }
895     fixresult(cdb,e,retregs,pretregs);
896 }
897 
898 
899 /*****************************
900  * Handle multiply.
901  */
902 
903 void cdmul(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
904 {
905     //printf("cdmul()\n");
906     elem *e1 = e.EV.E1;
907     elem *e2 = e.EV.E2;
908     if (*pretregs == 0)                         // if don't want result
909     {
910         codelem(cdb,e1,pretregs,false);      // eval left leaf
911         *pretregs = 0;                          // in case they got set
912         codelem(cdb,e2,pretregs,false);
913         return;
914     }
915 
916     //printf("cdmul(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
917     const tyml = tybasic(e1.Ety);
918     const ty = tybasic(e.Ety);
919     const oper = e.Eoper;
920 
921     if (tyfloating(tyml))
922     {
923         if (tyvector(tyml) ||
924             config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
925             !(*pretregs & mST0) &&
926             !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
927             !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
928             !(ty == TYllong)  //   or passing to function through integer register
929            )
930         {
931             orthxmm(cdb,e,pretregs);
932             return;
933         }
934         if (config.exe & EX_posix)
935             orth87(cdb,e,pretregs);
936         else
937             opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);
938 
939         return;
940     }
941 
942     if (tyxmmreg(tyml))
943     {
944         orthxmm(cdb,e,pretregs);
945         return;
946     }
947 
948     const uns = tyuns(tyml) || tyuns(e2.Ety);  // 1 if signed operation, 0 if unsigned
949     const isbyte = tybyte(e.Ety) != 0;
950     const sz = _tysize[tyml];
951     const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
952     const uint grex = rex << 16;
953     const OPER opunslng = I16 ? OPu16_32 : OPu32_64;
954 
955     code cs = void;
956     cs.Iflags = 0;
957     cs.Irex = 0;
958 
959     switch (e2.Eoper)
960     {
961         case OPu16_32:
962         case OPs16_32:
963         case OPu32_64:
964         case OPs32_64:
965         {
966             if (sz != 2 * REGSIZE || e1.Eoper != e2.Eoper ||
967                 e1.Ecount || e2.Ecount)
968                 goto default;
969             const ubyte opx = (e2.Eoper == opunslng) ? 4 : 5;
970             regm_t retregsx = mAX;
971             codelem(cdb,e1.EV.E1,&retregsx,false);    // eval left leaf
972             if (e2.EV.E1.Eoper == OPvar ||
973                 (e2.EV.E1.Eoper == OPind && !e2.EV.E1.Ecount)
974                )
975             {
976                 loadea(cdb,e2.EV.E1,&cs,0xF7,opx,0,mAX,mAX | mDX);
977             }
978             else
979             {
980                 regm_t rretregsx = ALLREGS & ~mAX;
981                 scodelem(cdb,e2.EV.E1,&rretregsx,retregsx,true); // get rvalue
982                 getregs(cdb,mAX | mDX);
983                 const rregx = findreg(rretregsx);
984                 cdb.gen2(0xF7,grex | modregrmx(3,opx,rregx)); // OP AX,rregx
985             }
986             freenode(e.EV.E1);
987             freenode(e2);
988             fixresult(cdb,e,mAX | mDX,pretregs);
989             return;
990         }
991 
992         case OPconst:
993             const e2factor = cast(targ_size_t)el_tolong(e2);
994 
995             // Multiply by a constant
996             if (I32 && sz == REGSIZE * 2)
997             {
998                 /*  if (msw)
999                       IMUL    EDX,EDX,lsw
1000                       IMUL    reg,EAX,msw
1001                       ADD     reg,EDX
1002                     else
1003                       IMUL    reg,EDX,lsw
1004                     MOV       EDX,lsw
1005                     MUL       EDX
1006                     ADD       EDX,reg
1007                  */
1008                 regm_t retregs = mAX | mDX;
1009                 codelem(cdb,e1,&retregs,false);    // eval left leaf
1010                 reg_t reg = allocScratchReg(cdb, allregs & ~(mAX | mDX));
1011                 getregs(cdb,mDX | mAX);
1012 
1013                 const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
1014                 const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));
1015 
1016                 if (msw)
1017                 {
1018                     genmulimm(cdb,DX,DX,lsw);           // IMUL EDX,EDX,lsw
1019                     genmulimm(cdb,reg,AX,msw);          // IMUL reg,EAX,msw
1020                     cdb.gen2(0x03,modregrm(3,reg,DX));  // ADD  reg,EAX
1021                 }
1022                 else
1023                     genmulimm(cdb,reg,DX,lsw);          // IMUL reg,EDX,lsw
1024 
1025                 movregconst(cdb,DX,lsw,0);              // MOV EDX,lsw
1026                 getregs(cdb,mDX);
1027                 cdb.gen2(0xF7,modregrm(3,4,DX));        // MUL EDX
1028                 cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD EDX,reg
1029 
1030                 const resregx = mDX | mAX;
1031                 freenode(e2);
1032                 fixresult(cdb,e,resregx,pretregs);
1033                 return;
1034             }
1035 
1036 
1037             const int pow2 = ispow2(e2factor);
1038 
1039             if (sz > REGSIZE || !el_signx32(e2))
1040                 goto default;
1041 
1042             if (config.target_cpu >= TARGET_80286)
1043             {
1044                 if (I32 || I64)
1045                 {
1046                     // See if we can use an LEA instruction
1047                     int ss;
1048                     int ss2 = 0;
1049                     int shift;
1050 
1051                     switch (e2factor)
1052                     {
1053                         case 12:    ss = 1; ss2 = 2; goto L4;
1054                         case 24:    ss = 1; ss2 = 3; goto L4;
1055 
1056                         case 6:
1057                         case 3:     ss = 1; goto L4;
1058 
1059                         case 20:    ss = 2; ss2 = 2; goto L4;
1060                         case 40:    ss = 2; ss2 = 3; goto L4;
1061 
1062                         case 10:
1063                         case 5:     ss = 2; goto L4;
1064 
1065                         case 36:    ss = 3; ss2 = 2; goto L4;
1066                         case 72:    ss = 3; ss2 = 3; goto L4;
1067 
1068                         case 18:
1069                         case 9:     ss = 3; goto L4;
1070 
1071                         L4:
1072                         {
1073                             regm_t resreg = *pretregs & ALLREGS & ~(mBP | mR13);
1074                             if (!resreg)
1075                                 resreg = isbyte ? BYTEREGS : ALLREGS & ~(mBP | mR13);
1076 
1077                             codelem(cdb,e.EV.E1,&resreg,false);
1078                             getregs(cdb,resreg);
1079                             reg_t reg = findreg(resreg);
1080 
1081                             cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1082                                         modregxrmx(ss,reg,reg));        // LEA reg,[ss*reg][reg]
1083                             assert((reg & 7) != BP);
1084                             if (ss2)
1085                             {
1086                                 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1087                                                modregxrm(ss2,reg,5));
1088                                 cdb.last().IFL1 = FLconst;
1089                                 cdb.last().IEV1.Vint = 0;               // LEA reg,0[ss2*reg]
1090                             }
1091                             else if (!(e2factor & 1))                   // if even factor
1092                             {
1093                                 genregs(cdb,0x03,reg,reg);              // ADD reg,reg
1094                                 code_orrex(cdb.last(),rex);
1095                             }
1096                             freenode(e2);
1097                             fixresult(cdb,e,resreg,pretregs);
1098                             return;
1099                         }
1100                         case 37:
1101                         case 74:    shift = 2;
1102                                     goto L5;
1103                         case 13:
1104                         case 26:    shift = 0;
1105                                     goto L5;
1106                         L5:
1107                         {
1108                             regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
1109                             regm_t resreg = *pretregs & (ALLREGS | mBP);
1110                             if (!resreg)
1111                                 resreg = retregs;
1112 
1113                             // Don't use EBP
1114                             resreg &= ~(mBP | mR13);
1115                             if (!resreg)
1116                                 resreg = retregs;
1117                             reg_t reg;
1118                             allocreg(cdb,&resreg,&reg,TYint);
1119 
1120                             regm_t sregm = (ALLREGS & ~mR13) & ~resreg;
1121                             codelem(cdb,e.EV.E1,&sregm,false);
1122                             uint sreg = findreg(sregm);
1123                             getregs(cdb,resreg | sregm);
1124                             assert((sreg & 7) != BP);
1125                             assert((reg & 7) != BP);
1126                             cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1127                                                   modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
1128                             if (shift)
1129                                 cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
1130                             cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1131                                                   modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
1132                             if (!(e2factor & 1))                                  // if even factor
1133                             {
1134                                 genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
1135                                 code_orrex(cdb.last(),rex);
1136                             }
1137                             freenode(e2);
1138                             fixresult(cdb,e,resreg,pretregs);
1139                             return;
1140                         }
1141 
1142                         default:
1143                             break;
1144                     }
1145                 }
1146 
1147                 regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
1148                 regm_t resreg = *pretregs & (ALLREGS | mBP);
1149                 if (!resreg)
1150                     resreg = retregs;
1151 
1152                 scodelem(cdb,e.EV.E1,&retregs,0,true);     // eval left leaf
1153                 const regx = findreg(retregs);
1154                 reg_t rreg;
1155                 allocreg(cdb,&resreg,&rreg,e.Ety);
1156 
1157                 // IMUL regx,imm16
1158                 cdb.genc2(0x69,grex | modregxrmx(3,rreg,regx),e2factor);
1159                 freenode(e2);
1160                 fixresult(cdb,e,resreg,pretregs);
1161                 return;
1162             }
1163             goto default;
1164 
1165         case OPind:
1166             if (!e2.Ecount)                        // if not CSE
1167                     goto case OPvar;                        // try OP reg,EA
1168             goto default;
1169 
1170         default:                                    // OPconst and operators
1171             //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
1172             if (sz <= REGSIZE)
1173             {
1174                 regm_t retregs = mAX;
1175                 codelem(cdb,e1,&retregs,false);           // eval left leaf
1176                 regm_t rretregs = isbyte ? BYTEREGS & ~mAX
1177                                          : ALLREGS & ~(mAX|mDX);
1178                 scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
1179                 getregs(cdb,mAX | mDX);     // trash these regs
1180                 reg_t rreg = findreg(rretregs);
1181                 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,5 - uns,rreg)); // OP AX,rreg
1182                 if (I64 && isbyte && rreg >= 4)
1183                     code_orrex(cdb.last(), REX);
1184                 fixresult(cdb,e,mAX,pretregs);
1185                 return;
1186             }
1187             else if (sz == 2 * REGSIZE)
1188             {
1189                 regm_t retregs = mDX | mAX;
1190                 codelem(cdb,e1,&retregs,false);           // eval left leaf
1191                 if (config.target_cpu >= TARGET_PentiumPro)
1192                 {
1193                     regm_t rretregs = allregs & ~retregs;           // second arg
1194                     scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue
1195                     regm_t rlo = findreglsw(rretregs);
1196                     regm_t rhi = findregmsw(rretregs);
1197                     /*  IMUL    rhi,EAX
1198                         IMUL    EDX,rlo
1199                         ADD     rhi,EDX
1200                         MUL     rlo
1201                         ADD     EDX,rhi
1202                      */
1203                     getregs(cdb,mAX|mDX|mask(rhi));
1204                     cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
1205                     cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
1206                     cdb.gen2(0x03,modregrm(3,rhi,DX));
1207                     cdb.gen2(0xF7,modregrm(3,4,rlo));
1208                     cdb.gen2(0x03,modregrm(3,DX,rhi));
1209                     fixresult(cdb,e,mDX|mAX,pretregs);
1210                     return;
1211                 }
1212                 else
1213                 {
1214                     regm_t rretregs = mCX | mBX;           // second arg
1215                     scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
1216                     callclib(cdb,e,CLIB.lmul,pretregs,0);
1217                     return;
1218                 }
1219             }
1220             assert(0);
1221 
1222         case OPvar:
1223             if (!I16 && sz <= REGSIZE)
1224             {
1225                 if (sz > 1)        // no byte version
1226                 {
1227                     // Generate IMUL r32,r/m32
1228                     regm_t retregs = *pretregs & (ALLREGS | mBP);
1229                     if (!retregs)
1230                         retregs = ALLREGS;
1231                     codelem(cdb,e1,&retregs,false);        // eval left leaf
1232                     regm_t resreg = retregs;
1233                     loadea(cdb,e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs);
1234                     freenode(e2);
1235                     fixresult(cdb,e,resreg,pretregs);
1236                     return;
1237                 }
1238             }
1239             else
1240             {
1241                 if (sz == 2 * REGSIZE)
1242                 {
1243                     if (e.EV.E1.Eoper != opunslng ||
1244                         e1.Ecount)
1245                         goto default;            // have to handle it with codelem()
1246 
1247                     regm_t retregs = ALLREGS & ~(mAX | mDX);
1248                     codelem(cdb,e1.EV.E1,&retregs,false);    // eval left leaf
1249                     const reg = findreg(retregs);
1250                     getregs(cdb,mAX);
1251                     genmovreg(cdb,AX,reg);            // MOV AX,reg
1252                     loadea(cdb,e2,&cs,0xF7,4,REGSIZE,mAX | mDX | mskl(reg),mAX | mDX);  // MUL EA+2
1253                     getregs(cdb,retregs);
1254                     cdb.gen1(0x90 + reg);                          // XCHG AX,reg
1255                     getregs(cdb,mAX | mDX);
1256                     if ((cs.Irm & 0xC0) == 0xC0)            // if EA is a register
1257                         loadea(cdb,e2,&cs,0xF7,4,0,mAX | mskl(reg),mAX | mDX); // MUL EA
1258                     else
1259                     {   getlvalue_lsw(&cs);
1260                         cdb.gen(&cs);                       // MUL EA
1261                     }
1262                     cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD DX,reg
1263 
1264                     freenode(e1);
1265                     fixresult(cdb,e,mAX | mDX,pretregs);
1266                     return;
1267                 }
1268                 assert(sz <= REGSIZE);
1269             }
1270 
1271             // loadea() handles CWD or CLR DX for divides
1272             regm_t retregs = sz <= REGSIZE ? mAX : mDX|mAX;
1273             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
1274             loadea(cdb,e2,&cs,0xF7 ^ isbyte,5 - uns,0,
1275                    mAX,
1276                    mAX | mDX);
1277             freenode(e2);
1278             fixresult(cdb,e,mAX,pretregs);
1279             return;
1280     }
1281     assert(0);
1282 }
1283 
1284 
1285 /*****************************
1286  * Handle divide, modulo and remquo.
1287  * Note that modulo isn't defined for doubles.
1288  */
1289 
1290 void cddiv(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1291 {
1292     //printf("cddiv()\n");
1293     elem *e1 = e.EV.E1;
1294     elem *e2 = e.EV.E2;
1295     if (*pretregs == 0)                         // if don't want result
1296     {
1297         codelem(cdb,e1,pretregs,false);      // eval left leaf
1298         *pretregs = 0;                          // in case they got set
1299         codelem(cdb,e2,pretregs,false);
1300         return;
1301     }
1302 
1303     //printf("cddiv(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
1304     const tyml = tybasic(e1.Ety);
1305     const ty = tybasic(e.Ety);
1306     const oper = e.Eoper;
1307 
1308     if (tyfloating(tyml))
1309     {
1310         if (tyvector(tyml) ||
1311             config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
1312             !(*pretregs & mST0) &&
1313             !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
1314             !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
1315             !(ty == TYllong)  //   or passing to function through integer register
1316            )
1317         {
1318             orthxmm(cdb,e,pretregs);
1319             return;
1320         }
1321         if (config.exe & EX_posix)
1322             orth87(cdb,e,pretregs);
1323         else
1324             opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);
1325 
1326         return;
1327     }
1328 
1329     if (tyxmmreg(tyml))
1330     {
1331         orthxmm(cdb,e,pretregs);
1332         return;
1333     }
1334 
1335     const uns = tyuns(tyml) || tyuns(e2.Ety);  // 1 if uint operation, 0 if not
1336     const isbyte = tybyte(e.Ety) != 0;
1337     const sz = _tysize[tyml];
1338     const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
1339     const uint grex = rex << 16;
1340 
1341     code cs = void;
1342     cs.Iflags = 0;
1343     cs.Irex = 0;
1344 
1345     switch (e2.Eoper)
1346     {
1347         case OPconst:
1348             auto d = cast(targ_size_t)el_tolong(e2);
1349             bool neg = false;
1350             const e2factor = d;
1351             if (!uns && cast(targ_llong)e2factor < 0)
1352             {   neg = true;
1353                 d = -d;
1354             }
1355 
1356             // Signed divide by a constant
1357             if ((d & (d - 1)) &&
1358                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1359                 config.flags4 & CFG4speed && !uns)
1360             {
1361                 /* R1 / 10
1362                  *
1363                  *  MOV     EAX,m
1364                  *  IMUL    R1
1365                  *  MOV     EAX,R1
1366                  *  SAR     EAX,31
1367                  *  SAR     EDX,shpost
1368                  *  SUB     EDX,EAX
1369                  *  IMUL    EAX,EDX,d
1370                  *  SUB     R1,EAX
1371                  *
1372                  * EDX = quotient
1373                  * R1 = remainder
1374                  */
1375                 assert(sz == 4 || sz == 8);
1376 
1377                 ulong m;
1378                 int shpost;
1379                 const int N = sz * 8;
1380                 const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);
1381 
1382                 regm_t regm = allregs & ~(mAX | mDX);
1383                 codelem(cdb,e1,&regm,false);       // eval left leaf
1384                 const reg_t reg = findreg(regm);
1385                 getregs(cdb,regm | mDX | mAX);
1386 
1387                 /* Algorithm 5.2
1388                  * if m>=2**(N-1)
1389                  *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
1390                  * else
1391                  *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
1392                  * if (neg)
1393                  *    q = -q
1394                  */
1395                 const bool mgt = mhighbit || m >= (1UL << (N - 1));
1396                 movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
1397                 cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL R1
1398                 if (mgt)
1399                     cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,R1
1400                 getregsNoSave(mAX);                                     // EAX no longer contains 'm'
1401                 genmovreg(cdb, AX, reg);                   // MOV EAX,R1
1402                 cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
1403                 if (shpost)
1404                     cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
1405                 reg_t r3;
1406                 if (neg && oper == OPdiv)
1407                 {
1408                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
1409                     r3 = AX;
1410                 }
1411                 else
1412                 {
1413                     cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
1414                     r3 = DX;
1415                 }
1416 
1417                 // r3 is quotient
1418                 regm_t resregx;
1419                 switch (oper)
1420                 {   case OPdiv:
1421                         resregx = mask(r3);
1422                         break;
1423 
1424                     case OPmod:
1425                         assert(reg != AX && r3 == DX);
1426                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1427                         {
1428                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
1429                         }
1430                         else
1431                         {
1432                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1433                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1434                             getregsNoSave(mAX);                             // EAX no longer contains 'd'
1435                         }
1436                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1437                         resregx = regm;
1438                         break;
1439 
1440                     case OPremquo:
1441                         assert(reg != AX && r3 == DX);
1442                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1443                         {
1444                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);     // IMUL EAX,EDX,d
1445                         }
1446                         else
1447                         {
1448                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1449                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1450                         }
1451                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1452                         genmovreg(cdb, AX, r3);                // MOV EAX,r3
1453                         if (neg)
1454                             cdb.gen2(0xF7,grex | modregrm(3,3,AX));         // NEG EAX
1455                         genmovreg(cdb, DX, reg);               // MOV EDX,R1
1456                         resregx = mDX | mAX;
1457                         break;
1458 
1459                     default:
1460                         assert(0);
1461                 }
1462                 freenode(e2);
1463                 fixresult(cdb,e,resregx,pretregs);
1464                 return;
1465             }
1466 
1467             // Unsigned divide by a constant
1468             if (e2factor > 2 && (e2factor & (e2factor - 1)) &&
1469                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1470                 config.flags4 & CFG4speed && uns)
1471             {
1472                 assert(sz == 4 || sz == 8);
1473 
1474                 reg_t r3;
1475                 regm_t regm;
1476                 reg_t reg;
1477                 ulong m;
1478                 int shpre;
1479                 int shpost;
1480                 if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
1481                 {
1482                     /* t1 = MULUH(m, n)
1483                      * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
1484                      *   MOV   EAX,reg
1485                      *   MOV   EDX,m
1486                      *   MUL   EDX
1487                      *   MOV   EAX,reg
1488                      *   SUB   EAX,EDX
1489                      *   SHR   EAX,1
1490                      *   LEA   R3,[EAX][EDX]
1491                      *   SHR   R3,shpost-1
1492                      */
1493                     assert(shpre == 0);
1494 
1495                     regm = allregs & ~(mAX | mDX);
1496                     codelem(cdb,e1,&regm,false);       // eval left leaf
1497                     reg = findreg(regm);
1498                     getregs(cdb,mAX | mDX);
1499                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1500                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1501                     getregs(cdb,regm | mDX | mAX);
1502                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
1503                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1504                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
1505                     cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
1506                     regm_t regm3 = allregs;
1507                     if (oper == OPmod || oper == OPremquo)
1508                     {
1509                         regm3 &= ~regm;
1510                         if (oper == OPremquo || !el_signx32(e2))
1511                             regm3 &= ~mAX;
1512                     }
1513                     allocreg(cdb,&regm3,&r3,TYint);
1514                     cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
1515                     if (shpost != 1)
1516                         cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
1517                 }
1518                 else
1519                 {
1520                     /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
1521                      *   SHR   EAX,shpre
1522                      *   MOV   reg,m
1523                      *   MUL   reg
1524                      *   SHR   EDX,shpost
1525                      */
1526                     regm = mAX;
1527                     if (oper == OPmod || oper == OPremquo)
1528                         regm = allregs & ~(mAX|mDX);
1529                     codelem(cdb,e1,&regm,false);       // eval left leaf
1530                     reg = findreg(regm);
1531 
1532                     if (reg != AX)
1533                     {
1534                         getregs(cdb,mAX);
1535                         genmovreg(cdb,AX,reg);                 // MOV EAX,reg
1536                     }
1537                     if (shpre)
1538                     {
1539                         getregs(cdb,mAX);
1540                         cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
1541                     }
1542                     getregs(cdb,mDX);
1543                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1544                     getregs(cdb,mDX | mAX);
1545                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
1546                     if (shpost)
1547                         cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
1548                     r3 = DX;
1549                 }
1550 
1551                 regm_t resreg;
1552                 switch (oper)
1553                 {   case OPdiv:
1554                         // r3 = quotient
1555                         resreg = mask(r3);
1556                         break;
1557 
1558                     case OPmod:
1559                         /* reg = original value
1560                          * r3  = quotient
1561                          */
1562                         assert(!(regm & mAX));
1563                         if (el_signx32(e2))
1564                         {
1565                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1566                         }
1567                         else
1568                         {
1569                             assert(!(mask(r3) & mAX));
1570                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
1571                             getregs(cdb,mAX);
1572                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1573                         }
1574                         getregs(cdb,regm);
1575                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1576                         resreg = regm;
1577                         break;
1578 
1579                     case OPremquo:
1580                         /* reg = original value
1581                          * r3  = quotient
1582                          */
1583                         assert(!(mask(r3) & (mAX|regm)));
1584                         assert(!(regm & mAX));
1585                         if (el_signx32(e2))
1586                         {
1587                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1588                         }
1589                         else
1590                         {
1591                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0); // MOV EAX,e2factor
1592                             getregs(cdb,mAX);
1593                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1594                         }
1595                         getregs(cdb,regm);
1596                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1597                         genmovreg(cdb, AX, r3);              // MOV EAX,r3
1598                         genmovreg(cdb, DX, reg);             // MOV EDX,reg
1599                         resreg = mDX | mAX;
1600                         break;
1601 
1602                     default:
1603                         assert(0);
1604                 }
1605                 freenode(e2);
1606                 fixresult(cdb,e,resreg,pretregs);
1607                 return;
1608             }
1609 
1610             const int pow2 = ispow2(e2factor);
1611 
1612             // Register pair signed divide by power of 2
1613             if (sz == REGSIZE * 2 &&
1614                 (oper == OPdiv) && !uns &&
1615                 pow2 != -1 &&
1616                 I32 // not set up for I64 cent yet
1617                )
1618             {
1619                 regm_t retregs = mDX | mAX;
1620                 if (pow2 == 63 && !(retregs & BYTEREGS & mLSW))
1621                     retregs = (retregs & mMSW) | (BYTEREGS & mLSW);  // because of SETZ
1622 
1623                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1624                 const rhi = findregmsw(retregs);
1625                 const rlo = findreglsw(retregs);
1626                 freenode(e2);
1627                 getregs(cdb,retregs);
1628 
1629                 if (pow2 < 32)
1630                 {
1631                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1632 
1633                     genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
1634                     if (pow2 == 1)
1635                         cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
1636                     else
1637                     {
1638                         cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
1639                         cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
1640                     }
1641                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
1642                     cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
1643                     cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
1644                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
1645                 }
1646                 else if (pow2 == 32)
1647                 {
1648                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1649 
1650                     genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
1651                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
1652                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
1653                     cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
1654                     cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
1655                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
1656                 }
1657                 else if (pow2 < 63)
1658                 {
1659                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1660                     reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));
1661 
1662                     genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
1663                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
1664                     cdb.genmovreg(r2,r1);                                         // MOV r2,r1
1665 
1666                     if (pow2 == 33)
1667                     {
1668                         cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
1669                         cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
1670                         cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
1671                     }
1672                     else
1673                     {
1674                         cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
1675                         cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
1676                         cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
1677                     }
1678 
1679                     cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
1680                     cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
1681                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
1682                 }
1683                 else
1684                 {
1685                     // This may be better done by cgelem.d
1686                     assert(pow2 == 63);
1687                     cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000
1688                     cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
1689                     cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
1690                     cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
1691                     movregconst(cdb,rhi,0,0);                              // MOV rhi,0
1692                 }
1693 
1694                 fixresult(cdb,e,retregs,pretregs);
1695                 return;
1696             }
1697 
1698             // Register pair signed modulo by power of 2
1699             if (sz == REGSIZE * 2 &&
1700                 (oper == OPmod) && !uns &&
1701                 pow2 != -1 &&
1702                 I32 // not set up for I64 cent yet
1703                )
1704             {
1705                 regm_t retregs = mDX | mAX;
1706                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1707                 const rhi = findregmsw(retregs);
1708                 const rlo = findreglsw(retregs);
1709                 freenode(e2);
1710                 getregs(cdb,retregs);
1711 
1712                 regm_t scratchm = allregs & ~retregs;
1713                 if (pow2 == 63)
1714                     scratchm &= BYTEREGS;               // because of SETZ
1715                 reg_t r1 = allocScratchReg(cdb, scratchm);
1716 
1717                 if (pow2 < 32)
1718                 {
1719                     cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
1720                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
1721                     cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
1722                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
1723                     cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
1724                     cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
1725                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
1726                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
1727                 }
1728                 else if (pow2 == 32)
1729                 {
1730                     cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
1731                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
1732                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
1733                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
1734                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
1735                 }
1736                 else if (pow2 < 63)
1737                 {
1738                     reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));
1739 
1740                     cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
1741                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
1742                     cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
1743                     cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
1744                     cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
1745                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
1746                     cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
1747                     cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
1748                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
1749                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
1750                 }
1751                 else
1752                 {
1753                     // This may be better done by cgelem.d
1754                     assert(pow2 == 63);
1755 
1756                     cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
1757                     cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
1758                     cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
1759                     cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
1760                     cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
1761                 }
1762 
1763                 fixresult(cdb,e,retregs,pretregs);
1764                 return;
1765             }
1766 
1767             if (sz > REGSIZE || !el_signx32(e2))
1768                 goto default;
1769 
1770             // Special code for signed divide or modulo by power of 2
1771             if ((sz == REGSIZE || (I64 && sz == 4)) &&
1772                 (oper == OPdiv || oper == OPmod) && !uns &&
1773                 pow2 != -1 &&
1774                 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv)
1775                )
1776             {
1777                 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386)
1778                 {
1779                     /* MOV r,reg
1780                        SHR r,31
1781                        ADD reg,r
1782                        SAR reg,1
1783                      */
1784                     regm_t retregs = allregs;
1785                     codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1786                     const reg = findreg(retregs);
1787                     freenode(e2);
1788                     getregs(cdb,retregs);
1789 
1790                     reg_t r = allocScratchReg(cdb, allregs & ~retregs);
1791                     genmovreg(cdb,r,reg);                        // MOV r,reg
1792                     cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
1793                     cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
1794                     cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1
1795                     regm_t resreg = retregs;
1796                     fixresult(cdb,e,resreg,pretregs);
1797                     return;
1798                 }
1799 
1800                 regm_t resreg;
1801                 switch (oper)
1802                 {
1803                     case OPdiv:
1804                         resreg = mAX;
1805                         break;
1806 
1807                     case OPmod:
1808                         resreg = mDX;
1809                         break;
1810 
1811                     case OPremquo:
1812                         resreg = mDX | mAX;
1813                         break;
1814 
1815                     default:
1816                         assert(0);
1817                 }
1818 
1819                 regm_t retregs = mAX;
1820                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1821                 freenode(e2);
1822                 getregs(cdb,mAX | mDX);             // modify these regs
1823                 cdb.gen1(0x99);                             // CWD
1824                 code_orrex(cdb.last(), rex);
1825                 if (pow2 == 1)
1826                 {
1827                     if (oper == OPdiv)
1828                     {
1829                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));  // SUB AX,DX
1830                         cdb.gen2(0xD1,grex | modregrm(3,7,AX));   // SAR AX,1
1831                     }
1832                     else // OPmod
1833                     {
1834                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));   // XOR AX,DX
1835                         cdb.genc2(0x81,grex | modregrm(3,4,AX),1); // AND AX,1
1836                         cdb.gen2(0x03,grex | modregrm(3,DX,AX));   // ADD DX,AX
1837                     }
1838                 }
1839                 else
1840                 {   targ_ulong m;
1841 
1842                     m = (1 << pow2) - 1;
1843                     if (oper == OPdiv)
1844                     {
1845                         cdb.genc2(0x81,grex | modregrm(3,4,DX),m);  // AND DX,m
1846                         cdb.gen2(0x03,grex | modregrm(3,AX,DX));    // ADD AX,DX
1847                         // Be careful not to generate this for 8088
1848                         assert(config.target_cpu >= TARGET_80286);
1849                         cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2
1850                     }
1851                     else // OPmod
1852                     {
1853                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1854                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1855                         cdb.genc2(0x81,grex | modregrm(3,4,AX),m);  // AND AX,mask
1856                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1857                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1858                         resreg = mAX;
1859                     }
1860                 }
1861                 fixresult(cdb,e,resreg,pretregs);
1862                 return;
1863             }
1864             goto default;
1865 
1866         case OPind:
1867             if (!e2.Ecount)                        // if not CSE
1868                     goto case OPvar;                        // try OP reg,EA
1869             goto default;
1870 
1871         default:                                    // OPconst and operators
1872             //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
1873             regm_t retregs = sz <= REGSIZE ? mAX : mDX | mAX;
1874             codelem(cdb,e1,&retregs,false);           // eval left leaf
1875             regm_t rretregs;
1876             if (sz <= REGSIZE)                  // dedicated regs for div
1877             {
1878                 // pick some other regs
1879                 rretregs = isbyte ? BYTEREGS & ~mAX
1880                                 : ALLREGS & ~(mAX|mDX);
1881             }
1882             else
1883             {
1884                 assert(sz <= 2 * REGSIZE);
1885                 rretregs = mCX | mBX;           // second arg
1886             }
1887             scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
1888             if (sz <= REGSIZE)
1889             {
1890                 getregs(cdb,mAX | mDX);     // trash these regs
1891                 if (uns)                        // unsigned divide
1892                 {
1893                     movregconst(cdb,DX,0,(sz == 8) ? 64 : 0);  // MOV DX,0
1894                     getregs(cdb,mDX);
1895                 }
1896                 else
1897                 {
1898                     cdb.gen1(0x99);                 // CWD
1899                     code_orrex(cdb.last(),rex);
1900                 }
1901                 reg_t rreg = findreg(rretregs);
1902                 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,7 - uns,rreg)); // OP AX,rreg
1903                 if (I64 && isbyte && rreg >= 4)
1904                     code_orrex(cdb.last(), REX);
1905                 regm_t resreg;
1906                 switch (oper)
1907                 {
1908                     case OPdiv:
1909                         resreg = mAX;
1910                         break;
1911 
1912                     case OPmod:
1913                         resreg = mDX;
1914                         break;
1915 
1916                     case OPremquo:
1917                         resreg = mDX | mAX;
1918                         break;
1919 
1920                     default:
1921                         assert(0);
1922                 }
1923                 fixresult(cdb,e,resreg,pretregs);
1924             }
1925             else if (sz == 2 * REGSIZE)
1926             {
1927                 uint lib;
1928                 switch (oper)
1929                 {
1930                     case OPdiv:
1931                     case OPremquo:
1932                         lib = uns ? CLIB.uldiv : CLIB.ldiv;
1933                         break;
1934 
1935                     case OPmod:
1936                         lib = uns ? CLIB.ulmod : CLIB.lmod;
1937                         break;
1938 
1939                     default:
1940                         assert(0);
1941                 }
1942 
1943                 regm_t keepregs = I32 ? mSI | mDI : 0;
1944                 callclib(cdb,e,lib,pretregs,keepregs);
1945             }
1946             else
1947                     assert(0);
1948             return;
1949 
1950         case OPvar:
1951             if (I16 || sz == 2 * REGSIZE)
1952                 goto default;            // have to handle it with codelem()
1953 
1954             // loadea() handles CWD or CLR DX for divides
1955             regm_t retregs = mAX;
1956             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
1957             loadea(cdb,e2,&cs,0xF7 ^ isbyte,7 - uns,0,
1958                    mAX | mDX,
1959                    mAX | mDX);
1960             freenode(e2);
1961             regm_t resreg;
1962             switch (oper)
1963             {
1964                 case OPdiv:
1965                     resreg = mAX;
1966                     break;
1967 
1968                 case OPmod:
1969                     resreg = mDX;
1970                     break;
1971 
1972                 case OPremquo:
1973                     resreg = mDX | mAX;
1974                     break;
1975 
1976                 default:
1977                     assert(0);
1978             }
1979             fixresult(cdb,e,resreg,pretregs);
1980             return;
1981     }
1982     assert(0);
1983 }
1984 
1985 
/***************************
 * Generate code for OPnot and OPbool.
 *
 * If no result, or only the flags, are requested, the operand is simply
 * evaluated with the caller's *pretregs. Non-floating operands that fit one
 * of the cases below get a straight-line sequence:
 *
 *      - operand is an OPvar:       CMP e1,0 + SETcc, or CMP e1,1 + SBB
 *      - one byte result, 486+:     evaluate e1 for flags, then SETcc
 *      - operand fits a register:   NEG + SBB, then INC or NEG
 *
 * Everything else uses the branching form built around logexp():
 *
 *      c:      [evaluate e1, with ctrue as the branch target]
 *      cfalse: [save reg code]
 *              mov     reg,0
 *              jmp     cnop
 *      ctrue:  [save reg code]
 *              mov     reg,1
 *      cnop:   nop
 *
 * Params:
 *      cdb = code sink
 *      e = OPnot or OPbool node
 *      pretregs = requested result registers/flags; may be modified
 */

void cdnot(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdnot()\n");
    elem *e1 = e.EV.E1;

    // Nothing wanted, or only the condition codes: evaluating e1 is enough.
    //assert(e.Eoper != OPnot && e.Eoper != OPbool);*/ /* should've been optimized
    if (*pretregs == 0 || *pretregs == mPSW)
    {
        codelem(cdb,e1,pretregs,false);      // evaluate e1 for cc
        return;
    }

    reg_t reg;
    regm_t retregs;
    OPER op = e.Eoper;
    const uint sz = tysize(e1.Ety);
    const uint rex = (I64 && sz == 8) ? REX_W : 0;
    const uint grex = rex << 16;
    const bool byteOperand = (sz == 1);

    if (!tyfloating(e1.Ety))
    {
        if (sz <= REGSIZE && e1.Eoper == OPvar)
        {
            // Compare the variable in place instead of loading it first
            code cs;

            getlvalue(cdb,&cs,e1,0);
            freenode(e1);
            if (sz == 2 && !I16)
                cs.Iflags |= CFopsize;

            retregs = *pretregs & (ALLREGS | mBP);
            if (config.target_cpu >= TARGET_80486 &&
                tysize(e.Ety) == 1)
            {
                // Use a register already holding 0 if there is one,
                // otherwise compare against an immediate 0
                if (reghasvalue(byteOperand ? BYTEREGS : ALLREGS,0,&reg))
                {
                    cs.Iop = 0x39;
                    if (I64 && byteOperand && reg >= 4)
                        cs.Irex |= REX;
                }
                else
                {
                    cs.Iop = 0x81;
                    reg = 7;
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vint = 0;
                }
                if (byteOperand)
                    cs.Iop ^= 1;                      // select the byte form of the opcode
                code_newreg(&cs,reg);
                cdb.gen(&cs);                         // CMP e1,0

                retregs &= BYTEREGS;
                if (retregs == 0)
                    retregs = BYTEREGS;
                allocreg(cdb,&retregs,&reg,TYint);

                opcode_t setcc = 0x0F94;              // SETZ rm8
                if (op == OPbool)
                    setcc = 0x0F95;                   // SETNZ rm8
                cdb.gen2(setcc, modregrmx(3,0,reg));
                if (reg >= 4)
                    code_orrex(cdb.last(), REX);
                if (op == OPbool)
                    *pretregs &= ~mPSW;
                goto Lfixresult;
            }

            // Use a register already holding 1 if there is one,
            // otherwise compare against an immediate 1
            if (reghasvalue(byteOperand ? BYTEREGS : ALLREGS,1,&reg))
                cs.Iop = 0x39;
            else
            {
                cs.Iop = 0x81;
                reg = 7;
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = 1;
            }
            if (I64 && byteOperand && reg >= 4)
                cs.Irex |= REX;
            if (byteOperand)
                cs.Iop ^= 1;                          // select the byte form of the opcode
            code_newreg(&cs,reg);
            cdb.gen(&cs);                             // CMP e1,1

            allocreg(cdb,&retregs,&reg,TYint);
            op ^= (OPbool ^ OPnot);                   // switch operators
            goto Lsbb;
        }
        else if (config.target_cpu >= TARGET_80486 &&
                 tysize(e.Ety) == 1)
        {
            // Evaluate e1 for the flags only, then materialize them with SETcc
            int jop = jmpopcode(e1);
            retregs = mPSW;
            codelem(cdb,e1,&retregs,false);
            retregs = *pretregs & BYTEREGS;
            if (retregs == 0)
                retregs = BYTEREGS;
            allocreg(cdb,&retregs,&reg,TYint);

            int setcc = 0x0F90 | (jop & 0x0F);        // SETcc rm8
            if (op == OPnot)
                setcc ^= 1;
            cdb.gen2(setcc,grex | modregrmx(3,0,reg));
            if (reg >= 4)
                code_orrex(cdb.last(), REX);
            if (op == OPbool)
                *pretregs &= ~mPSW;
            goto Lfixresult;
        }
        else if (sz <= REGSIZE &&
                 // NEG bytereg is too expensive
                 (sz != 1 || config.target_cpu < TARGET_PentiumPro))
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (byteOperand)
            {
                retregs &= BYTEREGS;
                if (retregs == 0)
                    retregs = BYTEREGS;
            }
            codelem(cdb,e1,&retregs,false);
            reg = findreg(retregs);
            getregs(cdb,retregs);
            cdb.gen2(byteOperand ? 0xF6 : 0xF7,grex | modregrmx(3,3,reg));   // NEG reg
            code_orflag(cdb.last(),CFpsw);
            if (sz == SHORTSIZE && !I16)
                code_orflag(cdb.last(),CFopsize);

        Lsbb:
            genregs(cdb,0x19,reg,reg);                       // SBB reg,reg
            code_orrex(cdb.last(), rex);
            // On the NEG path, reg==0 if e1==0, reg==-1 if e1!=0 at this point
            if (op == OPnot)
            {
                if (I64)
                    cdb.gen2(0xFF,grex | modregrmx(3,0,reg));    // INC reg
                else
                    cdb.gen1(0x40 + reg);                        // INC reg
            }
            else
                cdb.gen2(0xF7,grex | modregrmx(3,3,reg));        // NEG reg
            if (*pretregs & mPSW)
            {
                code_orflag(cdb.last(),CFpsw);
                *pretregs &= ~mPSW;         // flags are always set anyway
            }

        Lfixresult:
            fixresult(cdb,e,retregs,pretregs);
            return;
        }
    }

    // General case: branch on e1, then load 0 or 1 into the result register
    code *cnop = gennop(null);
    code *ctrue = gennop(null);
    logexp(cdb,e1,op != OPnot,FLcode,ctrue);

    tym_t forflags = *pretregs & mPSW;
    if (I64 && sz == 8)
        forflags |= 64;
    assert(tysize(e.Ety) <= REGSIZE);              // result better be int

    // Whatever allocreg() emits is needed on both the false and the true path
    CodeBuilder cdbfalse;
    cdbfalse.ctor();
    allocreg(cdbfalse,pretregs,&reg,e.Ety);        // allocate reg for result
    code *cfalse = cdbfalse.finish();

    CodeBuilder cdbtrue;
    cdbtrue.ctor();
    cdbtrue.append(ctrue);
    for (code *c = cfalse; c; c = code_next(c))
        cdbtrue.gen(c);                                       // duplicate reg save code

    CodeBuilder cdbfalse2;
    cdbfalse2.ctor();
    movregconst(cdbfalse2,reg,0,forflags);                    // mov 0 into reg
    regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
    movregconst(cdbtrue,reg,1,forflags);                      // mov 1 into reg
    regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
    genjmp(cdbfalse2,JMP,FLcode,cast(block *) cnop);          // skip over ctrue

    // Final layout: cfalse, cdbfalse2 (mov 0 / jmp cnop), cdbtrue, cnop
    cdb.append(cfalse);
    cdb.append(cdbfalse2);
    cdb.append(cdbtrue);
    cdb.append(cnop);
}
2170 
2171 
/************************
 * Complement operator.
 *
 * Evaluates the operand into a register (a byte register for 1 byte
 * operands) and emits NOT on it. For a register pair, NOT is emitted on
 * both the most and the least significant register.
 *
 * Params:
 *      cdb = code sink
 *      e = the complement node
 *      pretregs = requested result registers/flags
 */

void cdcom(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // No result requested: just evaluate the operand
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const int sz = _tysize[tybasic(e.Ety)];
    const bool isByte = (sz == 1);
    const uint rex = (I64 && sz == 8) ? REX_W : 0;

    // Honor the caller's register preference when it is usable
    const regm_t candidates = isByte ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & candidates;
    if (!retregs)
        retregs = candidates;

    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed

    const opcode_t notOp = isByte ? 0xF6 : 0xF7;
    const reg = (sz > REGSIZE) ? findregmsw(retregs) : findreg(retregs);

    genregs(cdb,notOp,2,reg);            // NOT reg
    code_orrex(cdb.last(), rex);
    if (I64 && isByte && reg >= 4)
        code_orrex(cdb.last(), REX);

    if (sz == 2 * REGSIZE)
        genregs(cdb,notOp,2,findreglsw(retregs));   // NOT least significant reg

    fixresult(cdb,e,retregs,pretregs);
}
2216 
/************************
 * Bswap operator.
 *
 * Three shapes are generated, depending on the operand size:
 *      register pair:  BSWAP on each half, then XCHG of the two halves
 *      2 bytes:        XCHG between the low and high byte of the register
 *      4 or 8 bytes:   a single BSWAP, with REX bits added as required
 *
 * Params:
 *      cdb = code sink
 *      e = the bswap node
 *      pretregs = requested result registers/flags
 */

void cdbswap(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // No result requested: just evaluate the operand
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const sz = _tysize[tybasic(e.Ety)];

    // The 2 byte form swaps regL with regH, so it needs a byte register
    const regm_t candidates = (sz == 2) ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & candidates;
    if (!retregs)
        retregs = candidates;

    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);        // retregs will be destroyed

    if (sz == 2 * REGSIZE)
    {
        assert(sz != 16);                       // no cent support yet
        const hi = findregmsw(retregs);
        const lo = findreglsw(retregs);
        cdb.gen1(0x0FC8 + (hi & 7));            // BSWAP hi
        cdb.gen1(0x0FC8 + (lo & 7));            // BSWAP lo
        cdb.gen2(0x87,modregrm(3,hi,lo));       // XCHG hi,lo
    }
    else if (sz == 2)
    {
        const reg = findreg(retregs);
        genregs(cdb,0x86,reg+4,reg);            // XCHG regL,regH
    }
    else
    {
        const reg = findreg(retregs);
        assert(sz == 4 || sz == 8);
        cdb.gen1(0x0FC8 + (reg & 7));           // BSWAP reg

        // Collect whichever REX bits this encoding needs
        ubyte prefix = 0;
        if (sz == 8)
            prefix |= REX_W;
        if (reg & 8)
            prefix |= REX_B;
        if (prefix)
            code_orrex(cdb.last(), prefix);
    }

    fixresult(cdb,e,retregs,pretregs);
}
2268 
2269 /*************************
2270  * ?: operator
2271  */
2272 
2273 void cdcond(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2274 {
2275     con_t regconold,regconsave;
2276     uint stackpushold,stackpushsave;
2277     int ehindexold,ehindexsave;
2278     uint sz2;
2279 
2280     /* vars to save state of 8087 */
2281     int stackusedold,stackusedsave;
2282     NDP[global87.stack.length] _8087old;
2283     NDP[global87.stack.length] _8087save;
2284 
2285     //printf("cdcond(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
2286     elem *e1 = e.EV.E1;
2287     elem *e2 = e.EV.E2;
2288     elem *e21 = e2.EV.E1;
2289     elem *e22 = e2.EV.E2;
2290     regm_t psw = *pretregs & mPSW;               /* save PSW bit                 */
2291     const op1 = e1.Eoper;
2292     uint sz1 = tysize(e1.Ety);
2293     uint jop = jmpopcode(e1);
2294 
2295     uint jop1 = jmpopcode(e21);
2296     uint jop2 = jmpopcode(e22);
2297 
2298     docommas(cdb,&e1);
2299     cgstate.stackclean++;
2300 
2301     if (!OTrel(op1) && e1 == e21 &&
2302         sz1 <= REGSIZE && !tyfloating(e1.Ety))
2303     {   // Recognize (e ? e : f)
2304 
2305         code *cnop1 = gennop(null);
2306         regm_t retregs = *pretregs | mPSW;
2307         codelem(cdb,e1,&retregs,false);
2308 
2309         cse_flush(cdb,1);                // flush CSEs to memory
2310         genjmp(cdb,jop,FLcode,cast(block *)cnop1);
2311         freenode(e21);
2312 
2313         regconsave = regcon;
2314         stackpushsave = stackpush;
2315 
2316         retregs |= psw;
2317         if (retregs & (mBP | ALLREGS))
2318             regimmed_set(findreg(retregs),0);
2319         codelem(cdb,e22,&retregs,false);
2320 
2321         andregcon(&regconsave);
2322         assert(stackpushsave == stackpush);
2323 
2324         *pretregs = retregs;
2325         freenode(e2);
2326         cdb.append(cnop1);
2327         cgstate.stackclean--;
2328         return;
2329     }
2330 
2331     if (OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE &&
2332         !e1.Ecount &&
2333         (jop == JC || jop == JNC) &&
2334         (sz2 = tysize(e2.Ety)) <= REGSIZE &&
2335         e21.Eoper == OPconst &&
2336         e22.Eoper == OPconst
2337        )
2338     {
2339         uint sz = tysize(e.Ety);
2340         uint rex = (I64 && sz == 8) ? REX_W : 0;
2341         uint grex = rex << 16;
2342 
2343         regm_t retregs;
2344         targ_size_t v1,v2;
2345 
2346         if (sz2 != 1 || I64)
2347         {
2348             retregs = *pretregs & (ALLREGS | mBP);
2349             if (!retregs)
2350                 retregs = ALLREGS;
2351         }
2352         else
2353         {
2354             retregs = *pretregs & BYTEREGS;
2355             if (!retregs)
2356                 retregs = BYTEREGS;
2357         }
2358 
2359         cdcmp_flag = 1 | rex;
2360         v1 = cast(targ_size_t)e21.EV.Vllong;
2361         v2 = cast(targ_size_t)e22.EV.Vllong;
2362         if (jop == JNC)
2363         {   v1 = v2;
2364             v2 = cast(targ_size_t)e21.EV.Vllong;
2365         }
2366 
2367         opcode_t opcode = 0x81;
2368         switch (sz2)
2369         {   case 1:     opcode--;
2370                         v1 = cast(byte) v1;
2371                         v2 = cast(byte) v2;
2372                         break;
2373 
2374             case 2:     v1 = cast(short) v1;
2375                         v2 = cast(short) v2;
2376                         break;
2377 
2378             case 4:     v1 = cast(int) v1;
2379                         v2 = cast(int) v2;
2380                         break;
2381             default:
2382                         break;
2383         }
2384 
2385         if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1)
2386         {
2387             // only zero-extension from 32-bits is available for 'or'
2388         }
2389         else if (I64 && cast(targ_llong)v2 != cast(targ_llong)cast(targ_long)v2)
2390         {
2391             // only sign-extension from 32-bits is available for 'and'
2392         }
2393         else
2394         {
2395             codelem(cdb,e1,&retregs,false);
2396             const reg = findreg(retregs);
2397 
2398             if (v1 == 0 && v2 == ~cast(targ_size_t)0)
2399             {
2400                 cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg));  // NOT reg
2401                 if (I64 && sz2 == REGSIZE)
2402                     code_orrex(cdb.last(), REX_W);
2403             }
2404             else
2405             {
2406                 v1 -= v2;
2407                 cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1);   // AND reg,v1-v2
2408                 if (I64 && sz2 == 1 && reg >= 4)
2409                     code_orrex(cdb.last(), REX);
2410                 if (v2 == 1 && !I64)
2411                     cdb.gen1(0x40 + reg);                     // INC reg
2412                 else if (v2 == -1L && !I64)
2413                     cdb.gen1(0x48 + reg);                     // DEC reg
2414                 else
2415                 {   cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2);   // ADD reg,v2
2416                     if (I64 && sz2 == 1 && reg >= 4)
2417                         code_orrex(cdb.last(), REX);
2418                 }
2419             }
2420 
2421             freenode(e21);
2422             freenode(e22);
2423             freenode(e2);
2424 
2425             fixresult(cdb,e,retregs,pretregs);
2426             cgstate.stackclean--;
2427             return;
2428         }
2429     }
2430 
2431     if (op1 != OPcond && op1 != OPandand && op1 != OPoror &&
2432         op1 != OPnot && op1 != OPbool &&
2433         e21.Eoper == OPconst &&
2434         sz1 <= REGSIZE &&
2435         *pretregs & (mBP | ALLREGS) &&
2436         tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety))
2437     {   // Recognize (e ? c : f)
2438 
2439         code *cnop1 = gennop(null);
2440         regm_t retregs = mPSW;
2441         jop = jmpopcode(e1);            // get jmp condition
2442         codelem(cdb,e1,&retregs,false);
2443 
2444         // Set the register with e21 without affecting the flags
2445         retregs = *pretregs & (ALLREGS | mBP);
2446         if (retregs & ~regcon.mvar)
2447             retregs &= ~regcon.mvar;    // don't disturb register variables
2448         // NOTE: see my email (sign extension bug? possible fix, some questions
2449         reg_t reg;
2450         regwithvalue(cdb,retregs,cast(targ_size_t)e21.EV.Vllong,&reg,tysize(e21.Ety) == 8 ? 64|8 : 8);
2451         retregs = mask(reg);
2452 
2453         cse_flush(cdb,1);                // flush CSE's to memory
2454         genjmp(cdb,jop,FLcode,cast(block *)cnop1);
2455         freenode(e21);
2456 
2457         regconsave = regcon;
2458         stackpushsave = stackpush;
2459 
2460         codelem(cdb,e22,&retregs,false);
2461 
2462         andregcon(&regconsave);
2463         assert(stackpushsave == stackpush);
2464 
2465         freenode(e2);
2466         cdb.append(cnop1);
2467         fixresult(cdb,e,retregs,pretregs);
2468         cgstate.stackclean--;
2469         return;
2470     }
2471 
2472     code *cnop1 = gennop(null);
2473     code *cnop2 = gennop(null);         // dummy target addresses
2474     logexp(cdb,e1,false,FLcode,cnop1);  // evaluate condition
2475     regconold = regcon;
2476     stackusedold = global87.stackused;
2477     stackpushold = stackpush;
2478     memcpy(_8087old.ptr,global87.stack.ptr,global87.stack.sizeof);
2479     regm_t retregs = *pretregs;
2480     CodeBuilder cdb1;
2481     cdb1.ctor();
2482     if (psw && jop1 != JNE)
2483     {
2484         retregs &= ~mPSW;
2485         if (!retregs)
2486             retregs = ALLREGS;
2487         codelem(cdb1,e21,&retregs,false);
2488         fixresult(cdb1,e21,retregs,pretregs);
2489     }
2490     else
2491         codelem(cdb1,e21,&retregs,false);
2492 
2493     if (CPP && e2.Eoper == OPcolon2)
2494     {
2495         code cs;
2496 
2497         // This is necessary so that any cleanup code on one branch
2498         // is redone on the other branch.
2499         cs.Iop = ESCAPE | ESCmark2;
2500         cs.Iflags = 0;
2501         cs.Irex = 0;
2502         cdb.gen(&cs);
2503         cdb.append(cdb1);
2504         cs.Iop = ESCAPE | ESCrelease2;
2505         cdb.gen(&cs);
2506     }
2507     else
2508         cdb.append(cdb1);
2509 
2510     regconsave = regcon;
2511     regcon = regconold;
2512 
2513     stackpushsave = stackpush;
2514     stackpush = stackpushold;
2515 
2516     stackusedsave = global87.stackused;
2517     global87.stackused = stackusedold;
2518 
2519     memcpy(_8087save.ptr,global87.stack.ptr,global87.stack.sizeof);
2520     memcpy(global87.stack.ptr,_8087old.ptr,global87.stack.sizeof);
2521 
2522     retregs |= psw;                     // PSW bit may have been trashed
2523     *pretregs |= psw;
2524     CodeBuilder cdb2;
2525     cdb2.ctor();
2526     if (psw && jop2 != JNE)
2527     {
2528         retregs &= ~mPSW;
2529         if (!retregs)
2530             retregs = ALLREGS;
2531         codelem(cdb2,e22,&retregs,false);
2532         fixresult(cdb2,e22,retregs,pretregs);
2533     }
2534     else
2535         codelem(cdb2,e22,&retregs,false);   // use same regs as E1
2536     *pretregs = retregs | psw;
2537     andregcon(&regconold);
2538     andregcon(&regconsave);
2539     assert(global87.stackused == stackusedsave);
2540     assert(stackpush == stackpushsave);
2541     memcpy(global87.stack.ptr,_8087save.ptr,global87.stack.sizeof);
2542     freenode(e2);
2543     genjmp(cdb,JMP,FLcode,cast(block *) cnop2);
2544     cdb.append(cnop1);
2545     cdb.append(cdb2);
2546     cdb.append(cnop2);
2547     if (*pretregs & mST0)
2548         note87(e,0,0);
2549 
2550     cgstate.stackclean--;
2551 }
2552 
/*********************
 * Generate code for the comma operator (OPcomma).
 *
 * The left operand is evaluated with an empty result mask, so no value
 * is requested from it; the right operand is then evaluated into
 * whatever the caller asked for.
 *
 * Params:
 *      cdb = code builder receiving the generated code
 *      e = the OPcomma elem
 *      pretregs = wanted result mask, handed straight to the right operand
 */

void cdcomma(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    // Left leaf: empty mask, i.e. its value is not wanted
    elem *left = e.EV.E1;
    regm_t noregs = 0;
    codelem(cdb, left, &noregs, false);

    // Right leaf: produces the result of the whole expression
    elem *right = e.EV.E2;
    codelem(cdb, right, pretregs, false);
}
2563 
2564 
/*********************************
 * Generate code for the short-circuit operators && (OPandand) and || (OPoror).
 *
 * Shape of the code produced in the general case:
 *
 *              (evaluate e1 and e2, if true goto ltrue)
 *      lfalse: NOP
 *              [save reg code]         ;if we must preserve reg
 *              CLR     reg             ;false result (set Z also)
 *              JMP     lend
 *
 *      ltrue:  NOP                     ;reached when the result is true
 *              [save reg code]         ;preserve reg
 *
 *              MOV     reg,1           ;true result
 *                  or
 *              CLR     reg             ;if return result in flags
 *              INC     reg
 *
 *      lend:   NOP                     ;mark end of code
 *
 * Params:
 *      cdb = code builder receiving the generated code
 *      e = the OPandand / OPoror elem
 *      pretregs = wanted result mask; 0 means the value is not needed.
 *                 When a value is wanted, it is overwritten with the
 *                 register mask picked for the result.
 */

void cdloglog(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* There is deliberately no
     *    assert(*pretregs != mPSW);
     * here, because it can be tripped by something like:
     *    if ( (b<=a) ? (c<b || a<=c) : c>=a )
     * Ugly code is generated for that, but the case is too obscure
     * to expend much effort on it.
     */

    cgstate.stackclean++;

    // ltrue heads the "result is true" tail, which is collected in cdbTrue
    code *ltrue = gennop(null);
    CodeBuilder cdbTrue;
    cdbTrue.ctor();
    cdbTrue.append(ltrue);

    code *lfalse = gennop(null);
    elem *rhs = e.EV.E2;
    const bool isOrOr = (e.Eoper == OPoror);

    // Left operand: || jumps to ltrue when true, && jumps to lfalse when false
    if (isOrOr)
        logexp(cdb, e.EV.E1, 1, FLcode, ltrue);
    else
        logexp(cdb, e.EV.E1, 0, FLcode, lfalse);

    // Snapshot state so the effects of evaluating the right operand can be merged/checked
    con_t savedRegcon = regcon;
    const uint savedStackpush = stackpush;

    if (*pretregs == 0)
    {
        // Caller does not want a value: just evaluate the right operand
        const noReturn = !el_returns(rhs);
        codelem(cdb, rhs, pretregs, false);
        if (!noReturn)
            andregcon(&savedRegcon);
        else
        {
            // Go back to the saved register state, keeping the union of used registers
            savedRegcon.used |= regcon.used;
            regcon = savedRegcon;
        }
        assert(stackpush == savedStackpush);
        cdb.append(lfalse);
        cdb.append(cdbTrue);            // eval code, throw away result
        cgstate.stackclean--;
        return;
    }

    code *lend = gennop(null);
    const uint sz = tysize(e.Ety);

    if (tybasic(rhs.Ety) == TYbool &&
        sz == tysize(rhs.Ety) &&
        !(*pretregs & mPSW) &&
        rhs.Eoper == OPcall)
    {
        /* Right operand is a call returning bool of the same size and the
         * flags are not wanted: evaluate the call directly into *pretregs.
         */
        codelem(cdb, rhs, pretregs, false);

        andregcon(&savedRegcon);

        // evaluating E2 must leave the stack depth unchanged
        assert(stackpush == savedStackpush);

        assert(sz <= 4);                                // result better be int
        regm_t resultRegs = *pretregs & allregs;
        reg_t resultReg;
        allocreg(cdbTrue, &resultRegs, &resultReg, TYint);  // allocate reg for result
        movregconst(cdbTrue, resultReg, isOrOr, 0);     // constant is 1 for ||, 0 for &&
        regcon.immed.mval &= ~mask(resultReg);          // mark reg as unavail
        *pretregs = resultRegs;

        // For || lfalse precedes the jump to lend; for && it follows it
        if (isOrOr)
            cdb.append(lfalse);
        genjmp(cdb, JMP, FLcode, cast(block *) lend);   // JMP lend
        if (!isOrOr)
            cdb.append(lfalse);
        cdb.append(cdbTrue);
        cdb.append(lend);

        cgstate.stackclean--;
        return;
    }

    // General case: the right operand also branches to ltrue when true
    logexp(cdb, rhs, 1, FLcode, ltrue);
    andregcon(&savedRegcon);

    // evaluating E2 must leave the stack depth unchanged
    assert(stackpush == savedStackpush);

    assert(sz <= 4);                                    // result better be int
    regm_t resultRegs = *pretregs & (ALLREGS | mBP);
    if (resultRegs == 0)
        resultRegs = ALLREGS;                           // if mPSW only

    // Code emitted by the register allocation goes on the false path ...
    CodeBuilder cdbAlloc;
    cdbAlloc.ctor();
    reg_t resultReg;
    allocreg(cdbAlloc, &resultRegs, &resultReg, TYint); // allocate reg for result
    code *allocCode = cdbAlloc.finish();

    // ... and every instruction of it is duplicated onto the true path
    code *c = allocCode;
    while (c)
    {
        cdbTrue.gen(c);
        c = code_next(c);
    }

    CodeBuilder cdbFalse;
    cdbFalse.ctor();
    movregconst(cdbFalse, resultReg, 0, *pretregs & mPSW);  // MOV reg,0
    regcon.immed.mval &= ~mask(resultReg);              // mark reg as unavail
    genjmp(cdbFalse, JMP, FLcode, cast(block *) lend);  // JMP lend

    movregconst(cdbTrue, resultReg, 1, *pretregs & mPSW);   // reg = 1
    regcon.immed.mval &= ~mask(resultReg);              // mark reg as unavail

    *pretregs = resultRegs;

    // Stitch the pieces together: lfalse, false path, true path, lend
    cdb.append(lfalse);
    cdb.append(allocCode);
    cdb.append(cdbFalse);
    cdb.append(cdbTrue);
    cdb.append(lend);
    cgstate.stackclean--;
}
2694 
2695 
2696 /*********************
2697  * Generate code for shift left or shift right (OPshl,OPshr,OPashr,OProl,OPror).
2698  */
2699 
2700 void cdshift(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2701 {
2702     reg_t resreg;
2703     uint shiftcnt;
2704     regm_t retregs,rretregs;
2705 
2706     //printf("cdshift()\n");
2707     elem *e1 = e.EV.E1;
2708     if (*pretregs == 0)                   // if don't want result
2709     {
2710         codelem(cdb,e1,pretregs,false); // eval left leaf
2711         *pretregs = 0;                  // in case they got set
2712         codelem(cdb,e.EV.E2,pretregs,false);
2713         return;
2714     }
2715 
2716     tym_t tyml = tybasic(e1.Ety);
2717     int sz = _tysize[tyml];
2718     assert(!tyfloating(tyml));
2719     OPER oper = e.Eoper;
2720     uint grex = ((I64 && sz == 8) ? REX_W : 0) << 16;
2721 
2722 version (SCPP)
2723 {
2724     // Do this until the rest of the compiler does OPshr/OPashr correctly
2725     if (oper == OPshr)
2726         oper = (tyuns(tyml)) ? OPshr : OPashr;
2727 }
2728 
2729     uint s1,s2;
2730     switch (oper)
2731     {
2732         case OPshl:
2733             s1 = 4;                     // SHL
2734             s2 = 2;                     // RCL
2735             break;
2736         case OPshr:
2737             s1 = 5;                     // SHR
2738             s2 = 3;                     // RCR
2739             break;
2740         case OPashr:
2741             s1 = 7;                     // SAR
2742             s2 = 3;                     // RCR
2743             break;
2744         case OProl:
2745             s1 = 0;                     // ROL
2746             break;
2747         case OPror:
2748             s1 = 1;                     // ROR
2749             break;
2750         default:
2751             assert(0);
2752     }
2753 
2754     reg_t sreg = NOREG;                   // guard against using value without assigning to sreg
2755     elem *e2 = e.EV.E2;
2756     regm_t forccs = *pretregs & mPSW;            // if return result in CCs
2757     regm_t forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs
2758     bool e2isconst = false;                    // assume for the moment
2759     uint isbyte = (sz == 1);
2760     switch (e2.Eoper)
2761     {
2762         case OPconst:
2763             e2isconst = true;               // e2 is a constant
2764             shiftcnt = e2.EV.Vint;         // get shift count
2765             if ((!I16 && sz <= REGSIZE) ||
2766                 shiftcnt <= 4 ||            // if sequence of shifts
2767                 (sz == 2 &&
2768                     (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) ||
2769                 (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2770                )
2771             {
2772                 retregs = (forregs) ? forregs
2773                                     : ALLREGS;
2774                 if (isbyte)
2775                 {   retregs &= BYTEREGS;
2776                     if (!retregs)
2777                         retregs = BYTEREGS;
2778                 }
2779                 else if (sz > REGSIZE && sz <= 2 * REGSIZE &&
2780                          !(retregs & mMSW))
2781                     retregs |= mMSW & ALLREGS;
2782                 if (s1 == 7)    // if arithmetic right shift
2783                 {
2784                     if (shiftcnt == 8)
2785                         retregs = mAX;
2786                     else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2787                         retregs = mDX|mAX;
2788                 }
2789 
2790                 if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE &&
2791                     oper == OPshl &&
2792                     !e1.Ecount &&
2793                     (e1.Eoper == OPs16_32 || e1.Eoper == OPu16_32 ||
2794                      e1.Eoper == OPs32_64 || e1.Eoper == OPu32_64)
2795                    )
2796                 {   // Handle (shtlng)s << 16
2797                     regm_t r = retregs & mMSW;
2798                     codelem(cdb,e1.EV.E1,&r,false);      // eval left leaf
2799                     regwithvalue(cdb,retregs & mLSW,0,&resreg,0);
2800                     getregs(cdb,r);
2801                     retregs = r | mask(resreg);
2802                     if (forccs)
2803                     {   sreg = findreg(r);
2804                         gentstreg(cdb,sreg);
2805                         *pretregs &= ~mPSW;             // already set
2806                     }
2807                     freenode(e1);
2808                     freenode(e2);
2809                     break;
2810                 }
2811 
2812                 // See if we should use LEA reg,xxx instead of shift
2813                 if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 &&
2814                     (sz == REGSIZE || (I64 && sz == 4)) &&
2815                     oper == OPshl &&
2816                     e1.Eoper == OPvar &&
2817                     !(*pretregs & mPSW) &&
2818                     config.flags4 & CFG4speed
2819                    )
2820                 {
2821                     reg_t reg;
2822                     regm_t regm;
2823 
2824                     if (isregvar(e1,&regm,&reg) && !(regm & retregs))
2825                     {   code cs;
2826                         allocreg(cdb,&retregs,&resreg,e.Ety);
2827                         buildEA(&cs,-1,reg,1 << shiftcnt,0);
2828                         cs.Iop = LEA;
2829                         code_newreg(&cs,resreg);
2830                         cs.Iflags = 0;
2831                         if (I64 && sz == 8)
2832                             cs.Irex |= REX_W;
2833                         cdb.gen(&cs);             // LEA resreg,[reg * ss]
2834                         freenode(e1);
2835                         freenode(e2);
2836                         break;
2837                     }
2838                 }
2839 
2840                 codelem(cdb,e1,&retregs,false); // eval left leaf
2841                 //assert((retregs & regcon.mvar) == 0);
2842                 getregs(cdb,retregs);          // modify these regs
2843 
2844                 {
2845                     if (sz == 2 * REGSIZE)
2846                     {   resreg = findregmsw(retregs);
2847                         sreg = findreglsw(retregs);
2848                     }
2849                     else
2850                     {   resreg = findreg(retregs);
2851                         sreg = NOREG;              // an invalid value
2852                     }
2853                     if (config.target_cpu >= TARGET_80286 &&
2854                         sz <= REGSIZE)
2855                     {
2856                         // SHL resreg,shiftcnt
2857                         assert(!(sz == 1 && (mask(resreg) & ~BYTEREGS)));
2858                         cdb.genc2(0xC1 ^ isbyte,grex | modregxrmx(3,s1,resreg),shiftcnt);
2859                         if (shiftcnt == 1)
2860                             cdb.last().Iop += 0x10;     // short form of shift
2861                         if (I64 && sz == 1 && resreg >= 4)
2862                             cdb.last().Irex |= REX;
2863                         // See if we need operand size prefix
2864                         if (!I16 && oper != OPshl && sz == 2)
2865                             cdb.last().Iflags |= CFopsize;
2866                         if (forccs)
2867                             cdb.last().Iflags |= CFpsw;         // need flags result
2868                     }
2869                     else if (shiftcnt == 8)
2870                     {   if (!(retregs & BYTEREGS) || resreg >= 4)
2871                         {
2872                             goto L1;
2873                         }
2874 
2875                         if (pass != PASSfinal && (!forregs || forregs & (mSI | mDI)))
2876                         {
2877                             // e1 might get into SI or DI in a later pass,
2878                             // so don't put CX into a register
2879                             getregs(cdb,mCX);
2880                         }
2881 
2882                         assert(sz == 2);
2883                         switch (oper)
2884                         {
2885                             case OPshl:
2886                                 // MOV regH,regL        XOR regL,regL
2887                                 assert(resreg < 4 && !grex);
2888                                 genregs(cdb,0x8A,resreg+4,resreg);
2889                                 genregs(cdb,0x32,resreg,resreg);
2890                                 break;
2891 
2892                             case OPshr:
2893                             case OPashr:
2894                                 // MOV regL,regH
2895                                 genregs(cdb,0x8A,resreg,resreg+4);
2896                                 if (oper == OPashr)
2897                                     cdb.gen1(0x98);           // CBW
2898                                 else
2899                                     genregs(cdb,0x32,resreg+4,resreg+4); // CLR regH
2900                                 break;
2901 
2902                             case OPror:
2903                             case OProl:
2904                                 // XCHG regL,regH
2905                                 genregs(cdb,0x86,resreg+4,resreg);
2906                                 break;
2907 
2908                             default:
2909                                 assert(0);
2910                         }
2911                         if (forccs)
2912                             gentstreg(cdb,resreg);
2913                     }
2914                     else if (shiftcnt == REGSIZE * 8)   // it's an lword
2915                     {
2916                         if (oper == OPshl)
2917                             swap(&resreg, &sreg);
2918                         genmovreg(cdb,sreg,resreg);  // MOV sreg,resreg
2919                         if (oper == OPashr)
2920                             cdb.gen1(0x99);                       // CWD
2921                         else
2922                             movregconst(cdb,resreg,0,0);  // MOV resreg,0
2923                         if (forccs)
2924                         {
2925                             gentstreg(cdb,sreg);
2926                             *pretregs &= mBP | ALLREGS | mES;
2927                         }
2928                     }
2929                     else
2930                     {
2931                         if (oper == OPshl && sz == 2 * REGSIZE)
2932                             swap(&resreg, &sreg);
2933                         while (shiftcnt--)
2934                         {
2935                             cdb.gen2(0xD1 ^ isbyte,modregrm(3,s1,resreg));
2936                             if (sz == 2 * REGSIZE)
2937                             {
2938                                 code_orflag(cdb.last(),CFpsw);
2939                                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
2940                             }
2941                         }
2942                         if (forccs)
2943                             code_orflag(cdb.last(),CFpsw);
2944                     }
2945                     if (sz <= REGSIZE)
2946                         *pretregs &= mBP | ALLREGS;     // flags already set
2947                 }
2948                 freenode(e2);
2949                 break;
2950             }
2951             goto default;
2952 
2953         default:
2954             retregs = forregs & ~mCX;               // CX will be shift count
2955             if (sz <= REGSIZE)
2956             {
2957                 if (forregs & ~regcon.mvar && !(retregs & ~regcon.mvar))
2958                     retregs = ALLREGS & ~mCX;       // need something
2959                 else if (!retregs)
2960                     retregs = ALLREGS & ~mCX;       // need something
2961                 if (sz == 1)
2962                 {   retregs &= mAX|mBX|mDX;
2963                     if (!retregs)
2964                         retregs = mAX|mBX|mDX;
2965                 }
2966             }
2967             else
2968             {
2969                 if (!(retregs & mMSW))
2970                     retregs = ALLREGS & ~mCX;
2971             }
2972             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
2973 
2974             if (sz <= REGSIZE)
2975                 resreg = findreg(retregs);
2976             else
2977             {
2978                 resreg = findregmsw(retregs);
2979                 sreg = findreglsw(retregs);
2980             }
2981         L1:
2982             rretregs = mCX;                 // CX is shift count
2983             if (sz <= REGSIZE)
2984             {
2985                 scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue
2986                 getregs(cdb,retregs);      // trash these regs
2987                 cdb.gen2(0xD3 ^ isbyte,grex | modregrmx(3,s1,resreg)); // Sxx resreg,CX
2988 
2989                 if (!I16 && sz == 2 && (oper == OProl || oper == OPror))
2990                     cdb.last().Iflags |= CFopsize;
2991 
2992                 // Note that a shift by CL does not set the flags if
2993                 // CL == 0. If e2 is a constant, we know it isn't 0
2994                 // (it would have been optimized out).
2995                 if (e2isconst)
2996                     *pretregs &= mBP | ALLREGS; // flags already set with result
2997             }
2998             else if (sz == 2 * REGSIZE &&
2999                      config.target_cpu >= TARGET_80386)
3000             {
3001                 reg_t hreg = resreg;
3002                 reg_t lreg = sreg;
3003                 uint rex = I64 ? (REX_W << 16) : 0;
3004                 if (e2isconst)
3005                 {
3006                     getregs(cdb,retregs);
3007                     if (shiftcnt & (REGSIZE * 8))
3008                     {
3009                         if (oper == OPshr)
3010                         {   //      SHR hreg,shiftcnt
3011                             //      MOV lreg,hreg
3012                             //      XOR hreg,hreg
3013                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt - (REGSIZE * 8));
3014                             genmovreg(cdb,lreg,hreg);
3015                             movregconst(cdb,hreg,0,0);
3016                         }
3017                         else if (oper == OPashr)
3018                         {   //      MOV     lreg,hreg
3019                             //      SAR     hreg,31
3020                             //      SHRD    lreg,hreg,shiftcnt
3021                             genmovreg(cdb,lreg,hreg);
3022                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),(REGSIZE * 8) - 1);
3023                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt - (REGSIZE * 8));
3024                         }
3025                         else
3026                         {   //      SHL lreg,shiftcnt
3027                             //      MOV hreg,lreg
3028                             //      XOR lreg,lreg
3029                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt - (REGSIZE * 8));
3030                             genmovreg(cdb,hreg,lreg);
3031                             movregconst(cdb,lreg,0,0);
3032                         }
3033                     }
3034                     else
3035                     {
3036                         if (oper == OPshr || oper == OPashr)
3037                         {   //      SHRD    lreg,hreg,shiftcnt
3038                             //      SHR/SAR hreg,shiftcnt
3039                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt);
3040                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt);
3041                         }
3042                         else
3043                         {   //      SHLD hreg,lreg,shiftcnt
3044                             //      SHL  lreg,shiftcnt
3045                             cdb.genc2(0x0FA4,rex | modregrm(3,lreg,hreg),shiftcnt);
3046                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt);
3047                         }
3048                     }
3049                     freenode(e2);
3050                 }
3051                 else if (config.target_cpu >= TARGET_80486 && REGSIZE == 2)
3052                 {
3053                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
3054                     getregs(cdb,retregs);          // modify these regs
3055                     if (oper == OPshl)
3056                     {
3057                         /*
3058                             SHLD    hreg,lreg,CL
3059                             SHL     lreg,CL
3060                          */
3061 
3062                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
3063                         cdb.gen2(0xD3,modregrm(3,4,lreg));
3064                     }
3065                     else
3066                     {
3067                         /*
3068                             SHRD    lreg,hreg,CL
3069                             SAR             hreg,CL
3070 
3071                             -- or --
3072 
3073                             SHRD    lreg,hreg,CL
3074                             SHR             hreg,CL
3075                          */
3076                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
3077                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
3078                     }
3079                 }
3080                 else
3081                 {   code* cl1,cl2;
3082 
3083                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
3084                     getregs(cdb,retregs | mCX);     // modify these regs
3085                                                             // TEST CL,0x20
3086                     cdb.genc2(0xF6,modregrm(3,0,CX),REGSIZE * 8);
3087                     cl1 = gennop(null);
3088                     CodeBuilder cdb1;
3089                     cdb1.ctor();
3090                     cdb1.append(cl1);
3091                     if (oper == OPshl)
3092                     {
3093                         /*  TEST    CL,20H
3094                             JNE     L1
3095                             SHLD    hreg,lreg,CL
3096                             SHL     lreg,CL
3097                             JMP     L2
3098                         L1: AND     CL,20H-1
3099                             SHL     lreg,CL
3100                             MOV     hreg,lreg
3101                             XOR     lreg,lreg
3102                         L2: NOP
3103                          */
3104 
3105                         if (REGSIZE == 2)
3106                             cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3107                         cdb1.gen2(0xD3,modregrm(3,4,lreg));
3108                         genmovreg(cdb1,hreg,lreg);
3109                         genregs(cdb1,0x31,lreg,lreg);
3110 
3111                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
3112                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
3113                         cdb.gen2(0xD3,modregrm(3,4,lreg));
3114                     }
3115                     else
3116                     {   if (oper == OPashr)
3117                         {
3118                             /*  TEST        CL,20H
3119                                 JNE         L1
3120                                 SHRD        lreg,hreg,CL
3121                                 SAR         hreg,CL
3122                                 JMP         L2
3123                             L1: AND         CL,15
3124                                 MOV         lreg,hreg
3125                                 SAR         hreg,31
3126                                 SHRD        lreg,hreg,CL
3127                             L2: NOP
3128                              */
3129 
3130                             if (REGSIZE == 2)
3131                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3132                             genmovreg(cdb1,lreg,hreg);
3133                             cdb1.genc2(0xC1,modregrm(3,s1,hreg),31);
3134                             cdb1.gen2(0x0FAD,modregrm(3,hreg,lreg));
3135                         }
3136                         else
3137                         {
3138                             /*  TEST        CL,20H
3139                                 JNE         L1
3140                                 SHRD        lreg,hreg,CL
3141                                 SHR         hreg,CL
3142                                 JMP         L2
3143                             L1: AND         CL,15
3144                                 SHR         hreg,CL
3145                                 MOV         lreg,hreg
3146                                 XOR         hreg,hreg
3147                             L2: NOP
3148                              */
3149 
3150                             if (REGSIZE == 2)
3151                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3152                             cdb1.gen2(0xD3,modregrm(3,5,hreg));
3153                             genmovreg(cdb1,lreg,hreg);
3154                             genregs(cdb1,0x31,hreg,hreg);
3155                         }
3156                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
3157                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
3158                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
3159                     }
3160                     cl2 = gennop(null);
3161                     genjmp(cdb,JMPS,FLcode,cast(block *)cl2);
3162                     cdb.append(cdb1);
3163                     cdb.append(cl2);
3164                 }
3165                 break;
3166             }
3167             else if (sz == 2 * REGSIZE)
3168             {
3169                 scodelem(cdb,e2,&rretregs,retregs,false);
3170                 getregs(cdb,retregs | mCX);
3171                 if (oper == OPshl)
3172                     swap(&resreg, &sreg);
3173                 if (!e2isconst)                   // if not sure shift count != 0
3174                     cdb.genc2(0xE3,0,6);          // JCXZ .+6
3175                 cdb.gen2(0xD1,modregrm(3,s1,resreg));
3176                 code_orflag(cdb.last(),CFtarg2);
3177                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
3178                 cdb.genc2(0xE2,0,cast(targ_uns)-6);          // LOOP .-6
3179                 regimmed_set(CX,0);         // note that now CX == 0
3180             }
3181             else
3182                 assert(0);
3183             break;
3184     }
3185     fixresult(cdb,e,retregs,pretregs);
3186 }
3187 
3188 
3189 /***************************
3190  * Perform a 'star' reference (indirection).
3191  */
3192 
3193 void cdind(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3194 {
3195     regm_t retregs;
3196     reg_t reg;
3197     uint nreg;
3198 
3199     //printf("cdind(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
3200     tym_t tym = tybasic(e.Ety);
3201     if (tyfloating(tym))
3202     {
3203         if (config.inline8087)
3204         {
3205             if (*pretregs & mST0)
3206             {
3207                 cdind87(cdb, e, pretregs);
3208                 return;
3209             }
3210             if (I64 && tym == TYcfloat && *pretregs & (ALLREGS | mBP))
3211             { }
3212             else if (tycomplex(tym))
3213             {
3214                 cload87(cdb, e, pretregs);
3215                 return;
3216             }
3217 
3218             if (*pretregs & mPSW)
3219             {
3220                 cdind87(cdb, e, pretregs);
3221                 return;
3222             }
3223         }
3224     }
3225 
3226     elem *e1 = e.EV.E1;
3227     assert(e1);
3228     switch (tym)
3229     {
3230         case TYstruct:
3231         case TYarray:
3232             // This case should never happen, why is it here?
3233             tym = TYnptr;               // don't confuse allocreg()
3234             if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
3235                     tym = TYfptr;
3236             break;
3237 
3238         default:
3239             break;
3240     }
3241     uint sz = _tysize[tym];
3242     uint isbyte = tybyte(tym) != 0;
3243 
3244     code cs;
3245 
3246      getlvalue(cdb,&cs,e,RMload);          // get addressing mode
3247     //printf("Irex = %02x, Irm = x%02x, Isib = x%02x\n", cs.Irex, cs.Irm, cs.Isib);
3248     //fprintf(stderr,"cd2 :\n"); WRcodlst(c);
3249     if (*pretregs == 0)
3250     {
3251         if (e.Ety & mTYvolatile)               // do the load anyway
3252             *pretregs = regmask(e.Ety, 0);     // load into registers
3253         else
3254             return;
3255     }
3256 
3257     regm_t idxregs = idxregm(&cs);               // mask of index regs used
3258 
3259     if (*pretregs == mPSW)
3260     {
3261         if (!I16 && tym == TYfloat)
3262         {
3263             retregs = ALLREGS & ~idxregs;
3264             allocreg(cdb,&retregs,&reg,TYfloat);
3265             cs.Iop = 0x8B;
3266             code_newreg(&cs,reg);
3267             cdb.gen(&cs);                       // MOV reg,lsw
3268             cdb.gen2(0xD1,modregrmx(3,4,reg));  // SHL reg,1
3269             code_orflag(cdb.last(), CFpsw);
3270         }
3271         else if (sz <= REGSIZE)
3272         {
3273             cs.Iop = 0x81 ^ isbyte;
3274             cs.Irm |= modregrm(0,7,0);
3275             cs.IFL2 = FLconst;
3276             cs.IEV2.Vsize_t = 0;
3277             cdb.gen(&cs);             // CMP [idx],0
3278         }
3279         else if (!I16 && sz == REGSIZE + 2)      // if far pointer
3280         {
3281             retregs = ALLREGS & ~idxregs;
3282             allocreg(cdb,&retregs,&reg,TYint);
3283             cs.Iop = MOVZXw;
3284             cs.Irm |= modregrm(0,reg,0);
3285             getlvalue_msw(&cs);
3286             cdb.gen(&cs);             // MOVZX reg,msw
3287             goto L4;
3288         }
3289         else if (sz <= 2 * REGSIZE)
3290         {
3291             retregs = ALLREGS & ~idxregs;
3292             allocreg(cdb,&retregs,&reg,TYint);
3293             cs.Iop = 0x8B;
3294             code_newreg(&cs,reg);
3295             getlvalue_msw(&cs);
3296             cdb.gen(&cs);             // MOV reg,msw
3297             if (I32)
3298             {   if (tym == TYdouble || tym == TYdouble_alias)
3299                     cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1
3300             }
3301             else if (tym == TYfloat)
3302                 cdb.gen2(0xD1,modregrm(3,4,reg));    // SHL reg,1
3303         L4:
3304             cs.Iop = 0x0B;
3305             getlvalue_lsw(&cs);
3306             cs.Iflags |= CFpsw;
3307             cdb.gen(&cs);                    // OR reg,lsw
3308         }
3309         else if (!I32 && sz == 8)
3310         {
3311             *pretregs |= DOUBLEREGS_16;     // fake it for now
3312             goto L1;
3313         }
3314         else
3315         {
3316             debug WRTYxx(tym);
3317             assert(0);
3318         }
3319     }
3320     else                                // else return result in reg
3321     {
3322     L1:
3323         retregs = *pretregs;
3324         if (sz == 8 &&
3325             (retregs & (mPSW | mSTACK | ALLREGS | mBP)) == mSTACK)
3326         {   int i;
3327 
3328             // Optimizer should not CSE these, as the result is worse code!
3329             assert(!e.Ecount);
3330 
3331             cs.Iop = 0xFF;
3332             cs.Irm |= modregrm(0,6,0);
3333             cs.IEV1.Voffset += 8 - REGSIZE;
3334             stackchanged = 1;
3335             i = 8 - REGSIZE;
3336             do
3337             {
3338                 cdb.gen(&cs);                         // PUSH EA+i
3339                 cdb.genadjesp(REGSIZE);
3340                 cs.IEV1.Voffset -= REGSIZE;
3341                 stackpush += REGSIZE;
3342                 i -= REGSIZE;
3343             }
3344             while (i >= 0);
3345             goto L3;
3346         }
3347         if (I16 && sz == 8)
3348             retregs = DOUBLEREGS_16;
3349 
3350         // Watch out for loading an lptr from an lptr! We must have
3351         // the offset loaded into a different register.
3352         /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes)
3353                 retregs = ALLREGS;*/
3354 
3355         {
3356             assert(!isbyte || retregs & BYTEREGS);
3357             allocreg(cdb,&retregs,&reg,tym); // alloc registers
3358         }
3359         if (retregs & XMMREGS)
3360         {
3361             assert(sz == 4 || sz == 8 || sz == 16 || sz == 32); // float, double or vector
3362             cs.Iop = xmmload(tym);
3363             cs.Irex &= ~REX_W;
3364             code_newreg(&cs,reg - XMM0);
3365             checkSetVex(&cs,tym);
3366             cdb.gen(&cs);     // MOV reg,[idx]
3367         }
3368         else if (sz <= REGSIZE)
3369         {
3370             cs.Iop = 0x8B;                                  // MOV
3371             if (sz <= 2 && !I16 &&
3372                 config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed)
3373             {
3374                 cs.Iop = tyuns(tym) ? MOVZXw : MOVSXw;      // MOVZX/MOVSX
3375                 cs.Iflags &= ~CFopsize;
3376             }
3377             cs.Iop ^= isbyte;
3378         L2:
3379             code_newreg(&cs,reg);
3380             cdb.gen(&cs);     // MOV reg,[idx]
3381             if (isbyte && reg >= 4)
3382                 code_orrex(cdb.last(), REX);
3383         }
3384         else if ((tym == TYfptr || tym == TYhptr) && retregs & mES)
3385         {
3386             cs.Iop = 0xC4;          // LES reg,[idx]
3387             goto L2;
3388         }
3389         else if (sz <= 2 * REGSIZE)
3390         {   uint lsreg;
3391 
3392             cs.Iop = 0x8B;
3393             // Be careful not to interfere with index registers
3394             if (!I16)
3395             {
3396                 // Can't handle if both result registers are used in
3397                 // the addressing mode.
3398                 if ((retregs & idxregs) == retregs)
3399                 {
3400                     retregs = mMSW & allregs & ~idxregs;
3401                     if (!retregs)
3402                         retregs |= mCX;
3403                     retregs |= mLSW & ~idxregs;
3404 
3405                     // We can run out of registers, so if that's possible,
3406                     // give us *one* of the idxregs
3407                     if ((retregs & ~regcon.mvar & mLSW) == 0)
3408                     {
3409                         regm_t x = idxregs & mLSW;
3410                         if (x)
3411                             retregs |= mask(findreg(x));        // give us one idxreg
3412                     }
3413                     else if ((retregs & ~regcon.mvar & mMSW) == 0)
3414                     {
3415                         regm_t x = idxregs & mMSW;
3416                         if (x)
3417                             retregs |= mask(findreg(x));        // give us one idxreg
3418                     }
3419 
3420                     allocreg(cdb,&retregs,&reg,tym);     // alloc registers
3421                     assert((retregs & idxregs) != retregs);
3422                 }
3423 
3424                 lsreg = findreglsw(retregs);
3425                 if (mask(reg) & idxregs)                // reg is in addr mode
3426                 {
3427                     code_newreg(&cs,lsreg);
3428                     cdb.gen(&cs);                 // MOV lsreg,lsw
3429                     if (sz == REGSIZE + 2)
3430                         cs.Iflags |= CFopsize;
3431                     lsreg = reg;
3432                     getlvalue_msw(&cs);                 // MOV reg,msw
3433                 }
3434                 else
3435                 {
3436                     code_newreg(&cs,reg);
3437                     getlvalue_msw(&cs);
3438                     cdb.gen(&cs);                 // MOV reg,msw
3439                     if (sz == REGSIZE + 2)
3440                         cdb.last().Iflags |= CFopsize;
3441                     getlvalue_lsw(&cs);                 // MOV lsreg,lsw
3442                 }
3443                 NEWREG(cs.Irm,lsreg);
3444                 cdb.gen(&cs);
3445             }
3446             else
3447             {
3448                 // Index registers are always the lsw!
3449                 cs.Irm |= modregrm(0,reg,0);
3450                 getlvalue_msw(&cs);
3451                 cdb.gen(&cs);     // MOV reg,msw
3452                 lsreg = findreglsw(retregs);
3453                 NEWREG(cs.Irm,lsreg);
3454                 getlvalue_lsw(&cs);     // MOV lsreg,lsw
3455                 cdb.gen(&cs);
3456             }
3457         }
3458         else if (I16 && sz == 8)
3459         {
3460             assert(reg == AX);
3461             cs.Iop = 0x8B;
3462             cs.IEV1.Voffset += 6;
3463             cdb.gen(&cs);             // MOV AX,EA+6
3464             cs.Irm |= modregrm(0,CX,0);
3465             cs.IEV1.Voffset -= 4;
3466             cdb.gen(&cs);                    // MOV CX,EA+2
3467             NEWREG(cs.Irm,DX);
3468             cs.IEV1.Voffset -= 2;
3469             cdb.gen(&cs);                    // MOV DX,EA
3470             cs.IEV1.Voffset += 4;
3471             NEWREG(cs.Irm,BX);
3472             cdb.gen(&cs);                    // MOV BX,EA+4
3473         }
3474         else
3475             assert(0);
3476     L3:
3477         fixresult(cdb,e,retregs,pretregs);
3478     }
3479     //fprintf(stderr,"cdafter :\n"); WRcodlst(c);
3480 }
3481 
3482 
3483 
3484 /********************************
3485  * Generate code to load ES with the right segment value,
3486  * do nothing if e is a far pointer.
3487  */
3488 
3489 private code *cod2_setES(tym_t ty)
3490 {
3491     if (config.exe & EX_flat)
3492         return null;
3493 
3494     int push;
3495 
3496     CodeBuilder cdb;
3497     cdb.ctor();
3498     switch (tybasic(ty))
3499     {
3500         case TYnptr:
3501             if (!(config.flags3 & CFG3eseqds))
3502             {   push = 0x1E;            // PUSH DS
3503                 goto L1;
3504             }
3505             break;
3506         case TYcptr:
3507             push = 0x0E;                // PUSH CS
3508             goto L1;
3509         case TYsptr:
3510             if ((config.wflags & WFssneds) || !(config.flags3 & CFG3eseqds))
3511             {   push = 0x16;            // PUSH SS
3512             L1:
3513                 // Must load ES
3514                 getregs(cdb,mES);
3515                 cdb.gen1(push);
3516                 cdb.gen1(0x07);         // POP ES
3517             }
3518             break;
3519 
3520         default:
3521             break;
3522     }
3523     return cdb.finish();
3524 }
3525 
3526 /********************************
3527  * Generate code for intrinsic strlen().
3528  */
3529 
3530 void cdstrlen(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
3531 {
3532     /* Generate strlen in CX:
3533         LES     DI,e1
3534         CLR     AX                      ;scan for 0
3535         MOV     CX,-1                   ;largest possible string
3536         REPNE   SCASB
3537         NOT     CX
3538         DEC     CX
3539      */
3540 
3541     regm_t retregs = mDI;
3542     tym_t ty1 = e.EV.E1.Ety;
3543     if (!tyreg(ty1))
3544         retregs |= mES;
3545     codelem(cdb,e.EV.E1,&retregs,false);
3546 
3547     // Make sure ES contains proper segment value
3548     cdb.append(cod2_setES(ty1));
3549 
3550     ubyte rex = I64 ? REX_W : 0;
3551 
3552     getregs_imm(cdb,mAX | mCX);
3553     movregconst(cdb,AX,0,1);               // MOV AL,0
3554     movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);  // MOV CX,-1
3555     getregs(cdb,mDI|mCX);
3556     cdb.gen1(0xF2);                                     // REPNE
3557     cdb.gen1(0xAE);                                     // SCASB
3558     genregs(cdb,0xF7,2,CX);                // NOT CX
3559     code_orrex(cdb.last(), rex);
3560     if (I64)
3561         cdb.gen2(0xFF,(rex << 16) | modregrm(3,1,CX));  // DEC reg
3562     else
3563         cdb.gen1(0x48 + CX);                            // DEC CX
3564 
3565     if (*pretregs & mPSW)
3566     {
3567         cdb.last().Iflags |= CFpsw;
3568         *pretregs &= ~mPSW;
3569     }
3570     fixresult(cdb,e,mCX,pretregs);
3571 }
3572 
3573 
3574 /*********************************
3575  * Generate code for strcmp(s1,s2) intrinsic.
3576  */
3577 
3578 void cdstrcmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
3579 {
3580     char need_DS;
3581     int segreg;
3582 
3583     /*
3584         MOV     SI,s1                   ;get destination pointer (s1)
3585         MOV     CX,s1+2
3586         LES     DI,s2                   ;get source pointer (s2)
3587         PUSH    DS
3588         MOV     DS,CX
3589         CLR     AX                      ;scan for 0
3590         MOV     CX,-1                   ;largest possible string
3591         REPNE   SCASB
3592         NOT     CX                      ;CX = string length of s2
3593         SUB     DI,CX                   ;point DI back to beginning
3594         REPE    CMPSB                   ;compare string
3595         POP     DS
3596         JE      L1                      ;strings are equal
3597         SBB     AX,AX
3598         SBB     AX,-1
3599     L1:
3600     */
3601 
3602     regm_t retregs1 = mSI;
3603     tym_t ty1 = e.EV.E1.Ety;
3604     if (!tyreg(ty1))
3605         retregs1 |= mCX;
3606     codelem(cdb,e.EV.E1,&retregs1,false);
3607 
3608     regm_t retregs = mDI;
3609     tym_t ty2 = e.EV.E2.Ety;
3610     if (!tyreg(ty2))
3611         retregs |= mES;
3612     scodelem(cdb,e.EV.E2,&retregs,retregs1,false);
3613 
3614     // Make sure ES contains proper segment value
3615     cdb.append(cod2_setES(ty2));
3616     getregs_imm(cdb,mAX | mCX);
3617 
3618     ubyte rex = I64 ? REX_W : 0;
3619 
3620     // Load DS with right value
3621     switch (tybasic(ty1))
3622     {
3623         case TYnptr:
3624         case TYimmutPtr:
3625             need_DS = false;
3626             break;
3627 
3628         case TYsptr:
3629             if (config.wflags & WFssneds)       // if sptr can't use DS segment
3630                 segreg = SEG_SS;
3631             else
3632                 segreg = SEG_DS;
3633             goto L1;
3634         case TYcptr:
3635             segreg = SEG_CS;
3636         L1:
3637             cdb.gen1(0x1E);                         // PUSH DS
3638             cdb.gen1(0x06 + (segreg << 3));         // PUSH segreg
3639             cdb.gen1(0x1F);                         // POP  DS
3640             need_DS = true;
3641             break;
3642         case TYfptr:
3643         case TYvptr:
3644         case TYhptr:
3645             cdb.gen1(0x1E);                         // PUSH DS
3646             cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
3647             need_DS = true;
3648             break;
3649         default:
3650             assert(0);
3651     }
3652 
3653     movregconst(cdb,AX,0,0);                // MOV AX,0
3654     movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
3655     getregs(cdb,mSI|mDI|mCX);
3656     cdb.gen1(0xF2);                              // REPNE
3657     cdb.gen1(0xAE);                              // SCASB
3658     genregs(cdb,0xF7,2,CX);         // NOT CX
3659     code_orrex(cdb.last(),rex);
3660     genregs(cdb,0x2B,DI,CX);        // SUB DI,CX
3661     code_orrex(cdb.last(),rex);
3662     cdb.gen1(0xF3);                              // REPE
3663     cdb.gen1(0xA6);                              // CMPSB
3664     if (need_DS)
3665         cdb.gen1(0x1F);                          // POP DS
3666     code *c4 = gennop(null);
3667     if (*pretregs != mPSW)                       // if not flags only
3668     {
3669         genjmp(cdb,JE,FLcode,cast(block *) c4);      // JE L1
3670         getregs(cdb,mAX);
3671         genregs(cdb,0x1B,AX,AX);                 // SBB AX,AX
3672         code_orrex(cdb.last(),rex);
3673         cdb.genc2(0x81,(rex << 16) | modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
3674     }
3675 
3676     *pretregs &= ~mPSW;
3677     cdb.append(c4);
3678     fixresult(cdb,e,mAX,pretregs);
3679 }
3680 
3681 /*********************************
3682  * Generate code for memcmp(s1,s2,n) intrinsic.
3683  */
3684 
3685 void cdmemcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3686 {
3687     char need_DS;
3688     int segreg;
3689 
3690     /*
3691         MOV     SI,s1                   ;get destination pointer (s1)
3692         MOV     DX,s1+2
3693         LES     DI,s2                   ;get source pointer (s2)
3694         MOV     CX,n                    ;get number of bytes to compare
3695         PUSH    DS
3696         MOV     DS,DX
3697         XOR     AX,AX
3698         REPE    CMPSB                   ;compare string
3699         POP     DS
3700         JE      L1                      ;strings are equal
3701         SBB     AX,AX
3702         SBB     AX,-1
3703     L1:
3704     */
3705 
3706     elem *e1 = e.EV.E1;
3707     assert(e1.Eoper == OPparam);
3708 
3709     // Get s1 into DX:SI
3710     regm_t retregs1 = mSI;
3711     tym_t ty1 = e1.EV.E1.Ety;
3712     if (!tyreg(ty1))
3713         retregs1 |= mDX;
3714     codelem(cdb,e1.EV.E1,&retregs1,false);
3715 
3716     // Get s2 into ES:DI
3717     regm_t retregs = mDI;
3718     tym_t ty2 = e1.EV.E2.Ety;
3719     if (!tyreg(ty2))
3720         retregs |= mES;
3721     scodelem(cdb,e1.EV.E2,&retregs,retregs1,false);
3722     freenode(e1);
3723 
3724     // Get nbytes into CX
3725     regm_t retregs3 = mCX;
3726     scodelem(cdb,e.EV.E2,&retregs3,retregs | retregs1,false);
3727 
3728     // Make sure ES contains proper segment value
3729     cdb.append(cod2_setES(ty2));
3730 
3731     // Load DS with right value
3732     switch (tybasic(ty1))
3733     {
3734         case TYnptr:
3735         case TYimmutPtr:
3736             need_DS = false;
3737             break;
3738 
3739         case TYsptr:
3740             if (config.wflags & WFssneds)       // if sptr can't use DS segment
3741                 segreg = SEG_SS;
3742             else
3743                 segreg = SEG_DS;
3744             goto L1;
3745         case TYcptr:
3746             segreg = SEG_CS;
3747         L1:
3748             cdb.gen1(0x1E);                     // PUSH DS
3749             cdb.gen1(0x06 + (segreg << 3));     // PUSH segreg
3750             cdb.gen1(0x1F);                     // POP  DS
3751             need_DS = true;
3752             break;
3753         case TYfptr:
3754         case TYvptr:
3755         case TYhptr:
3756             cdb.gen1(0x1E);                        // PUSH DS
3757             cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));  // MOV DS,DX
3758             need_DS = true;
3759             break;
3760         default:
3761             assert(0);
3762     }
3763 
3764     static if (1)
3765     {
3766         getregs(cdb,mAX);
3767         cdb.gen2(0x33,modregrm(3,AX,AX));           // XOR AX,AX
3768         code_orflag(cdb.last(), CFpsw);             // keep flags
3769     }
3770     else
3771     {
3772         if (*pretregs != mPSW)                      // if not flags only
3773             regwithvalue(cdb,mAX,0,null,0);         // put 0 in AX
3774     }
3775 
3776     getregs(cdb,mCX | mSI | mDI);
3777     cdb.gen1(0xF3);                             // REPE
3778     cdb.gen1(0xA6);                             // CMPSB
3779     if (need_DS)
3780         cdb.gen1(0x1F);                         // POP DS
3781     if (*pretregs != mPSW)                      // if not flags only
3782     {
3783         code *c4 = gennop(null);
3784         genjmp(cdb,JE,FLcode,cast(block *) c4);  // JE L1
3785         getregs(cdb,mAX);
3786         genregs(cdb,0x1B,AX,AX);             // SBB AX,AX
3787         cdb.genc2(0x81,modregrm(3,3,AX),cast(targ_uns)-1);    // SBB AX,-1
3788         cdb.append(c4);
3789     }
3790 
3791     *pretregs &= ~mPSW;
3792     fixresult(cdb,e,mAX,pretregs);
3793 }
3794 
3795 /*********************************
3796  * Generate code for strcpy(s1,s2) intrinsic.
3797  */
3798 
3799 void cdstrcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3800 {
3801     char need_DS;
3802     int segreg;
3803 
3804     /*
3805         LES     DI,s2                   ;ES:DI = s2
3806         CLR     AX                      ;scan for 0
3807         MOV     CX,-1                   ;largest possible string
3808         REPNE   SCASB                   ;find end of s2
3809         NOT     CX                      ;CX = strlen(s2) + 1 (for EOS)
3810         SUB     DI,CX
3811         MOV     SI,DI
3812         PUSH    DS
3813         PUSH    ES
3814         LES     DI,s1
3815         POP     DS
3816         MOV     AX,DI                   ;return value is s1
3817         REP     MOVSB
3818         POP     DS
3819     */
3820 
3821     stackchanged = 1;
3822     regm_t retregs = mDI;
3823     tym_t ty2 = tybasic(e.EV.E2.Ety);
3824     if (!tyreg(ty2))
3825         retregs |= mES;
3826     ubyte rex = I64 ? REX_W : 0;
3827     codelem(cdb,e.EV.E2,&retregs,false);
3828 
3829     // Make sure ES contains proper segment value
3830     cdb.append(cod2_setES(ty2));
3831     getregs_imm(cdb,mAX | mCX);
3832     movregconst(cdb,AX,0,1);       // MOV AL,0
3833     movregconst(cdb,CX,-1,I64?64:0);  // MOV CX,-1
3834     getregs(cdb,mAX|mCX|mSI|mDI);
3835     cdb.gen1(0xF2);                             // REPNE
3836     cdb.gen1(0xAE);                             // SCASB
3837     genregs(cdb,0xF7,2,CX);                     // NOT CX
3838     code_orrex(cdb.last(),rex);
3839     genregs(cdb,0x2B,DI,CX);                    // SUB DI,CX
3840     code_orrex(cdb.last(),rex);
3841     genmovreg(cdb,SI,DI);          // MOV SI,DI
3842 
3843     // Load DS with right value
3844     switch (ty2)
3845     {
3846         case TYnptr:
3847         case TYimmutPtr:
3848             need_DS = false;
3849             break;
3850 
3851         case TYsptr:
3852             if (config.wflags & WFssneds)       // if sptr can't use DS segment
3853                 segreg = SEG_SS;
3854             else
3855                 segreg = SEG_DS;
3856             goto L1;
3857         case TYcptr:
3858             segreg = SEG_CS;
3859         L1:
3860             cdb.gen1(0x1E);                     // PUSH DS
3861             cdb.gen1(0x06 + (segreg << 3));     // PUSH segreg
3862             cdb.genadjesp(REGSIZE * 2);
3863             need_DS = true;
3864             break;
3865         case TYfptr:
3866         case TYvptr:
3867         case TYhptr:
3868             segreg = SEG_ES;
3869             goto L1;
3870 
3871         default:
3872             assert(0);
3873     }
3874 
3875     retregs = mDI;
3876     tym_t ty1 = tybasic(e.EV.E1.Ety);
3877     if (!tyreg(ty1))
3878         retregs |= mES;
3879     scodelem(cdb,e.EV.E1,&retregs,mCX|mSI,false);
3880     getregs(cdb,mAX|mCX|mSI|mDI);
3881 
3882     // Make sure ES contains proper segment value
3883     if (ty2 != TYnptr || ty1 != ty2)
3884         cdb.append(cod2_setES(ty1));
3885     else
3886     {}                              // ES is already same as DS
3887 
3888     if (need_DS)
3889         cdb.gen1(0x1F);                     // POP DS
3890     if (*pretregs)
3891         genmovreg(cdb,AX,DI);               // MOV AX,DI
3892     cdb.gen1(0xF3);                         // REP
3893     cdb.gen1(0xA4);                              // MOVSB
3894 
3895     if (need_DS)
3896     {   cdb.gen1(0x1F);                          // POP DS
3897         cdb.genadjesp(-(REGSIZE * 2));
3898     }
3899     fixresult(cdb,e,mAX | mES,pretregs);
3900 }
3901 
3902 /*********************************
3903  * Generate code for memcpy(s1,s2,n) intrinsic.
3904  *  OPmemcpy
3905  *   /   \
3906  * s1   OPparam
3907  *       /   \
3908  *      s2    n
3909  */
3910 
3911 void cdmemcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3912 {
3913     char need_DS;
3914     int segreg;
3915 
3916     /*
3917         MOV     SI,s2
3918         MOV     DX,s2+2
3919         MOV     CX,n
3920         LES     DI,s1
3921         PUSH    DS
3922         MOV     DS,DX
3923         MOV     AX,DI                   ;return value is s1
3924         REP     MOVSB
3925         POP     DS
3926     */
3927 
3928     elem *e2 = e.EV.E2;
3929     assert(e2.Eoper == OPparam);
3930 
3931     // Get s2 into DX:SI
3932     regm_t retregs2 = mSI;
3933     tym_t ty2 = e2.EV.E1.Ety;
3934     if (!tyreg(ty2))
3935         retregs2 |= mDX;
3936     codelem(cdb,e2.EV.E1,&retregs2,false);
3937 
3938     // Need to check if nbytes is 0 (OPconst of 0 would have been removed by elmemcpy())
3939     const zeroCheck = e2.EV.E2.Eoper != OPconst;
3940 
3941     // Get nbytes into CX
3942     regm_t retregs3 = mCX;
3943     scodelem(cdb,e2.EV.E2,&retregs3,retregs2,false);
3944     freenode(e2);
3945 
3946     // Get s1 into ES:DI
3947     regm_t retregs1 = mDI;
3948     tym_t ty1 = e.EV.E1.Ety;
3949     if (!tyreg(ty1))
3950         retregs1 |= mES;
3951     scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);
3952 
3953     ubyte rex = I64 ? REX_W : 0;
3954 
3955     // Make sure ES contains proper segment value
3956     cdb.append(cod2_setES(ty1));
3957 
3958     // Load DS with right value
3959     switch (tybasic(ty2))
3960     {
3961         case TYnptr:
3962         case TYimmutPtr:
3963             need_DS = false;
3964             break;
3965 
3966         case TYsptr:
3967             if (config.wflags & WFssneds)       // if sptr can't use DS segment
3968                 segreg = SEG_SS;
3969             else
3970                 segreg = SEG_DS;
3971             goto L1;
3972 
3973         case TYcptr:
3974             segreg = SEG_CS;
3975         L1:
3976             cdb.gen1(0x1E);                        // PUSH DS
3977             cdb.gen1(0x06 + (segreg << 3));        // PUSH segreg
3978             cdb.gen1(0x1F);                        // POP  DS
3979             need_DS = true;
3980             break;
3981 
3982         case TYfptr:
3983         case TYvptr:
3984         case TYhptr:
3985             cdb.gen1(0x1E);                        // PUSH DS
3986             cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));  // MOV DS,DX
3987             need_DS = true;
3988             break;
3989 
3990         default:
3991             assert(0);
3992     }
3993 
3994     if (*pretregs)                              // if need return value
3995     {   getregs(cdb,mAX);
3996         genmovreg(cdb,AX,DI);
3997     }
3998 
3999     if (0 && I32 && config.flags4 & CFG4speed)
4000     {
4001         /* This is only faster if the memory is dword aligned, if not
4002          * it is significantly slower than just a rep movsb.
4003          */
4004         /*      mov     EDX,ECX
4005          *      shr     ECX,2
4006          *      jz      L1
4007          *      repe    movsd
4008          * L1:  nop
4009          *      and     EDX,3
4010          *      jz      L2
4011          *      mov     ECX,EDX
4012          *      repe    movsb
4013          * L2:  nop
4014          */
4015         getregs(cdb,mSI | mDI | mCX | mDX);
4016         genmovreg(cdb,DX,CX);                  // MOV EDX,ECX
4017         cdb.genc2(0xC1,modregrm(3,5,CX),2);                 // SHR ECX,2
4018         code *cx = gennop(null);
4019         genjmp(cdb, JE, FLcode, cast(block *)cx);  // JZ L1
4020         cdb.gen1(0xF3);                                     // REPE
4021         cdb.gen1(0xA5);                                     // MOVSW
4022         cdb.append(cx);
4023         cdb.genc2(0x81, modregrm(3,4,DX),3);                // AND EDX,3
4024 
4025         code *cnop = gennop(null);
4026         genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ L2
4027         genmovreg(cdb,CX,DX);                    // MOV ECX,EDX
4028         cdb.gen1(0xF3);                          // REPE
4029         cdb.gen1(0xA4);                          // MOVSB
4030         cdb.append(cnop);
4031     }
4032     else
4033     {
4034         getregs(cdb,mSI | mDI | mCX);
4035         code* cnop;
4036         if (zeroCheck)
4037         {
4038             cnop = gennop(null);
4039             gentstreg(cdb,CX);                           // TEST ECX,ECX
4040             if (I64)
4041                 code_orrex(cdb.last, REX_W);
4042             genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ cnop
4043         }
4044 
4045         if (I16 && config.flags4 & CFG4speed)          // if speed optimization
4046         {
4047             // Note this doesn't work if CX is 0
4048             cdb.gen2(0xD1,(rex << 16) | modregrm(3,5,CX));        // SHR CX,1
4049             cdb.gen1(0xF3);                              // REPE
4050             cdb.gen1(0xA5);                              // MOVSW
4051             cdb.gen2(0x11,(rex << 16) | modregrm(3,CX,CX));            // ADC CX,CX
4052         }
4053         cdb.gen1(0xF3);                             // REPE
4054         cdb.gen1(0xA4);                             // MOVSB
4055         if (zeroCheck)
4056             cdb.append(cnop);
4057         if (need_DS)
4058             cdb.gen1(0x1F);                         // POP DS
4059     }
4060     fixresult(cdb,e,mES|mAX,pretregs);
4061 }
4062 
4063 
/*********************************
 * Generate code for the memset(s,value,numbytes) intrinsic.
 * The expression tree has the form:
 *      (s OPmemset (numbytes OPparam value))
 *
 * Three code shapes are produced:
 *  1. constant value with a small constant numbytes (never for 16 bit
 *     code): straight-line MOV stores through a general register
 *  2. any other constant numbytes: REP STOS of whole registers followed
 *     by single STOS/STOSB instructions for the remainder
 *  3. variable numbytes: REP STOS sequences counted by CX
 *
 * Params:
 *      cdb = code sink the instructions are appended to
 *      e = the OPmemset elem
 *      pretregs = registers the result is wanted in; 0 if the result
 *                 is not needed
 */

void cdmemset(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdmemset(*pretregs = %s)\n", regm_str(*pretregs));
    elem* eparam = e.EV.E2;
    assert(eparam.Eoper == OPparam);

    elem* evalue = eparam.EV.E2;
    elem* enumbytes = eparam.EV.E1;

    const grex = I64 ? (REX_W << 16) : 0;

    /* For a constant fill value, replicate its low byte across
     * every byte of a register-sized pattern.
     */
    bool valueIsConst = false;
    targ_size_t value = 0xDEADBEEF;     // placeholder; stops false positives that value is not inited
    switch (evalue.Eoper)
    {
        case OPconst:
            value = el_tolong(evalue) & 0xFF;
            value |= value << 8;
            if (I32 || I64)
            {
                value |= value << 16;
                static if (value.sizeof == 8)
                {
                    if (I64)
                        value |= value << 32;
                }
            }
            valueIsConst = true;
            break;

        case OPstrpar:                  // happens if evalue is a struct of 0 size
            value = 0;
            valueIsConst = true;
            break;

        default:
            break;
    }

    targ_uns numbytes;
    if (enumbytes.Eoper == OPconst)
    {
        numbytes = cast(uint)cast(targ_size_t)el_tolong(enumbytes);

        // Largest count that is expanded into individual MOV stores
        const uint movThreshold = REGSIZE * (6 + (REGSIZE == 4));

        if (valueIsConst &&
            !I16 &&                     // doesn't work for 16 bits
            numbytes <= movThreshold)
        {
            // Evaluate s into a general register; that register is also the result
            regm_t ptrregs = *pretregs;
            if (!ptrregs)
                ptrregs = ALLREGS;
            codelem(cdb,e.EV.E1,&ptrregs,false);
            const reg_t preg = findreg(ptrregs);

            if (evalue.Eoper == OPconst &&
                (numbytes == 4 || numbytes == 2 || numbytes == 1))
            {
                /* One store of an immediate does the whole job:
                 *   4: MOV [preg],imm32
                 *   2: MOV [preg],imm16
                 *   1: MOV [preg],imm8
                 */
                const uint mrm = buildModregrm(0,0,preg);
                cdb.genc2(numbytes == 1 ? 0xC6 : 0xC7,mrm,value);
                if (numbytes == 2)
                    cdb.last().Iflags = CFopsize;
                fixresult(cdb,e,ptrregs,pretregs);
                return;
            }

            // Put the fill pattern into a byte-addressable register other than preg
            reg_t vreg;
            regwithvalue(cdb, BYTEREGS & ~ptrregs, value, &vreg, I64 ? 64 : 0);
            freenode(evalue);
            freenode(eparam);

            const uint mrmFull = grex | buildModregrm(2,vreg,preg);   // REGSIZE wide stores
            const uint mrmPart = mrmFull & ~grex;                     // narrower tail stores
            targ_uns offset = 0;

            for (; numbytes >= REGSIZE; numbytes -= REGSIZE, offset += REGSIZE)
            {
                cdb.gen2(0x89,mrmFull);         // MOV offset[preg],vreg
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
            }

            if (numbytes & 4)
            {
                cdb.gen2(0x89,mrmPart);         // MOV dword ptr offset[preg],vreg
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                offset += 4;
            }
            if (numbytes & 2)
            {
                cdb.gen2(0x89,mrmPart);         // MOV word ptr offset[preg],vreg
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                cdb.last().Iflags = CFopsize;
                offset += 2;
            }
            if (numbytes & 1)
            {
                cdb.gen2(0x88,mrmPart);         // MOV byte ptr offset[preg],vreg
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                if (I64 && vreg >= 4)
                    cdb.last().Irex |= REX;
            }

            fixresult(cdb,e,ptrregs,pretregs);
            return;
        }
    }

    // A non-constant byte count is evaluated into CX
    regm_t cntregs = 0;
    if (enumbytes.Eoper != OPconst)
    {
        cntregs = mCX;
        codelem(cdb,enumbytes,&cntregs,false);
    }

    // The fill pattern goes into AX
    regm_t valregs = mAX;
    if (valueIsConst)
    {
        regwithvalue(cdb, mAX, value, null, I64?64:0);
        freenode(evalue);
    }
    else
    {
        scodelem(cdb,evalue,&valregs,cntregs,false);

        getregs(cdb,mAX);
        if (I16)
        {
            cdb.gen2(0x8A,modregrm(3,AH,AL));                 // MOV AH,AL
        }
        else
        {
            genregs(cdb,MOVZXb,AX,AX);                        // MOVZX EAX,AL
            if (I32)
            {
                cdb.genc2(0x69,modregrm(3,AX,AX),0x01010101); // IMUL EAX,EAX,0x01010101
            }
            else
            {
                regm_t mulregs = allregs & ~(mAX | cntregs);
                reg_t mulreg;
                regwithvalue(cdb,mulregs,cast(targ_size_t)0x01010101_01010101,&mulreg,64); // MOV reg,0x01010101_01010101
                cdb.gen2(0x0FAF,grex | modregrmx(3,AX,mulreg));   // IMUL RAX,reg
            }
        }
    }
    freenode(eparam);

    // Destination s goes into ES:DI
    regm_t dstregs = mDI;
    const tym_t ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        dstregs |= mES;
    scodelem(cdb,e.EV.E1,&dstregs,cntregs | valregs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty1));

    if (*pretregs)                              // result wanted: keep a copy of s
    {
        getregs(cdb,mBX);
        genmovreg(cdb,BX,DI);                   // MOV EBX,EDI
    }

    if (enumbytes.Eoper == OPconst)
    {
        getregs(cdb,mDI);

        const numwords = numbytes / REGSIZE;
        if (numwords)
        {
            regwithvalue(cdb,mCX,numwords,null, I64 ? 64 : 0);
            getregs(cdb,mCX);
            cdb.gen1(0xF3);                     // REP
            cdb.gen1(STOS);                     // STOSW/D/Q
            if (I64)
                code_orrex(cdb.last(), REX_W);
            regimmed_set(CX, 0);                // CX is now 0
        }

        auto leftover = numbytes & (REGSIZE - 1);
        if (I64 && leftover >= 4)
        {
            cdb.gen1(STOS);                     // STOSD
            leftover -= 4;
        }
        while (leftover)
        {
            cdb.gen1(STOSB);                    // STOSB
            --leftover;
        }
        fixresult(cdb,e,mES|mBX,pretregs);
        return;
    }

    getregs(cdb,mDI | mCX);
    if (I16)
    {
        if (config.flags4 & CFG4speed)      // if speed optimization
        {
            cdb.gen2(0xD1,modregrm(3,5,CX));  // SHR CX,1
            cdb.gen1(0xF3);                   // REP
            cdb.gen1(STOS);                   // STOSW
            cdb.gen2(0x11,modregrm(3,CX,CX)); // ADC CX,CX
        }
        cdb.gen1(0xF3);                       // REP
        cdb.gen1(STOSB);                      // STOSB
    }
    else
    {
        /*  MOV   sreg,ECX
            SHR   ECX,n
            REP
            STOSD/Q

            ADC   ECX,ECX       (64 bit code only)
            REP
            STOSD

            MOV   ECX,sreg
            AND   ECX,3
            REP
            STOSB
         */
        regm_t busy = mAX | mCX | mDI;
        if (*pretregs)
            busy |= mBX;                    // BX holds the saved copy of s
        regm_t regs = allregs & ~busy;
        reg_t sreg;
        allocreg(cdb,&regs,&sreg,TYint);
        genregs(cdb,0x89,CX,sreg);                        // MOV sreg,ECX (32 bits only)

        cdb.genc2(0xC1, grex | modregrm(3,5,CX), I64 ? 3 : 2);   // SHR ECX,n

        cdb.gen1(0xF3);                                   // REP
        cdb.gen1(STOS);                                   // STOSD/Q
        if (I64)
        {
            code_orrex(cdb.last(), REX_W);

            cdb.gen2(0x11,modregrm(3,CX,CX));             // ADC ECX,ECX
            cdb.gen1(0xF3);                               // REP
            cdb.gen1(STOS);                               // STOSD
        }

        genregs(cdb,0x89,sreg,CX);                        // MOV ECX,sreg (32 bits only)
        cdb.genc2(0x81, modregrm(3,4,CX), 3);             // AND ECX,3
        cdb.gen1(0xF3);                                   // REP
        cdb.gen1(STOSB);                                  // STOSB
    }

    regimmed_set(CX, 0);                    // CX is now 0
    fixresult(cdb,e,mES|mBX,pretregs);
}
4333 
4334 
/**********************
 * Generate code for a structure assignment (e1 = e2).
 * The source address is loaded into [DS:]SI, the destination address
 * into [ES:]DI, and the bytes are copied with MOVS instructions.
 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).
 * Mebbe call cdstreq() for double assignments???
 * Params:
 *      cdb = code sink the instructions are appended to
 *      e = the assignment elem; e.ET gives the size of the struct/union
 *      pretregs = registers the result is wanted in (never mPSW);
 *                 0 if the result is not needed
 */

void cdstreq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    elem* edst = e.EV.E1;
    elem* esrc = e.EV.E2;
    bool need_DS = false;               // true if DS has to be switched around the copy
    int segreg;
    uint numbytes = cast(uint)type_size(e.ET);          // # of bytes in structure/union
    const ubyte rex = I64 ? REX_W : 0;

    //printf("cdstreq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    /* Step 1: pointer to the rvalue goes into SI (segment, if any, into CX)
     */
    regm_t srcregs = mSI;                      // source is DS:SI
    docommas(cdb,&esrc);
    if (esrc.Eoper == OPind)                   // if (.. = *p)
    {
        elem* eptr = esrc.EV.E1;
        const ptrty = tybasic(eptr.Ety);

        segreg = SEG_DS;
        if (ptrty == TYsptr)
        {
            if (config.wflags & WFssneds)       // if sptr can't use DS segment
                segreg = SEG_SS;
        }
        else if (ptrty == TYcptr)
        {
            if (!(config.exe & EX_flat))
                segreg = SEG_CS;
        }
        else if (ptrty == TYfptr || ptrty == TYvptr || ptrty == TYhptr)
        {
            srcregs |= mCX;                     // get segment also
            need_DS = true;
        }

        codelem(cdb,eptr,&srcregs,false);
        freenode(esrc);
        if (segreg != SEG_DS)                   // if not DS
        {
            getregs(cdb,mCX);
            cdb.gen2(0x8C,modregrm(3,segreg,CX)); // MOV CX,segreg
            need_DS = true;
        }
    }
    else if (esrc.Eoper == OPvar)
    {
        if (esrc.EV.Vsym.ty() & mTYfar)         // if source is in a far segment
        {
            srcregs |= mCX;                     // get segment also
            need_DS = true;
        }
        else
        {
            segreg = segfl[el_fl(esrc)];
            const bool otherSeg =
                segreg == SEG_CS ||                                   // source is in CS
                ((config.wflags & WFssneds) && segreg == SEG_SS);     // source is on stack
            if (otherSeg)
            {
                need_DS = true;                 // we need to reload DS
                srcregs |= mCX;
                getregs(cdb,mCX);
                cdb.gen2(0x8C,modregrm(3,segreg,CX));   // MOV CX,[SS|CS]
            }
        }
        cdrelconst(cdb,esrc,&srcregs);
        freenode(esrc);
    }
    else
    {
        if (!(config.exe & EX_flat))
        {
            need_DS = true;
            srcregs |= mCX;
        }
        codelem(cdb,esrc,&srcregs,false);
    }

    /* Step 2: pointer to the lvalue (destination) goes into ES:DI
     */
    regm_t dstregs = (config.exe & EX_flat) ? mDI : mES|mDI;
    if (edst.Eoper == OPind)                    // if (*p = ..)
    {
        elem* eptr = edst.EV.E1;
        if (tyreg(eptr.Ety))
            dstregs = mDI;
        cdb.append(cod2_setES(eptr.Ety));
        scodelem(cdb,eptr,&dstregs,srcregs,false);
    }
    else
        cdrelconst(cdb,edst,&dstregs);
    freenode(edst);

    /* Step 3: the copy itself
     */
    getregs(cdb,(srcregs | dstregs) & (mLSW | mDI));
    if (need_DS)
    {
        assert(!(config.exe & EX_flat));
        cdb.gen1(0x1E);                         // PUSH DS
        cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
    }

    if (numbytes <= REGSIZE * (6 + (REGSIZE == 4)))
    {
        // Small enough: emit one MOVS per unit, no REP
        uint left = numbytes;
        for (; left >= REGSIZE; left -= REGSIZE)
        {
            cdb.gen1(0xA5);                     // MOVSW/D/Q
            code_orrex(cdb.last(), rex);
        }
        //if (left)
        //    printf("cdstreq numbytes %d\n",left);
        if (I64 && left >= 4)
        {
            cdb.gen1(0xA5);                     // MOVSD
            left -= 4;
        }
        for (; left; --left)
            cdb.gen1(0xA4);                     // MOVSB
    }
    else
    {
        // REP MOVS for the whole registers, then single MOVS for the tail
        uint tail = numbytes & (REGSIZE - 1);
        const uint nwords = numbytes / REGSIZE; // number of words
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,nwords,0);           // # of words
        cdb.gen1(0xF3);                         // REP
        if (REGSIZE == 8)
            cdb.gen1(REX | REX_W);
        cdb.gen1(0xA5);                         // REP MOVSD
        regimmed_set(CX,0);                     // note that CX == 0
        if (I64 && tail >= 4)
        {
            cdb.gen1(0xA5);                     // MOVSD
            tail -= 4;
        }
        while (tail)
        {
            cdb.gen1(0xA4);                     // MOVSB
            --tail;
        }
    }

    if (need_DS)
        cdb.gen1(0x1F);                         // POP  DS

    assert(!(*pretregs & mPSW));
    if (*pretregs)
    {
        // ES:DI points past what we want, so back DI up by the size copied
        cdb.genc2(0x81,(rex << 16) | modregrm(3,5,DI), type_size(e.ET));   // SUB DI,numbytes
        regm_t retregs = mDI;
        if (*pretregs & mMSW && !(config.exe & EX_flat))
            retregs |= mES;
        fixresult(cdb,e,retregs,pretregs);
    }
}
4511 
4512 
/**********************
 * Generate code to get the address of the symbol referenced by e.
 * Is also called by cdstreq() to set up pointer to a structure.
 * Params:
 *      cdb = code sink the instructions are appended to
 *      e = elem whose address is wanted
 *      pretregs = registers the address is wanted in; mPSW is cleared
 *                 from it, and if nothing else remains no address is
 *                 generated
 */

void cdrelconst(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdrelconst(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    /* A request for flags should not happen, but cgelem.c is a little
     * stupid: func("string" == 0); and similar things ask for them.
     * Need to add goals to optelem() to fix this completely.
     */
    //assert((*pretregs & mPSW) == 0);
    if (*pretregs & mPSW)
    {
        *pretregs &= ~mPSW;
        gentstreg(cdb,SP);            // SP is never 0
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    if (!*pretregs)
        return;

    assert(e);

    // Pick the pointer type that allocreg() is asked to allocate for
    tym_t tym = tybasic(e.Ety);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
        case TYldouble:
        case TYildouble:
        case TYcldouble:
            // don't confuse allocreg() with the aggregate type itself
            tym = (*pretregs & (mES | mCX) || e.Ety & mTYfar) ? TYfptr : TYnptr;
            break;

        case TYifunc:
            tym = TYfptr;
            break;

        default:
            if (tyfunc(tym))
                tym = tyfarfunc(tym) ? TYfptr : TYnptr;
            break;
    }
    //assert(tym & typtr);              // don't fail on (int)&a

    reg_t lreg;                         // offset of the address
    allocreg(cdb,pretregs,&lreg,tym);

    if (_tysize[tym] <= REGSIZE)
    {
        // Offset only
        getoffset(cdb,e,lreg);
        return;
    }

    // Segment is wanted too (fptr could've been cast to long)
    reg_t mreg;                         // segment of the address (TYfptrs only)
    if (*pretregs & mES)
    {
        /* Do not allocate CX or SI here, as cdstreq() needs
         * them preserved. cdstreq() should use scodelem()
         */
        mreg = allocScratchReg(cdb, (mAX|mBX|mDX|mDI) & ~mask(lreg));
    }
    else
    {
        mreg = lreg;
        lreg = findreglsw(*pretregs);
    }

    Symbol *s = e.EV.Vsym;
    if (s.Sfl == FLdatseg)
    {
        assert(0);
    }
    const SC sclass = cast(SC) s.Sclass;
    const ety = tybasic(s.ty());

    /* Decide whether the segment must come from a fixup on the symbol,
     * i.e. the segment is not necessarily the one already sitting in a
     * segment register (CS may not have the right value in it)
     */
    const bool farFunc =
        (tyfarfunc(ety) || ety == TYifunc) &&
        (sclass == SCextern || ClassInline(sclass) || (config.wflags & WFthunk));
    const bool farData = s.Sfl == FLfardata;
    const bool otherCodeSeg =
        (s.ty() & mTYcs) && s.Sseg != cseg && (LARGECODE || s.Sclass == SCcomdat);

    if (farFunc || farData || otherCodeSeg)
    {
        cdb.gencs(0xB8 + mreg,0,FLextern,s);          // MOV mreg,seg of symbol
        cdb.last().Iflags = CFseg;
    }
    else
    {
        const fl = (s.ty() & mTYcs) ? FLcsdata : s.Sfl;
        cdb.gen2(0x8C,modregrm(3,segfl[fl],mreg));    // MOV mreg,SEG REGISTER
    }

    if (*pretregs & mES)
        cdb.gen2(0x8E,modregrm(3,0,mreg));            // MOV ES,mreg

    getoffset(cdb,e,lreg);
}
4615 
/*********************************
 * Load the offset portion of the address represented by e into
 * reg.
 * Params:
 *      cdb = code sink the instructions are appended to
 *      e = OPvar or OPrelconst elem
 *      reg = destination register, or STACK to push the offset instead
 */

void getoffset(ref CodeBuilder cdb,elem *e,reg_t reg)
{
    //printf("getoffset(e = %p, reg = %d)\n", e, reg);
    const ubyte rex = 0;
    code ins = void;
    ins.Iflags = 0;
    ins.Irex = rex;
    assert(e.Eoper == OPvar || e.Eoper == OPrelconst);
    auto fl = el_fl(e);
    switch (fl)
    {
        case FLdatseg:
            ins.IEV2.Vpointer = e.EV.Vpointer;
            goto L3;

        case FLfardata:
            goto L4;

        case FLtlsdata:
            if (config.exe & EX_posix)
            {
              Lposix:
                if (config.flags3 & CFG3pic)
                {
                    code tlsins = void;
                    if (I64)
                    {
                        /* Generate:
                         *   LEA DI,s@TLSGD[RIP]
                         */
                        //assert(reg == DI);
                        tlsins.Irex = REX | REX_W;
                        tlsins.Iop = LEA;
                        tlsins.Irm = modregrm(0,reg,5);
                        if (reg & 8)
                            tlsins.Irex |= REX_R;
                        tlsins.Iflags = CFopsize;
                    }
                    else
                    {
                        /* Generate:
                         *   LEA EAX,s@TLSGD[1*EBX+0]
                         */
                        assert(reg == AX);
                        load_localgot(cdb);
                        tlsins.Iflags = 0;
                        tlsins.Iop = LEA;
                        tlsins.Irex = 0;
                        tlsins.Irm = modregrm(0,AX,4);
                        tlsins.Isib = modregrm(0,BX,5);
                    }
                    tlsins.IFL1 = cast(ubyte)fl;
                    tlsins.IEV1.Vsym = e.EV.Vsym;
                    tlsins.IEV1.Voffset = e.EV.Voffset;
                    cdb.gen(&tlsins);
                    return;
                }

                /* Generate:
                 *      MOV reg,GS:[00000000]
                 *      ADD reg, offset s@TLS_LE
                 * for locals, and for globals:
                 *      MOV reg,GS:[00000000]
                 *      ADD reg, s@TLS_IE
                 * note different fixup
                 */
                const bool pushResult = reg == STACK;
                if (pushResult)
                {
                    // Compute into a scratch register, push it afterwards
                    regm_t retregs = ALLREGS;
                    reg_t regx;
                    allocreg(cdb,&retregs,&regx,TYoffset);
                    reg = findreg(retregs);
                }

                code ld = void;
                ld.Irex = rex;
                ld.Iop = 0x8B;
                ld.Irm = modregrm(0, 0, BPRM);
                code_newreg(&ld, reg);
                ld.Iflags = CFgs;
                ld.IFL1 = FLconst;
                ld.IEV1.Vuns = 0;
                cdb.gen(&ld);                    // MOV reg,GS:[00000000]

                ins.Irex = rex;
                if (e.EV.Vsym.Sclass == SCstatic || e.EV.Vsym.Sclass == SClocstat)
                {
                    // ADD reg, offset s
                    ins.Iop = 0x81;
                    ins.Irm = modregrm(3,0,reg & 7);
                    if (reg & 8)
                        ins.Irex |= REX_B;
                    ins.Iflags = CFoff;
                    ins.IFL2 = cast(ubyte)fl;
                    ins.IEV2.Vsym = e.EV.Vsym;
                    ins.IEV2.Voffset = e.EV.Voffset;
                }
                else
                {
                    // ADD reg, s
                    ins.Iop = 0x03;
                    ins.Irm = modregrm(0,0,BPRM);
                    code_newreg(&ins, reg);
                    ins.Iflags = CFoff;
                    ins.IFL1 = cast(ubyte)fl;
                    ins.IEV1.Vsym = e.EV.Vsym;
                    ins.IEV1.Voffset = e.EV.Voffset;
                }
                cdb.gen(&ins);                   // ADD reg, xxxx

                if (pushResult)
                {
                    cdb.gen1(0x50 + (reg & 7));      // PUSH reg
                    if (reg & 8)
                        code_orrex(cdb.last(), REX_B);
                    cdb.genadjesp(REGSIZE);
                    stackchanged = 1;
                }
                break;
            }

            // Everything but 64 bit Windows is handled like ordinary data
            if (!((config.exe & EX_windos) && I64))
                goto L4;

        Lwin64:
            assert(reg != STACK);
            ins.IEV2.Vsym = e.EV.Vsym;
            ins.IEV2.Voffset = e.EV.Voffset;
            ins.Iop = 0xB8 + (reg & 7);         // MOV Ereg,offset s
            if (reg & 8)
                ins.Irex |= REX_B;
            ins.Iflags = CFoff;                 // want offset only
            ins.IFL2 = cast(ubyte)fl;
            cdb.gen(&ins);
            break;

        case FLfunc:
            fl = FLextern;                  /* don't want PC relative addresses */
            goto L4;

        case FLextern:
            if (config.exe & EX_posix && e.EV.Vsym.ty() & mTYthread)
                goto Lposix;
            if (config.exe & EX_WIN64 && e.EV.Vsym.ty() & mTYthread)
                goto Lwin64;
            goto L4;

        case FLdata:
        case FLudata:
        case FLgot:
        case FLgotoff:
        case FLcsdata:
        L4:
            ins.IEV2.Vsym = e.EV.Vsym;
            ins.IEV2.Voffset = e.EV.Voffset;
        L3:
            if (reg == STACK)
            {
                stackchanged = 1;
                ins.Iop = 0x68;                 // PUSH immed16
                cdb.genadjesp(REGSIZE);
            }
            else
            {
                ins.Iop = 0xB8 + (reg & 7);     // MOV reg,immed16
                if (reg & 8)
                    ins.Irex |= REX_B;
                if (I64)
                {
                    ins.Irex |= REX_W;
                    if ((config.flags3 & CFG3pic) || config.exe == EX_WIN64)
                    {
                        // LEA reg,immed32[RIP]
                        ins.Iop = LEA;
                        ins.Irm = modregrm(0,reg & 7,5);
                        if (reg & 8)
                            ins.Irex = (ins.Irex & ~REX_B) | REX_R;
                        ins.IFL1 = cast(ubyte)fl;
                        ins.IEV1.Vsym = ins.IEV2.Vsym;
                        ins.IEV1.Voffset = ins.IEV2.Voffset;
                    }
                }
            }
            ins.Iflags = CFoff;                 // want offset only
            ins.IFL2 = cast(ubyte)fl;
            cdb.gen(&ins);
            break;

        case FLreg:
            /* Allow this since the tree optimizer puts & in front of
             * register doubles.
             */
            goto L2;

        case FLauto:
        case FLfast:
        case FLbprel:
        case FLfltreg:
            reflocal = true;
            goto L2;

        case FLpara:
            refparam = true;
        L2:
        {
            const bool pushAddr = reg == STACK;
            if (pushAddr)
            {
                // Compute into a scratch register, push it afterwards
                regm_t retregs = ALLREGS;
                reg_t regx;
                allocreg(cdb,&retregs,&regx,TYoffset);
                reg = findreg(retregs);
            }

            loadea(cdb,e,&ins,LEA,reg,0,0,0);       // LEA reg,EA
            if (I64)
                code_orrex(cdb.last(), REX_W);

            if (pushAddr)
            {
                cdb.gen1(0x50 + (reg & 7));         // PUSH reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
                cdb.genadjesp(REGSIZE);
                stackchanged = 1;
            }
            break;
        }

        default:
            debug
            {
                elem_print(e);
                WRFL(fl);
            }
            assert(0);
    }
}
4862 
4863 
/******************
 * Generate code for the unary operators
 * OPneg, OPsqrt, OPsin, OPcos, OPrint
 *
 * Floating point operands are handed to the complex, XMM or x87 helpers
 * when those apply; otherwise the sign bit of the value held in integer
 * registers is flipped with XOR. Integer operands are negated with NEG,
 * using NEG/NEG/SBB for a value occupying a register pair.
 * Params:
 *      cdb = code sink the instructions are appended to
 *      e = the operator elem
 *      pretregs = registers the result is wanted in; 0 if the result
 *                 is not needed
 */

void cdneg(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdneg()\n");
    //elem_print(e);
    if (*pretregs == 0)
    {
        // Result not used: only evaluate the operand
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    if (tyfloating(tyml))
    {
        if (tycomplex(tyml))
        {
            neg_complex87(cdb, e, pretregs);
            return;
        }
        if (tyxmmreg(tyml) && e.Eoper == OPneg && *pretregs & XMMREGS)
        {
            xmmneg(cdb,e,pretregs);
            return;
        }
        if (config.inline8087 &&
            ((*pretregs & (ALLREGS | mBP)) == 0 || e.Eoper == OPsqrt || I64))
            {
                neg87(cdb,e,pretregs);
                return;
            }

        // Value lives in integer registers: flip its sign bit
        regm_t retregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,retregs);
        if (I32)
        {
            const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
            cdb.genc2(0x81,modregrm(3,6,reg),0x80000000); // XOR EDX,sign bit
        }
        else
        {
            const reg = (sz == 8) ? AX : findregmsw(retregs);
            cdb.genc2(0x81,modregrm(3,6,reg),0x8000);     // XOR AX,0x8000
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    const uint isbyte = sz == 1;
    const possregs = (isbyte) ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed
    if (sz <= REGSIZE)
    {
        const reg = findreg(retregs);
        uint rex = (I64 && sz == 8) ? REX_W : 0;
        if (I64 && sz == 1 && reg >= 4)
            rex |= REX;
        cdb.gen2(0xF7 ^ isbyte,(rex << 16) | modregrmx(3,3,reg));   // NEG reg
        if (!I16 && _tysize[tyml] == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        *pretregs &= mBP | ALLREGS;             // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        /* Each half of the pair is a full REGSIZE register, so for 64 bit
         * code the instructions need REX.W to operate on all 64 bits of
         * each half. For 16 and 32 bit code grex is 0 and the encoding is
         * unchanged.
         */
        const grex = I64 ? (REX_W << 16) : 0;

        const msreg = findregmsw(retregs);
        cdb.gen2(0xF7,grex | modregrm(3,3,msreg));       // NEG msreg
        const lsreg = findreglsw(retregs);
        cdb.gen2(0xF7,grex | modregrm(3,3,lsreg));       // NEG lsreg
        code_orflag(cdb.last(), CFpsw);                  // need flag result of previous NEG
        cdb.genc2(0x81,grex | modregrm(3,3,msreg),0);    // SBB msreg,0
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}
4945 
4946 
/******************
 * Absolute value operator
 *
 * Floating point operands are forwarded to xmmabs() or neg87() when their
 * conditions hold; otherwise the sign bit is cleared with an AND on an
 * integer register. Integer operands that fit a register use the
 * sign-mask XOR/SUB sequence; register pairs use a conditional negate.
 *
 * Params:
 *      cdb      = code builder the instructions are appended to
 *      e        = operator node; the operand is e.EV.E1
 *      pretregs = mask of registers/flags requested for the result;
 *                 may be modified
 */

void cdabs(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdabs(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *operand = e.EV.E1;

    if (!*pretregs)
    {
        // Nothing requested as a result: only generate the operand
        codelem(cdb,operand,pretregs,false);
        return;
    }

    const operandTy = tybasic(operand.Ety);
    const size = _tysize[operandTy];
    const rexw = (I64 && size == 8) ? REX_W : 0;

    if (tyfloating(operandTy))
    {
        if (tyxmmreg(operandTy) && (*pretregs & XMMREGS))
        {
            xmmabs(cdb,e,pretregs);
        }
        else if (config.inline8087 && (!(*pretregs & (ALLREGS | mBP)) || I64))
        {
            neg87(cdb,e,pretregs);
        }
        else
        {
            // Operand is held in integer registers: mask off the sign bit
            regm_t fpregs = (!I32 && size == 8) ? DOUBLEREGS_16 : ALLREGS;
            codelem(cdb,operand,&fpregs,false);
            getregs(cdb,fpregs);
            if (!I32)
            {
                const signReg = (size == 8) ? AX : findregmsw(fpregs);
                cdb.genc2(0x81,modregrm(3,4,signReg),0x7FFF);     // AND AX,0x7FFF
            }
            else
            {
                const signReg = (size == 8) ? findregmsw(fpregs) : findreg(fpregs);
                cdb.genc2(0x81,modregrm(3,4,signReg),0x7FFFFFFF); // AND EDX,~sign bit
            }
            fixresult(cdb,e,fpregs,pretregs);
        }
        return;
    }

    // Integer operand
    const uint byteOp = (size == 1);
    assert(byteOp == 0);

    // Full-register operands outside 16 bit mode may use any register
    const bool anyReg = !I16 && size == REGSIZE;

    regm_t candidates = allregs;
    if (size <= REGSIZE && !anyReg)
        candidates = cast(regm_t) mAX;          // the CWD sequence needs AX
    regm_t result = *pretregs & candidates;
    if (!result)
        result = candidates;
    codelem(cdb,operand,&result,false);
    getregs(cdb,result);                 // these registers get overwritten below

    if (size <= REGSIZE)
    {
        /* Either:
                CWD
                XOR     AX,DX
                SUB     AX,DX
           or:
                MOV     mask,value
                SAR     mask,63
                XOR     value,mask
                SUB     value,mask
         */
        reg_t value;
        reg_t signMask;

        if (anyReg)
        {
            value = findreg(result);
            signMask = allocScratchReg(cdb, allregs & ~result);
            getregs(cdb,result);
            genmovreg(cdb,signMask,value);                               // MOV signMask,value
            cdb.genc2(0xC1,modregrmx(3,7,signMask),REGSIZE * 8 - 1);     // SAR signMask,31/63
            code_orrex(cdb.last(), rexw);
        }
        else
        {
            value = AX;
            signMask = DX;
            getregs(cdb,mDX);
            if (!I16 && size == SHORTSIZE)
                cdb.gen1(0x98);                         // CWDE
            cdb.gen1(0x99);                             // CWD
            code_orrex(cdb.last(), rexw);
        }
        cdb.gen2(0x33 ^ byteOp,(rexw << 16) | modregxrmx(3,value,signMask)); // XOR value,signMask
        cdb.gen2(0x2B ^ byteOp,(rexw << 16) | modregxrmx(3,value,signMask)); // SUB value,signMask
        if (*pretregs & mPSW)
        {
            code *sub = cdb.last();
            if (!I16 && size == SHORTSIZE)
                sub.Iflags |= CFopsize | CFpsw;
            sub.Iflags |= CFpsw;
        }
        *pretregs &= ~mPSW;                     // flags already set by SUB
    }
    else if (size == 2 * REGSIZE)
    {
        /*      or      DX,DX
                jns     L2
                neg     DX
                neg     AX
                sbb     DX,0
            L2:
         */

        code *skip = gennop(null);                // target of the JNS
        const hi = findregmsw(result);
        const lo = findreglsw(result);
        genregs(cdb,0x09,hi,hi);                  // OR hi,hi
        genjmp(cdb,JNS,FLcode,cast(block *)skip);
        cdb.gen2(0xF7,modregrm(3,3,hi));          // NEG hi
        cdb.gen2(0xF7,modregrm(3,3,lo));          // NEG lo
        cdb.genc2(0x81,modregrm(3,3,hi),0);       // SBB hi,0
        cdb.append(skip);
    }
    else
    {
        assert(0);
    }
    fixresult(cdb,e,result,pretregs);
}
5067 
/**************************
 * Post increment and post decrement.
 *
 * Generates code that yields the value of the lvalue e.EV.E1 as it was
 * before the update, and adds (OPpostinc) or subtracts (OPpostdec)
 * e.EV.E2 to/from that lvalue.
 *
 * Params:
 *      cdb      = code builder the instructions are appended to
 *      e        = the OPpostinc/OPpostdec node
 *      pretregs = mask of registers/flags requested for the result;
 *                 may be modified
 */

void cdpost(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpost(pretregs = %s)\n", regm_str(*pretregs));
    code cs = void;
    const op = e.Eoper;                      // OPxxxx
    if (*pretregs == 0)                        // if nothing to return
    {
        // The old value is not needed, so let cdaddass() handle it
        cdaddass(cdb,e,pretregs);
        return;
    }
    const tym_t tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    elem *e2 = e.EV.E2;
    const rex = (I64 && sz == 8) ? REX_W : 0;

    if (tyfloating(tyml))
    {
        if (config.fpxmmregs && tyxmmreg(tyml) &&
            !tycomplex(tyml) // SIMD code is not set up to deal with complex
           )
        {
            xmmpost(cdb,e,pretregs);
            return;
        }

        if (config.inline8087)
        {
            post87(cdb,e,pretregs);
            return;
        }
// No XMM and no inline x87: on EX_windos targets do the arithmetic with
// runtime library calls (CLIB.dadd/dsub/fadd/fsub).
if (config.exe & EX_windos)
{
        assert(sz <= 8);
        getlvalue(cdb,&cs,e.EV.E1,DOUBLEREGS);
        freenode(e.EV.E1);
        regm_t idxregs = idxregm(&cs);  // mask of index regs used
        cs.Iop = 0x8B;                  /* MOV DOUBLEREGS,EA            */
        fltregs(cdb,&cs,tyml);
        stackchanged = 1;
        int stackpushsave = stackpush;
        regm_t retregs;
        // Push the value just loaded; presumably this is the copy that is
        // popped further down as the result -- TODO confirm
        if (sz == 8)
        {
            if (I32)
            {
                cdb.gen1(0x50 + DX);             // PUSH DOUBLEREGS
                cdb.gen1(0x50 + AX);
                stackpush += DOUBLESIZE;
                retregs = DOUBLEREGS2_32;
            }
            else
            {
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                stackpush += DOUBLESIZE + DOUBLESIZE;

                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                retregs = DOUBLEREGS_16;
            }
        }
        else
        {
            stackpush += FLOATSIZE;     /* so we know something is on   */
            if (!I32)
                cdb.gen1(0x50 + DX);
            cdb.gen1(0x50 + AX);
            retregs = FLOATREGS2;
        }
        cdb.genadjesp(stackpush - stackpushsave);

        // Evaluate the increment e2 while keeping the index registers intact
        cgstate.stackclean++;
        scodelem(cdb,e2,&retregs,idxregs,false);
        cgstate.stackclean--;

        if (tyml == TYdouble || tyml == TYdouble_alias)
        {
            retregs = DOUBLEREGS;
            callclib(cdb,e,(op == OPpostinc) ? CLIB.dadd : CLIB.dsub,
                    &retregs,idxregs);
        }
        else /* tyml == TYfloat */
        {
            retregs = FLOATREGS;
            callclib(cdb,e,(op == OPpostinc) ? CLIB.fadd : CLIB.fsub,
                    &retregs,idxregs);
        }
        cs.Iop = 0x89;                  /* MOV EA,DOUBLEREGS            */
        fltregs(cdb,&cs,tyml);
        stackpushsave = stackpush;
        if (tyml == TYdouble || tyml == TYdouble_alias)
        {   if (*pretregs == mSTACK)
                retregs = mSTACK;       /* leave result on stack        */
            else
            {
                // Pop in the reverse order of the pushes above
                if (I32)
                {
                    cdb.gen1(0x58 + AX);
                    cdb.gen1(0x58 + DX);
                }
                else
                {
                    cdb.gen1(0x58 + DX);
                    cdb.gen1(0x58 + CX);
                    cdb.gen1(0x58 + BX);
                    cdb.gen1(0x58 + AX);
                }
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS;
            }
        }
        else
        {
            cdb.gen1(0x58 + AX);
            if (!I32)
                cdb.gen1(0x58 + DX);
            stackpush -= FLOATSIZE;
            retregs = FLOATREGS;
        }
        cdb.genadjesp(stackpush - stackpushsave);
        fixresult(cdb,e,retregs,pretregs);
        return;
}
    }
    if (tyxmmreg(tyml))
    {
        xmmpost(cdb,e,pretregs);
        return;
    }

    // Every path below encodes the increment as an immediate operand
    assert(e2.Eoper == OPconst);
    uint isbyte = (sz == 1);
    regm_t possregs = isbyte ? BYTEREGS : allregs;
    getlvalue(cdb,&cs,e.EV.E1,0);
    freenode(e.EV.E1);
    regm_t idxregs = idxregm(&cs);       // mask of index regs used
    // Only the flags are wanted and the lvalue is a register (mod field == 3)
    if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 &&
        (!I16 || (idxregs & (mBX | mSI | mDI | mBP))))
    {
        // Generate:
        //      TEST    reg,reg
        //      LEA     reg,n[reg]      // don't affect flags
        reg_t reg = cs.Irm & 7;
        if (cs.Irex & REX_B)
            reg |= 8;
        cs.Iop = 0x85 ^ isbyte;
        code_newreg(&cs, reg);
        cs.Iflags |= CFpsw;
        cdb.gen(&cs);             // TEST reg,reg

        // If lvalue is a register variable, we must mark it as modified
        modEA(cdb,&cs);

        auto n = e2.EV.Vint;
        if (op == OPpostdec)
            n = -n;
        int rm = reg;
        if (I16)
        {
            // Maps a register number to the r/m encoding used for 16 bit
            // addressing; -1 marks registers with no such encoding
            static immutable byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ]; // copied from cod1.c
            rm = regtorm[reg];
        }
        cdb.genc1(LEA,(rex << 16) | buildModregrm(2,reg,rm),FLconst,n); // LEA reg,n[reg]
        return;
    }
    else if (sz <= REGSIZE || tyfv(tyml))
    {
        code cs2 = void;

        cs.Iop = 0x8B ^ isbyte;
        regm_t retregs = possregs & ~idxregs & *pretregs;
        if (!tyfv(tyml))
        {
            if (retregs == 0)
                retregs = possregs & ~idxregs;
        }
        else /* tyfv(tyml) */
        {
            if ((retregs &= mLSW) == 0)
                retregs = mLSW & ~idxregs;
            /* Can't use LES if the EA uses ES as a seg override    */
            if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
            {   cs.Iop = 0xC4;                      /* LES          */
                getregs(cdb,mES);           // allocate ES
            }
        }
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        code_newreg(&cs, reg);
        if (sz == 1 && I64 && reg >= 4)
            cs.Irex |= REX;
        cdb.gen(&cs);                     // MOV reg,EA
        cs2 = cs;                         // keep a copy of the load for the scheduling variant below

        /* If lvalue is a register variable, we must mark it as modified */
        modEA(cdb,&cs);

        // Turn cs into ADD/SUB EA,const (opcode 0x80/0x81, operation in the reg field)
        cs.Iop = 0x81 ^ isbyte;
        cs.Irm &= ~cast(int)modregrm(0,7,0);             // reg field = 0
        cs.Irex &= ~REX_R;
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                  */
        cs.IFL2 = FLconst;
        targ_int n = e2.EV.Vint;
        cs.IEV2.Vint = n;
        if (n == 1)                     /* can use INC or DEC           */
        {
            cs.Iop |= 0xFE;             /* xFE is dec byte, xFF is word */
            if (op == OPpostdec)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }
        else if (n == -1)               // can use INC or DEC
        {
            cs.Iop |= 0xFE;             // xFE is dec byte, xFF is word
            if (op == OPpostinc)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }

        // For scheduling purposes, we wish to replace:
        //      MOV     reg,EA
        //      OP      EA
        // with:
        //      MOV     reg,EA
        //      OP      reg
        //      MOV     EA,reg
        //      ~OP     reg
        if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
            config.target_cpu >= TARGET_Pentium &&
            config.flags4 & CFG4speed)
        {
            // Replace EA in cs with reg
            cs.Irm = (cs.Irm & ~cast(int)modregrm(3,0,7)) | modregrm(3,0,reg & 7);
            if (reg & 8)
            {   cs.Irex &= ~REX_R;
                cs.Irex |= REX_B;
            }
            else
                cs.Irex &= ~REX_B;
            if (I64 && sz == 1 && reg >= 4)
                cs.Irex |= REX;
            cdb.gen(&cs);                        // ADD/SUB reg,const

            // Reverse MOV direction
            cs2.Iop ^= 2;
            cdb.gen(&cs2);                       // MOV EA,reg

            // Toggle INC <-> DEC, ADD <-> SUB so that reg holds the old value again
            cs.Irm ^= (n == 1 || n == -1) ? modregrm(0,1,0) : modregrm(0,5,0);
            cdb.gen(&cs);

            if (*pretregs & mPSW)
            {   *pretregs &= ~mPSW;              // flags already set
                code_orflag(cdb.last(),CFpsw);
            }
        }
        else
            cdb.gen(&cs);                        // ADD/SUB EA,const

        freenode(e2);
        if (tyfv(tyml))
        {
            // Also load the most significant word (per the comments below,
            // EA+2) into ES or a register from mMSW
            reg_t preg;

            getlvalue_msw(&cs);
            if (*pretregs & mES)
            {
                preg = ES;
                /* ES is already loaded if CFes is 0            */
                cs.Iop = ((cs.Iflags & CFSEG) == CFes) ? 0x8E : NOP;
                NEWREG(cs.Irm,0);       /* MOV ES,EA+2          */
            }
            else
            {
                regm_t retregsx = *pretregs & mMSW;
                if (!retregsx)
                    retregsx = mMSW;
                allocreg(cdb,&retregsx,&preg,TYint);
                cs.Iop = 0x8B;
                if (I32)
                    cs.Iflags |= CFopsize;
                NEWREG(cs.Irm,preg);    /* MOV preg,EA+2        */
            }
            getregs(cdb,mask(preg));
            cdb.gen(&cs);
            retregs = mask(reg) | mask(preg);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (tyml == TYhptr)
    {
        uint rvalue;
        reg_t lreg;
        reg_t rtmp;
        regm_t mtmp;

        rvalue = e2.EV.Vlong;
        freenode(e2);

        // If h--, convert to h++
        if (e.Eoper == OPpostdec)
            rvalue = -rvalue;

        regm_t retregs = mLSW & ~idxregs & *pretregs;
        if (!retregs)
            retregs = mLSW & ~idxregs;
        allocreg(cdb,&retregs,&lreg,TYint);

        // Can't use LES if the EA uses ES as a seg override
        if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
        {   cs.Iop = 0xC4;
            retregs |= mES;
            getregs(cdb,mES|mCX);       // allocate ES
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                       // LES lreg,EA
        }
        else
        {   cs.Iop = 0x8B;
            retregs |= mDX;
            getregs(cdb,mDX|mCX);
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                       // MOV lreg,EA
            NEWREG(cs.Irm,DX);
            getlvalue_msw(&cs);
            cdb.gen(&cs);                       // MOV DX,EA+2
            getlvalue_lsw(&cs);
        }

        // Allocate temporary register, rtmp
        // (CX is excluded because it is used as the shift count below)
        mtmp = ALLREGS & ~mCX & ~idxregs & ~retregs;
        allocreg(cdb,&mtmp,&rtmp,TYint);

        movregconst(cdb,rtmp,rvalue >> 16,0);   // MOV rtmp,e2+2
        getregs(cdb,mtmp);
        cs.Iop = 0x81;
        NEWREG(cs.Irm,0);
        cs.IFL2 = FLconst;
        cs.IEV2.Vint = rvalue;
        cdb.gen(&cs);                           // ADD EA,e2
        code_orflag(cdb.last(),CFpsw);          // the carry is consumed by the ADC below
        cdb.genc2(0x81,modregrm(3,2,rtmp),0);   // ADC rtmp,0
        genshift(cdb);                          // MOV CX,offset __AHSHIFT
        cdb.gen2(0xD3,modregrm(3,4,rtmp));      // SHL rtmp,CL
        cs.Iop = 0x01;
        NEWREG(cs.Irm,rtmp);                    // ADD EA+2,rtmp
        getlvalue_msw(&cs);
        cdb.gen(&cs);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        // Value occupies a register pair: load both halves, then update
        // memory with ADD/ADC or SUB/SBB
        regm_t retregs = allregs & ~idxregs & *pretregs;
        if ((retregs & mLSW) == 0)
                retregs |= mLSW & ~idxregs;
        if ((retregs & mMSW) == 0)
                retregs |= ALLREGS & mMSW;
        assert(retregs & mMSW && retregs & mLSW);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,tyml);
        uint sreg = findreglsw(retregs);
        cs.Iop = 0x8B;
        cs.Irm |= modregrm(0,sreg,0);
        cdb.gen(&cs);                   // MOV sreg,EA
        NEWREG(cs.Irm,reg);
        getlvalue_msw(&cs);
        cdb.gen(&cs);                   // MOV reg,EA+2
        cs.Iop = 0x81;
        cs.Irm &= ~cast(int)modregrm(0,7,0);     /* reg field = 0 for ADD        */
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                          */
        getlvalue_lsw(&cs);
        cs.IFL2 = FLconst;
        cs.IEV2.Vlong = e2.EV.Vlong;
        cdb.gen(&cs);                   // ADD/SUB EA,const
        code_orflag(cdb.last(),CFpsw);  // carry/borrow is consumed by the ADC/SBB below
        getlvalue_msw(&cs);
        // NOTE(review): this store is overwritten three statements below,
        // before cs is emitted again
        cs.IEV2.Vlong = 0;
        if (op == OPpostinc)
            cs.Irm ^= modregrm(0,2,0);  /* ADC                          */
        else
            cs.Irm ^= modregrm(0,6,0);  /* SBB                          */
        cs.IEV2.Vlong = cast(targ_long)(e2.EV.Vullong >> (REGSIZE * 8));
        cdb.gen(&cs);                   // ADC/SBB EA+2,upper half of const
        freenode(e2);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}
5473 
5474 
/******************
 * Handler for operators that must never reach the code generator.
 * In debug builds the offending node is printed first; the function
 * then always fails with assert(0).
 *
 * Params:
 *      cdb      = unused
 *      e        = the unexpected node
 *      pretregs = unused
 */
void cderr(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        elem_print(e);
    }

    // Extra diagnostics, kept disabled:
    //printf("op = %d, %d\n", e.Eoper, OPstring);
    //printf("string = %p, len = %d\n", e.EV.ss.Vstring, e.EV.ss.Vstrlen);
    //printf("string = '%.*s'\n", cast(int)e.EV.ss.Vstrlen, e.EV.ss.Vstring);

    assert(0);
}
5485 
/******************
 * Generate code for a node whose left child e.EV.E1 carries bookkeeping
 * information and whose right child e.EV.E2 produces the value.
 * Dispatches on the operator of the left child; any operator not
 * handled for the current build (MARS or SCPP) fails with assert(0).
 *
 * Params:
 *      cdb      = code builder the instructions are appended to
 *      e        = the node
 *      pretregs = mask of registers requested for the value of e.EV.E2
 */
void cdinfo(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    switch (e.EV.E1.Eoper)
    {
version (MARS)
{
        case OPdctor:
        {
            // Value first, then the left child with no result requested
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t nothing = 0;
            codelem(cdb,e.EV.E1,&nothing,false);
            break;
        }
}
version (SCPP)
{
        case OPdtor:
        {
            cdcomma(cdb,e,pretregs);
            break;
        }

        case OPctor:
        {
            // Value first, then the left child with no result requested
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t nothing = 0;
            codelem(cdb,e.EV.E1,&nothing,false);
            break;
        }

        case OPmark:
        {
            if (0 && config.exe == EX_WIN32)    // branch disabled by the constant 0
            {
                const savedIndex = except_index_get();
                except_mark();
                codelem(cdb,e.EV.E2,pretregs,false);
                if (config.exe == EX_WIN32 && savedIndex != except_index_get())
                {
                    usednteh |= NTEHcleanup;
                    nteh_gensindex(cdb,savedIndex - 1);
                }
                except_release();
                assert(savedIndex == except_index_get());
            }
            else
            {
                // Bracket the value's code with ESCmark / ESCrelease pseudo-ops
                code escape = void;
                escape.Iop = ESCAPE | ESCmark;
                escape.Iflags = 0;
                escape.Irex = 0;
                cdb.gen(&escape);
                codelem(cdb,e.EV.E2,pretregs,false);
                escape.Iop = ESCAPE | ESCrelease;
                cdb.gen(&escape);
            }
            freenode(e.EV.E1);
            break;
        }
}
        default:
            assert(0);
    }
}
5539 
/*******************************************
 * D constructor.
 * OPdctor
 *
 * Records that EH cleanup is in use, emits an ESCdctor pseudo-instruction
 * that refers back to `e`, and then calls nteh_gensindex() with a
 * placeholder index of 0.
 *
 * Params:
 *      cdb      = code builder the instructions are appended to
 *      e        = the OPdctor node, stored in the pseudo-instruction
 *      pretregs = must point to 0; no result is produced
 */

void cddctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Generate:
        ESCAPE | ESCdctor
        MOV     sindex[BP],index
     */
    const bool win32EH = config.ehmethod == EHmethod.EH_WIN32;

    usednteh |= EHcleanup;
    if (win32EH)
    {
        usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }

    assert(*pretregs == 0);

    code marker;
    marker.IEV1.Vtor = e;
    marker.IFL1 = FLctor;
    marker.Irex = 0;
    marker.Iflags = 0;
    marker.Iop = ESCAPE | ESCdctor;     // mark start of EH range
    cdb.gen(&marker);

    // The real index is patched in later by except_fillInEHTable()
    nteh_gensindex(cdb,0);
}
5567 
/*******************************************
 * D destructor.
 * OPddtor
 *
 * Emits an ESCddtor pseudo-instruction that refers back to `e`, marks all
 * registers as destroyed, and generates the destructor code e.EV.E1.
 * With EH_DWARF the destructor code is generated inline. Otherwise it is
 * generated as a local subroutine ending in RET, which is CALLed and then
 * jumped over.
 *
 * Params:
 *      cdb      = code builder the instructions are appended to
 *      e        = the OPddtor node
 *      pretregs = must point to 0; no result is produced
 */

void cdddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* For EH_DWARF generate:
            ESCAPE | ESCddtor
            ... e.EV.E1 ...
       otherwise generate:
            ESCAPE | ESCddtor
            MOV     sindex[BP],index
            CALL    dtor
            JMP     L1
        Ldtor:
            ... e.EV.E1 ...
            RET
        L1: NOP
    */
    const bool dwarfEH = config.ehmethod == EHmethod.EH_DWARF;

    usednteh |= EHcleanup;
    if (config.ehmethod == EHmethod.EH_WIN32)
    {
        usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }

    code marker;
    marker.Iop = ESCAPE | ESCddtor;     // mark end of EH range and where landing pad is
    marker.Iflags = 0;
    marker.Irex = 0;
    marker.IFL1 = FLdtor;
    marker.IEV1.Vtor = e;
    cdb.gen(&marker);

    if (!dwarfEH)
    {
        // The real index is patched in later by except_fillInEHTable()
        nteh_gensindex(cdb,0);
    }

    // Mark all registers as destroyed
    getregsNoSave(allregs);

    assert(*pretregs == 0);

    if (dwarfEH)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    // Build the destructor body as a separate instruction sequence
    CodeBuilder dtorCode;
    dtorCode.ctor();
    codelem(dtorCode,e.EV.E1,pretregs,false);
    dtorCode.gen1(0xC3);                        // RET
    code *dtorEntry = dtorCode.finish();

    // Adjust the stack around the CALL when STACKALIGN is 16 or more
    int padding = 0;
    if (STACKALIGN >= 16)
    {
        padding = STACKALIGN - REGSIZE;
        cod3_stackadj(cdb, padding);
    }
    calledafunc = 1;
    genjmp(cdb,0xE8,FLcode,cast(block *)dtorEntry);   // CALL Ldtor
    if (padding)
        cod3_stackadj(cdb, -padding);

    code *after = gennop(null);                 // L1

    genjmp(cdb,JMP,FLcode,cast(block *)after);  // JMP L1
    cdb.append(dtorCode);                       // Ldtor: ... RET
    cdb.append(after);
}
5652 
5653 
/*******************************************
 * C++ constructor.
 *
 * In SCPP builds, records that EH cleanup is in use and emits an ESCctor
 * pseudo-instruction that refers back to `e`. In other builds the
 * function body is empty.
 *
 * Params:
 *      cdb      = code builder the pseudo-instruction is appended to
 *      e        = the node, stored in the pseudo-instruction
 *      pretregs = must point to 0; no result is produced
 */

void cdctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
version (SCPP)
{
    usednteh |= (config.exe == EX_WIN32) ? (EHcleanup | NTEHcleanup) : EHcleanup;
    assert(*pretregs == 0);

    code marker = void;
    marker.IEV1.Vtor = e;
    marker.IFL1 = FLctor;
    marker.Irex = 0;
    marker.Iflags = 0;
    marker.Iop = ESCAPE | ESCctor;
    cdb.gen(&marker);
}
}
5676 
/******
 * OPdtor
 *
 * In SCPP builds, records that EH cleanup is in use and emits an ESCdtor
 * pseudo-instruction that refers back to `e`. In other builds the
 * function body is empty.
 *
 * Params:
 *      cdb      = code builder the pseudo-instruction is appended to
 *      e        = the OPdtor node, stored in the pseudo-instruction
 *      pretregs = must point to 0; no result is produced
 */
void cddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
version (SCPP)
{
    usednteh |= (config.exe == EX_WIN32) ? (EHcleanup | NTEHcleanup) : EHcleanup;
    assert(*pretregs == 0);

    code marker = void;
    marker.IEV1.Vtor = e;
    marker.IFL1 = FLdtor;
    marker.Irex = 0;
    marker.Iflags = 0;
    marker.Iop = ESCAPE | ESCdtor;
    cdb.gen(&marker);
}
}
5698 
/******************
 * Code generator entry for the "mark" operator.
 * The body is empty: no instructions are generated and none of the
 * parameters are used.
 */
void cdmark(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
}
5702 
// Stub that is compiled only when NTEXCEPTIONS is false.
static if (!NTEXCEPTIONS)
{
/******************
 * Placeholder setjmp code generator: always fails with assert(0).
 */
void cdsetjmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(0);
}
}
5710 
/*****************************************
 * Generate code for the operand e.EV.E1 with no result requested.
 *
 * Params:
 *      cdb      = code builder the instructions are appended to
 *      e        = the node; only its operand e.EV.E1 is generated
 *      pretregs = must point to 0
 */

void cdvoid(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);

    elem *operand = e.EV.E1;
    codelem(cdb,operand,pretregs,false);
}
5719 
/*****************************************
 * Emit a single trapping instruction: UD2 when the target CPU is at
 * least TARGET_80286, INT3 otherwise.
 *
 * Params:
 *      cdb      = code builder the instruction is appended to
 *      e        = unused
 *      pretregs = must point to 0; no result is produced
 */

void cdhalt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);

    if (config.target_cpu >= TARGET_80286)
        cdb.gen1(UD2);
    else
        cdb.gen1(INT3);
}
5728 
5729 }