1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1984-1998 by Symantec
6  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod2.d, backend/cod2.d)
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod2.d
11  */
12 
13 module dmd.backend.cod2;
14 
// Define the COMPILE version identifier for both the SCPP and MARS builds;
// the body of this module is wrapped in version (COMPILE).
version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;
19 
20 version (COMPILE)
21 {
22 
23 import core.stdc.stdio;
24 import core.stdc.stdlib;
25 import core.stdc.string;
26 
27 import dmd.backend.backend;
28 import dmd.backend.cc;
29 import dmd.backend.cdef;
30 import dmd.backend.code;
31 import dmd.backend.code_x86;
32 import dmd.backend.codebuilder;
33 import dmd.backend.mem;
34 import dmd.backend.el;
35 import dmd.backend.exh;
36 import dmd.backend.global;
37 import dmd.backend.oper;
38 import dmd.backend.ty;
39 import dmd.backend.type;
40 import dmd.backend.xmm;
41 
42 extern (C++):
43 
44 nothrow:
45 
// Declared here, defined elsewhere. It is compared against _tysize[] entries
// below, so presumably the size of a general purpose register - TODO confirm.
int REGSIZE();

// Code generator state and per-FL tables defined in other modules.
extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl;

// Not referenced in the visible part of this module.
__gshared int cdcmp_flag;
53 
/// Returns: a mask with only bit number `m` set (used here to turn a register
/// number into a register mask).
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
55 
// from divcoeff.c
// C-linkage helpers; they are not called in the visible part of this module.
// Judging by the names they compute multiply/shift coefficients for division
// by a constant - TODO confirm against divcoeff.c.
extern (C)
{
    bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
    bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
}
62 
/*******************************
 * Exchange the register numbers stored at two locations.
 * Params:
 *      a = first register slot
 *      b = second register slot
 */

private void swap(reg_t *a,reg_t *b)
{
    const reg_t saved = *b;
    *b = *a;
    *a = saved;
}
73 
74 
/*******************************************
 * Determine whether an operand may only be accessed through a MOV.
 *
 * This applies only when generating position independent code for
 * EX_OSX64 targets, and only to OPvar elems whose symbol has storage
 * class SCglobal, SCextern, SCcomdat or SCcomdef: fixups for these can
 * only be done with a MOV.
 * Params:
 *      e = elem to check
 * Returns: true if cannot use this EA in anything other than a MOV instruction.
 */

bool movOnly(const elem *e)
{
    if (!(config.exe & EX_OSX64))
        return false;
    if (!(config.flags3 & CFG3pic))
        return false;
    if (e.Eoper != OPvar)
        return false;

    const sc = e.EV.Vsym.Sclass;
    return sc == SCglobal || sc == SCextern ||
           sc == SCcomdat || sc == SCcomdef;
}
91 
/********************************
 * Determine which registers an instruction's addressing mode reads.
 * The addressing mode is taken from the rm part of the modregrm byte
 * (plus the sib byte and REX bits when present).
 * Params:
 *      c = instruction to examine
 * Returns:
 *      mask of index registers, 0 if the operand is a register
 */

regm_t idxregm(const code* c)
{
    const modrm = c.Irm;
    if ((modrm & 0xC0) == 0xC0)         // mod == 3: operand is a register, no EA
        return 0;

    const rmfield = modrm & 7;
    if (I16)
    {
        // registers used by each of the eight 16 bit addressing forms
        static immutable ubyte[8] idxrm  = [mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX];
        return idxrm[rmfield];
    }

    const rexB = (c.Irex & REX_B) ? 8 : 0;
    if (rmfield != 4)                   // no sib byte: rm names the register
        return mask(rmfield | rexB);

    // sib byte present
    const sib = c.Isib;
    const rexX = (c.Irex & REX_X) ? 8 : 0;

    // scaled index reg
    regm_t result = mask(((sib >> 3) & 7) | rexX);

    // base == 5 with mod == 0 contributes no base register
    const base = sib & 7;
    const noBase = base == 5 && (modrm & 0xC0) == 0;
    if (!noBase)
        result |= mask(base | rexB);
    return result;
}
130 
131 
static if (TARGET_WINDOS)
{
/***************************
 * Generate a call to a floating point runtime library routine for a
 * binary operator, unless the 8087 is used inline, in which case the
 * work is handed to orth87().
 * Params:
 *      cdb = code builder receiving the generated code
 *      e = binary operator elem, operands are e.EV.E1 and e.EV.E2
 *      pretregs = where the result is wanted
 *      clib = library routine for the double version of the operation;
 *             adjusted to the float version when the left operand is TYfloat
 */

void opdouble(ref CodeBuilder cdb, elem *e,regm_t *pretregs,uint clib)
{
    if (config.inline8087)
    {
        orth87(cdb,e,pretregs);
        return;
    }

    elem *eleft = e.EV.E1;
    elem *eright = e.EV.E2;

    // Pick where each operand goes for the library routine
    regm_t leftregs;
    regm_t rightregs;
    if (tybasic(eleft.Ety) == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    // convert to float operation
        leftregs = FLOATREGS;
        rightregs = FLOATREGS2;
    }
    else if (I32)
    {
        leftregs = DOUBLEREGS_32;
        rightregs = DOUBLEREGS2_32;
    }
    else
    {
        leftregs = mSTACK;
        rightregs = DOUBLEREGS_16;
    }

    codelem(cdb,eleft, &leftregs,false);

    // Keep stackclean raised while the right operand is evaluated
    // if the left operand ended up on the stack
    const leftOnStack = (leftregs & mSTACK) != 0;
    if (leftOnStack)
        cgstate.stackclean++;
    scodelem(cdb,eright, &rightregs, leftregs & ~mSTACK, false);
    if (leftOnStack)
        cgstate.stackclean--;

    callclib(cdb, e, clib, pretregs, 0);
}
}
174 
/*****************************
 * Handle operators which are more or less orthogonal
 * ( + - & | ^ )
 *
 * Generates code for OPadd, OPmin, OPand, OPor and OPxor; any other
 * operator hits assert(0). Floating point and XMM operand types are
 * forwarded to orthxmm(), orth87() or (TARGET_WINDOS only) opdouble().
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = binary operator elem, operands are e.EV.E1 and e.EV.E2
 *      pretregs = mask of registers/flags wanted for the result; if 0 the
 *                 result is not wanted and both operands are merely evaluated
 */

void cdorth(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false); // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty = tybasic(e.Ety);
    const ty1 = tybasic(e1.Ety);

    // Floating point operands: dispatch to the XMM, x87 or library call paths
    if (tyfloating(ty1))
    {
        if (tyvector(ty1) ||
            config.fpxmmregs && tyxmmreg(ty1) &&
            !(*pretregs & mST0) &&
            !(*pretregs & mST01) &&
            !(ty == TYldouble || ty == TYildouble)  // watch out for shrinkLongDoubleConstantIfPossible()
           )
        {
            orthxmm(cdb,e,pretregs);
            return;
        }
        if (config.inline8087)
        {
            orth87(cdb,e,pretregs);
            return;
        }
        static if (TARGET_WINDOS)
        {
            opdouble(cdb,e,pretregs,(e.Eoper == OPadd) ? CLIB.dadd
                                                       : CLIB.dsub);
            return;
        }
        else
        {
            assert(0);
        }
    }
    // Non-floating types that live in XMM registers
    if (tyxmmreg(ty1))
    {
        orthxmm(cdb,e,pretregs);
        return;
    }

    // op1: opcode for the low (or only) word, op2: opcode for the high word
    opcode_t op1, op2;
    // mode: value for the reg field of modregrm when the 0x81 immediate opcode is used
    uint mode;
    // Raised around the getlvalue() call in the LEA path when e.Ecount is set,
    // which disables the LEA path for nested invocations
    __gshared int nest;

    const ty2 = tybasic(e2.Ety);
    const e2oper = e2.Eoper;
    const sz = _tysize[ty];
    const isbyte = (sz == 1);
    // CFopsize for SHORTSIZE operations when not generating 16 bit code, else 0
    code_flags_t word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
    bool test = false;                // assume we destroyed lvalue

    switch (e.Eoper)
    {
        case OPadd:     mode = 0;
                        op1 = 0x03; op2 = 0x13; break;  /* ADD, ADC     */
        case OPmin:     mode = 5;
                        op1 = 0x2B; op2 = 0x1B; break;  /* SUB, SBB     */
        case OPor:      mode = 1;
                        op1 = 0x0B; op2 = 0x0B; break;  /* OR , OR      */
        case OPxor:     mode = 6;
                        op1 = 0x33; op2 = 0x33; break;  /* XOR, XOR     */
        case OPand:     mode = 4;
                        op1 = 0x23; op2 = 0x23;         /* AND, AND     */
                        if (tyreg(ty1) &&
                            *pretregs == mPSW)          /* if flags only */
                        {
                            // Only the flags are wanted, so TEST can be used
                            // and the left operand need not be modified
                            test = true;
                            op1 = 0x85;                 /* TEST         */
                            mode = 0;
                        }
                        break;

        default:
            assert(0);
    }
    op1 ^= isbyte;                                  /* if byte operation    */

    // Compute numwords, the number of words to operate on.
    int numwords = 1;
    if (!I16)
    {
        /* Cannot operate on longs and then do a 'paint' to a far       */
        /* pointer, because far pointers are 48 bits and longs are 32.  */
        /* Therefore, numwords can never be 2.                          */
        assert(!(tyfv(ty1) && tyfv(ty2)));
        if (sz == 2 * REGSIZE)
        {
            numwords++;
        }
    }
    else
    {
        /* If ty is a TYfptr, but both operands are long, treat the     */
        /* operation as a long.                                         */
        if ((tylong(ty1) || ty1 == TYhptr) &&
            (tylong(ty2) || ty2 == TYhptr))
            numwords++;
    }

    // Special cases where only flags are set
    // (TEST directly against the memory/variable operand e1)
    if (test && _tysize[ty1] <= REGSIZE &&
        (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        && !movOnly(e1)
       )
    {
        // Handle the case of (var & const)
        if (e2.Eoper == OPconst && el_signx32(e2))
        {
            code cs = void;
            cs.Iflags = 0;
            cs.Irex = 0;
            getlvalue(cdb,&cs,e1,0);
            targ_size_t value = e2.EV.Vpointer;
            // Truncate the constant to the operand size
            if (sz == 2)
                value &= 0xFFFF;
            else if (sz == 4)
                value &= 0xFFFFFFFF;
            reg_t reg;
            // presumably reghasvalue() reports a register already known to
            // hold `value` - TODO confirm; if so use TEST EA,reg
            if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,value,&reg))
            {
                code_newreg(&cs, reg);
                if (I64 && isbyte && reg >= 4)
                    cs.Irex |= REX;
            }
            else
            {
                // otherwise use TEST EA,immediate
                if (sz == 8 && !I64)
                {
                    assert(value == cast(int)value);    // sign extend imm32
                }
                op1 = 0xF7;
                cs.IEV2.Vint = cast(targ_int)value;
                cs.IFL2 = FLconst;
            }
            // NOTE(review): op1 was already XORed with isbyte after the switch
            // above, so when op1 is still the TEST opcode this second XOR turns
            // 0x84 back into 0x85 for byte operands - confirm this is intended.
            cs.Iop = op1 ^ isbyte;
            cs.Iflags |= word | CFpsw;
            freenode(e1);
            freenode(e2);
            cdb.gen(&cs);
            return;
        }

        // Handle (exp & reg)
        reg_t reg;
        regm_t retregs;
        if (isregvar(e2,&retregs,&reg))
        {
            code cs = void;
            cs.Iflags = 0;
            cs.Irex = 0;
            getlvalue(cdb,&cs,e1,0);
            code_newreg(&cs, reg);
            if (I64 && isbyte && reg >= 4)
                cs.Irex |= REX;
            // NOTE(review): same double XOR with isbyte as in the
            // (var & const) case above - confirm.
            cs.Iop = op1 ^ isbyte;
            cs.Iflags |= word | CFpsw;
            freenode(e1);
            freenode(e2);
            cdb.gen(&cs);
            return;
        }
    }

    // Instruction template shared by the remaining paths
    code cs = void;
    cs.Iflags = 0;
    cs.Irex = 0;

    // Look for possible uses of LEA
    if (e.Eoper == OPadd &&
        !(*pretregs & mPSW) &&                // flags aren't set by LEA
        !nest &&                              // could cause infinite recursion if e.Ecount
        (sz == REGSIZE || (I64 && sz == 4)))  // far pointers aren't handled
    {
        const rex = (sz == 8) ? REX_W : 0;

        // Handle the case of (e + &var)
        int e1oper = e1.Eoper;
        if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2.Ecount && stackfl[el_fl(e2)])))
                || // LEA costs too much for simple EAs on older CPUs
            (e2oper == OPconst && (e1.Eoper == OPcall || e1.Eoper == OPcallns) && !(*pretregs & mAX)) ||
            (!I16 && (isscaledindex(e1) || isscaledindex(e2))) ||
            (!I16 && e1oper == OPvar && e1.EV.Vsym.Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2.EV.Vsym.Sfl == FLreg))) ||
            (e2oper == OPconst && e1oper == OPeq && e1.EV.E1.Eoper == OPvar) ||
            (!I16 && (e2oper == OPrelconst || e2oper == OPconst) && !e1.Ecount &&
             (e1oper == OPmul || e1oper == OPshl) &&
             e1.EV.E2.Eoper == OPconst &&
             ssindex(e1oper,e1.EV.E2.EV.Vuns)
            ) ||
            (!I16 && e1.Ecount)
           )
        {
            // Let getlvalue() turn the whole expression into an effective
            // address, then load that address with a single LEA.
            // nest is raised for the duration of the call if e.Ecount is set.
            const inc = e.Ecount != 0;
            nest += inc;
            code csx = void;
            getlvalue(cdb,&csx,e,0);
            nest -= inc;
            reg_t regx;
            allocreg(cdb,pretregs,&regx,ty);
            csx.Iop = LEA;
            code_newreg(&csx, regx);
            cdb.gen(&csx);          // LEA regx,EA
            if (rex)
                code_orrex(cdb.last(), rex);
            return;
        }

        // Handle the case of ((e + c) + e2)
        if (!I16 &&
            e1oper == OPadd &&
            (e1.EV.E2.Eoper == OPconst && el_signx32(e1.EV.E2) ||
             e2oper == OPconst && el_signx32(e2)) &&
            !e1.Ecount
           )
        {
            // edisp becomes the constant displacement of the LEA,
            // ebase is evaluated into the base register
            elem *ebase;
            elem *edisp;
            if (e2oper == OPconst && el_signx32(e2))
            {   edisp = e2;
                ebase = e1.EV.E2;
            }
            else
            {   edisp = e1.EV.E2;
                ebase = e2;
            }

            auto e11 = e1.EV.E1;
            regm_t retregs = *pretregs & ALLREGS;
            if (!retregs)
                retregs = ALLREGS;
            // ss: scale field of the LEA that is generated first
            // ss2: if not 0, a preliminary LEA/IMUL is generated with scale ss
            //      and the final LEA then uses ss2 as its scale field
            int ss = 0;
            int ss2 = 0;

            // Handle the case of (((e *  c1) + c2) + e2)
            // Handle the case of (((e << c1) + c2) + e2)
            if ((e11.Eoper == OPmul || e11.Eoper == OPshl) &&
                e11.EV.E2.Eoper == OPconst &&
                !e11.Ecount
               )
            {
                const co1 = cast(targ_size_t)el_tolong(e11.EV.E2);
                if (e11.Eoper == OPshl)
                {
                    // shift counts 0..3 fit the scale field directly
                    if (co1 > 3)
                        goto L13;
                    ss = cast(int)co1;
                }
                else
                {
                    // Multipliers of the form (2^ss + 1) * 2^ss2:
                    // first compute reg*(2^ss + 1), then scale by 2^ss2
                    ss2 = 1;
                    switch (co1)
                    {
                        case  6:        ss = 1;                 break;
                        case 12:        ss = 1; ss2 = 2;        break;
                        case 24:        ss = 1; ss2 = 3;        break;
                        case 10:        ss = 2;                 break;
                        case 20:        ss = 2; ss2 = 2;        break;
                        case 40:        ss = 2; ss2 = 3;        break;
                        case 18:        ss = 3;                 break;
                        case 36:        ss = 3; ss2 = 2;        break;
                        case 72:        ss = 3; ss2 = 3;        break;
                        default:
                            ss2 = 0;
                            goto L13;
                    }
                }
                // The multiply/shift is folded into the addressing mode,
                // so only its left operand remains to be evaluated
                freenode(e11.EV.E2);
                freenode(e11);
                e11 = e11.EV.E1;
              L13:
                { }
            }

            reg_t reg11;
            regm_t regm;
            if (e11.Eoper == OPvar && isregvar(e11,&regm,&reg11))
            {
                // e11 is already in a register, use it in place
                if (tysize(e11.Ety) <= REGSIZE)
                    retregs = mask(reg11); // only want the LSW
                else
                    retregs = regm;
                freenode(e11);
            }
            else
                codelem(cdb,e11,&retregs,false);

            // Evaluate the base into a different register, preserving retregs
            regm_t rretregs = ALLREGS & ~retregs & ~mBP;
            scodelem(cdb,ebase,&rretregs,retregs,true);
            reg_t reg;
            {
                // Allocate the result register, avoiding the base register
                regm_t sregs = *pretregs & ~rretregs;
                if (!sregs)
                    sregs = ALLREGS & ~rretregs;
                allocreg(cdb,&sregs,&reg,ty);
            }

            assert((retregs & (retregs - 1)) == 0); // must be only one register
            assert((rretregs & (rretregs - 1)) == 0); // must be only one register

            auto  reg1 = findreg(retregs);
            const reg2 = findreg(rretregs);

            if (ss2)
            {
                assert(reg != reg2);
                // A sib base field of 5 (BP) with mod 0 does not mean "base is BP",
                // so the LEA form below cannot be used; multiply instead
                if ((reg1 & 7) == BP)
                {   static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1];

                    // IMUL reg,imm32
                    cdb.genc2(0x69,modregxrmx(3,reg,reg1),imm32[ss]);
                }
                else
                {   // LEA reg,[reg1*ss][reg1]
                    cdb.gen2sib(LEA,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7));
                    if (reg1 & 8)
                        code_orrex(cdb.last(), REX_X | REX_B);
                }
                if (rex)
                    code_orrex(cdb.last(), rex);
                // The partial product is now in reg; it becomes the index register
                reg1 = reg;
                ss = ss2;                               // use *2 for scale
            }

            cs.Iop = LEA;                      // LEA reg,c[reg1*ss][reg2]
            cs.Irm = modregrm(2,reg & 7,4);
            cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7);
            assert(reg2 != BP);
            cs.Iflags = CFoff;
            cs.Irex = cast(ubyte)rex;
            // Registers 8 and up need the matching REX extension bit
            if (reg & 8)
                cs.Irex |= REX_R;
            if (reg1 & 8)
                cs.Irex |= REX_X;
            if (reg2 & 8)
                cs.Irex |= REX_B;
            cs.IFL1 = FLconst;
            cs.IEV1.Vsize_t = edisp.EV.Vuns;

            freenode(edisp);
            freenode(e1);
            cdb.gen(&cs);
            fixresult(cdb,e,mask(reg),pretregs);
            return;
        }
    }

    // posregs: registers the result may be computed in
    regm_t posregs = (isbyte) ? BYTEREGS : (mES | ALLREGS | mBP);
    regm_t retregs = *pretregs & posregs;
    if (retregs == 0)                   /* if no return regs speced     */
                                        /* (like if wanted flags only)  */
        retregs = ALLREGS & posregs;    // give us some

    if (ty1 == TYhptr || ty2 == TYhptr)
    {     /* Generate code for add/subtract of huge pointers.
           No attempt is made to generate very good code.
         */
        retregs = (retregs & mLSW) | mDX;
        regm_t rretregs;
        if (ty1 == TYhptr)
        {   // hptr +- long
            rretregs = mLSW & ~(retregs | regcon.mvar);
            if (!rretregs)
                rretregs = mLSW;
            rretregs |= mCX;
            codelem(cdb,e1,&rretregs,0);
            retregs &= ~rretregs;
            if (!(retregs & mLSW))
                retregs |= mLSW & ~rretregs;

            scodelem(cdb,e2,&retregs,rretregs,true);
        }
        else
        {   // long + hptr
            codelem(cdb,e1,&retregs,0);
            rretregs = (mLSW | mCX) & ~retregs;
            if (!(rretregs & mLSW))
                rretregs |= mLSW;
            scodelem(cdb,e2,&rretregs,retregs,true);
        }
        getregs(cdb,rretregs | retregs);
        // mreg:lreg is the register pair holding the retregs operand
        const mreg = DX;
        const lreg = findreglsw(retregs);
        if (e.Eoper == OPmin)
        {   // negate retregs
            cdb.gen2(0xF7,modregrm(3,3,mreg));     // NEG mreg
            cdb.gen2(0xF7,modregrm(3,3,lreg));     // NEG lreg
            code_orflag(cdb.last(),CFpsw);
            cdb.genc2(0x81,modregrm(3,3,mreg),0);  // SBB mreg,0
        }
        const lrreg = findreglsw(rretregs);
        genregs(cdb,0x03,lreg,lrreg);              // ADD lreg,lrreg
        code_orflag(cdb.last(),CFpsw);
        genmovreg(cdb,lrreg,CX);      // MOV lrreg,CX
        cdb.genc2(0x81,modregrm(3,2,mreg),0);      // ADC mreg,0
        genshift(cdb);                             // MOV CX,offset __AHSHIFT
        cdb.gen2(0xD3,modregrm(3,4,mreg));         // SHL mreg,CL
        genregs(cdb,0x03,mreg,lrreg);              // ADD mreg,MSREG(h)
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    // Evaluate the left operand into retregs; reg is the register the
    // operation will be applied to
    regm_t rretregs;
    reg_t reg;
    if (_tysize[ty1] > REGSIZE && numwords == 1)
    {     /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */

        debug
        if (_tysize[ty2] != REGSIZE)
        {
            printf("e = %p, e.Eoper = ",e);
            WROP(e.Eoper);
            printf(" e1.Ety = ");
            WRTYxx(ty1);
            printf(" e2.Ety = ");
            WRTYxx(ty2);
            printf("\n");
            elem_print(e);
        }

        assert(_tysize[ty2] == REGSIZE);

        /* Watch out for the case here where you are going to OP reg,EA */
        /* and both the reg and EA use ES! Prevent this by forcing      */
        /* reg into the regular registers.                              */
        if ((e2oper == OPind ||
            (e2oper == OPvar && el_fl(e2) == FLfardata)) &&
            !e2.Ecount)
        {
            retregs = ALLREGS;
        }

        codelem(cdb,e1,&retregs,test != 0);
        reg = findreglsw(retregs);      /* reg is the register with the offset*/
    }
    else
    {
        regm_t regm;

        /* if (tyword + TYfptr) */
        if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE)
        {   retregs = ~*pretregs & ALLREGS;

            /* if retregs doesn't have any regs in it that aren't reg vars */
            if ((retregs & ~regcon.mvar) == 0)
                retregs |= mAX;
        }
        else if (numwords == 2 && retregs & mES)
            retregs = (retregs | mMSW) & ALLREGS;

        // Determine if we should swap operands, because
        //      mov     EAX,x
        //      add     EAX,reg
        // is faster than:
        //      mov     EAX,reg
        //      add     EAX,x
        else if (e2oper == OPvar &&
                 e1.Eoper == OPvar &&
                 e.Eoper != OPmin &&
                 isregvar(e1,&regm,null) &&
                 regm != retregs &&
                 _tysize[ty1] == _tysize[ty2])
        {
            // Note that e2oper keeps its value; both operands are OPvar here
            elem *es = e1;
            e1 = e2;
            e2 = es;
        }
        codelem(cdb,e1,&retregs,test != 0);         // eval left leaf
        reg = findreg(retregs);
    }
    // Generate the operation according to the form of the right operand.
    // rreg: register holding the right operand (when there is one)
    // rval: nonzero if reghasvalue() supplied rreg for a constant
    // i: the constant value of the right operand
    reg_t rreg;
    int rval;
    targ_size_t i;
    switch (e2oper)
    {
        case OPind:                                 /* if addressing mode   */
            if (!e2.Ecount)                         /* if not CSE           */
                    goto L1;                        /* try OP reg,EA        */
            goto default;

        default:                                    /* operator node        */
        L2:
            // General case: evaluate e2 into registers and generate OP reg,rreg
            rretregs = ALLREGS & ~retregs;
            /* Be careful not to do arithmetic on ES        */
            if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE && *pretregs != mPSW)
                rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs;
            else if (isbyte)
                rretregs &= BYTEREGS;

            scodelem(cdb,e2,&rretregs,retregs,true);       // get rvalue
            rreg = (_tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs);
            if (!test)
                getregs(cdb,retregs);          // we will trash these regs
            if (numwords == 1)                              /* ADD reg,rreg */
            {
                /* reverse operands to avoid moving around the segment value */
                if (_tysize[ty2] > REGSIZE)
                {
                    getregs(cdb,rretregs);
                    genregs(cdb,op1,rreg,reg);
                    retregs = rretregs;     // reverse operands
                }
                else
                {
                    genregs(cdb,op1,reg,rreg);
                    if (!I16 && *pretregs & mPSW)
                        cdb.last().Iflags |= word;
                }
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
                if (I64 && isbyte && (reg >= 4 || rreg >= 4))
                    code_orrex(cdb.last(), REX);
            }
            else /* numwords == 2 */                /* ADD lsreg,lsrreg     */
            {
                // Low words first; add/subtract need the carry for the high words
                reg = findreglsw(retregs);
                rreg = findreglsw(rretregs);
                genregs(cdb,op1,reg,rreg);
                if (e.Eoper == OPadd || e.Eoper == OPmin)
                    code_orflag(cdb.last(),CFpsw);
                reg = findregmsw(retregs);
                rreg = findregmsw(rretregs);
                if (!(e2oper == OPu16_32 && // if second operand is 0
                      (op2 == 0x0B || op2 == 0x33)) // and OR or XOR
                   )
                    genregs(cdb,op2,reg,rreg);        // ADC msreg,msrreg
            }
            break;

        case OPrelconst:
            // Use the address as an immediate operand only when that is possible,
            // otherwise fall back to the general case
            if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
                goto default;
            if (sz != REGSIZE)
                goto L2;
            if (segfl[el_fl(e2)] != 3)              /* if not in data segment */
                goto L2;
            if (evalinregister(e2))
                goto L2;
            cs.IEV2.Voffset = e2.EV.Voffset;
            cs.IEV2.Vsym = e2.EV.Vsym;
            cs.Iflags |= CFoff;
            i = 0;                          /* no INC or DEC opcode         */
            rval = 0;
            goto L3;

        case OPconst:
            if (tyfv(ty2))
                goto L2;
            if (numwords == 1)
            {
                // Constants that do not fit a sign extended 32 bit immediate
                // go through the general case
                if (!el_signx32(e2))
                    goto L2;
                i = e2.EV.Vpointer;
                if (word)
                {
                    // When optimizing for speed and the flags are not needed,
                    // drop the operand size override and use a zero extended constant
                    if (!(*pretregs & mPSW) &&
                        config.flags4 & CFG4speed &&
                        (e.Eoper == OPor || e.Eoper == OPxor || test ||
                         (e1.Eoper != OPvar && e1.Eoper != OPind)))
                    {   word = 0;
                        i &= 0xFFFF;
                    }
                }
                rval = reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&rreg);
                cs.IEV2.Vsize_t = i;
            L3:
                if (!test)
                    getregs(cdb,retregs);          // we will trash these regs
                // Undo the byte adjustment of op1; it is reapplied to cs.Iop below
                op1 ^= isbyte;
                cs.Iflags |= word;
                if (rval)
                {   // OP reg,rreg encoded with the operand direction reversed (op1 ^ 2),
                    // so rreg goes in the reg field and reg in the rm field
                    cs.Iop = op1 ^ 2;
                    mode = rreg;
                }
                else
                    cs.Iop = 0x81;                  // OP reg,immediate
                cs.Irm = modregrm(3,mode&7,reg&7);
                if (mode & 8)
                    cs.Irex |= REX_R;
                if (reg & 8)
                    cs.Irex |= REX_B;
                if (I64 && sz == 8)
                    cs.Irex |= REX_W;
                if (I64 && isbyte && (reg >= 4 || (rval && rreg >= 4)))
                    cs.Irex |= REX;
                cs.IFL2 = cast(ubyte)((e2.Eoper == OPconst) ? FLconst : el_fl(e2));
                /* Modify instruction for special cases */
                switch (e.Eoper)
                {
                    case OPadd:
                    {
                        int iop;

                        if (i == 1)
                            iop = 0;                    /* INC reg      */
                        else if (i == -1)
                            iop = 8;                    /* DEC reg      */
                        else
                            break;
                        // Short form 0x40+reg / 0x48+reg
                        cs.Iop = (0x40 | iop | reg) ^ isbyte;
                        // Use the 0xFF /0 (INC) or /1 (DEC) form instead for 64 bit code,
                        // and for byte operations that need the flags
                        // (isbyte is XORed in below, giving 0xFE)
                        if ((isbyte && *pretregs & mPSW) || I64)
                        {
                            cs.Irm = cast(ubyte)(modregrm(3,0,reg & 7) | iop);
                            cs.Iop = 0xFF;
                        }
                        break;
                    }

                    case OPand:
                        if (test)
                            cs.Iop = rval ? op1 : 0xF7; // TEST
                        break;

                    default:
                        break;
                }
                if (*pretregs & mPSW)
                    cs.Iflags |= CFpsw;
                cs.Iop ^= isbyte;
                cdb.gen(&cs);
                cs.Iflags &= ~CFpsw;
            }
            else if (numwords == 2)
            {
                // Two instructions: OP lsreg,low constant then OP msreg,high constant
                getregs(cdb,retregs);
                reg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                cs.Iop = 0x81;
                cs.Irm = modregrm(3,mode,lsreg);
                cs.IFL2 = FLconst;
                const msw = cast(targ_int)MSREG(e2.EV.Vllong);
                cs.IEV2.Vint = e2.EV.Vlong;
                switch (e.Eoper)
                {
                    case OPadd:
                    case OPmin:
                        cs.Iflags |= CFpsw;
                        break;

                    default:
                        break;
                }
                cdb.gen(&cs);
                cs.Iflags &= ~CFpsw;

                // Same instruction with the rm field replaced by the high register
                cs.Irm = cast(ubyte)((cs.Irm & modregrm(3,7,0)) | reg);
                cs.IEV2.Vint = msw;
                if (e.Eoper == OPadd)
                    cs.Irm |= modregrm(0,2,0);      /* ADC          */
                cdb.gen(&cs);
            }
            else
                assert(0);
            freenode(e2);
            break;

        case OPvar:
            if (movOnly(e2))
                goto L2;
        L1:
            // OP reg,EA with e2 used directly as a memory operand
            if (tyfv(ty2))
                goto L2;
            if (!test)
                getregs(cdb,retregs);          // we will trash these regs
            loadea(cdb,e2,&cs,op1,
                   ((numwords == 2) ? findreglsw(retregs) : reg),
                   0,retregs,retregs);
            if (!I16 && word)
            {   if (*pretregs & mPSW)
                    code_orflag(cdb.last(),word);
                else
                    cdb.last().Iflags &= ~cast(int)word;
            }
            else if (numwords == 2)
            {
                if (e.Eoper == OPadd || e.Eoper == OPmin)
                    code_orflag(cdb.last(),CFpsw);
                reg = findregmsw(retregs);
                if (!OTleaf(e2.Eoper))
                {   getlvalue_msw(&cs);
                    cs.Iop = op2;
                    NEWREG(cs.Irm,reg);
                    cdb.gen(&cs);                 // ADC reg,data+2
                }
                else
                    loadea(cdb,e2,&cs,op2,reg,REGSIZE,retregs,0);
            }
            else if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
            freenode(e2);
            break;
    }

    // The last generated instruction set the flags for results that fit in a register
    if (sz <= REGSIZE && *pretregs & mPSW)
    {
        /* If the expression is (_tls_array + ...), then the flags are not set
         * since the linker may rewrite these instructions into something else.
         */
        if (I64 && e.Eoper == OPadd && e1.Eoper == OPvar)
        {
            const s = e1.EV.Vsym;
            // compares 10 bytes, i.e. "tls_array" including its terminating 0
            if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0)
            {
                goto L7;                        // don't assume flags are set
            }
        }
        code_orflag(cdb.last(),CFpsw);
        *pretregs &= ~mPSW;                    // flags already set
    L7: { }
    }
    fixresult(cdb,e,retregs,pretregs);
}
900 
901 
/*****************************
 * Handle multiply.
 *
 * Floating point and vector operands are handed off to orthxmm(), orth87()
 * or opdouble(). Integer operands are dispatched on the operator of the
 * right operand (e2):
 *
 *  - widening conversion (same conversion on both operands, register pair
 *    result): one MUL/IMUL of the unconverted operands, result in DX:AX
 *  - constant: a register pair IMUL/MUL sequence (I32), LEA/SHL/ADD
 *    sequences for a fixed set of small factors, or IMUL reg,reg,imm
 *  - variable, or indirection that is not a CSE: IMUL reg,EA or MUL/IMUL EA
 *  - anything else: both operands are evaluated into registers first
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the multiply elem; its operands are e.EV.E1 and e.EV.E2
 *      pretregs = mask of registers the result is wanted in; if it is 0 the
 *                 result is not wanted and the operands are merely evaluated.
 *                 It is passed on to fixresult() with the registers the
 *                 result actually ended up in.
 */

void cdmul(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdmul()\n");
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    if (*pretregs == 0)                         // if don't want result
    {
        codelem(cdb,e1,pretregs,false);      // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    //printf("cdmul(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    const tyml = tybasic(e1.Ety);
    const ty = tybasic(e.Ety);
    const oper = e.Eoper;

    if (tyfloating(tyml))
    {
        if (tyvector(tyml) ||
            config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
            !(*pretregs & mST0) &&
            !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
            !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
            !(ty == TYllong)  //   or passing to function through integer register
           )
        {
            orthxmm(cdb,e,pretregs);
            return;
        }
        static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
            orth87(cdb,e,pretregs);
        else
            opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);

        return;
    }

    if (tyxmmreg(tyml))
    {
        orthxmm(cdb,e,pretregs);
        return;
    }

    const uns = tyuns(tyml) || tyuns(e2.Ety);  // true if unsigned operation, false if signed
    const isbyte = tybyte(e.Ety) != 0;
    const sz = _tysize[tyml];
    const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
    const uint grex = rex << 16;                // rex positioned for OR-ing into a modregrm value
    const OPER opunslng = I16 ? OPu16_32 : OPu32_64;   // the unsigned widening conversion to a register pair

    code cs = void;
    cs.Iflags = 0;
    cs.Irex = 0;

    switch (e2.Eoper)
    {
        case OPu16_32:
        case OPs16_32:
        case OPu32_64:
        case OPs32_64:
        {
            /* Only usable when the result is a register pair, both operands
             * are the same conversion, and neither is a CSE. Then a single
             * one-operand multiply of the unconverted operands produces the
             * whole result in DX:AX.
             */
            if (sz != 2 * REGSIZE || e1.Eoper != e2.Eoper ||
                e1.Ecount || e2.Ecount)
                goto default;
            const ubyte opx = (e2.Eoper == opunslng) ? 4 : 5;   // F7 /4 is MUL, F7 /5 is IMUL
            regm_t retregsx = mAX;
            codelem(cdb,e1.EV.E1,&retregsx,false);    // eval left leaf
            if (e2.EV.E1.Eoper == OPvar ||
                (e2.EV.E1.Eoper == OPind && !e2.EV.E1.Ecount)
               )
            {
                loadea(cdb,e2.EV.E1,&cs,0xF7,opx,0,mAX,mAX | mDX);   // OP AX,EA
            }
            else
            {
                regm_t rretregsx = ALLREGS & ~mAX;
                scodelem(cdb,e2.EV.E1,&rretregsx,retregsx,true); // get rvalue
                getregs(cdb,mAX | mDX);
                const rregx = findreg(rretregsx);
                cdb.gen2(0xF7,grex | modregrmx(3,opx,rregx)); // OP AX,rregx
            }
            freenode(e.EV.E1);
            freenode(e2);
            fixresult(cdb,e,mAX | mDX,pretregs);
            return;
        }

        case OPconst:
            const e2factor = cast(targ_size_t)el_tolong(e2);

            // Multiply by a constant
            if (I32 && sz == REGSIZE * 2)
            {
                /*  if (msw)
                      IMUL    EDX,EDX,lsw
                      IMUL    reg,EAX,msw
                      ADD     reg,EDX
                    else
                      IMUL    reg,EDX,lsw
                    MOV       EDX,lsw
                    MUL       EDX
                    ADD       EDX,reg
                 */
                regm_t retregs = mAX | mDX;
                codelem(cdb,e1,&retregs,false);    // eval left leaf
                reg_t reg = allocScratchReg(cdb, allregs & ~(mAX | mDX));
                getregs(cdb,mDX | mAX);

                // Split the constant into its low and high register-sized words
                const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
                const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));

                if (msw)
                {
                    genmulimm(cdb,DX,DX,lsw);           // IMUL EDX,EDX,lsw
                    genmulimm(cdb,reg,AX,msw);          // IMUL reg,EAX,msw
                    cdb.gen2(0x03,modregrm(3,reg,DX));  // ADD  reg,EDX
                }
                else
                    genmulimm(cdb,reg,DX,lsw);          // IMUL reg,EDX,lsw

                movregconst(cdb,DX,lsw,0);              // MOV EDX,lsw
                getregs(cdb,mDX);
                cdb.gen2(0xF7,modregrm(3,4,DX));        // MUL EDX
                cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD EDX,reg

                const resregx = mDX | mAX;
                freenode(e2);
                fixresult(cdb,e,resregx,pretregs);
                return;
            }

            // The remaining constant forms need a register-sized operand
            // and a constant that passes el_signx32()
            if (sz > REGSIZE || !el_signx32(e2))
                goto default;

            if (config.target_cpu >= TARGET_80286)
            {
                if (I32 || I64)
                {
                    // See if we can use an LEA instruction
                    int ss;         // scale field of the first LEA: computes reg*(2**ss + 1)
                    int ss2 = 0;    // if non-zero, scale field of a second LEA: multiplies by 2**ss2
                    int shift;      // for L5: amount to shift sreg left by before the second LEA

                    switch (e2factor)
                    {
                        case 12:    ss = 1; ss2 = 2; goto L4;   // 3 * 4
                        case 24:    ss = 1; ss2 = 3; goto L4;   // 3 * 8

                        case 6:                                 // 3 * 2 (the 2 comes from ADD reg,reg)
                        case 3:     ss = 1; goto L4;

                        case 20:    ss = 2; ss2 = 2; goto L4;   // 5 * 4
                        case 40:    ss = 2; ss2 = 3; goto L4;   // 5 * 8

                        case 10:                                // 5 * 2
                        case 5:     ss = 2; goto L4;

                        case 36:    ss = 3; ss2 = 2; goto L4;   // 9 * 4
                        case 72:    ss = 3; ss2 = 3; goto L4;   // 9 * 8

                        case 18:                                // 9 * 2
                        case 9:     ss = 3; goto L4;

                        L4:
                        {
                            // BP and R13 are excluded because reg is used as an LEA base register
                            regm_t resreg = *pretregs & ALLREGS & ~(mBP | mR13);
                            if (!resreg)
                                resreg = isbyte ? BYTEREGS : ALLREGS & ~(mBP | mR13);

                            codelem(cdb,e.EV.E1,&resreg,false);
                            getregs(cdb,resreg);
                            reg_t reg = findreg(resreg);

                            cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                        modregxrmx(ss,reg,reg));        // LEA reg,[ss*reg][reg]
                            assert((reg & 7) != BP);
                            if (ss2)
                            {
                                cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                               modregxrm(ss2,reg,5));
                                cdb.last().IFL1 = FLconst;
                                cdb.last().IEV1.Vint = 0;               // LEA reg,0[ss2*reg]
                            }
                            else if (!(e2factor & 1))                   // if even factor
                            {
                                genregs(cdb,0x03,reg,reg);              // ADD reg,reg
                                code_orrex(cdb.last(),rex);
                            }
                            freenode(e2);
                            fixresult(cdb,e,resreg,pretregs);
                            return;
                        }
                        case 37:                                // 5 + 8 * 4
                        case 74:    shift = 2;                  // 37 * 2
                                    goto L5;
                        case 13:                                // 5 + 8
                        case 26:    shift = 0;                  // 13 * 2
                                    goto L5;
                        L5:
                        {
                            regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
                            regm_t resreg = *pretregs & (ALLREGS | mBP);
                            if (!resreg)
                                resreg = retregs;

                            // Don't use EBP
                            resreg &= ~(mBP | mR13);
                            if (!resreg)
                                resreg = retregs;
                            reg_t reg;
                            allocreg(cdb,&resreg,&reg,TYint);

                            // Evaluate the operand into a register distinct from the result register
                            regm_t sregm = (ALLREGS & ~mR13) & ~resreg;
                            codelem(cdb,e.EV.E1,&sregm,false);
                            uint sreg = findreg(sregm);
                            getregs(cdb,resreg | sregm);
                            assert((sreg & 7) != BP);
                            assert((reg & 7) != BP);
                            cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                                  modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
                            if (shift)
                                cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
                            cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                                  modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
                            if (!(e2factor & 1))                                  // if even factor
                            {
                                genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
                                code_orrex(cdb.last(),rex);
                            }
                            freenode(e2);
                            fixresult(cdb,e,resreg,pretregs);
                            return;
                        }

                        default:
                            break;
                    }
                }

                // No LEA sequence for this factor: use the immediate form of IMUL
                regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
                regm_t resreg = *pretregs & (ALLREGS | mBP);
                if (!resreg)
                    resreg = retregs;

                scodelem(cdb,e.EV.E1,&retregs,0,true);     // eval left leaf
                const regx = findreg(retregs);
                reg_t rreg;
                allocreg(cdb,&resreg,&rreg,e.Ety);

                // IMUL rreg,regx,imm
                cdb.genc2(0x69,grex | modregxrmx(3,rreg,regx),e2factor);
                freenode(e2);
                fixresult(cdb,e,resreg,pretregs);
                return;
            }
            goto default;

        case OPind:
            if (!e2.Ecount)                        // if not CSE
                    goto case OPvar;                        // try OP reg,EA
            goto default;

        default:                                    // OPconst and operators
            //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
            if (sz <= REGSIZE)
            {
                regm_t retregs = mAX;
                codelem(cdb,e1,&retregs,false);           // eval left leaf
                regm_t rretregs = isbyte ? BYTEREGS & ~mAX
                                         : ALLREGS & ~(mAX|mDX);
                scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
                getregs(cdb,mAX | mDX);     // trash these regs
                reg_t rreg = findreg(rretregs);
                // Opcode is 0xF6 for bytes, else 0xF7; /4 (MUL) if unsigned, /5 (IMUL) if signed
                cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,5 - uns,rreg)); // OP AX,rreg
                if (I64 && isbyte && rreg >= 4)
                    code_orrex(cdb.last(), REX);
                fixresult(cdb,e,mAX,pretregs);
                return;
            }
            else if (sz == 2 * REGSIZE)
            {
                regm_t retregs = mDX | mAX;
                codelem(cdb,e1,&retregs,false);           // eval left leaf
                if (config.target_cpu >= TARGET_PentiumPro)
                {
                    regm_t rretregs = allregs & ~retregs;           // second arg
                    scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue
                    const rlo = findreglsw(rretregs);       // register holding low word of second arg
                    const rhi = findregmsw(rretregs);       // register holding high word of second arg
                    /*  IMUL    rhi,EAX
                        IMUL    EDX,rlo
                        ADD     rhi,EDX
                        MUL     rlo
                        ADD     EDX,rhi
                     */
                    getregs(cdb,mAX|mDX|mask(rhi));
                    cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
                    cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
                    cdb.gen2(0x03,modregrm(3,rhi,DX));
                    cdb.gen2(0xF7,modregrm(3,4,rlo));
                    cdb.gen2(0x03,modregrm(3,DX,rhi));
                    fixresult(cdb,e,mDX|mAX,pretregs);
                    return;
                }
                else
                {
                    // Operands in DX:AX and CX:BX, multiply done by the CLIB.lmul library routine
                    regm_t rretregs = mCX | mBX;           // second arg
                    scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
                    callclib(cdb,e,CLIB.lmul,pretregs,0);
                    return;
                }
            }
            assert(0);

        case OPvar:
            if (!I16 && sz <= REGSIZE)
            {
                if (sz > 1)        // no byte version
                {
                    // Generate IMUL r32,r/m32
                    regm_t retregs = *pretregs & (ALLREGS | mBP);
                    if (!retregs)
                        retregs = ALLREGS;
                    codelem(cdb,e1,&retregs,false);        // eval left leaf
                    regm_t resreg = retregs;
                    loadea(cdb,e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs);
                    freenode(e2);
                    fixresult(cdb,e,resreg,pretregs);
                    return;
                }
                // byte operands fall through to the one-operand multiply below
            }
            else
            {
                if (sz == 2 * REGSIZE)
                {
                    if (e.EV.E1.Eoper != opunslng ||
                        e1.Ecount)
                        goto default;            // have to handle it with codelem()

                    /* Left operand is an unsigned widening of a register-sized
                     * value: multiply it by each word of EA and add the low
                     * word of the first product into the high word of the second.
                     */
                    regm_t retregs = ALLREGS & ~(mAX | mDX);
                    codelem(cdb,e1.EV.E1,&retregs,false);    // eval left leaf
                    const reg = findreg(retregs);
                    getregs(cdb,mAX);
                    genmovreg(cdb,AX,reg);            // MOV AX,reg
                    loadea(cdb,e2,&cs,0xF7,4,REGSIZE,mAX | mDX | mskl(reg),mAX | mDX);  // MUL EA+2
                    getregs(cdb,retregs);
                    cdb.gen1(0x90 + reg);                          // XCHG AX,reg
                    getregs(cdb,mAX | mDX);
                    if ((cs.Irm & 0xC0) == 0xC0)            // if EA is a register
                        loadea(cdb,e2,&cs,0xF7,4,0,mAX | mskl(reg),mAX | mDX); // MUL EA
                    else
                    {   getlvalue_lsw(&cs);
                        cdb.gen(&cs);                       // MUL EA
                    }
                    cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD DX,reg

                    freenode(e1);
                    fixresult(cdb,e,mAX | mDX,pretregs);
                    return;
                }
                assert(sz <= REGSIZE);
            }

            // One-operand multiply of AX by EA
            // loadea() handles CWD or CLR DX for divides
            regm_t retregs = sz <= REGSIZE ? mAX : mDX|mAX;
            codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
            loadea(cdb,e2,&cs,0xF7 ^ isbyte,5 - uns,0,
                   mAX,
                   mAX | mDX);
            freenode(e2);
            fixresult(cdb,e,mAX,pretregs);
            return;
    }
    assert(0);
}
1286 
1287 
1288 /*****************************
1289  * Handle divide, modulo and remquo.
1290  * Note that modulo isn't defined for doubles.
1291  */
1292 
1293 void cddiv(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1294 {
1295     //printf("cddiv()\n");
1296     elem *e1 = e.EV.E1;
1297     elem *e2 = e.EV.E2;
1298     if (*pretregs == 0)                         // if don't want result
1299     {
1300         codelem(cdb,e1,pretregs,false);      // eval left leaf
1301         *pretregs = 0;                          // in case they got set
1302         codelem(cdb,e2,pretregs,false);
1303         return;
1304     }
1305 
1306     //printf("cddiv(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
1307     const tyml = tybasic(e1.Ety);
1308     const ty = tybasic(e.Ety);
1309     const oper = e.Eoper;
1310 
1311     if (tyfloating(tyml))
1312     {
1313         if (tyvector(tyml) ||
1314             config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
1315             !(*pretregs & mST0) &&
1316             !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
1317             !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
1318             !(ty == TYllong)  //   or passing to function through integer register
1319            )
1320         {
1321             orthxmm(cdb,e,pretregs);
1322             return;
1323         }
1324         static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1325             orth87(cdb,e,pretregs);
1326         else
1327             opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);
1328 
1329         return;
1330     }
1331 
1332     if (tyxmmreg(tyml))
1333     {
1334         orthxmm(cdb,e,pretregs);
1335         return;
1336     }
1337 
1338     const uns = tyuns(tyml) || tyuns(e2.Ety);  // 1 if uint operation, 0 if not
1339     const isbyte = tybyte(e.Ety) != 0;
1340     const sz = _tysize[tyml];
1341     const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
1342     const uint grex = rex << 16;
1343 
1344     code cs = void;
1345     cs.Iflags = 0;
1346     cs.Irex = 0;
1347 
1348     switch (e2.Eoper)
1349     {
1350         case OPconst:
1351             auto d = cast(targ_size_t)el_tolong(e2);
1352             bool neg = false;
1353             const e2factor = d;
1354             if (!uns && cast(targ_llong)e2factor < 0)
1355             {   neg = true;
1356                 d = -d;
1357             }
1358 
1359             // Signed divide by a constant
1360             if ((d & (d - 1)) &&
1361                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1362                 config.flags4 & CFG4speed && !uns)
1363             {
1364                 /* R1 / 10
1365                  *
1366                  *  MOV     EAX,m
1367                  *  IMUL    R1
1368                  *  MOV     EAX,R1
1369                  *  SAR     EAX,31
1370                  *  SAR     EDX,shpost
1371                  *  SUB     EDX,EAX
1372                  *  IMUL    EAX,EDX,d
1373                  *  SUB     R1,EAX
1374                  *
1375                  * EDX = quotient
1376                  * R1 = remainder
1377                  */
1378                 assert(sz == 4 || sz == 8);
1379 
1380                 ulong m;
1381                 int shpost;
1382                 const int N = sz * 8;
1383                 const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);
1384 
1385                 regm_t regm = allregs & ~(mAX | mDX);
1386                 codelem(cdb,e1,&regm,false);       // eval left leaf
1387                 const reg_t reg = findreg(regm);
1388                 getregs(cdb,regm | mDX | mAX);
1389 
1390                 /* Algorithm 5.2
1391                  * if m>=2**(N-1)
1392                  *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
1393                  * else
1394                  *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
1395                  * if (neg)
1396                  *    q = -q
1397                  */
1398                 const bool mgt = mhighbit || m >= (1UL << (N - 1));
1399                 movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
1400                 cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL R1
1401                 if (mgt)
1402                     cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,R1
1403                 getregsNoSave(mAX);                                     // EAX no longer contains 'm'
1404                 genmovreg(cdb, AX, reg);                   // MOV EAX,R1
1405                 cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
1406                 if (shpost)
1407                     cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
1408                 reg_t r3;
1409                 if (neg && oper == OPdiv)
1410                 {
1411                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
1412                     r3 = AX;
1413                 }
1414                 else
1415                 {
1416                     cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
1417                     r3 = DX;
1418                 }
1419 
1420                 // r3 is quotient
1421                 regm_t resregx;
1422                 switch (oper)
1423                 {   case OPdiv:
1424                         resregx = mask(r3);
1425                         break;
1426 
1427                     case OPmod:
1428                         assert(reg != AX && r3 == DX);
1429                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1430                         {
1431                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
1432                         }
1433                         else
1434                         {
1435                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1436                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1437                             getregsNoSave(mAX);                             // EAX no longer contains 'd'
1438                         }
1439                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1440                         resregx = regm;
1441                         break;
1442 
1443                     case OPremquo:
1444                         assert(reg != AX && r3 == DX);
1445                         if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1446                         {
1447                             cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);     // IMUL EAX,EDX,d
1448                         }
1449                         else
1450                         {
1451                             movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d
1452                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1453                         }
1454                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1455                         genmovreg(cdb, AX, r3);                // MOV EAX,r3
1456                         if (neg)
1457                             cdb.gen2(0xF7,grex | modregrm(3,3,AX));         // NEG EAX
1458                         genmovreg(cdb, DX, reg);               // MOV EDX,R1
1459                         resregx = mDX | mAX;
1460                         break;
1461 
1462                     default:
1463                         assert(0);
1464                 }
1465                 freenode(e2);
1466                 fixresult(cdb,e,resregx,pretregs);
1467                 return;
1468             }
1469 
1470             // Unsigned divide by a constant
1471             if (e2factor > 2 && (e2factor & (e2factor - 1)) &&
1472                 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
1473                 config.flags4 & CFG4speed && uns)
1474             {
1475                 assert(sz == 4 || sz == 8);
1476 
1477                 reg_t r3;
1478                 regm_t regm;
1479                 reg_t reg;
1480                 ulong m;
1481                 int shpre;
1482                 int shpost;
1483                 if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
1484                 {
1485                     /* t1 = MULUH(m, n)
1486                      * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
1487                      *   MOV   EAX,reg
1488                      *   MOV   EDX,m
1489                      *   MUL   EDX
1490                      *   MOV   EAX,reg
1491                      *   SUB   EAX,EDX
1492                      *   SHR   EAX,1
1493                      *   LEA   R3,[EAX][EDX]
1494                      *   SHR   R3,shpost-1
1495                      */
1496                     assert(shpre == 0);
1497 
1498                     regm = allregs & ~(mAX | mDX);
1499                     codelem(cdb,e1,&regm,false);       // eval left leaf
1500                     reg = findreg(regm);
1501                     getregs(cdb,mAX | mDX);
1502                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1503                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1504                     getregs(cdb,regm | mDX | mAX);
1505                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
1506                     genmovreg(cdb,AX,reg);                   // MOV EAX,reg
1507                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
1508                     cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
1509                     regm_t regm3 = allregs;
1510                     if (oper == OPmod || oper == OPremquo)
1511                     {
1512                         regm3 &= ~regm;
1513                         if (oper == OPremquo || !el_signx32(e2))
1514                             regm3 &= ~mAX;
1515                     }
1516                     allocreg(cdb,&regm3,&r3,TYint);
1517                     cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
1518                     if (shpost != 1)
1519                         cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
1520                 }
1521                 else
1522                 {
1523                     /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
1524                      *   SHR   EAX,shpre
1525                      *   MOV   reg,m
1526                      *   MUL   reg
1527                      *   SHR   EDX,shpost
1528                      */
1529                     regm = mAX;
1530                     if (oper == OPmod || oper == OPremquo)
1531                         regm = allregs & ~(mAX|mDX);
1532                     codelem(cdb,e1,&regm,false);       // eval left leaf
1533                     reg = findreg(regm);
1534 
1535                     if (reg != AX)
1536                     {
1537                         getregs(cdb,mAX);
1538                         genmovreg(cdb,AX,reg);                 // MOV EAX,reg
1539                     }
1540                     if (shpre)
1541                     {
1542                         getregs(cdb,mAX);
1543                         cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
1544                     }
1545                     getregs(cdb,mDX);
1546                     movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1547                     getregs(cdb,mDX | mAX);
1548                     cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
1549                     if (shpost)
1550                         cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
1551                     r3 = DX;
1552                 }
1553 
1554                 regm_t resreg;
1555                 switch (oper)
1556                 {   case OPdiv:
1557                         // r3 = quotient
1558                         resreg = mask(r3);
1559                         break;
1560 
1561                     case OPmod:
1562                         /* reg = original value
1563                          * r3  = quotient
1564                          */
1565                         assert(!(regm & mAX));
1566                         if (el_signx32(e2))
1567                         {
1568                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1569                         }
1570                         else
1571                         {
1572                             assert(!(mask(r3) & mAX));
1573                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
1574                             getregs(cdb,mAX);
1575                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1576                         }
1577                         getregs(cdb,regm);
1578                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1579                         resreg = regm;
1580                         break;
1581 
1582                     case OPremquo:
1583                         /* reg = original value
1584                          * r3  = quotient
1585                          */
1586                         assert(!(mask(r3) & (mAX|regm)));
1587                         assert(!(regm & mAX));
1588                         if (el_signx32(e2))
1589                         {
1590                             cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1591                         }
1592                         else
1593                         {
1594                             movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0); // MOV EAX,e2factor
1595                             getregs(cdb,mAX);
1596                             cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1597                         }
1598                         getregs(cdb,regm);
1599                         cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1600                         genmovreg(cdb, AX, r3);              // MOV EAX,r3
1601                         genmovreg(cdb, DX, reg);             // MOV EDX,reg
1602                         resreg = mDX | mAX;
1603                         break;
1604 
1605                     default:
1606                         assert(0);
1607                 }
1608                 freenode(e2);
1609                 fixresult(cdb,e,resreg,pretregs);
1610                 return;
1611             }
1612 
1613             const int pow2 = ispow2(e2factor);
1614 
1615             // Register pair signed divide by power of 2
1616             if (sz == REGSIZE * 2 &&
1617                 (oper == OPdiv) && !uns &&
1618                 pow2 != -1 &&
1619                 I32 // not set up for I64 cent yet
1620                )
1621             {
1622                 regm_t retregs = mDX | mAX;
1623                 if (pow2 == 63 && !(retregs & BYTEREGS & mLSW))
1624                     retregs = (retregs & mMSW) | (BYTEREGS & mLSW);  // because of SETZ
1625 
1626                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1627                 const rhi = findregmsw(retregs);
1628                 const rlo = findreglsw(retregs);
1629                 freenode(e2);
1630                 getregs(cdb,retregs);
1631 
1632                 if (pow2 < 32)
1633                 {
1634                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1635 
1636                     genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
1637                     if (pow2 == 1)
1638                         cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
1639                     else
1640                     {
1641                         cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
1642                         cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
1643                     }
1644                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
1645                     cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
1646                     cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
1647                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
1648                 }
1649                 else if (pow2 == 32)
1650                 {
1651                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1652 
1653                     genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
1654                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
1655                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
1656                     cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
1657                     cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
1658                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
1659                 }
1660                 else if (pow2 < 63)
1661                 {
1662                     reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
1663                     reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));
1664 
1665                     genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
1666                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
1667                     cdb.genmovreg(r2,r1);                                         // MOV r2,r1
1668 
1669                     if (pow2 == 33)
1670                     {
1671                         cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
1672                         cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
1673                         cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
1674                     }
1675                     else
1676                     {
1677                         cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
1678                         cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
1679                         cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
1680                     }
1681 
1682                     cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
1683                     cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
1684                     cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
1685                 }
1686                 else
1687                 {
1688                     // This may be better done by cgelem.d
1689                     assert(pow2 == 63);
1690                     cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000
1691                     cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
1692                     cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
1693                     cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
1694                     movregconst(cdb,rhi,0,0);                              // MOV rhi,0
1695                 }
1696 
1697                 fixresult(cdb,e,retregs,pretregs);
1698                 return;
1699             }
1700 
1701             // Register pair signed modulo by power of 2
1702             if (sz == REGSIZE * 2 &&
1703                 (oper == OPmod) && !uns &&
1704                 pow2 != -1 &&
1705                 I32 // not set up for I64 cent yet
1706                )
1707             {
1708                 regm_t retregs = mDX | mAX;
1709                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1710                 const rhi = findregmsw(retregs);
1711                 const rlo = findreglsw(retregs);
1712                 freenode(e2);
1713                 getregs(cdb,retregs);
1714 
1715                 regm_t scratchm = allregs & ~retregs;
1716                 if (pow2 == 63)
1717                     scratchm &= BYTEREGS;               // because of SETZ
1718                 reg_t r1 = allocScratchReg(cdb, scratchm);
1719 
1720                 if (pow2 < 32)
1721                 {
1722                     cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
1723                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
1724                     cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
1725                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
1726                     cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
1727                     cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
1728                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
1729                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
1730                 }
1731                 else if (pow2 == 32)
1732                 {
1733                     cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
1734                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
1735                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
1736                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
1737                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
1738                 }
1739                 else if (pow2 < 63)
1740                 {
1741                     reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));
1742 
1743                     cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
1744                     cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
1745                     cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
1746                     cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
1747                     cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
1748                     cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
1749                     cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
1750                     cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
1751                     cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
1752                     cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
1753                 }
1754                 else
1755                 {
1756                     // This may be better done by cgelem.d
1757                     assert(pow2 == 63);
1758 
1759                     cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
1760                     cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
1761                     cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
1762                     cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
1763                     cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
1764                 }
1765 
1766                 fixresult(cdb,e,retregs,pretregs);
1767                 return;
1768             }
1769 
1770             if (sz > REGSIZE || !el_signx32(e2))
1771                 goto default;
1772 
1773             // Special code for signed divide or modulo by power of 2
1774             if ((sz == REGSIZE || (I64 && sz == 4)) &&
1775                 (oper == OPdiv || oper == OPmod) && !uns &&
1776                 pow2 != -1 &&
1777                 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv)
1778                )
1779             {
1780                 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386)
1781                 {
1782                     /* MOV r,reg
1783                        SHR r,31
1784                        ADD reg,r
1785                        SAR reg,1
1786                      */
1787                     regm_t retregs = allregs;
1788                     codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1789                     const reg = findreg(retregs);
1790                     freenode(e2);
1791                     getregs(cdb,retregs);
1792 
1793                     reg_t r = allocScratchReg(cdb, allregs & ~retregs);
1794                     genmovreg(cdb,r,reg);                        // MOV r,reg
1795                     cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
1796                     cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
1797                     cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1
1798                     regm_t resreg = retregs;
1799                     fixresult(cdb,e,resreg,pretregs);
1800                     return;
1801                 }
1802 
1803                 regm_t resreg;
1804                 switch (oper)
1805                 {
1806                     case OPdiv:
1807                         resreg = mAX;
1808                         break;
1809 
1810                     case OPmod:
1811                         resreg = mDX;
1812                         break;
1813 
1814                     case OPremquo:
1815                         resreg = mDX | mAX;
1816                         break;
1817 
1818                     default:
1819                         assert(0);
1820                 }
1821 
1822                 regm_t retregs = mAX;
1823                 codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
1824                 freenode(e2);
1825                 getregs(cdb,mAX | mDX);             // modify these regs
1826                 cdb.gen1(0x99);                             // CWD
1827                 code_orrex(cdb.last(), rex);
1828                 if (pow2 == 1)
1829                 {
1830                     if (oper == OPdiv)
1831                     {
1832                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));  // SUB AX,DX
1833                         cdb.gen2(0xD1,grex | modregrm(3,7,AX));   // SAR AX,1
1834                     }
1835                     else // OPmod
1836                     {
1837                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));   // XOR AX,DX
1838                         cdb.genc2(0x81,grex | modregrm(3,4,AX),1); // AND AX,1
1839                         cdb.gen2(0x03,grex | modregrm(3,DX,AX));   // ADD DX,AX
1840                     }
1841                 }
1842                 else
1843                 {   targ_ulong m;
1844 
1845                     m = (1 << pow2) - 1;
1846                     if (oper == OPdiv)
1847                     {
1848                         cdb.genc2(0x81,grex | modregrm(3,4,DX),m);  // AND DX,m
1849                         cdb.gen2(0x03,grex | modregrm(3,AX,DX));    // ADD AX,DX
1850                         // Be careful not to generate this for 8088
1851                         assert(config.target_cpu >= TARGET_80286);
1852                         cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2
1853                     }
1854                     else // OPmod
1855                     {
1856                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1857                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1858                         cdb.genc2(0x81,grex | modregrm(3,4,AX),m);  // AND AX,mask
1859                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
1860                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
1861                         resreg = mAX;
1862                     }
1863                 }
1864                 fixresult(cdb,e,resreg,pretregs);
1865                 return;
1866             }
1867             goto default;
1868 
1869         case OPind:
1870             if (!e2.Ecount)                        // if not CSE
1871                     goto case OPvar;                        // try OP reg,EA
1872             goto default;
1873 
1874         default:                                    // OPconst and operators
1875             //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
1876             regm_t retregs = sz <= REGSIZE ? mAX : mDX | mAX;
1877             codelem(cdb,e1,&retregs,false);           // eval left leaf
1878             regm_t rretregs;
1879             if (sz <= REGSIZE)                  // dedicated regs for div
1880             {
1881                 // pick some other regs
1882                 rretregs = isbyte ? BYTEREGS & ~mAX
1883                                 : ALLREGS & ~(mAX|mDX);
1884             }
1885             else
1886             {
1887                 assert(sz <= 2 * REGSIZE);
1888                 rretregs = mCX | mBX;           // second arg
1889             }
1890             scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
1891             if (sz <= REGSIZE)
1892             {
1893                 getregs(cdb,mAX | mDX);     // trash these regs
1894                 if (uns)                        // unsigned divide
1895                 {
1896                     movregconst(cdb,DX,0,(sz == 8) ? 64 : 0);  // MOV DX,0
1897                     getregs(cdb,mDX);
1898                 }
1899                 else
1900                 {
1901                     cdb.gen1(0x99);                 // CWD
1902                     code_orrex(cdb.last(),rex);
1903                 }
1904                 reg_t rreg = findreg(rretregs);
1905                 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,7 - uns,rreg)); // OP AX,rreg
1906                 if (I64 && isbyte && rreg >= 4)
1907                     code_orrex(cdb.last(), REX);
1908                 regm_t resreg;
1909                 switch (oper)
1910                 {
1911                     case OPdiv:
1912                         resreg = mAX;
1913                         break;
1914 
1915                     case OPmod:
1916                         resreg = mDX;
1917                         break;
1918 
1919                     case OPremquo:
1920                         resreg = mDX | mAX;
1921                         break;
1922 
1923                     default:
1924                         assert(0);
1925                 }
1926                 fixresult(cdb,e,resreg,pretregs);
1927             }
1928             else if (sz == 2 * REGSIZE)
1929             {
1930                 uint lib;
1931                 switch (oper)
1932                 {
1933                     case OPdiv:
1934                     case OPremquo:
1935                         lib = uns ? CLIB.uldiv : CLIB.ldiv;
1936                         break;
1937 
1938                     case OPmod:
1939                         lib = uns ? CLIB.ulmod : CLIB.lmod;
1940                         break;
1941 
1942                     default:
1943                         assert(0);
1944                 }
1945 
1946                 regm_t keepregs = I32 ? mSI | mDI : 0;
1947                 callclib(cdb,e,lib,pretregs,keepregs);
1948             }
1949             else
1950                     assert(0);
1951             return;
1952 
1953         case OPvar:
1954             if (I16 || sz == 2 * REGSIZE)
1955                 goto default;            // have to handle it with codelem()
1956 
1957             // loadea() handles CWD or CLR DX for divides
1958             regm_t retregs = mAX;
1959             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
1960             loadea(cdb,e2,&cs,0xF7 ^ isbyte,7 - uns,0,
1961                    mAX | mDX,
1962                    mAX | mDX);
1963             freenode(e2);
1964             regm_t resreg;
1965             switch (oper)
1966             {
1967                 case OPdiv:
1968                     resreg = mAX;
1969                     break;
1970 
1971                 case OPmod:
1972                     resreg = mDX;
1973                     break;
1974 
1975                 case OPremquo:
1976                     resreg = mDX | mAX;
1977                     break;
1978 
1979                 default:
1980                     assert(0);
1981             }
1982             fixresult(cdb,e,resreg,pretregs);
1983             return;
1984     }
1985     assert(0);
1986 }
1987 
1988 
1989 /***************************
1990  * Handle OPnot and OPbool.
1991  * Generate:
1992  *      c:      [evaluate e1]
1993  *      cfalse: [save reg code]
1994  *              clr     reg
1995  *              jmp     cnop
1996  *      ctrue:  [save reg code]
1997  *              clr     reg
1998  *              inc     reg
1999  *      cnop:   nop
2000  */
2001 
2002 void cdnot(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2003 {
2004     //printf("cdnot()\n");
2005     reg_t reg;
2006     tym_t forflags;
2007     regm_t retregs;
2008     elem *e1 = e.EV.E1;
2009 
2010     if (*pretregs == 0)
2011         goto L1;
2012     if (*pretregs == mPSW)
2013     {   //assert(e.Eoper != OPnot && e.Eoper != OPbool);*/ /* should've been optimized
2014     L1:
2015         codelem(cdb,e1,pretregs,false);      // evaluate e1 for cc
2016         return;
2017     }
2018 
2019     OPER op = e.Eoper;
2020     uint sz = tysize(e1.Ety);
2021     uint rex = (I64 && sz == 8) ? REX_W : 0;
2022     uint grex = rex << 16;
2023 
2024     if (!tyfloating(e1.Ety))
2025     {
2026     if (sz <= REGSIZE && e1.Eoper == OPvar)
2027     {   code cs;
2028 
2029         getlvalue(cdb,&cs,e1,0);
2030         freenode(e1);
2031         if (!I16 && sz == 2)
2032             cs.Iflags |= CFopsize;
2033 
2034         retregs = *pretregs & (ALLREGS | mBP);
2035         if (config.target_cpu >= TARGET_80486 &&
2036             tysize(e.Ety) == 1)
2037         {
2038             if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,0,&reg))
2039             {
2040                 cs.Iop = 0x39;
2041                 if (I64 && (sz == 1) && reg >= 4)
2042                     cs.Irex |= REX;
2043             }
2044             else
2045             {   cs.Iop = 0x81;
2046                 reg = 7;
2047                 cs.IFL2 = FLconst;
2048                 cs.IEV2.Vint = 0;
2049             }
2050             cs.Iop ^= (sz == 1);
2051             code_newreg(&cs,reg);
2052             cdb.gen(&cs);                             // CMP e1,0
2053 
2054             retregs &= BYTEREGS;
2055             if (!retregs)
2056                 retregs = BYTEREGS;
2057             allocreg(cdb,&retregs,&reg,TYint);
2058 
2059             const opcode_t iop = (op == OPbool)
2060                 ? 0x0F95    // SETNZ rm8
2061                 : 0x0F94;   // SETZ rm8
2062             cdb.gen2(iop, modregrmx(3,0,reg));
2063             if (reg >= 4)
2064                 code_orrex(cdb.last(), REX);
2065             if (op == OPbool)
2066                 *pretregs &= ~mPSW;
2067             goto L4;
2068         }
2069 
2070         if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,&reg))
2071             cs.Iop = 0x39;
2072         else
2073         {   cs.Iop = 0x81;
2074             reg = 7;
2075             cs.IFL2 = FLconst;
2076             cs.IEV2.Vint = 1;
2077         }
2078         if (I64 && (sz == 1) && reg >= 4)
2079             cs.Irex |= REX;
2080         cs.Iop ^= (sz == 1);
2081         code_newreg(&cs,reg);
2082         cdb.gen(&cs);                         // CMP e1,1
2083 
2084         allocreg(cdb,&retregs,&reg,TYint);
2085         op ^= (OPbool ^ OPnot);                 // switch operators
2086         goto L2;
2087     }
2088     else if (config.target_cpu >= TARGET_80486 &&
2089         tysize(e.Ety) == 1)
2090     {
2091         int jop = jmpopcode(e.EV.E1);
2092         retregs = mPSW;
2093         codelem(cdb,e.EV.E1,&retregs,false);
2094         retregs = *pretregs & BYTEREGS;
2095         if (!retregs)
2096             retregs = BYTEREGS;
2097         allocreg(cdb,&retregs,&reg,TYint);
2098 
2099         int iop = 0x0F90 | (jop & 0x0F);        // SETcc rm8
2100         if (op == OPnot)
2101             iop ^= 1;
2102         cdb.gen2(iop,grex | modregrmx(3,0,reg));
2103         if (reg >= 4)
2104             code_orrex(cdb.last(), REX);
2105         if (op == OPbool)
2106             *pretregs &= ~mPSW;
2107         goto L4;
2108     }
2109     else if (sz <= REGSIZE &&
2110         // NEG bytereg is too expensive
2111         (sz != 1 || config.target_cpu < TARGET_PentiumPro))
2112     {
2113         retregs = *pretregs & (ALLREGS | mBP);
2114         if (sz == 1 && !(retregs &= BYTEREGS))
2115             retregs = BYTEREGS;
2116         codelem(cdb,e.EV.E1,&retregs,false);
2117         reg = findreg(retregs);
2118         getregs(cdb,retregs);
2119         cdb.gen2(sz == 1 ? 0xF6 : 0xF7,grex | modregrmx(3,3,reg));   // NEG reg
2120         code_orflag(cdb.last(),CFpsw);
2121         if (!I16 && sz == SHORTSIZE)
2122             code_orflag(cdb.last(),CFopsize);
2123     L2:
2124         genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
2125         code_orrex(cdb.last(), rex);
2126         // At this point, reg==0 if e1==0, reg==-1 if e1!=0
2127         if (op == OPnot)
2128         {
2129             if (I64)
2130                 cdb.gen2(0xFF,grex | modregrmx(3,0,reg));    // INC reg
2131             else
2132                 cdb.gen1(0x40 + reg);                        // INC reg
2133         }
2134         else
2135             cdb.gen2(0xF7,grex | modregrmx(3,3,reg));    // NEG reg
2136         if (*pretregs & mPSW)
2137         {   code_orflag(cdb.last(),CFpsw);
2138             *pretregs &= ~mPSW;         // flags are always set anyway
2139         }
2140     L4:
2141         fixresult(cdb,e,retregs,pretregs);
2142         return;
2143     }
2144     }
2145     code *cnop = gennop(null);
2146     code *ctrue = gennop(null);
2147     logexp(cdb,e.EV.E1,(op == OPnot) ? false : true,FLcode,ctrue);
2148     forflags = *pretregs & mPSW;
2149     if (I64 && sz == 8)
2150         forflags |= 64;
2151     assert(tysize(e.Ety) <= REGSIZE);              // result better be int
2152     CodeBuilder cdbfalse;
2153     cdbfalse.ctor();
2154     allocreg(cdbfalse,pretregs,&reg,e.Ety);        // allocate reg for result
2155     code *cfalse = cdbfalse.finish();
2156     CodeBuilder cdbtrue;
2157     cdbtrue.ctor();
2158     cdbtrue.append(ctrue);
2159     for (code *c1 = cfalse; c1; c1 = code_next(c1))
2160         cdbtrue.gen(c1);                                      // duplicate reg save code
2161     CodeBuilder cdbfalse2;
2162     cdbfalse2.ctor();
2163     movregconst(cdbfalse2,reg,0,forflags);                    // mov 0 into reg
2164     regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
2165     movregconst(cdbtrue,reg,1,forflags);                      // mov 1 into reg
2166     regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
2167     genjmp(cdbfalse2,JMP,FLcode,cast(block *) cnop);          // skip over ctrue
2168     cdb.append(cfalse);
2169     cdb.append(cdbfalse2);
2170     cdb.append(cdbtrue);
2171     cdb.append(cnop);
2172 }
2173 
2174 
2175 /************************
2176  * Complement operator
2177  */
2178 
2179 void cdcom(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2180 {
2181     if (*pretregs == 0)
2182     {
2183         codelem(cdb,e.EV.E1,pretregs,false);
2184         return;
2185     }
2186     tym_t tym = tybasic(e.Ety);
2187     int sz = _tysize[tym];
2188     uint rex = (I64 && sz == 8) ? REX_W : 0;
2189     regm_t possregs = (sz == 1) ? BYTEREGS : allregs;
2190     regm_t retregs = *pretregs & possregs;
2191     if (retregs == 0)
2192         retregs = possregs;
2193     codelem(cdb,e.EV.E1,&retregs,false);
2194     getregs(cdb,retregs);                // retregs will be destroyed
2195 
2196     if (0 && sz == 4 * REGSIZE)
2197     {
2198         cdb.gen2(0xF7,modregrm(3,2,AX));   // NOT AX
2199         cdb.gen2(0xF7,modregrm(3,2,BX));   // NOT BX
2200         cdb.gen2(0xF7,modregrm(3,2,CX));   // NOT CX
2201         cdb.gen2(0xF7,modregrm(3,2,DX));   // NOT DX
2202     }
2203     else
2204     {
2205         const reg = (sz <= REGSIZE) ? findreg(retregs) : findregmsw(retregs);
2206         const op = (sz == 1) ? 0xF6 : 0xF7;
2207         genregs(cdb,op,2,reg);     // NOT reg
2208         code_orrex(cdb.last(), rex);
2209         if (I64 && sz == 1 && reg >= 4)
2210             code_orrex(cdb.last(), REX);
2211         if (sz == 2 * REGSIZE)
2212         {
2213             const reg2 = findreglsw(retregs);
2214             genregs(cdb,op,2,reg2);  // NOT reg+1
2215         }
2216     }
2217     fixresult(cdb,e,retregs,pretregs);
2218 }
2219 
2220 /************************
2221  * Bswap operator
2222  */
2223 
2224 void cdbswap(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2225 {
2226     if (*pretregs == 0)
2227     {
2228         codelem(cdb,e.EV.E1,pretregs,false);
2229         return;
2230     }
2231 
2232     const tym = tybasic(e.Ety);
2233     const sz = _tysize[tym];
2234     const posregs = (sz == 2) ? BYTEREGS : allregs;
2235     regm_t retregs = *pretregs & posregs;
2236     if (retregs == 0)
2237         retregs = posregs;
2238     codelem(cdb,e.EV.E1,&retregs,false);
2239     getregs(cdb,retregs);        // retregs will be destroyed
2240     if (sz == 2 * REGSIZE)
2241     {
2242         assert(sz != 16);                       // no cent support yet
2243         const msreg = findregmsw(retregs);
2244         cdb.gen1(0x0FC8 + (msreg & 7));         // BSWAP msreg
2245         const lsreg = findreglsw(retregs);
2246         cdb.gen1(0x0FC8 + (lsreg & 7));         // BSWAP lsreg
2247         cdb.gen2(0x87,modregrm(3,msreg,lsreg)); // XCHG msreg,lsreg
2248     }
2249     else
2250     {
2251         const reg = findreg(retregs);
2252         if (sz == 2)
2253         {
2254             genregs(cdb,0x86,reg+4,reg);    // XCHG regL,regH
2255         }
2256         else
2257         {
2258             assert(sz == 4 || sz == 8);
2259             cdb.gen1(0x0FC8 + (reg & 7));      // BSWAP reg
2260             ubyte rex = 0;
2261             if (sz == 8)
2262                 rex |= REX_W;
2263             if (reg & 8)
2264                 rex |= REX_B;
2265             if (rex)
2266                 code_orrex(cdb.last(), rex);
2267         }
2268     }
2269     fixresult(cdb,e,retregs,pretregs);
2270 }
2271 
2272 /*************************
2273  * ?: operator
2274  */
2275 
2276 void cdcond(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2277 {
2278     con_t regconold,regconsave;
2279     uint stackpushold,stackpushsave;
2280     int ehindexold,ehindexsave;
2281     uint sz2;
2282 
2283     /* vars to save state of 8087 */
2284     int stackusedold,stackusedsave;
2285     NDP[global87.stack.length] _8087old;
2286     NDP[global87.stack.length] _8087save;
2287 
2288     //printf("cdcond(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
2289     elem *e1 = e.EV.E1;
2290     elem *e2 = e.EV.E2;
2291     elem *e21 = e2.EV.E1;
2292     elem *e22 = e2.EV.E2;
2293     regm_t psw = *pretregs & mPSW;               /* save PSW bit                 */
2294     const op1 = e1.Eoper;
2295     uint sz1 = tysize(e1.Ety);
2296     uint jop = jmpopcode(e1);
2297 
2298     uint jop1 = jmpopcode(e21);
2299     uint jop2 = jmpopcode(e22);
2300 
2301     docommas(cdb,&e1);
2302     cgstate.stackclean++;
2303 
2304     if (!OTrel(op1) && e1 == e21 &&
2305         sz1 <= REGSIZE && !tyfloating(e1.Ety))
2306     {   // Recognize (e ? e : f)
2307 
2308         code *cnop1 = gennop(null);
2309         regm_t retregs = *pretregs | mPSW;
2310         codelem(cdb,e1,&retregs,false);
2311 
2312         cse_flush(cdb,1);                // flush CSEs to memory
2313         genjmp(cdb,jop,FLcode,cast(block *)cnop1);
2314         freenode(e21);
2315 
2316         regconsave = regcon;
2317         stackpushsave = stackpush;
2318 
2319         retregs |= psw;
2320         if (retregs & (mBP | ALLREGS))
2321             regimmed_set(findreg(retregs),0);
2322         codelem(cdb,e22,&retregs,false);
2323 
2324         andregcon(&regconsave);
2325         assert(stackpushsave == stackpush);
2326 
2327         *pretregs = retregs;
2328         freenode(e2);
2329         cdb.append(cnop1);
2330         cgstate.stackclean--;
2331         return;
2332     }
2333 
2334     if (OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE &&
2335         !e1.Ecount &&
2336         (jop == JC || jop == JNC) &&
2337         (sz2 = tysize(e2.Ety)) <= REGSIZE &&
2338         e21.Eoper == OPconst &&
2339         e22.Eoper == OPconst
2340        )
2341     {
2342         uint sz = tysize(e.Ety);
2343         uint rex = (I64 && sz == 8) ? REX_W : 0;
2344         uint grex = rex << 16;
2345 
2346         regm_t retregs;
2347         targ_size_t v1,v2;
2348 
2349         if (sz2 != 1 || I64)
2350         {
2351             retregs = *pretregs & (ALLREGS | mBP);
2352             if (!retregs)
2353                 retregs = ALLREGS;
2354         }
2355         else
2356         {
2357             retregs = *pretregs & BYTEREGS;
2358             if (!retregs)
2359                 retregs = BYTEREGS;
2360         }
2361 
2362         cdcmp_flag = 1 | rex;
2363         v1 = cast(targ_size_t)e21.EV.Vllong;
2364         v2 = cast(targ_size_t)e22.EV.Vllong;
2365         if (jop == JNC)
2366         {   v1 = v2;
2367             v2 = cast(targ_size_t)e21.EV.Vllong;
2368         }
2369 
2370         opcode_t opcode = 0x81;
2371         switch (sz2)
2372         {   case 1:     opcode--;
2373                         v1 = cast(byte) v1;
2374                         v2 = cast(byte) v2;
2375                         break;
2376 
2377             case 2:     v1 = cast(short) v1;
2378                         v2 = cast(short) v2;
2379                         break;
2380 
2381             case 4:     v1 = cast(int) v1;
2382                         v2 = cast(int) v2;
2383                         break;
2384             default:
2385                         break;
2386         }
2387 
2388         if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1)
2389         {
2390             // only zero-extension from 32-bits is available for 'or'
2391         }
2392         else if (I64 && cast(targ_llong)v2 != cast(targ_llong)cast(targ_long)v2)
2393         {
2394             // only sign-extension from 32-bits is available for 'and'
2395         }
2396         else
2397         {
2398             codelem(cdb,e1,&retregs,false);
2399             const reg = findreg(retregs);
2400 
2401             if (v1 == 0 && v2 == ~cast(targ_size_t)0)
2402             {
2403                 cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg));  // NOT reg
2404                 if (I64 && sz2 == REGSIZE)
2405                     code_orrex(cdb.last(), REX_W);
2406             }
2407             else
2408             {
2409                 v1 -= v2;
2410                 cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1);   // AND reg,v1-v2
2411                 if (I64 && sz2 == 1 && reg >= 4)
2412                     code_orrex(cdb.last(), REX);
2413                 if (v2 == 1 && !I64)
2414                     cdb.gen1(0x40 + reg);                     // INC reg
2415                 else if (v2 == -1L && !I64)
2416                     cdb.gen1(0x48 + reg);                     // DEC reg
2417                 else
2418                 {   cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2);   // ADD reg,v2
2419                     if (I64 && sz2 == 1 && reg >= 4)
2420                         code_orrex(cdb.last(), REX);
2421                 }
2422             }
2423 
2424             freenode(e21);
2425             freenode(e22);
2426             freenode(e2);
2427 
2428             fixresult(cdb,e,retregs,pretregs);
2429             cgstate.stackclean--;
2430             return;
2431         }
2432     }
2433 
2434     if (op1 != OPcond && op1 != OPandand && op1 != OPoror &&
2435         op1 != OPnot && op1 != OPbool &&
2436         e21.Eoper == OPconst &&
2437         sz1 <= REGSIZE &&
2438         *pretregs & (mBP | ALLREGS) &&
2439         tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety))
2440     {   // Recognize (e ? c : f)
2441 
2442         code *cnop1 = gennop(null);
2443         regm_t retregs = mPSW;
2444         jop = jmpopcode(e1);            // get jmp condition
2445         codelem(cdb,e1,&retregs,false);
2446 
2447         // Set the register with e21 without affecting the flags
2448         retregs = *pretregs & (ALLREGS | mBP);
2449         if (retregs & ~regcon.mvar)
2450             retregs &= ~regcon.mvar;    // don't disturb register variables
        // NOTE: see my email (sign extension bug? possible fix, some questions)
2452         reg_t reg;
2453         regwithvalue(cdb,retregs,cast(targ_size_t)e21.EV.Vllong,&reg,tysize(e21.Ety) == 8 ? 64|8 : 8);
2454         retregs = mask(reg);
2455 
2456         cse_flush(cdb,1);                // flush CSE's to memory
2457         genjmp(cdb,jop,FLcode,cast(block *)cnop1);
2458         freenode(e21);
2459 
2460         regconsave = regcon;
2461         stackpushsave = stackpush;
2462 
2463         codelem(cdb,e22,&retregs,false);
2464 
2465         andregcon(&regconsave);
2466         assert(stackpushsave == stackpush);
2467 
2468         freenode(e2);
2469         cdb.append(cnop1);
2470         fixresult(cdb,e,retregs,pretregs);
2471         cgstate.stackclean--;
2472         return;
2473     }
2474 
2475     code *cnop1 = gennop(null);
2476     code *cnop2 = gennop(null);         // dummy target addresses
2477     logexp(cdb,e1,false,FLcode,cnop1);  // evaluate condition
2478     regconold = regcon;
2479     stackusedold = global87.stackused;
2480     stackpushold = stackpush;
2481     memcpy(_8087old.ptr,global87.stack.ptr,global87.stack.sizeof);
2482     regm_t retregs = *pretregs;
2483     CodeBuilder cdb1;
2484     cdb1.ctor();
2485     if (psw && jop1 != JNE)
2486     {
2487         retregs &= ~mPSW;
2488         if (!retregs)
2489             retregs = ALLREGS;
2490         codelem(cdb1,e21,&retregs,false);
2491         fixresult(cdb1,e21,retregs,pretregs);
2492     }
2493     else
2494         codelem(cdb1,e21,&retregs,false);
2495 
2496     if (CPP && e2.Eoper == OPcolon2)
2497     {
2498         code cs;
2499 
2500         // This is necessary so that any cleanup code on one branch
2501         // is redone on the other branch.
2502         cs.Iop = ESCAPE | ESCmark2;
2503         cs.Iflags = 0;
2504         cs.Irex = 0;
2505         cdb.gen(&cs);
2506         cdb.append(cdb1);
2507         cs.Iop = ESCAPE | ESCrelease2;
2508         cdb.gen(&cs);
2509     }
2510     else
2511         cdb.append(cdb1);
2512 
2513     regconsave = regcon;
2514     regcon = regconold;
2515 
2516     stackpushsave = stackpush;
2517     stackpush = stackpushold;
2518 
2519     stackusedsave = global87.stackused;
2520     global87.stackused = stackusedold;
2521 
2522     memcpy(_8087save.ptr,global87.stack.ptr,global87.stack.sizeof);
2523     memcpy(global87.stack.ptr,_8087old.ptr,global87.stack.sizeof);
2524 
2525     retregs |= psw;                     // PSW bit may have been trashed
2526     CodeBuilder cdb2;
2527     cdb2.ctor();
2528     if (psw && jop2 != JNE)
2529     {
2530         retregs &= ~mPSW;
2531         if (!retregs)
2532             retregs = ALLREGS;
2533         codelem(cdb2,e22,&retregs,false);
2534         fixresult(cdb2,e22,retregs,pretregs);
2535     }
2536     else
2537         codelem(cdb2,e22,&retregs,false);   // use same regs as E1
2538     *pretregs = retregs | psw;
2539     andregcon(&regconold);
2540     andregcon(&regconsave);
2541     assert(global87.stackused == stackusedsave);
2542     assert(stackpush == stackpushsave);
2543     memcpy(global87.stack.ptr,_8087save.ptr,global87.stack.sizeof);
2544     freenode(e2);
2545     genjmp(cdb,JMP,FLcode,cast(block *) cnop2);
2546     cdb.append(cnop1);
2547     cdb.append(cdb2);
2548     cdb.append(cnop2);
2549     if (*pretregs & mST0)
2550         note87(e,0,0);
2551 
2552     cgstate.stackclean--;
2553 }
2554 
2555 /*********************
2556  * Comma operator OPcomma
2557  */
2558 
2559 void cdcomma(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2560 {
2561     regm_t retregs = 0;
2562     codelem(cdb,e.EV.E1,&retregs,false);   // ignore value from left leaf
2563     codelem(cdb,e.EV.E2,pretregs,false);   // do right leaf
2564 }
2565 
2566 
2567 /*********************************
2568  * Do && and || operators.
2569  * Generate:
2570  *              (evaluate e1 and e2, if true goto cnop1)
2571  *      cnop3:  NOP
2572  *      cg:     [save reg code]         ;if we must preserve reg
2573  *              CLR     reg             ;false result (set Z also)
2574  *              JMP     cnop2
2575  *
2576  *      cnop1:  NOP                     ;if e1 evaluates to true
2577  *              [save reg code]         ;preserve reg
2578  *
2579  *              MOV     reg,1           ;true result
2580  *                  or
2581  *              CLR     reg             ;if return result in flags
2582  *              INC     reg
2583  *
2584  *      cnop2:  NOP                     ;mark end of code
2585  */
2586 
2587 void cdloglog(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2588 {
2589     /* We can trip the assert with the following:
2590      *    if ( (b<=a) ? (c<b || a<=c) : c>=a )
2591      * We'll generate ugly code for it, but it's too obscure a case
2592      * to expend much effort on it.
2593      * assert(*pretregs != mPSW);
2594      */
2595 
2596     cgstate.stackclean++;
2597     code *cnop1 = gennop(null);
2598     CodeBuilder cdb1;
2599     cdb1.ctor();
2600     cdb1.append(cnop1);
2601     code *cnop3 = gennop(null);
2602     elem *e2 = e.EV.E2;
2603     (e.Eoper == OPoror)
2604         ? logexp(cdb,e.EV.E1,1,FLcode,cnop1)
2605         : logexp(cdb,e.EV.E1,0,FLcode,cnop3);
2606     con_t regconsave = regcon;
2607     uint stackpushsave = stackpush;
2608     if (*pretregs == 0)                 // if don't want result
2609     {
2610         int noreturn = !el_returns(e2);
2611         codelem(cdb,e2,pretregs,false);
2612         if (noreturn)
2613         {
2614             regconsave.used |= regcon.used;
2615             regcon = regconsave;
2616         }
2617         else
2618             andregcon(&regconsave);
2619         assert(stackpush == stackpushsave);
2620         cdb.append(cnop3);
2621         cdb.append(cdb1);        // eval code, throw away result
2622         cgstate.stackclean--;
2623         return;
2624     }
2625     code *cnop2 = gennop(null);
2626     uint sz = tysize(e.Ety);
2627     if (tybasic(e2.Ety) == TYbool &&
2628       sz == tysize(e2.Ety) &&
2629       !(*pretregs & mPSW) &&
2630       e2.Eoper == OPcall)
2631     {
2632         codelem(cdb,e2,pretregs,false);
2633 
2634         andregcon(&regconsave);
2635 
2636         // stack depth should not change when evaluating E2
2637         assert(stackpush == stackpushsave);
2638 
2639         assert(sz <= 4);                                        // result better be int
2640         regm_t retregs = *pretregs & allregs;
2641         reg_t reg;
2642         allocreg(cdb1,&retregs,&reg,TYint);                     // allocate reg for result
2643         movregconst(cdb1,reg,e.Eoper == OPoror,0);             // reg = 1
2644         regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
2645         *pretregs = retregs;
2646         if (e.Eoper == OPoror)
2647         {
2648             cdb.append(cnop3);
2649             genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
2650             cdb.append(cdb1);
2651             cdb.append(cnop2);
2652         }
2653         else
2654         {
2655             genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
2656             cdb.append(cnop3);
2657             cdb.append(cdb1);
2658             cdb.append(cnop2);
2659         }
2660         cgstate.stackclean--;
2661         return;
2662     }
2663     logexp(cdb,e2,1,FLcode,cnop1);
2664     andregcon(&regconsave);
2665 
2666     // stack depth should not change when evaluating E2
2667     assert(stackpush == stackpushsave);
2668 
2669     assert(sz <= 4);                                         // result better be int
2670     regm_t retregs = *pretregs & (ALLREGS | mBP);
2671     if (!retregs)
2672         retregs = ALLREGS;                                   // if mPSW only
2673     CodeBuilder cdbcg;
2674     cdbcg.ctor();
2675     reg_t reg;
2676     allocreg(cdbcg,&retregs,&reg,TYint);                     // allocate reg for result
2677     code *cg = cdbcg.finish();
2678     for (code *c1 = cg; c1; c1 = code_next(c1))              // for each instruction
2679         cdb1.gen(c1);                                        // duplicate it
2680     CodeBuilder cdbcg2;
2681     cdbcg2.ctor();
2682     movregconst(cdbcg2,reg,0,*pretregs & mPSW);              // MOV reg,0
2683     regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
2684     genjmp(cdbcg2, JMP,FLcode,cast(block *) cnop2);              // JMP cnop2
2685     movregconst(cdb1,reg,1,*pretregs & mPSW);                // reg = 1
2686     regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
2687     *pretregs = retregs;
2688     cdb.append(cnop3);
2689     cdb.append(cg);
2690     cdb.append(cdbcg2);
2691     cdb.append(cdb1);
2692     cdb.append(cnop2);
2693     cgstate.stackclean--;
2694     return;
2695 }
2696 
2697 
2698 /*********************
 * Generate code for shifts and rotates (OPshl,OPshr,OPashr,OProl,OPror).
2700  */
2701 
2702 void cdshift(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2703 {
2704     reg_t resreg;
2705     uint shiftcnt;
2706     regm_t retregs,rretregs;
2707 
2708     //printf("cdshift()\n");
2709     elem *e1 = e.EV.E1;
2710     if (*pretregs == 0)                   // if don't want result
2711     {
2712         codelem(cdb,e1,pretregs,false); // eval left leaf
2713         *pretregs = 0;                  // in case they got set
2714         codelem(cdb,e.EV.E2,pretregs,false);
2715         return;
2716     }
2717 
2718     tym_t tyml = tybasic(e1.Ety);
2719     int sz = _tysize[tyml];
2720     assert(!tyfloating(tyml));
2721     OPER oper = e.Eoper;
2722     uint grex = ((I64 && sz == 8) ? REX_W : 0) << 16;
2723 
2724 version (SCPP)
2725 {
2726     // Do this until the rest of the compiler does OPshr/OPashr correctly
2727     if (oper == OPshr)
2728         oper = (tyuns(tyml)) ? OPshr : OPashr;
2729 }
2730 
2731     uint s1,s2;
2732     switch (oper)
2733     {
2734         case OPshl:
2735             s1 = 4;                     // SHL
2736             s2 = 2;                     // RCL
2737             break;
2738         case OPshr:
2739             s1 = 5;                     // SHR
2740             s2 = 3;                     // RCR
2741             break;
2742         case OPashr:
2743             s1 = 7;                     // SAR
2744             s2 = 3;                     // RCR
2745             break;
2746         case OProl:
2747             s1 = 0;                     // ROL
2748             break;
2749         case OPror:
2750             s1 = 1;                     // ROR
2751             break;
2752         default:
2753             assert(0);
2754     }
2755 
2756     reg_t sreg = NOREG;                   // guard against using value without assigning to sreg
2757     elem *e2 = e.EV.E2;
2758     regm_t forccs = *pretregs & mPSW;            // if return result in CCs
2759     regm_t forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs
2760     bool e2isconst = false;                    // assume for the moment
2761     uint isbyte = (sz == 1);
2762     switch (e2.Eoper)
2763     {
2764         case OPconst:
2765             e2isconst = true;               // e2 is a constant
2766             shiftcnt = e2.EV.Vint;         // get shift count
2767             if ((!I16 && sz <= REGSIZE) ||
2768                 shiftcnt <= 4 ||            // if sequence of shifts
2769                 (sz == 2 &&
2770                     (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) ||
2771                 (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2772                )
2773             {
2774                 retregs = (forregs) ? forregs
2775                                     : ALLREGS;
2776                 if (isbyte)
2777                 {   retregs &= BYTEREGS;
2778                     if (!retregs)
2779                         retregs = BYTEREGS;
2780                 }
2781                 else if (sz > REGSIZE && sz <= 2 * REGSIZE &&
2782                          !(retregs & mMSW))
2783                     retregs |= mMSW & ALLREGS;
2784                 if (s1 == 7)    // if arithmetic right shift
2785                 {
2786                     if (shiftcnt == 8)
2787                         retregs = mAX;
2788                     else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
2789                         retregs = mDX|mAX;
2790                 }
2791 
2792                 if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE &&
2793                     oper == OPshl &&
2794                     !e1.Ecount &&
2795                     (e1.Eoper == OPs16_32 || e1.Eoper == OPu16_32 ||
2796                      e1.Eoper == OPs32_64 || e1.Eoper == OPu32_64)
2797                    )
2798                 {   // Handle (shtlng)s << 16
2799                     regm_t r = retregs & mMSW;
2800                     codelem(cdb,e1.EV.E1,&r,false);      // eval left leaf
2801                     regwithvalue(cdb,retregs & mLSW,0,&resreg,0);
2802                     getregs(cdb,r);
2803                     retregs = r | mask(resreg);
2804                     if (forccs)
2805                     {   sreg = findreg(r);
2806                         gentstreg(cdb,sreg);
2807                         *pretregs &= ~mPSW;             // already set
2808                     }
2809                     freenode(e1);
2810                     freenode(e2);
2811                     break;
2812                 }
2813 
2814                 // See if we should use LEA reg,xxx instead of shift
2815                 if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 &&
2816                     (sz == REGSIZE || (I64 && sz == 4)) &&
2817                     oper == OPshl &&
2818                     e1.Eoper == OPvar &&
2819                     !(*pretregs & mPSW) &&
2820                     config.flags4 & CFG4speed
2821                    )
2822                 {
2823                     reg_t reg;
2824                     regm_t regm;
2825 
2826                     if (isregvar(e1,&regm,&reg) && !(regm & retregs))
2827                     {   code cs;
2828                         allocreg(cdb,&retregs,&resreg,e.Ety);
2829                         buildEA(&cs,-1,reg,1 << shiftcnt,0);
2830                         cs.Iop = LEA;
2831                         code_newreg(&cs,resreg);
2832                         cs.Iflags = 0;
2833                         if (I64 && sz == 8)
2834                             cs.Irex |= REX_W;
2835                         cdb.gen(&cs);             // LEA resreg,[reg * ss]
2836                         freenode(e1);
2837                         freenode(e2);
2838                         break;
2839                     }
2840                 }
2841 
2842                 codelem(cdb,e1,&retregs,false); // eval left leaf
2843                 //assert((retregs & regcon.mvar) == 0);
2844                 getregs(cdb,retregs);          // modify these regs
2845 
2846                 {
2847                     if (sz == 2 * REGSIZE)
2848                     {   resreg = findregmsw(retregs);
2849                         sreg = findreglsw(retregs);
2850                     }
2851                     else
2852                     {   resreg = findreg(retregs);
2853                         sreg = NOREG;              // an invalid value
2854                     }
2855                     if (config.target_cpu >= TARGET_80286 &&
2856                         sz <= REGSIZE)
2857                     {
2858                         // SHL resreg,shiftcnt
2859                         assert(!(sz == 1 && (mask(resreg) & ~BYTEREGS)));
2860                         cdb.genc2(0xC1 ^ isbyte,grex | modregxrmx(3,s1,resreg),shiftcnt);
2861                         if (shiftcnt == 1)
2862                             cdb.last().Iop += 0x10;     // short form of shift
2863                         if (I64 && sz == 1 && resreg >= 4)
2864                             cdb.last().Irex |= REX;
2865                         // See if we need operand size prefix
2866                         if (!I16 && oper != OPshl && sz == 2)
2867                             cdb.last().Iflags |= CFopsize;
2868                         if (forccs)
2869                             cdb.last().Iflags |= CFpsw;         // need flags result
2870                     }
2871                     else if (shiftcnt == 8)
2872                     {   if (!(retregs & BYTEREGS) || resreg >= 4)
2873                         {
2874                             goto L1;
2875                         }
2876 
2877                         if (pass != PASSfinal && (!forregs || forregs & (mSI | mDI)))
2878                         {
2879                             // e1 might get into SI or DI in a later pass,
2880                             // so don't put CX into a register
2881                             getregs(cdb,mCX);
2882                         }
2883 
2884                         assert(sz == 2);
2885                         switch (oper)
2886                         {
2887                             case OPshl:
2888                                 // MOV regH,regL        XOR regL,regL
2889                                 assert(resreg < 4 && !grex);
2890                                 genregs(cdb,0x8A,resreg+4,resreg);
2891                                 genregs(cdb,0x32,resreg,resreg);
2892                                 break;
2893 
2894                             case OPshr:
2895                             case OPashr:
2896                                 // MOV regL,regH
2897                                 genregs(cdb,0x8A,resreg,resreg+4);
2898                                 if (oper == OPashr)
2899                                     cdb.gen1(0x98);           // CBW
2900                                 else
2901                                     genregs(cdb,0x32,resreg+4,resreg+4); // CLR regH
2902                                 break;
2903 
2904                             case OPror:
2905                             case OProl:
2906                                 // XCHG regL,regH
2907                                 genregs(cdb,0x86,resreg+4,resreg);
2908                                 break;
2909 
2910                             default:
2911                                 assert(0);
2912                         }
2913                         if (forccs)
2914                             gentstreg(cdb,resreg);
2915                     }
2916                     else if (shiftcnt == REGSIZE * 8)   // it's an lword
2917                     {
2918                         if (oper == OPshl)
2919                             swap(&resreg, &sreg);
2920                         genmovreg(cdb,sreg,resreg);  // MOV sreg,resreg
2921                         if (oper == OPashr)
2922                             cdb.gen1(0x99);                       // CWD
2923                         else
2924                             movregconst(cdb,resreg,0,0);  // MOV resreg,0
2925                         if (forccs)
2926                         {
2927                             gentstreg(cdb,sreg);
2928                             *pretregs &= mBP | ALLREGS | mES;
2929                         }
2930                     }
2931                     else
2932                     {
2933                         if (oper == OPshl && sz == 2 * REGSIZE)
2934                             swap(&resreg, &sreg);
2935                         while (shiftcnt--)
2936                         {
2937                             cdb.gen2(0xD1 ^ isbyte,modregrm(3,s1,resreg));
2938                             if (sz == 2 * REGSIZE)
2939                             {
2940                                 code_orflag(cdb.last(),CFpsw);
2941                                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
2942                             }
2943                         }
2944                         if (forccs)
2945                             code_orflag(cdb.last(),CFpsw);
2946                     }
2947                     if (sz <= REGSIZE)
2948                         *pretregs &= mBP | ALLREGS;     // flags already set
2949                 }
2950                 freenode(e2);
2951                 break;
2952             }
2953             goto default;
2954 
2955         default:
2956             retregs = forregs & ~mCX;               // CX will be shift count
2957             if (sz <= REGSIZE)
2958             {
2959                 if (forregs & ~regcon.mvar && !(retregs & ~regcon.mvar))
2960                     retregs = ALLREGS & ~mCX;       // need something
2961                 else if (!retregs)
2962                     retregs = ALLREGS & ~mCX;       // need something
2963                 if (sz == 1)
2964                 {   retregs &= mAX|mBX|mDX;
2965                     if (!retregs)
2966                         retregs = mAX|mBX|mDX;
2967                 }
2968             }
2969             else
2970             {
2971                 if (!(retregs & mMSW))
2972                     retregs = ALLREGS & ~mCX;
2973             }
2974             codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
2975 
2976             if (sz <= REGSIZE)
2977                 resreg = findreg(retregs);
2978             else
2979             {
2980                 resreg = findregmsw(retregs);
2981                 sreg = findreglsw(retregs);
2982             }
2983         L1:
2984             rretregs = mCX;                 // CX is shift count
2985             if (sz <= REGSIZE)
2986             {
2987                 scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue
2988                 getregs(cdb,retregs);      // trash these regs
2989                 cdb.gen2(0xD3 ^ isbyte,grex | modregrmx(3,s1,resreg)); // Sxx resreg,CX
2990 
2991                 if (!I16 && sz == 2 && (oper == OProl || oper == OPror))
2992                     cdb.last().Iflags |= CFopsize;
2993 
2994                 // Note that a shift by CL does not set the flags if
2995                 // CL == 0. If e2 is a constant, we know it isn't 0
2996                 // (it would have been optimized out).
2997                 if (e2isconst)
2998                     *pretregs &= mBP | ALLREGS; // flags already set with result
2999             }
3000             else if (sz == 2 * REGSIZE &&
3001                      config.target_cpu >= TARGET_80386)
3002             {
3003                 reg_t hreg = resreg;
3004                 reg_t lreg = sreg;
3005                 uint rex = I64 ? (REX_W << 16) : 0;
3006                 if (e2isconst)
3007                 {
3008                     getregs(cdb,retregs);
3009                     if (shiftcnt & (REGSIZE * 8))
3010                     {
3011                         if (oper == OPshr)
3012                         {   //      SHR hreg,shiftcnt
3013                             //      MOV lreg,hreg
3014                             //      XOR hreg,hreg
3015                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt - (REGSIZE * 8));
3016                             genmovreg(cdb,lreg,hreg);
3017                             movregconst(cdb,hreg,0,0);
3018                         }
3019                         else if (oper == OPashr)
3020                         {   //      MOV     lreg,hreg
3021                             //      SAR     hreg,31
3022                             //      SHRD    lreg,hreg,shiftcnt
3023                             genmovreg(cdb,lreg,hreg);
3024                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),(REGSIZE * 8) - 1);
3025                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt - (REGSIZE * 8));
3026                         }
3027                         else
3028                         {   //      SHL lreg,shiftcnt
3029                             //      MOV hreg,lreg
3030                             //      XOR lreg,lreg
3031                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt - (REGSIZE * 8));
3032                             genmovreg(cdb,hreg,lreg);
3033                             movregconst(cdb,lreg,0,0);
3034                         }
3035                     }
3036                     else
3037                     {
3038                         if (oper == OPshr || oper == OPashr)
3039                         {   //      SHRD    lreg,hreg,shiftcnt
3040                             //      SHR/SAR hreg,shiftcnt
3041                             cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt);
3042                             cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt);
3043                         }
3044                         else
3045                         {   //      SHLD hreg,lreg,shiftcnt
3046                             //      SHL  lreg,shiftcnt
3047                             cdb.genc2(0x0FA4,rex | modregrm(3,lreg,hreg),shiftcnt);
3048                             cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt);
3049                         }
3050                     }
3051                     freenode(e2);
3052                 }
3053                 else if (config.target_cpu >= TARGET_80486 && REGSIZE == 2)
3054                 {
3055                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
3056                     getregs(cdb,retregs);          // modify these regs
3057                     if (oper == OPshl)
3058                     {
3059                         /*
3060                             SHLD    hreg,lreg,CL
3061                             SHL     lreg,CL
3062                          */
3063 
3064                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
3065                         cdb.gen2(0xD3,modregrm(3,4,lreg));
3066                     }
3067                     else
3068                     {
3069                         /*
3070                             SHRD    lreg,hreg,CL
3071                             SAR             hreg,CL
3072 
3073                             -- or --
3074 
3075                             SHRD    lreg,hreg,CL
3076                             SHR             hreg,CL
3077                          */
3078                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
3079                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
3080                     }
3081                 }
3082                 else
3083                 {   code* cl1,cl2;
3084 
3085                     scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
3086                     getregs(cdb,retregs | mCX);     // modify these regs
3087                                                             // TEST CL,0x20
3088                     cdb.genc2(0xF6,modregrm(3,0,CX),REGSIZE * 8);
3089                     cl1 = gennop(null);
3090                     CodeBuilder cdb1;
3091                     cdb1.ctor();
3092                     cdb1.append(cl1);
3093                     if (oper == OPshl)
3094                     {
3095                         /*  TEST    CL,20H
3096                             JNE     L1
3097                             SHLD    hreg,lreg,CL
3098                             SHL     lreg,CL
3099                             JMP     L2
3100                         L1: AND     CL,20H-1
3101                             SHL     lreg,CL
3102                             MOV     hreg,lreg
3103                             XOR     lreg,lreg
3104                         L2: NOP
3105                          */
3106 
3107                         if (REGSIZE == 2)
3108                             cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3109                         cdb1.gen2(0xD3,modregrm(3,4,lreg));
3110                         genmovreg(cdb1,hreg,lreg);
3111                         genregs(cdb1,0x31,lreg,lreg);
3112 
3113                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
3114                         cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
3115                         cdb.gen2(0xD3,modregrm(3,4,lreg));
3116                     }
3117                     else
3118                     {   if (oper == OPashr)
3119                         {
3120                             /*  TEST        CL,20H
3121                                 JNE         L1
3122                                 SHRD        lreg,hreg,CL
3123                                 SAR         hreg,CL
3124                                 JMP         L2
3125                             L1: AND         CL,15
3126                                 MOV         lreg,hreg
3127                                 SAR         hreg,31
3128                                 SHRD        lreg,hreg,CL
3129                             L2: NOP
3130                              */
3131 
3132                             if (REGSIZE == 2)
3133                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3134                             genmovreg(cdb1,lreg,hreg);
3135                             cdb1.genc2(0xC1,modregrm(3,s1,hreg),31);
3136                             cdb1.gen2(0x0FAD,modregrm(3,hreg,lreg));
3137                         }
3138                         else
3139                         {
3140                             /*  TEST        CL,20H
3141                                 JNE         L1
3142                                 SHRD        lreg,hreg,CL
3143                                 SHR         hreg,CL
3144                                 JMP         L2
3145                             L1: AND         CL,15
3146                                 SHR         hreg,CL
3147                                 MOV         lreg,hreg
3148                                 XOR         hreg,hreg
3149                             L2: NOP
3150                              */
3151 
3152                             if (REGSIZE == 2)
3153                                 cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
3154                             cdb1.gen2(0xD3,modregrm(3,5,hreg));
3155                             genmovreg(cdb1,lreg,hreg);
3156                             genregs(cdb1,0x31,hreg,hreg);
3157                         }
3158                         genjmp(cdb,JNE,FLcode,cast(block *)cl1);
3159                         cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
3160                         cdb.gen2(0xD3,modregrm(3,s1,hreg));
3161                     }
3162                     cl2 = gennop(null);
3163                     genjmp(cdb,JMPS,FLcode,cast(block *)cl2);
3164                     cdb.append(cdb1);
3165                     cdb.append(cl2);
3166                 }
3167                 break;
3168             }
3169             else if (sz == 2 * REGSIZE)
3170             {
3171                 scodelem(cdb,e2,&rretregs,retregs,false);
3172                 getregs(cdb,retregs | mCX);
3173                 if (oper == OPshl)
3174                     swap(&resreg, &sreg);
3175                 if (!e2isconst)                   // if not sure shift count != 0
3176                     cdb.genc2(0xE3,0,6);          // JCXZ .+6
3177                 cdb.gen2(0xD1,modregrm(3,s1,resreg));
3178                 code_orflag(cdb.last(),CFtarg2);
3179                 cdb.gen2(0xD1,modregrm(3,s2,sreg));
3180                 cdb.genc2(0xE2,0,cast(targ_uns)-6);          // LOOP .-6
3181                 regimmed_set(CX,0);         // note that now CX == 0
3182             }
3183             else
3184                 assert(0);
3185             break;
3186     }
3187     fixresult(cdb,e,retregs,pretregs);
3188 }
3189 
3190 
/***************************
 * Perform a 'star' reference (indirection).
 *
 * Generates the load of the value addressed by `e`. Depending on `*pretregs`
 * the value is only tested (flags), pushed on the stack, or loaded into
 * general purpose, segment:offset, or XMM registers.
 *
 * When inline 8087 code is enabled, floating point loads that want ST0 or
 * flags go to cdind87(), and complex loads go to cload87() (except a
 * TYcfloat wanted in general registers on 64 bit targets).
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the indirection element; e.EV.E1 must be non-null
 *      pretregs = mask of where the result is wanted; may be rewritten here
 *                 (volatile loads with no destination, 8 byte flag tests on
 *                 non-32 bit targets) before being handed to fixresult()
 */

void cdind(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdind(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    tym_t tym = tybasic(e.Ety);
    if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
            if (I64 && tym == TYcfloat && *pretregs & (ALLREGS | mBP))
            { }     // handled below as an ordinary register load
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }

            if (*pretregs & mPSW)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
        }
    }

    assert(e.EV.E1);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
            // This case should never happen, why is it here?
            tym = TYnptr;               // don't confuse allocreg()
            if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
                tym = TYfptr;
            break;

        default:
            break;
    }
    uint sz = _tysize[tym];
    uint isbyte = tybyte(tym) != 0;     // 1 for byte operands, used to flip the low opcode bit

    code cs;

    getlvalue(cdb,&cs,e,RMload);          // get addressing mode
    //printf("Irex = %02x, Irm = x%02x, Isib = x%02x\n", cs.Irex, cs.Irm, cs.Isib);
    //fprintf(stderr,"cd2 :\n"); WRcodlst(c);
    if (*pretregs == 0)
    {
        // Result not wanted: only a volatile access still has to be performed
        if (e.Ety & mTYvolatile)               // do the load anyway
            *pretregs = regmask(e.Ety, 0);     // load into registers
        else
            return;
    }

    regm_t idxregs = idxregm(&cs);               // mask of index regs used

    if (*pretregs == mPSW)
    {
        // Only the flags are wanted
        if (!I16 && tym == TYfloat)
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYfloat);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            cdb.gen(&cs);                       // MOV reg,lsw
            // presumably shifts the sign bit out so it does not affect the zero test
            cdb.gen2(0xD1,modregrmx(3,4,reg));  // SHL reg,1
            code_orflag(cdb.last(), CFpsw);
        }
        else if (sz <= REGSIZE)
        {
            cs.Iop = 0x81 ^ isbyte;
            cs.Irm |= modregrm(0,7,0);
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            cdb.gen(&cs);             // CMP [idx],0
        }
        else if (!I16 && sz == REGSIZE + 2)      // if far pointer
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = MOVZXw;
            cs.Irm |= modregrm(0,reg,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOVZX reg,msw
            goto L4;
        }
        else if (sz <= 2 * REGSIZE)
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOV reg,msw
            if (I32)
            {   if (tym == TYdouble || tym == TYdouble_alias)
                    cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1
            }
            else if (tym == TYfloat)
                cdb.gen2(0xD1,modregrm(3,4,reg));    // SHL reg,1
        L4:
            // Combine the two halves; the OR sets the flags for the whole value
            cs.Iop = 0x0B;
            getlvalue_lsw(&cs);
            cs.Iflags |= CFpsw;
            cdb.gen(&cs);                    // OR reg,lsw
        }
        else if (!I32 && sz == 8)
        {
            *pretregs |= DOUBLEREGS_16;     // fake it for now
            goto L1;
        }
        else
        {
            debug WRTYxx(tym);
            assert(0);
        }
    }
    else                                // else return result in reg
    {
    L1:
        retregs = *pretregs;
        if (sz == 8 &&
            (retregs & (mPSW | mSTACK | ALLREGS | mBP)) == mSTACK)
        {   int i;

            // Optimizer should not CSE these, as the result is worse code!
            assert(!e.Ecount);

            // Push the 8 bytes REGSIZE at a time, highest addressed part first
            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += 8 - REGSIZE;
            stackchanged = 1;
            i = 8 - REGSIZE;
            do
            {
                cdb.gen(&cs);                         // PUSH EA+i
                cdb.genadjesp(REGSIZE);
                cs.IEV1.Voffset -= REGSIZE;
                stackpush += REGSIZE;
                i -= REGSIZE;
            }
            while (i >= 0);
            goto L3;
        }
        if (I16 && sz == 8)
            retregs = DOUBLEREGS_16;

        // Watch out for loading an lptr from an lptr! We must have
        // the offset loaded into a different register.
        /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes)
                retregs = ALLREGS;*/

        assert(!isbyte || retregs & BYTEREGS);
        allocreg(cdb,&retregs,&reg,tym); // alloc registers

        if (retregs & XMMREGS)
        {
            assert(sz == 4 || sz == 8 || sz == 16 || sz == 32); // float, double or vector
            cs.Iop = xmmload(tym);
            cs.Irex &= ~REX_W;
            code_newreg(&cs,reg - XMM0);
            checkSetVex(&cs,tym);
            cdb.gen(&cs);     // MOV reg,[idx]
        }
        else if (sz <= REGSIZE)
        {
            cs.Iop = 0x8B;                                  // MOV
            if (sz <= 2 && !I16 &&
                config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed)
            {
                cs.Iop = tyuns(tym) ? MOVZXw : MOVSXw;      // MOVZX/MOVSX
                cs.Iflags &= ~CFopsize;
            }
            cs.Iop ^= isbyte;
        L2:
            code_newreg(&cs,reg);
            cdb.gen(&cs);     // MOV reg,[idx]
            if (isbyte && reg >= 4)
                code_orrex(cdb.last(), REX);
        }
        else if ((tym == TYfptr || tym == TYhptr) && retregs & mES)
        {
            cs.Iop = 0xC4;          // LES reg,[idx]
            goto L2;
        }
        else if (sz <= 2 * REGSIZE)
        {   uint lsreg;

            cs.Iop = 0x8B;
            // Be careful not to interfere with index registers
            if (!I16)
            {
                // Can't handle if both result registers are used in
                // the addressing mode.
                if ((retregs & idxregs) == retregs)
                {
                    retregs = mMSW & allregs & ~idxregs;
                    if (!retregs)
                        retregs |= mCX;
                    retregs |= mLSW & ~idxregs;

                    // We can run out of registers, so if that's possible,
                    // give us *one* of the idxregs
                    if ((retregs & ~regcon.mvar & mLSW) == 0)
                    {
                        regm_t x = idxregs & mLSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }
                    else if ((retregs & ~regcon.mvar & mMSW) == 0)
                    {
                        regm_t x = idxregs & mMSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }

                    allocreg(cdb,&retregs,&reg,tym);     // alloc registers
                    assert((retregs & idxregs) != retregs);
                }

                lsreg = findreglsw(retregs);
                if (mask(reg) & idxregs)                // reg is in addr mode
                {
                    // Load the lsw first so the index register is overwritten last
                    code_newreg(&cs,lsreg);
                    cdb.gen(&cs);                 // MOV lsreg,lsw
                    if (sz == REGSIZE + 2)
                        cs.Iflags |= CFopsize;
                    lsreg = reg;
                    getlvalue_msw(&cs);                 // MOV reg,msw
                }
                else
                {
                    code_newreg(&cs,reg);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // MOV reg,msw
                    if (sz == REGSIZE + 2)
                        cdb.last().Iflags |= CFopsize;
                    getlvalue_lsw(&cs);                 // MOV lsreg,lsw
                }
                NEWREG(cs.Irm,lsreg);
                cdb.gen(&cs);
            }
            else
            {
                // Index registers are always the lsw!
                cs.Irm |= modregrm(0,reg,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);     // MOV reg,msw
                lsreg = findreglsw(retregs);
                NEWREG(cs.Irm,lsreg);
                getlvalue_lsw(&cs);     // MOV lsreg,lsw
                cdb.gen(&cs);
            }
        }
        else if (I16 && sz == 8)
        {
            // 8 byte value spread over AX,BX,CX,DX
            assert(reg == AX);
            cs.Iop = 0x8B;
            cs.IEV1.Voffset += 6;
            cdb.gen(&cs);             // MOV AX,EA+6
            cs.Irm |= modregrm(0,CX,0);
            cs.IEV1.Voffset -= 4;
            cdb.gen(&cs);                    // MOV CX,EA+2
            NEWREG(cs.Irm,DX);
            cs.IEV1.Voffset -= 2;
            cdb.gen(&cs);                    // MOV DX,EA
            cs.IEV1.Voffset += 4;
            NEWREG(cs.Irm,BX);
            cdb.gen(&cs);                    // MOV BX,EA+4
        }
        else
            assert(0);
    L3:
        fixresult(cdb,e,retregs,pretregs);
    }
    //fprintf(stderr,"cdafter :\n"); WRcodlst(c);
}
3483 
3484 
3485 
static if (!TARGET_SEGMENTED)
{
/// Non-segmented targets: there is never anything to generate.
private code *cod2_setES(tym_t ty)
{
    return null;
}
}
else
{
/********************************
 * Generate code to load ES with the segment that goes with pointer type `ty`.
 *
 * ES is loaded by pushing the appropriate segment register and popping it
 * into ES. Pointer types not listed below (far pointers among them) generate
 * nothing.
 *
 * Params:
 *      ty = type of the pointer whose segment ES has to hold
 * Returns:
 *      the generated code, as produced by CodeBuilder.finish()
 */

private code *cod2_setES(tym_t ty)
{
    CodeBuilder cdb;
    cdb.ctor();

    // Opcode of the PUSH segreg to emit; 0 means ES is left alone
    int pushOpcode = 0;

    const basicTy = tybasic(ty);
    if (basicTy == TYnptr)
    {
        if (!(config.flags3 & CFG3eseqds))
            pushOpcode = 0x1E;          // PUSH DS
    }
    else if (basicTy == TYcptr)
    {
        pushOpcode = 0x0E;              // PUSH CS
    }
    else if (basicTy == TYsptr)
    {
        if ((config.wflags & WFssneds) || !(config.flags3 & CFG3eseqds))
            pushOpcode = 0x16;          // PUSH SS
    }

    if (pushOpcode)
    {
        // Must load ES
        getregs(cdb,mES);
        cdb.gen1(pushOpcode);
        cdb.gen1(0x07);                 // POP ES
    }
    return cdb.finish();
}
}
3531 
/********************************
 * Generate code for intrinsic strlen().
 *
 * The string pointer is evaluated into DI (plus ES when tyreg() is false for
 * its type) and the length is produced in CX.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the strlen element; e.EV.E1 is the string pointer
 *      pretregs = where the result is wanted; mPSW is satisfied by the
 *                 final DEC and then cleared from the mask
 */

void cdstrlen(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate strlen in CX:
        LES     DI,e1
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX
        DEC     CX
     */

    elem *str = e.EV.E1;
    const tym_t strTy = str.Ety;

    regm_t ptrRegs = tyreg(strTy) ? mDI : (mDI | mES);
    codelem(cdb,str,&ptrRegs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(strTy));

    const ubyte rexW = I64 ? REX_W : 0;

    // Scan setup: AL = 0 is the byte searched for, CX = -1 is the counter
    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                                // MOV AL,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mDI|mCX);

    cdb.gen1(0xF2);                                         // REPNE
    cdb.gen1(0xAE);                                         // SCASB

    genregs(cdb,0xF7,2,CX);                                 // NOT CX
    code_orrex(cdb.last(), rexW);

    if (!I64)
        cdb.gen1(0x48 + CX);                                // DEC CX
    else
        cdb.gen2(0xFF,(rexW << 16) | modregrm(3,1,CX));     // DEC reg

    // The DEC is the last instruction, so it can supply the flags directly
    const wantFlags = (*pretregs & mPSW) != 0;
    if (wantFlags)
    {
        cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;
    }
    fixresult(cdb,e,mCX,pretregs);
}
3578 
3579 
/*********************************
 * Generate code for strcmp(s1,s2) intrinsic.
 *
 * s1 is evaluated into SI (with CX as well when tyreg() is false for its
 * type), s2 into DI (with ES likewise). The result is produced in AX unless
 * only the flags are requested.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the strcmp element; e.EV.E1 is s1, e.EV.E2 is s2
 *      pretregs = where the result is wanted; mPSW is cleared from it
 *                 before fixresult() is called
 */

void cdstrcmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /*
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     CX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        PUSH    DS
        MOV     DS,CX
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX                      ;CX = string length of s2
        SUB     DI,CX                   ;point DI back to beginning
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    // s1 -> SI (and CX)
    const tym_t ty1 = e.EV.E1.Ety;
    regm_t s1regs = mSI;
    if (!tyreg(ty1))
        s1regs |= mCX;
    codelem(cdb,e.EV.E1,&s1regs,false);

    // s2 -> DI (and ES), keeping the s1 registers intact
    const tym_t ty2 = e.EV.E2.Ety;
    regm_t s2regs = mDI;
    if (!tyreg(ty2))
        s2regs |= mES;
    scodelem(cdb,e.EV.E2,&s2regs,s1regs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);

    const ubyte rexW = I64 ? REX_W : 0;

    // Work out how DS has to be loaded for s1
    bool saveDS = true;         // DS gets pushed here and popped after the compare
    bool dsFromCX = false;      // DS comes from CX rather than from a segment register
    int srcSeg;                 // segment register copied into DS otherwise
    switch (tybasic(ty1))
    {
        case TYnptr:
        case TYimmutPtr:
            saveDS = false;
            break;

        case TYsptr:
            // if sptr can't use DS segment, take SS
            srcSeg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            srcSeg = SEG_CS;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            dsFromCX = true;
            break;

        default:
            assert(0);
    }

    if (saveDS)
    {
        cdb.gen1(0x1E);                             // PUSH DS
        if (dsFromCX)
            cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
        else
        {
            cdb.gen1(0x06 + (srcSeg << 3));         // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
        }
    }

    // Find the length of s2 (including the terminator) in CX
    movregconst(cdb,AX,0,0);                                // MOV AX,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mSI|mDI|mCX);
    cdb.gen1(0xF2);                                         // REPNE
    cdb.gen1(0xAE);                                         // SCASB
    genregs(cdb,0xF7,2,CX);                                 // NOT CX
    code_orrex(cdb.last(),rexW);
    genregs(cdb,0x2B,DI,CX);                                // SUB DI,CX
    code_orrex(cdb.last(),rexW);

    // Compare that many bytes
    cdb.gen1(0xF3);                                         // REPE
    cdb.gen1(0xA6);                                         // CMPSB
    if (saveDS)
        cdb.gen1(0x1F);                                     // POP DS

    code *done = gennop(null);
    const flagsOnly = (*pretregs == mPSW);
    if (!flagsOnly)
    {
        genjmp(cdb,JE,FLcode,cast(block *) done);           // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                            // SBB AX,AX
        code_orrex(cdb.last(),rexW);
        cdb.genc2(0x81,(rexW << 16) | modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
    }

    *pretregs &= ~mPSW;
    cdb.append(done);
    fixresult(cdb,e,mAX,pretregs);
}
3686 
/*********************************
 * Generate code for memcmp(s1,s2,n) intrinsic.
 *
 * e.EV.E1 must be an OPparam node holding s1 and s2; e.EV.E2 is n.
 * s1 is evaluated into SI (with DX as well when tyreg() is false for its
 * type), s2 into DI (with ES likewise), n into CX. The result is produced
 * in AX unless only the flags are requested.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the memcmp element
 *      pretregs = where the result is wanted; mPSW is cleared from it
 *                 before fixresult() is called
 */

void cdmemcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /*
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     DX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        MOV     CX,n                    ;get number of bytes to compare
        PUSH    DS
        MOV     DS,DX
        XOR     AX,AX
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    elem *ptrs = e.EV.E1;
    assert(ptrs.Eoper == OPparam);

    // Get s1 into DX:SI
    const tym_t ty1 = ptrs.EV.E1.Ety;
    regm_t s1regs = mSI;
    if (!tyreg(ty1))
        s1regs |= mDX;
    codelem(cdb,ptrs.EV.E1,&s1regs,false);

    // Get s2 into ES:DI
    const tym_t ty2 = ptrs.EV.E2.Ety;
    regm_t s2regs = mDI;
    if (!tyreg(ty2))
        s2regs |= mES;
    scodelem(cdb,ptrs.EV.E2,&s2regs,s1regs,false);
    freenode(ptrs);             // both children are evaluated, the OPparam node is done with

    // Get nbytes into CX
    regm_t countRegs = mCX;
    scodelem(cdb,e.EV.E2,&countRegs,s2regs | s1regs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));

    // Work out how DS has to be loaded for s1
    bool saveDS = true;         // DS gets pushed here and popped after the compare
    bool dsFromDX = false;      // DS comes from DX rather than from a segment register
    int srcSeg;                 // segment register copied into DS otherwise
    switch (tybasic(ty1))
    {
        case TYnptr:
        case TYimmutPtr:
            saveDS = false;
            break;

        case TYsptr:
            // if sptr can't use DS segment, take SS
            srcSeg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            srcSeg = SEG_CS;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            dsFromDX = true;
            break;

        default:
            assert(0);
    }

    if (saveDS)
    {
        cdb.gen1(0x1E);                             // PUSH DS
        if (dsFromDX)
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));   // MOV DS,DX
        else
        {
            cdb.gen1(0x06 + (srcSeg << 3));         // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
        }
    }

    static if (1)
    {
        getregs(cdb,mAX);
        cdb.gen2(0x33,modregrm(3,AX,AX));           // XOR AX,AX
        code_orflag(cdb.last(), CFpsw);             // keep flags
    }
    else
    {
        if (*pretregs != mPSW)                      // if not flags only
            regwithvalue(cdb,mAX,0,null,0);         // put 0 in AX
    }

    getregs(cdb,mCX | mSI | mDI);
    cdb.gen1(0xF3);                                 // REPE
    cdb.gen1(0xA6);                                 // CMPSB
    if (saveDS)
        cdb.gen1(0x1F);                             // POP DS

    const flagsOnly = (*pretregs == mPSW);
    if (!flagsOnly)
    {
        code *done = gennop(null);
        genjmp(cdb,JE,FLcode,cast(block *) done);   // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                    // SBB AX,AX
        cdb.genc2(0x81,modregrm(3,3,AX),cast(targ_uns)-1);    // SBB AX,-1
        cdb.append(done);
    }

    *pretregs &= ~mPSW;
    fixresult(cdb,e,mAX,pretregs);
}
3800 
/*********************************
 * Generate code for strcpy(s1,s2) intrinsic.
 *
 * The length of s2 is found first with a REPNE SCASB scan; then s1 is
 * evaluated and the bytes are copied with REP MOVSB. When a result is
 * requested, DI (the start of s1) is copied to AX before the copy runs.
 *
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the strcpy element; e.EV.E1 is s1 (destination), e.EV.E2 is s2 (source)
 *      pretregs = where the result is wanted; passed on to fixresult()
 */

void cdstrcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /*
        LES     DI,s2                   ;ES:DI = s2
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB                   ;find end of s2
        NOT     CX                      ;CX = strlen(s2) + 1 (for EOS)
        SUB     DI,CX
        MOV     SI,DI
        PUSH    DS
        PUSH    ES
        LES     DI,s1
        POP     DS
        MOV     AX,DI                   ;return value is s1
        REP     MOVSB
        POP     DS
    */

    stackchanged = 1;

    // s2 -> DI (and ES)
    const tym_t ty2 = tybasic(e.EV.E2.Ety);
    regm_t srcRegs = mDI;
    if (!tyreg(ty2))
        srcRegs |= mES;
    const ubyte rexW = I64 ? REX_W : 0;
    codelem(cdb,e.EV.E2,&srcRegs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));

    // Measure s2, then rewind DI and hand the start of s2 to SI
    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                    // MOV AL,0
    movregconst(cdb,CX,-1,I64?64:0);            // MOV CX,-1
    getregs(cdb,mAX|mCX|mSI|mDI);
    cdb.gen1(0xF2);                             // REPNE
    cdb.gen1(0xAE);                             // SCASB
    genregs(cdb,0xF7,2,CX);                     // NOT CX
    code_orrex(cdb.last(),rexW);
    genregs(cdb,0x2B,DI,CX);                    // SUB DI,CX
    code_orrex(cdb.last(),rexW);
    genmovreg(cdb,SI,DI);                       // MOV SI,DI

    // Work out which segment DS has to take over for s2
    bool swapDS = true;         // DS and the s2 segment get pushed; two POP DS follow later
    int srcSeg;                 // segment register pushed for the first POP DS
    switch (ty2)
    {
        case TYnptr:
        case TYimmutPtr:
            swapDS = false;
            break;

        case TYsptr:
            // if sptr can't use DS segment, take SS
            srcSeg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            srcSeg = SEG_CS;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            srcSeg = SEG_ES;
            break;

        default:
            assert(0);
    }

    if (swapDS)
    {
        cdb.gen1(0x1E);                         // PUSH DS
        cdb.gen1(0x06 + (srcSeg << 3));         // PUSH segreg
        cdb.genadjesp(REGSIZE * 2);
    }

    // s1 -> DI (and ES), keeping CX and SI intact
    regm_t dstRegs = mDI;
    const tym_t ty1 = tybasic(e.EV.E1.Ety);
    if (!tyreg(ty1))
        dstRegs |= mES;
    scodelem(cdb,e.EV.E1,&dstRegs,mCX|mSI,false);
    getregs(cdb,mAX|mCX|mSI|mDI);

    // Make sure ES contains proper segment value;
    // when both pointers are TYnptr, ES is already same as DS
    const bothNear = (ty2 == TYnptr && ty1 == ty2);
    if (!bothNear)
        cdb.append(cod2_setES(ty1));

    if (swapDS)
        cdb.gen1(0x1F);                         // POP DS
    if (*pretregs)
        genmovreg(cdb,AX,DI);                   // MOV AX,DI
    cdb.gen1(0xF3);                             // REP
    cdb.gen1(0xA4);                             // MOVSB

    if (swapDS)
    {
        cdb.gen1(0x1F);                         // POP DS
        cdb.genadjesp(-(REGSIZE * 2));
    }
    fixresult(cdb,e,mAX | mES,pretregs);
}
3907 
3908 /*********************************
3909  * Generate code for memcpy(s1,s2,n) intrinsic.
3910  *  OPmemcpy
3911  *   /   \
3912  * s1   OPparam
3913  *       /   \
3914  *      s2    n
3915  */
3916 
3917 void cdmemcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3918 {
3919     char need_DS;
3920     int segreg;
3921 
3922     /*
3923         MOV     SI,s2
3924         MOV     DX,s2+2
3925         MOV     CX,n
3926         LES     DI,s1
3927         PUSH    DS
3928         MOV     DS,DX
3929         MOV     AX,DI                   ;return value is s1
3930         REP     MOVSB
3931         POP     DS
3932     */
3933 
3934     elem *e2 = e.EV.E2;
3935     assert(e2.Eoper == OPparam);
3936 
3937     // Get s2 into DX:SI
3938     regm_t retregs2 = mSI;
3939     tym_t ty2 = e2.EV.E1.Ety;
3940     if (!tyreg(ty2))
3941         retregs2 |= mDX;
3942     codelem(cdb,e2.EV.E1,&retregs2,false);
3943 
3944     // Need to check if nbytes is 0 (OPconst of 0 would have been removed by elmemcpy())
3945     const zeroCheck = e2.EV.E2.Eoper != OPconst;
3946 
3947     // Get nbytes into CX
3948     regm_t retregs3 = mCX;
3949     scodelem(cdb,e2.EV.E2,&retregs3,retregs2,false);
3950     freenode(e2);
3951 
3952     // Get s1 into ES:DI
3953     regm_t retregs1 = mDI;
3954     tym_t ty1 = e.EV.E1.Ety;
3955     if (!tyreg(ty1))
3956         retregs1 |= mES;
3957     scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);
3958 
3959     ubyte rex = I64 ? REX_W : 0;
3960 
3961     // Make sure ES contains proper segment value
3962     cdb.append(cod2_setES(ty1));
3963 
3964     // Load DS with right value
3965     switch (tybasic(ty2))
3966     {
3967         case TYnptr:
3968         case TYimmutPtr:
3969             need_DS = false;
3970             break;
3971 
3972         case TYsptr:
3973             if (config.wflags & WFssneds)       // if sptr can't use DS segment
3974                 segreg = SEG_SS;
3975             else
3976                 segreg = SEG_DS;
3977             goto L1;
3978 
3979         case TYcptr:
3980             segreg = SEG_CS;
3981         L1:
3982             cdb.gen1(0x1E);                        // PUSH DS
3983             cdb.gen1(0x06 + (segreg << 3));        // PUSH segreg
3984             cdb.gen1(0x1F);                        // POP  DS
3985             need_DS = true;
3986             break;
3987 
3988         case TYfptr:
3989         case TYvptr:
3990         case TYhptr:
3991             cdb.gen1(0x1E);                        // PUSH DS
3992             cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));  // MOV DS,DX
3993             need_DS = true;
3994             break;
3995 
3996         default:
3997             assert(0);
3998     }
3999 
4000     if (*pretregs)                              // if need return value
4001     {   getregs(cdb,mAX);
4002         genmovreg(cdb,AX,DI);
4003     }
4004 
4005     if (0 && I32 && config.flags4 & CFG4speed)
4006     {
4007         /* This is only faster if the memory is dword aligned, if not
4008          * it is significantly slower than just a rep movsb.
4009          */
4010         /*      mov     EDX,ECX
4011          *      shr     ECX,2
4012          *      jz      L1
4013          *      repe    movsd
4014          * L1:  nop
4015          *      and     EDX,3
4016          *      jz      L2
4017          *      mov     ECX,EDX
4018          *      repe    movsb
4019          * L2:  nop
4020          */
4021         getregs(cdb,mSI | mDI | mCX | mDX);
4022         genmovreg(cdb,DX,CX);                  // MOV EDX,ECX
4023         cdb.genc2(0xC1,modregrm(3,5,CX),2);                 // SHR ECX,2
4024         code *cx = gennop(null);
4025         genjmp(cdb, JE, FLcode, cast(block *)cx);  // JZ L1
4026         cdb.gen1(0xF3);                                     // REPE
4027         cdb.gen1(0xA5);                                     // MOVSW
4028         cdb.append(cx);
4029         cdb.genc2(0x81, modregrm(3,4,DX),3);                // AND EDX,3
4030 
4031         code *cnop = gennop(null);
4032         genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ L2
4033         genmovreg(cdb,CX,DX);                    // MOV ECX,EDX
4034         cdb.gen1(0xF3);                          // REPE
4035         cdb.gen1(0xA4);                          // MOVSB
4036         cdb.append(cnop);
4037     }
4038     else
4039     {
4040         code* cnop;
4041         if (zeroCheck)
4042         {
4043             cnop = gennop(null);
4044             gentstreg(cdb,CX);                           // TEST ECX,ECX
4045             if (I64)
4046                 code_orrex(cdb.last, REX_W);
4047             genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ cnop
4048         }
4049 
4050         getregs(cdb,mSI | mDI | mCX);
4051         if (I16 && config.flags4 & CFG4speed)          // if speed optimization
4052         {
4053             // Note this doesn't work if CX is 0
4054             cdb.gen2(0xD1,(rex << 16) | modregrm(3,5,CX));        // SHR CX,1
4055             cdb.gen1(0xF3);                              // REPE
4056             cdb.gen1(0xA5);                              // MOVSW
4057             cdb.gen2(0x11,(rex << 16) | modregrm(3,CX,CX));            // ADC CX,CX
4058         }
4059         cdb.gen1(0xF3);                             // REPE
4060         cdb.gen1(0xA4);                             // MOVSB
4061         if (zeroCheck)
4062             cdb.append(cnop);
4063         if (need_DS)
4064             cdb.gen1(0x1F);                         // POP DS
4065     }
4066     fixresult(cdb,e,mES|mAX,pretregs);
4067 }
4068 
4069 
/*********************************
 * Generate code for memset(s,value,numbytes) intrinsic.
 *      (s OPmemset (numbytes OPparam value))
 *
 * Three code shapes are produced:
 *  1. a short run of MOV stores, when numbytes is a constant no larger than
 *     REP_THRESHOLD, value is a constant, and the target is not 16 bit;
 *  2. REP STOS followed by individual STOS instructions for the tail, when
 *     numbytes is a constant but shape 1 does not apply;
 *  3. REP STOS of register sized units followed by REP STOSB for the tail,
 *     when numbytes is only known at run time.
 * Params:
 *      cdb = code sink
 *      e = the OPmemset elem
 *      pretregs = registers in which the result (the original value of s)
 *                 is wanted, 0 if the result is not used
 */

void cdmemset(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs1;
    regm_t retregs3;
    reg_t reg;
    reg_t vreg;
    tym_t ty1;
    targ_uns numbytes;
    uint m;

    //printf("cdmemset(*pretregs = %s)\n", regm_str(*pretregs));
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);

    elem* evalue = e2.EV.E2;
    elem* enumbytes = e2.EV.E1;

    // REX.W positioned for or'ing into a modregrm word; 0 unless generating 64 bit code
    const grex = I64 ? (REX_W << 16) : 0;

    /* If the fill value is known at compile time, replicate its low byte
     * through every byte of a register sized value.
     */
    bool valueIsConst = false;
    targ_size_t value;
    if (evalue.Eoper == OPconst)
    {
        value = el_tolong(evalue) & 0xFF;
        value |= value << 8;
        if (I32 || I64)
        {
            value |= value << 16;
            static if (value.sizeof == 8)
            if (I64)
                value |= value << 32;
        }
        valueIsConst = true;
    }
    else if (evalue.Eoper == OPstrpar)  // happens if evalue is a struct of 0 size
    {
        value = 0;
        valueIsConst = true;
    }
    else
        value = 0xDEADBEEF;     // stop annoying false positives that value is not inited

    if (enumbytes.Eoper == OPconst)
    {
        // Largest constant size that is expanded into individual MOV stores
        static uint REP_THRESHOLD() { return REGSIZE * (6 + (REGSIZE == 4)); }
        numbytes = cast(uint)cast(targ_size_t)el_tolong(enumbytes);
        if (numbytes <= REP_THRESHOLD &&
            !I16 &&                     // doesn't work for 16 bits
            valueIsConst)
        {
            /* Shape 1: s is loaded into a register and the stores go
             * through that register, which also serves as the result.
             */
            targ_uns offset = 0;
            retregs1 = *pretregs;
            if (!retregs1)
                retregs1 = ALLREGS;
            codelem(cdb,e.EV.E1,&retregs1,false);
            reg = findreg(retregs1);
            if (evalue.Eoper == OPconst)
            {
                // Sizes 1, 2 and 4 are a single store of an immediate
                const uint mrm = buildModregrm(0,0,reg);
                switch (numbytes)
                {
                    case 4:                     // MOV [reg],imm32
                        cdb.genc2(0xC7,mrm,value);
                        goto fixres;
                    case 2:                     // MOV [reg],imm16
                        cdb.genc2(0xC7,mrm,value);
                        cdb.last().Iflags = CFopsize;
                        goto fixres;
                    case 1:                     // MOV [reg],imm8
                        cdb.genc2(0xC6,mrm,value);
                        goto fixres;

                    default:
                        break;
                }
            }

            // Put the replicated value into a byte addressable register other than reg
            regwithvalue(cdb, BYTEREGS & ~retregs1, value, &vreg, I64 ? 64 : 0);
            freenode(evalue);
            freenode(e2);

            // Register sized stores first ...
            m = grex | buildModregrm(2,vreg,reg);
            while (numbytes >= REGSIZE)
            {                           // MOV dword ptr offset[reg],vreg
                cdb.gen2(0x89,m);
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                numbytes -= REGSIZE;
                offset += REGSIZE;
            }
            // ... then the remaining 4, 2 and 1 byte pieces, without REX.W
            m &= ~grex;
            if (numbytes & 4)
            {                           // MOV dword ptr offset[reg],vreg
                cdb.gen2(0x89,m);
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                offset += 4;
            }
            if (numbytes & 2)
            {                           // MOV word ptr offset[reg],vreg
                cdb.gen2(0x89,m);
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                cdb.last().Iflags = CFopsize;
                offset += 2;
            }
            if (numbytes & 1)
            {                           // MOV byte ptr offset[reg],vreg
                cdb.gen2(0x88,m);
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                if (I64 && vreg >= 4)
                    cdb.last().Irex |= REX;
            }
fixres:
            fixresult(cdb,e,retregs1,pretregs);
            return;
        }
    }

    // Get nbytes into CX
    regm_t retregs2 = 0;
    if (enumbytes.Eoper != OPconst)
    {
        retregs2 = mCX;
        codelem(cdb,enumbytes,&retregs2,false);
    }

    // Get value into AX
    retregs3 = mAX;
    if (valueIsConst)
    {
        regwithvalue(cdb, mAX, value, null, I64?64:0);
        freenode(evalue);
    }
    else
    {
        scodelem(cdb,evalue,&retregs3,retregs2,false);

        // Replicate AL through the rest of the accumulator at run time
        getregs(cdb,mAX);
        if (I16)
        {
            cdb.gen2(0x8A,modregrm(3,AH,AL)); // MOV AH,AL
        }
        else if (I32)
        {
            genregs(cdb,MOVZXb,AX,AX);                    // MOVZX EAX,AL
            cdb.genc2(0x69,modregrm(3,AX,AX),0x01010101); // IMUL EAX,EAX,0x01010101
        }
        else
        {
            genregs(cdb,MOVZXb,AX,AX);                    // MOVZX EAX,AL
            regm_t regm = allregs & ~(mAX | retregs2);
            reg_t r;
            regwithvalue(cdb,regm,cast(targ_size_t)0x01010101_01010101,&r,64); // MOV reg,0x01010101_01010101
            cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r));        // IMUL RAX,reg
        }
    }
    freenode(e2);

    // Get s into ES:DI
    retregs1 = mDI;
    ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        retregs1 |= mES;
    scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty1));

    // STOS advances DI, so keep a copy of the start address in BX for the result
    if (*pretregs)                              // if need return value
    {
        getregs(cdb,mBX);
        genmovreg(cdb,BX,DI);                   // MOV EBX,EDI
    }


    if (enumbytes.Eoper == OPconst)
    {
        // Shape 2: constant byte count
        getregs(cdb,mDI);
        if (const numwords = numbytes / REGSIZE)
        {
            regwithvalue(cdb,mCX,numwords,null, I64 ? 64 : 0);
            getregs(cdb,mCX);
            cdb.gen1(0xF3);                     // REP
            cdb.gen1(STOS);                     // STOSW/D/Q
            if (I64)
                code_orrex(cdb.last(), REX_W);
            regimmed_set(CX, 0);                // CX is now 0
        }

        // Bytes left over after the register sized stores
        auto remainder = numbytes & (REGSIZE - 1);
        if (I64 && remainder >= 4)
        {
            cdb.gen1(STOS);                     // STOSD
            remainder -= 4;
        }
        for (; remainder; --remainder)
            cdb.gen1(STOSB);                    // STOSB
        fixresult(cdb,e,mES|mBX,pretregs);
        return;
    }

    // Shape 3: byte count is in CX at run time
    getregs(cdb,mDI | mCX);
    if (I16)
    {
        if (config.flags4 & CFG4speed)      // if speed optimization
        {
            // Store words first; the bit shifted out of CX is the odd byte
            cdb.gen2(0xD1,modregrm(3,5,CX));  // SHR CX,1
            cdb.gen1(0xF3);                   // REP
            cdb.gen1(STOS);                   // STOSW
            cdb.gen2(0x11,modregrm(3,CX,CX)); // ADC CX,CX
        }
        cdb.gen1(0xF3);                       // REP
        cdb.gen1(STOSB);                      // STOSB
        regimmed_set(CX, 0);                  // CX is now 0
        fixresult(cdb,e,mES|mBX,pretregs);
        return;
    }

    /*  MOV   sreg,ECX
        SHR   ECX,n
        REP
        STOSD/Q

        ADC   ECX,ECX
        REP
        STOSD

        MOV   ECX,sreg
        AND   ECX,3
        REP
        STOSB
     */
    // sreg keeps the original count; it must not be a register already in use here
    regm_t regs = allregs & (*pretregs ? ~(mAX|mBX|mCX|mDI) : ~(mAX|mCX|mDI));
    reg_t sreg;
    allocreg(cdb,&regs,&sreg,TYint);
    genregs(cdb,0x89,CX,sreg);                        // MOV sreg,ECX (32 bits only)

    const n = I64 ? 3 : 2;
    cdb.genc2(0xC1, grex | modregrm(3,5,CX), n);      // SHR ECX,n

    cdb.gen1(0xF3);                                   // REP
    cdb.gen1(STOS);                                   // STOSD/Q
    if (I64)
        code_orrex(cdb.last(), REX_W);

    if (I64)
    {
        // The carry left by the SHR is bit 2 of the count: one more dword if set
        cdb.gen2(0x11,modregrm(3,CX,CX));             // ADC ECX,ECX
        cdb.gen1(0xF3);                               // REP
        cdb.gen1(STOS);                               // STOSD
    }

    genregs(cdb,0x89,sreg,CX);                        // MOV ECX,sreg (32 bits only)
    cdb.genc2(0x81, modregrm(3,4,CX), 3);             // AND ECX,3
    cdb.gen1(0xF3);                                   // REP
    cdb.gen1(STOSB);                                  // STOSB

    regimmed_set(CX, 0);                    // CX is now 0
    fixresult(cdb,e,mES|mBX,pretregs);
}
4339 
4340 
/**********************
 * Do structure assignments.
 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).
 * Mebbe call cdstreq() for double assignments???
 *
 * The address of the source is loaded into SI (with its segment in CX when
 * DS has to be switched), the address of the destination into [ES:]DI, and
 * the bytes are then copied with MOVS instructions: an unrolled sequence for
 * small structures, REP MOVS plus a few trailing MOVS otherwise.
 * Params:
 *      cdb = code sink
 *      e = the structure assignment elem (E1 = destination, E2 = source)
 *      pretregs = registers in which the address of the destination is
 *                 wanted, 0 if the result is not used; mPSW is not allowed
 */

void cdstreq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    bool need_DS = false;                      // true if DS must be saved, reloaded from CX, and restored
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int segreg;
    uint numbytes = cast(uint)type_size(e.ET);          // # of bytes in structure/union
    ubyte rex = I64 ? REX_W : 0;

    //printf("cdstreq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    // First, load pointer to rvalue into SI
    regm_t srcregs = mSI;                      // source is DS:SI
    docommas(cdb,&e2);
    if (e2.Eoper == OPind)             // if (.. = *p)
    {   elem *e21 = e2.EV.E1;

        // Work out which segment the pointer p refers to
        segreg = SEG_DS;
        switch (tybasic(e21.Ety))
        {
            case TYsptr:
                if (config.wflags & WFssneds)   // if sptr can't use DS segment
                    segreg = SEG_SS;
                break;
            case TYcptr:
                if (!(config.exe & EX_flat))
                    segreg = SEG_CS;
                break;
            case TYfptr:
            case TYvptr:
            case TYhptr:
                srcregs |= mCX;         // get segment also
                need_DS = true;
                break;

            default:
                break;
        }
        codelem(cdb,e21,&srcregs,false);
        freenode(e2);
        if (segreg != SEG_DS)           // if not DS
        {
            getregs(cdb,mCX);
            cdb.gen2(0x8C,modregrm(3,segreg,CX)); // MOV CX,segreg
            need_DS = true;
        }
    }
    else if (e2.Eoper == OPvar)
    {
        if (e2.EV.Vsym.ty() & mTYfar) // if e2 is in a far segment
        {   srcregs |= mCX;             // get segment also
            need_DS = true;
            cdrelconst(cdb,e2,&srcregs);
        }
        else
        {
            segreg = segfl[el_fl(e2)];
            if ((config.wflags & WFssneds) && segreg == SEG_SS || // if source is on stack
                segreg == SEG_CS)               // if source is in CS
            {
                need_DS = true;         // we need to reload DS
                // Load CX with segment
                srcregs |= mCX;
                getregs(cdb,mCX);
                cdb.gen2(0x8C,                // MOV CX,[SS|CS]
                    modregrm(3,segreg,CX));
            }
            cdrelconst(cdb,e2,&srcregs);
        }
        freenode(e2);
    }
    else
    {
        // Any other expression: evaluate it into SI, plus the segment into CX if not flat
        if (!(config.exe & EX_flat))
        {   need_DS = true;
            srcregs |= mCX;
        }
        codelem(cdb,e2,&srcregs,false);
    }

    // now get pointer to lvalue (destination) in ES:DI
    regm_t dstregs = (config.exe & EX_flat) ? mDI : mES|mDI;
    if (e1.Eoper == OPind)               // if (*p = ..)
    {
        if (tyreg(e1.EV.E1.Ety))
            dstregs = mDI;
        cdb.append(cod2_setES(e1.EV.E1.Ety));
        scodelem(cdb,e1.EV.E1,&dstregs,srcregs,false);
    }
    else
        cdrelconst(cdb,e1,&dstregs);
    freenode(e1);

    // The MOVS instructions modify the pointer registers
    getregs(cdb,(srcregs | dstregs) & (mLSW | mDI));
    if (need_DS)
    {     assert(!(config.exe & EX_flat));
        cdb.gen1(0x1E);                     // PUSH DS
        cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));    // MOV DS,CX
    }
    if (numbytes <= REGSIZE * (6 + (REGSIZE == 4)))
    {
        // Small structure: unrolled sequence of MOVS instructions
        while (numbytes >= REGSIZE)
        {
            cdb.gen1(0xA5);         // MOVSW
            code_orrex(cdb.last(), rex);
            numbytes -= REGSIZE;
        }
        //if (numbytes)
        //    printf("cdstreq numbytes %d\n",numbytes);
        if (I64 && numbytes >= 4)
        {
            cdb.gen1(0xA5);         // MOVSD
            numbytes -= 4;
        }
        while (numbytes--)
            cdb.gen1(0xA4);         // MOVSB
    }
    else
    {
        // Large structure: REP MOVS of register sized units, then the left over bytes
        uint remainder = numbytes & (REGSIZE - 1);
        numbytes /= REGSIZE;            // number of words
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,numbytes,0);   // # of bytes/words
        cdb.gen1(0xF3);                 // REP
        if (REGSIZE == 8)
            cdb.gen1(REX | REX_W);
        cdb.gen1(0xA5);                 // REP MOVSD
        regimmed_set(CX,0);             // note that CX == 0
        if (I64 && remainder >= 4)
        {
            cdb.gen1(0xA5);         // MOVSD
            remainder -= 4;
        }
        for (; remainder; remainder--)
        {
            cdb.gen1(0xA4);             // MOVSB
        }
    }
    if (need_DS)
        cdb.gen1(0x1F);                 // POP  DS
    assert(!(*pretregs & mPSW));
    if (*pretregs)
    {   // ES:DI points past what we want

        // numbytes was consumed by the copy above, so use the structure size again
        cdb.genc2(0x81,(rex << 16) | modregrm(3,5,DI), type_size(e.ET));   // SUB DI,numbytes
        regm_t retregs = mDI;
        if (*pretregs & mMSW && !(config.exe & EX_flat))
            retregs |= mES;
        fixresult(cdb,e,retregs,pretregs);
    }
}
4517 
4518 
/**********************
 * Get the address of.
 * Is also called by cdstreq() to set up pointer to a structure.
 * Params:
 *      cdb = code sink
 *      e = elem referring to the symbol whose address is wanted
 *      pretregs = on entry the registers (and possibly mPSW) wanted for the
 *                 result; mPSW is cleared here and allocreg() narrows the rest
 */

void cdrelconst(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdrelconst(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    /* A request for flags should not happen, but cgelem.c is a little
     * stupid: func("string" == 0); and similar things produce it.
     * Goals need to be added to optelem() to fix this completely.
     */
    //assert((*pretregs & mPSW) == 0);
    if (*pretregs & mPSW)
    {
        *pretregs &= ~mPSW;
        gentstreg(cdb,SP);            // SP is never 0
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }

    // Nothing more to do if no register result was asked for
    if (!*pretregs)
        return;

    assert(e);

    // Pick the pointer type that describes the address being produced
    tym_t addrty = tybasic(e.Ety);
    if (addrty == TYstruct   ||
        addrty == TYarray    ||
        addrty == TYldouble  ||
        addrty == TYildouble ||
        addrty == TYcldouble)
    {
        // don't confuse allocreg(): use a pointer type, far if a segment is involved
        const wantSegment = *pretregs & (mES | mCX) || e.Ety & mTYfar;
        addrty = wantSegment ? TYfptr : TYnptr;
    }
    else if (addrty == TYifunc)
    {
        addrty = TYfptr;
    }
    else if (tyfunc(addrty))
    {
        addrty = tyfarfunc(addrty) ? TYfptr : TYnptr;
    }
    //assert(addrty & typtr);           // don't fail on (int)&a

    reg_t offsetReg;                    // offset of the address
    allocreg(cdb,pretregs,&offsetReg,addrty);

    if (_tysize[addrty] > REGSIZE)      // fptr could've been cast to long
    {
        reg_t segmentReg;               // segment of the address (TYfptrs only)
        if (*pretregs & mES)
        {
            /* Do not allocate CX or SI here, as cdstreq() needs
             * them preserved. cdstreq() should use scodelem()
             */
            segmentReg = allocScratchReg(cdb, (mAX|mBX|mDX|mDI) & ~mask(offsetReg));
        }
        else
        {
            // Register pair result: allocreg() gave us the segment half
            segmentReg = offsetReg;
            offsetReg = findreglsw(*pretregs);
        }

        Symbol *sym = e.EV.Vsym;
        if (sym.Sfl == FLdatseg)
        {   assert(0);
        }
        SC sclass = cast(SC) sym.Sclass;
        const ety = tybasic(sym.ty());

        /* Decide whether the segment has to come from a fixup on the symbol,
         * i.e. the symbol is not necessarily in a segment that one of the
         * segment registers already holds (CS doesn't have the right value
         * in it for a function elsewhere, for instance).
         */
        bool segFromSymbol;
        if ((tyfarfunc(ety) || ety == TYifunc) &&
            (sclass == SCextern || ClassInline(sclass) || config.wflags & WFthunk))
            segFromSymbol = true;
        else if (sym.Sfl == FLfardata)
            segFromSymbol = true;
        else
            segFromSymbol = (sym.ty() & mTYcs && sym.Sseg != cseg && (LARGECODE || sym.Sclass == SCcomdat));

        if (segFromSymbol)
        {
            cdb.gencs(0xB8 + segmentReg,0,FLextern,sym);   // MOV segmentReg,seg of symbol
            cdb.last().Iflags = CFseg;
        }
        else
        {
            const fl = (sym.ty() & mTYcs) ? FLcsdata : sym.Sfl;
            cdb.gen2(0x8C,modregrm(3,segfl[fl],segmentReg)); // MOV segmentReg,SEG REGISTER
        }

        if (*pretregs & mES)
            cdb.gen2(0x8E,modregrm(3,0,segmentReg));       // MOV ES,segmentReg
    }

    getoffset(cdb,e,offsetReg);
}
4621 
/*********************************
 * Load the offset portion of the address represented by e into
 * reg.
 *
 * How the offset is produced depends on where the symbol lives (el_fl(e)):
 * static data gets a MOV (or RIP relative LEA) with a fixup, thread local
 * data gets a target specific sequence, and stack based symbols get an LEA.
 * Params:
 *      cdb = code sink
 *      e = OPvar or OPrelconst elem
 *      reg = register to load, or STACK to push the offset instead
 *            (the PIC and Win64 thread local paths do not handle STACK)
 */

void getoffset(ref CodeBuilder cdb,elem *e,reg_t reg)
{
    //printf("getoffset(e = %p, reg = %d)\n", e, reg);
    code cs = void;
    cs.Iflags = 0;
    ubyte rex = 0;
    cs.Irex = rex;
    assert(e.Eoper == OPvar || e.Eoper == OPrelconst);
    auto fl = el_fl(e);
    switch (fl)
    {
        case FLdatseg:
            cs.IEV2.Vpointer = e.EV.Vpointer;
            goto L3;

        case FLfardata:
            goto L4;

        case FLtlsdata:
    static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
    {
        {
          L5:
            if (config.flags3 & CFG3pic)
            {
                if (I64)
                {
                    /* Generate:
                     *   LEA DI,s@TLSGD[RIP]
                     */
                    //assert(reg == DI);
                    code css = void;
                    css.Irex = REX | REX_W;
                    css.Iop = LEA;
                    /* Only the low 3 bits of the register number belong in
                     * the ModRM byte, bit 3 goes into REX.R below. Passing
                     * reg unmasked would spill bit 3 into the mod field for
                     * R8..R15.
                     */
                    css.Irm = modregrm(0,reg & 7,5);
                    if (reg & 8)
                        css.Irex |= REX_R;
                    css.Iflags = CFopsize;
                    css.IFL1 = cast(ubyte)fl;
                    css.IEV1.Vsym = e.EV.Vsym;
                    css.IEV1.Voffset = e.EV.Voffset;
                    cdb.gen(&css);
                }
                else
                {
                    /* Generate:
                     *   LEA EAX,s@TLSGD[1*EBX+0]
                     */
                    assert(reg == AX);
                    load_localgot(cdb);
                    code css = void;
                    css.Iflags = 0;
                    css.Iop = LEA;             // LEA
                    css.Irex = 0;
                    css.Irm = modregrm(0,AX,4);
                    css.Isib = modregrm(0,BX,5);
                    css.IFL1 = cast(ubyte)fl;
                    css.IEV1.Vsym = e.EV.Vsym;
                    css.IEV1.Voffset = e.EV.Voffset;
                    cdb.gen(&css);
                }
                return;
            }
            /* Generate:
             *      MOV reg,GS:[00000000]
             *      ADD reg, offset s@TLS_LE
             * for locals, and for globals:
             *      MOV reg,GS:[00000000]
             *      ADD reg, s@TLS_IE
             * note different fixup
             */
            int stack = 0;
            if (reg == STACK)
            {   regm_t retregs = ALLREGS;

                // Compute into a scratch register, which is pushed afterwards
                reg_t regx;
                allocreg(cdb,&retregs,&regx,TYoffset);
                reg = findreg(retregs);
                stack = 1;
            }

            code css = void;
            css.Irex = rex;
            css.Iop = 0x8B;
            css.Irm = modregrm(0, 0, BPRM);
            code_newreg(&css, reg);
            css.Iflags = CFgs;
            css.IFL1 = FLconst;
            css.IEV1.Vuns = 0;
            cdb.gen(&css);               // MOV reg,GS:[00000000]

            if (e.EV.Vsym.Sclass == SCstatic || e.EV.Vsym.Sclass == SClocstat)
            {   // ADD reg, offset s
                cs.Irex = rex;
                cs.Iop = 0x81;
                cs.Irm = modregrm(3,0,reg & 7);
                if (reg & 8)
                    cs.Irex |= REX_B;
                cs.Iflags = CFoff;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.IEV2.Voffset = e.EV.Voffset;
            }
            else
            {   // ADD reg, s
                cs.Irex = rex;
                cs.Iop = 0x03;
                cs.Irm = modregrm(0,0,BPRM);
                code_newreg(&cs, reg);
                cs.Iflags = CFoff;
                cs.IFL1 = cast(ubyte)fl;
                cs.IEV1.Vsym = e.EV.Vsym;
                cs.IEV1.Voffset = e.EV.Voffset;
            }
            cdb.gen(&cs);                // ADD reg, xxxx

            if (stack)
            {
                cdb.gen1(0x50 + (reg & 7));      // PUSH reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
                cdb.genadjesp(REGSIZE);
                stackchanged = 1;
            }
            break;
        }
    }
    else static if (TARGET_WINDOS)
    {
            if (I64)
            {
            L5:
                assert(reg != STACK);
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.IEV2.Voffset = e.EV.Voffset;
                cs.Iop = 0xB8 + (reg & 7);      // MOV Ereg,offset s
                if (reg & 8)
                    cs.Irex |= REX_B;
                cs.Iflags = CFoff;              // want offset only
                cs.IFL2 = cast(ubyte)fl;
                cdb.gen(&cs);
                break;
            }
            goto L4;
    }
    else
    {
            goto L4;
    }

        case FLfunc:
            fl = FLextern;                  /* don't want PC relative addresses */
            goto L4;

        case FLextern:
            // Thread local externs are routed to the thread local sequence at L5
    static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
    {
            if (e.EV.Vsym.ty() & mTYthread)
                goto L5;
    }
    static if (TARGET_WINDOS)
    {
            if (I64 && e.EV.Vsym.ty() & mTYthread)
                goto L5;
    }
            goto L4;

        case FLdata:
        case FLudata:
        case FLgot:
        case FLgotoff:
        case FLcsdata:
        L4:
            cs.IEV2.Vsym = e.EV.Vsym;
            cs.IEV2.Voffset = e.EV.Voffset;
        L3:
            if (reg == STACK)
            {   stackchanged = 1;
                cs.Iop = 0x68;              /* PUSH immed16                 */
                cdb.genadjesp(REGSIZE);
            }
            else
            {   cs.Iop = 0xB8 + (reg & 7);  // MOV reg,immed16
                if (reg & 8)
                    cs.Irex |= REX_B;
                if (I64)
                {   cs.Irex |= REX_W;
                    if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
                    {   // LEA reg,immed32[RIP]
                        cs.Iop = LEA;
                        cs.Irm = modregrm(0,reg & 7,5);
                        // reg is now in the ModRM reg field, so bit 3 moves from REX.B to REX.R
                        if (reg & 8)
                            cs.Irex = (cs.Irex & ~REX_B) | REX_R;
                        cs.IFL1 = cast(ubyte)fl;
                        cs.IEV1.Vsym = cs.IEV2.Vsym;
                        cs.IEV1.Voffset = cs.IEV2.Voffset;
                    }
                }
            }
            cs.Iflags = CFoff;              /* want offset only             */
            cs.IFL2 = cast(ubyte)fl;
            cdb.gen(&cs);
            break;

        case FLreg:
            /* Allow this since the tree optimizer puts & in front of       */
            /* register doubles.                                            */
            goto L2;
        case FLauto:
        case FLfast:
        case FLbprel:
        case FLfltreg:
            reflocal = true;
            goto L2;
        case FLpara:
            refparam = true;
        L2:
            if (reg == STACK)
            {   regm_t retregs = ALLREGS;

                // Compute the address into a scratch register and push that
                reg_t regx;
                allocreg(cdb,&retregs,&regx,TYoffset);
                reg = findreg(retregs);
                loadea(cdb,e,&cs,LEA,reg,0,0,0);    // LEA reg,EA
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                cdb.gen1(0x50 + (reg & 7));               // PUSH reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
                cdb.genadjesp(REGSIZE);
                stackchanged = 1;
            }
            else
            {
                loadea(cdb,e,&cs,LEA,reg,0,0,0);   // LEA reg,EA
                if (I64)
                    code_orrex(cdb.last(), REX_W);
            }
            break;

        default:
            debug
            {
                elem_print(e);
                WRFL(fl);
            }
            assert(0);
    }
}
4876 
4877 
/******************
 * Code generator for OPneg, OPsqrt, OPsin, OPcos, OPrint.
 *
 * Floating point operands are dispatched to the complex, XMM or x87
 * generators when their conditions hold; otherwise the value is loaded
 * into integer registers and its sign bit is toggled with XOR.
 * Integer operands are negated in place with NEG (NEG/NEG/SBB for a
 * value occupying a register pair).
 *
 * Params:
 *      cdb = sink for the generated code
 *      e = operator elem, the operand is e.EV.E1
 *      pretregs = where the result is wanted; if 0 the operand is
 *                 evaluated and nothing else is generated
 */

void cdneg(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdneg()\n");
    //elem_print(e);
    elem *e1 = e.EV.E1;
    if (!*pretregs)
    {
        // No result requested: just evaluate the operand
        codelem(cdb,e1,pretregs,false);
        return;
    }

    const tyml = tybasic(e1.Ety);
    const sz = _tysize[tyml];

    if (tyfloating(tyml))
    {
        if (tycomplex(tyml))
        {
            neg_complex87(cdb, e, pretregs);
        }
        else if (tyxmmreg(tyml) && e.Eoper == OPneg && (*pretregs & XMMREGS))
        {
            xmmneg(cdb,e,pretregs);
        }
        else if (config.inline8087 &&
                 (!(*pretregs & (ALLREGS | mBP)) || e.Eoper == OPsqrt || I64))
        {
            neg87(cdb,e,pretregs);
        }
        else
        {
            // Value lives in integer registers: flip the sign bit by hand
            regm_t fregs = ALLREGS;
            if (I16 && sz == 8)
                fregs = DOUBLEREGS_16;
            codelem(cdb,e1,&fregs,false);
            getregs(cdb,fregs);
            if (I32)
            {
                const signreg = (sz == 8) ? findregmsw(fregs) : findreg(fregs);
                cdb.genc2(0x81,modregrm(3,6,signreg),0x80000000); // XOR EDX,sign bit
            }
            else
            {
                const signreg = (sz == 8) ? AX : findregmsw(fregs);
                cdb.genc2(0x81,modregrm(3,6,signreg),0x8000);     // XOR AX,0x8000
            }
            fixresult(cdb,e,fregs,pretregs);
        }
        return;
    }

    // Integer negate
    const uint isbyte = sz == 1;
    const candidates = (isbyte) ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & candidates;
    if (!retregs)
        retregs = candidates;
    codelem(cdb,e1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed

    if (sz == 2 * REGSIZE)
    {
        // Register pair: NEG msreg / NEG lsreg / SBB msreg,0
        const msreg = findregmsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        const lsreg = findreglsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
        code_orflag(cdb.last(), CFpsw);           // need flag result of previous NEG
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
    }
    else if (sz <= REGSIZE)
    {
        const reg = findreg(retregs);
        uint rex = 0;
        if (I64)
        {
            if (sz == 8)
                rex = REX_W;                      // 64 bit operand
            else if (sz == 1 && reg >= 4)
                rex = REX;                        // byte access to regs 4 and up
        }
        cdb.gen2(0xF7 ^ isbyte,(rex << 16) | modregrmx(3,3,reg));   // NEG reg
        if (!I16 && sz == SHORTSIZE && (*pretregs & mPSW))
            cdb.last().Iflags |= CFopsize | CFpsw;
        *pretregs &= mBP | ALLREGS;             // flags already set
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}
4959 
4960 
/******************
 * Absolute value operator.
 *
 * Floating point operands go to the XMM or x87 generators when their
 * conditions hold; otherwise the sign bit is cleared with AND on the
 * integer register holding it. Integer operands use a sign mask
 * (XOR then SUB) when they fit in one register, and a conditional
 * NEG/NEG/SBB sequence when they occupy a register pair.
 *
 * Params:
 *      cdb = sink for the generated code
 *      e = operator elem, the operand is e.EV.E1
 *      pretregs = where the result is wanted; if 0 the operand is
 *                 evaluated and nothing else is generated
 */


void cdabs(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdabs(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    if (!*pretregs)
    {
        // No result requested: just evaluate the operand
        codelem(cdb,e1,pretregs,false);
        return;
    }

    const tyml = tybasic(e1.Ety);
    const sz = _tysize[tyml];
    const rex = (I64 && sz == 8) ? REX_W : 0;

    if (tyfloating(tyml))
    {
        if (tyxmmreg(tyml) && (*pretregs & XMMREGS))
        {
            xmmabs(cdb,e,pretregs);
        }
        else if (config.inline8087 && (!(*pretregs & (ALLREGS | mBP)) || I64))
        {
            neg87(cdb,e,pretregs);
        }
        else
        {
            // Value lives in integer registers: clear the sign bit by hand
            regm_t fregs = ALLREGS;
            if (!I32 && sz == 8)
                fregs = DOUBLEREGS_16;
            codelem(cdb,e1,&fregs,false);
            getregs(cdb,fregs);
            if (I32)
            {
                const signreg = (sz == 8) ? findregmsw(fregs) : findreg(fregs);
                cdb.genc2(0x81,modregrm(3,4,signreg),0x7FFFFFFF); // AND EDX,~sign bit
            }
            else
            {
                const signreg = (sz == 8) ? AX : findregmsw(fregs);
                cdb.genc2(0x81,modregrm(3,4,signreg),0x7FFF);     // AND AX,0x7FFF
            }
            fixresult(cdb,e,fregs,pretregs);
        }
        return;
    }

    const uint isbyte = sz == 1;
    assert(isbyte == 0);

    // A full-register operand outside 16 bit mode may use any register;
    // narrower single-register operands go through AX (CWD/CWDE need it).
    const bool wholereg = !I16 && sz == REGSIZE;
    regm_t possregs = allregs;
    if (sz <= REGSIZE && !wholereg)
        possregs = cast(regm_t) mAX;

    regm_t retregs = *pretregs & possregs;
    if (!retregs)
        retregs = possregs;
    codelem(cdb,e1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed

    if (sz == 2 * REGSIZE)
    {
        /*      or      DX,DX
                jns     L2
                neg     DX
                neg     AX
                sbb     DX,0
            L2:
         */

        code *cnop = gennop(null);
        const msreg = findregmsw(retregs);
        const lsreg = findreglsw(retregs);
        genregs(cdb,0x09,msreg,msreg);            // OR msreg,msreg
        genjmp(cdb,JNS,FLcode,cast(block *)cnop);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg+1
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
        cdb.append(cnop);
    }
    else if (sz <= REGSIZE)
    {
        /*      CWD
                XOR     AX,DX
                SUB     AX,DX
           or:
                MOV     r,reg
                SAR     r,63
                XOR     reg,r
                SUB     reg,r
         */
        reg_t reg;      // holds the value
        reg_t r;        // holds the sign mask

        if (wholereg)
        {
            reg = findreg(retregs);
            r = allocScratchReg(cdb, allregs & ~retregs);
            getregs(cdb,retregs);
            genmovreg(cdb,r,reg);                     // MOV r,reg
            cdb.genc2(0xC1,modregrmx(3,7,r),REGSIZE * 8 - 1);      // SAR r,31/63
            code_orrex(cdb.last(), rex);
        }
        else
        {
            reg = AX;
            r = DX;
            getregs(cdb,mDX);
            if (!I16 && sz == SHORTSIZE)
                cdb.gen1(0x98);                         // CWDE
            cdb.gen1(0x99);                             // CWD
            code_orrex(cdb.last(), rex);
        }
        cdb.gen2(0x33 ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // XOR reg,r
        cdb.gen2(0x2B ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // SUB reg,r
        if (*pretregs & mPSW)
        {
            // Caller wants the flags: they come from the SUB just emitted
            code *sub = cdb.last();
            sub.Iflags |= CFpsw;
            if (!I16 && sz == SHORTSIZE)
                sub.Iflags |= CFopsize;
        }
        *pretregs &= ~mPSW;                     // flags already set
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}
5081 
/**************************
 * Post increment and post decrement (OPpostinc, OPpostdec).
 *
 * The value of the lvalue e.EV.E1 before modification is the result;
 * the lvalue itself is updated by the amount e.EV.E2.
 *
 * Cases handled, in order:
 *  - no result wanted: forwarded to cdaddass()
 *  - floating point: xmmpost(), post87(), or (TARGET_WINDOS only)
 *    a sequence built around the CLIB add/sub routines
 *  - XMM types that are not floating point: xmmpost()
 *  - only flags wanted and the lvalue is a register: TEST + LEA
 *  - operand fits in a register, or is a far pointer (tyfv)
 *  - huge pointer (TYhptr)
 *  - operand occupies two registers
 *
 * All integer cases require e.EV.E2 to be an OPconst.
 *
 * Params:
 *      cdb = sink for the generated code
 *      e = the OPpostinc/OPpostdec elem
 *      pretregs = where the result is wanted
 */

void cdpost(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpost(pretregs = %s)\n", regm_str(*pretregs));
    code cs = void;
    const op = e.Eoper;                      // OPxxxx
    if (*pretregs == 0)                        // if nothing to return
    {
        cdaddass(cdb,e,pretregs);
        return;
    }
    const tym_t tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    elem *e2 = e.EV.E2;
    const rex = (I64 && sz == 8) ? REX_W : 0;

    if (tyfloating(tyml))
    {
        if (config.fpxmmregs && tyxmmreg(tyml) &&
            !tycomplex(tyml) // SIMD code is not set up to deal with complex
           )
        {
            xmmpost(cdb,e,pretregs);
            return;
        }

        if (config.inline8087)
        {
            post87(cdb,e,pretregs);
            return;
        }
static if (TARGET_WINDOS)
{
        // Neither XMM nor x87: keep a copy of the old value on the stack,
        // call the CLIB add/sub routine, store the new value, then
        // recover the old value as the result.
        assert(sz <= 8);
        getlvalue(cdb,&cs,e.EV.E1,DOUBLEREGS);
        freenode(e.EV.E1);
        regm_t idxregs = idxregm(&cs);  // mask of index regs used
        cs.Iop = 0x8B;                  /* MOV DOUBLEREGS,EA            */
        fltregs(cdb,&cs,tyml);
        stackchanged = 1;
        int stackpushsave = stackpush;
        regm_t retregs;
        if (sz == 8)
        {
            if (I32)
            {
                cdb.gen1(0x50 + DX);             // PUSH DOUBLEREGS
                cdb.gen1(0x50 + AX);
                stackpush += DOUBLESIZE;
                retregs = DOUBLEREGS2_32;
            }
            else
            {
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                stackpush += DOUBLESIZE + DOUBLESIZE;

                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                retregs = DOUBLEREGS_16;
            }
        }
        else
        {
            stackpush += FLOATSIZE;     /* so we know something is on   */
            if (!I32)
                cdb.gen1(0x50 + DX);
            cdb.gen1(0x50 + AX);
            retregs = FLOATREGS2;
        }
        cdb.genadjesp(stackpush - stackpushsave);

        cgstate.stackclean++;
        scodelem(cdb,e2,&retregs,idxregs,false);
        cgstate.stackclean--;

        if (tyml == TYdouble || tyml == TYdouble_alias)
        {
            retregs = DOUBLEREGS;
            callclib(cdb,e,(op == OPpostinc) ? CLIB.dadd : CLIB.dsub,
                    &retregs,idxregs);
        }
        else /* tyml == TYfloat */
        {
            retregs = FLOATREGS;
            callclib(cdb,e,(op == OPpostinc) ? CLIB.fadd : CLIB.fsub,
                    &retregs,idxregs);
        }
        cs.Iop = 0x89;                  /* MOV EA,DOUBLEREGS            */
        fltregs(cdb,&cs,tyml);
        stackpushsave = stackpush;
        if (tyml == TYdouble || tyml == TYdouble_alias)
        {   if (*pretregs == mSTACK)
                retregs = mSTACK;       /* leave result on stack        */
            else
            {
                if (I32)
                {
                    cdb.gen1(0x58 + AX);
                    cdb.gen1(0x58 + DX);
                }
                else
                {
                    cdb.gen1(0x58 + DX);
                    cdb.gen1(0x58 + CX);
                    cdb.gen1(0x58 + BX);
                    cdb.gen1(0x58 + AX);
                }
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS;
            }
        }
        else
        {
            cdb.gen1(0x58 + AX);
            if (!I32)
                cdb.gen1(0x58 + DX);
            stackpush -= FLOATSIZE;
            retregs = FLOATREGS;
        }
        cdb.genadjesp(stackpush - stackpushsave);
        fixresult(cdb,e,retregs,pretregs);
        return;
}
    }
    if (tyxmmreg(tyml))
    {
        xmmpost(cdb,e,pretregs);
        return;
    }

    // Everything below needs a constant increment
    assert(e2.Eoper == OPconst);
    uint isbyte = (sz == 1);
    regm_t possregs = isbyte ? BYTEREGS : allregs;
    getlvalue(cdb,&cs,e.EV.E1,0);
    freenode(e.EV.E1);
    regm_t idxregs = idxregm(&cs);       // mask of index regs used
    if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 &&
        (!I16 || (idxregs & (mBX | mSI | mDI | mBP))))
    {
        // Only the flags of the old value are wanted and the lvalue is
        // a register (mod field == 3).
        // Generate:
        //      TEST    reg,reg
        //      LEA     reg,n[reg]      // don't affect flags
        reg_t reg = cs.Irm & 7;
        if (cs.Irex & REX_B)
            reg |= 8;
        cs.Iop = 0x85 ^ isbyte;
        code_newreg(&cs, reg);
        cs.Iflags |= CFpsw;
        cdb.gen(&cs);             // TEST reg,reg

        // If lvalue is a register variable, we must mark it as modified
        modEA(cdb,&cs);

        auto n = e2.EV.Vint;
        if (op == OPpostdec)
            n = -n;
        int rm = reg;
        if (I16)
        {
            // 16 bit addressing: translate register number to its r/m code
            static immutable byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ]; // copied from cod1.c
            rm = regtorm[reg];
        }
        cdb.genc1(LEA,(rex << 16) | buildModregrm(2,reg,rm),FLconst,n); // LEA reg,n[reg]
        return;
    }
    else if (sz <= REGSIZE || tyfv(tyml))
    {
        code cs2 = void;

        cs.Iop = 0x8B ^ isbyte;
        regm_t retregs = possregs & ~idxregs & *pretregs;
        if (!tyfv(tyml))
        {
            if (retregs == 0)
                retregs = possregs & ~idxregs;
        }
        else /* tyfv(tyml) */
        {
            if ((retregs &= mLSW) == 0)
                retregs = mLSW & ~idxregs;
            /* Can't use LES if the EA uses ES as a seg override    */
            if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
            {   cs.Iop = 0xC4;                      /* LES          */
                getregs(cdb,mES);           // allocate ES
            }
        }
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        code_newreg(&cs, reg);
        if (sz == 1 && I64 && reg >= 4)
            cs.Irex |= REX;
        cdb.gen(&cs);                     // MOV reg,EA
        cs2 = cs;                         // remember the load for the scheduling variant below

        /* If lvalue is a register variable, we must mark it as modified */
        modEA(cdb,&cs);

        cs.Iop = 0x81 ^ isbyte;
        cs.Irm &= ~cast(int)modregrm(0,7,0);             // reg field = 0
        cs.Irex &= ~REX_R;
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                  */
        cs.IFL2 = FLconst;
        targ_int n = e2.EV.Vint;
        cs.IEV2.Vint = n;
        if (n == 1)                     /* can use INC or DEC           */
        {
            cs.Iop |= 0xFE;             /* xFE is dec byte, xFF is word */
            if (op == OPpostdec)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }
        else if (n == -1)               // can use INC or DEC
        {
            cs.Iop |= 0xFE;             // xFE is dec byte, xFF is word
            if (op == OPpostinc)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }

        // For scheduling purposes, we wish to replace:
        //      MOV     reg,EA
        //      OP      EA
        // with:
        //      MOV     reg,EA
        //      OP      reg
        //      MOV     EA,reg
        //      ~OP     reg
        if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
            config.target_cpu >= TARGET_Pentium &&
            config.flags4 & CFG4speed)
        {
            // Replace EA in cs with reg
            cs.Irm = (cs.Irm & ~cast(int)modregrm(3,0,7)) | modregrm(3,0,reg & 7);
            if (reg & 8)
            {   cs.Irex &= ~REX_R;
                cs.Irex |= REX_B;
            }
            else
                cs.Irex &= ~REX_B;
            if (I64 && sz == 1 && reg >= 4)
                cs.Irex |= REX;
            cdb.gen(&cs);                        // ADD/SUB reg,const

            // Reverse MOV direction
            cs2.Iop ^= 2;
            cdb.gen(&cs2);                       // MOV EA,reg

            // Toggle INC <-> DEC, ADD <-> SUB
            cs.Irm ^= (n == 1 || n == -1) ? modregrm(0,1,0) : modregrm(0,5,0);
            cdb.gen(&cs);

            if (*pretregs & mPSW)
            {   *pretregs &= ~mPSW;              // flags already set
                code_orflag(cdb.last(),CFpsw);
            }
        }
        else
            cdb.gen(&cs);                        // ADD/SUB EA,const

        freenode(e2);
        if (tyfv(tyml))
        {
            // Far pointer: also fetch the upper word into ES or an MSW register
            reg_t preg;

            getlvalue_msw(&cs);
            if (*pretregs & mES)
            {
                preg = ES;
                /* ES is already loaded if CFes is 0            */
                cs.Iop = ((cs.Iflags & CFSEG) == CFes) ? 0x8E : NOP;
                NEWREG(cs.Irm,0);       /* MOV ES,EA+2          */
            }
            else
            {
                regm_t retregsx = *pretregs & mMSW;
                if (!retregsx)
                    retregsx = mMSW;
                allocreg(cdb,&retregsx,&preg,TYint);
                cs.Iop = 0x8B;
                if (I32)
                    cs.Iflags |= CFopsize;
                NEWREG(cs.Irm,preg);    /* MOV preg,EA+2        */
            }
            getregs(cdb,mask(preg));
            cdb.gen(&cs);
            retregs = mask(reg) | mask(preg);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (tyml == TYhptr)
    {
        uint rvalue;
        reg_t lreg;
        reg_t rtmp;
        regm_t mtmp;

        rvalue = e2.EV.Vlong;
        freenode(e2);

        // If h--, convert to h++
        if (e.Eoper == OPpostdec)
            rvalue = -rvalue;

        regm_t retregs = mLSW & ~idxregs & *pretregs;
        if (!retregs)
            retregs = mLSW & ~idxregs;
        allocreg(cdb,&retregs,&lreg,TYint);

        // Can't use LES if the EA uses ES as a seg override
        if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
        {   cs.Iop = 0xC4;
            retregs |= mES;
            getregs(cdb,mES|mCX);       // allocate ES
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                       // LES lreg,EA
        }
        else
        {   cs.Iop = 0x8B;
            retregs |= mDX;
            getregs(cdb,mDX|mCX);
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                       // MOV lreg,EA
            NEWREG(cs.Irm,DX);
            getlvalue_msw(&cs);
            cdb.gen(&cs);                       // MOV DX,EA+2
            getlvalue_lsw(&cs);
        }

        // Allocate temporary register, rtmp
        mtmp = ALLREGS & ~mCX & ~idxregs & ~retregs;
        allocreg(cdb,&mtmp,&rtmp,TYint);

        movregconst(cdb,rtmp,rvalue >> 16,0);   // MOV rtmp,e2+2
        getregs(cdb,mtmp);
        cs.Iop = 0x81;
        NEWREG(cs.Irm,0);
        cs.IFL2 = FLconst;
        cs.IEV2.Vint = rvalue;
        cdb.gen(&cs);                           // ADD EA,e2
        code_orflag(cdb.last(),CFpsw);
        cdb.genc2(0x81,modregrm(3,2,rtmp),0);   // ADC rtmp,0
        genshift(cdb);                          // MOV CX,offset __AHSHIFT
        cdb.gen2(0xD3,modregrm(3,4,rtmp));      // SHL rtmp,CL
        cs.Iop = 0x01;
        NEWREG(cs.Irm,rtmp);                    // ADD EA+2,rtmp
        getlvalue_msw(&cs);
        cdb.gen(&cs);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        // Load the old value into a register pair, then update memory
        // with ADD/ADC or SUB/SBB of the two halves of the constant.
        regm_t retregs = allregs & ~idxregs & *pretregs;
        if ((retregs & mLSW) == 0)
                retregs |= mLSW & ~idxregs;
        if ((retregs & mMSW) == 0)
                retregs |= ALLREGS & mMSW;
        assert(retregs & mMSW && retregs & mLSW);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,tyml);
        uint sreg = findreglsw(retregs);
        cs.Iop = 0x8B;
        cs.Irm |= modregrm(0,sreg,0);
        cdb.gen(&cs);                   // MOV sreg,EA
        NEWREG(cs.Irm,reg);
        getlvalue_msw(&cs);
        cdb.gen(&cs);                   // MOV reg,EA+2
        cs.Iop = 0x81;
        cs.Irm &= ~cast(int)modregrm(0,7,0);     /* reg field = 0 for ADD        */
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                          */
        getlvalue_lsw(&cs);
        cs.IFL2 = FLconst;
        cs.IEV2.Vlong = e2.EV.Vlong;
        cdb.gen(&cs);                   // ADD/SUB EA,const
        code_orflag(cdb.last(),CFpsw);
        getlvalue_msw(&cs);
        if (op == OPpostinc)
            cs.Irm ^= modregrm(0,2,0);  /* ADC                          */
        else
            cs.Irm ^= modregrm(0,6,0);  /* SBB                          */
        // Immediate for the upper half is the constant shifted down by one register width
        cs.IEV2.Vlong = cast(targ_long)(e2.EV.Vullong >> (REGSIZE * 8));
        cdb.gen(&cs);                   // ADC/SBB EA+2,upper half of const
        freenode(e2);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}
5487 
5488 
/// Handler for elems that must never reach code generation:
/// prints the elem in debug builds, then halts.
void cderr(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        elem_print(e);
    }

    //printf("op = %d, %d\n", e.Eoper, OPstring);
    //printf("string = %p, len = %d\n", e.EV.ss.Vstring, e.EV.ss.Vstrlen);
    //printf("string = '%.*s'\n", cast(int)e.EV.ss.Vstrlen, e.EV.ss.Vstring);
    assert(0);
}
5499 
/// Generate code for an elem whose left operand (e.EV.E1) is an
/// exception handling marker and whose right operand (e.EV.E2)
/// produces the value. Which markers are accepted depends on the
/// MARS / SCPP version; anything else halts.
void cdinfo(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    switch (e.EV.E1.Eoper)
    {
version (MARS)
{
        case OPdctor:
            // Value first, then the marker with no result wanted
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t noresult = 0;
            codelem(cdb,e.EV.E1,&noresult,false);
            break;
}
version (SCPP)
{
        case OPdtor:
            cdcomma(cdb,e,pretregs);
            break;

        case OPctor:
            // Value first, then the marker with no result wanted
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t noresult = 0;
            codelem(cdb,e.EV.E1,&noresult,false);
            break;

        case OPmark:
            if (0 && config.exe == EX_WIN32)
            {
                // Disabled by the constant 0 in the condition
                const markidx = except_index_get();
                except_mark();
                codelem(cdb,e.EV.E2,pretregs,false);
                if (config.exe == EX_WIN32 && markidx != except_index_get())
                {
                    usednteh |= NTEHcleanup;
                    nteh_gensindex(cdb,markidx - 1);
                }
                except_release();
                assert(markidx == except_index_get());
            }
            else
            {
                // Bracket the value's code with mark/release escapes
                code esc = void;
                esc.Iflags = 0;
                esc.Irex = 0;
                esc.Iop = ESCAPE | ESCmark;
                cdb.gen(&esc);
                codelem(cdb,e.EV.E2,pretregs,false);
                esc.Iop = ESCAPE | ESCrelease;
                cdb.gen(&esc);
            }
            freenode(e.EV.E1);
            break;
}
        default:
            assert(0);
    }
}
5553 
/*******************************************
 * D constructor.
 *
 * Emits an ESCdctor escape carrying the elem, followed by a
 * scope index store whose value is filled in later.
 * No result may be requested (*pretregs must be 0).
 */

void cddctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Generate:
        ESCAPE | ESCdctor
        MOV     sindex[BP],index
     */
    usednteh |= EHcleanup;
    if (config.ehmethod == EHmethod.EH_WIN32)
    {
        usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }
    assert(*pretregs == 0);

    code esc;
    esc.Iflags = 0;
    esc.Irex = 0;
    esc.Iop = ESCAPE | ESCdctor;        // mark start of EH range
    esc.IFL1 = FLctor;
    esc.IEV1.Vtor = e;
    cdb.gen(&esc);

    // the actual index will be patched in later by except_fillInEHTable()
    nteh_gensindex(cdb,0);
}
5580 
/*******************************************
 * D destructor.
 *
 * Emits an ESCddtor escape carrying the elem. With DWARF exception
 * handling the destructor code (e.EV.E1) follows inline. Otherwise the
 * destructor code is placed out of line, ended by RET, reached with a
 * CALL, and jumped over on the normal path.
 * No result may be requested (*pretregs must be 0).
 */

void cdddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    const bool dwarfEH = config.ehmethod == EHmethod.EH_DWARF;

    usednteh |= EHcleanup;
    if (config.ehmethod == EHmethod.EH_WIN32)
    {
        usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }

    code esc;
    esc.Iflags = 0;
    esc.Irex = 0;
    esc.Iop = ESCAPE | ESCddtor;     // mark end of EH range and where landing pad is
    esc.IFL1 = FLdtor;
    esc.IEV1.Vtor = e;
    cdb.gen(&esc);

    if (!dwarfEH)
    {
        // the actual index will be patched in later by except_fillInEHTable()
        nteh_gensindex(cdb,0);
    }

    // Mark all registers as destroyed
    getregsNoSave(allregs);

    assert(*pretregs == 0);

    if (dwarfEH)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    /* Generate:
        ESCAPE | ESCddtor
        MOV     sindex[BP],index
        CALL    dtor
        JMP     L1
    Ldtor:
        ... e.EV.E1 ...
        RET
    L1: NOP
    */

    // Build the out of line destructor body
    CodeBuilder dtorcdb;
    dtorcdb.ctor();
    codelem(dtorcdb,e.EV.E1,pretregs,false);
    dtorcdb.gen1(0xC3);                       // RET
    code *dtorcode = dtorcdb.finish();

    // Adjust the stack around the CALL when STACKALIGN is 16 or more
    const bool realign = STACKALIGN >= 16;
    const int nalign = realign ? STACKALIGN - REGSIZE : 0;
    if (realign)
        cod3_stackadj(cdb, nalign);
    calledafunc = 1;
    genjmp(cdb,0xE8,FLcode,cast(block *)dtorcode);   // CALL Ldtor
    if (nalign)
        cod3_stackadj(cdb, -nalign);

    code *skip = gennop(null);

    genjmp(cdb,JMP,FLcode,cast(block *)skip);
    cdb.append(dtorcdb);
    cdb.append(skip);
}
5664 
5665 
/*******************************************
 * C++ constructor.
 *
 * Under version SCPP, emits an ESCctor escape carrying the elem;
 * in other builds the body is empty.
 * No result may be requested (*pretregs must be 0).
 */

void cdctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
version (SCPP)
{
    usednteh |= EHcleanup;
    if (config.exe == EX_WIN32)
    {
        usednteh |= NTEHcleanup;
    }
    assert(*pretregs == 0);

    code esc = void;
    esc.Iflags = 0;
    esc.Irex = 0;
    esc.Iop = ESCAPE | ESCctor;
    esc.IFL1 = FLctor;
    esc.IEV1.Vtor = e;
    cdb.gen(&esc);
}
}
5688 
/*******************************************
 * C++ destructor.
 *
 * Under version SCPP, emits an ESCdtor escape carrying the elem;
 * in other builds the body is empty.
 * No result may be requested (*pretregs must be 0).
 */

void cddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
version (SCPP)
{
    usednteh |= EHcleanup;
    if (config.exe == EX_WIN32)
    {
        usednteh |= NTEHcleanup;
    }
    assert(*pretregs == 0);

    code esc = void;
    esc.Iflags = 0;
    esc.Irex = 0;
    esc.Iop = ESCAPE | ESCdtor;
    esc.IFL1 = FLdtor;
    esc.IEV1.Vtor = e;
    cdb.gen(&esc);
}
}
5707 
/// Generates no code; all parameters are ignored.
void cdmark(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // intentionally empty
}
5711 
static if (!NTEXCEPTIONS)
{
/// Stand-in used when NTEXCEPTIONS is off: must never be reached.
void cdsetjmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Always halts
    assert(0);
}
}
5719 
/*****************************************
 * Evaluate the operand e.EV.E1 with no result wanted.
 * The caller must not request a result (*pretregs must be 0).
 */

void cdvoid(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);
    elem *operand = e.EV.E1;
    codelem(cdb,operand,pretregs,false);
}
5728 
/*****************************************
 * Emit a single trapping instruction: UD2 when the target CPU is at
 * least TARGET_80286, INT3 otherwise.
 * The caller must not request a result (*pretregs must be 0).
 */

void cdhalt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);
    const trapop = (config.target_cpu >= TARGET_80286) ? UD2 : INT3;
    cdb.gen1(trapop);
}
5737 
5738 }