1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Mostly code generation for assignment operators.
6  *
7  * Copyright:   Copyright (C) 1985-1998 by Symantec
8  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
9  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
10  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
11  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d)
12  * Documentation:  https://dlang.org/phobos/dmd_backend_cod4.html
13  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod4.d
14  */
15 
16 module dmd.backend.cod4;
17 
18 version (SCPP)
19     version = COMPILE;
20 version (MARS)
21     version = COMPILE;
22 
23 version (COMPILE)
24 {
25 
26 import core.stdc.stdio;
27 import core.stdc.stdlib;
28 import core.stdc.string;
29 
30 import dmd.backend.cc;
31 import dmd.backend.cdef;
32 import dmd.backend.code;
33 import dmd.backend.code_x86;
34 import dmd.backend.codebuilder;
35 import dmd.backend.mem;
36 import dmd.backend.el;
37 import dmd.backend.global;
38 import dmd.backend.oper;
39 import dmd.backend.ty;
40 import dmd.backend.evalu8 : el_toldoubled;
41 import dmd.backend.xmm;
42 
43 extern (C++):
44 
45 nothrow:
46 
// Declared here, defined elsewhere. In this file REGSIZE is used as the
// size in bytes of a general purpose register (e.g. `sz == 2 * REGSIZE`
// selects the register-pair paths).
int REGSIZE();

// Code generator state defined elsewhere; this file touches
// cgstate.stackclean around the rvalue evaluation in opassdbl().
extern __gshared CGstate cgstate;
// NOTE(review): indexed by FL value; presumably true for FLs that live in
// the data segment - confirm against the definition.
extern __gshared bool[FLMAX] datafl;
51 
/*******************************
 * Convert a register number into a single-bit register mask.
 * Params:
 *      m = bit (register) number
 * Returns:
 *      mask with only bit m set
 */
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
53 
54                         /*   AX,CX,DX,BX                */
__gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ];
// dblreg[r] is the register that follows r when cdeq() stores a double one
// word at a time in 16 bit code, starting with AX at the highest offset:
// AX -> BX -> CX -> DX -> NOREG (end of chain).
56 
// from divcoeff.c
// Prototypes only; neither function is called in the portion of this file
// visible here.
// NOTE(review): judging by the names and parameters these presumably compute
// the multiplier (*pm) and shift counts (*pshpre, *pshpost) used to replace
// an N-bit division by the constant d with a multiply - confirm in divcoeff.c.
extern (C)
{
    bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
    bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
}
63 
/*******************************
 * Count the references to a symbol within an expression tree.
 * Params:
 *      s = symbol to look for
 *      e = root of the tree to search
 * Returns:
 *      number of OPvar leaves in e that refer to s
 */

private int intree(Symbol *s,elem *e)
{
    const oper = e.Eoper;

    // A leaf counts as one occurrence only if it is a reference to s
    if (OTleaf(oper))
        return (oper == OPvar && e.EV.Vsym == s) ? 1 : 0;

    // Interior node: always has a left operand, and a right one if binary
    int count = intree(s, e.EV.E1);
    if (OTbinary(oper))
        count += intree(s, e.EV.E2);
    return count;
}
74 
/***********************************
 * Decide whether expression e may be computed directly in the register
 * that holds register variable s.
 * Expressions such as x=x+x+x and x=a+x need care, because s is both
 * the destination and an operand.
 * Params:
 *      s = the register variable being assigned to
 *      e = the expression to be evaluated
 * Returns:
 *      1       e can be evaluated into s
 *      0       it cannot
 */

int doinreg(Symbol *s, elem *e)
{
    // Each iteration examines one node, then descends into its left operand
    for (;;)
    {
        const OPER op = e.Eoper;

        if (op == OPind || OTcall(op) || OTleaf(op))
            return 1;

        const int uses = intree(s, e);  // occurrences of s under this node
        if (uses == 0)
            return 1;
        if (OTunary(op) && OTleaf(e.EV.E1.Eoper))
            return 1;

        // More than one use of s is never evaluated in place
        if (uses != 1)
            return 0;

        switch (op)
        {
            case OPadd:
            case OPmin:
            case OPand:
            case OPor:
            case OPxor:
            case OPshl:
            case OPmul:
                break;

            default:
                return 0;
        }

        // The single use of s must be in the left operand
        if (intree(s, e.EV.E2))
            return 0;
        e = e.EV.E1;
    }
}
122 
/****************************
 * Called just before an EA is modified.
 * When the EA turns out to be a register rather than memory, getregs()
 * is called on that register so that code to save any common
 * subexpression it holds gets generated.
 * Params:
 *      cdb = code builder that receives any generated instructions
 *      c = instruction whose Irm/Irex describe the EA
 */

void modEA(ref CodeBuilder cdb,code *c)
{
    // mod field other than 3 means the EA is not a register: nothing to do
    if ((c.Irm & 0xC0) != 0xC0)
        return;

    reg_t rm = c.Irm & 7;
    if (c.Irex & REX_B)
    {
        // REX_B supplies the fourth bit of the register number
        rm |= 8;
        assert(I64);
    }
    getregs(cdb, mask(rm));
}
141 
142 static if (TARGET_WINDOS)
143 {
144 // This code is for CPUs that do not support the 8087
145 
/****************************
 * Gen code for op= for doubles.
 *
 * Handles float and double lvalues for the operators listed in clibtab
 * (post-increment/decrement, +=, -=, *=, /=). When the 8087 is used inline
 * the work is handed to opass87(); otherwise the lvalue is loaded into
 * registers (or pushed on the stack), the rvalue is evaluated, and the
 * operation is performed by a runtime library routine via callclib().
 * The result is then stored back into the lvalue.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the op= elem; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = where the caller wants the result
 *      op = operator, used to index clibtab
 */

private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op)
{
    // Library routine for each operator, indexed by (op - OPpostinc).
    // The OPeq slot is a placeholder (-1).
    static immutable uint[OPdivass - OPpostinc + 1] clibtab =
    /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass       */
    [  CLIB.dadd, CLIB.dsub, cast(uint)-1,  CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ];

    if (config.inline8087)
    {
        opass87(cdb,e,pretregs);
        return;
    }

    code cs;
    // retregs2: registers for the rvalue; retregs: registers of the result;
    // idxregs: registers that must survive the evaluation of the rvalue
    regm_t retregs2,retregs,idxregs;

    uint clib = clibtab[op - OPpostinc];
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX);

    if (tym == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    /* convert to float operation   */

        // Load EA into FLOATREGS
        getregs(cdb,FLOATREGS);
        cs.Iop = LOD;
        cs.Irm |= modregrm(0,AX,0);
        cdb.gen(&cs);

        if (!I32)
        {
            // A float needs two registers here: also load the high word into DX
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

        }
        retregs2 = FLOATREGS2;
        idxregs = FLOATREGS | idxregm(&cs);
        retregs = FLOATREGS;
    }
    else
    {
        if (I32)
        {
            // Load EA into DOUBLEREGS
            getregs(cdb,DOUBLEREGS_32);
            cs.Iop = LOD;
            cs.Irm |= modregrm(0,AX,0);
            cdb.gen(&cs);
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

            retregs2 = DOUBLEREGS2_32;
            idxregs = DOUBLEREGS_32 | idxregm(&cs);
        }
        else
        {
            // Push EA onto stack
            // (opcode 0xFF with reg field 6; four pushes, starting at the
            // highest offset and stepping down with getlvalue_lsw())
            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            stackpush += DOUBLESIZE;

            retregs2 = DOUBLEREGS_16;
            idxregs = idxregm(&cs);
        }
        retregs = DOUBLEREGS;
    }

    // If the EA uses an ES segment override, ES must be preserved as well
    if ((cs.Iflags & CFSEG) == CFes)
        idxregs |= mES;
    // Evaluate the rvalue with cgstate.stackclean raised for its duration
    cgstate.stackclean++;
    scodelem(cdb,e.EV.E2,&retregs2,idxregs,false);
    cgstate.stackclean--;
    callclib(cdb,e,clib,&retregs,0);
    if (e1.Ecount)
        cssave(e1,retregs,!OTleaf(e1.Eoper));             // if lvalue is a CSE
    freenode(e1);
    cs.Iop = STO;                              // MOV EA,DOUBLEREGS
    fltregs(cdb,&cs,tym);
    fixresult(cdb,e,retregs,pretregs);
}
243 
/****************************
 * Gen code for OPnegass for doubles.
 *
 * When the 8087 is used inline the work is handed to cdnegass87().
 * Otherwise the negation is done in memory by XORing 0x80 into the last
 * byte of the lvalue (and, for complex types, also into the byte sz/2
 * below it). If the result is wanted, or the lvalue is a common
 * subexpression, the new value is then loaded into registers.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPnegass elem; e.EV.E1 is the lvalue
 *      pretregs = where the caller wants the result; 0 if it is unused
 */

private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (config.inline8087)
    {
        cdnegass87(cdb,e,pretregs);
        return;
    }
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    int sz = _tysize[tym];
    code cs;

    getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0);
    modEA(cdb,&cs);
    // Opcode 0x80 with reg field 6 and an immediate byte: XOR EA,imm8
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    cs.IEV1.Voffset += sz - 1;          // address the last byte of the value
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR 7[EA],0x80
    if (tycomplex(tym))
    {
        // Second XOR, sz/2 bytes lower: the last byte of the other half
        cs.IEV1.Voffset -= sz / 2;
        cdb.gen(&cs);                   // XOR 7[EA],0x80
    }

    regm_t retregs;
    if (*pretregs || e1.Ecount)
    {
        // Undo the offset adjustment made above before reloading the value.
        // NOTE(review): for complex types the extra `-= sz / 2` above is not
        // undone, so the offset appears to end up sz/2 below the start of
        // the lvalue here - confirm whether this path is reachable for them.
        cs.IEV1.Voffset -= sz - 1;

        if (tym == TYfloat)
        {
            // Load EA into FLOATREGS
            getregs(cdb,FLOATREGS);
            cs.Iop = LOD;
            NEWREG(cs.Irm, AX);
            cdb.gen(&cs);

            if (!I32)
            {
                // Also load the high word into DX
                NEWREG(cs.Irm, DX);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);

            }
            retregs = FLOATREGS;
        }
        else
        {
            if (I32)
            {
                // Load EA into DOUBLEREGS
                getregs(cdb,DOUBLEREGS_32);
                cs.Iop = LOD;
                // Clear the reg field (it still holds the 6 set for the XOR)
                cs.Irm &= ~cast(uint)modregrm(0,7,0);
                cs.Irm |= modregrm(0,AX,0);
                cdb.gen(&cs);
                cs.Irm |= modregrm(0,DX,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
            }
            else
            {
                // The register-load variant is the one compiled in; the
                // push-onto-stack variant is kept but disabled.
                static if (1)
                {
                    cs.Iop = LOD;
                    fltregs(cdb,&cs,TYdouble);     // MOV DOUBLEREGS, EA
                }
                else
                {
                    // Push EA onto stack
                    cs.Iop = 0xFF;
                    cs.Irm |= modregrm(0,6,0);
                    cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    stackpush += DOUBLESIZE;
                }
            }
            retregs = DOUBLEREGS;
        }
        if (e1.Ecount)
            cssave(e1,retregs,!OTleaf(e1.Eoper));         /* if lvalue is a CSE   */
    }
    else
    {
        // Result unused and lvalue not a CSE: nothing to load
        retregs = 0;
        assert(e1.Ecount == 0);
    }

    freenode(e1);
    fixresult(cdb,e,retregs,pretregs);
}
349 }
350 
351 
352 
/************************
 * Generate code for an assignment.
 *
 * Outline:
 *   1. XMM and x87 operand types are dispatched to xmmeq(), complex_eq87()
 *      or eq87().
 *   2. If the value of the assignment is not used and the rvalue is a
 *      constant (or a suitable address constant), it is stored straight
 *      into the lvalue with immediate MOVs.
 *   3. Otherwise the rvalue is evaluated into registers (directly into the
 *      target register if the lvalue is a register variable and doinreg()
 *      allows it) and then stored into the lvalue.
 *   4. For the (*p++ = ...) / (*p-- = ...) forms with p a register
 *      variable, the pointer adjustment is emitted last (label Lp).
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the assignment elem; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = where the caller wants the result; 0 means the value of
 *                 the assignment is not used
 */

void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    tym_t tymll;
    reg_t reg;
    code cs;
    elem *e11;
    bool regvar;                  // true means evaluate into register variable
    regm_t varregm;
    reg_t varreg;
    targ_int postinc;             // pending adjustment of p for *p++ / *p--, else 0

    //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int e2oper = e2.Eoper;
    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    regm_t retregs = *pretregs;

    // Types held in XMM registers are handled by the XMM code generator
    if (tyxmmreg(tyml) && config.fpxmmregs)
    {
        xmmeq(cdb, e, CMP, e1, e2, pretregs);
        return;
    }

    if (tyfloating(tyml) && config.inline8087)
    {
        if (tycomplex(tyml))
        {
            complex_eq87(cdb, e, pretregs);
            return;
        }

        // Use the x87 unless the result is unused and the rvalue is a
        // constant, variable or indirection
        if (!(retregs == 0 &&
              (e2oper == OPconst || e2oper == OPvar || e2oper == OPind))
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        // From the PentiumPro on, memory rvalues also go through the x87
        if (config.target_cpu >= TARGET_PentiumPro &&
            (e2oper == OPvar || e2oper == OPind)
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        // long double types always go through the x87
        if (tyml == TYldouble || tyml == TYildouble)
        {
            eq87(cdb,e,pretregs);
            return;
        }
        // Anything left falls through to the general code below
    }

    uint sz = _tysize[tyml];           // # of bytes to transfer
    assert(cast(int)sz > 0);

    if (retregs == 0)                     // if no return value
    {
        int fl;

        /* If registers are tight, and we might need them for the lvalue,
         * prefer to not use them for the rvalue
         */
        bool plenty = true;
        if (e1.Eoper == OPind)
        {
            /* Will need 1 register for evaluation, +2 registers for
             * e1's addressing mode
             */
            regm_t m = allregs & ~regcon.mvar;  // mask of non-register variables
            m &= m - 1;         // clear least significant bit
            m &= m - 1;         // clear least significant bit
            plenty = m != 0;    // at least 3 registers
        }

        // Store an immediate directly into the lvalue when the rvalue is a
        // constant, or an address constant (OPrelconst) of a data, udata or
        // extern symbol that is not mTYcs and, on 64 bit, not under PIC or
        // Win64
        if ((e2oper == OPconst ||       // if rvalue is a constant
             e2oper == OPrelconst &&
             !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
             ((fl = el_fl(e2)) == FLdata ||
              fl==FLudata || fl == FLextern)
              && !(e2.EV.Vsym.ty() & mTYcs)
            ) &&
            !(evalinregister(e2) && plenty) &&
            !e1.Ecount)        // and no CSE headaches
        {
            // Look for special case of (*p++ = ...), where p is a register variable
            if (e1.Eoper == OPind &&
                ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
                e11.EV.E1.Eoper == OPvar &&
                e11.EV.E1.EV.Vsym.Sfl == FLreg &&
                (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
               )
            {
                Symbol *s = e11.EV.E1.EV.Vsym;
                if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
                {
                    regcon.params &= ~s.Spregm();
                }
                // Remember the adjustment; it is emitted at Lp after the store
                postinc = e11.EV.E2.EV.Vint;
                if (e11.Eoper == OPpostdec)
                    postinc = -postinc;
                getlvalue(cdb,&cs,e1,RMstore);
                freenode(e11.EV.E2);
            }
            else
            {
                postinc = 0;
                getlvalue(cdb,&cs,e1,RMstore);

                // When optimizing for speed on the Pentium / Pentium MMX and
                // the EA has mod field 2, go through a register instead of
                // using an immediate store.
                // NOTE(review): presumably because an instruction with both
                // a displacement and an immediate is slow to issue on those
                // CPUs - confirm.
                if (e2oper == OPconst &&
                    config.flags4 & CFG4speed &&
                    (config.target_cpu == TARGET_Pentium ||
                     config.target_cpu == TARGET_PentiumMMX) &&
                    (cs.Irm & 0xC0) == 0x80
                   )
                {
                    if (I64 && sz == 8 && e2.EV.Vpointer)
                    {
                        // MOV reg,imm64
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vpointer,&regx,64);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if (sz == 2 * REGSIZE && e2.EV.Vllong == 0)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        // MOV EA+2,reg
                        regm_t rregm = getscratch() & ~idxregm(&cs);
                        if (rregm)
                        {
                            reg_t regx;
                            regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx,0);
                            cdb.gen(&cs);
                            getlvalue_msw(&cs);
                            cdb.gen(&cs);
                            freenode(e2);
                            goto Lp;
                        }
                    }
                }
            }

            // If loading result into a register
            if ((cs.Irm & 0xC0) == 0xC0)
            {
                modEA(cdb,&cs);
                if (sz == 2 * REGSIZE && cs.IFL1 == FLreg)
                    getregs(cdb,cs.IEV1.Vsym.Sregm);
            }
            // Immediate store opcode; bit 0 clear selects the byte form
            cs.Iop = (sz == 1) ? 0xC6 : 0xC7;

            if (e2oper == OPrelconst)
            {
                // Immediate operand is the offset of the symbol
                cs.IEV2.Voffset = e2.EV.Voffset;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e2.EV.Vsym;
                cs.Iflags |= CFoff;
                cdb.gen(&cs);       // MOV EA,&variable
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
                if (sz > REGSIZE)
                {
                    // Larger than a register: also store the segment part
                    cs.Iop = 0x8C;
                    getlvalue_msw(&cs);
                    cs.Irm |= modregrm(0,3,0);
                    cdb.gen(&cs);   // MOV EA+2,DS
                }
            }
            else
            {
                assert(e2oper == OPconst);
                cs.IFL2 = FLconst;
                // p walks over the raw bytes of the constant held in e2.EV
                targ_size_t *p = cast(targ_size_t *) &(e2.EV);
                cs.IEV2.Vsize_t = *p;
                // Look for loading a register variable
                if ((cs.Irm & 0xC0) == 0xC0)
                {
                    reg_t regx = cs.Irm & 7;

                    if (cs.Irex & REX_B)
                        regx |= 8;
                    if (I64 && sz == 8)
                        movregconst(cdb,regx,*p,64);
                    else
                        movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1));
                    if (sz == 2 * REGSIZE)
                    {   getlvalue_msw(&cs);
                        // Load the upper half of the constant into the
                        // register holding the most significant word
                        if (REGSIZE == 2)
                            movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0);
                        else if (REGSIZE == 4)
                            movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0);
                        else if (REGSIZE == 8)
                            movregconst(cdb,cs.Irm & 7,p[1],0);
                        else
                            assert(0);
                    }
                }
                else if (I64 && sz == 8 && *p >= 0x80000000)
                {   // Use 64 bit MOV, as the 32 bit one gets sign extended
                    // MOV reg,imm64
                    // MOV EA,reg
                    regm_t rregm = allregs & ~idxregm(&cs);
                    reg_t regx;
                    regwithvalue(cdb,rregm,*p,&regx,64);
                    cs.Iop = STO;
                    cs.Irm |= modregrm(0,regx & 7,0);
                    if (regx & 8)
                        cs.Irex |= REX_R;
                    cdb.gen(&cs);
                }
                else
                {
                    // Store the constant to memory one piece at a time;
                    // off is the number of bytes still to be stored
                    int off = sz;
                    do
                    {   int regsize = REGSIZE;
                        if (off >= 4 && I16 && config.target_cpu >= TARGET_80386)
                        {
                            regsize = 4;
                            cs.Iflags |= CFopsize;      // use opsize to do 32 bit operation
                        }
                        else if (I64 && sz == 16 && *p >= 0x80000000)
                        {
                            // Piece does not fit a sign extended 32 bit
                            // immediate: go through a register
                            regm_t rregm = allregs & ~idxregm(&cs);
                            reg_t regx;
                            regwithvalue(cdb,rregm,*p,&regx,64);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx & 7,0);
                            if (regx & 8)
                                cs.Irex |= REX_R;
                        }
                        else
                        {
                            // If some register already holds the value,
                            // store from that register instead of using an
                            // immediate
                            regm_t retregsx = (sz == 1) ? BYTEREGS : allregs;
                            reg_t regx;
                            if (reghasvalue(retregsx,*p,&regx))
                            {
                                cs.Iop = (cs.Iop & 1) | 0x88;
                                cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx
                                if (regx & 8)
                                    cs.Irex |= REX_R;
                                if (I64 && sz == 1 && regx >= 4)
                                    cs.Irex |= REX;
                            }
                            if (!I16 && off == 2)      // if 16 bit operand
                                cs.Iflags |= CFopsize;
                            if (I64 && sz == 8)
                                cs.Irex |= REX_W;
                        }
                        cdb.gen(&cs);           // MOV EA,const

                        // Set up for the next piece: advance the source
                        // pointer and the EA, and restore the immediate
                        // store form of the instruction
                        p = cast(targ_size_t *)(cast(char *) p + regsize);
                        cs.Iop = (cs.Iop & 1) | 0xC6;
                        cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0);
                        cs.Irex &= ~REX_R;
                        cs.IEV1.Voffset += regsize;
                        cs.IEV2.Vint = cast(int)*p;
                        off -= regsize;
                    } while (off > 0);
                }
            }
            freenode(e2);
            goto Lp;
        }
        retregs = allregs;        // pick a reg, any reg
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    // Only the flags are wanted: the value still needs a register
    if (retregs == mPSW)
    {
        retregs = allregs;
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    cs.Iop = STO;
    if (sz == 1)                  // must have byte regs
    {
        cs.Iop = 0x88;
        retregs &= BYTEREGS;
        if (!retregs)
            retregs = BYTEREGS;
    }
    else if (retregs & mES &&
           (
             (e1.Eoper == OPind &&
                ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) ||
             (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata)
           )
          )
        // getlvalue() needs ES, so we can't return it
        retregs = allregs;              // no conflicts with ES
    else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0)
        retregs = DOUBLEREGS;

    regvar = false;
    varregm = 0;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1.EV.Vsym,e2) &&       // and we can compute directly into it
            !(sz == 1 && e1.EV.Voffset == 1)
           )
        {
            regvar = true;
            retregs = varregm;
            reg = varreg;       // evaluate directly in target register
            // Assigning to one register-sized half of a variable held in a
            // register pair: pick the half selected by the offset
            if (tysize(e1.Ety) == REGSIZE &&
                tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE)
            {
                if (e1.EV.Voffset)
                    retregs &= mMSW;
                else
                    retregs &= mLSW;
                reg = findreg(retregs);
            }
        }
    }
    if (*pretregs & mPSW && OTleaf(e1.Eoper))     // if evaluating e1 couldn't change flags
    {   // Be careful that this lines up with jmpopcode()
        retregs |= mPSW;
        *pretregs &= ~mPSW;
    }
    scodelem(cdb,e2,&retregs,0,true);    // get rvalue

    // Look for special case of (*p++ = ...), where p is a register variable
    if (e1.Eoper == OPind &&
        ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
        e11.EV.E1.Eoper == OPvar &&
        e11.EV.E1.EV.Vsym.Sfl == FLreg &&
        (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
       )
    {
        Symbol *s = e11.EV.E1.EV.Vsym;
        if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
        {
            regcon.params &= ~s.Spregm();
        }

        // Remember the adjustment; it is emitted at Lp after the store
        postinc = e11.EV.E2.EV.Vint;
        if (e11.Eoper == OPpostdec)
            postinc = -postinc;
        getlvalue(cdb,&cs,e1,RMstore | retregs);
        freenode(e11.EV.E2);
    }
    else
    {
        postinc = 0;
        getlvalue(cdb,&cs,e1,RMstore | retregs);     // get lvalue (cl == null if regvar)
    }

    getregs(cdb,varregm);

    assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes));
    if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES)
    {
        // Far pointer whose segment is in ES: store the offset register,
        // then the segment register
        reg = findreglsw(retregs);
        cs.Irm |= modregrm(0,reg,0);
        cdb.gen(&cs);                   // MOV EA,reg
        getlvalue_msw(&cs);             // point to where segment goes
        cs.Iop = 0x8C;
        NEWREG(cs.Irm,0);
        cdb.gen(&cs);                   // MOV EA+2,ES
    }
    else
    {
        if (!I16)
        {
            // Store the low part first, then (if sz > REGSIZE) the high part
            reg = findreg(retregs &
                    ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS));
            cs.Irm |= modregrm(0,reg & 7,0);
            if (reg & 8)
                cs.Irex |= REX_R;
            for (; true; sz -= REGSIZE)
            {
                // Do not generate mov from register onto itself
                if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0)))
                    break;
                if (sz == 2)            // if 16 bit operand
                    cs.Iflags |= CFopsize;
                else if (sz == 1 && reg >= 4)
                    cs.Irex |= REX;
                cdb.gen(&cs);           // MOV EA+offset,reg
                if (sz <= REGSIZE)
                    break;
                getlvalue_msw(&cs);
                reg = findregmsw(retregs);
                code_newreg(&cs, reg);
            }
        }
        else
        {
            // 16 bit code: store from the highest offset downwards
            if (sz > REGSIZE)
                cs.IEV1.Voffset += sz - REGSIZE;  // 0,2,6
            reg = findreg(retregs &
                    (sz > REGSIZE ? mMSW : ALLREGS));
            if (tyml == TYdouble || tyml == TYdouble_alias)
                reg = AX;
            cs.Irm |= modregrm(0,reg,0);
            // Do not generate mov from register onto itself
            if (!regvar || reg != (cs.Irm & 7))
                for (; true; sz -= REGSIZE)             // 1,2,4
                {
                    cdb.gen(&cs);             // MOV EA+offset,reg
                    if (sz <= REGSIZE)
                        break;
                    cs.IEV1.Voffset -= REGSIZE;
                    // Doubles step through the registers in dblreg[] order
                    if (tyml == TYdouble || tyml == TYdouble_alias)
                            reg = dblreg[reg];
                    else
                            reg = findreglsw(retregs);
                    NEWREG(cs.Irm,reg);
                }
        }
    }
    if (e1.Ecount ||                    // if lvalue is a CSE or
        regvar)                         // rvalue can't be a CSE
    {
        getregs_imm(cdb,retregs);       // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,!OTleaf(e1.Eoper));     // if lvalue is a CSE
    }

    fixresult(cdb,e,retregs,pretregs);
Lp:
    // Emit the deferred pointer adjustment for (*p++ = ...) / (*p-- = ...)
    if (postinc)
    {
        reg_t ireg = findreg(idxregm(&cs));
        if (*pretregs & mPSW)
        {   // Use LEA to avoid touching the flags
            uint rm = cs.Irm & 7;
            if (cs.Irex & REX_B)
                rm |= 8;
            cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (I64)
        {
            // Opcode 0x81 with reg field 0: ADD ireg,postinc
            cdb.genc2(0x81,modregrmx(3,0,ireg),postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
        {
            if (postinc == 1)
                cdb.gen1(0x40 + ireg);        // INC ireg
            else if (postinc == -cast(targ_int)1)
                cdb.gen1(0x48 + ireg);        // DEC ireg
            else
            {
                // Opcode 0x81 with reg field 0: ADD ireg,postinc
                cdb.genc2(0x81,modregrm(3,0,ireg),postinc);
            }
        }
    }
    freenode(e1);
}
841 
842 
843 /************************
844  * Generate code for += -= &= |= ^= negass
845  */
846 
// Emits code for the read-modify-write operators listed above; OPpostinc and
// OPpostdec are also accepted and are rewritten to += and -= by the opcode
// switch below.
// Params:
//      cdb = code builder the generated instructions are appended to
//      e = the operator elem; e.EV.E1 is the lvalue and, except for negass,
//          e.EV.E2 is the rvalue
//      pretregs = mask of where the result is wanted: mPSW requests the
//          flags, the remaining bits request registers
void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    OPER op = e.Eoper;
    regm_t retregs = 0;
    uint reverse = 0;                           // set to 2 when the direction bit of the opcode was toggled
    elem *e1 = e.EV.E1;
    tym_t tyml = tybasic(e1.Ety);            // type of lvalue
    int sz = _tysize[tyml];
    int isbyte = (sz == 1);                     // 1 for byte operation, else 0

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    // Remaining floating point cases are delegated entirely; which helper
    // is used is selected at compile time by the target OS.
    if (tyfloating(tyml))
    {
        static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
        {
            if (op == OPnegass)
                cdnegass87(cdb,e,pretregs);
            else
                opass87(cdb,e,pretregs);
        }
        else
        {
            if (op == OPnegass)
                opnegassdbl(cdb,e,pretregs);
            else
                opassdbl(cdb,e,pretregs,op);
        }
        return;
    }
    // In 16 bit code on a 386 or later, a long is handled as one operation
    // by setting CFopsize on the instruction.
    uint opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386)
        ? CFopsize : 0;
    uint cflags = 0;                             // flags OR'ed into the low half of a two part add/sub
    regm_t forccs = *pretregs & mPSW;            // return result in flags
    regm_t forregs = *pretregs & ~mPSW;          // return result in regs
    // true if we want the result in a register
    uint wantres = forregs || (e1.Ecount && !OTleaf(e1.Eoper));

    reg_t reg;
    // op1:  opcode for the low (or only) part of the operand
    // op2:  opcode for the high part of a two register operand
    // mode: reg field of the mod/rm byte selecting the operation when the
    //       immediate form (opcode 0x81) is used
    uint op1,op2,mode;
    code cs;
    elem *e2;
    regm_t varregm;
    reg_t varreg;
    uint jop;


    switch (op)                   // select instruction opcodes
    {
        case OPpostinc: op = OPaddass;                  // i++ => +=
                        goto case OPaddass;

        case OPaddass:  op1 = 0x01; op2 = 0x11;
                        cflags = CFpsw;
                        mode = 0; break;                // ADD, ADC

        case OPpostdec: op = OPminass;                  // i-- => -=
                        goto case OPminass;

        case OPminass:  op1 = 0x29; op2 = 0x19;
                        cflags = CFpsw;
                        mode = 5; break;                // SUB, SBC

        case OPandass:  op1 = op2 = 0x21;
                        mode = 4; break;                // AND, AND

        case OPorass:   op1 = op2 = 0x09;
                        mode = 1; break;                // OR , OR

        case OPxorass:  op1 = op2 = 0x31;
                        mode = 6; break;                // XOR, XOR

        case OPnegass:  op1 = 0xF7;                     // NEG
                        break;

        default:
                assert(0);
    }
    op1 ^= isbyte;                  // bit 0 is 0 for byte operation

    if (op == OPnegass)
    {
        getlvalue(cdb,&cs,e1,0);
        modEA(cdb,&cs);
        cs.Irm |= modregrm(0,3,0);      // reg field 3 selects NEG for opcode 0xF6/0xF7
        cs.Iop = op1;
        switch (_tysize[tyml])
        {
            case CHARSIZE:
                cdb.gen(&cs);
                break;

            case SHORTSIZE:
                cdb.gen(&cs);
                if (!I16 && *pretregs & mPSW)
                    cdb.last().Iflags |= CFopsize | CFpsw;
                break;

            case LONGSIZE:
                if (!I16 || opsize)
                {   cdb.gen(&cs);
                    cdb.last().Iflags |= opsize;
                    break;
                }
            // Negate an operand that occupies two words: negate both halves,
            // then subtract the borrow of the low half from the high half.
            // The reg field is still 3, which selects SBB once the opcode
            // is switched to 0x81.
            neg_2reg:
                getlvalue_msw(&cs);
                cdb.gen(&cs);              // NEG EA+2
                getlvalue_lsw(&cs);
                cdb.gen(&cs);              // NEG EA
                code_orflag(cdb.last(),CFpsw);
                cs.Iop = 0x81;
                getlvalue_msw(&cs);
                cs.IFL2 = FLconst;
                cs.IEV2.Vuns = 0;
                cdb.gen(&cs);              // SBB EA+2,0
                break;

            case LLONGSIZE:
                if (I16)
                    assert(0);             // not implemented yet
                if (I32)
                    goto neg_2reg;
                cdb.gen(&cs);
                break;

            default:
                assert(0);
        }
        forccs = 0;             // flags already set by NEG
        *pretregs &= ~mPSW;
    }
    else if ((e2 = e.EV.E2).Eoper == OPconst &&    // if rvalue is a const
             el_signx32(e2) &&
             // Don't evaluate e2 in register if we can use an INC or DEC
             (((sz <= REGSIZE || tyfv(tyml)) &&
               (op == OPaddass || op == OPminass) &&
               (el_allbits(e2, 1) || el_allbits(e2, -1))
              ) ||
              (!evalinregister(e2)
               && tyml != TYhptr
              )
             )
            )
    {
        // Constant rvalue: operate on the lvalue directly with an immediate
        getlvalue(cdb,&cs,e1,0);
        modEA(cdb,&cs);
        cs.IFL2 = FLconst;
        cs.IEV2.Vsize_t = e2.EV.Vint;
        if (sz <= REGSIZE || tyfv(tyml) || opsize)
        {
            targ_int i = cs.IEV2.Vint;

            // Handle shortcuts. Watch out for if result has
            // to be in flags.

            // If some register already holds the constant, use the
            // register form of the instruction instead of an immediate
            if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&reg) && i != 1 && i != -1 &&
                !opsize)
            {
                cs.Iop = op1;
                cs.Irm |= modregrm(0,reg & 7,0);
                if (I64)
                {   if (isbyte && reg >= 4)
                        cs.Irex |= REX;
                    if (reg & 8)
                        cs.Irex |= REX_R;
                }
            }
            else
            {
                cs.Iop = 0x81;
                cs.Irm |= modregrm(0,mode,0);
                switch (op)
                {
                    case OPminass:      // convert to +=
                        cs.Irm ^= modregrm(0,5,0);
                        i = -i;
                        cs.IEV2.Vsize_t = i;
                        goto case OPaddass;

                    case OPaddass:
                        if (i == 1)             // INC EA
                                goto L1;
                        else if (i == -1)       // DEC EA
                        {       cs.Irm |= modregrm(0,1,0);
                           L1:  cs.Iop = 0xFF;
                        }
                        break;

                    default:
                        break;
                }
                cs.Iop ^= isbyte;             // for byte operations
            }
            cs.Iflags |= opsize;
            if (forccs)
                cs.Iflags |= CFpsw;
            else if (!I16 && cs.Iflags & CFopsize)
            {
                // Flags are not needed, so try to drop the operand size
                // override from the instruction
                switch (op)
                {   case OPorass:
                    case OPxorass:
                        cs.IEV2.Vsize_t &= 0xFFFF;
                        cs.Iflags &= ~CFopsize; // don't worry about MSW
                        break;

                    case OPandass:
                        cs.IEV2.Vsize_t |= ~0xFFFFL;
                        cs.Iflags &= ~CFopsize; // don't worry about MSW
                        break;

                    case OPminass:
                    case OPaddass:
                        static if (1)
                        {
                            if ((cs.Irm & 0xC0) == 0xC0)    // EA is register
                                cs.Iflags &= ~CFopsize;
                        }
                        else
                        {
                            if ((cs.Irm & 0xC0) == 0xC0 &&  // EA is register and
                                e1.Eoper == OPind)          // not a register var
                                cs.Iflags &= ~CFopsize;
                        }
                        break;

                    default:
                        assert(0);
                }
            }

            // For scheduling purposes, we wish to replace:
            //    OP    EA
            // with:
            //    MOV   reg,EA
            //    OP    reg
            //    MOV   EA,reg
            if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
                (config.target_cpu == TARGET_Pentium ||
                 config.target_cpu == TARGET_PentiumMMX) &&
                config.flags4 & CFG4speed)
            {
                regm_t sregm;
                code cs2;

                // Determine which registers to use
                sregm = allregs & ~idxregm(&cs);
                if (isbyte)
                    sregm &= BYTEREGS;
                if (sregm & forregs)
                    sregm &= forregs;

                allocreg(cdb,&sregm,&reg,tyml);      // allocate register

                cs2 = cs;
                cs2.Iflags &= ~CFpsw;
                cs2.Iop = LOD ^ isbyte;
                code_newreg(&cs2, reg);
                cdb.gen(&cs2);                      // MOV reg,EA

                cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7);
                if (reg & 8)
                    cs.Irex |= REX_B;
                cdb.gen(&cs);                       // OP reg

                cs2.Iop ^= 2;
                cdb.gen(&cs2);                      // MOV EA,reg

                // The result is already in a register, so skip the
                // reload at the end of the function
                retregs = sregm;
                wantres = 0;
                if (e1.Ecount)
                    cssave(e1,retregs,!OTleaf(e1.Eoper));
            }
            else
            {
                cdb.gen(&cs);
                cs.Iflags &= ~opsize;
                cs.Iflags &= ~CFpsw;
                if (I16 && opsize)                     // if DWORD operand
                    cs.IEV1.Voffset += 2; // compensate for wantres code
            }
        }
        else if (sz == 2 * REGSIZE)
        {
            // Two part operand: low half with OP, high half with the
            // carry/borrow consuming form for add/sub
            targ_uns msw;

            cs.Iop = 0x81;
            cs.Irm |= modregrm(0,mode,0);
            cs.Iflags |= cflags;
            cdb.gen(&cs);
            cs.Iflags &= ~CFpsw;

            getlvalue_msw(&cs);             // point to msw
            msw = cast(uint)MSREG(e.EV.E2.EV.Vllong);
            cs.IEV2.Vuns = msw;             // msw of constant
            switch (op)
            {
                case OPminass:
                    cs.Irm ^= modregrm(0,6,0);      // SUB => SBB
                    break;

                case OPaddass:
                    cs.Irm |= modregrm(0,2,0);      // ADD => ADC
                    break;

                default:
                    break;
            }
            cdb.gen(&cs);
        }
        else
            assert(0);
        freenode(e.EV.E2);        // don't need it anymore
    }
    else if (isregvar(e1,&varregm,&varreg) &&
             (e2.Eoper == OPvar || e2.Eoper == OPind) &&
            !evalinregister(e2) &&
             sz <= REGSIZE)               // deal with later
    {
        // lvalue is a register variable and the rvalue is addressable:
        // emit OP varreg,EA(e2) by using e2 as the EA and toggling the
        // direction bit of the opcode
        getlvalue(cdb,&cs,e2,0);
        freenode(e2);
        getregs(cdb,varregm);
        code_newreg(&cs, varreg);
        if (I64 && sz == 1 && varreg >= 4)
            cs.Irex |= REX;
        cs.Iop = op1 ^ 2;                       // toggle direction bit
        if (forccs)
            cs.Iflags |= CFpsw;
        reverse = 2;                            // remember we toggled it
        cdb.gen(&cs);
        retregs = 0;            // to trigger a bug if we attempt to use it
    }
    else if ((op == OPaddass || op == OPminass) &&
             sz <= REGSIZE &&
             !e2.Ecount &&
             ((jop = jmpopcode(e2)) == JC || jop == JNC ||
              (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC)))
            )
    {
        /* e1 += (x < y)    ADC EA,0
         * e1 -= (x < y)    SBB EA,0
         * e1 += (x >= y)   SBB EA,-1
         * e1 -= (x >= y)   ADC EA,-1
         */
        getlvalue(cdb,&cs,e1,0);             // get lvalue
        modEA(cdb,&cs);
        regm_t keepmsk = idxregm(&cs);
        // Evaluate the comparison for its flags only, passing the
        // registers used to address the lvalue as the keep mask
        retregs = mPSW;
        if (OTconv(e2.Eoper))
        {
            scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true);
            freenode(e2);
        }
        else
            scodelem(cdb,e2,&retregs,keepmsk,true);
        cs.Iop = 0x81 ^ isbyte;                   // ADC EA,imm16/32
        uint regop = 2;                     // ADC
        if ((op == OPaddass) ^ (jop == JC))
            regop = 3;                          // SBB
        code_newreg(&cs,regop);
        cs.Iflags |= opsize;
        if (forccs)
            cs.Iflags |= CFpsw;
        cs.IFL2 = FLconst;
        cs.IEV2.Vsize_t = (jop == JC) ? 0 : ~cast(targ_size_t)0;
        cdb.gen(&cs);
        retregs = 0;            // to trigger a bug if we attempt to use it
    }
    else // evaluate e2 into register
    {
        retregs = (isbyte) ? BYTEREGS : ALLREGS;  // pick working reg
        if (tyml == TYhptr)
            retregs &= ~mCX;                    // need CX for shift count
        scodelem(cdb,e.EV.E2,&retregs,0,true);   // get rvalue
        getlvalue(cdb,&cs,e1,retregs);         // get lvalue
        modEA(cdb,&cs);
        cs.Iop = op1;
        if (sz <= REGSIZE || tyfv(tyml))
        {
            reg = findreg(retregs);
            code_newreg(&cs, reg);              // OP1 EA,reg
            if (sz == 1 && reg >= 4 && I64)
                cs.Irex |= REX;
            if (forccs)
                cs.Iflags |= CFpsw;
        }
        else if (tyml == TYhptr)
        {
            uint mreg = findregmsw(retregs);
            uint lreg = findreglsw(retregs);
            getregs(cdb,retregs | mCX);

            // If h -= l, convert to h += -l
            if (e.Eoper == OPminass)
            {
                cdb.gen2(0xF7,modregrm(3,3,mreg));      // NEG mreg
                cdb.gen2(0xF7,modregrm(3,3,lreg));      // NEG lreg
                code_orflag(cdb.last(),CFpsw);
                cdb.genc2(0x81,modregrm(3,3,mreg),0);   // SBB mreg,0
            }
            cs.Iop = 0x01;
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                               // ADD EA,lreg
            code_orflag(cdb.last(),CFpsw);
            cdb.genc2(0x81,modregrm(3,2,mreg),0);       // ADC mreg,0
            genshift(cdb);                              // MOV CX,offset __AHSHIFT
            cdb.gen2(0xD3,modregrm(3,4,mreg));          // SHL mreg,CL
            NEWREG(cs.Irm,mreg);                        // ADD EA+2,mreg
            getlvalue_msw(&cs);
        }
        else if (sz == 2 * REGSIZE)
        {
            cs.Irm |= modregrm(0,findreglsw(retregs),0);
            cdb.gen(&cs);                               // OP1 EA,reg+1
            code_orflag(cdb.last(),cflags);
            cs.Iop = op2;
            NEWREG(cs.Irm,findregmsw(retregs)); // OP2 EA+1,reg
            getlvalue_msw(&cs);
        }
        else
            assert(0);
        cdb.gen(&cs);           // the last (or only) instruction set up above
        retregs = 0;            // to trigger a bug if we attempt to use it
    }

    // See if we need to reload result into a register.
    // Need result in registers in case we have a 32 bit
    // result and we want the flags as a result.
    if (wantres || (sz > REGSIZE && forccs))
    {
        if (sz <= REGSIZE)
        {
            regm_t possregs;

            possregs = ALLREGS;
            if (isbyte)
                possregs = BYTEREGS;
            retregs = forregs & possregs;
            if (!retregs)
                retregs = possregs;

            // If reg field is destination
            if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5)
            {
                reg = (cs.Irm >> 3) & 7;
                if (cs.Irex & REX_R)
                    reg |= 8;
                retregs = mask(reg);
                allocreg(cdb,&retregs,&reg,tyml);
            }
            // If lvalue is a register, just use that register
            else if ((cs.Irm & 0xC0) == 0xC0)
            {
                reg = cs.Irm & 7;
                if (cs.Irex & REX_B)
                    reg |= 8;
                retregs = mask(reg);
                allocreg(cdb,&retregs,&reg,tyml);
            }
            else
            {
                allocreg(cdb,&retregs,&reg,tyml);
                // reverse undoes the direction bit toggle, if one was made
                cs.Iop = LOD ^ isbyte ^ reverse;
                code_newreg(&cs, reg);
                // NOTE(review): every other byte register case in this
                // function sets REX here, not REX_W - confirm whether
                // REX_W is intentional
                if (I64 && isbyte && reg >= 4)
                    cs.Irex |= REX_W;
                cdb.gen(&cs);               // MOV reg,EA
            }
        }
        else if (tyfv(tyml) || tyml == TYhptr)
        {
            regm_t idxregs;

            if (tyml == TYhptr)
                getlvalue_lsw(&cs);
            idxregs = idxregm(&cs);
            retregs = forregs & ~idxregs;
            if (!(retregs & IDXREGS))
                retregs |= IDXREGS & ~idxregs;
            if (!(retregs & mMSW))
                retregs |= mMSW & ALLREGS;
            allocreg(cdb,&retregs,&reg,tyml);
            NEWREG(cs.Irm,findreglsw(retregs));
            if (retregs & mES)              // if want ES loaded
            {
                cs.Iop = 0xC4;
                cdb.gen(&cs);               // LES lreg,EA
            }
            else
            {
                cs.Iop = LOD;
                cdb.gen(&cs);               // MOV lreg,EA
                getlvalue_msw(&cs);
                if (I32)
                    cs.Iflags |= CFopsize;
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);               // MOV mreg,EA+2
            }
        }
        else if (sz == 2 * REGSIZE)
        {
            regm_t idx = idxregm(&cs);
            retregs = forregs;
            if (!retregs)
                retregs = ALLREGS;
            allocreg(cdb,&retregs,&reg,tyml);
            cs.Iop = LOD;
            NEWREG(cs.Irm,reg);

            code csl = cs;
            NEWREG(csl.Irm,findreglsw(retregs));
            getlvalue_lsw(&csl);

            // If reg is also used to address the lvalue, load it last
            // so the address is still intact for the other load
            if (mask(reg) & idx)
            {
                cdb.gen(&csl);             // MOV reg+1,EA
                cdb.gen(&cs);              // MOV reg,EA+2
            }
            else
            {
                cdb.gen(&cs);              // MOV reg,EA+2
                cdb.gen(&csl);             // MOV reg+1,EA
            }
        }
        else
            assert(0);
        if (e1.Ecount)                 // if we gen a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
    }
    freenode(e1);
    if (sz <= REGSIZE)
        *pretregs &= ~mPSW;            // flags are already set
    fixresult(cdb,e,retregs,pretregs);
}
1386 
1387 /********************************
1388  * Generate code for *=
1389  */
1390 
1391 void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1392 {
1393     code cs;
1394     regm_t retregs;
1395     reg_t resreg;
1396     uint opr,isbyte;
1397 
1398     //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
1399     elem *e1 = e.EV.E1;
1400     elem *e2 = e.EV.E2;
1401     OPER op = e.Eoper;                     // OPxxxx
1402 
1403     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
1404     char uns = tyuns(tyml) || tyuns(e2.Ety);
1405     uint sz = _tysize[tyml];
1406 
1407     uint rex = (I64 && sz == 8) ? REX_W : 0;
1408     uint grex = rex << 16;          // 64 bit operands
1409 
1410     // See if evaluate in XMM registers
1411     if (config.fpxmmregs && tyxmmreg(tyml) && !(*pretregs & mST0))
1412     {
1413         xmmopass(cdb,e,pretregs);
1414         return;
1415     }
1416 
1417     if (tyfloating(tyml))
1418     {
1419         static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1420         {
1421             opass87(cdb,e,pretregs);
1422         }
1423         else
1424         {
1425             opassdbl(cdb,e,pretregs,op);
1426         }
1427         return;
1428     }
1429 
1430     if (sz <= REGSIZE)                  // if word or byte
1431     {
1432         if (e2.Eoper == OPconst &&
1433             (I32 || I64) &&
1434             el_signx32(e2) &&
1435             sz >= 4)
1436         {
1437             // See if we can use an LEA instruction
1438 
1439             int ss;
1440             int ss2 = 0;
1441             int shift;
1442 
1443             targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
1444             switch (e2factor)
1445             {
1446                 case 12:    ss = 1; ss2 = 2; goto L4;
1447                 case 24:    ss = 1; ss2 = 3; goto L4;
1448 
1449                 case 6:
1450                 case 3:     ss = 1; goto L4;
1451 
1452                 case 20:    ss = 2; ss2 = 2; goto L4;
1453                 case 40:    ss = 2; ss2 = 3; goto L4;
1454 
1455                 case 10:
1456                 case 5:     ss = 2; goto L4;
1457 
1458                 case 36:    ss = 3; ss2 = 2; goto L4;
1459                 case 72:    ss = 3; ss2 = 3; goto L4;
1460 
1461                 case 18:
1462                 case 9:     ss = 3; goto L4;
1463                 L4:
1464                 {
1465                     getlvalue(cdb,&cs,e1,0);           // get EA
1466                     modEA(cdb,&cs);
1467                     freenode(e2);
1468                     regm_t idxregs = idxregm(&cs);
1469                     regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
1470                     if (!regm)
1471                         regm = allregs & ~(idxregs | mBP | mR13);
1472                     reg_t reg;
1473                     allocreg(cdb,&regm,&reg,tyml);
1474                     cs.Iop = LOD;
1475                     code_newreg(&cs,reg);
1476                     cs.Irex |= rex;
1477                     cdb.gen(&cs);                       // MOV reg,EA
1478 
1479                     assert((reg & 7) != BP);
1480                     cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1481                                 modregxrmx(ss,reg,reg));  // LEA reg,[ss*reg][reg]
1482                     if (ss2)
1483                     {
1484                         cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1485                                        modregxrm(ss2,reg,5));
1486                         cdb.last().IFL1 = FLconst;
1487                         cdb.last().IEV1.Vint = 0;       // LEA reg,0[ss2*reg]
1488                     }
1489                     else if (!(e2factor & 1))    // if even factor
1490                     {
1491                         genregs(cdb,0x03,reg,reg); // ADD reg,reg
1492                         code_orrex(cdb.last(),rex);
1493                     }
1494                     opAssStoreReg(cdb,cs,e,reg,pretregs);
1495                     return;
1496                 }
1497 
1498                 case 37:
1499                 case 74:    shift = 2;
1500                             goto L5;
1501                 case 13:
1502                 case 26:    shift = 0;
1503                             goto L5;
1504                 L5:
1505                 {
1506                     getlvalue(cdb,&cs,e1,0);           // get EA
1507                     modEA(cdb,&cs);
1508                     freenode(e2);
1509                     regm_t idxregs = idxregm(&cs);
1510                     regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
1511                     if (!regm)
1512                         regm = allregs & ~(idxregs | mBP | mR13);
1513                     reg_t reg;                          // return register
1514                     allocreg(cdb,&regm,&reg,tyml);
1515 
1516                     reg_t sreg = allocScratchReg(cdb, allregs & ~(regm | idxregs | mBP | mR13));
1517 
1518                     cs.Iop = LOD;
1519                     code_newreg(&cs,sreg);
1520                     cs.Irex |= rex;
1521                     cdb.gen(&cs);                                         // MOV sreg,EA
1522 
1523                     assert((sreg & 7) != BP);
1524                     assert((reg & 7) != BP);
1525                     cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1526                                           modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
1527                     if (shift)
1528                         cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
1529                     cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
1530                                           modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
1531                     if (!(e2factor & 1))                                  // if even factor
1532                     {
1533                         genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
1534                         code_orrex(cdb.last(),rex);
1535                     }
1536                     opAssStoreReg(cdb,cs,e,reg,pretregs);
1537                     return;
1538                 }
1539 
1540                 default:
1541                     break;
1542             }
1543         }
1544 
1545         isbyte = (sz == 1);             // 1 for byte operation
1546 
1547         if (config.target_cpu >= TARGET_80286 &&
1548             e2.Eoper == OPconst && !isbyte)
1549         {
1550             targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
1551             if (I64 && sz == 8 && e2factor != cast(int)e2factor)
1552                 goto L1;
1553             freenode(e2);
1554             getlvalue(cdb,&cs,e1,0);     // get EA
1555             regm_t idxregs = idxregm(&cs);
1556             retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;
1557             if (!retregs)
1558                 retregs = ALLREGS & ~idxregs;
1559             allocreg(cdb,&retregs,&resreg,tyml);
1560             cs.Iop = 0x69;                  // IMUL reg,EA,e2value
1561             cs.IFL2 = FLconst;
1562             cs.IEV2.Vint = cast(int)e2factor;
1563             opr = resreg;
1564         }
1565         else if (!I16 && !isbyte)
1566         {
1567          L1:
1568             retregs = *pretregs & (ALLREGS | mBP);
1569             if (!retregs)
1570                 retregs = ALLREGS;
1571             codelem(cdb,e2,&retregs,false); // load rvalue in reg
1572             getlvalue(cdb,&cs,e1,retregs);  // get EA
1573             getregs(cdb,retregs);           // destroy these regs
1574             cs.Iop = 0x0FAF;                        // IMUL resreg,EA
1575             resreg = findreg(retregs);
1576             opr = resreg;
1577         }
1578         else
1579         {
1580             retregs = mAX;
1581             codelem(cdb,e2,&retregs,false);      // load rvalue in AX
1582             getlvalue(cdb,&cs,e1,mAX);           // get EA
1583             getregs(cdb,isbyte ? mAX : mAX | mDX); // destroy these regs
1584             cs.Iop = 0xF7 ^ isbyte;                        // [I]MUL EA
1585             opr = uns ? 4 : 5;              // MUL/IMUL
1586             resreg = AX;                    // result register for *
1587         }
1588         code_newreg(&cs,opr);
1589         cdb.gen(&cs);
1590 
1591         opAssStoreReg(cdb, cs, e, resreg, pretregs);
1592         return;
1593     }
1594     else if (sz == 2 * REGSIZE)
1595     {
1596         if (e2.Eoper == OPconst && I32)
1597         {
1598             /*  if (msw)
1599                   IMUL    EDX,EDX,lsw
1600                   IMUL    reg,EAX,msw
1601                   ADD     reg,EDX
1602                 else
1603                   IMUL    reg,EDX,lsw
1604                 MOV       EDX,lsw
1605                 MUL       EDX
1606                 ADD       EDX,reg
1607              */
1608             freenode(e2);
1609             retregs = mDX|mAX;
1610             reg_t rhi, rlo;
1611             opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
1612             const regm_t keepmsk = idxregm(&cs);
1613 
1614             reg_t reg = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
1615 
1616             targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
1617             const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
1618             const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));
1619 
1620             if (msw)
1621             {
1622                 genmulimm(cdb,DX,DX,lsw);          // IMUL EDX,EDX,lsw
1623                 genmulimm(cdb,reg,AX,msw);         // IMUL reg,EAX,msw
1624                 cdb.gen2(0x03,modregrm(3,reg,DX)); // ADD reg,EAX
1625             }
1626             else
1627                 genmulimm(cdb,reg,DX,lsw);         // IMUL reg,EDX,lsw
1628 
1629             movregconst(cdb,DX,lsw,0);             // MOV EDX,lsw
1630             getregs(cdb,mDX);
1631             cdb.gen2(0xF7,modregrm(3,4,DX));       // MUL EDX
1632             cdb.gen2(0x03,modregrm(3,DX,reg));     // ADD EDX,reg
1633         }
1634         else
1635         {
1636             retregs = mDX | mAX;
1637             regm_t rretregs = (config.target_cpu >= TARGET_PentiumPro) ? allregs & ~retregs : mCX | mBX;
1638             codelem(cdb,e2,&rretregs,false);
1639             getlvalue(cdb,&cs,e1,retregs | rretregs);
1640             getregs(cdb,retregs);
1641             cs.Iop = LOD;
1642             cdb.gen(&cs);                   // MOV AX,EA
1643             getlvalue_msw(&cs);
1644             cs.Irm |= modregrm(0,DX,0);
1645             cdb.gen(&cs);                   // MOV DX,EA+2
1646             getlvalue_lsw(&cs);
1647             if (config.target_cpu >= TARGET_PentiumPro)
1648             {
1649                 regm_t rlo = findreglsw(rretregs);
1650                 regm_t rhi = findregmsw(rretregs);
1651                 /*  IMUL    rhi,EAX
1652                     IMUL    EDX,rlo
1653                     ADD     rhi,EDX
1654                     MUL     rlo
1655                     ADD     EDX,Erhi
1656                  */
1657                  getregs(cdb,mAX|mDX|mask(rhi));
1658                  cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
1659                  cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
1660                  cdb.gen2(0x03,modregrm(3,rhi,DX));
1661                  cdb.gen2(0xF7,modregrm(3,4,rlo));
1662                  cdb.gen2(0x03,modregrm(3,DX,rhi));
1663             }
1664             else
1665             {
1666                 callclib(cdb,e,CLIB.lmul,&retregs,idxregm(&cs));
1667             }
1668         }
1669 
1670         opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
1671         return;
1672     }
1673     else
1674     {
1675         assert(0);
1676     }
1677 }
1678 
1679 
1680 /********************************
1681  * Generate code for /= %=
      *
      * Dispatch, in order:
      *  - floating point lvalues go to xmmopass(), opass87() or opassdbl();
      *  - integer lvalues of at most REGSIZE bytes are expanded inline, with
      *    special sequences for constant divisors when optimizing for speed
      *    (multiply by reciprocal, or shift/mask for powers of 2), falling
      *    back to a DIV/IDIV instruction;
      *  - integer lvalues of 2*REGSIZE bytes end in a runtime library call
      *    (CLIB.uldiv / CLIB.ldiv, or the CLIB entry following it for %=).
      *
      * Params:
      *      cdb = code builder that receives the generated instructions
      *      e = the OPdivass or OPmodass node; e.EV.E1 is the lvalue and
      *          e.EV.E2 the divisor
      *      pretregs = register mask for the result; it is tested for mST0
      *          here and otherwise forwarded to the opAssStore*() helpers
1682  */
1683 
1684 void cddivass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1685 {
1686     elem *e1 = e.EV.E1;
1687     elem *e2 = e.EV.E2;
1688 
1689     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
1690     OPER op = e.Eoper;                     // OPxxxx
1691 
1692     // See if evaluate in XMM registers
1693     if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
1694     {
1695         xmmopass(cdb,e,pretregs);
1696         return;
1697     }
1698 
         // Remaining floating point cases: opass87() on the targets listed
         // below, opassdbl() everywhere else.
1699     if (tyfloating(tyml))
1700     {
1701         static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1702         {
1703             opass87(cdb,e,pretregs);
1704         }
1705         else
1706         {
1707             opassdbl(cdb,e,pretregs,op);
1708         }
1709         return;
1710     }
1711 
1712     code cs = void;
1713 
1714     //printf("cddivass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
         // The operation is unsigned if either operand has an unsigned type
1715     char uns = tyuns(tyml) || tyuns(e2.Ety);
1716     uint sz = _tysize[tyml];
1717 
         // REX_W is only requested for 8 byte operands in 64 bit mode; grex is
         // the same flag shifted so it can be OR'ed into the modregrm argument
         // of the gen2()/genc2() calls below.
1718     uint rex = (I64 && sz == 8) ? REX_W : 0;
1719     uint grex = rex << 16;          // 64 bit operands
1720 
1721     if (sz <= REGSIZE)                  // if word or byte
1722     {
1723         uint isbyte = (sz == 1);        // 1 for byte operation
1724         reg_t resreg;
1725         targ_size_t e2factor;
1726         targ_size_t d;
1727         bool neg;
1728         int pow2;
1729 
1730         assert(!isbyte);                      // should never happen
1731         assert(I16 || sz != SHORTSIZE);
1732 
             // For a constant divisor: e2factor is its value, pow2 the result of
             // ispow2(), d its magnitude, and neg records a negative signed
             // divisor. When e2 is not a constant these locals keep their
             // default (zero/false) initial values; every use below is guarded
             // by an e2.Eoper == OPconst test.
1733         if (e2.Eoper == OPconst)
1734         {
1735             e2factor = cast(targ_size_t)el_tolong(e2);
1736             pow2 = ispow2(e2factor);
1737             d = e2factor;
1738             if (!uns && cast(targ_llong)e2factor < 0)
1739             {
1740                 neg = true;
1741                 d = -d;
1742             }
1743         }
1744 
1745         // Signed divide by a constant
             // (d & (d - 1)) is nonzero only when d has more than one bit set,
             // i.e. the magnitude is neither 0 nor a power of 2.
1746         if (config.flags4 & CFG4speed &&
1747             e2.Eoper == OPconst &&
1748             !uns &&
1749             (d & (d - 1)) &&
1750             ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
1751         {
1752             /* R1 / 10
1753              *
1754              *  MOV     EAX,m
1755              *  IMUL    R1
1756              *  MOV     EAX,R1
1757              *  SAR     EAX,31
1758              *  SAR     EDX,shpost
1759              *  SUB     EDX,EAX
1760              *  IMUL    EAX,EDX,d
1761              *  SUB     R1,EAX
1762              *
1763              * EDX = quotient
1764              * R1 = remainder
1765              */
1766             assert(sz == 4 || sz == 8);
1767 
1768             ulong m;
1769             int shpost;
1770             const int N = sz * 8;
1771             const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);
1772 
1773             freenode(e2);
1774 
1775             getlvalue(cdb,&cs,e1,mAX | mDX);
1776             reg_t reg;
1777             opAssLoadReg(cdb, cs, e, reg, allregs & ~( mAX | mDX | idxregm(&cs)));    // MOV reg,EA
1778             getregs(cdb, mAX|mDX);
1779 
1780             /* Algorithm 5.2
1781              * if m>=2**(N-1)
1782              *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
1783              * else
1784              *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
1785              * if (neg)
1786              *    q = -q
1787              */
1788             const bool mgt = mhighbit || m >= (1UL << (N - 1));
1789             movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
1790             cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL reg
1791             if (mgt)
1792                 cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,reg
1793             getregsNoSave(mAX);                                     // EAX no longer contains 'm'
1794             genmovreg(cdb, AX, reg);                                // MOV EAX,reg
1795             cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
1796             if (shpost)
1797                 cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
                 // Negative divisor with /=: subtract in the opposite order so
                 // the quotient comes out negated (the "q = -q" step above).
                 // For %= the un-negated quotient is kept in EDX because it is
                 // multiplied by the magnitude d below.
1798             reg_t r3;
1799             if (neg && op == OPdivass)
1800             {
1801                 cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
1802                 r3 = AX;
1803             }
1804             else
1805             {
1806                 cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
1807                 r3 = DX;
1808             }
1809 
1810             // r3 is quotient
1811             reg_t resregx;
1812             switch (op)
1813             {   case OPdivass:
1814                     resregx = r3;
1815                     break;
1816 
1817                 case OPmodass:
                         // remainder = reg - quotient * d
1818                     assert(reg != AX && r3 == DX);
                         // Use the immediate form of IMUL when d fits in the
                         // 32 bit immediate, otherwise load d into EAX first
1819                     if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1820                     {
1821                         cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
1822                     }
1823                     else
1824                     {
1825                         movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);     // MOV EAX,d
1826                         cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1827                         getregsNoSave(mAX);                             // EAX no longer contains 'd'
1828                     }
1829                     cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1830                     resregx = reg;
1831                     break;
1832 
1833                 default:
1834                     assert(0);
1835             }
1836 
1837             opAssStoreReg(cdb, cs, e, resregx, pretregs);
1838             return;
1839         }
1840 
1841         // Unsigned divide by a constant
             // Nested helper: emits a multiply-by-reciprocal sequence for the
             // constant divisor e2factor, picking one of two sequences from the
             // result of udiv_coefficients(), then stores the quotient (/=) or
             // the remainder (%=). It declares its own `cs`, which hides the
             // one in the enclosing function.
1842         void unsignedDivideByConstant(ref CodeBuilder cdb)
1843         {
1844             assert(sz == 4 || sz == 8);
1845 
1846             reg_t r3;
1847             reg_t reg;
1848             ulong m;
1849             int shpre;
1850             int shpost;
1851             code cs = void;
1852 
1853             if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
1854             {
1855                 /* t1 = MULUH(m, n)
1856                  * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
1857                  *   MOV   EAX,reg
1858                  *   MOV   EDX,m
1859                  *   MUL   EDX
1860                  *   MOV   EAX,reg
1861                  *   SUB   EAX,EDX
1862                  *   SHR   EAX,1
1863                  *   LEA   R3,[EAX][EDX]
1864                  *   SHR   R3,shpost-1
1865                  */
1866                 assert(shpre == 0);
1867 
1868                 freenode(e2);
1869                 getlvalue(cdb,&cs,e1,mAX | mDX);
1870                 regm_t idxregs = idxregm(&cs);
1871                 opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
1872                 getregs(cdb, mAX|mDX);
1873 
1874                 genmovreg(cdb,AX,reg);                                // MOV EAX,reg
1875                 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0); // MOV EDX,m
1876                 getregs(cdb,mask(reg) | mDX | mAX);
1877                 cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
1878                 genmovreg(cdb,AX,reg);                                // MOV EAX,reg
1879                 cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
1880                 cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
                     // Pick the register for the quotient. For %= it must not
                     // be reg (still holds the original value) and, when the
                     // divisor has to be loaded into EAX for the multiply
                     // below, it must not be EAX either.
1881                 regm_t regm3 = allregs & ~idxregs;
1882                 if (op == OPmodass)
1883                 {
1884                     regm3 &= ~mask(reg);
1885                     if (!el_signx32(e2))
1886                         regm3 &= ~mAX;
1887                 }
1888                 allocreg(cdb,&regm3,&r3,TYint);
1889                 cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
1890                 if (shpost != 1)
1891                     cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
1892             }
1893             else
1894             {
1895                 /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
1896                  *   SHR   EAX,shpre
1897                  *   MOV   reg,m
1898                  *   MUL   reg
1899                  *   SHR   EDX,shpost
1900                  */
1901 
1902                 freenode(e2);
1903                 getlvalue(cdb,&cs,e1,mAX | mDX);
1904                 regm_t idxregs = idxregm(&cs);
1905                 opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
1906                 getregs(cdb, mAX|mDX);
1907 
1908                 if (reg != AX)
1909                 {
1910                     getregs(cdb,mAX);
1911                     genmovreg(cdb,AX,reg);                              // MOV EAX,reg
1912                 }
1913                 if (shpre)
1914                 {
1915                     getregs(cdb,mAX);
1916                     cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
1917                 }
1918                 getregs(cdb,mDX);
1919                 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1920                 getregs(cdb,mDX | mAX);
1921                 cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
1922                 if (shpost)
1923                     cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
1924                 r3 = DX;
1925             }
1926 
1927             reg_t resregx;
1928             switch (op)
1929             {
1930                 case OPdivass:
1931                     // r3 = quotient
1932                     resregx = r3;
1933                     break;
1934 
1935                 case OPmodass:
1936                     /* reg = original value
1937                      * r3  = quotient
                          * remainder = reg - r3 * e2factor
1938                      */
1939                     assert(reg != AX);
1940                     if (el_signx32(e2))
1941                     {
1942                         cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1943                     }
1944                     else
1945                     {
1946                         assert(!(mask(r3) & mAX));
1947                         movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
1948                         getregs(cdb,mAX);
1949                         cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1950                     }
1951                     getregs(cdb,mask(reg));
1952                     cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1953                     resregx = reg;
1954                     break;
1955 
1956                 default:
1957                     assert(0);
1958             }
1959 
1960             opAssStoreReg(cdb, cs, e, resregx, pretregs);
1961             return;
1962         }
1963 
             // Use the reciprocal sequence only for unsigned constant divisors
             // greater than 2 that are not a power of 2
1964         if (config.flags4 & CFG4speed &&
1965             e2.Eoper == OPconst &&
1966             uns &&
1967             e2factor > 2 && (e2factor & (e2factor - 1)) &&
1968             ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
1969         {
1970             unsignedDivideByConstant(cdb);
1971             return;
1972         }
1973 
             // Signed divide or modulo by a power of 2 constant whose value
             // survives a round trip through int. The last condition excludes
             // /= by more than 2 on CPUs below the 80286, where the
             // shift-by-immediate used further down is asserted against.
1974         if (config.flags4 & CFG4speed &&
1975             e2.Eoper == OPconst && !uns &&
1976             (sz == REGSIZE || (I64 && sz == 4)) &&
1977             pow2 != -1 &&
1978             e2factor == cast(int)e2factor &&
1979             !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)
1980            )
1981         {
1982             freenode(e2);
1983             if (pow2 == 1 && op == OPdivass && config.target_cpu > TARGET_80386)
1984             {
1985                 /* This is better than the code further down because it is
1986                  * not constrained to using AX and DX.
1987                  */
1988                 getlvalue(cdb,&cs,e1,0);
1989                 regm_t idxregs = idxregm(&cs);
1990                 reg_t reg;
1991                 opAssLoadReg(cdb,cs,e,reg,allregs & ~idxregs); // MOV reg,EA
1992 
                     // Add the sign bit to the value before the arithmetic
                     // shift so that negative values round toward zero
1993                 reg_t r = allocScratchReg(cdb, allregs & ~(idxregs | mask(reg)));
1994                 genmovreg(cdb,r,reg);                        // MOV r,reg
1995                 cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
1996                 cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
1997                 cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1
1998 
1999                 opAssStoreReg(cdb, cs, e, reg, pretregs);
2000                 return;
2001             }
2002 
2003             // Signed divide or modulo by power of 2
                 // The value is loaded into AX and CWD leaves its sign mask
                 // (0 or -1) in DX for the fix-up sequences below.
2004             getlvalue(cdb,&cs,e1,mAX | mDX);
2005             reg_t reg;
2006             opAssLoadReg(cdb,cs,e,reg,mAX);
2007 
2008             getregs(cdb,mDX);                   // DX is scratch register
2009             cdb.gen1(0x99);                     // CWD
2010             code_orrex(cdb.last(), rex);
2011             if (pow2 == 1)
2012             {
2013                 if (op == OPdivass)
2014                 {
2015                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2016                     cdb.gen2(0xD1,grex | modregrm(3,7,AX));        // SAR AX,1
2017                     resreg = AX;
2018                 }
2019                 else // OPmodass
2020                 {
2021                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2022                     cdb.genc2(0x81,grex | modregrm(3,4,AX),1);     // AND AX,1
2023                     cdb.gen2(0x03,grex | modregrm(3,DX,AX));       // ADD DX,AX
2024                     resreg = DX;
2025                 }
2026             }
2027             else
2028             {
2029                 assert(pow2 < 32);
2030                 targ_ulong m = (1 << pow2) - 1;
2031                 if (op == OPdivass)
2032                 {
2033                     cdb.genc2(0x81,grex | modregrm(3,4,DX),m);     // AND DX,m
2034                     cdb.gen2(0x03,grex | modregrm(3,AX,DX));       // ADD AX,DX
2035                     // Be careful not to generate this for 8088
2036                     assert(config.target_cpu >= TARGET_80286);
2037                     cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2);  // SAR AX,pow2
2038                     resreg = AX;
2039                 }
2040                 else // OPmodass
2041                 {
                         // Take the absolute value, mask it, then restore the
                         // sign: the result has the sign of the dividend
2042                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2043                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2044                     cdb.genc2(0x81,grex | modregrm(3,4,AX),m);     // AND AX,m
2045                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2046                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2047                     resreg = AX;
2048                 }
2049             }
2050         }
2051         else
2052         {
                 // General case: load the divisor into a register other than
                 // AX/DX, load the lvalue into AX, extend it into DX (zero or
                 // CWD), then DIV/IDIV. The remainder is taken from DX, the
                 // quotient from AX.
2053             regm_t retregs = ALLREGS & ~(mAX|mDX);     // DX gets sign extension
2054             codelem(cdb,e2,&retregs,false);            // load rvalue in retregs
2055             reg_t reg = findreg(retregs);
2056             getlvalue(cdb,&cs,e1,mAX | mDX | retregs); // get EA
2057             getregs(cdb,mAX | mDX);         // destroy these regs
2058             cs.Irm |= modregrm(0,AX,0);
2059             cs.Iop = LOD;
2060             cdb.gen(&cs);                   // MOV AX,EA
2061             if (uns)                        // if uint
2062                 movregconst(cdb,DX,0,0);    // CLR DX
2063             else                            // else signed
2064             {
2065                 cdb.gen1(0x99);             // CWD
2066                 code_orrex(cdb.last(),rex);
2067             }
2068             getregs(cdb,mDX | mAX); // DX and AX will be destroyed
2069             const uint opr = uns ? 6 : 7;     // DIV/IDIV
2070             genregs(cdb,0xF7,opr,reg);   // OPR reg
2071             code_orrex(cdb.last(),rex);
2072             resreg = (op == OPmodass) ? DX : AX;        // result register
2073         }
2074         opAssStoreReg(cdb, cs, e, resreg, pretregs);
2075         return;
2076     }
2077 
         // From here on the operand occupies a register pair
2078     assert(sz == 2 * REGSIZE);
2079 
2080     targ_size_t e2factor;
2081     int pow2;
2082     if (e2.Eoper == OPconst)
2083     {
2084         e2factor = cast(targ_size_t)el_tolong(e2);
2085         pow2 = ispow2(e2factor);
2086     }
2087 
2088     // Register pair signed divide by power of 2
         // NOTE(review): the condition tests e.Eoper, but e is the /= or %= node
         // (op == e.Eoper is compared against OPdivass on the first line), so
         // e.Eoper == OPconst looks like it can never hold and this branch
         // appears to be unreachable. e2.Eoper was probably intended - confirm,
         // and review the sequences below before enabling them.
2089     if (op == OPdivass &&
2090         !uns &&
2091         e.Eoper == OPconst &&
2092         pow2 != -1 &&
2093         I32 // not set up for I16 or I64 cent
2094        )
2095     {
2096         freenode(e2);
2097         regm_t retregs = mDX|mAX | mCX|mBX;     // LSW must be byte reg because of later SETZ
2098         reg_t rhi, rlo;
2099         opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
2100         const regm_t keepmsk = idxregm(&cs);
2101         retregs = mask(rhi) | mask(rlo);
2102 
             // The bias (2**pow2 - 1 for a negative dividend, else 0) is added
             // to rhi:rlo before shifting so the quotient rounds toward zero
2103         if (pow2 < 32)
2104         {
2105             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2106 
2107             genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
2108             if (pow2 == 1)
2109                 cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
2110             else
2111             {
2112                 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
2113                 cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
2114             }
2115             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
2116             cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
2117             cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
2118             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
2119         }
2120         else if (pow2 == 32)
2121         {
2122             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2123 
2124             genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
2125             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
2126             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
2127             cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
2128             cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
2129             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
2130         }
2131         else if (pow2 < 63)
2132         {
2133             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2134             reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk | mask(r1)));
2135 
2136             genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
2137             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
2138             cdb.genmovreg(r2,r1);                                         // MOV r2,r1
2139 
2140             if (pow2 == 33)
2141             {
2142                 cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
2143                 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
2144                 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
2145             }
2146             else
2147             {
2148                 cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
2149                 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
2150                 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
2151             }
2152 
2153             cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
2154             cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
2155             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
2156         }
2157         else
2158         {
2159             // This may be better done by cgelem.d
2160             assert(pow2 == 63);
2161             assert(mask(rlo) & BYTEREGS);                          // for SETZ
                 // NOTE(review): the comment on the next line says ADD, but the
                 // reg field passed is 4, which the other 0x81 uses in this
                 // function label as AND - verify which one is intended.
2162             cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000
2163             cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
2164             cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
2165             cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
2166             movregconst(cdb,rhi,0,0);                              // MOV rhi,0
2167         }
2168 
             // NOTE(review): the other opAssStorePair() calls in this file
             // section pass (msw, lsw); here the order is (rlo, rhi) - verify
             // against the declaration of opAssStorePair().
2169         opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
2170         return;
2171     }
2172 
2173     // Register pair signed modulo by power of 2
         // NOTE(review): same e.Eoper == OPconst test as the divide case above,
         // so this branch also appears to be unreachable - confirm whether
         // e2.Eoper was intended.
2174     if (op == OPmodass &&
2175         !uns &&
2176         e.Eoper == OPconst &&
2177         pow2 != -1 &&
2178         I32 // not set up for I64 cent yet
2179        )
2180     {
2181         freenode(e2);
2182         regm_t retregs = mDX|mAX;
2183         reg_t rhi, rlo;
2184         opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
2185         const regm_t keepmsk = idxregm(&cs);
2186 
2187         regm_t scratchm = allregs & ~(retregs | keepmsk);
2188         if (pow2 == 63)
2189             scratchm &= BYTEREGS;               // because of SETZ
2190         reg_t r1 = allocScratchReg(cdb, scratchm);
2191 
2192         if (pow2 < 32)
2193         {
2194             cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
2195             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
2196             cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
2197             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
2198             cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
2199             cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
2200             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
2201             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
2202         }
2203         else if (pow2 == 32)
2204         {
2205             cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
2206             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
2207             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
2208             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
2209             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
2210         }
2211         else if (pow2 < 63)
2212         {
                 // A second scratch register is needed; the mask is rebuilt to
                 // exclude the result pair and everything in the previous
                 // scratch mask
2213             scratchm = allregs & ~(retregs | scratchm);
2214             reg_t r2;
2215             allocreg(cdb,&scratchm,&r2,TYint);
2216 
2217             cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
2218             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
2219             cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
2220             cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
2221             cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
2222             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
2223             cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
2224             cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
2225             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
2226             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
2227         }
2228         else
2229         {
2230             // This may be better done by cgelem.d
2231             assert(pow2 == 63);
2232 
2233             cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
2234             cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
2235             cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
2236             cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
2237             cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
2238         }
2239 
             // NOTE(review): argument order is (rlo, rhi) here but (msw, lsw)
             // in the other opAssStorePair() calls - verify.
2240         opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
2241         return;
2242     }
2243 
         // Everything else: evaluate the divisor into CX|BX, load the lvalue
         // into DX|AX and call the runtime library. For %= the result is
         // expected in CX|BX, for /= in DX|AX.
2244     regm_t rretregs = mCX|mBX;
2245     codelem(cdb,e2,&rretregs,false);    // load e2 into CX|BX
2246 
2247     reg_t rlo;
2248     reg_t rhi;
2249     opAssLoadPair(cdb, cs, e, rhi, rlo, mDX|mAX, rretregs);
2250 
2251     regm_t retregs = (op == OPmodass) ? mCX|mBX : mDX|mAX;
2252     uint lib = uns ? CLIB.uldiv : CLIB.ldiv;
         // %= uses the CLIB entry that immediately follows the divide entry
         // (presumably the matching modulo routine - confirm in the CLIB enum)
2253     if (op == OPmodass)
2254         ++lib;
2255     callclib(cdb,e,lib,&retregs,idxregm(&cs));
2256 
2257     opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
2258 }
2259 
2260 
2261 /********************************
2262  * Generate code for <<= and >>=
2263  */
2264 
2265 void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2266 {
2267     code cs;
2268     regm_t retregs;
2269     uint op1,op2;
2270     reg_t reg;
2271 
2272     elem *e1 = e.EV.E1;
2273     elem *e2 = e.EV.E2;
2274 
2275     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
2276     uint sz = _tysize[tyml];
2277     uint isbyte = tybyte(e.Ety) != 0;        // 1 for byte operations
2278     tym_t tym = tybasic(e.Ety);                // type of result
2279     OPER oper = e.Eoper;
2280     assert(tysize(e2.Ety) <= REGSIZE);
2281 
2282     uint rex = (I64 && sz == 8) ? REX_W : 0;
2283 
2284     // if our lvalue is a cse, make sure we evaluate for result in register
2285     if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,&reg))
2286         *pretregs |= ALLREGS;
2287 
2288     version (SCPP)
2289     {
2290         // Do this until the rest of the compiler does OPshr/OPashr correctly
2291         if (oper == OPshrass)
2292             oper = tyuns(tyml) ? OPshrass : OPashrass;
2293     }
2294 
2295     // Select opcodes. op2 is used for msw for long shifts.
2296 
2297     switch (oper)
2298     {
2299         case OPshlass:
2300             op1 = 4;                    // SHL
2301             op2 = 2;                    // RCL
2302             break;
2303 
2304         case OPshrass:
2305             op1 = 5;                    // SHR
2306             op2 = 3;                    // RCR
2307             break;
2308 
2309         case OPashrass:
2310             op1 = 7;                    // SAR
2311             op2 = 3;                    // RCR
2312             break;
2313 
2314         default:
2315             assert(0);
2316     }
2317 
2318 
2319     uint v = 0xD3;                  // for SHIFT xx,CL cases
2320     uint loopcnt = 1;
2321     uint conste2 = false;
2322     uint shiftcnt = 0;              // avoid "use before initialized" warnings
2323     if (e2.Eoper == OPconst)
2324     {
2325         conste2 = true;                 // e2 is a constant
2326         shiftcnt = e2.EV.Vint;         // byte ordering of host
2327         if (config.target_cpu >= TARGET_80286 &&
2328             sz <= REGSIZE &&
2329             shiftcnt != 1)
2330             v = 0xC1;                   // SHIFT xx,shiftcnt
2331         else if (shiftcnt <= 3)
2332         {
2333             loopcnt = shiftcnt;
2334             v = 0xD1;                   // SHIFT xx,1
2335         }
2336     }
2337 
2338     if (v == 0xD3)                        // if COUNT == CL
2339     {
2340         retregs = mCX;
2341         codelem(cdb,e2,&retregs,false);
2342     }
2343     else
2344         freenode(e2);
2345     getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
2346     modEA(cdb,&cs);             // check for modifying register
2347 
2348     if (*pretregs == 0 ||               // if don't return result
2349         (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
2350         sz > REGSIZE
2351        )
2352     {
2353         retregs = 0;            // value not returned in a register
2354         cs.Iop = v ^ isbyte;
2355         while (loopcnt--)
2356         {
2357             NEWREG(cs.Irm,op1);           // make sure op1 is first
2358             if (sz <= REGSIZE)
2359             {
2360                 if (conste2)
2361                 {
2362                     cs.IFL2 = FLconst;
2363                     cs.IEV2.Vint = shiftcnt;
2364                 }
2365                 cdb.gen(&cs);             // SHIFT EA,[CL|1]
2366                 if (*pretregs & mPSW && !loopcnt && conste2)
2367                   code_orflag(cdb.last(),CFpsw);
2368             }
2369             else // TYlong
2370             {
2371                 cs.Iop = 0xD1;            // plain shift
2372                 code *ce = gennop(null);                  // ce: NOP
2373                 if (v == 0xD3)
2374                 {
2375                     getregs(cdb,mCX);
2376                     if (!conste2)
2377                     {
2378                         assert(loopcnt == 0);
2379                         genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
2380                     }
2381                 }
2382                 code *cg;
2383                 if (oper == OPshlass)
2384                 {
2385                     cdb.gen(&cs);               // cg: SHIFT EA
2386                     cg = cdb.last();
2387                     code_orflag(cg,CFpsw);
2388                     getlvalue_msw(&cs);
2389                     NEWREG(cs.Irm,op2);
2390                     cdb.gen(&cs);               // SHIFT EA
2391                     getlvalue_lsw(&cs);
2392                 }
2393                 else
2394                 {
2395                     getlvalue_msw(&cs);
2396                     cdb.gen(&cs);
2397                     cg = cdb.last();
2398                     code_orflag(cg,CFpsw);
2399                     NEWREG(cs.Irm,op2);
2400                     getlvalue_lsw(&cs);
2401                     cdb.gen(&cs);
2402                 }
2403                 if (v == 0xD3)                    // if building a loop
2404                 {
2405                     genjmp(cdb,LOOP,FLcode,cast(block *) cg); // LOOP cg
2406                     regimmed_set(CX,0);           // note that now CX == 0
2407                 }
2408                 cdb.append(ce);
2409             }
2410         }
2411 
2412         // If we want the result, we must load it from the EA
2413         // into a register.
2414 
2415         if (sz == 2 * REGSIZE && *pretregs)
2416         {
2417             retregs = *pretregs & (ALLREGS | mBP);
2418             if (retregs)
2419             {
2420                 retregs &= ~idxregm(&cs);
2421                 allocreg(cdb,&retregs,&reg,tym);
2422                 cs.Iop = LOD;
2423 
2424                 // be careful not to trash any index regs
2425                 // do MSW first (which can't be an index reg)
2426                 getlvalue_msw(&cs);
2427                 NEWREG(cs.Irm,reg);
2428                 cdb.gen(&cs);
2429                 getlvalue_lsw(&cs);
2430                 reg = findreglsw(retregs);
2431                 NEWREG(cs.Irm,reg);
2432                 cdb.gen(&cs);
2433                 if (*pretregs & mPSW)
2434                     tstresult(cdb,retregs,tyml,true);
2435             }
2436             else        // flags only
2437             {
2438                 retregs = ALLREGS & ~idxregm(&cs);
2439                 allocreg(cdb,&retregs,&reg,TYint);
2440                 cs.Iop = LOD;
2441                 NEWREG(cs.Irm,reg);
2442                 cdb.gen(&cs);           // MOV reg,EA
2443                 cs.Iop = 0x0B;          // OR reg,EA+2
2444                 cs.Iflags |= CFpsw;
2445                 getlvalue_msw(&cs);
2446                 cdb.gen(&cs);
2447             }
2448         }
2449         if (e1.Ecount && !(retregs & regcon.mvar))   // if lvalue is a CSE
2450             cssave(e1,retregs,!OTleaf(e1.Eoper));
2451         freenode(e1);
2452         *pretregs = retregs;
2453         return;
2454     }
2455     else                                // else must evaluate in register
2456     {
2457         if (sz <= REGSIZE)
2458         {
2459             regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
2460             if (isbyte)
2461                 possregs &= BYTEREGS;
2462             retregs = *pretregs & possregs;
2463             if (retregs == 0)
2464                 retregs = possregs;
2465             allocreg(cdb,&retregs,&reg,tym);
2466             cs.Iop = LOD ^ isbyte;
2467             code_newreg(&cs, reg);
2468             if (isbyte && I64 && (reg >= 4))
2469                 cs.Irex |= REX;
2470             cdb.gen(&cs);                     // MOV reg,EA
2471             if (!I16)
2472             {
2473                 assert(!isbyte || (mask(reg) & BYTEREGS));
2474                 cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
2475                 if (isbyte && I64 && (reg >= 4))
2476                     cdb.last().Irex |= REX;
2477                 code_orrex(cdb.last(), rex);
2478                 // We can do a 32 bit shift on a 16 bit operand if
2479                 // it's a left shift and we're not concerned about
2480                 // the flags. Remember that flags are not set if
2481                 // a shift of 0 occurs.
2482                 if (_tysize[tym] == SHORTSIZE &&
2483                     (oper == OPshrass || oper == OPashrass ||
2484                      (*pretregs & mPSW && conste2)))
2485                      cdb.last().Iflags |= CFopsize;            // 16 bit operand
2486             }
2487             else
2488             {
2489                 while (loopcnt--)
2490                 {   // Generate shift instructions.
2491                     cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
2492                 }
2493             }
2494             if (*pretregs & mPSW && conste2)
2495             {
2496                 assert(shiftcnt);
2497                 *pretregs &= ~mPSW;     // result is already in flags
2498                 code_orflag(cdb.last(),CFpsw);
2499             }
2500 
2501             opAssStoreReg(cdb,cs,e,reg,pretregs);
2502             return;
2503         }
2504         assert(0);
2505     }
2506 }
2507 
2508 
2509 /**********************************
2510  * Generate code for compares.
2511  * Handles lt,gt,le,ge,eqeq,ne for all data types.
2512  */
2513 
2514 void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2515 {
2516     regm_t retregs,rretregs;
2517     reg_t reg,rreg;
2518     int fl;
2519 
2520     //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
2521     // Collect extra parameter. This is pretty ugly...
2522     int flag = cdcmp_flag;
2523     cdcmp_flag = 0;
2524 
2525     elem *e1 = e.EV.E1;
2526     elem *e2 = e.EV.E2;
2527     if (*pretregs == 0)                 // if don't want result
2528     {
2529         codelem(cdb,e1,pretregs,false);
2530         *pretregs = 0;                  // in case e1 changed it
2531         codelem(cdb,e2,pretregs,false);
2532         return;
2533     }
2534 
2535     uint jop = jmpopcode(e);        // must be computed before
2536                                         // leaves are free'd
2537     uint reverse = 0;
2538 
2539     OPER op = e.Eoper;
2540     assert(OTrel(op));
2541     bool eqorne = (op == OPeqeq) || (op == OPne);
2542 
2543     tym_t tym = tybasic(e1.Ety);
2544     uint sz = _tysize[tym];
2545     uint isbyte = sz == 1;
2546 
2547     uint rex = (I64 && sz == 8) ? REX_W : 0;
2548     uint grex = rex << 16;          // 64 bit operands
2549 
2550     code cs;
2551     code *ce;
2552     if (tyfloating(tym))                  // if floating operation
2553     {
2554         if (config.fpxmmregs)
2555         {
2556             retregs = mPSW;
2557             if (tyxmmreg(tym))
2558                 orthxmm(cdb,e,&retregs);
2559             else
2560                 orth87(cdb,e,&retregs);
2561         }
2562         else if (config.inline8087)
2563         {   retregs = mPSW;
2564             orth87(cdb,e,&retregs);
2565         }
2566         else
2567         {
2568             static if (TARGET_WINDOS)
2569             {
2570                 int clib;
2571 
2572                 retregs = 0;                /* skip result for now          */
2573                 if (iffalse(e2))            /* second operand is constant 0 */
2574                 {
2575                     assert(!eqorne);        /* should be OPbool or OPnot    */
2576                     if (tym == TYfloat)
2577                     {
2578                         retregs = FLOATREGS;
2579                         clib = CLIB.ftst0;
2580                     }
2581                     else
2582                     {
2583                         retregs = DOUBLEREGS;
2584                         clib = CLIB.dtst0;
2585                     }
2586                     if (rel_exception(op))
2587                         clib += CLIB.dtst0exc - CLIB.dtst0;
2588                     codelem(cdb,e1,&retregs,false);
2589                     retregs = 0;
2590                     callclib(cdb,e,clib,&retregs,0);
2591                     freenode(e2);
2592                 }
2593                 else
2594                 {
2595                     clib = CLIB.dcmp;
2596                     if (rel_exception(op))
2597                         clib += CLIB.dcmpexc - CLIB.dcmp;
2598                     opdouble(cdb,e,&retregs,clib);
2599                 }
2600             }
2601             else
2602             {
2603                 assert(0);
2604             }
2605         }
2606         goto L3;
2607     }
2608 
2609     /* If it's a signed comparison of longs, we have to call a library    */
2610     /* routine, because we don't know the target of the signed branch     */
2611     /* (have to set up flags so that jmpopcode() will do it right)        */
2612     if (!eqorne &&
2613         (I16 && tym == TYlong  && tybasic(e2.Ety) == TYlong ||
2614          I32 && tym == TYllong && tybasic(e2.Ety) == TYllong)
2615        )
2616     {
2617         assert(jop != JC && jop != JNC);
2618         retregs = mDX | mAX;
2619         codelem(cdb,e1,&retregs,false);
2620         retregs = mCX | mBX;
2621         scodelem(cdb,e2,&retregs,mDX | mAX,false);
2622 
2623         if (I16)
2624         {
2625             retregs = 0;
2626             callclib(cdb,e,CLIB.lcmp,&retregs,0);    // gross, but it works
2627         }
2628         else
2629         {
2630             /* Generate:
2631              *      CMP  EDX,ECX
2632              *      JNE  C1
2633              *      XOR  EDX,EDX
2634              *      CMP  EAX,EBX
2635              *      JZ   C1
2636              *      JA   C3
2637              *      DEC  EDX
2638              *      JMP  C1
2639              * C3:  INC  EDX
2640              * C1:
2641              */
2642              getregs(cdb,mDX);
2643              genregs(cdb,0x39,CX,DX);             // CMP EDX,ECX
2644              code *c1 = gennop(null);
2645              genjmp(cdb,JNE,FLcode,cast(block *)c1);  // JNE C1
2646              movregconst(cdb,DX,0,0);             // XOR EDX,EDX
2647              genregs(cdb,0x39,BX,AX);             // CMP EAX,EBX
2648              genjmp(cdb,JE,FLcode,cast(block *)c1);   // JZ C1
2649              code *c3 = gen1(null,0x40 + DX);                  // INC EDX
2650              genjmp(cdb,JA,FLcode,cast(block *)c3);   // JA C3
2651              cdb.gen1(0x48 + DX);                              // DEC EDX
2652              genjmp(cdb,JMPS,FLcode,cast(block *)c1); // JMP C1
2653              cdb.append(c3);
2654              cdb.append(c1);
2655              getregs(cdb,mDX);
2656              retregs = mPSW;
2657         }
2658         goto L3;
2659     }
2660 
2661     /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC
2662      * (This is already reflected in the jop)
2663      */
2664     if ((jop == JC || jop == JNC) &&
2665         (op == OPgt || op == OPle) &&
2666         (tyuns(tym) || tyuns(e2.Ety))
2667        )
2668     {   // jmpopcode() sez comparison should be reversed
2669         assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst);
2670         reverse ^= 2;
2671     }
2672 
2673     /* See if we should swap operands     */
2674     if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
2675     {
2676         e1 = e.EV.E2;
2677         e2 = e.EV.E1;
2678         reverse ^= 2;
2679     }
2680 
2681     retregs = allregs;
2682     if (isbyte)
2683         retregs = BYTEREGS;
2684 
2685     ce = null;
2686     cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
2687     cs.Irex = cast(ubyte)rex;
2688     if (sz > REGSIZE)
2689         ce = gennop(ce);
2690 
2691     switch (e2.Eoper)
2692     {
2693         default:
2694         L2:
2695             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
2696             rretregs = allregs & ~retregs;
2697             if (isbyte)
2698                 rretregs &= BYTEREGS;
2699             scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
2700             if (sz <= REGSIZE)                              // CMP reg,rreg
2701             {
2702                 reg = findreg(retregs);             // get reg that e1 is in
2703                 rreg = findreg(rretregs);
2704                 genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg);
2705                 code_orrex(cdb.last(), rex);
2706                 if (!I16 && sz == SHORTSIZE)
2707                     cdb.last().Iflags |= CFopsize;          // compare only 16 bits
2708                 if (I64 && isbyte && (reg >= 4 || rreg >= 4))
2709                     cdb.last().Irex |= REX;                 // address byte registers
2710             }
2711             else
2712             {
2713                 assert(sz <= 2 * REGSIZE);
2714 
2715                 // Compare MSW, if they're equal then compare the LSW
2716                 reg = findregmsw(retregs);
2717                 rreg = findregmsw(rretregs);
2718                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2719                 if (I32 && sz == 6)
2720                     cdb.last().Iflags |= CFopsize;         // seg is only 16 bits
2721                 else if (I64)
2722                     code_orrex(cdb.last(), REX_W);
2723                 genjmp(cdb,JNE,FLcode,cast(block *) ce);   // JNE nop
2724 
2725                 reg = findreglsw(retregs);
2726                 rreg = findreglsw(rretregs);
2727                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2728                 if (I64)
2729                     code_orrex(cdb.last(), REX_W);
2730             }
2731             break;
2732 
2733         case OPrelconst:
2734             if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
2735                 goto L2;
2736             fl = el_fl(e2);
2737             switch (fl)
2738             {
2739                 case FLfunc:
2740                     fl = FLextern;          // so it won't be self-relative
2741                     break;
2742 
2743                 case FLdata:
2744                 case FLudata:
2745                 case FLextern:
2746                     if (sz > REGSIZE)       // compare against DS, not DGROUP
2747                         goto L2;
2748                     break;
2749 
2750                 case FLfardata:
2751                     break;
2752 
2753                 default:
2754                     goto L2;
2755             }
2756             cs.IFL2 = cast(ubyte)fl;
2757             cs.IEV2.Vsym = e2.EV.Vsym;
2758             if (sz > REGSIZE)
2759             {
2760                 cs.Iflags |= CFseg;
2761                 cs.IEV2.Voffset = 0;
2762             }
2763             else
2764             {
2765                 cs.Iflags |= CFoff;
2766                 cs.IEV2.Voffset = e2.EV.Voffset;
2767             }
2768             goto L4;
2769 
2770         case OPconst:
2771             // If compare against 0
2772             if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) &&
2773                 isregvar(e1,&retregs,&reg)
2774                )
2775             {   // Just do a TEST instruction
2776                 genregs(cdb,0x85 ^ isbyte,reg,reg);      // TEST reg,reg
2777                 cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw;
2778                 code_orrex(cdb.last(), rex);
2779                 if (I64 && isbyte && reg >= 4)
2780                     cdb.last().Irex |= REX;                 // address byte registers
2781                 retregs = mPSW;
2782                 break;
2783             }
2784 
2785             if (!tyuns(tym) && !tyuns(e2.Ety) &&
2786                 !boolres(e2) && !(*pretregs & mPSW) &&
2787                 (sz == REGSIZE || (I64 && sz == 4)) &&
2788                 (!I16 || op == OPlt || op == OPge))
2789             {
2790                 assert(*pretregs & (allregs));
2791                 codelem(cdb,e1,pretregs,false);
2792                 reg = findreg(*pretregs);
2793                 getregs(cdb,mask(reg));
2794                 switch (op)
2795                 {
2796                     case OPle:
2797                         cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1);   // ADD reg,-1
2798                         code_orflag(cdb.last(), CFpsw);
2799                         cdb.genc2(0x81,grex | modregrmx(3,2,reg),0);          // ADC reg,0
2800                         goto oplt;
2801 
2802                     case OPgt:
2803                         cdb.gen2(0xF7,grex | modregrmx(3,3,reg));         // NEG reg
2804                             /* Flips the sign bit unless the value is 0 or int.min.
2805                             Also sets the carry bit when the value is not 0. */
2806                         code_orflag(cdb.last(), CFpsw);
2807                         cdb.genc2(0x81,grex | modregrmx(3,3,reg),0);  // SBB reg,0
2808                             /* Subtracts the carry bit. This turns int.min into
2809                             int.max, flipping the sign bit.
2810                             For other negative and positive values, subtracting 1
2811                             doesn't affect the sign bit.
2812                             For 0, the carry bit is not set, so this does nothing
2813                             and the sign bit is not affected. */
2814                         goto oplt;
2815 
2816                     case OPlt:
2817                     oplt:
2818                         // Get the sign bit, i.e. 1 if the value is negative.
2819                         if (!I16)
2820                             cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31
2821                         else
2822                         {   /* 8088-286 do not have a barrel shifter, so use this
2823                                faster sequence
2824                              */
2825                             genregs(cdb,0xD1,0,reg);   // ROL reg,1
2826                             reg_t regi;
2827                             if (reghasvalue(allregs,1,&regi))
2828                                 genregs(cdb,0x23,reg,regi);  // AND reg,regi
2829                             else
2830                                 cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1
2831                         }
2832                         break;
2833 
2834                     case OPge:
2835                         genregs(cdb,0xD1,4,reg);        // SHL reg,1
2836                         code_orrex(cdb.last(),rex);
2837                         code_orflag(cdb.last(), CFpsw);
2838                         genregs(cdb,0x19,reg,reg);      // SBB reg,reg
2839                         code_orrex(cdb.last(),rex);
2840                         if (I64)
2841                         {
2842                             cdb.gen2(0xFF,modregrmx(3,0,reg));       // INC reg
2843                             code_orrex(cdb.last(), rex);
2844                         }
2845                         else
2846                             cdb.gen1(0x40 + reg);                    // INC reg
2847                         break;
2848 
2849                     default:
2850                         assert(0);
2851                 }
2852                 freenode(e2);
2853                 goto ret;
2854             }
2855 
2856             cs.IFL2 = FLconst;
2857             if (sz == 16)
2858                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.msw;
2859             else if (sz > REGSIZE)
2860                 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);
2861             else
2862                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong;
2863 
2864             // The cmp immediate relies on sign extension of the 32 bit immediate value
2865             if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint)
2866                 goto L2;
2867           L4:
2868             cs.Iop = 0x81 ^ isbyte;
2869 
2870             /* if ((e1 is data or a '*' reference) and it's not a
2871              * common subexpression
2872              */
2873 
2874             if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
2875                  e1.Eoper == OPind) &&
2876                 !evalinregister(e1))
2877             {
2878                 getlvalue(cdb,&cs,e1,RMload);
2879                 freenode(e1);
2880                 if (evalinregister(e2))
2881                 {
2882                     retregs = idxregm(&cs);
2883                     if ((cs.Iflags & CFSEG) == CFes)
2884                         retregs |= mES;             // take no chances
2885                     rretregs = allregs & ~retregs;
2886                     if (isbyte)
2887                         rretregs &= BYTEREGS;
2888                     scodelem(cdb,e2,&rretregs,retregs,true);
2889                     cs.Iop = 0x39 ^ isbyte ^ reverse;
2890                     if (sz > REGSIZE)
2891                     {
2892                         rreg = findregmsw(rretregs);
2893                         cs.Irm |= modregrm(0,rreg,0);
2894                         getlvalue_msw(&cs);
2895                         cdb.gen(&cs);              // CMP EA+2,rreg
2896                         if (I32 && sz == 6)
2897                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
2898                         if (I64 && isbyte && rreg >= 4)
2899                             cdb.last().Irex |= REX;
2900                         genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop
2901                         rreg = findreglsw(rretregs);
2902                         NEWREG(cs.Irm,rreg);
2903                         getlvalue_lsw(&cs);
2904                     }
2905                     else
2906                     {
2907                         rreg = findreg(rretregs);
2908                         code_newreg(&cs, rreg);
2909                         if (I64 && isbyte && rreg >= 4)
2910                             cs.Irex |= REX;
2911                     }
2912                 }
2913                 else
2914                 {
2915                     cs.Irm |= modregrm(0,7,0);
2916                     if (sz > REGSIZE)
2917                     {
2918                         if (sz == 6)
2919                             assert(0);
2920                         if (e2.Eoper == OPrelconst)
2921                         {   cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg;
2922                             cs.IEV2.Voffset = 0;
2923                         }
2924                         getlvalue_msw(&cs);
2925                         cdb.gen(&cs);              // CMP EA+2,const
2926                         if (!I16 && sz == 6)
2927                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
2928                         genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop
2929                         if (e2.Eoper == OPconst)
2930                             cs.IEV2.Vint = cast(int)e2.EV.Vllong;
2931                         else if (e2.Eoper == OPrelconst)
2932                         {   // Turn off CFseg, on CFoff
2933                             cs.Iflags ^= CFseg | CFoff;
2934                             cs.IEV2.Voffset = e2.EV.Voffset;
2935                         }
2936                         else
2937                             assert(0);
2938                         getlvalue_lsw(&cs);
2939                     }
2940                     freenode(e2);
2941                 }
2942                 cdb.gen(&cs);
2943                 break;
2944             }
2945 
2946             if (evalinregister(e2) && !OTassign(e1.Eoper) &&
2947                 !isregvar(e1,null,null))
2948             {
2949                 regm_t m;
2950 
2951                 m = allregs & ~regcon.mvar;
2952                 if (isbyte)
2953                     m &= BYTEREGS;
2954                 if (m & (m - 1))    // if more than one free register
2955                     goto L2;
2956             }
2957             if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) &&
2958                 !boolres(e2) && !evalinregister(e1))
2959             {
2960                 retregs = mPSW;
2961                 scodelem(cdb,e1,&retregs,0,false);
2962                 freenode(e2);
2963                 break;
2964             }
2965             if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW)
2966             {
2967                 retregs |= mPSW;
2968                 scodelem(cdb,e1,&retregs,0,false);
2969                 freenode(e2);
2970                 break;
2971             }
2972             scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
2973             if (sz == 1)
2974             {
2975                 reg = findreg(retregs & allregs);   // get reg that e1 is in
2976                 cs.Irm = modregrm(3,7,reg & 7);
2977                 if (reg & 8)
2978                     cs.Irex |= REX_B;
2979                 if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg)
2980                 {   assert(reg < 4);
2981                     cs.Irm |= 4;                    // use upper register half
2982                 }
2983                 if (I64 && reg >= 4)
2984                     cs.Irex |= REX;                 // address byte registers
2985             }
2986             else if (sz <= REGSIZE)
2987             {   // CMP reg,const
2988                 reg = findreg(retregs & allregs);   // get reg that e1 is in
2989                 rretregs = allregs & ~retregs;
2990                 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg))
2991                 {
2992                     genregs(cdb,0x3B,reg,rreg);
2993                     code_orrex(cdb.last(), rex);
2994                     if (!I16)
2995                         cdb.last().Iflags |= cs.Iflags & CFopsize;
2996                     freenode(e2);
2997                     break;
2998                 }
2999                 cs.Irm = modregrm(3,7,reg & 7);
3000                 if (reg & 8)
3001                     cs.Irex |= REX_B;
3002             }
3003             else if (sz <= 2 * REGSIZE)
3004             {
3005                 reg = findregmsw(retregs);          // get reg that e1 is in
3006                 cs.Irm = modregrm(3,7,reg);
3007                 cdb.gen(&cs);                       // CMP reg,MSW
3008                 if (I32 && sz == 6)
3009                     cdb.last().Iflags |= CFopsize;  // seg is only 16 bits
3010                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
3011 
3012                 reg = findreglsw(retregs);
3013                 cs.Irm = modregrm(3,7,reg);
3014                 if (e2.Eoper == OPconst)
3015                     cs.IEV2.Vint = e2.EV.Vlong;
3016                 else if (e2.Eoper == OPrelconst)
3017                 {   // Turn off CFseg, on CFoff
3018                     cs.Iflags ^= CFseg | CFoff;
3019                     cs.IEV2.Voffset = e2.EV.Voffset;
3020                 }
3021                 else
3022                     assert(0);
3023             }
3024             else
3025                 assert(0);
3026             cdb.gen(&cs);                         // CMP sucreg,LSW
3027             freenode(e2);
3028             break;
3029 
3030         case OPind:
3031             if (e2.Ecount)
3032                 goto L2;
3033             goto L5;
3034 
3035         case OPvar:
3036             static if (TARGET_OSX)
3037             {
3038                 if (movOnly(e2))
3039                     goto L2;
3040             }
3041             if ((e1.Eoper == OPvar &&
3042                  isregvar(e2,&rretregs,&reg) &&
3043                  sz <= REGSIZE
3044                 ) ||
3045                 (e1.Eoper == OPind &&
3046                  isregvar(e2,&rretregs,&reg) &&
3047                  !evalinregister(e1) &&
3048                  sz <= REGSIZE
3049                 )
3050                )
3051             {
3052                 // CMP EA,e2
3053                 getlvalue(cdb,&cs,e1,RMload);
3054                 freenode(e1);
3055                 cs.Iop = 0x39 ^ isbyte ^ reverse;
3056                 code_newreg(&cs,reg);
3057                 if (I64 && isbyte && reg >= 4)
3058                     cs.Irex |= REX;                 // address byte registers
3059                 cdb.gen(&cs);
3060                 freenode(e2);
3061                 break;
3062             }
3063           L5:
3064             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
3065             if (sz <= REGSIZE)                      // CMP reg,EA
3066             {
3067                 reg = findreg(retregs & allregs);   // get reg that e1 is in
3068                 uint opsize = cs.Iflags & CFopsize;
3069                 loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0);
3070                 code_orflag(cdb.last(),opsize);
3071             }
3072             else if (sz <= 2 * REGSIZE)
3073             {
3074                 reg = findregmsw(retregs);   // get reg that e1 is in
3075                 // CMP reg,EA
3076                 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0);
3077                 if (I32 && sz == 6)
3078                     cdb.last().Iflags |= CFopsize;        // seg is only 16 bits
3079                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
3080                 reg = findreglsw(retregs);
3081                 if (e2.Eoper == OPind)
3082                 {
3083                     NEWREG(cs.Irm,reg);
3084                     getlvalue_lsw(&cs);
3085                     cdb.gen(&cs);
3086                 }
3087                 else
3088                     loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0);
3089             }
3090             else
3091                 assert(0);
3092             freenode(e2);
3093             break;
3094     }
3095     cdb.append(ce);
3096 
3097 L3:
3098     if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
3099     {
3100         if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00))
3101         {
3102             regm_t resregs = retregs;
3103             if (!I64)
3104             {
3105                 resregs &= BYTEREGS;
3106                 if (!resregs)
3107                     resregs = BYTEREGS;
3108             }
3109             allocreg(cdb,&resregs,&reg,TYint);
3110             cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg
3111             if (I64 && reg >= 4)
3112                 code_orrex(cdb.last(),REX);
3113             if (tysize(e.Ety) > 1)
3114             {
3115                 genregs(cdb,MOVZXb,reg,reg);       // MOVZX reg,reg
3116                 if (I64 && sz == 8)
3117                     code_orrex(cdb.last(),REX_W);
3118                 if (I64 && reg >= 4)
3119                     code_orrex(cdb.last(),REX);
3120             }
3121             *pretregs &= ~mPSW;
3122             fixresult(cdb,e,resregs,pretregs);
3123         }
3124         else
3125         {
3126             code *nop = null;
3127             regm_t save = regcon.immed.mval;
3128             allocreg(cdb,&retregs,&reg,TYint);
3129             regcon.immed.mval = save;
3130             if ((*pretregs & mPSW) == 0 &&
3131                 (jop == JC || jop == JNC))
3132             {
3133                 getregs(cdb,retregs);
3134                 genregs(cdb,0x19,reg,reg);     // SBB reg,reg
3135                 if (rex || flag & REX_W)
3136                     code_orrex(cdb.last(), REX_W);
3137                 if (flag)
3138                 { }                                         // cdcond() will handle it
3139                 else if (jop == JNC)
3140                 {
3141                     if (I64)
3142                     {
3143                         cdb.gen2(0xFF,modregrmx(3,0,reg));  // INC reg
3144                         code_orrex(cdb.last(), rex);
3145                     }
3146                     else
3147                         cdb.gen1(0x40 + reg);               // INC reg
3148                 }
3149                 else
3150                 {
3151                     cdb.gen2(0xF7,modregrmx(3,3,reg));      // NEG reg
3152                     code_orrex(cdb.last(), rex);
3153                 }
3154             }
3155             else if (I64 && sz == 8)
3156             {
3157                 assert(!flag);
3158                 movregconst(cdb,reg,1,64|8);   // MOV reg,1
3159                 nop = gennop(nop);
3160                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
3161                                                             // MOV reg,0
3162                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64);
3163                 regcon.immed.mval &= ~mask(reg);
3164             }
3165             else
3166             {
3167                 assert(!flag);
3168                 movregconst(cdb,reg,1,8);      // MOV reg,1
3169                 nop = gennop(nop);
3170                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
3171                                                             // MOV reg,0
3172                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0);
3173                 regcon.immed.mval &= ~mask(reg);
3174             }
3175             *pretregs = retregs;
3176             cdb.append(nop);
3177         }
3178     }
3179 ret:
3180     { }
3181 }
3182 
3183 
3184 /**********************************
3185  * Generate code for signed compare of longs.
3186  * Input:
3187  *      targ    block* or code*
3188  */
3189 
3190 void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ)
3191 {
3192                                          // <=  >   <   >=
3193     static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ];
3194     static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];
3195 
3196     //printf("longcmp(e = %p)\n", e);
3197     elem *e1 = e.EV.E1;
3198     elem *e2 = e.EV.E2;
3199     OPER op = e.Eoper;
3200 
3201     // See if we should swap operands
3202     if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
3203     {
3204         e1 = e.EV.E2;
3205         e2 = e.EV.E1;
3206         op = swaprel(op);
3207     }
3208 
3209     code cs;
3210     cs.Iflags = 0;
3211     cs.Irex = 0;
3212 
3213     code *ce = gennop(null);
3214     regm_t retregs = ALLREGS;
3215     regm_t rretregs;
3216     reg_t reg,rreg;
3217 
3218     uint jop = jopmsw[op - OPle];
3219     if (!(jcond & 1)) jop ^= (JL ^ JG);                   // toggle jump condition
3220     CodeBuilder cdbjmp;
3221     cdbjmp.ctor();
3222     genjmp(cdbjmp,jop,fltarg, cast(block *) targ);             // Jx targ
3223     genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce);   // Jy nop
3224 
3225     switch (e2.Eoper)
3226     {
3227         default:
3228         L2:
3229             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
3230             rretregs = ALLREGS & ~retregs;
3231             scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
3232             cse_flush(cdb,1);
3233             // Compare MSW, if they're equal then compare the LSW
3234             reg = findregmsw(retregs);
3235             rreg = findregmsw(rretregs);
3236             genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
3237             cdb.append(cdbjmp);
3238 
3239             reg = findreglsw(retregs);
3240             rreg = findreglsw(rretregs);
3241             genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
3242             break;
3243 
3244         case OPconst:
3245             cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);            // MSW first
3246             cs.IFL2 = FLconst;
3247             cs.Iop = 0x81;
3248 
3249             /* if ((e1 is data or a '*' reference) and it's not a
3250              * common subexpression
3251              */
3252 
3253             if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
3254                  e1.Eoper == OPind) &&
3255                 !evalinregister(e1))
3256             {
3257                 getlvalue(cdb,&cs,e1,0);
3258                 freenode(e1);
3259                 if (evalinregister(e2))
3260                 {
3261                     retregs = idxregm(&cs);
3262                     if ((cs.Iflags & CFSEG) == CFes)
3263                             retregs |= mES;         // take no chances
3264                     rretregs = ALLREGS & ~retregs;
3265                     scodelem(cdb,e2,&rretregs,retregs,true);
3266                     cse_flush(cdb,1);
3267                     rreg = findregmsw(rretregs);
3268                     cs.Iop = 0x39;
3269                     cs.Irm |= modregrm(0,rreg,0);
3270                     getlvalue_msw(&cs);
3271                     cdb.gen(&cs);           // CMP EA+2,rreg
3272                     cdb.append(cdbjmp);
3273                     rreg = findreglsw(rretregs);
3274                     NEWREG(cs.Irm,rreg);
3275                 }
3276                 else
3277                 {
3278                     cse_flush(cdb,1);
3279                     cs.Irm |= modregrm(0,7,0);
3280                     getlvalue_msw(&cs);
3281                     cdb.gen(&cs);           // CMP EA+2,const
3282                     cdb.append(cdbjmp);
3283                     cs.IEV2.Vint = e2.EV.Vlong;
3284                     freenode(e2);
3285                 }
3286                 getlvalue_lsw(&cs);
3287                 cdb.gen(&cs);                   // CMP EA,rreg/const
3288                 break;
3289             }
3290             if (evalinregister(e2))
3291                 goto L2;
3292 
3293             scodelem(cdb,e1,&retregs,0,true);    // compute left leaf
3294             cse_flush(cdb,1);
3295             reg = findregmsw(retregs);              // get reg that e1 is in
3296             cs.Irm = modregrm(3,7,reg);
3297 
3298             cdb.gen(&cs);                           // CMP reg,MSW
3299             cdb.append(cdbjmp);
3300             reg = findreglsw(retregs);
3301             cs.Irm = modregrm(3,7,reg);
3302             cs.IEV2.Vint = e2.EV.Vlong;
3303             cdb.gen(&cs);                           // CMP sucreg,LSW
3304             freenode(e2);
3305             break;
3306 
3307         case OPvar:
3308             if (!e1.Ecount && e1.Eoper == OPs32_64)
3309             {
3310                 reg_t msreg;
3311 
3312                 retregs = allregs;
3313                 scodelem(cdb,e1.EV.E1,&retregs,0,true);
3314                 freenode(e1);
3315                 reg = findreg(retregs);
3316                 retregs = allregs & ~retregs;
3317                 allocreg(cdb,&retregs,&msreg,TYint);
3318                 genmovreg(cdb,msreg,reg);                  // MOV msreg,reg
3319                 cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
3320                 cse_flush(cdb,1);
3321                 loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0);
3322                 cdb.append(cdbjmp);
3323                 loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0);
3324                 freenode(e2);
3325             }
3326             else
3327             {
3328                 scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
3329                 cse_flush(cdb,1);
3330                 reg = findregmsw(retregs);   // get reg that e1 is in
3331                 loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0);
3332                 cdb.append(cdbjmp);
3333                 reg = findreglsw(retregs);
3334                 loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0);
3335                 freenode(e2);
3336             }
3337             break;
3338     }
3339 
3340     jop = joplsw[op - OPle];
3341     if (!(jcond & 1)) jop ^= 1;                           // toggle jump condition
3342     genjmp(cdb,jop,fltarg,cast(block *) targ);   // Jcond targ
3343 
3344     cdb.append(ce);
3345     freenode(e);
3346 }
3347 
3348 /*****************************
3349  * Do conversions.
3350  * Depends on OPd_s32 and CLIB.dbllng being in sequence.
3351  */
3352 
3353 void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
3354 {
3355     //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs));
3356     //elem_print(e);
3357 
3358     static immutable ubyte[2][16] clib =
3359     [
3360         [ OPd_s32,        CLIB.dbllng   ],
3361         [ OPs32_d,        CLIB.lngdbl   ],
3362         [ OPd_s16,        CLIB.dblint   ],
3363         [ OPs16_d,        CLIB.intdbl   ],
3364         [ OPd_u16,        CLIB.dbluns   ],
3365         [ OPu16_d,        CLIB.unsdbl   ],
3366         [ OPd_u32,        CLIB.dblulng  ],
3367         [ OPu32_d,        CLIB.ulngdbl  ],
3368         [ OPd_s64,        CLIB.dblllng  ],
3369         [ OPs64_d,        CLIB.llngdbl  ],
3370         [ OPd_u64,        CLIB.dblullng ],
3371         [ OPu64_d,        CLIB.ullngdbl ],
3372         [ OPd_f,          CLIB.dblflt   ],
3373         [ OPf_d,          CLIB.fltdbl   ],
3374         [ OPvp_fp,        CLIB.vptrfptr ],
3375         [ OPcvp_fp,       CLIB.cvptrfptr]
3376     ];
3377 
3378     if (!*pretregs)
3379     {
3380         codelem(cdb,e.EV.E1,pretregs,false);
3381         return;
3382     }
3383 
3384     regm_t retregs;
3385     if (config.inline8087)
3386     {
3387         switch (e.Eoper)
3388         {
3389             case OPld_d:
3390             case OPd_ld:
3391             {
3392                 if (tycomplex(e.EV.E1.Ety))
3393                 {
3394             Lcomplex:
3395                     regm_t retregsx = mST01 | (*pretregs & mPSW);
3396                     codelem(cdb,e.EV.E1, &retregsx, false);
3397                     fixresult_complex87(cdb, e, retregsx, pretregs);
3398                     return;
3399                 }
3400                 regm_t retregsx = mST0 | (*pretregs & mPSW);
3401                 codelem(cdb,e.EV.E1, &retregsx, false);
3402                 fixresult87(cdb, e, retregsx, pretregs);
3403                 return;
3404             }
3405 
3406             case OPf_d:
3407             case OPd_f:
3408                 if (tycomplex(e.EV.E1.Ety))
3409                     goto Lcomplex;
3410                 if (config.fpxmmregs && *pretregs & XMMREGS)
3411                 {
3412                     xmmcnvt(cdb, e, pretregs);
3413                     return;
3414                 }
3415 
3416                 /* if won't do us much good to transfer back and        */
3417                 /* forth between 8088 registers and 8087 registers      */
3418                 if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs))
3419                 {
3420                     retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety);
3421                     if (retregs & (mXMM1 | mXMM0 |mST01 | mST0))       // if return in ST0
3422                     {
3423                         codelem(cdb,e.EV.E1,pretregs,false);
3424                         if (*pretregs & mST0)
3425                             note87(e, 0, 0);
3426                         return;
3427                     }
3428                     else
3429                         break;
3430                 }
3431                 goto Lload87;
3432 
3433             case OPs64_d:
3434                 if (!I64)
3435                     goto Lload87;
3436                 goto case OPs32_d;
3437 
3438             case OPs32_d:
3439                 if (config.fpxmmregs && *pretregs & XMMREGS)
3440                 {
3441                     xmmcnvt(cdb, e, pretregs);
3442                     return;
3443                 }
3444                 goto Lload87;
3445 
3446             case OPs16_d:
3447             case OPu16_d:
3448             Lload87:
3449                 load87(cdb,e,0,pretregs,null,-1);
3450                 return;
3451 
3452             case OPu32_d:
3453                 if (I64 && config.fpxmmregs && *pretregs & XMMREGS)
3454                 {
3455                     xmmcnvt(cdb,e,pretregs);
3456                     return;
3457                 }
3458                 else if (!I16)
3459                 {
3460                     regm_t retregsx = ALLREGS;
3461                     codelem(cdb,e.EV.E1, &retregsx, false);
3462                     reg_t reg = findreg(retregsx);
3463                     cdb.genfltreg(STO, reg, 0);
3464                     regwithvalue(cdb,ALLREGS,0,&reg,0);
3465                     cdb.genfltreg(STO, reg, 4);
3466 
3467                     push87(cdb);
3468                     cdb.genfltreg(0xDF,5,0);     // FILD m64int
3469 
3470                     regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/;
3471                     fixresult87(cdb, e, retregsy, pretregs);
3472                     return;
3473                 }
3474                 break;
3475 
3476             case OPd_s64:
3477                 if (!I64)
3478                     goto Lcnvt87;
3479                 goto case OPd_s32;
3480 
3481             case OPd_s32:
3482                 if (config.fpxmmregs)
3483                 {
3484                     xmmcnvt(cdb,e,pretregs);
3485                     return;
3486                 }
3487                 goto Lcnvt87;
3488 
3489             case OPd_s16:
3490             case OPd_u16:
3491             Lcnvt87:
3492                 cnvt87(cdb,e,pretregs);
3493                 return;
3494 
3495             case OPd_u32:               // use subroutine, not 8087
3496                 if (I64 && config.fpxmmregs)
3497                 {
3498                     xmmcnvt(cdb,e,pretregs);
3499                     return;
3500                 }
3501                 if (I32 || I64)
3502                 {
3503                     cdd_u32(cdb,e,pretregs);
3504                     return;
3505                 }
3506                 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD ||
3507                            TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
3508                 {
3509                     retregs = mST0;
3510                 }
3511                 else
3512                 {
3513                     retregs = DOUBLEREGS;
3514                 }
3515                 goto L1;
3516 
3517             case OPd_u64:
3518                 if (I32 || I64)
3519                 {
3520                     cdd_u64(cdb,e,pretregs);
3521                     return;
3522                 }
3523                 retregs = DOUBLEREGS;
3524                 goto L1;
3525 
3526             case OPu64_d:
3527                 if (*pretregs & mST0)
3528                 {
3529                     regm_t retregsx = I64 ? mAX : mAX|mDX;
3530                     codelem(cdb,e.EV.E1,&retregsx,false);
3531                     callclib(cdb,e,CLIB.u64_ldbl,pretregs,0);
3532                     return;
3533                 }
3534                 break;
3535 
3536             case OPld_u64:
3537             {
3538                 if (I32 || I64)
3539                 {
3540                     cdd_u64(cdb,e,pretregs);
3541                     return;
3542                 }
3543                 regm_t retregsx = mST0;
3544                 codelem(cdb,e.EV.E1,&retregsx,false);
3545                 callclib(cdb,e,CLIB.ld_u64,pretregs,0);
3546                 return;
3547             }
3548 
3549             default:
3550                 break;
3551         }
3552     }
3553     retregs = regmask(e.EV.E1.Ety, TYnfunc);
3554 L1:
3555     codelem(cdb,e.EV.E1,&retregs,false);
3556     for (int i = 0; 1; i++)
3557     {
3558         assert(i < clib.length);
3559         if (clib[i][0] == e.Eoper)
3560         {
3561             callclib(cdb,e,clib[i][1],pretregs,0);
3562             break;
3563         }
3564     }
3565 }
3566 
3567 
3568 /***************************
3569  * Convert short to long.
3570  * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64,
3571  * OPu64_128, OPs64_128
3572  */
3573 
3574 void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3575 {
3576     reg_t reg;
3577     regm_t retregs;
3578 
3579     //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
3580     int e1comsub = e.EV.E1.Ecount;
3581     ubyte op = e.Eoper;
3582     if ((*pretregs & (ALLREGS | mBP)) == 0)    // if don't need result in regs
3583     {
3584         codelem(cdb,e.EV.E1,pretregs,false);     // then conversion isn't necessary
3585         return;
3586     }
3587     else if (
3588              op == OPnp_fp ||
3589              (I16 && op == OPu16_32) ||
3590              (I32 && op == OPu32_64)
3591             )
3592     {
3593         /* Result goes into a register pair.
3594          * Zero extend by putting a zero into most significant reg.
3595          */
3596 
3597         regm_t retregsx = *pretregs & mLSW;
3598         assert(retregsx);
3599         tym_t tym1 = tybasic(e.EV.E1.Ety);
3600         codelem(cdb,e.EV.E1,&retregsx,false);
3601 
3602         regm_t regm = *pretregs & (mMSW & ALLREGS);
3603         if (regm == 0)                  // *pretregs could be mES
3604             regm = mMSW & ALLREGS;
3605         allocreg(cdb,&regm,&reg,TYint);
3606         if (e1comsub)
3607             getregs(cdb,retregsx);
3608         if (op == OPnp_fp)
3609         {
3610             int segreg;
3611 
3612             // BUG: what about pointers to functions?
3613             switch (tym1)
3614             {
3615                 case TYimmutPtr:
3616                 case TYnptr:    segreg = SEG_DS;        break;
3617                 case TYcptr:    segreg = SEG_CS;        break;
3618                 case TYsptr:    segreg = SEG_SS;        break;
3619                 default:        assert(0);
3620             }
3621             cdb.gen2(0x8C,modregrm(3,segreg,reg));  // MOV reg,segreg
3622         }
3623         else
3624             movregconst(cdb,reg,0,0);  // 0 extend
3625 
3626         fixresult(cdb,e,retregsx | regm,pretregs);
3627         return;
3628     }
3629     else if (I64 && op == OPu32_64)
3630     {
3631         elem *e1 = e.EV.E1;
3632         retregs = *pretregs;
3633         if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
3634         {
3635             code cs;
3636 
3637             allocreg(cdb,&retregs,&reg,TYint);
3638             loadea(cdb,e1,&cs,LOD,reg,0,retregs,retregs);  //  MOV Ereg,EA
3639             freenode(e1);
3640         }
3641         else
3642         {
3643             *pretregs &= ~mPSW;                 // flags are set by eval of e1
3644             codelem(cdb,e1,&retregs,false);
3645             /* Determine if high 32 bits are already 0
3646              */
3647             if (e1.Eoper == OPu16_32 && !e1.Ecount)
3648             {
3649             }
3650             else
3651             {
3652                 // Zero high 32 bits
3653                 getregs(cdb,retregs);
3654                 reg = findreg(retregs);
3655                 // Don't use x89 because that will get optimized away
3656                 genregs(cdb,LOD,reg,reg);  // MOV Ereg,Ereg
3657             }
3658         }
3659         fixresult(cdb,e,retregs,pretregs);
3660         return;
3661     }
3662     else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount)
3663     {
3664         /* Due to how e1 is calculated, the high 32 bits of the register
3665          * are already 0.
3666          */
3667         retregs = *pretregs;
3668         codelem(cdb,e.EV.E1,&retregs,false);
3669         fixresult(cdb,e,retregs,pretregs);
3670         return;
3671     }
3672     else if (!I16 && (op == OPs16_32 || op == OPu16_32) ||
3673               I64 && op == OPs32_64)
3674     {
3675         elem *e11;
3676         elem *e1 = e.EV.E1;
3677 
3678         if (e1.Eoper == OPu8_16 && !e1.Ecount &&
3679             ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount))
3680            )
3681         {
3682             code cs;
3683 
3684             retregs = *pretregs & BYTEREGS;
3685             if (!retregs)
3686                 retregs = BYTEREGS;
3687             allocreg(cdb,&retregs,&reg,TYint);
3688             movregconst(cdb,reg,0,0);                   //  XOR reg,reg
3689             loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs);  //  MOV regL,EA
3690             freenode(e11);
3691             freenode(e1);
3692         }
3693         else if (e1.Eoper == OPvar ||
3694             (e1.Eoper == OPind && !e1.Ecount))
3695         {
3696             code cs = void;
3697 
3698             if (I32 && op == OPu16_32 && config.flags4 & CFG4speed)
3699                 goto L2;
3700             retregs = *pretregs;
3701             allocreg(cdb,&retregs,&reg,TYint);
3702             const opcode = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,EA
3703             if (op == OPs32_64)
3704             {
3705                 assert(I64);
3706                 // MOVSXD reg,e1
3707                 loadea(cdb,e1,&cs,0x63,reg,0,0,retregs);
3708                 code_orrex(cdb.last(), REX_W);
3709             }
3710             else
3711                 loadea(cdb,e1,&cs,opcode,reg,0,0,retregs);
3712             freenode(e1);
3713         }
3714         else
3715         {
3716         L2:
3717             retregs = *pretregs;
3718             if (op == OPs32_64)
3719                 retregs = mAX | (*pretregs & mPSW);
3720             *pretregs &= ~mPSW;             // flags are already set
3721             CodeBuilder cdbx;
3722             cdbx.ctor();
3723             codelem(cdbx,e1,&retregs,false);
3724             code *cx = cdbx.finish();
3725             cdb.append(cdbx);
3726             getregs(cdb,retregs);
3727             if (op == OPu16_32 && cx)
3728             {
3729                 cx = code_last(cx);
3730                 if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) &&
3731                     mask(cx.Irm & 7) == retregs)
3732                 {
3733                     // Convert AND of a word to AND of a dword, zeroing upper word
3734                     if (cx.Irex & REX_B)
3735                         retregs = mask(8 | (cx.Irm & 7));
3736                     cx.Iflags &= ~CFopsize;
3737                     cx.IEV2.Vint &= 0xFFFF;
3738                     goto L1;
3739                 }
3740             }
3741             if (op == OPs16_32 && retregs == mAX)
3742                 cdb.gen1(0x98);         // CWDE
3743             else if (op == OPs32_64 && retregs == mAX)
3744             {
3745                 cdb.gen1(0x98);         // CDQE
3746                 code_orrex(cdb.last(), REX_W);
3747             }
3748             else
3749             {
3750                 reg = findreg(retregs);
3751                 if (config.flags4 & CFG4speed && op == OPu16_32)
3752                 {   // AND reg,0xFFFF
3753                     cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu);
3754                 }
3755                 else
3756                 {
3757                     opcode_t iop = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,reg
3758                     genregs(cdb,iop,reg,reg);
3759                 }
3760             }
3761          L1:
3762             if (e1comsub)
3763                 getregs(cdb,retregs);
3764         }
3765         fixresult(cdb,e,retregs,pretregs);
3766         return;
3767     }
3768     else if (*pretregs & mPSW || config.target_cpu < TARGET_80286)
3769     {
3770         // OPs16_32, OPs32_64
3771         // CWD doesn't affect flags, so we can depend on the integer
3772         // math to provide the flags.
3773         retregs = mAX | mPSW;               // want integer result in AX
3774         *pretregs &= ~mPSW;                 // flags are already set
3775         codelem(cdb,e.EV.E1,&retregs,false);
3776         getregs(cdb,mDX);           // sign extend into DX
3777         cdb.gen1(0x99);                     // CWD/CDQ
3778         if (e1comsub)
3779             getregs(cdb,retregs);
3780         fixresult(cdb,e,mDX | retregs,pretregs);
3781         return;
3782     }
3783     else
3784     {
3785         // OPs16_32, OPs32_64
3786         uint msreg,lsreg;
3787 
3788         retregs = *pretregs & mLSW;
3789         assert(retregs);
3790         codelem(cdb,e.EV.E1,&retregs,false);
3791         retregs |= *pretregs & mMSW;
3792         allocreg(cdb,&retregs,&reg,e.Ety);
3793         msreg = findregmsw(retregs);
3794         lsreg = findreglsw(retregs);
3795         genmovreg(cdb,msreg,lsreg);                // MOV msreg,lsreg
3796         assert(config.target_cpu >= TARGET_80286);              // 8088 can't handle SAR reg,imm8
3797         cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
3798         fixresult(cdb,e,retregs,pretregs);
3799         return;
3800     }
3801 }
3802 
3803 
3804 /***************************
3805  * Convert byte to int.
3806  * For OPu8_16 and OPs8_16.
3807  */
3808 
3809 void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3810 {
3811     regm_t retregs;
3812     char size;
3813 
3814     if ((*pretregs & (ALLREGS | mBP)) == 0)     // if don't need result in regs
3815     {
3816         codelem(cdb,e.EV.E1,pretregs,false);      // then conversion isn't necessary
3817         return;
3818     }
3819 
3820     //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
3821     char op = e.Eoper;
3822     elem *e1 = e.EV.E1;
3823     if (e1.Eoper == OPcomma)
3824         docommas(cdb,&e1);
3825     if (!I16)
3826     {
3827         if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
3828         {
3829             code cs;
3830 
3831             regm_t retregsx = *pretregs;
3832             reg_t reg;
3833             allocreg(cdb,&retregsx,&reg,TYint);
3834             if (config.flags4 & CFG4speed &&
3835                 op == OPu8_16 && mask(reg) & BYTEREGS &&
3836                 config.target_cpu < TARGET_PentiumPro)
3837             {
3838                 movregconst(cdb,reg,0,0);                 //  XOR reg,reg
3839                 loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx); //  MOV regL,EA
3840             }
3841             else
3842             {
3843                 const opcode = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,EA
3844                 loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx);
3845             }
3846             freenode(e1);
3847             fixresult(cdb,e,retregsx,pretregs);
3848             return;
3849         }
3850         size = tysize(e.Ety);
3851         retregs = *pretregs & BYTEREGS;
3852         if (retregs == 0)
3853             retregs = BYTEREGS;
3854         retregs |= *pretregs & mPSW;
3855         *pretregs &= ~mPSW;
3856     }
3857     else
3858     {
3859         if (op == OPu8_16)              // if uint conversion
3860         {
3861             retregs = *pretregs & BYTEREGS;
3862             if (retregs == 0)
3863                 retregs = BYTEREGS;
3864         }
3865         else
3866         {
3867             // CBW doesn't affect flags, so we can depend on the integer
3868             // math to provide the flags.
3869             retregs = mAX | (*pretregs & mPSW); // want integer result in AX
3870         }
3871     }
3872 
3873     CodeBuilder cdb1;
3874     cdb1.ctor();
3875     codelem(cdb1,e1,&retregs,false);
3876     code *c1 = cdb1.finish();
3877     cdb.append(cdb1);
3878     reg_t reg = findreg(retregs);
3879     code *c;
3880     if (!c1)
3881         goto L1;
3882 
3883     // If previous instruction is an AND bytereg,value
3884     c = cdb.last();
3885     if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) &&
3886         (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0))
3887     {
3888         if (*pretregs & mPSW)
3889             c.Iflags |= CFpsw;
3890         c.Iop |= 1;                    // convert to word operation
3891         c.IEV2.Vuns &= 0xFF;           // dump any high order bits
3892         *pretregs &= ~mPSW;             // flags already set
3893     }
3894     else
3895     {
3896      L1:
3897         if (!I16)
3898         {
3899             if (op == OPs8_16 && reg == AX && size == 2)
3900             {
3901                 cdb.gen1(0x98);                  // CBW
3902                 cdb.last().Iflags |= CFopsize;  // don't do a CWDE
3903             }
3904             else
3905             {
3906                 // We could do better by not forcing the src and dst
3907                 // registers to be the same.
3908 
3909                 if (config.flags4 & CFG4speed && op == OPu8_16)
3910                 {   // AND reg,0xFF
3911                     cdb.genc2(0x81,modregrmx(3,4,reg),0xFF);
3912                 }
3913                 else
3914                 {
3915                     opcode_t iop = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,reg
3916                     genregs(cdb,iop,reg,reg);
3917                     if (I64 && reg >= 4)
3918                         code_orrex(cdb.last(), REX);
3919                 }
3920             }
3921         }
3922         else
3923         {
3924             if (op == OPu8_16)
3925                 genregs(cdb,0x30,reg+4,reg+4);  // XOR regH,regH
3926             else
3927             {
3928                 cdb.gen1(0x98);                 // CBW
3929                 *pretregs &= ~mPSW;             // flags already set
3930             }
3931         }
3932     }
3933     getregs(cdb,retregs);
3934     fixresult(cdb,e,retregs,pretregs);
3935 }
3936 
3937 
3938 /***************************
3939  * Convert long to short (OP32_16).
3940  * Get offset of far pointer (OPoffset).
3941  * Convert int to byte (OP16_8).
3942  * Convert long long to long (OP64_32).
3943  * OP128_64
3944  */
3945 
3946 void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3947 {
3948     debug
3949     {
3950         switch (e.Eoper)
3951         {
3952             case OP32_16:
3953             case OPoffset:
3954             case OP16_8:
3955             case OP64_32:
3956             case OP128_64:
3957                 break;
3958 
3959             default:
3960                 assert(0);
3961         }
3962     }
3963 
3964     regm_t retregs;
3965     if (e.Eoper == OP16_8)
3966     {
3967         retregs = *pretregs ? BYTEREGS : 0;
3968         codelem(cdb,e.EV.E1,&retregs,false);
3969     }
3970     else
3971     {
3972         if (e.EV.E1.Eoper == OPrelconst)
3973             offsetinreg(cdb,e.EV.E1,&retregs);
3974         else
3975         {
3976             retregs = *pretregs ? ALLREGS : 0;
3977             codelem(cdb,e.EV.E1,&retregs,false);
3978             bool isOff = e.Eoper == OPoffset;
3979             if (I16 ||
3980                 I32 && (isOff || e.Eoper == OP64_32) ||
3981                 I64 && (isOff || e.Eoper == OP128_64))
3982                 retregs &= mLSW;                // want LSW only
3983         }
3984     }
3985 
3986     /* We "destroy" a reg by assigning it the result of a new e, even
3987      * though the values are the same. Weakness of our CSE strategy that
3988      * a register can only hold the contents of one elem at a time.
3989      */
3990     if (e.Ecount)
3991         getregs(cdb,retregs);
3992     else
3993         useregs(retregs);
3994 
3995     debug
3996     if (!(!*pretregs || retregs))
3997     {
3998         WROP(e.Eoper),
3999         printf(" *pretregs = %s, retregs = %s, e = %p\n",regm_str(*pretregs),regm_str(retregs),e);
4000     }
4001 
4002     assert(!*pretregs || retregs);
4003     fixresult(cdb,e,retregs,pretregs);  // lsw only
4004 }
4005 
4006 /**********************************************
4007  * Get top 32 bits of 64 bit value (I32)
4008  * or top 16 bits of 32 bit value (I16)
4009  * or top 64 bits of 128 bit value (I64).
4010  * OPmsw
4011  */
4012 
4013 void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4014 {
4015     assert(e.Eoper == OPmsw);
4016 
4017     regm_t retregs = *pretregs ? ALLREGS : 0;
4018     codelem(cdb,e.EV.E1,&retregs,false);
4019     retregs &= mMSW;                    // want MSW only
4020 
4021     /* We "destroy" a reg by assigning it the result of a new e, even
4022      * though the values are the same. Weakness of our CSE strategy that
4023      * a register can only hold the contents of one elem at a time.
4024      */
4025     if (e.Ecount)
4026         getregs(cdb,retregs);
4027     else
4028         useregs(retregs);
4029 
4030     debug
4031     if (!(!*pretregs || retregs))
4032     {   WROP(e.Eoper);
4033         printf(" *pretregs = %s, retregs = %s\n",regm_str(*pretregs),regm_str(retregs));
4034         elem_print(e);
4035     }
4036 
4037     assert(!*pretregs || retregs);
4038     fixresult(cdb,e,retregs,pretregs);  // msw only
4039 }
4040 
4041 
4042 
4043 /******************************
4044  * Handle operators OPinp and OPoutp.
4045  */
4046 
4047 void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4048 {
4049     //printf("cdport\n");
4050     ubyte op = 0xE4;            // root of all IN/OUT opcodes
4051     elem *e1 = e.EV.E1;
4052 
4053     // See if we can use immediate mode of IN/OUT opcodes
4054     ubyte port;
4055     if (e1.Eoper == OPconst && e1.EV.Vuns <= 255 &&
4056         (!evalinregister(e1) || regcon.mvar & mDX))
4057     {
4058         port = cast(ubyte)e1.EV.Vuns;
4059         freenode(e1);
4060     }
4061     else
4062     {
4063         regm_t retregs = mDX;           // port number is always DX
4064         codelem(cdb,e1,&retregs,false);
4065         op |= 0x08;                     // DX version of opcode
4066         port = 0;                       // not logically needed, but
4067                                         // quiets "uninitialized var" complaints
4068     }
4069 
4070     uint sz;
4071     if (e.Eoper == OPoutp)
4072     {
4073         sz = tysize(e.EV.E2.Ety);
4074         regm_t retregs = mAX;           // byte/word to output is in AL/AX
4075         scodelem(cdb,e.EV.E2,&retregs,((op & 0x08) ? mDX : 0),true);
4076         op |= 0x02;                     // OUT opcode
4077     }
4078     else // OPinp
4079     {
4080         getregs(cdb,mAX);
4081         sz = tysize(e.Ety);
4082     }
4083 
4084     if (sz != 1)
4085         op |= 1;                        // word operation
4086     cdb.genc2(op,0,port);               // IN/OUT AL/AX,DX/port
4087     if (op & 1 && sz != REGSIZE)        // if need size override
4088         cdb.last().Iflags |= CFopsize;
4089     regm_t retregs = mAX;
4090     fixresult(cdb,e,retregs,pretregs);
4091 }
4092 
4093 /************************
4094  * Generate code for an asm elem.
4095  */
4096 
4097 void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4098 {
4099     // Assume only regs normally destroyed by a function are destroyed
4100     getregs(cdb,(ALLREGS | mES) & ~fregsaved);
4101     cdb.genasm(cast(char *)e.EV.Vstring, cast(uint)e.EV.Vstrlen);
4102     fixresult(cdb,e,(I16 ? mDX | mAX : mAX),pretregs);
4103 }
4104 
/************************
 * Generate code for OPnp_f16p and OPf16p_np.
 *
 * Only valid for 32 bit code (asserted).
 * NOTE(review): judging by the operator names these convert between near
 * pointers and 16:16 far pointers - confirm against the operator table.
 *
 * The operand is evaluated into *pretregs and converted in place.
 * The emitted sequences are:
 * ---
 *  OPnp_f16p:                      OPf16p_np:
 *          OR  ereg,ereg                   ROR ereg,16
 *          JE  L1                          SHR reg,3
 *          ROR ereg,16                     ROL ereg,16
 *          SHL reg,3
 *          MOV rx,SS
 *          AND rx,3        ;mask off CPL bits
 *          OR  rl,4        ;run on LDT bit
 *          OR  regl,rl
 *          ROL ereg,16
 *      L1: NOP
 * ---
 * Params:
 *      cdb = sink for generated code
 *      e = the conversion elem
 *      pretregs = registers the operand is evaluated into
 */

void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32);
    codelem(cdb,e.EV.E1,pretregs,false);
    reg_t reg = findreg(*pretregs);
    getregs(cdb,*pretregs);      // we will destroy the regs

    // Template for the 0xC1 shift/rotate-by-immediate group acting on reg.
    // The /digit in Irm selects the operation: 0=ROL 1=ROR 4=SHL 5=SHR
    code cs;
    cs.Iop = 0xC1;
    cs.Irm = modregrm(3,0,reg);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 16;

    code *cnop = null;                  // label L1, only used by OPnp_f16p

    if (e.Eoper == OPnp_f16p)
    {
        reg_t rx;

        regm_t retregs = BYTEREGS & ~*pretregs;
        allocreg(cdb,&retregs,&rx,TYint);
        cnop = gennop(null);
        int jop = JCXZ;                 // CX can be tested by the jump itself
        if (reg != CX)
        {
            gentstreg(cdb,reg);                         // OR  ereg,ereg
            jop = JE;
        }
        genjmp(cdb,jop,FLcode, cast(block *)cnop);      // Jop L1

        cs.Irm = modregrm(3,1,reg);
        cdb.gen(&cs);                                   // ROR ereg,16

        cs.Irm = modregrm(3,4,reg);
        cs.IEV2.Vuns = 3;
        cs.Iflags = CFopsize;
        cdb.gen(&cs);                                   // SHL reg,3

        genregs(cdb,0x8C,2,rx);                         // MOV rx,SS
        // isbyte is 1 when reg is NOT a byte register; it then selects
        // the full-width form of the opcodes below
        int isbyte = (mask(reg) & BYTEREGS) == 0;
        cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3);    // AND rl,3
        cdb.genc2(0x80,modregrm(3,1,rx),4);             // OR  rl,4
        genregs(cdb,0x0A | isbyte,reg,rx);              // OR  regl,rl
    }
    else // OPf16p_np
    {
        cs.Irm = modregrm(3,1,reg);
        cdb.gen(&cs);                                   // ROR ereg,16

        cs.Irm = modregrm(3,5,reg);
        cs.IEV2.Vuns = 3;
        cs.Iflags = CFopsize;
        cdb.gen(&cs);                                   // SHR reg,3
    }

    // Rotate the halves back into place. This must come after the 16 bit
    // shift, not before it.
    cs.Irm = modregrm(3,0,reg);
    cs.IEV2.Vuns = 16;
    cs.Iflags = 0;
    cdb.gen(&cs);                                       // ROL ereg,16

    // Place the jump target in the code stream (no-op when cnop is null)
    cdb.append(cnop);                                   // L1: NOP
}
4177 
/*************************
 * Generate code for OPbtst
 *
 * Emits a BT instruction testing bit e2 of the value e1, then, if a
 * register result is requested, materializes the carry flag as 0/1.
 * Params:
 *      cdb = sink for generated code
 *      e = the OPbtst elem; E1 is the value tested, E2 the bit index
 *      pretregs = where the caller wants the result; 0 means the result
 *                 is discarded and the operands are evaluated only for
 *                 their side effects
 */

void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));

    opcode_t op = 0xA3;                        // BT EA,value
    int mode = 4;                              // /digit for the BT rm,imm8 form

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false);  // eval left leaf
        *pretregs = 0;                    // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    // Set up cs so its EA refers to e1: either directly in memory / as a
    // variable, or in a register after evaluating e1
    regm_t idxregs;
    if ((e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e1, RMload);    // get addressing mode
        idxregs = idxregm(&cs);             // mask if index regs used
    }
    else
    {
        retregs = tysize(e1.Ety) == 1 ? BYTEREGS : allregs;
        codelem(cdb,e1, &retregs, false);
        reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);     // register-direct EA
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)                        // extended register needs REX.B
            cs.Irex |= REX_B;
        idxregs = retregs;                  // must not be clobbered by e2
    }

    tym_t ty1 = tybasic(e1.Ety);
    const sz = tysize(e1.Ety);
    // operand size prefix is needed for 16 bit operands outside 16 bit code
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Reduce the bit index modulo the operand width
        if (sz <= SHORTSIZE)
        {
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (sz == 4)
        {
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;

        /* A register variable may not have its upper 32
         * bits 0, so pick a different register to force
         * a MOV which will clear it
         */
        if (I64 && sz == 8 && tysize(e2.Ety) == 4)
        {
            regm_t rregm;
            if (isregvar(e2, &rregm, null))
                retregs &= ~rregm;
        }

        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (I64 && _tysize[ty1] == 8)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    // The tested bit is now in the carry flag; convert it to a value
    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg may alter the immediate-value tracking; keep it as it was
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                // Flags need not survive: reg = -carry, then negate to get 0/1
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);     // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                // Caller also wants the flags: load 1, skip the load of 0 on carry
                movregconst(cdb,reg,1,8);      // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);   // reg's value is path dependent
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}
4314 
/*************************
 * Generate code for OPbt, OPbtc, OPbtr, OPbts
 *
 * The addressing mode is obtained by applying getlvalue() to e itself.
 * A constant bit index is split into an offset adjustment on the EA plus
 * a bit number within the addressed unit (BT rm,imm8 form); a variable
 * index is loaded into a register (BT rm,reg form).
 * If a register result is requested, the carry flag left by the
 * instruction is materialized as 0/1.
 * Params:
 *      cdb = sink for generated code
 *      e = the bit test elem; E2 is the bit index
 *      pretregs = where the caller wants the result
 */

void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs));
    regm_t retregs;
    reg_t reg;
    opcode_t op;        // second opcode byte of the rm,reg form
    int mode;           // /digit of the rm,imm8 form (0x0FBA)

    switch (e.Eoper)
    {
        case OPbt:      op = 0xA3; mode = 4; break;
        case OPbtc:     op = 0xBB; mode = 7; break;
        case OPbtr:     op = 0xB3; mode = 6; break;
        case OPbts:     op = 0xAB; mode = 5; break;

        default:
            assert(0);
    }

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    getlvalue(cdb, &cs, e, RMload);      // get addressing mode
    if (e.Eoper == OPbt && *pretregs == 0)
    {
        // A pure test whose result is unused: only evaluate the index
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty1 = tybasic(e1.Ety);
    const ty2 = tybasic(e2.Ety);
    // operand size prefix is needed for 16 bit operands outside 16 bit code
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
    regm_t idxregs = idxregm(&cs);         // mask if index regs used

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Whole units of the index become a byte offset on the EA,
        // the remainder is the bit number within the unit
        if (_tysize[ty1] == SHORTSIZE)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (_tysize[ty1] == 4)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;
        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (_tysize[ty2] == 8 && I64)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    // The tested bit is now in the carry flag; convert it to a value
    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        // Use tysize() rather than indexing _tysize[] with the raw type
        // word: e.Ety may carry modifier bits beyond the basic type
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg may alter the immediate-value tracking; keep it as it was
            const save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                // Flags need not survive: reg = -carry, then negate to get 0/1
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                // Caller also wants the flags: load 1, skip the load of 0 on carry
                movregconst(cdb,reg,1,8);      // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);    // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);   // reg's value is path dependent
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}
4432 
/*************************************
 * Generate code for OPbsf and OPbsr.
 *
 * Emits BSF/BSR reg,EA where EA is the operand, taken directly from
 * memory / a variable when possible, otherwise from a register.
 * Params:
 *      cdb = sink for generated code
 *      e = the bit scan elem; E1 is the operand (2, 4 or 8 bytes)
 *      pretregs = where the caller wants the result; 0 means only
 *                 evaluate the operand
 */

void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbscan()\n");
    //elem_print(e);
    elem* operand = e.EV.E1;

    if (*pretregs == 0)
    {
        // Result not wanted
        codelem(cdb,operand,pretregs,false);
        return;
    }

    const opsize = _tysize[tybasic(operand.Ety)];
    assert(opsize == 2 || opsize == 4 || opsize == 8);

    code cs = void;
    const bool addressable = operand.Eoper == OPvar ||
                             (operand.Eoper == OPind && !operand.Ecount);
    if (addressable)
    {
        getlvalue(cdb, &cs, operand, RMload);     // get addressing mode
    }
    else
    {
        // Evaluate the operand into a register and address that register
        regm_t srcregs = allregs;
        codelem(cdb,operand, &srcregs, false);
        const srcreg = findreg(srcregs);
        cs.Irm = modregrm(3,0,srcreg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (srcreg & 8)
            cs.Irex |= REX_B;
    }

    // Pick the destination register, honoring the caller's wishes if any
    regm_t dstregs = *pretregs & allregs;
    if (dstregs == 0)
        dstregs = allregs;
    reg_t dstreg;
    allocreg(cdb,&dstregs, &dstreg, e.Ety);

    if (e.Eoper == OPbsf)
        cs.Iop = 0x0FBC;                    // BSF reg,EA
    else
        cs.Iop = 0x0FBD;                    // BSR reg,EA
    code_newreg(&cs, dstreg);
    if (opsize == SHORTSIZE && !I16)
        cs.Iflags |= CFopsize;
    cdb.gen(&cs);
    if (opsize == 8)
        code_orrex(cdb.last(), REX_W);

    fixresult(cdb,e,dstregs,pretregs);
}
4484 
/************************
 * OPpopcnt operator
 *
 * Emits POPCNT reg,EA. Not available for 16 bit code (asserted), and
 * there is no byte form.
 * Params:
 *      cdb = sink for generated code
 *      e = the popcnt elem; E1 is the operand (2, 4, or 8 bytes on 64 bit)
 *      pretregs = where the caller wants the result; 0 means only
 *                 evaluate the operand
 */

void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpopcnt()\n");
    //elem_print(e);
    assert(!I16);

    elem* operand = e.EV.E1;
    if (*pretregs == 0)
    {
        // Result not wanted
        codelem(cdb,operand,pretregs,false);
        return;
    }

    const opsize = _tysize[tybasic(operand.Ety)];
    assert(opsize == 2 || opsize == 4 || (opsize == 8 && I64));     // no byte op

    code cs = void;
    const bool addressable = operand.Eoper == OPvar ||
                             (operand.Eoper == OPind && !operand.Ecount);
    if (addressable)
    {
        getlvalue(cdb, &cs, operand, RMload);     // get addressing mode
    }
    else
    {
        // Evaluate the operand into a register and address that register
        regm_t srcregs = allregs;
        codelem(cdb,operand, &srcregs, false);
        const srcreg = findreg(srcregs);
        cs.Irm = modregrm(3,0,srcreg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (srcreg & 8)
            cs.Irex |= REX_B;
    }

    // Pick the destination register, honoring the caller's wishes if any
    regm_t dstregs = *pretregs & allregs;
    if (dstregs == 0)
        dstregs = allregs;
    reg_t dstreg;
    allocreg(cdb,&dstregs, &dstreg, e.Ety);

    cs.Iop = POPCNT;                        // POPCNT reg,EA
    code_newreg(&cs, dstreg);
    if (opsize == SHORTSIZE)
        cs.Iflags |= CFopsize;
    if (*pretregs & mPSW)
        cs.Iflags |= CFpsw;
    cdb.gen(&cs);
    if (opsize == 8)
        code_orrex(cdb.last(), REX_W);

    // flags already set, so only a register result remains to be fixed up
    *pretregs &= mBP | ALLREGS;

    fixresult(cdb,e,dstregs,pretregs);
}
4541 
4542 
/*******************************************
 * Generate code for OPpair, OPrpair.
 *
 * Evaluates E1 and E2 into two disjoint register sets and reports the
 * union as the result. For OPrpair the two register sets are exchanged.
 * Results wanted in ST01 are delegated to loadPair87().
 * Params:
 *      cdb = sink for generated code
 *      e = the pair elem
 *      pretregs = where the caller wants the result; 0 means only
 *                 evaluate both operands
 */

void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    if (*pretregs == 0)                         // if don't want result
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //printf("Ecount = %d\n", e.Ecount);

    regm_t wanted = *pretregs;

    // Only flags wanted for a complex value: pick a floating point home
    const bool flagsOnlyComplex = wanted == mPSW && tycomplex(e.Ety) && config.inline8087;
    if (flagsOnlyComplex)
        wanted |= config.fpxmmregs ? (mXMM0 | mXMM1) : mST01;

    if (wanted & mST01)
    {
        loadPair87(cdb, e, pretregs);
        return;
    }

    regm_t firstRegs;       // destination of E1
    regm_t secondRegs;      // destination of E2
    if (wanted & XMMREGS)
    {
        // Two distinct XMM registers out of the candidates
        wanted &= XMMREGS;
        firstRegs = mask(findreg(wanted));
        secondRegs = mask(findreg(wanted & ~firstRegs));
    }
    else
    {
        wanted &= allregs;
        if (wanted == 0)
            wanted = allregs;
        firstRegs = wanted & mLSW;
        secondRegs = wanted & mMSW;
    }

    if (e.Eoper == OPrpair)
    {
        // reversed pair: exchange the two destinations
        const regm_t tmp = firstRegs;
        firstRegs = secondRegs;
        secondRegs = tmp;
    }
    //printf("1: regs1 = %s, regs2 = %s\n", regm_str(firstRegs), regm_str(secondRegs));

    codelem(cdb,e.EV.E1, &firstRegs, false);
    scodelem(cdb,e.EV.E2, &secondRegs, firstRegs, false);
    //printf("2: regs1 = %s, regs2 = %s\n", regm_str(firstRegs), regm_str(secondRegs));

    if (e.EV.E1.Ecount)
        getregs(cdb,firstRegs);
    if (e.EV.E2.Ecount)
        getregs(cdb,secondRegs);

    fixresult(cdb,e,firstRegs | secondRegs,pretregs);
}
4612 
/*************************
 * Generate code for OPcmpxchg
 *
 * Loads the "old" value into AX (DX:AX for the 8 byte case in 32 bit
 * code), the "new" value into another register (CX:BX for that case),
 * and emits CMPXCHG / CMPXCHG8B on the lvalue. A LOCK prefix is emitted
 * only when the lvalue's type is marked volatile.
 * If a register result is requested it must be byte sized, and is
 * produced with SETZ.
 * Params:
 *      cdb = sink for generated code
 *      e = the OPcmpxchg elem, shaped as shown below
 *      pretregs = where the caller wants the result
 */

void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* The form is:
     *     OPcmpxchg
     *    /     \
     * lvalue   OPparam
     *          /     \
     *        old     new
     */

    //printf("cdcmpxchg(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);
    assert(!e2.Ecount);

    const tyml = tybasic(e1.Ety);                   // type of lvalue
    const sz = _tysize[tyml];

    if (I32 && sz == 8)
    {
        // 8 byte operand in 32 bit code: uses the register pairs DX:AX and CX:BX
        regm_t retregsx = mDX|mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);          // [DX,AX] = e2.EV.E1

        regm_t retregs = mCX|mBX;
        scodelem(cdb,e2.EV.E2,&retregs,mDX|mAX,false);  // [CX,BX] = e2.EV.E2

        code cs = void;
        getlvalue(cdb,&cs,e1,mCX|mBX|mAX|mDX);        // get EA

        getregs(cdb,mDX|mAX);                 // CMPXCHG destroys these regs

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                           // LOCK prefix
        cs.Iop = 0x0FC7;                              // CMPXCHG8B EA
        cs.Iflags |= CFpsw;
        code_newreg(&cs,1);                           // /1 opcode extension
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }
    else
    {
        const uint isbyte = (sz == 1);            // 1 for byte operation
        const ubyte word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
        const uint rex = (I64 && sz == 8) ? REX_W : 0;

        regm_t retregsx = mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);       // AX = e2.EV.E1

        regm_t retregs = (ALLREGS | mBP) & ~mAX;
        scodelem(cdb,e2.EV.E2,&retregs,mAX,false);   // load rvalue in reg

        code cs = void;
        getlvalue(cdb,&cs,e1,mAX | retregs); // get EA

        getregs(cdb,mAX);                  // CMPXCHG destroys AX

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                        // LOCK prefix
        cs.Iop = 0x0FB1 ^ isbyte;                    // CMPXCHG EA,reg
        cs.Iflags |= CFpsw | word;
        cs.Irex |= rex;
        const reg = findreg(retregs);
        code_newreg(&cs,reg);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }

    // The outcome is in the zero flag; convert it to a byte value if wanted
    if (regm_t retregs = *pretregs & (ALLREGS | mBP)) // if return result in register
    {
        assert(tysize(e.Ety) == 1);
        assert(I64 || retregs & BYTEREGS);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        uint ea = modregrmx(3,0,reg);
        if (I64 && reg >= 4)
            ea |= REX << 16;
        cdb.gen2(0x0F94,ea);        // SETZ reg
        *pretregs = retregs;
    }
}
4702 
/*************************
 * Generate code for OPprefetch
 *
 * E1 supplies the address, E2 must be a constant selecting the kind of
 * prefetch. No result is produced (*pretregs must be 0).
 * Params:
 *      cdb = sink for generated code
 *      e = the OPprefetch elem
 *      pretregs = must point to 0
 */

void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate the following based on e2:
     *    0: prefetch0
     *    1: prefetch1
     *    2: prefetch2
     *    3: prefetchnta
     *    4: prefetchw
     *    5: prefetchwt1
     */
    //printf("cdprefetch\n");
    assert(*pretregs == 0);

    elem *selector = e.EV.E2;
    assert(selector.Eoper == OPconst);

    elem *address = e.EV.E1;

    opcode_t opcode;
    reg_t hint;                 // goes into the reg field of the mod/rm byte
    switch (selector.EV.Vuns)
    {
        case 0:                                 // PREFETCH0
            opcode = PREFETCH;
            hint = 1;
            break;

        case 1:                                 // PREFETCH1
            opcode = PREFETCH;
            hint = 2;
            break;

        case 2:                                 // PREFETCH2
            opcode = PREFETCH;
            hint = 3;
            break;

        case 3:                                 // PREFETCHNTA
            opcode = PREFETCH;
            hint = 0;
            break;

        case 4:                                 // PREFETCHW
            opcode = 0x0F0D;
            hint = 1;
            break;

        case 5:                                 // PREFETCHWT1
            opcode = 0x0F0D;
            hint = 2;
            break;

        default:
            assert(0);
    }

    freenode(selector);

    code cs = void;
    getlvalue(cdb,&cs,address,0);
    cs.Iop = opcode;
    cs.Irm |= modregrm(0,hint,0);
    cs.Iflags |= CFvolatile;            // do not schedule
    cdb.gen(&cs);
}
4744 
4745 
/*********************
 * Load register from EA of assignment operation.
 *
 * Allocates a register from retregs and emits MOV reg,EA.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it; its opcode and
 *           register field are overwritten
 *      e = assignment expression that will be evaluated
 *          (not referenced by this function)
 *      reg = set to register loaded from EA
 *      retregs = register candidates for reg
 */
private
void opAssLoadReg(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t reg, regm_t retregs)
{
    modEA(cdb, &cs);

    reg_t loaded;
    allocreg(cdb,&retregs,&loaded,TYoffset);
    reg = loaded;

    // MOV reg,EA
    cs.Iop = LOD;
    code_newreg(&cs,loaded);
    cdb.gen(&cs);
}
4765 
/*********************
 * Load register pair from EA of assignment operation.
 *
 * Computes the EA of e's lvalue into cs, allocates a register pair and
 * loads both halves. On return cs refers to the least significant half
 * again.
 * Params:
 *      cdb = store generated code here
 *      cs = receives the EA of the lvalue
 *      e = assignment expression that will be evaluated
 *      rhi = set to most significant register of the pair
 *      rlo = set to least significant register of the pair
 *      retregs = register candidates for rhi, rlo
 *      keepmsk = registers to not modify
 */
private
void opAssLoadPair(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t rhi, out reg_t rlo, regm_t retregs, regm_t keepmsk)
{
    elem* lvalue = e.EV.E1;
    getlvalue(cdb,&cs,lvalue,retregs | keepmsk);

    const tym_t lvalueType = tybasic(lvalue.Ety);
    reg_t unused;                   // allocreg needs somewhere to put a register
    allocreg(cdb,&retregs,&unused,lvalueType);

    rlo = findreglsw(retregs);
    rhi = findregmsw(retregs);

    cs.Iop = LOD;

    // least significant half
    code_newreg(&cs,rlo);
    cdb.gen(&cs);                   // MOV rlo,EA

    // most significant half
    getlvalue_msw(&cs);
    code_newreg(&cs,rhi);
    cdb.gen(&cs);                   // MOV rhi,MSW of EA

    // leave cs addressing the least significant half
    getlvalue_lsw(&cs);
}
4796 
4797 
/*********************************************************
 * Store register result of assignment operation EA.
 *
 * Emits MOV EA,reg (byte form when the lvalue is 1 byte), records the
 * register as holding the lvalue if it is a common subexpression, frees
 * the lvalue node and fixes up the result.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      reg = register of result
 *      pretregs = registers to store result in
 */
private
void opAssStoreReg(ref CodeBuilder cdb, ref code cs, elem* e, reg_t reg, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;
    const bool byteOp = _tysize[tybasic(lvalue.Ety)] == 1;

    cs.Iop = STO ^ (byteOp ? 1 : 0);        // low opcode bit clear selects byte form
    code_newreg(&cs,reg);
    cdb.gen(&cs);                           // MOV EA,resreg

    if (lvalue.Ecount)                      // if we gen a CSE
        cssave(lvalue,mask(reg),!OTleaf(lvalue.Eoper));
    freenode(lvalue);

    fixresult(cdb,e,mask(reg),pretregs);
}
4822 
/*********************************************************
 * Store register pair result of assignment operation EA.
 *
 * Emits two stores, least significant half first, records the pair as
 * holding the lvalue if it is a common subexpression, frees the lvalue
 * node and fixes up the result.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      rhi = most significant register of the pair
 *      rlo = least significant register of the pair
 *      pretregs = registers to store result in
 */
private
void opAssStorePair(ref CodeBuilder cdb, ref code cs, elem* e, reg_t rhi, reg_t rlo, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;
    const regm_t pairRegs = mask(rhi) | mask(rlo);

    cs.Iop = STO;

    // least significant half
    code_newreg(&cs,rlo);
    cdb.gen(&cs);                   // MOV EA,lsreg

    // most significant half
    code_newreg(&cs,rhi);
    getlvalue_msw(&cs);
    cdb.gen(&cs);                   // MOV EA+REGSIZE,msreg

    if (lvalue.Ecount)              // if we gen a CSE
        cssave(lvalue,pairRegs,!OTleaf(lvalue.Eoper));
    freenode(lvalue);

    fixresult(cdb,e,pairRegs,pretregs);
}
4849 
4850 
4851 }