1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Mostly code generation for assignment operators.
6  *
7  * Copyright:   Copyright (C) 1985-1998 by Symantec
8  *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
9  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
10  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
11  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d)
12  * Documentation:  https://dlang.org/phobos/dmd_backend_cod4.html
13  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod4.d
14  */
15 
16 module dmd.backend.cod4;
17 
18 version (SCPP)
19     version = COMPILE;
20 version (MARS)
21     version = COMPILE;
22 
23 version (COMPILE)
24 {
25 
26 import core.stdc.stdio;
27 import core.stdc.stdlib;
28 import core.stdc.string;
29 
30 import dmd.backend.cc;
31 import dmd.backend.cdef;
32 import dmd.backend.code;
33 import dmd.backend.code_x86;
34 import dmd.backend.codebuilder;
35 import dmd.backend.mem;
36 import dmd.backend.el;
37 import dmd.backend.global;
38 import dmd.backend.oper;
39 import dmd.backend.ty;
40 import dmd.backend.evalu8 : el_toldoubled;
41 import dmd.backend.xmm;
42 
43 extern (C++):
44 
45 nothrow:
46 
// Declared as a function returning int. The code in this file compares it
// against type sizes (e.g. `sz == 2 * REGSIZE` for a register pair) and uses
// it as the step between the words of a multi-register value.
int REGSIZE();

// Shared code generator state; this file increments and decrements
// cgstate.stackclean around the evaluation of the right operand in opassdbl().
extern __gshared CGstate cgstate;
// Table with one bool per FL value.
// NOTE(review): presumably flags the FL kinds that denote data - confirm at the definition.
extern __gshared bool[FLMAX] datafl;
51 
/// Returns a mask with only bit number `m` set.
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
53 
/* Successor table indexed by register number (column order shown below).
 * cdeq() uses it in its 16 bit path to walk the registers of a double from
 * the highest addressed word down: AX -> BX -> CX -> DX, and NOREG after DX.
 */
                        /*   AX,CX,DX,BX                */
__gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ];

// from divcoeff.c
// Prototypes only; neither function is called in the part of the file
// preceding cdaddass().
extern (C)
{
    bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
    bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
}
63 
/*******************************
 * Count the occurrences of a symbol in an expression tree.
 * Params:
 *      s = symbol to look for
 *      e = root of the tree to search
 * Returns:
 *      number of OPvar leaves of e whose symbol is s
 */

private int intree(Symbol *s,elem *e)
{
    const op = e.Eoper;

    // A leaf contributes 1 only if it is a reference to s
    if (OTleaf(op))
        return (op == OPvar && e.EV.Vsym == s) ? 1 : 0;

    // Interior node: the first operand always exists, the second only for binary operators
    int count = intree(s,e.EV.E1);
    if (OTbinary(op))
        count += intree(s,e.EV.E2);
    return count;
}
74 
/***********************************
 * Decide whether expression e may be computed straight into the register
 * variable s.
 * Expressions that mention s themselves need care, e.g. x=x+x+x or x=a+x.
 * Params:
 *      s = the register variable being assigned to
 *      e = the expression to be evaluated
 * Returns:
 *      1       e can be evaluated directly into s
 *      0       it cannot
 */

int doinreg(Symbol *s, elem *e)
{
    // Each iteration descends into the left operand of an operator whose
    // right operand does not mention s.
    for (;;)
    {
        const OPER op = e.Eoper;

        if (op == OPind || OTcall(op) || OTleaf(op))
            return 1;

        const int uses = intree(s,e);
        if (uses == 0)
            return 1;                   // s does not appear in e at all

        if (OTunary(op) && OTleaf(e.EV.E1.Eoper))
            return 1;

        if (uses != 1)
            return 0;                   // s appears more than once

        switch (op)
        {
            case OPadd:
            case OPmin:
            case OPand:
            case OPor:
            case OPxor:
            case OPshl:
            case OPmul:
                // The single use of s must be in the left operand
                if (intree(s,e.EV.E2))
                    return 0;
                e = e.EV.E1;
                continue;

            default:
                return 0;
        }
    }
}
122 
/****************************
 * Called just before an EA is modified.
 * If the EA turns out to be a register, call getregs() on that register
 * so that common subexpressions held in it get saved.
 * Params:
 *      cdb = code builder passed on to getregs()
 *      c = instruction whose Irm/Irex describe the EA
 */

void modEA(ref CodeBuilder cdb,code *c)
{
    // Nothing to do unless the addressing mode refers to a register
    if ((c.Irm & 0xC0) != 0xC0)
        return;

    reg_t rmreg = c.Irm & 7;
    if (c.Irex & REX_B)
    {
        // REX_B extends the register number and is asserted to occur only for I64
        assert(I64);
        rmreg |= 8;
    }
    getregs(cdb,mask(rmreg));
}
141 
142 
/****************************
 * Gen code for op= for doubles.
 * When config.inline8087 is set the work is handed to opass87(); otherwise
 * the lvalue and the right operand are loaded into registers (or pushed),
 * a runtime library routine is called, and the result is stored back.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the op= node; e.EV.E1 is the lvalue, e.EV.E2 the right operand
 *      pretregs = where the caller wants the result, passed to fixresult()
 *      op = operator used to select the library routine; must lie in
 *           OPpostinc..OPdivass since it indexes clibtab
 */

private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op)
{
    assert(config.exe & EX_windos);  // for targets that may not have an 8087

    // Library routine per operator, indexed by (op - OPpostinc).
    // The OPeq slot is a placeholder (-1).
    static immutable uint[OPdivass - OPpostinc + 1] clibtab =
    /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass       */
    [  CLIB.dadd, CLIB.dsub, cast(uint)-1,  CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ];

    if (config.inline8087)
    {
        opass87(cdb,e,pretregs);
        return;
    }

    code cs;
    regm_t retregs2,retregs,idxregs;

    uint clib = clibtab[op - OPpostinc];
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    // NOTE(review): the mask argument presumably keeps these registers out of
    // the addressing mode, as they are loaded below - confirm against getlvalue()
    getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX);

    if (tym == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    /* convert to float operation   */

        // Load EA into FLOATREGS
        getregs(cdb,FLOATREGS);
        cs.Iop = LOD;
        cs.Irm |= modregrm(0,AX,0);
        cdb.gen(&cs);

        if (!I32)
        {
            // second load for the most significant word, into DX
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

        }
        retregs2 = FLOATREGS2;
        idxregs = FLOATREGS | idxregm(&cs);
        retregs = FLOATREGS;
    }
    else
    {
        if (I32)
        {
            // Load EA into DOUBLEREGS
            getregs(cdb,DOUBLEREGS_32);
            cs.Iop = LOD;
            cs.Irm |= modregrm(0,AX,0);
            cdb.gen(&cs);
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

            retregs2 = DOUBLEREGS2_32;
            idxregs = DOUBLEREGS_32 | idxregm(&cs);
        }
        else
        {
            // Push EA onto stack
            // Four pushes (opcode 0xFF with reg field 6), starting at the
            // word at offset DOUBLESIZE - REGSIZE
            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            stackpush += DOUBLESIZE;

            retregs2 = DOUBLEREGS_16;
            idxregs = idxregm(&cs);
        }
        retregs = DOUBLEREGS;
    }

    // A lvalue addressed through ES adds ES to the mask handed to scodelem()
    if ((cs.Iflags & CFSEG) == CFes)
        idxregs |= mES;
    // Evaluate the right operand into retregs2; idxregs is passed as the
    // set of registers scodelem() is asked to keep
    cgstate.stackclean++;
    scodelem(cdb,e.EV.E2,&retregs2,idxregs,false);
    cgstate.stackclean--;
    callclib(cdb,e,clib,&retregs,0);
    if (e1.Ecount)
        cssave(e1,retregs,!OTleaf(e1.Eoper));             // if lvalue is a CSE
    freenode(e1);
    cs.Iop = STO;                              // MOV EA,DOUBLEREGS
    fltregs(cdb,&cs,tym);
    fixresult(cdb,e,retregs,pretregs);
}
242 
/****************************
 * Gen code for OPnegass for doubles.
 * When config.inline8087 is set the work is handed to cdnegass87();
 * otherwise the value is negated in place by XORing 0x80 into its last
 * byte, and it is reloaded into registers if a result is wanted or the
 * lvalue is a CSE.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the negate-assign node; e.EV.E1 is the lvalue
 *      pretregs = where the caller wants the result, 0 for no result
 */

private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(config.exe & EX_windos);  // for targets that may not have an 8087

    if (config.inline8087)
    {
        cdnegass87(cdb,e,pretregs);
        return;
    }
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    int sz = _tysize[tym];
    code cs;

    getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0);
    modEA(cdb,&cs);
    // Byte XOR with immediate 0x80 on the byte at offset sz - 1
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    cs.IEV1.Voffset += sz - 1;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR 7[EA],0x80
    if (tycomplex(tym))
    {
        // complex: repeat the XOR sz/2 bytes lower, on the other half
        cs.IEV1.Voffset -= sz / 2;
        cdb.gen(&cs);                   // XOR 7[EA],0x80
    }

    regm_t retregs;
    if (*pretregs || e1.Ecount)
    {
        // undo the sz - 1 adjustment made for the XOR
        // NOTE(review): on the tycomplex path the offset was also lowered by
        // sz/2 above, so it ends up at -sz/2 relative to the EA here - confirm
        // that complex types never need the reload below
        cs.IEV1.Voffset -= sz - 1;

        if (tym == TYfloat)
        {
            // Load EA into FLOATREGS
            getregs(cdb,FLOATREGS);
            cs.Iop = LOD;
            NEWREG(cs.Irm, AX);
            cdb.gen(&cs);

            if (!I32)
            {
                NEWREG(cs.Irm, DX);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);

            }
            retregs = FLOATREGS;
        }
        else
        {
            if (I32)
            {
                // Load EA into DOUBLEREGS
                getregs(cdb,DOUBLEREGS_32);
                cs.Iop = LOD;
                // clear the reg field (it held 6 for the XOR) before selecting AX
                cs.Irm &= ~cast(uint)modregrm(0,7,0);
                cs.Irm |= modregrm(0,AX,0);
                cdb.gen(&cs);
                cs.Irm |= modregrm(0,DX,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
            }
            else
            {
                // The `else` branch below is compiled out by `static if (1)`
                static if (1)
                {
                    cs.Iop = LOD;
                    fltregs(cdb,&cs,TYdouble);     // MOV DOUBLEREGS, EA
                }
                else
                {
                    // Push EA onto stack
                    cs.Iop = 0xFF;
                    cs.Irm |= modregrm(0,6,0);
                    cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    stackpush += DOUBLESIZE;
                }
            }
            retregs = DOUBLEREGS;
        }
        if (e1.Ecount)
            cssave(e1,retregs,!OTleaf(e1.Eoper));         /* if lvalue is a CSE   */
    }
    else
    {
        // no result wanted and no CSE: nothing is loaded
        retregs = 0;
        assert(e1.Ecount == 0);
    }

    freenode(e1);
    fixresult(cdb,e,retregs,pretregs);
}
350 
351 
352 
/************************
 * Generate code for an assignment.
 *
 * The strategies are tried in this order:
 *  1. types that live in XMM registers (with config.fpxmmregs) go to xmmeq()
 *  2. floating types with config.inline8087 go to complex_eq87()/eq87(),
 *     except for some value-less assignments that fall through
 *  3. if no result is wanted and the rvalue is a constant (or a suitable
 *     OPrelconst), the constant is stored straight into the lvalue
 *  4. otherwise the rvalue is evaluated into registers which are then
 *     stored into the lvalue
 * A `*p++ = ...` / `*p-- = ...` lvalue with p in a register has its
 * increment applied at the end (label Lp).
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the assignment node; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = where the caller wants the result; 0 means no result,
 *                 mPSW means the flags. May be modified (mPSW can be cleared).
 */

void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    tym_t tymll;
    reg_t reg;
    code cs;
    elem *e11;
    bool regvar;                  // true means evaluate into register variable
    regm_t varregm;
    reg_t varreg;
    targ_int postinc;             // amount to add to the pointer of *p++ / *p--, 0 if none

    //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int e2oper = e2.Eoper;
    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    regm_t retregs = *pretregs;

    if (tyxmmreg(tyml) && config.fpxmmregs)
    {
        xmmeq(cdb, e, CMP, e1, e2, pretregs);
        return;
    }

    // x87 path. Only a value-less assignment from a constant (or, below
    // TARGET_PentiumPro, from a variable or indirection) of a type other
    // than long double falls through to the stores below.
    if (tyfloating(tyml) && config.inline8087)
    {
        if (tycomplex(tyml))
        {
            complex_eq87(cdb, e, pretregs);
            return;
        }

        if (!(retregs == 0 &&
              (e2oper == OPconst || e2oper == OPvar || e2oper == OPind))
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (config.target_cpu >= TARGET_PentiumPro &&
            (e2oper == OPvar || e2oper == OPind)
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (tyml == TYldouble || tyml == TYildouble)
        {
            eq87(cdb,e,pretregs);
            return;
        }
    }

    uint sz = _tysize[tyml];           // # of bytes to transfer
    assert(cast(int)sz > 0);

    if (retregs == 0)                     // if no return value
    {
        int fl;

        /* If registers are tight, and we might need them for the lvalue,
         * prefer to not use them for the rvalue
         */
        bool plenty = true;
        if (e1.Eoper == OPind)
        {
            /* Will need 1 register for evaluation, +2 registers for
             * e1's addressing mode
             */
            regm_t m = allregs & ~regcon.mvar;  // mask of non-register variables
            m &= m - 1;         // clear least significant bit
            m &= m - 1;         // clear least significant bit
            plenty = m != 0;    // at least 3 registers
        }

        if ((e2oper == OPconst ||       // if rvalue is a constant
             e2oper == OPrelconst &&
             !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
             ((fl = el_fl(e2)) == FLdata ||
              fl==FLudata || fl == FLextern)
              && !(e2.EV.Vsym.ty() & mTYcs)
            ) &&
            !(evalinregister(e2) && plenty) &&
            !e1.Ecount)        // and no CSE headaches
        {
            // Look for special case of (*p++ = ...), where p is a register variable
            if (e1.Eoper == OPind &&
                ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
                e11.EV.E1.Eoper == OPvar &&
                e11.EV.E1.EV.Vsym.Sfl == FLreg &&
                (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
               )
            {
                Symbol *s = e11.EV.E1.EV.Vsym;
                if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
                {
                    regcon.params &= ~s.Spregm();
                }
                // remember the step; the increment itself is emitted at Lp
                postinc = e11.EV.E2.EV.Vint;
                if (e11.Eoper == OPpostdec)
                    postinc = -postinc;
                getlvalue(cdb,&cs,e1,RMstore);
                freenode(e11.EV.E2);
            }
            else
            {
                postinc = 0;
                getlvalue(cdb,&cs,e1,RMstore);

                // On Pentium / PentiumMMX, when optimizing for speed and the
                // addressing mode has mod bits 0x80, store from a register
                // holding the constant rather than storing an immediate
                if (e2oper == OPconst &&
                    config.flags4 & CFG4speed &&
                    (config.target_cpu == TARGET_Pentium ||
                     config.target_cpu == TARGET_PentiumMMX) &&
                    (cs.Irm & 0xC0) == 0x80
                   )
                {
                    if (I64 && sz == 8 && e2.EV.Vpointer)
                    {
                        // MOV reg,imm64
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vpointer,&regx,64);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if (sz == 2 * REGSIZE && e2.EV.Vllong == 0)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        // MOV EA+2,reg
                        regm_t rregm = getscratch() & ~idxregm(&cs);
                        if (rregm)
                        {
                            reg_t regx;
                            regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx,0);
                            cdb.gen(&cs);
                            getlvalue_msw(&cs);
                            cdb.gen(&cs);
                            freenode(e2);
                            goto Lp;
                        }
                    }
                }
            }

            // If loading result into a register
            if ((cs.Irm & 0xC0) == 0xC0)
            {
                modEA(cdb,&cs);
                if (sz == 2 * REGSIZE && cs.IFL1 == FLreg)
                    getregs(cdb,cs.IEV1.Vsym.Sregm);
            }
            // store-immediate opcode: 0xC6 for a byte, 0xC7 otherwise
            cs.Iop = (sz == 1) ? 0xC6 : 0xC7;

            if (e2oper == OPrelconst)
            {
                cs.IEV2.Voffset = e2.EV.Voffset;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e2.EV.Vsym;
                cs.Iflags |= CFoff;
                cdb.gen(&cs);       // MOV EA,&variable
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
                if (sz > REGSIZE)
                {
                    cs.Iop = 0x8C;
                    getlvalue_msw(&cs);
                    cs.Irm |= modregrm(0,3,0);
                    cdb.gen(&cs);   // MOV EA+2,DS
                }
            }
            else
            {
                assert(e2oper == OPconst);
                cs.IFL2 = FLconst;
                // p views the constant's storage as targ_size_t sized words
                targ_size_t *p = cast(targ_size_t *) &(e2.EV);
                cs.IEV2.Vsize_t = *p;
                // Look for loading a register variable
                if ((cs.Irm & 0xC0) == 0xC0)
                {
                    reg_t regx = cs.Irm & 7;

                    if (cs.Irex & REX_B)
                        regx |= 8;
                    if (I64 && sz == 8)
                        movregconst(cdb,regx,*p,64);
                    else
                        movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1));
                    if (sz == 2 * REGSIZE)
                    {   getlvalue_msw(&cs);
                        // load the second register of the pair with the next
                        // REGSIZE bytes of the constant
                        if (REGSIZE == 2)
                            movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0);
                        else if (REGSIZE == 4)
                            movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0);
                        else if (REGSIZE == 8)
                            movregconst(cdb,cs.Irm & 7,p[1],0);
                        else
                            assert(0);
                    }
                }
                else if (I64 && sz == 8 && *p >= 0x80000000)
                {   // Use 64 bit MOV, as the 32 bit one gets sign extended
                    // MOV reg,imm64
                    // MOV EA,reg
                    regm_t rregm = allregs & ~idxregm(&cs);
                    reg_t regx;
                    regwithvalue(cdb,rregm,*p,&regx,64);
                    cs.Iop = STO;
                    cs.Irm |= modregrm(0,regx & 7,0);
                    if (regx & 8)
                        cs.Irex |= REX_R;
                    cdb.gen(&cs);
                }
                else
                {
                    // Store the constant one piece of regsize bytes at a
                    // time, at increasing offsets; off counts the bytes
                    // still to be stored
                    int off = sz;
                    do
                    {   int regsize = REGSIZE;
                        if (off >= 4 && I16 && config.target_cpu >= TARGET_80386)
                        {
                            regsize = 4;
                            cs.Iflags |= CFopsize;      // use opsize to do 32 bit operation
                        }
                        else if (I64 && sz == 16 && *p >= 0x80000000)
                        {
                            // piece does not fit a sign extended 32 bit
                            // immediate: go through a register
                            regm_t rregm = allregs & ~idxregm(&cs);
                            reg_t regx;
                            regwithvalue(cdb,rregm,*p,&regx,64);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx & 7,0);
                            if (regx & 8)
                                cs.Irex |= REX_R;
                        }
                        else
                        {
                            regm_t retregsx = (sz == 1) ? BYTEREGS : allregs;
                            reg_t regx;
                            // if some register already has the value, store
                            // that register instead of an immediate
                            if (reghasvalue(retregsx,*p,&regx))
                            {
                                cs.Iop = (cs.Iop & 1) | 0x88;
                                cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx
                                if (regx & 8)
                                    cs.Irex |= REX_R;
                                if (I64 && sz == 1 && regx >= 4)
                                    cs.Irex |= REX;
                            }
                            if (!I16 && off == 2)      // if 16 bit operand
                                cs.Iflags |= CFopsize;
                            if (I64 && sz == 8)
                                cs.Irex |= REX_W;
                        }
                        cdb.gen(&cs);           // MOV EA,const

                        // advance to the next piece and reset cs to an
                        // immediate store with an empty reg field
                        p = cast(targ_size_t *)(cast(char *) p + regsize);
                        cs.Iop = (cs.Iop & 1) | 0xC6;
                        cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0);
                        cs.Irex &= ~REX_R;
                        cs.IEV1.Voffset += regsize;
                        cs.IEV2.Vint = cast(int)*p;
                        off -= regsize;
                    } while (off > 0);
                }
            }
            freenode(e2);
            goto Lp;
        }
        retregs = allregs;        // pick a reg, any reg
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    if (retregs == mPSW)
    {
        retregs = allregs;
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    cs.Iop = STO;
    if (sz == 1)                  // must have byte regs
    {
        cs.Iop = 0x88;
        retregs &= BYTEREGS;
        if (!retregs)
            retregs = BYTEREGS;
    }
    else if (retregs & mES &&
           (
             (e1.Eoper == OPind &&
                ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) ||
             (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata)
           )
          )
        // getlvalue() needs ES, so we can't return it
        retregs = allregs;              // no conflicts with ES
    else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0)
        retregs = DOUBLEREGS;

    regvar = false;
    varregm = 0;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1.EV.Vsym,e2) &&       // and we can compute directly into it
            !(sz == 1 && e1.EV.Voffset == 1)
           )
        {
            if (varregm & XMMREGS)
            {
                // Could be an integer vector in the XMMREGS
                xmmeq(cdb, e, CMP, e1, e2, pretregs);
                return;
            }
            regvar = true;
            retregs = varregm;
            reg = varreg;       // evaluate directly in target register
            // assigning one register-sized half of a variable that occupies
            // a register pair: narrow retregs to the half that is addressed
            if (tysize(e1.Ety) == REGSIZE &&
                tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE)
            {
                if (e1.EV.Voffset)
                    retregs &= mMSW;
                else
                    retregs &= mLSW;
                reg = findreg(retregs);
            }
        }
    }
    if (*pretregs & mPSW && OTleaf(e1.Eoper))     // if evaluating e1 couldn't change flags
    {   // Be careful that this lines up with jmpopcode()
        retregs |= mPSW;
        *pretregs &= ~mPSW;
    }
    scodelem(cdb,e2,&retregs,0,true);    // get rvalue

    // Look for special case of (*p++ = ...), where p is a register variable
    if (e1.Eoper == OPind &&
        ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
        e11.EV.E1.Eoper == OPvar &&
        e11.EV.E1.EV.Vsym.Sfl == FLreg &&
        (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
       )
    {
        Symbol *s = e11.EV.E1.EV.Vsym;
        if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
        {
            regcon.params &= ~s.Spregm();
        }

        // remember the step; the increment itself is emitted at Lp
        postinc = e11.EV.E2.EV.Vint;
        if (e11.Eoper == OPpostdec)
            postinc = -postinc;
        getlvalue(cdb,&cs,e1,RMstore | retregs);
        freenode(e11.EV.E2);
    }
    else
    {
        postinc = 0;
        getlvalue(cdb,&cs,e1,RMstore | retregs);     // get lvalue (cl == null if regvar)
    }

    getregs(cdb,varregm);

    assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes));
    if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES)
    {
        // far pointer held with its segment in ES: store the offset
        // register, then the segment register
        reg = findreglsw(retregs);
        cs.Irm |= modregrm(0,reg,0);
        cdb.gen(&cs);                   // MOV EA,reg
        getlvalue_msw(&cs);             // point to where segment goes
        cs.Iop = 0x8C;
        NEWREG(cs.Irm,0);
        cdb.gen(&cs);                   // MOV EA+2,ES
    }
    else
    {
        if (!I16)
        {
            // store the least significant register first, then, if the
            // value is wider than a register, the most significant one
            reg = findreg(retregs &
                    ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS));
            cs.Irm |= modregrm(0,reg & 7,0);
            if (reg & 8)
                cs.Irex |= REX_R;
            for (; true; sz -= REGSIZE)
            {
                // Do not generate mov from register onto itself
                if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0)))
                    break;
                if (sz == 2)            // if 16 bit operand
                    cs.Iflags |= CFopsize;
                else if (sz == 1 && reg >= 4)
                    cs.Irex |= REX;
                cdb.gen(&cs);           // MOV EA+offset,reg
                if (sz <= REGSIZE)
                    break;
                getlvalue_msw(&cs);
                reg = findregmsw(retregs);
                code_newreg(&cs, reg);
            }
        }
        else
        {
            // 16 bit code: start at the highest addressed word and work down
            if (sz > REGSIZE)
                cs.IEV1.Voffset += sz - REGSIZE;  // 0,2,6
            reg = findreg(retregs &
                    (sz > REGSIZE ? mMSW : ALLREGS));
            if (tyml == TYdouble || tyml == TYdouble_alias)
                reg = AX;
            cs.Irm |= modregrm(0,reg,0);
            // Do not generate mov from register onto itself
            if (!regvar || reg != (cs.Irm & 7))
                for (; true; sz -= REGSIZE)             // 1,2,4
                {
                    cdb.gen(&cs);             // MOV EA+offset,reg
                    if (sz <= REGSIZE)
                        break;
                    cs.IEV1.Voffset -= REGSIZE;
                    // doubles step through the dblreg[] table to find the
                    // register for the next lower word
                    if (tyml == TYdouble || tyml == TYdouble_alias)
                            reg = dblreg[reg];
                    else
                            reg = findreglsw(retregs);
                    NEWREG(cs.Irm,reg);
                }
        }
    }
    if (e1.Ecount ||                    // if lvalue is a CSE or
        regvar)                         // rvalue can't be a CSE
    {
        getregs_imm(cdb,retregs);       // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,!OTleaf(e1.Eoper));     // if lvalue is a CSE
    }

    fixresult(cdb,e,retregs,pretregs);
Lp:
    // Apply the pending *p++ / *p-- adjustment to the index register of cs
    if (postinc)
    {
        reg_t ireg = findreg(idxregm(&cs));
        if (*pretregs & mPSW)
        {   // Use LEA to avoid touching the flags
            uint rm = cs.Irm & 7;
            if (cs.Irex & REX_B)
                rm |= 8;
            cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (I64)
        {
            // opcode 0x81 with reg field 0 and an immediate of postinc
            cdb.genc2(0x81,modregrmx(3,0,ireg),postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
        {
            if (postinc == 1)
                cdb.gen1(0x40 + ireg);        // INC ireg
            else if (postinc == -cast(targ_int)1)
                cdb.gen1(0x48 + ireg);        // DEC ireg
            else
            {
                cdb.genc2(0x81,modregrm(3,0,ireg),postinc);
            }
        }
    }
    freenode(e1);
}
847 
848 
/************************
 * Generate code for the in-place operators += -= &= |= ^= and negass.
 * OPpostinc and OPpostdec are generated as += and -=.
 *
 * XMM types are handed to xmmopass() (except for negass, or when the
 * result is wanted in ST0); other floating types go to the x87 or the
 * software floating point helpers.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the operator elem; e.EV.E1 is the lvalue, e.EV.E2 the operand
 *      pretregs = mask of where the result is wanted (mPSW means the flags);
 *                 handed to fixresult() once the operation has been emitted
 */

void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    OPER op = e.Eoper;
    tym_t lty = tybasic(e1.Ety);                // type of lvalue
    int size = _tysize[lty];
    int isbyte = (size == 1);                   // 1 for byte operation, else 0
    regm_t retregs = 0;
    uint dirbit = 0;                            // becomes 2 once the direction bit was toggled

    // XMM capable types are handled by the XMM code generator
    if (config.fpxmmregs && tyxmmreg(lty) && op != OPnegass && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    // Remaining floating point types
    if (tyfloating(lty))
    {
        if (config.exe & EX_posix)
        {
            if (op == OPnegass)
                cdnegass87(cdb,e,pretregs);
            else
                opass87(cdb,e,pretregs);
        }
        else if (op == OPnegass)
            opnegassdbl(cdb,e,pretregs);
        else
            opassdbl(cdb,e,pretregs,op);
        return;
    }

    // 32 bit operand in 16 bit code on a 386 or later: use an operand size prefix
    uint opsize = (I16 && tylong(lty) && config.target_cpu >= TARGET_80386)
        ? CFopsize : 0;
    uint carry = 0;                              // CFpsw if the msw operation consumes the carry
    regm_t wantflags = *pretregs & mPSW;         // result wanted in the flags
    regm_t wantregs = *pretregs & ~mPSW;         // result wanted in registers
    // true if the result has to end up in a register
    bool reload = wantregs || (e1.Ecount && !OTleaf(e1.Eoper));

    reg_t reg;
    uint op1,op2,digit;
    code ea;                                     // instruction being built around the lvalue's EA
    elem *e2;
    regm_t vmask;
    reg_t vreg;
    uint jop;

    // Pick the opcodes: op1 for the lsw, op2 for the msw, digit is the
    // /digit of the immediate form (opcode 0x81)
    switch (op)
    {
        case OPpostinc:                          // i++ => +=
            op = OPaddass;
            goto case OPaddass;

        case OPaddass:                           // ADD, ADC
            op1 = 0x01;
            op2 = 0x11;
            carry = CFpsw;
            digit = 0;
            break;

        case OPpostdec:                          // i-- => -=
            op = OPminass;
            goto case OPminass;

        case OPminass:                           // SUB, SBB
            op1 = 0x29;
            op2 = 0x19;
            carry = CFpsw;
            digit = 5;
            break;

        case OPandass:                           // AND, AND
            op1 = op2 = 0x21;
            digit = 4;
            break;

        case OPorass:                            // OR, OR
            op1 = op2 = 0x09;
            digit = 1;
            break;

        case OPxorass:                           // XOR, XOR
            op1 = op2 = 0x31;
            digit = 6;
            break;

        case OPnegass:                           // NEG
            op1 = 0xF7;
            break;

        default:
            assert(0);
    }
    op1 ^= isbyte;                  // bit 0 is 0 for byte operation

    if (op == OPnegass)
    {
        getlvalue(cdb,&ea,e1,0);
        modEA(cdb,&ea);
        ea.Irm |= modregrm(0,3,0);
        ea.Iop = op1;
        switch (_tysize[lty])
        {
            case CHARSIZE:
                cdb.gen(&ea);
                break;

            case SHORTSIZE:
                cdb.gen(&ea);
                if (!I16 && *pretregs & mPSW)
                    cdb.last().Iflags |= CFopsize | CFpsw;
                break;

            case LONGSIZE:
                if (!I16 || opsize)
                {
                    cdb.gen(&ea);
                    cdb.last().Iflags |= opsize;
                    break;
                }
            neg_pair:
                // Negate a value held in two words
                getlvalue_msw(&ea);
                cdb.gen(&ea);              // NEG EA+2
                getlvalue_lsw(&ea);
                cdb.gen(&ea);              // NEG EA
                code_orflag(cdb.last(),CFpsw);
                ea.Iop = 0x81;
                getlvalue_msw(&ea);
                ea.IFL2 = FLconst;
                ea.IEV2.Vuns = 0;
                cdb.gen(&ea);              // SBB EA+2,0
                break;

            case LLONGSIZE:
                if (I16)
                    assert(0);             // not implemented yet
                if (I32)
                    goto neg_pair;
                cdb.gen(&ea);
                break;

            default:
                assert(0);
        }
        wantflags = 0;          // flags already set by NEG
        *pretregs &= ~mPSW;
    }
    else if ((e2 = e.EV.E2).Eoper == OPconst &&    // if rvalue is a const
             el_signx32(e2) &&
             // Don't evaluate e2 in register if we can use an INC or DEC
             (((size <= REGSIZE || tyfv(lty)) &&
               (op == OPaddass || op == OPminass) &&
               (el_allbits(e2, 1) || el_allbits(e2, -1))
              ) ||
              (!evalinregister(e2)
               && lty != TYhptr
              )
             )
            )
    {
        getlvalue(cdb,&ea,e1,0);
        modEA(cdb,&ea);
        ea.IFL2 = FLconst;
        ea.IEV2.Vsize_t = e2.EV.Vint;
        if (size <= REGSIZE || tyfv(lty) || opsize)
        {
            targ_int imm = ea.IEV2.Vint;

            // Shortcuts; take care when the result is wanted in the flags

            if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,imm,&reg) && imm != 1 && imm != -1 &&
                !opsize)
            {
                // Some register already holds the constant: OP EA,reg
                ea.Iop = op1;
                ea.Irm |= modregrm(0,reg & 7,0);
                if (I64)
                {
                    if (isbyte && reg >= 4)
                        ea.Irex |= REX;
                    if (reg & 8)
                        ea.Irex |= REX_R;
                }
            }
            else
            {
                // OP EA,immediate
                ea.Iop = 0x81;
                ea.Irm |= modregrm(0,digit,0);
                switch (op)
                {
                    case OPminass:      // convert to +=
                        ea.Irm ^= modregrm(0,5,0);
                        imm = -imm;
                        ea.IEV2.Vsize_t = imm;
                        goto case OPaddass;

                    case OPaddass:
                        if (imm == 1 || imm == -1)
                        {
                            if (imm == -1)
                                ea.Irm |= modregrm(0,1,0);  // DEC EA
                            ea.Iop = 0xFF;                  // INC EA (or DEC EA)
                        }
                        break;

                    default:
                        break;
                }
                ea.Iop ^= isbyte;             // for byte operations
            }
            ea.Iflags |= opsize;
            if (wantflags)
                ea.Iflags |= CFpsw;
            else if (!I16 && ea.Iflags & CFopsize)
            {
                // Flags are not needed: see if the operand size prefix can be dropped
                switch (op)
                {
                    case OPorass:
                    case OPxorass:
                        ea.IEV2.Vsize_t &= 0xFFFF;
                        ea.Iflags &= ~CFopsize; // don't worry about MSW
                        break;

                    case OPandass:
                        ea.IEV2.Vsize_t |= ~0xFFFFL;
                        ea.Iflags &= ~CFopsize; // don't worry about MSW
                        break;

                    case OPminass:
                    case OPaddass:
                        if ((ea.Irm & 0xC0) == 0xC0)    // EA is register
                            ea.Iflags &= ~CFopsize;
                        break;

                    default:
                        assert(0);
                }
            }

            // For scheduling purposes, we wish to replace:
            //    OP    EA
            // with:
            //    MOV   reg,EA
            //    OP    reg
            //    MOV   EA,reg
            if (wantregs && size <= REGSIZE && (ea.Irm & 0xC0) != 0xC0 &&
                (config.target_cpu == TARGET_Pentium ||
                 config.target_cpu == TARGET_PentiumMMX) &&
                config.flags4 & CFG4speed)
            {
                // Choose the working register, preferring a requested one
                regm_t scratch = allregs & ~idxregm(&ea);
                if (isbyte)
                    scratch &= BYTEREGS;
                if (scratch & wantregs)
                    scratch &= wantregs;

                allocreg(cdb,&scratch,&reg,lty);    // allocate register

                code ld = ea;
                ld.Iflags &= ~CFpsw;
                ld.Iop = LOD ^ isbyte;
                code_newreg(&ld, reg);
                cdb.gen(&ld);                       // MOV reg,EA

                ea.Irm = (ea.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7);
                if (reg & 8)
                    ea.Irex |= REX_B;
                cdb.gen(&ea);                       // OP reg

                ld.Iop ^= 2;
                cdb.gen(&ld);                       // MOV EA,reg

                retregs = scratch;
                reload = false;                     // result is already in reg
                if (e1.Ecount)
                    cssave(e1,retregs,!OTleaf(e1.Eoper));
            }
            else
            {
                cdb.gen(&ea);
                ea.Iflags &= ~opsize;
                ea.Iflags &= ~CFpsw;
                if (I16 && opsize)                     // if DWORD operand
                    ea.IEV1.Voffset += 2; // compensate for the reload code below
            }
        }
        else if (size == 2 * REGSIZE)
        {
            // lsw first, then the msw with the carry/borrow folded in
            ea.Iop = 0x81;
            ea.Irm |= modregrm(0,digit,0);
            ea.Iflags |= carry;
            cdb.gen(&ea);
            ea.Iflags &= ~CFpsw;

            getlvalue_msw(&ea);             // point to msw
            targ_uns msw = cast(uint)MSREG(e.EV.E2.EV.Vllong);
            ea.IEV2.Vuns = msw;             // msw of constant
            switch (op)
            {
                case OPminass:
                    ea.Irm ^= modregrm(0,6,0);      // SUB => SBB
                    break;

                case OPaddass:
                    ea.Irm |= modregrm(0,2,0);      // ADD => ADC
                    break;

                default:
                    break;
            }
            cdb.gen(&ea);
        }
        else
            assert(0);
        freenode(e.EV.E2);        // don't need it anymore
    }
    else if (isregvar(e1,&vmask,&vreg) &&
             (e2.Eoper == OPvar || e2.Eoper == OPind) &&
            !evalinregister(e2) &&
             size <= REGSIZE)               // deal with later
    {
        // lvalue lives in a register: OP vreg,EA(e2)
        getlvalue(cdb,&ea,e2,0);
        freenode(e2);
        getregs(cdb,vmask);
        code_newreg(&ea, vreg);
        if (I64 && size == 1 && vreg >= 4)
            ea.Irex |= REX;
        ea.Iop = op1 ^ 2;                       // toggle direction bit
        if (wantflags)
            ea.Iflags |= CFpsw;
        dirbit = 2;                             // remember we toggled it
        cdb.gen(&ea);
        retregs = 0;            // to trigger a bug if we attempt to use it
    }
    else if ((op == OPaddass || op == OPminass) &&
             size <= REGSIZE &&
             !e2.Ecount &&
             ((jop = jmpopcode(e2)) == JC || jop == JNC ||
              (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC)))
            )
    {
        /* The operand is a comparison whose outcome is the carry flag:
         * e1 += (x < y)    ADC EA,0
         * e1 -= (x < y)    SBB EA,0
         * e1 += (x >= y)   SBB EA,-1
         * e1 -= (x >= y)   ADC EA,-1
         */
        getlvalue(cdb,&ea,e1,0);             // get lvalue
        modEA(cdb,&ea);
        regm_t keepmsk = idxregm(&ea);
        retregs = mPSW;
        if (OTconv(e2.Eoper))
        {
            scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true);
            freenode(e2);
        }
        else
            scodelem(cdb,e2,&retregs,keepmsk,true);
        ea.Iop = 0x81 ^ isbyte;                   // ADC EA,imm16/32
        uint regop = 2;                     // ADC
        if ((op == OPaddass) ^ (jop == JC))
            regop = 3;                          // SBB
        code_newreg(&ea,regop);
        ea.Iflags |= opsize;
        if (wantflags)
            ea.Iflags |= CFpsw;
        ea.IFL2 = FLconst;
        ea.IEV2.Vsize_t = (jop == JC) ? 0 : ~cast(targ_size_t)0;
        cdb.gen(&ea);
        retregs = 0;            // to trigger a bug if we attempt to use it
    }
    else // evaluate e2 into register
    {
        retregs = (isbyte) ? BYTEREGS : ALLREGS;  // pick working reg
        if (lty == TYhptr)
            retregs &= ~mCX;                    // need CX for shift count
        scodelem(cdb,e.EV.E2,&retregs,0,true);   // get rvalue
        getlvalue(cdb,&ea,e1,retregs);         // get lvalue
        modEA(cdb,&ea);
        ea.Iop = op1;
        if (size <= REGSIZE || tyfv(lty))
        {
            reg = findreg(retregs);
            code_newreg(&ea, reg);              // OP1 EA,reg
            if (size == 1 && reg >= 4 && I64)
                ea.Irex |= REX;
            if (wantflags)
                ea.Iflags |= CFpsw;
        }
        else if (lty == TYhptr)
        {
            uint mreg = findregmsw(retregs);
            uint lreg = findreglsw(retregs);
            getregs(cdb,retregs | mCX);

            // If h -= l, convert to h += -l
            if (e.Eoper == OPminass)
            {
                cdb.gen2(0xF7,modregrm(3,3,mreg));      // NEG mreg
                cdb.gen2(0xF7,modregrm(3,3,lreg));      // NEG lreg
                code_orflag(cdb.last(),CFpsw);
                cdb.genc2(0x81,modregrm(3,3,mreg),0);   // SBB mreg,0
            }
            ea.Iop = 0x01;
            ea.Irm |= modregrm(0,lreg,0);
            cdb.gen(&ea);                               // ADD EA,lreg
            code_orflag(cdb.last(),CFpsw);
            cdb.genc2(0x81,modregrm(3,2,mreg),0);       // ADC mreg,0
            genshift(cdb);                              // MOV CX,offset __AHSHIFT
            cdb.gen2(0xD3,modregrm(3,4,mreg));          // SHL mreg,CL
            NEWREG(ea.Irm,mreg);                        // ADD EA+2,mreg
            getlvalue_msw(&ea);
        }
        else if (size == 2 * REGSIZE)
        {
            ea.Irm |= modregrm(0,findreglsw(retregs),0);
            cdb.gen(&ea);                               // OP1 EA,reg+1
            code_orflag(cdb.last(),carry);
            ea.Iop = op2;
            NEWREG(ea.Irm,findregmsw(retregs)); // OP2 EA+1,reg
            getlvalue_msw(&ea);
        }
        else
            assert(0);
        cdb.gen(&ea);
        retregs = 0;            // to trigger a bug if we attempt to use it
    }

    // See if the result has to be (re)loaded into a register.
    // A result wider than a register is also loaded when only the
    // flags are wanted.
    if (reload || (size > REGSIZE && wantflags))
    {
        if (size <= REGSIZE)
        {
            regm_t cand = ALLREGS;
            if (isbyte)
                cand = BYTEREGS;
            retregs = wantregs & cand;
            if (!retregs)
                retregs = cand;

            // If reg field is destination
            if (ea.Iop & 2 && ea.Iop < 0x40 && (ea.Iop & 7) <= 5)
            {
                reg = (ea.Irm >> 3) & 7;
                if (ea.Irex & REX_R)
                    reg |= 8;
                retregs = mask(reg);
                allocreg(cdb,&retregs,&reg,lty);
            }
            // If lvalue is a register, just use that register
            else if ((ea.Irm & 0xC0) == 0xC0)
            {
                reg = ea.Irm & 7;
                if (ea.Irex & REX_B)
                    reg |= 8;
                retregs = mask(reg);
                allocreg(cdb,&retregs,&reg,lty);
            }
            else
            {
                allocreg(cdb,&retregs,&reg,lty);
                ea.Iop = LOD ^ isbyte ^ dirbit;
                code_newreg(&ea, reg);
                // NOTE(review): the other byte-register paths in this function
                // set REX here; REX_W is kept exactly as it was
                if (I64 && isbyte && reg >= 4)
                    ea.Irex |= REX_W;
                cdb.gen(&ea);               // MOV reg,EA
            }
        }
        else if (tyfv(lty) || lty == TYhptr)
        {
            if (lty == TYhptr)
                getlvalue_lsw(&ea);
            regm_t idxregs = idxregm(&ea);
            retregs = wantregs & ~idxregs;
            if (!(retregs & IDXREGS))
                retregs |= IDXREGS & ~idxregs;
            if (!(retregs & mMSW))
                retregs |= mMSW & ALLREGS;
            allocreg(cdb,&retregs,&reg,lty);
            NEWREG(ea.Irm,findreglsw(retregs));
            if (retregs & mES)              // if want ES loaded
            {
                ea.Iop = 0xC4;
                cdb.gen(&ea);               // LES lreg,EA
            }
            else
            {
                ea.Iop = LOD;
                cdb.gen(&ea);               // MOV lreg,EA
                getlvalue_msw(&ea);
                if (I32)
                    ea.Iflags |= CFopsize;
                NEWREG(ea.Irm,reg);
                cdb.gen(&ea);               // MOV mreg,EA+2
            }
        }
        else if (size == 2 * REGSIZE)
        {
            regm_t idx = idxregm(&ea);
            retregs = wantregs;
            if (!retregs)
                retregs = ALLREGS;
            allocreg(cdb,&retregs,&reg,lty);
            ea.Iop = LOD;
            NEWREG(ea.Irm,reg);

            code lswld = ea;
            NEWREG(lswld.Irm,findreglsw(retregs));
            getlvalue_lsw(&lswld);

            // If reg is one of the index registers, load the other half first
            if (mask(reg) & idx)
            {
                cdb.gen(&lswld);           // MOV reg+1,EA
                cdb.gen(&ea);              // MOV reg,EA+2
            }
            else
            {
                cdb.gen(&ea);              // MOV reg,EA+2
                cdb.gen(&lswld);           // MOV reg+1,EA
            }
        }
        else
            assert(0);
        if (e1.Ecount)                 // if we gen a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
    }
    freenode(e1);
    if (size <= REGSIZE)
        *pretregs &= ~mPSW;            // flags are already set
    fixresult(cdb,e,retregs,pretregs);
}
1392 
/********************************
 * Generate code for *=
 *
 * XMM types are handed to xmmopass() (unless the result is wanted in ST0),
 * other floating types to the x87 or software floating point helpers.
 * Operands that fit in a register use an LEA sequence for a few constant
 * factors, otherwise IMUL or MUL. Operands of two registers use a pair of
 * registers, or the CLIB.lmul helper before the Pentium Pro.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the *= elem; e.EV.E1 is the lvalue, e.EV.E2 the multiplier
 *      pretregs = mask of where the result is wanted; handed on to the
 *                 opAssStoreReg()/opAssStorePair() helpers
 */

void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code ea;                    // instruction being built around the lvalue's EA
    regm_t retregs;
    reg_t dst;                  // register receiving the product
    uint regfield;              // value for the reg field of the multiply instruction

    //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;                     // OPxxxx

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    uint sz = _tysize[tyml];

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
            opass87(cdb,e,pretregs);
        else
            opassdbl(cdb,e,pretregs,op);
        return;
    }

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    if (sz <= REGSIZE)                  // if word or byte
    {
        if (e2.Eoper == OPconst &&
            (I32 || I64) &&
            el_signx32(e2) &&
            sz >= 4)
        {
            // See if we can use an LEA instruction

            int scale = -1;     // scale of the first LEA; -1: single register sequence not used
            int scale2 = 0;     // scale of an optional second LEA
            int preshift = -1;  // shift count of the two register sequence; -1: not used

            targ_size_t leafactor = cast(targ_size_t)el_tolong(e2);
            switch (leafactor)
            {
                case 3:
                case 6:     scale = 1;                  break;
                case 12:    scale = 1; scale2 = 2;      break;
                case 24:    scale = 1; scale2 = 3;      break;

                case 5:
                case 10:    scale = 2;                  break;
                case 20:    scale = 2; scale2 = 2;      break;
                case 40:    scale = 2; scale2 = 3;      break;

                case 9:
                case 18:    scale = 3;                  break;
                case 36:    scale = 3; scale2 = 2;      break;
                case 72:    scale = 3; scale2 = 3;      break;

                case 13:
                case 26:    preshift = 0;               break;
                case 37:
                case 74:    preshift = 2;               break;

                default:
                    break;
            }

            if (scale >= 0)
            {
                // One register: LEA, then a second LEA or an ADD
                getlvalue(cdb,&ea,e1,0);           // get EA
                modEA(cdb,&ea);
                freenode(e2);
                regm_t idxregs = idxregm(&ea);
                regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
                if (!regm)
                    regm = allregs & ~(idxregs | mBP | mR13);
                reg_t reg;
                allocreg(cdb,&regm,&reg,tyml);
                ea.Iop = LOD;
                code_newreg(&ea,reg);
                ea.Irex |= rex;
                cdb.gen(&ea);                       // MOV reg,EA

                assert((reg & 7) != BP);
                cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                            modregxrmx(scale,reg,reg));  // LEA reg,[scale*reg][reg]
                if (scale2)
                {
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                   modregxrm(scale2,reg,5));
                    cdb.last().IFL1 = FLconst;
                    cdb.last().IEV1.Vint = 0;       // LEA reg,0[scale2*reg]
                }
                else if (!(leafactor & 1))    // if even factor
                {
                    genregs(cdb,0x03,reg,reg); // ADD reg,reg
                    code_orrex(cdb.last(),rex);
                }
                opAssStoreReg(cdb,ea,e,reg,pretregs);
                return;
            }

            if (preshift >= 0)
            {
                // Two registers: reg = 5*sreg, then reg += 8*(sreg << preshift)
                getlvalue(cdb,&ea,e1,0);           // get EA
                modEA(cdb,&ea);
                freenode(e2);
                regm_t idxregs = idxregm(&ea);
                regm_t regm = *pretregs & ~(idxregs | mBP | mR13);  // don't use EBP
                if (!regm)
                    regm = allregs & ~(idxregs | mBP | mR13);
                reg_t reg;                          // return register
                allocreg(cdb,&regm,&reg,tyml);

                reg_t sreg = allocScratchReg(cdb, allregs & ~(regm | idxregs | mBP | mR13));

                ea.Iop = LOD;
                code_newreg(&ea,sreg);
                ea.Irex |= rex;
                cdb.gen(&ea);                                         // MOV sreg,EA

                assert((sreg & 7) != BP);
                assert((reg & 7) != BP);
                cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                      modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
                if (preshift)
                    cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),preshift); // SHL sreg,preshift
                cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                      modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
                if (!(leafactor & 1))                                 // if even factor
                {
                    genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
                    code_orrex(cdb.last(),rex);
                }
                opAssStoreReg(cdb,ea,e,reg,pretregs);
                return;
            }
        }

        const uint isbyte = (sz == 1);  // 1 for byte operation

        // A constant multiplier can use the immediate form of IMUL, unless
        // it is a 64 bit factor that does not survive truncation to int
        const bool constfactor = config.target_cpu >= TARGET_80286 &&
                                 e2.Eoper == OPconst && !isbyte;
        targ_size_t immfactor = 0;
        bool toowide = false;
        if (constfactor)
        {
            immfactor = cast(targ_size_t)el_tolong(e2);
            toowide = I64 && sz == 8 && immfactor != cast(int)immfactor;
        }

        if (constfactor && !toowide)
        {
            freenode(e2);
            getlvalue(cdb,&ea,e1,0);     // get EA
            regm_t idxregs = idxregm(&ea);
            retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;
            if (!retregs)
                retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&dst,tyml);
            ea.Iop = 0x69;                  // IMUL reg,EA,e2value
            ea.IFL2 = FLconst;
            ea.IEV2.Vint = cast(int)immfactor;
            regfield = dst;
        }
        else if (toowide || (!I16 && !isbyte))
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (!retregs)
                retregs = ALLREGS;
            codelem(cdb,e2,&retregs,false); // load rvalue in reg
            getlvalue(cdb,&ea,e1,retregs);  // get EA
            getregs(cdb,retregs);           // destroy these regs
            ea.Iop = 0x0FAF;                        // IMUL dst,EA
            dst = findreg(retregs);
            regfield = dst;
        }
        else
        {
            retregs = mAX;
            codelem(cdb,e2,&retregs,false);      // load rvalue in AX
            getlvalue(cdb,&ea,e1,mAX);           // get EA
            getregs(cdb,isbyte ? mAX : mAX | mDX); // destroy these regs
            ea.Iop = 0xF7 ^ isbyte;                        // [I]MUL EA
            const bool unsignedmul = tyuns(tyml) || tyuns(e2.Ety);
            regfield = unsignedmul ? 4 : 5; // MUL/IMUL
            dst = AX;                       // result register for *
        }
        code_newreg(&ea,regfield);
        cdb.gen(&ea);

        opAssStoreReg(cdb, ea, e, dst, pretregs);
        return;
    }

    if (sz != 2 * REGSIZE)
        assert(0);

    // Operand occupies two registers
    if (e2.Eoper == OPconst && I32)
    {
        /*  if (msw)
              IMUL    EDX,EDX,lsw
              IMUL    reg,EAX,msw
              ADD     reg,EDX
            else
              IMUL    reg,EDX,lsw
            MOV       EDX,lsw
            MUL       EDX
            ADD       EDX,reg
         */
        freenode(e2);
        retregs = mDX|mAX;
        reg_t hireg, loreg;
        opAssLoadPair(cdb, ea, e, hireg, loreg, retregs, 0);
        const regm_t keepmsk = idxregm(&ea);

        reg_t reg = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

        targ_size_t pairfactor = cast(targ_size_t)el_tolong(e2);
        const lsw = cast(targ_int)(pairfactor & ((1L << (REGSIZE * 8)) - 1));
        const msw = cast(targ_int)(pairfactor >> (REGSIZE * 8));

        if (msw)
        {
            genmulimm(cdb,DX,DX,lsw);          // IMUL EDX,EDX,lsw
            genmulimm(cdb,reg,AX,msw);         // IMUL reg,EAX,msw
            cdb.gen2(0x03,modregrm(3,reg,DX)); // ADD reg,EDX
        }
        else
            genmulimm(cdb,reg,DX,lsw);         // IMUL reg,EDX,lsw

        movregconst(cdb,DX,lsw,0);             // MOV EDX,lsw
        getregs(cdb,mDX);
        cdb.gen2(0xF7,modregrm(3,4,DX));       // MUL EDX
        cdb.gen2(0x03,modregrm(3,DX,reg));     // ADD EDX,reg
    }
    else
    {
        retregs = mDX | mAX;
        regm_t rhsregs = (config.target_cpu >= TARGET_PentiumPro) ? allregs & ~retregs : mCX | mBX;
        codelem(cdb,e2,&rhsregs,false);
        getlvalue(cdb,&ea,e1,retregs | rhsregs);
        getregs(cdb,retregs);
        ea.Iop = LOD;
        cdb.gen(&ea);                   // MOV AX,EA
        getlvalue_msw(&ea);
        ea.Irm |= modregrm(0,DX,0);
        cdb.gen(&ea);                   // MOV DX,EA+2
        getlvalue_lsw(&ea);
        if (config.target_cpu >= TARGET_PentiumPro)
        {
            regm_t rhslo = findreglsw(rhsregs);
            regm_t rhshi = findregmsw(rhsregs);
            /*  IMUL    rhshi,EAX
                IMUL    EDX,rhslo
                ADD     rhshi,EDX
                MUL     rhslo
                ADD     EDX,rhshi
             */
            getregs(cdb,mAX|mDX|mask(rhshi));
            cdb.gen2(0x0FAF,modregrm(3,rhshi,AX));
            cdb.gen2(0x0FAF,modregrm(3,DX,rhslo));
            cdb.gen2(0x03,modregrm(3,rhshi,DX));
            cdb.gen2(0xF7,modregrm(3,4,rhslo));
            cdb.gen2(0x03,modregrm(3,DX,rhshi));
        }
        else
        {
            callclib(cdb,e,CLIB.lmul,&retregs,idxregm(&ea));
        }
    }

    opAssStorePair(cdb, ea, e, findregmsw(retregs), findreglsw(retregs), pretregs);
}
1684 
1685 
1686 /********************************
1687  * Generate code for /= %=
      *
      * Strategy, in the order the cases are tested:
      *  - lvalue types that can live in XMM registers (not for %=, and not when
      *    ST0 is requested) are handed to xmmopass()
      *  - remaining floating types are handed to opass87() on EX_posix targets,
      *    otherwise to opassdbl()
      *  - operands no larger than REGSIZE: multiply based sequences for constant
      *    divisors that are not a power of 2, shift based sequences for signed
      *    power of 2 divisors (all only when CFG4speed is set), otherwise DIV/IDIV
      *  - operands of 2 * REGSIZE: inline sequences for signed power of 2
      *    divisors (see the NOTE(review) remarks on those branches), otherwise a
      *    call to a CLIB helper
      *
      * Params:
      *      cdb = code builder the generated instructions are appended to
      *      e = the /= or %= elem; e.EV.E1 is the lvalue, e.EV.E2 the divisor
      *      pretregs = register mask for the result; forwarded to the routines
      *                 this function delegates to and to the opAssStore* helpers
1688  */
1689 
1690 void cddivass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1691 {
1692     elem *e1 = e.EV.E1;
1693     elem *e2 = e.EV.E2;
1694 
1695     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
1696     OPER op = e.Eoper;                     // OPxxxx
1697 
1698     // See if evaluate in XMM registers
1699     if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
1700     {
1701         xmmopass(cdb,e,pretregs);
1702         return;
1703     }
1704 
         // Floating point that was not handled by the XMM path above
1705     if (tyfloating(tyml))
1706     {
1707         if (config.exe & EX_posix)
1708         {
1709             opass87(cdb,e,pretregs);
1710         }
1711         else
1712         {
1713             opassdbl(cdb,e,pretregs,op);
1714         }
1715         return;
1716     }
1717 
         // Deliberately not initialized; its address is handed to getlvalue() (or it
         // is passed to opAssLoadPair()) before any field is read in this function.
1718     code cs = void;
1719 
1720     //printf("cddivass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
         // Treated as unsigned if either the lvalue type or the divisor type is unsigned
1721     char uns = tyuns(tyml) || tyuns(e2.Ety);
1722     uint sz = _tysize[tyml];
1723 
1724     uint rex = (I64 && sz == 8) ? REX_W : 0;
1725     uint grex = rex << 16;          // 64 bit operands
1726 
1727     if (sz <= REGSIZE)                  // if word or byte
1728     {
1729         uint isbyte = (sz == 1);        // 1 for byte operation
1730         reg_t resreg;
             // The next four are only assigned when e2 is a constant; every use
             // below is guarded by an e2.Eoper == OPconst test.
1731         targ_size_t e2factor;
1732         targ_size_t d;
1733         bool neg;
1734         int pow2;
1735 
1736         assert(!isbyte);                      // should never happen
1737         assert(I16 || sz != SHORTSIZE);
1738 
1739         if (e2.Eoper == OPconst)
1740         {
1741             e2factor = cast(targ_size_t)el_tolong(e2);
1742             pow2 = ispow2(e2factor);
1743             d = e2factor;
                 // For a negative signed divisor, d becomes its negation and neg records the sign
1744             if (!uns && cast(targ_llong)e2factor < 0)
1745             {
1746                 neg = true;
1747                 d = -d;
1748             }
1749         }
1750 
1751         // Signed divide by a constant
             // (d & (d - 1)) is nonzero only when d has more than one bit set
1752         if (config.flags4 & CFG4speed &&
1753             e2.Eoper == OPconst &&
1754             !uns &&
1755             (d & (d - 1)) &&
1756             ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
1757         {
1758             /* R1 / 10
1759              *
1760              *  MOV     EAX,m
1761              *  IMUL    R1
1762              *  MOV     EAX,R1
1763              *  SAR     EAX,31
1764              *  SAR     EDX,shpost
1765              *  SUB     EDX,EAX
1766              *  IMUL    EAX,EDX,d
1767              *  SUB     R1,EAX
1768              *
1769              * EDX = quotient
1770              * R1 = remainder
1771              */
1772             assert(sz == 4 || sz == 8);
1773 
1774             ulong m;
1775             int shpost;
1776             const int N = sz * 8;
1777             const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);
1778 
1779             freenode(e2);
1780 
1781             getlvalue(cdb,&cs,e1,mAX | mDX);
1782             reg_t reg;
                 // The dividend is loaded into a register other than AX, DX and the
                 // index registers of the EA, because AX:DX receive the product below
1783             opAssLoadReg(cdb, cs, e, reg, allregs & ~( mAX | mDX | idxregm(&cs)));    // MOV reg,EA
1784             getregs(cdb, mAX|mDX);
1785 
1786             /* Algorithm 5.2
1787              * if m>=2**(N-1)
1788              *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
1789              * else
1790              *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
1791              * if (neg)
1792              *    q = -q
1793              */
1794             const bool mgt = mhighbit || m >= (1UL << (N - 1));
1795             movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
1796             cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL reg
1797             if (mgt)
1798                 cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,reg
1799             getregsNoSave(mAX);                                     // EAX no longer contains 'm'
1800             genmovreg(cdb, AX, reg);                                // MOV EAX,reg
1801             cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
1802             if (shpost)
1803                 cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
1804             reg_t r3;
                 // For /= with a negative divisor the subtraction is done the other way
                 // round, which yields the negated quotient (left in EAX instead of EDX)
1805             if (neg && op == OPdivass)
1806             {
1807                 cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
1808                 r3 = AX;
1809             }
1810             else
1811             {
1812                 cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
1813                 r3 = DX;
1814             }
1815 
1816             // r3 is quotient
1817             reg_t resregx;
1818             switch (op)
1819             {   case OPdivass:
1820                     resregx = r3;
1821                     break;
1822 
1823                 case OPmodass:
                         // remainder = reg - quotient * d
1824                     assert(reg != AX && r3 == DX);
1825                     if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
1826                     {
1827                         cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
1828                     }
1829                     else
1830                     {
1831                         movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);     // MOV EAX,d
1832                         cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
1833                         getregsNoSave(mAX);                             // EAX no longer contains 'd'
1834                     }
1835                     cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
1836                     resregx = reg;
1837                     break;
1838 
1839                 default:
1840                     assert(0);
1841             }
1842 
1843             opAssStoreReg(cdb, cs, e, resregx, pretregs);
1844             return;
1845         }
1846 
1847         // Unsigned divide by a constant
             // Nested function; it reads sz, e2factor, op, e, e1, e2, grex and pretregs
             // from the enclosing function. It is called from exactly one place below.
1848         void unsignedDivideByConstant(ref CodeBuilder cdb)
1849         {
1850             assert(sz == 4 || sz == 8);
1851 
1852             reg_t r3;
1853             reg_t reg;
1854             ulong m;
1855             int shpre;
1856             int shpost;
                 // Local cs, separate from the cs declared in the enclosing function
1857             code cs = void;
1858 
                 // The return value of udiv_coefficients() selects between the two sequences
1859             if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
1860             {
1861                 /* t1 = MULUH(m, n)
1862                  * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
1863                  *   MOV   EAX,reg
1864                  *   MOV   EDX,m
1865                  *   MUL   EDX
1866                  *   MOV   EAX,reg
1867                  *   SUB   EAX,EDX
1868                  *   SHR   EAX,1
1869                  *   LEA   R3,[EAX][EDX]
1870                  *   SHR   R3,shpost-1
1871                  */
1872                 assert(shpre == 0);
1873 
1874                 freenode(e2);
1875                 getlvalue(cdb,&cs,e1,mAX | mDX);
1876                 regm_t idxregs = idxregm(&cs);
1877                 opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
1878                 getregs(cdb, mAX|mDX);
1879 
1880                 genmovreg(cdb,AX,reg);                                // MOV EAX,reg
1881                 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0); // MOV EDX,m
1882                 getregs(cdb,mask(reg) | mDX | mAX);
1883                 cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
1884                 genmovreg(cdb,AX,reg);                                // MOV EAX,reg
1885                 cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
1886                 cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
1887                 regm_t regm3 = allregs & ~idxregs;
                     // For %= the quotient register must not overlap reg (still needed for
                     // the remainder), nor EAX when EAX will be loaded with e2factor below
1888                 if (op == OPmodass)
1889                 {
1890                     regm3 &= ~mask(reg);
1891                     if (!el_signx32(e2))
1892                         regm3 &= ~mAX;
1893                 }
1894                 allocreg(cdb,&regm3,&r3,TYint);
1895                 cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
1896                 if (shpost != 1)
1897                     cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
1898             }
1899             else
1900             {
1901                 /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
1902                  *   SHR   EAX,shpre
1903                  *   MOV   reg,m
1904                  *   MUL   reg
1905                  *   SHR   EDX,shpost
1906                  */
1907 
1908                 freenode(e2);
1909                 getlvalue(cdb,&cs,e1,mAX | mDX);
1910                 regm_t idxregs = idxregm(&cs);
1911                 opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
1912                 getregs(cdb, mAX|mDX);
1913 
1914                 if (reg != AX)
1915                 {
1916                     getregs(cdb,mAX);
1917                     genmovreg(cdb,AX,reg);                              // MOV EAX,reg
1918                 }
1919                 if (shpre)
1920                 {
1921                     getregs(cdb,mAX);
1922                     cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
1923                 }
1924                 getregs(cdb,mDX);
1925                 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
1926                 getregs(cdb,mDX | mAX);
1927                 cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
1928                 if (shpost)
1929                     cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
1930                 r3 = DX;
1931             }
1932 
1933             reg_t resregx;
1934             switch (op)
1935             {
1936                 case OPdivass:
1937                     // r3 = quotient
1938                     resregx = r3;
1939                     break;
1940 
1941                 case OPmodass:
1942                     /* reg = original value
1943                      * r3  = quotient
1944                      */
1945                     assert(reg != AX);
1946                     if (el_signx32(e2))
1947                     {
1948                         cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
1949                     }
1950                     else
1951                     {
1952                         assert(!(mask(r3) & mAX));
1953                         movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
1954                         getregs(cdb,mAX);
1955                         cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
1956                     }
1957                     getregs(cdb,mask(reg));
1958                     cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
1959                     resregx = reg;
1960                     break;
1961 
1962                 default:
1963                     assert(0);
1964             }
1965 
1966             opAssStoreReg(cdb, cs, e, resregx, pretregs);
1967             return;
1968         }
1969 
             // Unsigned constant divisor greater than 2 that is not a power of 2
1970         if (config.flags4 & CFG4speed &&
1971             e2.Eoper == OPconst &&
1972             uns &&
1973             e2factor > 2 && (e2factor & (e2factor - 1)) &&
1974             ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
1975         {
1976             unsignedDivideByConstant(cdb);
1977             return;
1978         }
1979 
             // Signed constant divisor for which ispow2() did not return -1 and which
             // fits in an int; the last clause keeps the shift-by-immediate /= sequence
             // away from targets below TARGET_80286
1980         if (config.flags4 & CFG4speed &&
1981             e2.Eoper == OPconst && !uns &&
1982             (sz == REGSIZE || (I64 && sz == 4)) &&
1983             pow2 != -1 &&
1984             e2factor == cast(int)e2factor &&
1985             !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)
1986            )
1987         {
1988             freenode(e2);
1989             if (pow2 == 1 && op == OPdivass && config.target_cpu > TARGET_80386)
1990             {
1991                 /* This is better than the code further down because it is
1992                  * not constrained to using AX and DX.
1993                  */
1994                 getlvalue(cdb,&cs,e1,0);
1995                 regm_t idxregs = idxregm(&cs);
1996                 reg_t reg;
1997                 opAssLoadReg(cdb,cs,e,reg,allregs & ~idxregs); // MOV reg,EA
1998 
1999                 reg_t r = allocScratchReg(cdb, allregs & ~(idxregs | mask(reg)));
2000                 genmovreg(cdb,r,reg);                        // MOV r,reg
2001                 cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
2002                 cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
2003                 cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1
2004 
2005                 opAssStoreReg(cdb, cs, e, reg, pretregs);
2006                 return;
2007             }
2008 
2009             // Signed divide or modulo by power of 2
2010             getlvalue(cdb,&cs,e1,mAX | mDX);
2011             reg_t reg;
2012             opAssLoadReg(cdb,cs,e,reg,mAX);
2013 
2014             getregs(cdb,mDX);                   // DX is scratch register
2015             cdb.gen1(0x99);                     // CWD
2016             code_orrex(cdb.last(), rex);
2017             if (pow2 == 1)
2018             {
2019                 if (op == OPdivass)
2020                 {
2021                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2022                     cdb.gen2(0xD1,grex | modregrm(3,7,AX));        // SAR AX,1
2023                     resreg = AX;
2024                 }
2025                 else // OPmod
2026                 {
2027                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2028                     cdb.genc2(0x81,grex | modregrm(3,4,AX),1);     // AND AX,1
2029                     cdb.gen2(0x03,grex | modregrm(3,DX,AX));       // ADD DX,AX
2030                     resreg = DX;
2031                 }
2032             }
2033             else
2034             {
2035                 assert(pow2 < 32);
2036                 targ_ulong m = (1 << pow2) - 1;
2037                 if (op == OPdivass)
2038                 {
2039                     cdb.genc2(0x81,grex | modregrm(3,4,DX),m);     // AND DX,m
2040                     cdb.gen2(0x03,grex | modregrm(3,AX,DX));       // ADD AX,DX
2041                     // Be careful not to generate this for 8088
2042                     assert(config.target_cpu >= TARGET_80286);
2043                     cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2);  // SAR AX,pow2
2044                     resreg = AX;
2045                 }
2046                 else // OPmodass
2047                 {
2048                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2049                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2050                     cdb.genc2(0x81,grex | modregrm(3,4,AX),m);     // AND AX,m
2051                     cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
2052                     cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
2053                     resreg = AX;
2054                 }
2055             }
2056         }
2057         else
2058         {
                 // General case: divisor in a register other than AX/DX, dividend in
                 // DX:AX, then DIV or IDIV
2059             regm_t retregs = ALLREGS & ~(mAX|mDX);     // DX gets sign extension
2060             codelem(cdb,e2,&retregs,false);            // load rvalue in retregs
2061             reg_t reg = findreg(retregs);
2062             getlvalue(cdb,&cs,e1,mAX | mDX | retregs); // get EA
2063             getregs(cdb,mAX | mDX);         // destroy these regs
2064             cs.Irm |= modregrm(0,AX,0);
2065             cs.Iop = LOD;
2066             cdb.gen(&cs);                   // MOV AX,EA
2067             if (uns)                        // if uint
2068                 movregconst(cdb,DX,0,0);    // CLR DX
2069             else                            // else signed
2070             {
2071                 cdb.gen1(0x99);             // CWD
2072                 code_orrex(cdb.last(),rex);
2073             }
2074             getregs(cdb,mDX | mAX); // DX and AX will be destroyed
2075             const uint opr = uns ? 6 : 7;     // DIV/IDIV
2076             genregs(cdb,0xF7,opr,reg);   // OPR reg
2077             code_orrex(cdb.last(),rex);
2078             resreg = (op == OPmodass) ? DX : AX;        // result register
2079         }
             // Shared exit for the power of 2 and DIV/IDIV paths above
2080         opAssStoreReg(cdb, cs, e, resreg, pretregs);
2081         return;
2082     }
2083 
         // From here on the operand occupies a register pair
2084     assert(sz == 2 * REGSIZE);
2085 
2086     targ_size_t e2factor;
2087     int pow2;
2088     if (e2.Eoper == OPconst)
2089     {
2090         e2factor = cast(targ_size_t)el_tolong(e2);
2091         pow2 = ispow2(e2factor);
2092     }
2093 
2094     // Register pair signed divide by power of 2
         // NOTE(review): `op` is a copy of e.Eoper, so `op == OPdivass` and
         // `e.Eoper == OPconst` cannot both hold (assuming the OP codes are distinct);
         // as written this branch looks unreachable. `e2.Eoper == OPconst`, as tested
         // just above when computing pow2, was presumably intended. Confirm before
         // changing: enabling the branch activates code that may never have run.
2095     if (op == OPdivass &&
2096         !uns &&
2097         e.Eoper == OPconst &&
2098         pow2 != -1 &&
2099         I32 // not set up for I16 or I64 cent
2100        )
2101     {
2102         freenode(e2);
2103         regm_t retregs = mDX|mAX | mCX|mBX;     // LSW must be byte reg because of later SETZ
2104         reg_t rhi, rlo;
2105         opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
2106         const regm_t keepmsk = idxregm(&cs);
2107         retregs = mask(rhi) | mask(rlo);
2108 
2109         if (pow2 < 32)
2110         {
2111             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2112 
2113             genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
2114             if (pow2 == 1)
2115                 cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
2116             else
2117             {
2118                 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
2119                 cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
2120             }
2121             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
2122             cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
2123             cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
2124             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
2125         }
2126         else if (pow2 == 32)
2127         {
2128             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2129 
2130             genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
2131             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
2132             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
2133             cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
2134             cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
2135             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
2136         }
2137         else if (pow2 < 63)
2138         {
2139             reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
2140             reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk | mask(r1)));
2141 
2142             genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
2143             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
2144             cdb.genmovreg(r2,r1);                                         // MOV r2,r1
2145 
2146             if (pow2 == 33)
2147             {
2148                 cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
2149                 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
2150                 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
2151             }
2152             else
2153             {
2154                 cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
2155                 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
2156                 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
2157             }
2158 
2159             cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
2160             cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
2161             cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
2162         }
2163         else
2164         {
2165             // This may be better done by cgelem.d
2166             assert(pow2 == 63);
2167             assert(mask(rlo) & BYTEREGS);                          // for SETZ
                 // NOTE(review): opcode 0x81 with reg field 4 is annotated as AND everywhere
                 // else in this function, but the comment on the next line says ADD (and
                 // the constant in it is missing a digit) -- verify which was intended.
2168             cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000
2169             cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
2170             cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
2171             cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
2172             movregconst(cdb,rhi,0,0);                              // MOV rhi,0
2173         }
2174 
             // NOTE(review): the registers are passed as (rlo, rhi) here, whereas the CLIB
             // path at the end of this function passes (findregmsw, findreglsw), i.e.
             // the high word first -- verify against the signature of opAssStorePair.
2175         opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
2176         return;
2177     }
2178 
2179     // Register pair signed modulo by power of 2
         // NOTE(review): same concern as for the divide branch above: with
         // `op == OPmodass`, `e.Eoper == OPconst` cannot also hold, so this branch
         // looks unreachable; `e2.Eoper` was presumably intended -- confirm.
2180     if (op == OPmodass &&
2181         !uns &&
2182         e.Eoper == OPconst &&
2183         pow2 != -1 &&
2184         I32 // not set up for I64 cent yet
2185        )
2186     {
2187         freenode(e2);
2188         regm_t retregs = mDX|mAX;
2189         reg_t rhi, rlo;
2190         opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
2191         const regm_t keepmsk = idxregm(&cs);
2192 
2193         regm_t scratchm = allregs & ~(retregs | keepmsk);
2194         if (pow2 == 63)
2195             scratchm &= BYTEREGS;               // because of SETZ
2196         reg_t r1 = allocScratchReg(cdb, scratchm);
2197 
2198         if (pow2 < 32)
2199         {
2200             cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
2201             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
2202             cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
2203             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
2204             cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
2205             cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
2206             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
2207             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
2208         }
2209         else if (pow2 == 32)
2210         {
2211             cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
2212             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
2213             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
2214             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
2215             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
2216         }
2217         else if (pow2 < 63)
2218         {
                 // NOTE(review): this removes every register that was a candidate for r1
                 // from the mask, not just r1 itself; the corresponding divide branch
                 // excludes only mask(r1). Verify that a register remains for r2.
2219             scratchm = allregs & ~(retregs | scratchm);
2220             reg_t r2;
2221             allocreg(cdb,&scratchm,&r2,TYint);
2222 
2223             cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
2224             cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
2225             cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
2226             cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
2227             cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
2228             cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
2229             cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
2230             cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
2231             cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
2232             cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
2233         }
2234         else
2235         {
2236             // This may be better done by cgelem.d
2237             assert(pow2 == 63);
2238 
2239             cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
2240             cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
2241             cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
2242             cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
2243             cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
2244         }
2245 
             // NOTE(review): (rlo, rhi) argument order again differs from the
             // (msw, lsw) order used by the CLIB path below -- verify.
2246         opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
2247         return;
2248     }
2249 
         // General register pair case: call the runtime library helper
2250     regm_t rretregs = mCX|mBX;
2251     codelem(cdb,e2,&rretregs,false);    // load e2 into CX|BX
2252 
2253     reg_t rlo;
2254     reg_t rhi;
2255     opAssLoadPair(cdb, cs, e, rhi, rlo, mDX|mAX, rretregs);
2256 
         // The result is taken from CX|BX for %= and from DX|AX for /=
2257     regm_t retregs = (op == OPmodass) ? mCX|mBX : mDX|mAX;
2258     uint lib = uns ? CLIB.uldiv : CLIB.ldiv;
         // The modulo helper is selected by incrementing the index of the divide helper
2259     if (op == OPmodass)
2260         ++lib;
2261     callclib(cdb,e,lib,&retregs,idxregm(&cs));
2262 
2263     opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
2264 }
2265 
2266 
2267 /********************************
2268  * Generate code for <<= and >>=
2269  */
2270 
2271 void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2272 {
2273     code cs;
2274     regm_t retregs;
2275     uint op1,op2;
2276     reg_t reg;
2277 
2278     elem *e1 = e.EV.E1;
2279     elem *e2 = e.EV.E2;
2280 
2281     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
2282     uint sz = _tysize[tyml];
2283     uint isbyte = tybyte(e.Ety) != 0;        // 1 for byte operations
2284     tym_t tym = tybasic(e.Ety);                // type of result
2285     OPER oper = e.Eoper;
2286     assert(tysize(e2.Ety) <= REGSIZE);
2287 
2288     uint rex = (I64 && sz == 8) ? REX_W : 0;
2289 
2290     // if our lvalue is a cse, make sure we evaluate for result in register
2291     if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,&reg))
2292         *pretregs |= ALLREGS;
2293 
2294     version (SCPP)
2295     {
2296         // Do this until the rest of the compiler does OPshr/OPashr correctly
2297         if (oper == OPshrass)
2298             oper = tyuns(tyml) ? OPshrass : OPashrass;
2299     }
2300 
2301     // Select opcodes. op2 is used for msw for long shifts.
2302 
2303     switch (oper)
2304     {
2305         case OPshlass:
2306             op1 = 4;                    // SHL
2307             op2 = 2;                    // RCL
2308             break;
2309 
2310         case OPshrass:
2311             op1 = 5;                    // SHR
2312             op2 = 3;                    // RCR
2313             break;
2314 
2315         case OPashrass:
2316             op1 = 7;                    // SAR
2317             op2 = 3;                    // RCR
2318             break;
2319 
2320         default:
2321             assert(0);
2322     }
2323 
2324 
2325     uint v = 0xD3;                  // for SHIFT xx,CL cases
2326     uint loopcnt = 1;
2327     uint conste2 = false;
2328     uint shiftcnt = 0;              // avoid "use before initialized" warnings
2329     if (e2.Eoper == OPconst)
2330     {
2331         conste2 = true;                 // e2 is a constant
2332         shiftcnt = e2.EV.Vint;         // byte ordering of host
2333         if (config.target_cpu >= TARGET_80286 &&
2334             sz <= REGSIZE &&
2335             shiftcnt != 1)
2336             v = 0xC1;                   // SHIFT xx,shiftcnt
2337         else if (shiftcnt <= 3)
2338         {
2339             loopcnt = shiftcnt;
2340             v = 0xD1;                   // SHIFT xx,1
2341         }
2342     }
2343 
2344     if (v == 0xD3)                        // if COUNT == CL
2345     {
2346         retregs = mCX;
2347         codelem(cdb,e2,&retregs,false);
2348     }
2349     else
2350         freenode(e2);
2351     getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
2352     modEA(cdb,&cs);             // check for modifying register
2353 
2354     if (*pretregs == 0 ||               // if don't return result
2355         (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
2356         sz > REGSIZE
2357        )
2358     {
2359         retregs = 0;            // value not returned in a register
2360         cs.Iop = v ^ isbyte;
2361         while (loopcnt--)
2362         {
2363             NEWREG(cs.Irm,op1);           // make sure op1 is first
2364             if (sz <= REGSIZE)
2365             {
2366                 if (conste2)
2367                 {
2368                     cs.IFL2 = FLconst;
2369                     cs.IEV2.Vint = shiftcnt;
2370                 }
2371                 cdb.gen(&cs);             // SHIFT EA,[CL|1]
2372                 if (*pretregs & mPSW && !loopcnt && conste2)
2373                   code_orflag(cdb.last(),CFpsw);
2374             }
2375             else // TYlong
2376             {
2377                 cs.Iop = 0xD1;            // plain shift
2378                 code *ce = gennop(null);                  // ce: NOP
2379                 if (v == 0xD3)
2380                 {
2381                     getregs(cdb,mCX);
2382                     if (!conste2)
2383                     {
2384                         assert(loopcnt == 0);
2385                         genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
2386                     }
2387                 }
2388                 code *cg;
2389                 if (oper == OPshlass)
2390                 {
2391                     cdb.gen(&cs);               // cg: SHIFT EA
2392                     cg = cdb.last();
2393                     code_orflag(cg,CFpsw);
2394                     getlvalue_msw(&cs);
2395                     NEWREG(cs.Irm,op2);
2396                     cdb.gen(&cs);               // SHIFT EA
2397                     getlvalue_lsw(&cs);
2398                 }
2399                 else
2400                 {
2401                     getlvalue_msw(&cs);
2402                     cdb.gen(&cs);
2403                     cg = cdb.last();
2404                     code_orflag(cg,CFpsw);
2405                     NEWREG(cs.Irm,op2);
2406                     getlvalue_lsw(&cs);
2407                     cdb.gen(&cs);
2408                 }
2409                 if (v == 0xD3)                    // if building a loop
2410                 {
2411                     genjmp(cdb,LOOP,FLcode,cast(block *) cg); // LOOP cg
2412                     regimmed_set(CX,0);           // note that now CX == 0
2413                 }
2414                 cdb.append(ce);
2415             }
2416         }
2417 
2418         // If we want the result, we must load it from the EA
2419         // into a register.
2420 
2421         if (sz == 2 * REGSIZE && *pretregs)
2422         {
2423             retregs = *pretregs & (ALLREGS | mBP);
2424             if (retregs)
2425             {
2426                 retregs &= ~idxregm(&cs);
2427                 allocreg(cdb,&retregs,&reg,tym);
2428                 cs.Iop = LOD;
2429 
2430                 // be careful not to trash any index regs
2431                 // do MSW first (which can't be an index reg)
2432                 getlvalue_msw(&cs);
2433                 NEWREG(cs.Irm,reg);
2434                 cdb.gen(&cs);
2435                 getlvalue_lsw(&cs);
2436                 reg = findreglsw(retregs);
2437                 NEWREG(cs.Irm,reg);
2438                 cdb.gen(&cs);
2439                 if (*pretregs & mPSW)
2440                     tstresult(cdb,retregs,tyml,true);
2441             }
2442             else        // flags only
2443             {
2444                 retregs = ALLREGS & ~idxregm(&cs);
2445                 allocreg(cdb,&retregs,&reg,TYint);
2446                 cs.Iop = LOD;
2447                 NEWREG(cs.Irm,reg);
2448                 cdb.gen(&cs);           // MOV reg,EA
2449                 cs.Iop = 0x0B;          // OR reg,EA+2
2450                 cs.Iflags |= CFpsw;
2451                 getlvalue_msw(&cs);
2452                 cdb.gen(&cs);
2453             }
2454         }
2455         if (e1.Ecount && !(retregs & regcon.mvar))   // if lvalue is a CSE
2456             cssave(e1,retregs,!OTleaf(e1.Eoper));
2457         freenode(e1);
2458         *pretregs = retregs;
2459         return;
2460     }
2461     else                                // else must evaluate in register
2462     {
2463         if (sz <= REGSIZE)
2464         {
2465             regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
2466             if (isbyte)
2467                 possregs &= BYTEREGS;
2468             retregs = *pretregs & possregs;
2469             if (retregs == 0)
2470                 retregs = possregs;
2471             allocreg(cdb,&retregs,&reg,tym);
2472             cs.Iop = LOD ^ isbyte;
2473             code_newreg(&cs, reg);
2474             if (isbyte && I64 && (reg >= 4))
2475                 cs.Irex |= REX;
2476             cdb.gen(&cs);                     // MOV reg,EA
2477             if (!I16)
2478             {
2479                 assert(!isbyte || (mask(reg) & BYTEREGS));
2480                 cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
2481                 if (isbyte && I64 && (reg >= 4))
2482                     cdb.last().Irex |= REX;
2483                 code_orrex(cdb.last(), rex);
2484                 // We can do a 32 bit shift on a 16 bit operand if
2485                 // it's a left shift and we're not concerned about
2486                 // the flags. Remember that flags are not set if
2487                 // a shift of 0 occurs.
2488                 if (_tysize[tym] == SHORTSIZE &&
2489                     (oper == OPshrass || oper == OPashrass ||
2490                      (*pretregs & mPSW && conste2)))
2491                      cdb.last().Iflags |= CFopsize;            // 16 bit operand
2492             }
2493             else
2494             {
2495                 while (loopcnt--)
2496                 {   // Generate shift instructions.
2497                     cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
2498                 }
2499             }
2500             if (*pretregs & mPSW && conste2)
2501             {
2502                 assert(shiftcnt);
2503                 *pretregs &= ~mPSW;     // result is already in flags
2504                 code_orflag(cdb.last(),CFpsw);
2505             }
2506 
2507             opAssStoreReg(cdb,cs,e,reg,pretregs);
2508             return;
2509         }
2510         assert(0);
2511     }
2512 }
2513 
2514 
2515 /**********************************
2516  * Generate code for compares.
2517  * Handles lt,gt,le,ge,eqeq,ne for all data types.
      *
      * Params:
      *      cdb      = code builder the generated instructions are added to
      *      e        = the relational elem (asserted OTrel for non-void results);
      *                 its operands are e.EV.E1 and e.EV.E2
      *      pretregs = in: where the result is wanted (0 = no result wanted,
      *                 mPSW = flags, register bits = value in a register);
      *                 rewritten on several paths to describe where the result
      *                 was actually left
      *
      * The global cdcmp_flag is consumed (read, then reset to 0) on every call.
      * When it is nonzero the SETcc sequence is not used, and on the
      * SBB reg,reg path the trailing INC/NEG is left to the caller.
      *
      * Overall shape:
      *  1. floating point operands are handed to orthxmm()/orth87() or to
      *     runtime library calls
      *  2. signed ordering compares of two-register integers (TYlong for 16 bit
      *     code, TYllong for 32 bit code) get a dedicated sequence
      *  3. everything else emits CMP/TEST, chosen by the form of e2
      *  4. at L3 the flags are converted to a register value if one is wanted
2518  */
2519 
2520 void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2521 {
2522     regm_t retregs,rretregs;
2523     reg_t reg,rreg;
2524     int fl;
2525 
2526     //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
2527     // Collect extra parameter. This is pretty ugly...
2528     int flag = cdcmp_flag;
2529     cdcmp_flag = 0;
2530 
2531     elem *e1 = e.EV.E1;
2532     elem *e2 = e.EV.E2;
2533     if (*pretregs == 0)                 // if don't want result
2534     {
          // Still evaluate both operands (for whatever side effects they have)
2535         codelem(cdb,e1,pretregs,false);
2536         *pretregs = 0;                  // in case e1 changed it
2537         codelem(cdb,e2,pretregs,false);
2538         return;
2539     }
2540 
2541     uint jop = jmpopcode(e);        // must be computed before
2542                                         // leaves are freed
2543     uint reverse = 0;               // XORed into CMP opcodes; 2 turns 0x3B into 0x39 (operands swapped)
2544 
2545     OPER op = e.Eoper;
2546     assert(OTrel(op));
2547     bool eqorne = (op == OPeqeq) || (op == OPne);
2548 
2549     tym_t tym = tybasic(e1.Ety);
2550     uint sz = _tysize[tym];
2551     uint isbyte = sz == 1;          // 1 for byte operands; XORed into opcodes to pick the 8 bit form
2552 
2553     uint rex = (I64 && sz == 8) ? REX_W : 0;
2554     uint grex = rex << 16;          // 64 bit operands
2555 
2556     code cs;
2557     code *ce;
2558     if (tyfloating(tym))                  // if floating operation
2559     {
2560         if (config.fpxmmregs)
2561         {
2562             retregs = mPSW;
2563             if (tyxmmreg(tym))
2564                 orthxmm(cdb,e,&retregs);
2565             else
2566                 orth87(cdb,e,&retregs);
2567         }
2568         else if (config.inline8087)
2569         {   retregs = mPSW;
2570             orth87(cdb,e,&retregs);
2571         }
2572         else
2573         {
              // No inline floating point: only supported for EX_windos targets,
              // via runtime library calls
2574             if (config.exe & EX_windos)
2575             {
2576                 int clib;
2577 
2578                 retregs = 0;                /* skip result for now          */
2579                 if (iffalse(e2))            /* second operand is constant 0 */
2580                 {
2581                     assert(!eqorne);        /* should be OPbool or OPnot    */
2582                     if (tym == TYfloat)
2583                     {
2584                         retregs = FLOATREGS;
2585                         clib = CLIB.ftst0;
2586                     }
2587                     else
2588                     {
2589                         retregs = DOUBLEREGS;
2590                         clib = CLIB.dtst0;
2591                     }
2592                     if (rel_exception(op))
2593                         clib += CLIB.dtst0exc - CLIB.dtst0;   // switch to the "exc" variant of the test routine
2594                     codelem(cdb,e1,&retregs,false);
2595                     retregs = 0;
2596                     callclib(cdb,e,clib,&retregs,0);
2597                     freenode(e2);
2598                 }
2599                 else
2600                 {
2601                     clib = CLIB.dcmp;
2602                     if (rel_exception(op))
2603                         clib += CLIB.dcmpexc - CLIB.dcmp;     // switch to the "exc" variant of the compare routine
2604                     opdouble(cdb,e,&retregs,clib);
2605                 }
2606             }
2607             else
2608             {
2609                 assert(0);
2610             }
2611         }
2612         goto L3;                        // skip the integer paths; go produce the result
2613     }
2614 
2615     /* If it's a signed comparison of longs, we have to call a library    */
2616     /* routine, because we don't know the target of the signed branch     */
2617     /* (have to set up flags so that jmpopcode() will do it right)        */
2618     if (!eqorne &&
2619         (I16 && tym == TYlong  && tybasic(e2.Ety) == TYlong ||
2620          I32 && tym == TYllong && tybasic(e2.Ety) == TYllong)
2621        )
2622     {
2623         assert(jop != JC && jop != JNC);
          // e1 goes in DX:AX, e2 in CX:BX
2624         retregs = mDX | mAX;
2625         codelem(cdb,e1,&retregs,false);
2626         retregs = mCX | mBX;
2627         scodelem(cdb,e2,&retregs,mDX | mAX,false);
2628 
2629         if (I16)
2630         {
2631             retregs = 0;
2632             callclib(cdb,e,CLIB.lcmp,&retregs,0);    // gross, but it works
2633         }
2634         else
2635         {
2636             /* Generate:
2637              *      CMP  EDX,ECX
2638              *      JNE  C1
2639              *      XOR  EDX,EDX
2640              *      CMP  EAX,EBX
2641              *      JZ   C1
2642              *      JA   C3
2643              *      DEC  EDX
2644              *      JMP  C1
2645              * C3:  INC  EDX
2646              * C1:
2647              */
              /* When the high halves are equal, the unsigned result of the low
               * half compare is re-expressed through DEC/INC of a zeroed EDX
               * (giving -1 or +1), so the flags at C1 suit a signed branch.
               */
2648              getregs(cdb,mDX);
2649              genregs(cdb,0x39,CX,DX);             // CMP EDX,ECX
2650              code *c1 = gennop(null);
2651              genjmp(cdb,JNE,FLcode,cast(block *)c1);  // JNE C1
2652              movregconst(cdb,DX,0,0);             // XOR EDX,EDX
2653              genregs(cdb,0x39,BX,AX);             // CMP EAX,EBX
2654              genjmp(cdb,JE,FLcode,cast(block *)c1);   // JZ C1
2655              code *c3 = gen1(null,0x40 + DX);                  // INC EDX
2656              genjmp(cdb,JA,FLcode,cast(block *)c3);   // JA C3
2657              cdb.gen1(0x48 + DX);                              // DEC EDX
2658              genjmp(cdb,JMPS,FLcode,cast(block *)c1); // JMP C1
2659              cdb.append(c3);
2660              cdb.append(c1);
2661              getregs(cdb,mDX);
2662              retregs = mPSW;
2663         }
2664         goto L3;
2665     }
2666 
2667     /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC
2668      * (This is already reflected in the jop)
2669      */
2670     if ((jop == JC || jop == JNC) &&
2671         (op == OPgt || op == OPle) &&
2672         (tyuns(tym) || tyuns(e2.Ety))
2673        )
2674     {   // jmpopcode() says comparison should be reversed
2675         assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst);
2676         reverse ^= 2;
2677     }
2678 
2679     /* See if we should swap operands     */
2680     if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
2681     {
2682         e1 = e.EV.E2;
2683         e2 = e.EV.E1;
2684         reverse ^= 2;               // operands swapped, so swap them back in the CMP encoding
2685     }
2686 
2687     retregs = allregs;
2688     if (isbyte)
2689         retregs = BYTEREGS;
2690 
      /* ce, when non-null, is a NOP used as the target of the JNE emitted after
       * the most significant word compare of multi-register operands. It is
       * appended after the switch below.
       */
2691     ce = null;
2692     cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
2693     cs.Irex = cast(ubyte)rex;
2694     if (sz > REGSIZE)
2695         ce = gennop(ce);
2696 
      // Pick the instruction sequence by the form of the right operand
2697     switch (e2.Eoper)
2698     {
2699         default:
2700         L2:
              // General case: both operands in registers, CMP reg,rreg
2701             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
2702             rretregs = allregs & ~retregs;
2703             if (isbyte)
2704                 rretregs &= BYTEREGS;
2705             scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
2706             if (sz <= REGSIZE)                              // CMP reg,rreg
2707             {
2708                 reg = findreg(retregs);             // get reg that e1 is in
2709                 rreg = findreg(rretregs);
2710                 genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg);
2711                 code_orrex(cdb.last(), rex);
2712                 if (!I16 && sz == SHORTSIZE)
2713                     cdb.last().Iflags |= CFopsize;          // compare only 16 bits
2714                 if (I64 && isbyte && (reg >= 4 || rreg >= 4))
2715                     cdb.last().Irex |= REX;                 // address byte registers
2716             }
2717             else
2718             {
2719                 assert(sz <= 2 * REGSIZE);
2720 
2721                 // Compare MSW, if they're equal then compare the LSW
2722                 reg = findregmsw(retregs);
2723                 rreg = findregmsw(rretregs);
2724                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2725                 if (I32 && sz == 6)
2726                     cdb.last().Iflags |= CFopsize;         // seg is only 16 bits
2727                 else if (I64)
2728                     code_orrex(cdb.last(), REX_W);
2729                 genjmp(cdb,JNE,FLcode,cast(block *) ce);   // JNE nop
2730 
2731                 reg = findreglsw(retregs);
2732                 rreg = findreglsw(rretregs);
2733                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2734                 if (I64)
2735                     code_orrex(cdb.last(), REX_W);
2736             }
2737             break;
2738 
2739         case OPrelconst:
              // Right operand is an address constant: try CMP with a fixed-up
              // immediate, otherwise fall back to the register form at L2
2740             if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
2741                 goto L2;
2742             fl = el_fl(e2);
2743             switch (fl)
2744             {
2745                 case FLfunc:
2746                     fl = FLextern;          // so it won't be self-relative
2747                     break;
2748 
2749                 case FLdata:
2750                 case FLudata:
2751                 case FLextern:
2752                     if (sz > REGSIZE)       // compare against DS, not DGROUP
2753                         goto L2;
2754                     break;
2755 
2756                 case FLfardata:
2757                     break;
2758 
2759                 default:
2760                     goto L2;
2761             }
2762             cs.IFL2 = cast(ubyte)fl;
2763             cs.IEV2.Vsym = e2.EV.Vsym;
2764             if (sz > REGSIZE)
2765             {
                  // multi-word: the segment part is compared first
2766                 cs.Iflags |= CFseg;
2767                 cs.IEV2.Voffset = 0;
2768             }
2769             else
2770             {
2771                 cs.Iflags |= CFoff;
2772                 cs.IEV2.Voffset = e2.EV.Voffset;
2773             }
2774             goto L4;
2775 
2776         case OPconst:
2777             // If compare against 0
2778             if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) &&
2779                 isregvar(e1,&retregs,&reg)
2780                )
2781             {   // Just do a TEST instruction
2782                 genregs(cdb,0x85 ^ isbyte,reg,reg);      // TEST reg,reg
2783                 cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw;
2784                 code_orrex(cdb.last(), rex);
2785                 if (I64 && isbyte && reg >= 4)
2786                     cdb.last().Irex |= REX;                 // address byte registers
2787                 retregs = mPSW;
2788                 break;
2789             }
2790 
              /* Signed compare against 0 where the result is wanted in a
               * register and not in the flags: derive it from the sign bit
               * without a branch. e1 is evaluated straight into *pretregs and
               * the function leaves through `ret`, bypassing L3.
               */
2791             if (!tyuns(tym) && !tyuns(e2.Ety) &&
2792                 !boolres(e2) && !(*pretregs & mPSW) &&
2793                 (sz == REGSIZE || (I64 && sz == 4)) &&
2794                 (!I16 || op == OPlt || op == OPge))
2795             {
2796                 assert(*pretregs & (allregs));
2797                 codelem(cdb,e1,pretregs,false);
2798                 reg = findreg(*pretregs);
2799                 getregs(cdb,mask(reg));
2800                 switch (op)
2801                 {
2802                     case OPle:
                          // x <= 0: subtracting 1 (with the ADC putting the carry back)
                          // arranges the sign bit so the OPlt extraction gives the answer
2803                         cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1);   // ADD reg,-1
2804                         code_orflag(cdb.last(), CFpsw);
2805                         cdb.genc2(0x81,grex | modregrmx(3,2,reg),0);          // ADC reg,0
2806                         goto oplt;
2807 
2808                     case OPgt:
2809                         cdb.gen2(0xF7,grex | modregrmx(3,3,reg));         // NEG reg
2810                             /* Flips the sign bit unless the value is 0 or int.min.
2811                             Also sets the carry bit when the value is not 0. */
2812                         code_orflag(cdb.last(), CFpsw);
2813                         cdb.genc2(0x81,grex | modregrmx(3,3,reg),0);  // SBB reg,0
2814                             /* Subtracts the carry bit. This turns int.min into
2815                             int.max, flipping the sign bit.
2816                             For other negative and positive values, subtracting 1
2817                             doesn't affect the sign bit.
2818                             For 0, the carry bit is not set, so this does nothing
2819                             and the sign bit is not affected. */
2820                         goto oplt;
2821 
2822                     case OPlt:
2823                     oplt:
2824                         // Get the sign bit, i.e. 1 if the value is negative.
2825                         if (!I16)
2826                             cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31
2827                         else
2828                         {   /* 8088-286 do not have a barrel shifter, so use this
2829                                faster sequence
2830                              */
2831                             genregs(cdb,0xD1,0,reg);   // ROL reg,1
2832                             reg_t regi;
2833                             if (reghasvalue(allregs,1,&regi))
2834                                 genregs(cdb,0x23,reg,regi);  // AND reg,regi
2835                             else
2836                                 cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1
2837                         }
2838                         break;
2839 
2840                     case OPge:
                          // Sign bit -> carry, SBB gives 0 or -1, INC gives 1 or 0
2841                         genregs(cdb,0xD1,4,reg);        // SHL reg,1
2842                         code_orrex(cdb.last(),rex);
2843                         code_orflag(cdb.last(), CFpsw);
2844                         genregs(cdb,0x19,reg,reg);      // SBB reg,reg
2845                         code_orrex(cdb.last(),rex);
2846                         if (I64)
2847                         {
2848                             cdb.gen2(0xFF,modregrmx(3,0,reg));       // INC reg
2849                             code_orrex(cdb.last(), rex);
2850                         }
2851                         else
2852                             cdb.gen1(0x40 + reg);                    // INC reg
2853                         break;
2854 
2855                     default:
2856                         assert(0);
2857                 }
2858                 freenode(e2);
2859                 goto ret;
2860             }
2861 
              // Otherwise set up an immediate operand; for multi-word sizes the
              // most significant part is loaded first
2862             cs.IFL2 = FLconst;
2863             if (sz == 16)
2864                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.msw;
2865             else if (sz > REGSIZE)
2866                 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);
2867             else
2868                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong;
2869 
2870             // The cmp immediate relies on sign extension of the 32 bit immediate value
2871             if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint)
2872                 goto L2;
2873           L4:
              // Shared by OPconst and OPrelconst: cs.IFL2/cs.IEV2 describe the immediate
2874             cs.Iop = 0x81 ^ isbyte;
2875 
2876             /* if ((e1 is data or a '*' reference) and it's not a
2877              * common subexpression
2878              */
2879 
2880             if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
2881                  e1.Eoper == OPind) &&
2882                 !evalinregister(e1))
2883             {
                  // Compare directly against e1's memory operand (CMP EA,...)
2884                 getlvalue(cdb,&cs,e1,RMload);
2885                 freenode(e1);
2886                 if (evalinregister(e2))
2887                 {
                      // e2 goes in registers, kept clear of the ones addressing EA
2888                     retregs = idxregm(&cs);
2889                     if ((cs.Iflags & CFSEG) == CFes)
2890                         retregs |= mES;             // take no chances
2891                     rretregs = allregs & ~retregs;
2892                     if (isbyte)
2893                         rretregs &= BYTEREGS;
2894                     scodelem(cdb,e2,&rretregs,retregs,true);
2895                     cs.Iop = 0x39 ^ isbyte ^ reverse;
2896                     if (sz > REGSIZE)
2897                     {
2898                         rreg = findregmsw(rretregs);
2899                         cs.Irm |= modregrm(0,rreg,0);
2900                         getlvalue_msw(&cs);
2901                         cdb.gen(&cs);              // CMP EA+2,rreg
2902                         if (I32 && sz == 6)
2903                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
                          // NOTE(review): isbyte means sz == 1, which cannot hold inside
                          // this sz > REGSIZE branch, so this test looks unreachable
2904                         if (I64 && isbyte && rreg >= 4)
2905                             cdb.last().Irex |= REX;
2906                         genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop
2907                         rreg = findreglsw(rretregs);
2908                         NEWREG(cs.Irm,rreg);
2909                         getlvalue_lsw(&cs);
2910                     }
2911                     else
2912                     {
2913                         rreg = findreg(rretregs);
2914                         code_newreg(&cs, rreg);
2915                         if (I64 && isbyte && rreg >= 4)
2916                             cs.Irex |= REX;
2917                     }
2918                 }
2919                 else
2920                 {
                      // CMP EA,immediate (reg field 7 of opcode 0x81/0x80 selects CMP)
2921                     cs.Irm |= modregrm(0,7,0);
2922                     if (sz > REGSIZE)
2923                     {
2924                         if (sz == 6)
2925                             assert(0);
2926                         if (e2.Eoper == OPrelconst)
2927                         {   cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg;
2928                             cs.IEV2.Voffset = 0;
2929                         }
2930                         getlvalue_msw(&cs);
2931                         cdb.gen(&cs);              // CMP EA+2,const
2932                         if (!I16 && sz == 6)
2933                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
2934                         genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop
                          // now switch the immediate over to the least significant part
2935                         if (e2.Eoper == OPconst)
2936                             cs.IEV2.Vint = cast(int)e2.EV.Vllong;
2937                         else if (e2.Eoper == OPrelconst)
2938                         {   // Turn off CFseg, on CFoff
2939                             cs.Iflags ^= CFseg | CFoff;
2940                             cs.IEV2.Voffset = e2.EV.Voffset;
2941                         }
2942                         else
2943                             assert(0);
2944                         getlvalue_lsw(&cs);
2945                     }
2946                     freenode(e2);
2947                 }
2948                 cdb.gen(&cs);           // the (final) CMP against EA
2949                 break;
2950             }
2951 
              // If e2 would rather be in a register and more than one register
              // is not taken by register variables, use the register form at L2
2952             if (evalinregister(e2) && !OTassign(e1.Eoper) &&
2953                 !isregvar(e1,null,null))
2954             {
2955                 regm_t m;
2956 
2957                 m = allregs & ~regcon.mvar;
2958                 if (isbyte)
2959                     m &= BYTEREGS;
2960                 if (m & (m - 1))    // if more than one free register
2961                     goto L2;
2962             }
              // Compare against 0 of a strcmp or an assignment: evaluate e1 for
              // flags only; no separate CMP is emitted
2963             if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) &&
2964                 !boolres(e2) && !evalinregister(e1))
2965             {
2966                 retregs = mPSW;
2967                 scodelem(cdb,e1,&retregs,0,false);
2968                 freenode(e2);
2969                 break;
2970             }
              // Likewise for an OPadd compared against 0 when only flags are
              // wanted: ask e1 for flags in addition to a register
2971             if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW)
2972             {
2973                 retregs |= mPSW;
2974                 scodelem(cdb,e1,&retregs,0,false);
2975                 freenode(e2);
2976                 break;
2977             }
              // Fallback: e1 in register(s), then CMP reg,immediate
2978             scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
2979             if (sz == 1)
2980             {
2981                 reg = findreg(retregs & allregs);   // get reg that e1 is in
2982                 cs.Irm = modregrm(3,7,reg & 7);
2983                 if (reg & 8)
2984                     cs.Irex |= REX_B;
2985                 if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg)
2986                 {   assert(reg < 4);
2987                     cs.Irm |= 4;                    // use upper register half
2988                 }
2989                 if (I64 && reg >= 4)
2990                     cs.Irex |= REX;                 // address byte registers
2991             }
2992             else if (sz <= REGSIZE)
2993             {   // CMP reg,const
2994                 reg = findreg(retregs & allregs);   // get reg that e1 is in
2995                 rretregs = allregs & ~retregs;
                  // if some other register already holds the constant, compare against it
2996                 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg))
2997                 {
2998                     genregs(cdb,0x3B,reg,rreg);
2999                     code_orrex(cdb.last(), rex);
3000                     if (!I16)
3001                         cdb.last().Iflags |= cs.Iflags & CFopsize;
3002                     freenode(e2);
3003                     break;
3004                 }
3005                 cs.Irm = modregrm(3,7,reg & 7);
3006                 if (reg & 8)
3007                     cs.Irex |= REX_B;
3008             }
3009             else if (sz <= 2 * REGSIZE)
3010             {
3011                 reg = findregmsw(retregs);          // get reg that e1 is in
3012                 cs.Irm = modregrm(3,7,reg);
3013                 cdb.gen(&cs);                       // CMP reg,MSW
3014                 if (I32 && sz == 6)
3015                     cdb.last().Iflags |= CFopsize;  // seg is only 16 bits
3016                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
3017 
3018                 reg = findreglsw(retregs);
3019                 cs.Irm = modregrm(3,7,reg);
3020                 if (e2.Eoper == OPconst)
3021                     cs.IEV2.Vint = e2.EV.Vlong;
3022                 else if (e2.Eoper == OPrelconst)
3023                 {   // Turn off CFseg, on CFoff
3024                     cs.Iflags ^= CFseg | CFoff;
3025                     cs.IEV2.Voffset = e2.EV.Voffset;
3026                 }
3027                 else
3028                     assert(0);
3029             }
3030             else
3031                 assert(0);
3032             cdb.gen(&cs);                         // CMP sucreg,LSW
3033             freenode(e2);
3034             break;
3035 
3036         case OPind:
              // a common subexpression is compared from registers; otherwise
              // compare the register holding e1 against the memory operand
3037             if (e2.Ecount)
3038                 goto L2;
3039             goto L5;
3040 
3041         case OPvar:
3042             if (config.exe & (EX_OSX | EX_OSX64))
3043             {
3044                 if (movOnly(e2))
3045                     goto L2;
3046             }
              // If e2 is a register variable and e1 is addressable memory,
              // compare the memory operand against e2's register
3047             if ((e1.Eoper == OPvar &&
3048                  isregvar(e2,&rretregs,&reg) &&
3049                  sz <= REGSIZE
3050                 ) ||
3051                 (e1.Eoper == OPind &&
3052                  isregvar(e2,&rretregs,&reg) &&
3053                  !evalinregister(e1) &&
3054                  sz <= REGSIZE
3055                 )
3056                )
3057             {
3058                 // CMP EA,e2
3059                 getlvalue(cdb,&cs,e1,RMload);
3060                 freenode(e1);
3061                 cs.Iop = 0x39 ^ isbyte ^ reverse;
3062                 code_newreg(&cs,reg);
3063                 if (I64 && isbyte && reg >= 4)
3064                     cs.Irex |= REX;                 // address byte registers
3065                 cdb.gen(&cs);
3066                 freenode(e2);
3067                 break;
3068             }
3069           L5:
              // e1 in register(s), e2 addressed in memory
3070             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
3071             if (sz <= REGSIZE)                      // CMP reg,EA
3072             {
3073                 reg = findreg(retregs & allregs);   // get reg that e1 is in
3074                 uint opsize = cs.Iflags & CFopsize;  // saved so it can be re-applied to the instruction loadea() emits
3075                 loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0);
3076                 code_orflag(cdb.last(),opsize);
3077             }
3078             else if (sz <= 2 * REGSIZE)
3079             {
3080                 reg = findregmsw(retregs);   // get reg that e1 is in
3081                 // CMP reg,EA
3082                 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0);
3083                 if (I32 && sz == 6)
3084                     cdb.last().Iflags |= CFopsize;        // seg is only 16 bits
3085                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
3086                 reg = findreglsw(retregs);
3087                 if (e2.Eoper == OPind)
3088                 {
                      // reuse the addressing already set up in cs, now pointing at the LSW
3089                     NEWREG(cs.Irm,reg);
3090                     getlvalue_lsw(&cs);
3091                     cdb.gen(&cs);
3092                 }
3093                 else
3094                     loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0);
3095             }
3096             else
3097                 assert(0);
3098             freenode(e2);
3099             break;
3100     }
3101     cdb.append(ce);                 // join point for the JNE after an MSW compare (ce may be null)
3102 
3103 L3:
      // Turn the comparison outcome into a register value if the caller wants one
3104     if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
3105     {
          // SETcc path: needs a 386 or later and no cdcmp_flag.
          // NOTE(review): presumably jop values with bits set in 0xFF00 are not
          // plain single condition codes - confirm against jmpopcode()
3106         if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00))
3107         {
3108             regm_t resregs = retregs;
3109             if (!I64)
3110             {
                  // outside 64 bit mode restrict the SETcc destination to BYTEREGS
3111                 resregs &= BYTEREGS;
3112                 if (!resregs)
3113                     resregs = BYTEREGS;
3114             }
3115             allocreg(cdb,&resregs,&reg,TYint);
3116             cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg
3117             if (I64 && reg >= 4)
3118                 code_orrex(cdb.last(),REX);
3119             if (tysize(e.Ety) > 1)
3120             {
                  // result type is wider than a byte: zero extend the SETcc result
3121                 genregs(cdb,MOVZXb,reg,reg);       // MOVZX reg,reg
3122                 if (I64 && sz == 8)
3123                     code_orrex(cdb.last(),REX_W);
3124                 if (I64 && reg >= 4)
3125                     code_orrex(cdb.last(),REX);
3126             }
3127             *pretregs &= ~mPSW;
3128             fixresult(cdb,e,resregs,pretregs);
3129         }
3130         else
3131         {
3132             code *nop = null;
              // allocate the result register without disturbing the record of
              // which registers hold known immediate values
3133             regm_t save = regcon.immed.mval;
3134             allocreg(cdb,&retregs,&reg,TYint);
3135             regcon.immed.mval = save;
3136             if ((*pretregs & mPSW) == 0 &&
3137                 (jop == JC || jop == JNC))
3138             {
                  // The answer is in the carry flag: SBB reg,reg gives 0 or -1,
                  // then INC (for JNC) or NEG (for JC) turns that into 0 or 1
3139                 getregs(cdb,retregs);
3140                 genregs(cdb,0x19,reg,reg);     // SBB reg,reg
3141                 if (rex || flag & REX_W)
3142                     code_orrex(cdb.last(), REX_W);
3143                 if (flag)
3144                 { }                                         // cdcond() will handle it
3145                 else if (jop == JNC)
3146                 {
3147                     if (I64)
3148                     {
3149                         cdb.gen2(0xFF,modregrmx(3,0,reg));  // INC reg
3150                         code_orrex(cdb.last(), rex);
3151                     }
3152                     else
3153                         cdb.gen1(0x40 + reg);               // INC reg
3154                 }
3155                 else
3156                 {
3157                     cdb.gen2(0xF7,modregrmx(3,3,reg));      // NEG reg
3158                     code_orrex(cdb.last(), rex);
3159                 }
3160             }
3161             else if (I64 && sz == 8)
3162             {
                  // MOV reg,1 / Jcc over / MOV reg,0, 64 bit variant.
                  // NOTE(review): the 8 and 64 bits passed to movregconst()
                  // presumably mean "preserve flags" and "64 bit" - confirm
3163                 assert(!flag);
3164                 movregconst(cdb,reg,1,64|8);   // MOV reg,1
3165                 nop = gennop(nop);
3166                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
3167                                                             // MOV reg,0
3168                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64);
3169                 regcon.immed.mval &= ~mask(reg);   // reg's value now depends on the branch taken
3170             }
3171             else
3172             {
                  // MOV reg,1 / Jcc over / MOV reg,0
3173                 assert(!flag);
3174                 movregconst(cdb,reg,1,8);      // MOV reg,1
3175                 nop = gennop(nop);
3176                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
3177                                                             // MOV reg,0
3178                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0);
3179                 regcon.immed.mval &= ~mask(reg);   // reg's value now depends on the branch taken
3180             }
3181             *pretregs = retregs;
3182             cdb.append(nop);
3183         }
3184     }
3185 ret:
3186     { }
3187 }
3188 
3189 
3190 /**********************************
3191  * Generate code for signed compare of longs.
3192  * Input:
3193  *      targ    block* or code*
3194  */
3195 
3196 void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ)
3197 {
3198                                          // <=  >   <   >=
3199     static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ];
3200     static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];
3201 
3202     //printf("longcmp(e = %p)\n", e);
3203     elem *e1 = e.EV.E1;
3204     elem *e2 = e.EV.E2;
3205     OPER op = e.Eoper;
3206 
3207     // See if we should swap operands
3208     if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
3209     {
3210         e1 = e.EV.E2;
3211         e2 = e.EV.E1;
3212         op = swaprel(op);
3213     }
3214 
3215     code cs;
3216     cs.Iflags = 0;
3217     cs.Irex = 0;
3218 
3219     code *ce = gennop(null);
3220     regm_t retregs = ALLREGS;
3221     regm_t rretregs;
3222     reg_t reg,rreg;
3223 
3224     uint jop = jopmsw[op - OPle];
3225     if (!(jcond & 1)) jop ^= (JL ^ JG);                   // toggle jump condition
3226     CodeBuilder cdbjmp;
3227     cdbjmp.ctor();
3228     genjmp(cdbjmp,jop,fltarg, cast(block *) targ);             // Jx targ
3229     genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce);   // Jy nop
3230 
3231     switch (e2.Eoper)
3232     {
3233         default:
3234         L2:
3235             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
3236             rretregs = ALLREGS & ~retregs;
3237             scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
3238             cse_flush(cdb,1);
3239             // Compare MSW, if they're equal then compare the LSW
3240             reg = findregmsw(retregs);
3241             rreg = findregmsw(rretregs);
3242             genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
3243             cdb.append(cdbjmp);
3244 
3245             reg = findreglsw(retregs);
3246             rreg = findreglsw(rretregs);
3247             genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
3248             break;
3249 
3250         case OPconst:
3251             cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);            // MSW first
3252             cs.IFL2 = FLconst;
3253             cs.Iop = 0x81;
3254 
3255             /* if ((e1 is data or a '*' reference) and it's not a
3256              * common subexpression
3257              */
3258 
3259             if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
3260                  e1.Eoper == OPind) &&
3261                 !evalinregister(e1))
3262             {
3263                 getlvalue(cdb,&cs,e1,0);
3264                 freenode(e1);
3265                 if (evalinregister(e2))
3266                 {
3267                     retregs = idxregm(&cs);
3268                     if ((cs.Iflags & CFSEG) == CFes)
3269                             retregs |= mES;         // take no chances
3270                     rretregs = ALLREGS & ~retregs;
3271                     scodelem(cdb,e2,&rretregs,retregs,true);
3272                     cse_flush(cdb,1);
3273                     rreg = findregmsw(rretregs);
3274                     cs.Iop = 0x39;
3275                     cs.Irm |= modregrm(0,rreg,0);
3276                     getlvalue_msw(&cs);
3277                     cdb.gen(&cs);           // CMP EA+2,rreg
3278                     cdb.append(cdbjmp);
3279                     rreg = findreglsw(rretregs);
3280                     NEWREG(cs.Irm,rreg);
3281                 }
3282                 else
3283                 {
3284                     cse_flush(cdb,1);
3285                     cs.Irm |= modregrm(0,7,0);
3286                     getlvalue_msw(&cs);
3287                     cdb.gen(&cs);           // CMP EA+2,const
3288                     cdb.append(cdbjmp);
3289                     cs.IEV2.Vint = e2.EV.Vlong;
3290                     freenode(e2);
3291                 }
3292                 getlvalue_lsw(&cs);
3293                 cdb.gen(&cs);                   // CMP EA,rreg/const
3294                 break;
3295             }
3296             if (evalinregister(e2))
3297                 goto L2;
3298 
3299             scodelem(cdb,e1,&retregs,0,true);    // compute left leaf
3300             cse_flush(cdb,1);
3301             reg = findregmsw(retregs);              // get reg that e1 is in
3302             cs.Irm = modregrm(3,7,reg);
3303 
3304             cdb.gen(&cs);                           // CMP reg,MSW
3305             cdb.append(cdbjmp);
3306             reg = findreglsw(retregs);
3307             cs.Irm = modregrm(3,7,reg);
3308             cs.IEV2.Vint = e2.EV.Vlong;
3309             cdb.gen(&cs);                           // CMP sucreg,LSW
3310             freenode(e2);
3311             break;
3312 
3313         case OPvar:
3314             if (!e1.Ecount && e1.Eoper == OPs32_64)
3315             {
3316                 reg_t msreg;
3317 
3318                 retregs = allregs;
3319                 scodelem(cdb,e1.EV.E1,&retregs,0,true);
3320                 freenode(e1);
3321                 reg = findreg(retregs);
3322                 retregs = allregs & ~retregs;
3323                 allocreg(cdb,&retregs,&msreg,TYint);
3324                 genmovreg(cdb,msreg,reg);                  // MOV msreg,reg
3325                 cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
3326                 cse_flush(cdb,1);
3327                 loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0);
3328                 cdb.append(cdbjmp);
3329                 loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0);
3330                 freenode(e2);
3331             }
3332             else
3333             {
3334                 scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
3335                 cse_flush(cdb,1);
3336                 reg = findregmsw(retregs);   // get reg that e1 is in
3337                 loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0);
3338                 cdb.append(cdbjmp);
3339                 reg = findreglsw(retregs);
3340                 loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0);
3341                 freenode(e2);
3342             }
3343             break;
3344     }
3345 
3346     jop = joplsw[op - OPle];
3347     if (!(jcond & 1)) jop ^= 1;                           // toggle jump condition
3348     genjmp(cdb,jop,fltarg,cast(block *) targ);   // Jcond targ
3349 
3350     cdb.append(ce);
3351     freenode(e);
3352 }
3353 
3354 /*****************************
3355  * Do conversions.
3356  * Depends on OPd_s32 and CLIB.dbllng being in sequence.
3357  */
3358 
3359 void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
3360 {
3361     //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs));
3362     //elem_print(e);
3363 
3364     static immutable ubyte[2][16] clib =
3365     [
3366         [ OPd_s32,        CLIB.dbllng   ],
3367         [ OPs32_d,        CLIB.lngdbl   ],
3368         [ OPd_s16,        CLIB.dblint   ],
3369         [ OPs16_d,        CLIB.intdbl   ],
3370         [ OPd_u16,        CLIB.dbluns   ],
3371         [ OPu16_d,        CLIB.unsdbl   ],
3372         [ OPd_u32,        CLIB.dblulng  ],
3373         [ OPu32_d,        CLIB.ulngdbl  ],
3374         [ OPd_s64,        CLIB.dblllng  ],
3375         [ OPs64_d,        CLIB.llngdbl  ],
3376         [ OPd_u64,        CLIB.dblullng ],
3377         [ OPu64_d,        CLIB.ullngdbl ],
3378         [ OPd_f,          CLIB.dblflt   ],
3379         [ OPf_d,          CLIB.fltdbl   ],
3380         [ OPvp_fp,        CLIB.vptrfptr ],
3381         [ OPcvp_fp,       CLIB.cvptrfptr]
3382     ];
3383 
3384     if (!*pretregs)
3385     {
3386         codelem(cdb,e.EV.E1,pretregs,false);
3387         return;
3388     }
3389 
3390     regm_t retregs;
3391     if (config.inline8087)
3392     {
3393         switch (e.Eoper)
3394         {
3395             case OPld_d:
3396             case OPd_ld:
3397             {
3398                 if (tycomplex(e.EV.E1.Ety))
3399                 {
3400             Lcomplex:
3401                     regm_t retregsx = mST01 | (*pretregs & mPSW);
3402                     codelem(cdb,e.EV.E1, &retregsx, false);
3403                     fixresult_complex87(cdb, e, retregsx, pretregs);
3404                     return;
3405                 }
3406                 regm_t retregsx = mST0 | (*pretregs & mPSW);
3407                 codelem(cdb,e.EV.E1, &retregsx, false);
3408                 fixresult87(cdb, e, retregsx, pretregs);
3409                 return;
3410             }
3411 
3412             case OPf_d:
3413             case OPd_f:
3414                 if (tycomplex(e.EV.E1.Ety))
3415                     goto Lcomplex;
3416                 if (config.fpxmmregs && *pretregs & XMMREGS)
3417                 {
3418                     xmmcnvt(cdb, e, pretregs);
3419                     return;
3420                 }
3421 
3422                 /* if won't do us much good to transfer back and        */
3423                 /* forth between 8088 registers and 8087 registers      */
3424                 if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs))
3425                 {
3426                     retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety);
3427                     if (retregs & (mXMM1 | mXMM0 |mST01 | mST0))       // if return in ST0
3428                     {
3429                         codelem(cdb,e.EV.E1,pretregs,false);
3430                         if (*pretregs & mST0)
3431                             note87(e, 0, 0);
3432                         return;
3433                     }
3434                     else
3435                         break;
3436                 }
3437                 goto Lload87;
3438 
3439             case OPs64_d:
3440                 if (!I64)
3441                     goto Lload87;
3442                 goto case OPs32_d;
3443 
3444             case OPs32_d:
3445                 if (config.fpxmmregs && *pretregs & XMMREGS)
3446                 {
3447                     xmmcnvt(cdb, e, pretregs);
3448                     return;
3449                 }
3450                 goto Lload87;
3451 
3452             case OPs16_d:
3453             case OPu16_d:
3454             Lload87:
3455                 load87(cdb,e,0,pretregs,null,-1);
3456                 return;
3457 
3458             case OPu32_d:
3459                 if (I64 && config.fpxmmregs && *pretregs & XMMREGS)
3460                 {
3461                     xmmcnvt(cdb,e,pretregs);
3462                     return;
3463                 }
3464                 else if (!I16)
3465                 {
3466                     regm_t retregsx = ALLREGS;
3467                     codelem(cdb,e.EV.E1, &retregsx, false);
3468                     reg_t reg = findreg(retregsx);
3469                     cdb.genfltreg(STO, reg, 0);
3470                     regwithvalue(cdb,ALLREGS,0,&reg,0);
3471                     cdb.genfltreg(STO, reg, 4);
3472 
3473                     push87(cdb);
3474                     cdb.genfltreg(0xDF,5,0);     // FILD m64int
3475 
3476                     regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/;
3477                     fixresult87(cdb, e, retregsy, pretregs);
3478                     return;
3479                 }
3480                 break;
3481 
3482             case OPd_s64:
3483                 if (!I64)
3484                     goto Lcnvt87;
3485                 goto case OPd_s32;
3486 
3487             case OPd_s32:
3488                 if (config.fpxmmregs)
3489                 {
3490                     xmmcnvt(cdb,e,pretregs);
3491                     return;
3492                 }
3493                 goto Lcnvt87;
3494 
3495             case OPd_s16:
3496             case OPd_u16:
3497             Lcnvt87:
3498                 cnvt87(cdb,e,pretregs);
3499                 return;
3500 
3501             case OPd_u32:               // use subroutine, not 8087
3502                 if (I64 && config.fpxmmregs)
3503                 {
3504                     xmmcnvt(cdb,e,pretregs);
3505                     return;
3506                 }
3507                 if (I32 || I64)
3508                 {
3509                     cdd_u32(cdb,e,pretregs);
3510                     return;
3511                 }
3512                 if (config.exe & EX_posix)
3513                 {
3514                     retregs = mST0;
3515                 }
3516                 else
3517                 {
3518                     retregs = DOUBLEREGS;
3519                 }
3520                 goto L1;
3521 
3522             case OPd_u64:
3523                 if (I32 || I64)
3524                 {
3525                     cdd_u64(cdb,e,pretregs);
3526                     return;
3527                 }
3528                 retregs = DOUBLEREGS;
3529                 goto L1;
3530 
3531             case OPu64_d:
3532                 if (*pretregs & mST0)
3533                 {
3534                     regm_t retregsx = I64 ? mAX : mAX|mDX;
3535                     codelem(cdb,e.EV.E1,&retregsx,false);
3536                     callclib(cdb,e,CLIB.u64_ldbl,pretregs,0);
3537                     return;
3538                 }
3539                 break;
3540 
3541             case OPld_u64:
3542             {
3543                 if (I32 || I64)
3544                 {
3545                     cdd_u64(cdb,e,pretregs);
3546                     return;
3547                 }
3548                 regm_t retregsx = mST0;
3549                 codelem(cdb,e.EV.E1,&retregsx,false);
3550                 callclib(cdb,e,CLIB.ld_u64,pretregs,0);
3551                 return;
3552             }
3553 
3554             default:
3555                 break;
3556         }
3557     }
3558     retregs = regmask(e.EV.E1.Ety, TYnfunc);
3559 L1:
3560     codelem(cdb,e.EV.E1,&retregs,false);
3561     for (int i = 0; 1; i++)
3562     {
3563         assert(i < clib.length);
3564         if (clib[i][0] == e.Eoper)
3565         {
3566             callclib(cdb,e,clib[i][1],pretregs,0);
3567             break;
3568         }
3569     }
3570 }
3571 
3572 
3573 /***************************
3574  * Convert short to long.
3575  * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64,
3576  * OPu64_128, OPs64_128
3577  */
3578 
3579 void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3580 {
3581     reg_t reg;
3582     regm_t retregs;
3583 
3584     //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
3585     int e1comsub = e.EV.E1.Ecount;
3586     ubyte op = e.Eoper;
3587     if ((*pretregs & (ALLREGS | mBP)) == 0)    // if don't need result in regs
3588     {
3589         codelem(cdb,e.EV.E1,pretregs,false);     // then conversion isn't necessary
3590         return;
3591     }
3592     else if (
3593              op == OPnp_fp ||
3594              (I16 && op == OPu16_32) ||
3595              (I32 && op == OPu32_64)
3596             )
3597     {
3598         /* Result goes into a register pair.
3599          * Zero extend by putting a zero into most significant reg.
3600          */
3601 
3602         regm_t retregsx = *pretregs & mLSW;
3603         assert(retregsx);
3604         tym_t tym1 = tybasic(e.EV.E1.Ety);
3605         codelem(cdb,e.EV.E1,&retregsx,false);
3606 
3607         regm_t regm = *pretregs & (mMSW & ALLREGS);
3608         if (regm == 0)                  // *pretregs could be mES
3609             regm = mMSW & ALLREGS;
3610         allocreg(cdb,&regm,&reg,TYint);
3611         if (e1comsub)
3612             getregs(cdb,retregsx);
3613         if (op == OPnp_fp)
3614         {
3615             int segreg;
3616 
3617             // BUG: what about pointers to functions?
3618             switch (tym1)
3619             {
3620                 case TYimmutPtr:
3621                 case TYnptr:    segreg = SEG_DS;        break;
3622                 case TYcptr:    segreg = SEG_CS;        break;
3623                 case TYsptr:    segreg = SEG_SS;        break;
3624                 default:        assert(0);
3625             }
3626             cdb.gen2(0x8C,modregrm(3,segreg,reg));  // MOV reg,segreg
3627         }
3628         else
3629             movregconst(cdb,reg,0,0);  // 0 extend
3630 
3631         fixresult(cdb,e,retregsx | regm,pretregs);
3632         return;
3633     }
3634     else if (I64 && op == OPu32_64)
3635     {
3636         elem *e1 = e.EV.E1;
3637         retregs = *pretregs;
3638         if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
3639         {
3640             code cs;
3641 
3642             allocreg(cdb,&retregs,&reg,TYint);
3643             loadea(cdb,e1,&cs,LOD,reg,0,retregs,retregs);  //  MOV Ereg,EA
3644             freenode(e1);
3645         }
3646         else
3647         {
3648             *pretregs &= ~mPSW;                 // flags are set by eval of e1
3649             codelem(cdb,e1,&retregs,false);
3650             /* Determine if high 32 bits are already 0
3651              */
3652             if (e1.Eoper == OPu16_32 && !e1.Ecount)
3653             {
3654             }
3655             else
3656             {
3657                 // Zero high 32 bits
3658                 getregs(cdb,retregs);
3659                 reg = findreg(retregs);
3660                 // Don't use x89 because that will get optimized away
3661                 genregs(cdb,LOD,reg,reg);  // MOV Ereg,Ereg
3662             }
3663         }
3664         fixresult(cdb,e,retregs,pretregs);
3665         return;
3666     }
3667     else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount)
3668     {
3669         /* Due to how e1 is calculated, the high 32 bits of the register
3670          * are already 0.
3671          */
3672         retregs = *pretregs;
3673         codelem(cdb,e.EV.E1,&retregs,false);
3674         fixresult(cdb,e,retregs,pretregs);
3675         return;
3676     }
3677     else if (!I16 && (op == OPs16_32 || op == OPu16_32) ||
3678               I64 && op == OPs32_64)
3679     {
3680         elem *e11;
3681         elem *e1 = e.EV.E1;
3682 
3683         if (e1.Eoper == OPu8_16 && !e1.Ecount &&
3684             ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount))
3685            )
3686         {
3687             code cs;
3688 
3689             retregs = *pretregs & BYTEREGS;
3690             if (!retregs)
3691                 retregs = BYTEREGS;
3692             allocreg(cdb,&retregs,&reg,TYint);
3693             movregconst(cdb,reg,0,0);                   //  XOR reg,reg
3694             loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs);  //  MOV regL,EA
3695             freenode(e11);
3696             freenode(e1);
3697         }
3698         else if (e1.Eoper == OPvar ||
3699             (e1.Eoper == OPind && !e1.Ecount))
3700         {
3701             code cs = void;
3702 
3703             if (I32 && op == OPu16_32 && config.flags4 & CFG4speed)
3704                 goto L2;
3705             retregs = *pretregs;
3706             allocreg(cdb,&retregs,&reg,TYint);
3707             const opcode = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,EA
3708             if (op == OPs32_64)
3709             {
3710                 assert(I64);
3711                 // MOVSXD reg,e1
3712                 loadea(cdb,e1,&cs,0x63,reg,0,0,retregs);
3713                 code_orrex(cdb.last(), REX_W);
3714             }
3715             else
3716                 loadea(cdb,e1,&cs,opcode,reg,0,0,retregs);
3717             freenode(e1);
3718         }
3719         else
3720         {
3721         L2:
3722             retregs = *pretregs;
3723             if (op == OPs32_64)
3724                 retregs = mAX | (*pretregs & mPSW);
3725             *pretregs &= ~mPSW;             // flags are already set
3726             CodeBuilder cdbx;
3727             cdbx.ctor();
3728             codelem(cdbx,e1,&retregs,false);
3729             code *cx = cdbx.finish();
3730             cdb.append(cdbx);
3731             getregs(cdb,retregs);
3732             if (op == OPu16_32 && cx)
3733             {
3734                 cx = code_last(cx);
3735                 if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) &&
3736                     mask(cx.Irm & 7) == retregs)
3737                 {
3738                     // Convert AND of a word to AND of a dword, zeroing upper word
3739                     if (cx.Irex & REX_B)
3740                         retregs = mask(8 | (cx.Irm & 7));
3741                     cx.Iflags &= ~CFopsize;
3742                     cx.IEV2.Vint &= 0xFFFF;
3743                     goto L1;
3744                 }
3745             }
3746             if (op == OPs16_32 && retregs == mAX)
3747                 cdb.gen1(0x98);         // CWDE
3748             else if (op == OPs32_64 && retregs == mAX)
3749             {
3750                 cdb.gen1(0x98);         // CDQE
3751                 code_orrex(cdb.last(), REX_W);
3752             }
3753             else
3754             {
3755                 reg = findreg(retregs);
3756                 if (config.flags4 & CFG4speed && op == OPu16_32)
3757                 {   // AND reg,0xFFFF
3758                     cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu);
3759                 }
3760                 else
3761                 {
3762                     opcode_t iop = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,reg
3763                     genregs(cdb,iop,reg,reg);
3764                 }
3765             }
3766          L1:
3767             if (e1comsub)
3768                 getregs(cdb,retregs);
3769         }
3770         fixresult(cdb,e,retregs,pretregs);
3771         return;
3772     }
3773     else if (*pretregs & mPSW || config.target_cpu < TARGET_80286)
3774     {
3775         // OPs16_32, OPs32_64
3776         // CWD doesn't affect flags, so we can depend on the integer
3777         // math to provide the flags.
3778         retregs = mAX | mPSW;               // want integer result in AX
3779         *pretregs &= ~mPSW;                 // flags are already set
3780         codelem(cdb,e.EV.E1,&retregs,false);
3781         getregs(cdb,mDX);           // sign extend into DX
3782         cdb.gen1(0x99);                     // CWD/CDQ
3783         if (e1comsub)
3784             getregs(cdb,retregs);
3785         fixresult(cdb,e,mDX | retregs,pretregs);
3786         return;
3787     }
3788     else
3789     {
3790         // OPs16_32, OPs32_64
3791         uint msreg,lsreg;
3792 
3793         retregs = *pretregs & mLSW;
3794         assert(retregs);
3795         codelem(cdb,e.EV.E1,&retregs,false);
3796         retregs |= *pretregs & mMSW;
3797         allocreg(cdb,&retregs,&reg,e.Ety);
3798         msreg = findregmsw(retregs);
3799         lsreg = findreglsw(retregs);
3800         genmovreg(cdb,msreg,lsreg);                // MOV msreg,lsreg
3801         assert(config.target_cpu >= TARGET_80286);              // 8088 can't handle SAR reg,imm8
3802         cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
3803         fixresult(cdb,e,retregs,pretregs);
3804         return;
3805     }
3806 }
3807 
3808 
3809 /***************************
3810  * Convert byte to int.
3811  * For OPu8_16 and OPs8_16.
3812  */
3813 
3814 void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3815 {
3816     regm_t retregs;
3817     char size;
3818 
3819     if ((*pretregs & (ALLREGS | mBP)) == 0)     // if don't need result in regs
3820     {
3821         codelem(cdb,e.EV.E1,pretregs,false);      // then conversion isn't necessary
3822         return;
3823     }
3824 
3825     //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
3826     char op = e.Eoper;
3827     elem *e1 = e.EV.E1;
3828     if (e1.Eoper == OPcomma)
3829         docommas(cdb,&e1);
3830     if (!I16)
3831     {
3832         if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
3833         {
3834             code cs;
3835 
3836             regm_t retregsx = *pretregs;
3837             reg_t reg;
3838             allocreg(cdb,&retregsx,&reg,TYint);
3839             if (config.flags4 & CFG4speed &&
3840                 op == OPu8_16 && mask(reg) & BYTEREGS &&
3841                 config.target_cpu < TARGET_PentiumPro)
3842             {
3843                 movregconst(cdb,reg,0,0);                 //  XOR reg,reg
3844                 loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx); //  MOV regL,EA
3845             }
3846             else
3847             {
3848                 const opcode = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,EA
3849                 loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx);
3850             }
3851             freenode(e1);
3852             fixresult(cdb,e,retregsx,pretregs);
3853             return;
3854         }
3855         size = tysize(e.Ety);
3856         retregs = *pretregs & BYTEREGS;
3857         if (retregs == 0)
3858             retregs = BYTEREGS;
3859         retregs |= *pretregs & mPSW;
3860         *pretregs &= ~mPSW;
3861     }
3862     else
3863     {
3864         if (op == OPu8_16)              // if uint conversion
3865         {
3866             retregs = *pretregs & BYTEREGS;
3867             if (retregs == 0)
3868                 retregs = BYTEREGS;
3869         }
3870         else
3871         {
3872             // CBW doesn't affect flags, so we can depend on the integer
3873             // math to provide the flags.
3874             retregs = mAX | (*pretregs & mPSW); // want integer result in AX
3875         }
3876     }
3877 
3878     CodeBuilder cdb1;
3879     cdb1.ctor();
3880     codelem(cdb1,e1,&retregs,false);
3881     code *c1 = cdb1.finish();
3882     cdb.append(cdb1);
3883     reg_t reg = findreg(retregs);
3884     code *c;
3885     if (!c1)
3886         goto L1;
3887 
3888     // If previous instruction is an AND bytereg,value
3889     c = cdb.last();
3890     if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) &&
3891         (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0))
3892     {
3893         if (*pretregs & mPSW)
3894             c.Iflags |= CFpsw;
3895         c.Iop |= 1;                    // convert to word operation
3896         c.IEV2.Vuns &= 0xFF;           // dump any high order bits
3897         *pretregs &= ~mPSW;             // flags already set
3898     }
3899     else
3900     {
3901      L1:
3902         if (!I16)
3903         {
3904             if (op == OPs8_16 && reg == AX && size == 2)
3905             {
3906                 cdb.gen1(0x98);                  // CBW
3907                 cdb.last().Iflags |= CFopsize;  // don't do a CWDE
3908             }
3909             else
3910             {
3911                 // We could do better by not forcing the src and dst
3912                 // registers to be the same.
3913 
3914                 if (config.flags4 & CFG4speed && op == OPu8_16)
3915                 {   // AND reg,0xFF
3916                     cdb.genc2(0x81,modregrmx(3,4,reg),0xFF);
3917                 }
3918                 else
3919                 {
3920                     opcode_t iop = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,reg
3921                     genregs(cdb,iop,reg,reg);
3922                     if (I64 && reg >= 4)
3923                         code_orrex(cdb.last(), REX);
3924                 }
3925             }
3926         }
3927         else
3928         {
3929             if (op == OPu8_16)
3930                 genregs(cdb,0x30,reg+4,reg+4);  // XOR regH,regH
3931             else
3932             {
3933                 cdb.gen1(0x98);                 // CBW
3934                 *pretregs &= ~mPSW;             // flags already set
3935             }
3936         }
3937     }
3938     getregs(cdb,retregs);
3939     fixresult(cdb,e,retregs,pretregs);
3940 }
3941 
3942 
3943 /***************************
3944  * Convert long to short (OP32_16).
3945  * Get offset of far pointer (OPoffset).
3946  * Convert int to byte (OP16_8).
3947  * Convert long long to long (OP64_32).
3948  * OP128_64
3949  */
3950 
3951 void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
3952 {
3953     debug
3954     {
3955         switch (e.Eoper)
3956         {
3957             case OP32_16:
3958             case OPoffset:
3959             case OP16_8:
3960             case OP64_32:
3961             case OP128_64:
3962                 break;
3963 
3964             default:
3965                 assert(0);
3966         }
3967     }
3968 
3969     regm_t retregs;
3970     if (e.Eoper == OP16_8)
3971     {
3972         retregs = *pretregs ? BYTEREGS : 0;
3973         codelem(cdb,e.EV.E1,&retregs,false);
3974     }
3975     else
3976     {
3977         if (e.EV.E1.Eoper == OPrelconst)
3978             offsetinreg(cdb,e.EV.E1,&retregs);
3979         else
3980         {
3981             retregs = *pretregs ? ALLREGS : 0;
3982             codelem(cdb,e.EV.E1,&retregs,false);
3983             bool isOff = e.Eoper == OPoffset;
3984             if (I16 ||
3985                 I32 && (isOff || e.Eoper == OP64_32) ||
3986                 I64 && (isOff || e.Eoper == OP128_64))
3987                 retregs &= mLSW;                // want LSW only
3988         }
3989     }
3990 
3991     /* We "destroy" a reg by assigning it the result of a new e, even
3992      * though the values are the same. Weakness of our CSE strategy that
3993      * a register can only hold the contents of one elem at a time.
3994      */
3995     if (e.Ecount)
3996         getregs(cdb,retregs);
3997     else
3998         useregs(retregs);
3999 
4000     debug
4001     if (!(!*pretregs || retregs))
4002     {
4003         WROP(e.Eoper),
4004         printf(" *pretregs = %s, retregs = %s, e = %p\n",regm_str(*pretregs),regm_str(retregs),e);
4005     }
4006 
4007     assert(!*pretregs || retregs);
4008     fixresult(cdb,e,retregs,pretregs);  // lsw only
4009 }
4010 
4011 /**********************************************
4012  * Get top 32 bits of 64 bit value (I32)
4013  * or top 16 bits of 32 bit value (I16)
4014  * or top 64 bits of 128 bit value (I64).
4015  * OPmsw
4016  */
4017 
4018 void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4019 {
4020     assert(e.Eoper == OPmsw);
4021 
4022     regm_t retregs = *pretregs ? ALLREGS : 0;
4023     codelem(cdb,e.EV.E1,&retregs,false);
4024     retregs &= mMSW;                    // want MSW only
4025 
4026     /* We "destroy" a reg by assigning it the result of a new e, even
4027      * though the values are the same. Weakness of our CSE strategy that
4028      * a register can only hold the contents of one elem at a time.
4029      */
4030     if (e.Ecount)
4031         getregs(cdb,retregs);
4032     else
4033         useregs(retregs);
4034 
4035     debug
4036     if (!(!*pretregs || retregs))
4037     {   WROP(e.Eoper);
4038         printf(" *pretregs = %s, retregs = %s\n",regm_str(*pretregs),regm_str(retregs));
4039         elem_print(e);
4040     }
4041 
4042     assert(!*pretregs || retregs);
4043     fixresult(cdb,e,retregs,pretregs);  // msw only
4044 }
4045 
4046 
4047 
4048 /******************************
4049  * Handle operators OPinp and OPoutp.
4050  */
4051 
4052 void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4053 {
4054     //printf("cdport\n");
4055     ubyte op = 0xE4;            // root of all IN/OUT opcodes
4056     elem *e1 = e.EV.E1;
4057 
4058     // See if we can use immediate mode of IN/OUT opcodes
4059     ubyte port;
4060     if (e1.Eoper == OPconst && e1.EV.Vuns <= 255 &&
4061         (!evalinregister(e1) || regcon.mvar & mDX))
4062     {
4063         port = cast(ubyte)e1.EV.Vuns;
4064         freenode(e1);
4065     }
4066     else
4067     {
4068         regm_t retregs = mDX;           // port number is always DX
4069         codelem(cdb,e1,&retregs,false);
4070         op |= 0x08;                     // DX version of opcode
4071         port = 0;                       // not logically needed, but
4072                                         // quiets "uninitialized var" complaints
4073     }
4074 
4075     uint sz;
4076     if (e.Eoper == OPoutp)
4077     {
4078         sz = tysize(e.EV.E2.Ety);
4079         regm_t retregs = mAX;           // byte/word to output is in AL/AX
4080         scodelem(cdb,e.EV.E2,&retregs,((op & 0x08) ? mDX : 0),true);
4081         op |= 0x02;                     // OUT opcode
4082     }
4083     else // OPinp
4084     {
4085         getregs(cdb,mAX);
4086         sz = tysize(e.Ety);
4087     }
4088 
4089     if (sz != 1)
4090         op |= 1;                        // word operation
4091     cdb.genc2(op,0,port);               // IN/OUT AL/AX,DX/port
4092     if (op & 1 && sz != REGSIZE)        // if need size override
4093         cdb.last().Iflags |= CFopsize;
4094     regm_t retregs = mAX;
4095     fixresult(cdb,e,retregs,pretregs);
4096 }
4097 
4098 /************************
4099  * Generate code for an asm elem.
4100  */
4101 
4102 void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4103 {
4104     // Assume only regs normally destroyed by a function are destroyed
4105     getregs(cdb,(ALLREGS | mES) & ~fregsaved);
4106     cdb.genasm(cast(char *)e.EV.Vstring, cast(uint) e.EV.Vstrlen);
4107     fixresult(cdb,e,(I16 ? mDX | mAX : mAX),pretregs);
4108 }
4109 
4110 /************************
4111  * Generate code for OPnp_f16p and OPf16p_np.
4112  */
4113 
4114 void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
4115 {
4116     code *cnop;
4117     code cs;
4118 
4119     assert(I32);
4120     codelem(cdb,e.EV.E1,pretregs,false);
4121     reg_t reg = findreg(*pretregs);
4122     getregs(cdb,*pretregs);      // we will destroy the regs
4123 
4124     cs.Iop = 0xC1;
4125     cs.Irm = modregrm(3,0,reg);
4126     cs.Iflags = 0;
4127     cs.Irex = 0;
4128     cs.IFL2 = FLconst;
4129     cs.IEV2.Vuns = 16;
4130 
4131     cdb.gen(&cs);                       // ROL ereg,16
4132     cs.Irm |= modregrm(0,1,0);
4133     cdb.gen(&cs);                       // ROR ereg,16
4134     cs.IEV2.Vuns = 3;
4135     cs.Iflags |= CFopsize;
4136 
4137     if (e.Eoper == OPnp_f16p)
4138     {
4139         /*      OR  ereg,ereg
4140                 JE  L1
4141                 ROR ereg,16
4142                 SHL reg,3
4143                 MOV rx,SS
4144                 AND rx,3                ;mask off CPL bits
4145                 OR  rl,4                ;run on LDT bit
4146                 OR  regl,rl
4147                 ROL ereg,16
4148             L1: NOP
4149          */
4150         reg_t rx;
4151 
4152         regm_t retregs = BYTEREGS & ~*pretregs;
4153         allocreg(cdb,&retregs,&rx,TYint);
4154         cnop = gennop(null);
4155         int jop = JCXZ;
4156         if (reg != CX)
4157         {
4158             gentstreg(cdb,reg);
4159             jop = JE;
4160         }
4161         genjmp(cdb,jop,FLcode, cast(block *)cnop);  // Jop L1
4162         NEWREG(cs.Irm,4);
4163         cdb.gen(&cs);                                   // SHL reg,3
4164         genregs(cdb,0x8C,2,rx);            // MOV rx,SS
4165         int isbyte = (mask(reg) & BYTEREGS) == 0;
4166         cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3);      // AND rl,3
4167         cdb.genc2(0x80,modregrm(3,1,rx),4);             // OR  rl,4
4168         genregs(cdb,0x0A | isbyte,reg,rx);   // OR  regl,rl
4169     }
4170     else // OPf16p_np
4171     {
4172         /*      ROR ereg,16
4173                 SHR reg,3
4174                 ROL ereg,16
4175          */
4176 
4177         cs.Irm |= modregrm(0,5,0);
4178         cdb.gen(&cs);                                   // SHR reg,3
4179         cnop = null;
4180     }
4181 }
4182 
4183 /*************************
4184  * Generate code for OPbtst
4185  */
4186 
4187 void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
4188 {
4189     regm_t retregs;
4190     reg_t reg;
4191 
4192     //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
4193 
4194     opcode_t op = 0xA3;                        // BT EA,value
4195     int mode = 4;
4196 
4197     elem *e1 = e.EV.E1;
4198     elem *e2 = e.EV.E2;
4199     code cs;
4200     cs.Iflags = 0;
4201 
4202     if (*pretregs == 0)                   // if don't want result
4203     {
4204         codelem(cdb,e1,pretregs,false);  // eval left leaf
4205         *pretregs = 0;                    // in case they got set
4206         codelem(cdb,e2,pretregs,false);
4207         return;
4208     }
4209 
4210     regm_t idxregs;
4211     if ((e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar)
4212     {
4213         getlvalue(cdb, &cs, e1, RMload);    // get addressing mode
4214         idxregs = idxregm(&cs);             // mask if index regs used
4215     }
4216     else
4217     {
4218         retregs = tysize(e1.Ety) == 1 ? BYTEREGS : allregs;
4219         codelem(cdb,e1, &retregs, false);
4220         reg = findreg(retregs);
4221         cs.Irm = modregrm(3,0,reg & 7);
4222         cs.Iflags = 0;
4223         cs.Irex = 0;
4224         if (reg & 8)
4225             cs.Irex |= REX_B;
4226         idxregs = retregs;
4227     }
4228 
4229     tym_t ty1 = tybasic(e1.Ety);
4230     const sz = tysize(e1.Ety);
4231     ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
4232 
4233 //    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
4234     if (e2.Eoper == OPconst)
4235     {
4236         cs.Iop = 0x0FBA;                         // BT rm,imm8
4237         cs.Irm |= modregrm(0,mode,0);
4238         cs.Iflags |= CFpsw | word;
4239         cs.IFL2 = FLconst;
4240         if (sz <= SHORTSIZE)
4241         {
4242             cs.IEV2.Vint = e2.EV.Vint & 15;
4243         }
4244         else if (sz == 4)
4245         {
4246             cs.IEV2.Vint = e2.EV.Vint & 31;
4247         }
4248         else
4249         {
4250             cs.IEV2.Vint = e2.EV.Vint & 63;
4251             if (I64)
4252                 cs.Irex |= REX_W;
4253         }
4254         cdb.gen(&cs);
4255     }
4256     else
4257     {
4258         retregs = ALLREGS & ~idxregs;
4259 
4260         /* A register variable may not have its upper 32
4261          * bits 0, so pick a different register to force
4262          * a MOV which will clear it
4263          */
4264         if (I64 && sz == 8 && tysize(e2.Ety) == 4)
4265         {
4266             regm_t rregm;
4267             if (isregvar(e2, &rregm, null))
4268                 retregs &= ~rregm;
4269         }
4270 
4271         scodelem(cdb,e2,&retregs,idxregs,true);
4272         reg = findreg(retregs);
4273 
4274         cs.Iop = 0x0F00 | op;                     // BT rm,reg
4275         code_newreg(&cs,reg);
4276         cs.Iflags |= CFpsw | word;
4277         if (I64 && _tysize[ty1] == 8)
4278             cs.Irex |= REX_W;
4279         cdb.gen(&cs);
4280     }
4281 
4282     if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
4283     {
4284         if (tysize(e.Ety) == 1)
4285         {
4286             assert(I64 || retregs & BYTEREGS);
4287             allocreg(cdb,&retregs,&reg,TYint);
4288             cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
4289             if (I64 && reg >= 4)
4290                 code_orrex(cdb.last(), REX);
4291             *pretregs = retregs;
4292         }
4293         else
4294         {
4295             code *cnop = null;
4296             regm_t save = regcon.immed.mval;
4297             allocreg(cdb,&retregs,&reg,TYint);
4298             regcon.immed.mval = save;
4299             if ((*pretregs & mPSW) == 0)
4300             {
4301                 getregs(cdb,retregs);
4302                 genregs(cdb,0x19,reg,reg);     // SBB reg,reg
4303                 cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
4304             }
4305             else
4306             {
4307                 movregconst(cdb,reg,1,8);      // MOV reg,1
4308                 cnop = gennop(null);
4309                 genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
4310                                                             // MOV reg,0
4311                 movregconst(cdb,reg,0,8);
4312                 regcon.immed.mval &= ~mask(reg);
4313             }
4314             *pretregs = retregs;
4315             cdb.append(cnop);
4316         }
4317     }
4318 }
4319 
4320 /*************************
4321  * Generate code for OPbt, OPbtc, OPbtr, OPbts
4322  */
4323 
4324 void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
4325 {
4326     //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs));
4327     regm_t retregs;
4328     reg_t reg;
4329     opcode_t op;
4330     int mode;
4331 
4332     switch (e.Eoper)
4333     {
4334         case OPbt:      op = 0xA3; mode = 4; break;
4335         case OPbtc:     op = 0xBB; mode = 7; break;
4336         case OPbtr:     op = 0xB3; mode = 6; break;
4337         case OPbts:     op = 0xAB; mode = 5; break;
4338 
4339         default:
4340             assert(0);
4341     }
4342 
4343     elem *e1 = e.EV.E1;
4344     elem *e2 = e.EV.E2;
4345     code cs;
4346     cs.Iflags = 0;
4347 
4348     getlvalue(cdb, &cs, e, RMload);      // get addressing mode
4349     if (e.Eoper == OPbt && *pretregs == 0)
4350     {
4351         codelem(cdb,e2,pretregs,false);
4352         return;
4353     }
4354 
4355     const ty1 = tybasic(e1.Ety);
4356     const ty2 = tybasic(e2.Ety);
4357     ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
4358     regm_t idxregs = idxregm(&cs);         // mask if index regs used
4359 
4360 //    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
4361     if (e2.Eoper == OPconst)
4362     {
4363         cs.Iop = 0x0FBA;                         // BT rm,imm8
4364         cs.Irm |= modregrm(0,mode,0);
4365         cs.Iflags |= CFpsw | word;
4366         cs.IFL2 = FLconst;
4367         if (_tysize[ty1] == SHORTSIZE)
4368         {
4369             cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
4370             cs.IEV2.Vint = e2.EV.Vint & 15;
4371         }
4372         else if (_tysize[ty1] == 4)
4373         {
4374             cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
4375             cs.IEV2.Vint = e2.EV.Vint & 31;
4376         }
4377         else
4378         {
4379             cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
4380             cs.IEV2.Vint = e2.EV.Vint & 63;
4381             if (I64)
4382                 cs.Irex |= REX_W;
4383         }
4384         cdb.gen(&cs);
4385     }
4386     else
4387     {
4388         retregs = ALLREGS & ~idxregs;
4389         scodelem(cdb,e2,&retregs,idxregs,true);
4390         reg = findreg(retregs);
4391 
4392         cs.Iop = 0x0F00 | op;                     // BT rm,reg
4393         code_newreg(&cs,reg);
4394         cs.Iflags |= CFpsw | word;
4395         if (_tysize[ty2] == 8 && I64)
4396             cs.Irex |= REX_W;
4397         cdb.gen(&cs);
4398     }
4399 
4400     if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
4401     {
4402         if (_tysize[e.Ety] == 1)
4403         {
4404             assert(I64 || retregs & BYTEREGS);
4405             allocreg(cdb,&retregs,&reg,TYint);
4406             cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
4407             if (I64 && reg >= 4)
4408                 code_orrex(cdb.last(), REX);
4409             *pretregs = retregs;
4410         }
4411         else
4412         {
4413             code *cnop = null;
4414             const save = regcon.immed.mval;
4415             allocreg(cdb,&retregs,&reg,TYint);
4416             regcon.immed.mval = save;
4417             if ((*pretregs & mPSW) == 0)
4418             {
4419                 getregs(cdb,retregs);
4420                 genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
4421                 cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
4422             }
4423             else
4424             {
4425                 movregconst(cdb,reg,1,8);      // MOV reg,1
4426                 cnop = gennop(null);
4427                 genjmp(cdb,JC,FLcode, cast(block *) cnop);    // Jtrue nop
4428                                                             // MOV reg,0
4429                 movregconst(cdb,reg,0,8);
4430                 regcon.immed.mval &= ~mask(reg);
4431             }
4432             *pretregs = retregs;
4433             cdb.append(cnop);
4434         }
4435     }
4436 }
4437 
4438 /*************************************
4439  * Generate code for OPbsf and OPbsr.
4440  */
4441 
4442 void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
4443 {
4444     //printf("cdbscan()\n");
4445     //elem_print(e);
4446     if (!*pretregs)
4447     {
4448         codelem(cdb,e.EV.E1,pretregs,false);
4449         return;
4450     }
4451 
4452     const tyml = tybasic(e.EV.E1.Ety);
4453     const sz = _tysize[tyml];
4454     assert(sz == 2 || sz == 4 || sz == 8);
4455     code cs = void;
4456 
4457     if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
4458     {
4459         getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
4460     }
4461     else
4462     {
4463         regm_t retregs = allregs;
4464         codelem(cdb,e.EV.E1, &retregs, false);
4465         const reg = findreg(retregs);
4466         cs.Irm = modregrm(3,0,reg & 7);
4467         cs.Iflags = 0;
4468         cs.Irex = 0;
4469         if (reg & 8)
4470             cs.Irex |= REX_B;
4471     }
4472 
4473     regm_t retregs = *pretregs & allregs;
4474     if  (!retregs)
4475         retregs = allregs;
4476     reg_t reg;
4477     allocreg(cdb,&retregs, &reg, e.Ety);
4478 
4479     cs.Iop = (e.Eoper == OPbsf) ? 0x0FBC : 0x0FBD;        // BSF/BSR reg,EA
4480     code_newreg(&cs, reg);
4481     if (!I16 && sz == SHORTSIZE)
4482         cs.Iflags |= CFopsize;
4483     cdb.gen(&cs);
4484     if (sz == 8)
4485         code_orrex(cdb.last(), REX_W);
4486 
4487     fixresult(cdb,e,retregs,pretregs);
4488 }
4489 
4490 /************************
4491  * OPpopcnt operator
4492  */
4493 
4494 void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
4495 {
4496     //printf("cdpopcnt()\n");
4497     //elem_print(e);
4498     assert(!I16);
4499     if (!*pretregs)
4500     {
4501         codelem(cdb,e.EV.E1,pretregs,false);
4502         return;
4503     }
4504 
4505     const tyml = tybasic(e.EV.E1.Ety);
4506 
4507     const sz = _tysize[tyml];
4508     assert(sz == 2 || sz == 4 || (sz == 8 && I64));     // no byte op
4509 
4510     code cs = void;
4511     if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
4512     {
4513         getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
4514     }
4515     else
4516     {
4517         regm_t retregs = allregs;
4518         codelem(cdb,e.EV.E1, &retregs, false);
4519         const reg = findreg(retregs);
4520         cs.Irm = modregrm(3,0,reg & 7);
4521         cs.Iflags = 0;
4522         cs.Irex = 0;
4523         if (reg & 8)
4524             cs.Irex |= REX_B;
4525     }
4526 
4527     regm_t retregs = *pretregs & allregs;
4528     if  (!retregs)
4529         retregs = allregs;
4530     reg_t reg;
4531     allocreg(cdb,&retregs, &reg, e.Ety);
4532 
4533     cs.Iop = POPCNT;            // POPCNT reg,EA
4534     code_newreg(&cs, reg);
4535     if (sz == SHORTSIZE)
4536         cs.Iflags |= CFopsize;
4537     if (*pretregs & mPSW)
4538         cs.Iflags |= CFpsw;
4539     cdb.gen(&cs);
4540     if (sz == 8)
4541         code_orrex(cdb.last(), REX_W);
4542     *pretregs &= mBP | ALLREGS;             // flags already set
4543 
4544     fixresult(cdb,e,retregs,pretregs);
4545 }
4546 
4547 
4548 /*******************************************
4549  * Generate code for OPpair, OPrpair.
4550  */
4551 
4552 void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
4553 {
4554     if (*pretregs == 0)                         // if don't want result
4555     {
4556         codelem(cdb,e.EV.E1,pretregs,false);     // eval left leaf
4557         *pretregs = 0;                          // in case they got set
4558         codelem(cdb,e.EV.E2,pretregs,false);
4559         return;
4560     }
4561 
4562     //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
4563     //printf("Ecount = %d\n", e.Ecount);
4564 
4565     regm_t retregs = *pretregs;
4566     if (retregs == mPSW && tycomplex(e.Ety) && config.inline8087)
4567     {
4568         if (config.fpxmmregs)
4569             retregs |= mXMM0 | mXMM1;
4570         else
4571             retregs |= mST01;
4572     }
4573 
4574     if (retregs & mST01)
4575     {
4576         loadPair87(cdb, e, pretregs);
4577         return;
4578     }
4579 
4580     regm_t regs1;
4581     regm_t regs2;
4582     if (retregs & XMMREGS)
4583     {
4584         retregs &= XMMREGS;
4585         const reg = findreg(retregs);
4586         regs1 = mask(reg);
4587         regs2 = mask(findreg(retregs & ~regs1));
4588     }
4589     else
4590     {
4591         retregs &= allregs;
4592         if  (!retregs)
4593             retregs = allregs;
4594         regs1 = retregs & mLSW;
4595         regs2 = retregs & mMSW;
4596     }
4597     if (e.Eoper == OPrpair)
4598     {
4599         // swap
4600         regs1 ^= regs2;
4601         regs2 ^= regs1;
4602         regs1 ^= regs2;
4603     }
4604     //printf("1: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));
4605 
4606     codelem(cdb,e.EV.E1, &regs1, false);
4607     scodelem(cdb,e.EV.E2, &regs2, regs1, false);
4608     //printf("2: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));
4609 
4610     if (e.EV.E1.Ecount)
4611         getregs(cdb,regs1);
4612     if (e.EV.E2.Ecount)
4613         getregs(cdb,regs2);
4614 
4615     fixresult(cdb,e,regs1 | regs2,pretregs);
4616 }
4617 
4618 /*************************
4619  * Generate code for OPcmpxchg
4620  */
4621 
4622 void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
4623 {
4624     /* The form is:
4625      *     OPcmpxchg
4626      *    /     \
4627      * lvalue   OPparam
4628      *          /     \
4629      *        old     new
4630      */
4631 
4632     //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
4633     elem *e1 = e.EV.E1;
4634     elem *e2 = e.EV.E2;
4635     assert(e2.Eoper == OPparam);
4636     assert(!e2.Ecount);
4637 
4638     const tyml = tybasic(e1.Ety);                   // type of lvalue
4639     const sz = _tysize[tyml];
4640 
4641     if (I32 && sz == 8)
4642     {
4643         regm_t retregsx = mDX|mAX;
4644         codelem(cdb,e2.EV.E1,&retregsx,false);          // [DX,AX] = e2.EV.E1
4645 
4646         regm_t retregs = mCX|mBX;
4647         scodelem(cdb,e2.EV.E2,&retregs,mDX|mAX,false);  // [CX,BX] = e2.EV.E2
4648 
4649         code cs = void;
4650         getlvalue(cdb,&cs,e1,mCX|mBX|mAX|mDX);        // get EA
4651 
4652         getregs(cdb,mDX|mAX);                 // CMPXCHG destroys these regs
4653 
4654         if (e1.Ety & mTYvolatile)
4655             cdb.gen1(LOCK);                           // LOCK prefix
4656         cs.Iop = 0x0FC7;                              // CMPXCHG8B EA
4657         cs.Iflags |= CFpsw;
4658         code_newreg(&cs,1);
4659         cdb.gen(&cs);
4660 
4661         assert(!e1.Ecount);
4662         freenode(e1);
4663     }
4664     else
4665     {
4666         const uint isbyte = (sz == 1);            // 1 for byte operation
4667         const ubyte word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
4668         const uint rex = (I64 && sz == 8) ? REX_W : 0;
4669 
4670         regm_t retregsx = mAX;
4671         codelem(cdb,e2.EV.E1,&retregsx,false);       // AX = e2.EV.E1
4672 
4673         regm_t retregs = (ALLREGS | mBP) & ~mAX;
4674         scodelem(cdb,e2.EV.E2,&retregs,mAX,false);   // load rvalue in reg
4675 
4676         code cs = void;
4677         getlvalue(cdb,&cs,e1,mAX | retregs); // get EA
4678 
4679         getregs(cdb,mAX);                  // CMPXCHG destroys AX
4680 
4681         if (e1.Ety & mTYvolatile)
4682             cdb.gen1(LOCK);                        // LOCK prefix
4683         cs.Iop = 0x0FB1 ^ isbyte;                    // CMPXCHG EA,reg
4684         cs.Iflags |= CFpsw | word;
4685         cs.Irex |= rex;
4686         const reg = findreg(retregs);
4687         code_newreg(&cs,reg);
4688         cdb.gen(&cs);
4689 
4690         assert(!e1.Ecount);
4691         freenode(e1);
4692     }
4693 
4694     if (regm_t retregs = *pretregs & (ALLREGS | mBP)) // if return result in register
4695     {
4696         assert(tysize(e.Ety) == 1);
4697         assert(I64 || retregs & BYTEREGS);
4698         reg_t reg;
4699         allocreg(cdb,&retregs,&reg,TYint);
4700         uint ea = modregrmx(3,0,reg);
4701         if (I64 && reg >= 4)
4702             ea |= REX << 16;
4703         cdb.gen2(0x0F94,ea);        // SETZ reg
4704         *pretregs = retregs;
4705     }
4706 }
4707 
4708 /*************************
4709  * Generate code for OPprefetch
4710  */
4711 
4712 void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
4713 {
4714     /* Generate the following based on e2:
4715      *    0: prefetch0
4716      *    1: prefetch1
4717      *    2: prefetch2
4718      *    3: prefetchnta
4719      *    4: prefetchw
4720      *    5: prefetchwt1
4721      */
4722     //printf("cdprefetch\n");
4723     elem *e1 = e.EV.E1;
4724 
4725     assert(*pretregs == 0);
4726     assert(e.EV.E2.Eoper == OPconst);
4727     opcode_t op;
4728     reg_t reg;
4729     switch (e.EV.E2.EV.Vuns)
4730     {
4731         case 0: op = PREFETCH; reg = 1; break;  // PREFETCH0
4732         case 1: op = PREFETCH; reg = 2; break;  // PREFETCH1
4733         case 2: op = PREFETCH; reg = 3; break;  // PREFETCH2
4734         case 3: op = PREFETCH; reg = 0; break;  // PREFETCHNTA
4735         case 4: op = 0x0F0D;   reg = 1; break;  // PREFETCHW
4736         case 5: op = 0x0F0D;   reg = 2; break;  // PREFETCHWT1
4737         default: assert(0);
4738     }
4739 
4740     freenode(e.EV.E2);
4741 
4742     code cs = void;
4743     getlvalue(cdb,&cs,e1,0);
4744     cs.Iop = op;
4745     cs.Irm |= modregrm(0,reg,0);
4746     cs.Iflags |= CFvolatile;            // do not schedule
4747     cdb.gen(&cs);
4748 }
4749 
4750 
4751 /*********************
4752  * Load register from EA of assignment operation.
4753  * Params:
4754  *      cdb = store generated code here
4755  *      cs = instruction with EA already set in it
4756  *      e = assignment expression that will be evaluated
4757  *      reg = set to register loaded from EA
4758  *      retregs = register candidates for reg
4759  */
4760 private
4761 void opAssLoadReg(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t reg, regm_t retregs)
4762 {
4763     modEA(cdb, &cs);
4764     allocreg(cdb,&retregs,&reg,TYoffset);
4765 
4766     cs.Iop = LOD;
4767     code_newreg(&cs,reg);
4768     cdb.gen(&cs);                   // MOV reg,EA
4769 }
4770 
4771 /*********************
4772  * Load register pair from EA of assignment operation.
4773  * Params:
4774  *      cdb = store generated code here
4775  *      cs = instruction with EA already set in it
4776  *      e = assignment expression that will be evaluated
4777  *      rhi = set to most significant register of the pair
4778  *      rlo = set toleast significant register of the pair
4779  *      retregs = register candidates for rhi, rlo
4780  *      keepmsk = registers to not modify
4781  */
4782 private
4783 void opAssLoadPair(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t rhi, out reg_t rlo, regm_t retregs, regm_t keepmsk)
4784 {
4785     getlvalue(cdb,&cs,e.EV.E1,retregs | keepmsk);
4786     const tym_t tyml = tybasic(e.EV.E1.Ety);              // type of lvalue
4787     reg_t reg;
4788     allocreg(cdb,&retregs,&reg,tyml);
4789 
4790     rhi = findregmsw(retregs);
4791     rlo = findreglsw(retregs);
4792 
4793     cs.Iop = LOD;
4794     code_newreg(&cs,rlo);
4795     cdb.gen(&cs);                   // MOV rlo,EA
4796     getlvalue_msw(&cs);
4797     code_newreg(&cs,rhi);
4798     cdb.gen(&cs);                   // MOV rhi,EA+2
4799     getlvalue_lsw(&cs);
4800 }
4801 
4802 
4803 /*********************************************************
4804  * Store register result of assignment operation EA.
4805  * Params:
4806  *      cdb = store generated code here
4807  *      cs = instruction with EA already set in it
4808  *      e = assignment expression that was evaluated
4809  *      reg = register of result
4810  *      pretregs = registers to store result in
4811  */
4812 private
4813 void opAssStoreReg(ref CodeBuilder cdb, ref code cs, elem* e, reg_t reg, regm_t* pretregs)
4814 {
4815     elem* e1 = e.EV.E1;
4816     const tym_t tyml = tybasic(e1.Ety);     // type of lvalue
4817     const uint sz = _tysize[tyml];
4818     const ubyte isbyte = (sz == 1);         // 1 for byte operation
4819     cs.Iop = STO ^ isbyte;
4820     code_newreg(&cs,reg);
4821     cdb.gen(&cs);                           // MOV EA,resreg
4822     if (e1.Ecount)                          // if we gen a CSE
4823         cssave(e1,mask(reg),!OTleaf(e1.Eoper));
4824     freenode(e1);
4825     fixresult(cdb,e,mask(reg),pretregs);
4826 }
4827 
4828 /*********************************************************
4829  * Store register pair result of assignment operation EA.
4830  * Params:
4831  *      cdb = store generated code here
4832  *      cs = instruction with EA already set in it
4833  *      e = assignment expression that was evaluated
4834  *      rhi = most significant register of the pair
4835  *      rlo = least significant register of the pair
4836  *      pretregs = registers to store result in
4837  */
4838 private
4839 void opAssStorePair(ref CodeBuilder cdb, ref code cs, elem* e, reg_t rhi, reg_t rlo, regm_t* pretregs)
4840 {
4841     cs.Iop = STO;
4842     code_newreg(&cs,rlo);
4843     cdb.gen(&cs);                   // MOV EA,lsreg
4844     code_newreg(&cs,rhi);
4845     getlvalue_msw(&cs);
4846     cdb.gen(&cs);                   // MOV EA+REGSIZE,msreg
4847     const regm_t retregs = mask(rhi) | mask(rlo);
4848     elem* e1 = e.EV.E1;
4849     if (e1.Ecount)                 // if we gen a CSE
4850         cssave(e1,retregs,!OTleaf(e1.Eoper));
4851     freenode(e1);
4852     fixresult(cdb,e,retregs,pretregs);
4853 }
4854 
4855 
4856 }