1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1985-1998 by Symantec
6  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d)
10  */
11 
12 module dmd.backend.cod4;
13 
14 version (SCPP)
15     version = COMPILE;
16 version (MARS)
17     version = COMPILE;
18 
19 version (COMPILE)
20 {
21 
22 import core.stdc.stdio;
23 import core.stdc.stdlib;
24 import core.stdc.string;
25 
26 import dmd.backend.cc;
27 import dmd.backend.cdef;
28 import dmd.backend.code;
29 import dmd.backend.code_x86;
30 import dmd.backend.codebuilder;
31 import dmd.backend.mem;
32 import dmd.backend.el;
33 import dmd.backend.global;
34 import dmd.backend.oper;
35 import dmd.backend.ty;
36 import dmd.backend.evalu8 : el_toldoubled;
37 import dmd.backend.xmm;
38 
39 extern (C++):
40 
41 nothrow:
42 
// Defined elsewhere. In this file it is used as a byte count (e.g. compared
// against 2, 4 and 8, and subtracted from DOUBLESIZE), so it is presumably
// the size in bytes of a general purpose register for the current target.
int REGSIZE();

// Code generator state shared with other backend modules (defined elsewhere);
// this file adjusts cgstate.stackclean around evaluation of operands.
extern __gshared CGstate cgstate;
// Table with one bool per FL value, defined elsewhere
// (NOTE(review): presumably marks the FL kinds that refer to data - confirm).
extern __gshared bool[FLMAX] datafl;
47 
/// Returns: a mask with only bit number `m` set
private extern (D) uint mask(uint m)
{
    return 1u << m;
}
49 
/* Indexed by register number in the order AX,CX,DX,BX. Each entry names the
 * register that comes next when cdeq() stores a double held in registers one
 * word at a time, starting from AX: AX -> BX -> CX -> DX. NOREG ends the chain.
 */
                        /*   AX,CX,DX,BX                */
__gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ];
52 
53 
/*******************************
 * Count the references to a symbol inside an expression tree.
 * Params:
 *      s = symbol to look for
 *      e = root of the expression tree to search
 * Returns:
 *      number of OPvar leaves in e whose symbol is s
 */

private int intree(Symbol *s,elem *e)
{
    const OPER op = e.Eoper;

    // A leaf counts once if it is a reference to s, otherwise not at all
    if (OTleaf(op))
        return (op == OPvar && e.EV.Vsym == s) ? 1 : 0;

    // Interior node: always has a first operand, and a second one if binary
    int count = intree(s,e.EV.E1);
    if (OTbinary(op))
        count += intree(s,e.EV.E2);
    return count;
}
64 
/***********************************
 * Decide whether expression e may be computed directly into the
 * register variable s.
 * Expressions such as x=x+x+x and x=a+x need care, because x would be
 * overwritten while it is still needed as an operand.
 * Params:
 *      s = the register variable being assigned to
 *      e = the expression being assigned
 * Returns:
 *      1       e can be evaluated directly into s
 *      0       it cannot
 */

int doinreg(Symbol *s, elem *e)
{
    while (true)
    {
        const OPER op = e.Eoper;

        // Indirections, calls and leaves are always accepted
        if (op == OPind || OTcall(op) || OTleaf(op))
            return 1;

        // So is anything that never mentions s
        const int occurrences = intree(s,e);
        if (occurrences == 0)
            return 1;

        // And a unary operator applied straight to a leaf
        if (OTunary(op) && OTleaf(e.EV.E1.Eoper))
            return 1;

        // More than one reference to s: give up
        if (occurrences != 1)
            return 0;

        switch (op)
        {
            case OPadd:
            case OPmin:
            case OPand:
            case OPor:
            case OPxor:
            case OPshl:
            case OPmul:
                break;

            default:
                return 0;
        }

        // The single reference must be in the left operand; if so,
        // repeat the analysis on that operand
        if (intree(s,e.EV.E2))
            return 0;
        e = e.EV.E1;
    }
}
112 
/****************************
 * Called just before an EA is modified. If the EA turns out to be a
 * register (mod field of the modregrm byte is 3), call getregs() on that
 * register so that any common subexpression held in it is dealt with first.
 * Params:
 *      cdb = code sink passed on to getregs()
 *      c = instruction whose addressing mode is examined
 */

void modEA(ref CodeBuilder cdb,code *c)
{
    // Nothing to do unless the addressing mode refers to a register
    if ((c.Irm & 0xC0) != 0xC0)
        return;

    reg_t reg = c.Irm & 7;
    if (c.Irex & REX_B)
    {
        // REX.B selects the upper eight registers, which only exist in 64 bit mode
        assert(I64);
        reg |= 8;
    }
    getregs(cdb,mask(reg));
}
131 
132 static if (TARGET_WINDOS)
133 {
134 // This code is for CPUs that do not support the 8087
135 
/****************************
 * Gen code for op= for doubles (and floats).
 *
 * If config.inline8087 is set the work is delegated to opass87().
 * Otherwise the lvalue is loaded into registers (or pushed on the stack for
 * 16 bit doubles), the rvalue is evaluated, a runtime library routine selected
 * through clibtab performs the arithmetic, and the result is stored back
 * into the lvalue.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the op= elem; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = where the result is wanted, passed on to fixresult()
 *      op = operator, used as index (relative to OPpostinc) into clibtab
 */

private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op)
{
    // Library routine for each operator from OPpostinc to OPdivass.
    // The OPeq slot holds -1 as it has no arithmetic routine.
    static immutable uint[OPdivass - OPpostinc + 1] clibtab =
    /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass       */
    [  CLIB.dadd, CLIB.dsub, cast(uint)-1,  CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ];

    if (config.inline8087)
    {
        opass87(cdb,e,pretregs);
        return;
    }

    code cs;
    regm_t retregs2,retregs,idxregs;

    uint clib = clibtab[op - OPpostinc];
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    // NOTE(review): the mask argument presumably lists registers the
    // addressing mode must not use, since they are loaded below - confirm
    getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX);

    if (tym == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    /* convert to float operation   */

        // Load EA into FLOATREGS
        getregs(cdb,FLOATREGS);
        cs.Iop = 0x8B;                    // MOV reg,EA
        cs.Irm |= modregrm(0,AX,0);
        cdb.gen(&cs);

        if (!I32)
        {
            // Second MOV, into DX from the most significant word
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

        }
        retregs2 = FLOATREGS2;
        idxregs = FLOATREGS | idxregm(&cs);
        retregs = FLOATREGS;
    }
    else
    {
        if (I32)
        {
            // Load EA into DOUBLEREGS
            getregs(cdb,DOUBLEREGS_32);
            cs.Iop = 0x8B;
            cs.Irm |= modregrm(0,AX,0);
            cdb.gen(&cs);
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

            retregs2 = DOUBLEREGS2_32;
            idxregs = DOUBLEREGS_32 | idxregm(&cs);
        }
        else
        {
            // Push EA onto stack
            // Four PUSHes (0xFF /6), the first one at the highest offset.
            // NOTE(review): getlvalue_lsw() is defined elsewhere; presumably
            // it steps the EA down to the next lower word - confirm
            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            stackpush += DOUBLESIZE;      // account for the pushed operand

            retregs2 = DOUBLEREGS_16;
            idxregs = idxregm(&cs);
        }
        retregs = DOUBLEREGS;
    }

    // If the EA uses an ES segment override, add ES to the mask handed to scodelem()
    if ((cs.Iflags & CFSEG) == CFes)
        idxregs |= mES;
    // Evaluate the rvalue into retregs2, with stackclean raised for the duration
    cgstate.stackclean++;
    scodelem(cdb,e.EV.E2,&retregs2,idxregs,false);
    cgstate.stackclean--;
    callclib(cdb,e,clib,&retregs,0);      // call the library routine, result in retregs
    if (e1.Ecount)
        cssave(e1,retregs,!OTleaf(e1.Eoper));             // if lvalue is a CSE
    freenode(e1);
    cs.Iop = 0x89;                              // MOV EA,DOUBLEREGS
    fltregs(cdb,&cs,tym);
    fixresult(cdb,e,retregs,pretregs);
}
233 
/****************************
 * Gen code for OPnegass for doubles (and floats).
 *
 * If config.inline8087 is set the work is delegated to cdnegass87().
 * Otherwise the value is negated in place by XORing 0x80 into its most
 * significant byte, and, if a result is wanted or the lvalue is a CSE,
 * the negated value is then loaded into registers.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the negass elem; e.EV.E1 is the lvalue
 *      pretregs = where the result is wanted; 0 means no result is needed
 */

private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (config.inline8087)
    {
        cdnegass87(cdb,e,pretregs);
        return;
    }
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    int sz = _tysize[tym];
    code cs;

    getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0);
    modEA(cdb,&cs);
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    cs.IEV1.Voffset += sz - 1;          // address the last (most significant) byte
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR (sz-1)[EA],0x80
    if (tycomplex(tym))
    {
        // Same again for the last byte of the first half of the value
        cs.IEV1.Voffset -= sz / 2;
        cdb.gen(&cs);                   // XOR (sz/2-1)[EA],0x80
    }

    regm_t retregs;
    if (*pretregs || e1.Ecount)
    {
        // Undo the sz - 1 adjustment made above.
        // NOTE(review): for complex types the extra sz / 2 adjustment is
        // not undone here - confirm whether complex types can reach this point
        cs.IEV1.Voffset -= sz - 1;

        if (tym == TYfloat)
        {
            // Load EA into FLOATREGS
            getregs(cdb,FLOATREGS);
            cs.Iop = 0x8B;
            NEWREG(cs.Irm, AX);
            cdb.gen(&cs);

            if (!I32)
            {
                NEWREG(cs.Irm, DX);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);

            }
            retregs = FLOATREGS;
        }
        else
        {
            if (I32)
            {
                // Load EA into DOUBLEREGS
                getregs(cdb,DOUBLEREGS_32);
                cs.Iop = 0x8B;
                // Clear the reg field left over from the XOR before selecting AX
                cs.Irm &= ~cast(uint)modregrm(0,7,0);
                cs.Irm |= modregrm(0,AX,0);
                cdb.gen(&cs);
                cs.Irm |= modregrm(0,DX,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
            }
            else
            {
                // The first branch is the one compiled in; the else branch
                // (pushing the value on the stack) is disabled
                static if (1)
                {
                    cs.Iop = 0x8B;
                    fltregs(cdb,&cs,TYdouble);     // MOV DOUBLEREGS, EA
                }
                else
                {
                    // Push EA onto stack
                    cs.Iop = 0xFF;
                    cs.Irm |= modregrm(0,6,0);
                    cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    stackpush += DOUBLESIZE;
                }
            }
            retregs = DOUBLEREGS;
        }
        if (e1.Ecount)
            cssave(e1,retregs,!OTleaf(e1.Eoper));         /* if lvalue is a CSE   */
    }
    else
    {
        // No result wanted and the lvalue is not a CSE: nothing to load
        retregs = 0;
        assert(e1.Ecount == 0);
    }

    freenode(e1);
    fixresult(cdb,e,retregs,pretregs);
}
339 }
340 
341 
342 
/************************
 * Generate code for an assignment (e1 = e2).
 *
 * XMM register types (when config.fpxmmregs is set) are handed to xmmeq(),
 * and most floating point assignments with config.inline8087 set are handed
 * to eq87() / complex_eq87(). Otherwise, if no result is requested and e2 is
 * a suitable constant or address, it is stored directly as an immediate.
 * Failing that, e2 is evaluated into registers which are then stored into e1.
 * An lvalue of the form *p++ or *p-- with p in a register has its
 * increment applied after the store (at label Lp).
 *
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the assignment elem; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = mask of where the result is wanted, 0 if no result is
 *                 needed; mPSW may be cleared from it by this function
 */

void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    tym_t tymll;
    reg_t reg;
    code cs;
    elem *e11;
    bool regvar;                  // true means evaluate into register variable
    regm_t varregm;
    reg_t varreg;
    targ_int postinc;             // amount to add to p after the store for *p++ / *p--, else 0

    //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int e2oper = e2.Eoper;
    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    regm_t retregs = *pretregs;

    // XMM register types are handled by the XMM code generator
    if (tyxmmreg(tyml) && config.fpxmmregs)
    {
        xmmeq(cdb, e, CMP, e1, e2, pretregs);
        return;
    }

    // With inline 8087 code, floating point assignments go to the x87 code
    // generator, except when no result is wanted, the type is not a long
    // double, and the rvalue is a constant (or, before the PentiumPro,
    // a variable or indirection)
    if (tyfloating(tyml) && config.inline8087)
    {
        if (tycomplex(tyml))
        {
            complex_eq87(cdb, e, pretregs);
            return;
        }

        if (!(retregs == 0 &&
              (e2oper == OPconst || e2oper == OPvar || e2oper == OPind))
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (config.target_cpu >= TARGET_PentiumPro &&
            (e2oper == OPvar || e2oper == OPind)
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (tyml == TYldouble || tyml == TYildouble)
        {
            eq87(cdb,e,pretregs);
            return;
        }
    }

    uint sz = _tysize[tyml];           // # of bytes to transfer
    assert(cast(int)sz > 0);

    if (retregs == 0)                     // if no return value
    {
        int fl;

        /* If registers are tight, and we might need them for the lvalue,
         * prefer to not use them for the rvalue
         */
        bool plenty = true;
        if (e1.Eoper == OPind)
        {
            /* Will need 1 register for evaluation, +2 registers for
             * e1's addressing mode
             */
            regm_t m = allregs & ~regcon.mvar;  // mask of non-register variables
            m &= m - 1;         // clear least significant bit
            m &= m - 1;         // clear least significant bit
            plenty = m != 0;    // at least 3 registers
        }

        // Fast path: store a constant, or the address of a data symbol,
        // straight into the lvalue as an immediate operand
        if ((e2oper == OPconst ||       // if rvalue is a constant
             e2oper == OPrelconst &&
             !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
             ((fl = el_fl(e2)) == FLdata ||
              fl==FLudata || fl == FLextern)
              && !(e2.EV.Vsym.ty() & mTYcs)
            ) &&
            !(evalinregister(e2) && plenty) &&
            !e1.Ecount)        // and no CSE headaches
        {
            // Look for special case of (*p++ = ...), where p is a register variable
            if (e1.Eoper == OPind &&
                ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
                e11.EV.E1.Eoper == OPvar &&
                e11.EV.E1.EV.Vsym.Sfl == FLreg &&
                (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
               )
            {
                Symbol *s = e11.EV.E1.EV.Vsym;
                if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
                {
                    regcon.params &= ~s.Spregm();
                }
                // Remember the increment; it is applied at Lp, after the store
                postinc = e11.EV.E2.EV.Vint;
                if (e11.Eoper == OPpostdec)
                    postinc = -postinc;
                getlvalue(cdb,&cs,e1,RMstore);
                freenode(e11.EV.E2);
            }
            else
            {
                postinc = 0;
                getlvalue(cdb,&cs,e1,RMstore);

                // When optimizing for speed on a Pentium or Pentium MMX, and the
                // mod field of the addressing mode is 2, go through a register
                // (MOV reg,imm; MOV EA,reg) instead of using MOV EA,imm
                if (e2oper == OPconst &&
                    config.flags4 & CFG4speed &&
                    (config.target_cpu == TARGET_Pentium ||
                     config.target_cpu == TARGET_PentiumMMX) &&
                    (cs.Irm & 0xC0) == 0x80
                   )
                {
                    if (I64 && sz == 8 && e2.EV.Vpointer)
                    {
                        // MOV reg,imm64
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vpointer,&regx,64);
                        cs.Iop = 0x89;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
                        cs.Iop = 0x89;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if (sz == 2 * REGSIZE && e2.EV.Vllong == 0)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        // MOV EA+2,reg
                        regm_t rregm = getscratch() & ~idxregm(&cs);
                        if (rregm)
                        {
                            reg_t regx;
                            regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
                            cs.Iop = 0x89;
                            cs.Irm |= modregrm(0,regx,0);
                            cdb.gen(&cs);
                            getlvalue_msw(&cs);
                            cdb.gen(&cs);
                            freenode(e2);
                            goto Lp;
                        }
                    }
                }
            }

            // If loading result into a register
            if ((cs.Irm & 0xC0) == 0xC0)
            {
                modEA(cdb,&cs);
                if (sz == 2 * REGSIZE && cs.IFL1 == FLreg)
                    getregs(cdb,cs.IEV1.Vsym.Sregm);
            }
            cs.Iop = (sz == 1) ? 0xC6 : 0xC7;       // MOV EA,imm (byte or full size form)

            if (e2oper == OPrelconst)
            {
                // Store the address of the symbol as the immediate operand
                cs.IEV2.Voffset = e2.EV.Voffset;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e2.EV.Vsym;
                cs.Iflags |= CFoff;
                cdb.gen(&cs);       // MOV EA,&variable
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
                if (sz > REGSIZE)
                {
                    cs.Iop = 0x8C;
                    getlvalue_msw(&cs);
                    cs.Irm |= modregrm(0,3,0);
                    cdb.gen(&cs);   // MOV EA+2,DS
                }
            }
            else
            {
                assert(e2oper == OPconst);
                cs.IFL2 = FLconst;
                // p walks over the raw bytes of the constant stored in e2.EV
                targ_size_t *p = cast(targ_size_t *) &(e2.EV);
                cs.IEV2.Vsize_t = *p;
                // Look for loading a register variable
                if ((cs.Irm & 0xC0) == 0xC0)
                {
                    reg_t regx = cs.Irm & 7;

                    if (cs.Irex & REX_B)
                        regx |= 8;
                    if (I64 && sz == 8)
                        movregconst(cdb,regx,*p,64);
                    else
                        movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1));
                    if (sz == 2 * REGSIZE)
                    {   getlvalue_msw(&cs);
                        // Load the upper half of the constant, read at the
                        // granularity of the register size
                        if (REGSIZE == 2)
                            movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0);
                        else if (REGSIZE == 4)
                            movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0);
                        else if (REGSIZE == 8)
                            movregconst(cdb,cs.Irm & 7,p[1],0);
                        else
                            assert(0);
                    }
                }
                else if (I64 && sz == 8 && *p >= 0x80000000)
                {   // Use 64 bit MOV, as the 32 bit one gets sign extended
                    // MOV reg,imm64
                    // MOV EA,reg
                    regm_t rregm = allregs & ~idxregm(&cs);
                    reg_t regx;
                    regwithvalue(cdb,rregm,*p,&regx,64);
                    cs.Iop = 0x89;
                    cs.Irm |= modregrm(0,regx & 7,0);
                    if (regx & 8)
                        cs.Irex |= REX_R;
                    cdb.gen(&cs);
                }
                else
                {
                    // Store the constant to memory in pieces of regsize bytes,
                    // advancing both the EA offset and p after each piece
                    int off = sz;
                    do
                    {   int regsize = REGSIZE;
                        if (off >= 4 && I16 && config.target_cpu >= TARGET_80386)
                        {
                            regsize = 4;
                            cs.Iflags |= CFopsize;      // use opsize to do 32 bit operation
                        }
                        else if (I64 && sz == 16 && *p >= 0x80000000)
                        {
                            // Piece does not fit a sign extended 32 bit
                            // immediate: go through a register instead
                            regm_t rregm = allregs & ~idxregm(&cs);
                            reg_t regx;
                            regwithvalue(cdb,rregm,*p,&regx,64);
                            cs.Iop = 0x89;
                            cs.Irm |= modregrm(0,regx & 7,0);
                            if (regx & 8)
                                cs.Irex |= REX_R;
                        }
                        else
                        {
                            // If some register already holds the value, store
                            // from that register rather than using an immediate
                            regm_t retregsx = (sz == 1) ? BYTEREGS : allregs;
                            reg_t regx;
                            if (reghasvalue(retregsx,*p,&regx))
                            {
                                cs.Iop = (cs.Iop & 1) | 0x88;
                                cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx
                                if (regx & 8)
                                    cs.Irex |= REX_R;
                                if (I64 && sz == 1 && regx >= 4)
                                    cs.Irex |= REX;
                            }
                            if (!I16 && off == 2)      // if 16 bit operand
                                cs.Iflags |= CFopsize;
                            if (I64 && sz == 8)
                                cs.Irex |= REX_W;
                        }
                        cdb.gen(&cs);           // MOV EA,const

                        // Set up for the next piece: back to the immediate
                        // form of MOV with the reg field cleared
                        p = cast(targ_size_t *)(cast(char *) p + regsize);
                        cs.Iop = (cs.Iop & 1) | 0xC6;
                        cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0);
                        cs.Irex &= ~REX_R;
                        cs.IEV1.Voffset += regsize;
                        cs.IEV2.Vint = cast(int)*p;
                        off -= regsize;
                    } while (off > 0);
                }
            }
            freenode(e2);
            goto Lp;
        }
        retregs = allregs;        // pick a reg, any reg
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    // Only the flags are wanted: the value still has to go through registers
    if (retregs == mPSW)
    {
        retregs = allregs;
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    cs.Iop = 0x89;
    if (sz == 1)                  // must have byte regs
    {
        cs.Iop = 0x88;
        retregs &= BYTEREGS;
        if (!retregs)
            retregs = BYTEREGS;
    }
    else if (retregs & mES &&
           (
             (e1.Eoper == OPind &&
                ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) ||
             (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata)
           )
          )
        // getlvalue() needs ES, so we can't return it
        retregs = allregs;              // no conflicts with ES
    else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0)
        retregs = DOUBLEREGS;

    regvar = false;
    varregm = 0;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1.EV.Vsym,e2) &&       // and we can compute directly into it
            !(sz == 1 && e1.EV.Voffset == 1)
           )
        {
            regvar = true;
            retregs = varregm;
            reg = varreg;       // evaluate directly in target register
            // Assigning to one register sized half of a two register variable:
            // narrow the target down to the half selected by the offset
            if (tysize(e1.Ety) == REGSIZE &&
                tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE)
            {
                if (e1.EV.Voffset)
                    retregs &= mMSW;
                else
                    retregs &= mLSW;
                reg = findreg(retregs);
            }
        }
    }
    if (*pretregs & mPSW && OTleaf(e1.Eoper))     // if evaluating e1 couldn't change flags
    {   // Be careful that this lines up with jmpopcode()
        retregs |= mPSW;
        *pretregs &= ~mPSW;
    }
    scodelem(cdb,e2,&retregs,0,true);    // get rvalue

    // Look for special case of (*p++ = ...), where p is a register variable
    if (e1.Eoper == OPind &&
        ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
        e11.EV.E1.Eoper == OPvar &&
        e11.EV.E1.EV.Vsym.Sfl == FLreg &&
        (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
       )
    {
        Symbol *s = e11.EV.E1.EV.Vsym;
        if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
        {
            regcon.params &= ~s.Spregm();
        }

        // Remember the increment; it is applied at Lp, after the store
        postinc = e11.EV.E2.EV.Vint;
        if (e11.Eoper == OPpostdec)
            postinc = -postinc;
        getlvalue(cdb,&cs,e1,RMstore | retregs);
        freenode(e11.EV.E2);
    }
    else
    {
        postinc = 0;
        getlvalue(cdb,&cs,e1,RMstore | retregs);     // get lvalue (cl == null if regvar)
    }

    getregs(cdb,varregm);

    assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes));
    if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES)
    {
        // Far pointer held in ES:reg - store the offset, then the segment
        reg = findreglsw(retregs);
        cs.Irm |= modregrm(0,reg,0);
        cdb.gen(&cs);                   // MOV EA,reg
        getlvalue_msw(&cs);             // point to where segment goes
        cs.Iop = 0x8C;
        NEWREG(cs.Irm,0);
        cdb.gen(&cs);                   // MOV EA+2,ES
    }
    else
    {
        if (!I16)
        {
            // Store the least significant register first, then, for values
            // larger than a register, the most significant one
            reg = findreg(retregs &
                    ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS));
            cs.Irm |= modregrm(0,reg & 7,0);
            if (reg & 8)
                cs.Irex |= REX_R;
            for (; true; sz -= REGSIZE)
            {
                // Do not generate mov from register onto itself
                if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0)))
                    break;
                if (sz == 2)            // if 16 bit operand
                    cs.Iflags |= CFopsize;
                else if (sz == 1 && reg >= 4)
                    cs.Irex |= REX;
                cdb.gen(&cs);           // MOV EA+offset,reg
                if (sz <= REGSIZE)
                    break;
                getlvalue_msw(&cs);
                reg = findregmsw(retregs);
                code_newreg(&cs, reg);
            }
        }
        else
        {
            // 16 bit code: start at the highest word and work downwards
            if (sz > REGSIZE)
                cs.IEV1.Voffset += sz - REGSIZE;  // 0,2,6
            reg = findreg(retregs &
                    (sz > REGSIZE ? mMSW : ALLREGS));
            if (tyml == TYdouble || tyml == TYdouble_alias)
                reg = AX;
            cs.Irm |= modregrm(0,reg,0);
            // Do not generate mov from register onto itself
            if (!regvar || reg != (cs.Irm & 7))
                for (; true; sz -= REGSIZE)             // 1,2,4
                {
                    cdb.gen(&cs);             // MOV EA+offset,reg
                    if (sz <= REGSIZE)
                        break;
                    cs.IEV1.Voffset -= REGSIZE;
                    // Doubles step through the registers in dblreg[] order,
                    // anything else finishes with the least significant register
                    if (tyml == TYdouble || tyml == TYdouble_alias)
                            reg = dblreg[reg];
                    else
                            reg = findreglsw(retregs);
                    NEWREG(cs.Irm,reg);
                }
        }
    }
    if (e1.Ecount ||                    // if lvalue is a CSE or
        regvar)                         // rvalue can't be a CSE
    {
        getregs_imm(cdb,retregs);       // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,!OTleaf(e1.Eoper));     // if lvalue is a CSE
    }

    fixresult(cdb,e,retregs,pretregs);
Lp:
    // Apply the deferred increment / decrement of p from (*p++ = ...)
    if (postinc)
    {
        reg_t ireg = findreg(idxregm(&cs));
        if (*pretregs & mPSW)
        {   // Use LEA to avoid touching the flags
            uint rm = cs.Irm & 7;
            if (cs.Irex & REX_B)
                rm |= 8;
            cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (I64)
        {
            // 0x81 /0 is ADD ireg,postinc
            cdb.genc2(0x81,modregrmx(3,0,ireg),postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
        {
            if (postinc == 1)
                cdb.gen1(0x40 + ireg);        // INC ireg
            else if (postinc == -cast(targ_int)1)
                cdb.gen1(0x48 + ireg);        // DEC ireg
            else
            {
                cdb.genc2(0x81,modregrm(3,0,ireg),postinc);
            }
        }
    }
    freenode(e1);
}
831 
832 
833 /************************
834  * Generate code for += -= &= |= ^= negass
835  */
836 
837 void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
838 {
839     //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
840     OPER op = e.Eoper;
841     regm_t retregs = 0;
842     uint reverse = 0;
843     elem *e1 = e.EV.E1;
844     tym_t tyml = tybasic(e1.Ety);            // type of lvalue
845     int sz = _tysize[tyml];
846     int isbyte = (sz == 1);                     // 1 for byte operation, else 0
847 
848     // See if evaluate in XMM registers
849     if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0))
850     {
851         xmmopass(cdb,e,pretregs);
852         return;
853     }
854 
855     if (tyfloating(tyml))
856     {
857         static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
858         {
859             if (op == OPnegass)
860                 cdnegass87(cdb,e,pretregs);
861             else
862                 opass87(cdb,e,pretregs);
863         }
864         else
865         {
866             if (op == OPnegass)
867                 opnegassdbl(cdb,e,pretregs);
868             else
869                 opassdbl(cdb,e,pretregs,op);
870         }
871         return;
872     }
873     uint opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386)
874         ? CFopsize : 0;
875     uint cflags = 0;
876     regm_t forccs = *pretregs & mPSW;            // return result in flags
877     regm_t forregs = *pretregs & ~mPSW;          // return result in regs
878     // true if we want the result in a register
879     uint wantres = forregs || (e1.Ecount && !OTleaf(e1.Eoper));
880 
881     reg_t reg;
882     uint op1,op2,mode;
883     code cs;
884     elem *e2;
885     regm_t varregm;
886     reg_t varreg;
887     uint jop;
888 
889 
890     switch (op)                   // select instruction opcodes
891     {
892         case OPpostinc: op = OPaddass;                  // i++ => +=
893                         goto case OPaddass;
894 
895         case OPaddass:  op1 = 0x01; op2 = 0x11;
896                         cflags = CFpsw;
897                         mode = 0; break;                // ADD, ADC
898 
899         case OPpostdec: op = OPminass;                  // i-- => -=
900                         goto case OPminass;
901 
902         case OPminass:  op1 = 0x29; op2 = 0x19;
903                         cflags = CFpsw;
904                         mode = 5; break;                // SUB, SBC
905 
906         case OPandass:  op1 = op2 = 0x21;
907                         mode = 4; break;                // AND, AND
908 
909         case OPorass:   op1 = op2 = 0x09;
910                         mode = 1; break;                // OR , OR
911 
912         case OPxorass:  op1 = op2 = 0x31;
913                         mode = 6; break;                // XOR, XOR
914 
915         case OPnegass:  op1 = 0xF7;                     // NEG
916                         break;
917 
918         default:
919                 assert(0);
920     }
921     op1 ^= isbyte;                  // bit 0 is 0 for byte operation
922 
923     if (op == OPnegass)
924     {
925         getlvalue(cdb,&cs,e1,0);
926         modEA(cdb,&cs);
927         cs.Irm |= modregrm(0,3,0);
928         cs.Iop = op1;
929         switch (_tysize[tyml])
930         {
931             case CHARSIZE:
932                 cdb.gen(&cs);
933                 break;
934 
935             case SHORTSIZE:
936                 cdb.gen(&cs);
937                 if (!I16 && *pretregs & mPSW)
938                     cdb.last().Iflags |= CFopsize | CFpsw;
939                 break;
940 
941             case LONGSIZE:
942                 if (!I16 || opsize)
943                 {   cdb.gen(&cs);
944                     cdb.last().Iflags |= opsize;
945                     break;
946                 }
947             neg_2reg:
948                 getlvalue_msw(&cs);
949                 cdb.gen(&cs);              // NEG EA+2
950                 getlvalue_lsw(&cs);
951                 cdb.gen(&cs);              // NEG EA
952                 code_orflag(cdb.last(),CFpsw);
953                 cs.Iop = 0x81;
954                 getlvalue_msw(&cs);
955                 cs.IFL2 = FLconst;
956                 cs.IEV2.Vuns = 0;
957                 cdb.gen(&cs);              // SBB EA+2,0
958                 break;
959 
960             case LLONGSIZE:
961                 if (I16)
962                     assert(0);             // not implemented yet
963                 if (I32)
964                     goto neg_2reg;
965                 cdb.gen(&cs);
966                 break;
967 
968             default:
969                 assert(0);
970         }
971         forccs = 0;             // flags already set by NEG
972         *pretregs &= ~mPSW;
973     }
974     else if ((e2 = e.EV.E2).Eoper == OPconst &&    // if rvalue is a const
975              el_signx32(e2) &&
976              // Don't evaluate e2 in register if we can use an INC or DEC
977              (((sz <= REGSIZE || tyfv(tyml)) &&
978                (op == OPaddass || op == OPminass) &&
979                (el_allbits(e2, 1) || el_allbits(e2, -1))
980               ) ||
981               (!evalinregister(e2)
982                && tyml != TYhptr
983               )
984              )
985             )
986     {
987         getlvalue(cdb,&cs,e1,0);
988         modEA(cdb,&cs);
989         cs.IFL2 = FLconst;
990         cs.IEV2.Vsize_t = e2.EV.Vint;
991         if (sz <= REGSIZE || tyfv(tyml) || opsize)
992         {
993             targ_int i = cs.IEV2.Vint;
994 
995             // Handle shortcuts. Watch out for if result has
996             // to be in flags.
997 
998             if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&reg) && i != 1 && i != -1 &&
999                 !opsize)
1000             {
1001                 cs.Iop = op1;
1002                 cs.Irm |= modregrm(0,reg & 7,0);
1003                 if (I64)
1004                 {   if (isbyte && reg >= 4)
1005                         cs.Irex |= REX;
1006                     if (reg & 8)
1007                         cs.Irex |= REX_R;
1008                 }
1009             }
1010             else
1011             {
1012                 cs.Iop = 0x81;
1013                 cs.Irm |= modregrm(0,mode,0);
1014                 switch (op)
1015                 {
1016                     case OPminass:      // convert to +=
1017                         cs.Irm ^= modregrm(0,5,0);
1018                         i = -i;
1019                         cs.IEV2.Vsize_t = i;
1020                         goto case OPaddass;
1021 
1022                     case OPaddass:
1023                         if (i == 1)             // INC EA
1024                                 goto L1;
1025                         else if (i == -1)       // DEC EA
1026                         {       cs.Irm |= modregrm(0,1,0);
1027                            L1:  cs.Iop = 0xFF;
1028                         }
1029                         break;
1030 
1031                     default:
1032                         break;
1033                 }
1034                 cs.Iop ^= isbyte;             // for byte operations
1035             }
1036             cs.Iflags |= opsize;
1037             if (forccs)
1038                 cs.Iflags |= CFpsw;
1039             else if (!I16 && cs.Iflags & CFopsize)
1040             {
1041                 switch (op)
1042                 {   case OPorass:
1043                     case OPxorass:
1044                         cs.IEV2.Vsize_t &= 0xFFFF;
1045                         cs.Iflags &= ~CFopsize; // don't worry about MSW
1046                         break;
1047 
1048                     case OPandass:
1049                         cs.IEV2.Vsize_t |= ~0xFFFFL;
1050                         cs.Iflags &= ~CFopsize; // don't worry about MSW
1051                         break;
1052 
1053                     case OPminass:
1054                     case OPaddass:
1055                         static if (1)
1056                         {
1057                             if ((cs.Irm & 0xC0) == 0xC0)    // EA is register
1058                                 cs.Iflags &= ~CFopsize;
1059                         }
1060                         else
1061                         {
1062                             if ((cs.Irm & 0xC0) == 0xC0 &&  // EA is register and
1063                                 e1.Eoper == OPind)          // not a register var
1064                                 cs.Iflags &= ~CFopsize;
1065                         }
1066                         break;
1067 
1068                     default:
1069                         assert(0);
1070                 }
1071             }
1072 
1073             // For scheduling purposes, we wish to replace:
1074             //    OP    EA
1075             // with:
1076             //    MOV   reg,EA
1077             //    OP    reg
1078             //    MOV   EA,reg
1079             if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
1080                 (config.target_cpu == TARGET_Pentium ||
1081                  config.target_cpu == TARGET_PentiumMMX) &&
1082                 config.flags4 & CFG4speed)
1083             {
1084                 regm_t sregm;
1085                 code cs2;
1086 
1087                 // Determine which registers to use
1088                 sregm = allregs & ~idxregm(&cs);
1089                 if (isbyte)
1090                     sregm &= BYTEREGS;
1091                 if (sregm & forregs)
1092                     sregm &= forregs;
1093 
1094                 allocreg(cdb,&sregm,&reg,tyml);      // allocate register
1095 
1096                 cs2 = cs;
1097                 cs2.Iflags &= ~CFpsw;
1098                 cs2.Iop = 0x8B ^ isbyte;
1099                 code_newreg(&cs2, reg);
1100                 cdb.gen(&cs2);                      // MOV reg,EA
1101 
1102                 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7);
1103                 if (reg & 8)
1104                     cs.Irex |= REX_B;
1105                 cdb.gen(&cs);                       // OP reg
1106 
1107                 cs2.Iop ^= 2;
1108                 cdb.gen(&cs2);                      // MOV EA,reg
1109 
1110                 retregs = sregm;
1111                 wantres = 0;
1112                 if (e1.Ecount)
1113                     cssave(e1,retregs,!OTleaf(e1.Eoper));
1114             }
1115             else
1116             {
1117                 cdb.gen(&cs);
1118                 cs.Iflags &= ~opsize;
1119                 cs.Iflags &= ~CFpsw;
1120                 if (I16 && opsize)                     // if DWORD operand
1121                     cs.IEV1.Voffset += 2; // compensate for wantres code
1122             }
1123         }
1124         else if (sz == 2 * REGSIZE)
1125         {
1126             targ_uns msw;
1127 
1128             cs.Iop = 0x81;
1129             cs.Irm |= modregrm(0,mode,0);
1130             cs.Iflags |= cflags;
1131             cdb.gen(&cs);
1132             cs.Iflags &= ~CFpsw;
1133 
1134             getlvalue_msw(&cs);             // point to msw
1135             msw = cast(uint)MSREG(e.EV.E2.EV.Vllong);
1136             cs.IEV2.Vuns = msw;             // msw of constant
1137             switch (op)
1138             {
1139                 case OPminass:
1140                     cs.Irm ^= modregrm(0,6,0);      // SUB => SBB
1141                     break;
1142 
1143                 case OPaddass:
1144                     cs.Irm |= modregrm(0,2,0);      // ADD => ADC
1145                     break;
1146 
1147                 default:
1148                     break;
1149             }
1150             cdb.gen(&cs);
1151         }
1152         else
1153             assert(0);
1154         freenode(e.EV.E2);        // don't need it anymore
1155     }
1156     else if (isregvar(e1,&varregm,&varreg) &&
1157              (e2.Eoper == OPvar || e2.Eoper == OPind) &&
1158             !evalinregister(e2) &&
1159              sz <= REGSIZE)               // deal with later
1160     {
1161         getlvalue(cdb,&cs,e2,0);
1162         freenode(e2);
1163         getregs(cdb,varregm);
1164         code_newreg(&cs, varreg);
1165         if (I64 && sz == 1 && varreg >= 4)
1166             cs.Irex |= REX;
1167         cs.Iop = op1 ^ 2;                       // toggle direction bit
1168         if (forccs)
1169             cs.Iflags |= CFpsw;
1170         reverse = 2;                            // remember we toggled it
1171         cdb.gen(&cs);
1172         retregs = 0;            // to trigger a bug if we attempt to use it
1173     }
1174     else if ((op == OPaddass || op == OPminass) &&
1175              sz <= REGSIZE &&
1176              !e2.Ecount &&
1177              ((jop = jmpopcode(e2)) == JC || jop == JNC ||
1178               (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC)))
1179             )
1180     {
1181         /* e1 += (x < y)    ADC EA,0
1182          * e1 -= (x < y)    SBB EA,0
1183          * e1 += (x >= y)   SBB EA,-1
1184          * e1 -= (x >= y)   ADC EA,-1
1185          */
1186         getlvalue(cdb,&cs,e1,0);             // get lvalue
1187         modEA(cdb,&cs);
1188         regm_t keepmsk = idxregm(&cs);
1189         retregs = mPSW;
1190         if (OTconv(e2.Eoper))
1191         {
1192             scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true);
1193             freenode(e2);
1194         }
1195         else
1196             scodelem(cdb,e2,&retregs,keepmsk,true);
1197         cs.Iop = 0x81 ^ isbyte;                   // ADC EA,imm16/32
1198         uint regop = 2;                     // ADC
1199         if ((op == OPaddass) ^ (jop == JC))
1200             regop = 3;                          // SBB
1201         code_newreg(&cs,regop);
1202         cs.Iflags |= opsize;
1203         if (forccs)
1204             cs.Iflags |= CFpsw;
1205         cs.IFL2 = FLconst;
1206         cs.IEV2.Vsize_t = (jop == JC) ? 0 : ~cast(targ_size_t)0;
1207         cdb.gen(&cs);
1208         retregs = 0;            // to trigger a bug if we attempt to use it
1209     }
1210     else // evaluate e2 into register
1211     {
1212         retregs = (isbyte) ? BYTEREGS : ALLREGS;  // pick working reg
1213         if (tyml == TYhptr)
1214             retregs &= ~mCX;                    // need CX for shift count
1215         scodelem(cdb,e.EV.E2,&retregs,0,true);   // get rvalue
1216         getlvalue(cdb,&cs,e1,retregs);         // get lvalue
1217         modEA(cdb,&cs);
1218         cs.Iop = op1;
1219         if (sz <= REGSIZE || tyfv(tyml))
1220         {
1221             reg = findreg(retregs);
1222             code_newreg(&cs, reg);              // OP1 EA,reg
1223             if (sz == 1 && reg >= 4 && I64)
1224                 cs.Irex |= REX;
1225             if (forccs)
1226                 cs.Iflags |= CFpsw;
1227         }
1228         else if (tyml == TYhptr)
1229         {
1230             uint mreg = findregmsw(retregs);
1231             uint lreg = findreglsw(retregs);
1232             getregs(cdb,retregs | mCX);
1233 
1234             // If h -= l, convert to h += -l
1235             if (e.Eoper == OPminass)
1236             {
1237                 cdb.gen2(0xF7,modregrm(3,3,mreg));      // NEG mreg
1238                 cdb.gen2(0xF7,modregrm(3,3,lreg));      // NEG lreg
1239                 code_orflag(cdb.last(),CFpsw);
1240                 cdb.genc2(0x81,modregrm(3,3,mreg),0);   // SBB mreg,0
1241             }
1242             cs.Iop = 0x01;
1243             cs.Irm |= modregrm(0,lreg,0);
1244             cdb.gen(&cs);                               // ADD EA,lreg
1245             code_orflag(cdb.last(),CFpsw);
1246             cdb.genc2(0x81,modregrm(3,2,mreg),0);       // ADC mreg,0
1247             genshift(cdb);                              // MOV CX,offset __AHSHIFT
1248             cdb.gen2(0xD3,modregrm(3,4,mreg));          // SHL mreg,CL
1249             NEWREG(cs.Irm,mreg);                        // ADD EA+2,mreg
1250             getlvalue_msw(&cs);
1251         }
1252         else if (sz == 2 * REGSIZE)
1253         {
1254             cs.Irm |= modregrm(0,findreglsw(retregs),0);
1255             cdb.gen(&cs);                               // OP1 EA,reg+1
1256             code_orflag(cdb.last(),cflags);
1257             cs.Iop = op2;
1258             NEWREG(cs.Irm,findregmsw(retregs)); // OP2 EA+1,reg
1259             getlvalue_msw(&cs);
1260         }
1261         else
1262             assert(0);
1263         cdb.gen(&cs);
1264         retregs = 0;            // to trigger a bug if we attempt to use it
1265     }
1266 
1267     // See if we need to reload result into a register.
1268     // Need result in registers in case we have a 32 bit
1269     // result and we want the flags as a result.
1270     if (wantres || (sz > REGSIZE && forccs))
1271     {
1272         if (sz <= REGSIZE)
1273         {
1274             regm_t possregs;
1275 
1276             possregs = ALLREGS;
1277             if (isbyte)
1278                 possregs = BYTEREGS;
1279             retregs = forregs & possregs;
1280             if (!retregs)
1281                 retregs = possregs;
1282 
1283             // If reg field is destination
1284             if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5)
1285             {
1286                 reg = (cs.Irm >> 3) & 7;
1287                 if (cs.Irex & REX_R)
1288                     reg |= 8;
1289                 retregs = mask(reg);
1290                 allocreg(cdb,&retregs,&reg,tyml);
1291             }
1292             // If lvalue is a register, just use that register
1293             else if ((cs.Irm & 0xC0) == 0xC0)
1294             {
1295                 reg = cs.Irm & 7;
1296                 if (cs.Irex & REX_B)
1297                     reg |= 8;
1298                 retregs = mask(reg);
1299                 allocreg(cdb,&retregs,&reg,tyml);
1300             }
1301             else
1302             {
1303                 allocreg(cdb,&retregs,&reg,tyml);
1304                 cs.Iop = 0x8B ^ isbyte ^ reverse;
1305                 code_newreg(&cs, reg);
1306                 if (I64 && isbyte && reg >= 4)
1307                     cs.Irex |= REX_W;
1308                 cdb.gen(&cs);               // MOV reg,EA
1309             }
1310         }
1311         else if (tyfv(tyml) || tyml == TYhptr)
1312         {
1313             regm_t idxregs;
1314 
1315             if (tyml == TYhptr)
1316                 getlvalue_lsw(&cs);
1317             idxregs = idxregm(&cs);
1318             retregs = forregs & ~idxregs;
1319             if (!(retregs & IDXREGS))
1320                 retregs |= IDXREGS & ~idxregs;
1321             if (!(retregs & mMSW))
1322                 retregs |= mMSW & ALLREGS;
1323             allocreg(cdb,&retregs,&reg,tyml);
1324             NEWREG(cs.Irm,findreglsw(retregs));
1325             if (retregs & mES)              // if want ES loaded
1326             {
1327                 cs.Iop = 0xC4;
1328                 cdb.gen(&cs);               // LES lreg,EA
1329             }
1330             else
1331             {
1332                 cs.Iop = 0x8B;
1333                 cdb.gen(&cs);               // MOV lreg,EA
1334                 getlvalue_msw(&cs);
1335                 if (I32)
1336                     cs.Iflags |= CFopsize;
1337                 NEWREG(cs.Irm,reg);
1338                 cdb.gen(&cs);               // MOV mreg,EA+2
1339             }
1340         }
1341         else if (sz == 2 * REGSIZE)
1342         {
1343             regm_t idx = idxregm(&cs);
1344             retregs = forregs;
1345             if (!retregs)
1346                 retregs = ALLREGS;
1347             allocreg(cdb,&retregs,&reg,tyml);
1348             cs.Iop = 0x8B;
1349             NEWREG(cs.Irm,reg);
1350 
1351             code csl = cs;
1352             NEWREG(csl.Irm,findreglsw(retregs));
1353             getlvalue_lsw(&csl);
1354 
1355             if (mask(reg) & idx)
1356             {
1357                 cdb.gen(&csl);             // MOV reg+1,EA
1358                 cdb.gen(&cs);              // MOV reg,EA+2
1359             }
1360             else
1361             {
1362                 cdb.gen(&cs);              // MOV reg,EA+2
1363                 cdb.gen(&csl);             // MOV reg+1,EA
1364             }
1365         }
1366         else
1367             assert(0);
1368         if (e1.Ecount)                 // if we gen a CSE
1369             cssave(e1,retregs,!OTleaf(e1.Eoper));
1370     }
1371     freenode(e1);
1372     if (sz <= REGSIZE)
1373         *pretregs &= ~mPSW;            // flags are already set
1374     fixresult(cdb,e,retregs,pretregs);
1375 }
1376 
1377 /********************************
1378  * Generate code for *= /= %=
      *
      * Floating point lvalues are handed off to xmmopass(), opass87() or
      * opassdbl(). Integer lvalues no larger than a register are done inline
      * with [I]MUL / [I]DIV, or with a shift/mask sequence for a signed divide
      * or modulo by a constant power of 2. Lvalues twice the register size use
      * either an inline IMUL/MUL sequence (multiply, PentiumPro and later) or
      * a call to one of the CLIB.lmul / CLIB.ldiv / CLIB.uldiv runtime routines.
      * In the integer cases the result is stored back into the lvalue and the
      * result register(s) are then passed to fixresult(). Any other operand
      * size hits assert(0).
      *
      * Params:
      *      cdb = code builder that receives the generated instructions
      *      e = the op= elem; e.EV.E1 is the lvalue, e.EV.E2 the right operand
      *      pretregs = mask of where the caller wants the result; forwarded to
      *                 the helper routines and to fixresult()
1379  */
1380 
1381 void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1382 {
1383     code cs;                        // instruction being built; addresses the lvalue (EA)
1384     regm_t retregs;                 // scratch register mask / registers holding the result
1385     reg_t resreg;                   // register whose value is stored back into EA
1386     reg_t reg;
1387     uint opr,lib,isbyte;            // opr: value for the reg field; lib: CLIB routine; isbyte: 1 for byte operand
1388 
1389     //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
1390     elem *e1 = e.EV.E1;
1391     elem *e2 = e.EV.E2;
1392     OPER op = e.Eoper;                     // OPxxxx
1393 
1394     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
1395     char uns = tyuns(tyml) || tyuns(e2.Ety);   // treat as unsigned if either operand is unsigned
1396     uint sz = _tysize[tyml];                   // size of the lvalue
1397 
1398     uint rex = (I64 && sz == 8) ? REX_W : 0;   // REX_W only for 8 byte operands in 64 bit mode
1399     uint grex = rex << 16;          // 64 bit operands
1400 
1401     // See if evaluate in XMM registers
1402     if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
1403     {
1404         xmmopass(cdb,e,pretregs);
1405         return;
1406     }
1407 
1408     if (tyfloating(tyml))           // remaining floating point cases
1409     {
1410         static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1411         {
1412             opass87(cdb,e,pretregs);
1413         }
1414         else
1415         {
1416             opassdbl(cdb,e,pretregs,op);
1417         }
1418         return;
1419     }
1420 
1421     if (sz <= REGSIZE)                  // if word or byte
1422     {
1423         isbyte = (sz == 1);               // 1 for byte operation
1424         resreg = AX;                    // result register for * or /
1425         if (uns)                        // if uint operation
1426             opr = 4;                    // MUL
1427         else                            // else signed
1428             opr = 5;                    // IMUL
1429         if (op != OPmulass)             // if /= or %=
1430         {
1431             opr += 2;                   // MUL => DIV, IMUL => IDIV
1432             if (op == OPmodass)
1433                 resreg = DX;            // remainder is in DX
1434         }
1435         if (op == OPmulass)             // if multiply
1436         {
1437             if (config.target_cpu >= TARGET_80286 &&        // constant multiplier, not a byte:
1438                 e2.Eoper == OPconst && !isbyte)             // use IMUL reg,EA,constant
1439             {
1440                 targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
1441                 if (I64 && sz == 8 && e2factor != cast(int)e2factor)   // constant does not survive truncation to int
1442                     goto L1;                                           // so load it into a register instead
1443                 freenode(e2);
1444                 getlvalue(cdb,&cs,e1,0);     // get EA
1445                 regm_t idxregs = idxregm(&cs);
1446                 retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;   // prefer a requested register not used to address EA
1447                 if (!retregs)
1448                     retregs = ALLREGS & ~idxregs;
1449                 allocreg(cdb,&retregs,&resreg,tyml);
1450                 cs.Iop = 0x69;                  // IMUL reg,EA,e2value
1451                 cs.IFL2 = FLconst;
1452                 cs.IEV2.Vint = cast(int)e2factor;
1453                 opr = resreg;                   // destination register goes in the reg field
1454             }
1455             else if (!I16 && !isbyte)
1456             {
1457              L1:                                // also reached by the goto above
1458                 retregs = *pretregs & (ALLREGS | mBP);
1459                 if (!retregs)
1460                     retregs = ALLREGS;
1461                 codelem(cdb,e2,&retregs,false); // load rvalue in reg
1462                 getlvalue(cdb,&cs,e1,retregs);  // get EA
1463                 getregs(cdb,retregs);           // destroy these regs
1464                 cs.Iop = 0x0FAF;                        // IMUL resreg,EA
1465                 resreg = findreg(retregs);
1466                 opr = resreg;                   // destination register goes in the reg field
1467             }
1468             else
1469             {
1470                 retregs = mAX;
1471                 codelem(cdb,e2,&retregs,false);      // load rvalue in AX
1472                 getlvalue(cdb,&cs,e1,mAX);           // get EA
1473                 getregs(cdb,isbyte ? mAX : mAX | mDX); // destroy these regs
1474                 cs.Iop = 0xF7 ^ isbyte;                        // [I]MUL EA
1475             }
1476             code_newreg(&cs,opr);       // NOTE(review): presumably places opr in the reg field of cs -- confirm
1477             cdb.gen(&cs);               // emit the multiply
1478         }
1479         else // /= or %=
1480         {
1481             targ_size_t e2factor;
1482             int pow2;
1483 
1484             assert(!isbyte);                      // should never happen
1485             assert(I16 || sz != SHORTSIZE);
1486             if (config.flags4 & CFG4speed &&      // only when optimizing for speed
1487                 e2.Eoper == OPconst && !uns &&
1488                 (sz == REGSIZE || (I64 && sz == 4)) &&
1489                 (pow2 = ispow2(e2factor = cast(targ_size_t)el_tolong(e2))) != -1 &&
1490                 e2factor == cast(int)e2factor &&
1491                 !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)  // SAR AX,pow2 below needs 80286+
1492                )
1493             {
1494                 // Signed divide or modulo by power of 2
1495                 getlvalue(cdb,&cs,e1,mAX | mDX);
1496                 cs.Iop = 0x8B;
1497                 code_newreg(&cs, AX);
1498                 cdb.gen(&cs);                       // MOV AX,EA
1499                 freenode(e2);
1500                 getregs(cdb,mAX | mDX);     // trash these regs
1501                 cdb.gen1(0x99);                     // CWD
1502                 code_orrex(cdb.last(), rex);        // add REX_W for 8 byte operands
1503                 if (pow2 == 1)                      // divisor is 2
1504                 {
1505                     if (op == OPdivass)
1506                     {
1507                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
1508                         cdb.gen2(0xD1,grex | modregrm(3,7,AX));        // SAR AX,1
1509                         resreg = AX;
1510                     }
1511                     else // OPmodass
1512                     {
1513                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
1514                         cdb.genc2(0x81,grex | modregrm(3,4,AX),1);     // AND AX,1
1515                         cdb.gen2(0x03,grex | modregrm(3,DX,AX));       // ADD DX,AX
1516                         resreg = DX;
1517                     }
1518                 }
1519                 else
1520                 {
1521                     assert(pow2 < 32);
1522                     targ_ulong m = (1 << pow2) - 1;     // mask of the low pow2 bits
1523                     if (op == OPdivass)
1524                     {
1525                         cdb.genc2(0x81,grex | modregrm(3,4,DX),m);     // AND DX,m
1526                         cdb.gen2(0x03,grex | modregrm(3,AX,DX));       // ADD AX,DX
1527                         // Be careful not to generate this for 8088
1528                         assert(config.target_cpu >= TARGET_80286);
1529                         cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2);  // SAR AX,pow2
1530                         resreg = AX;
1531                     }
1532                     else // OPmodass
1533                     {
1534                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
1535                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
1536                         cdb.genc2(0x81,grex | modregrm(3,4,AX),m);     // AND AX,m
1537                         cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
1538                         cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
1539                         resreg = AX;
1540                     }
1541                 }
1542             }
1543             else
1544             {
1545                 retregs = ALLREGS & ~(mAX|mDX);         // DX gets sign extension
1546                 codelem(cdb,e2,&retregs,false); // load rvalue in retregs
1547                 reg = findreg(retregs);
1548                 getlvalue(cdb,&cs,e1,mAX | mDX | retregs);     // get EA
1549                 getregs(cdb,mAX | mDX);         // destroy these regs
1550                 cs.Irm |= modregrm(0,AX,0);     // AX in the reg field
1551                 cs.Iop = 0x8B;
1552                 cdb.gen(&cs);                   // MOV AX,EA
1553                 if (uns)                        // if uint
1554                     movregconst(cdb,DX,0,0);      // CLR DX
1555                 else                            // else signed
1556                 {   cdb.gen1(0x99);             // CWD
1557                     code_orrex(cdb.last(),rex);
1558                 }
1559                 getregs(cdb,mDX | mAX); // DX and AX will be destroyed
1560                 genregs(cdb,0xF7,opr,reg);   // OPR reg
1561                 code_orrex(cdb.last(),rex);
1562             }
1563         }
1564         cs.Iop = 0x89 ^ isbyte;                 // store opcode, byte or word form
1565         code_newreg(&cs,resreg);
1566         cdb.gen(&cs);                           // MOV EA,resreg
1567         if (e1.Ecount)                         // if we gen a CSE
1568                 cssave(e1,mask(resreg),!OTleaf(e1.Eoper));
1569         freenode(e1);
1570         fixresult(cdb,e,mask(resreg),pretregs);
1571         return;
1572     }
1573     else if (sz == 2 * REGSIZE)
1574     {
1575         lib = CLIB.lmul;                // default: multiply routine
1576         if (op == OPdivass || op == OPmodass)
1577         {
1578             lib = (uns) ? CLIB.uldiv : CLIB.ldiv;
1579             if (op == OPmodass)
1580                 lib++;                  // NOTE(review): presumably the remainder routine directly follows the divide routine in CLIB -- confirm
1581         }
1582         retregs = mCX | mBX;            // right operand goes into the CX,BX pair
1583         codelem(cdb,e2,&retregs,false);
1584         getlvalue(cdb,&cs,e1,mDX|mAX | mCX|mBX);
1585         getregs(cdb,mDX | mAX);         // DX and AX are about to be loaded
1586         cs.Iop = 0x8B;
1587         cdb.gen(&cs);                   // MOV AX,EA
1588         getlvalue_msw(&cs);
1589         cs.Irm |= modregrm(0,DX,0);
1590         cdb.gen(&cs);                   // MOV DX,EA+2
1591         getlvalue_lsw(&cs);             // point cs back at the low half for the store below
1592         retregs = mDX | mAX;            // result pair unless changed for %= below
1593         if (config.target_cpu >= TARGET_PentiumPro && op == OPmulass)
1594         {
1595             /*  IMUL    ECX,EAX
1596                 IMUL    EDX,EBX
1597                 ADD     ECX,EDX
1598                 MUL     EBX
1599                 ADD     EDX,ECX
1600              */
1601              getregs(cdb,mAX|mDX|mCX);           // these registers are modified by the sequence
1602              cdb.gen2(0x0FAF,modregrm(3,CX,AX)); // IMUL ECX,EAX
1603              cdb.gen2(0x0FAF,modregrm(3,DX,BX)); // IMUL EDX,EBX
1604              cdb.gen2(0x03,modregrm(3,CX,DX));   // ADD  ECX,EDX
1605              cdb.gen2(0xF7,modregrm(3,4,BX));    // MUL  EBX
1606              cdb.gen2(0x03,modregrm(3,DX,CX));   // ADD  EDX,ECX
1607         }
1608         else
1609         {
1610             if (op == OPmodass)
1611                 retregs = mBX | mCX;    // for %= ask for the result in the CX,BX pair
1612             callclib(cdb,e,lib,&retregs,idxregm(&cs));
1613         }
1614         reg = findreglsw(retregs);
1615         cs.Iop = 0x89;
1616         NEWREG(cs.Irm,reg);
1617         cdb.gen(&cs);                   // MOV EA,lsreg
1618         reg = findregmsw(retregs);
1619         NEWREG(cs.Irm,reg);
1620         getlvalue_msw(&cs);
1621         cdb.gen(&cs);                   // MOV EA+2,msreg
1622         if (e1.Ecount)                 // if we gen a CSE
1623             cssave(e1,retregs,!OTleaf(e1.Eoper));
1624         freenode(e1);
1625         fixresult(cdb,e,retregs,pretregs);
1626         return;
1627     }
1628     else
1629     {
1630         assert(0);                      // no other operand sizes are handled
1631     }
1632 }
1633 
1634 
1635 /********************************
1636  * Generate code for <<= and >>=
1637  */
1638 
1639 void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1640 {
1641     code cs;
1642     regm_t retregs;
1643     uint op1,op2;
1644     reg_t reg;
1645 
1646     elem *e1 = e.EV.E1;
1647     elem *e2 = e.EV.E2;
1648 
1649     tym_t tyml = tybasic(e1.Ety);              // type of lvalue
1650     uint sz = _tysize[tyml];
1651     uint isbyte = tybyte(e.Ety) != 0;        // 1 for byte operations
1652     tym_t tym = tybasic(e.Ety);                // type of result
1653     OPER oper = e.Eoper;
1654     assert(tysize(e2.Ety) <= REGSIZE);
1655 
1656     uint rex = (I64 && sz == 8) ? REX_W : 0;
1657 
1658     // if our lvalue is a cse, make sure we evaluate for result in register
1659     if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,&reg))
1660         *pretregs |= ALLREGS;
1661 
1662     version (SCPP)
1663     {
1664         // Do this until the rest of the compiler does OPshr/OPashr correctly
1665         if (oper == OPshrass)
1666             oper = tyuns(tyml) ? OPshrass : OPashrass;
1667     }
1668 
1669     // Select opcodes. op2 is used for msw for long shifts.
1670 
1671     switch (oper)
1672     {
1673         case OPshlass:
1674             op1 = 4;                    // SHL
1675             op2 = 2;                    // RCL
1676             break;
1677 
1678         case OPshrass:
1679             op1 = 5;                    // SHR
1680             op2 = 3;                    // RCR
1681             break;
1682 
1683         case OPashrass:
1684             op1 = 7;                    // SAR
1685             op2 = 3;                    // RCR
1686             break;
1687 
1688         default:
1689             assert(0);
1690     }
1691 
1692 
1693     uint v = 0xD3;                  // for SHIFT xx,CL cases
1694     uint loopcnt = 1;
1695     uint conste2 = false;
1696     uint shiftcnt = 0;              // avoid "use before initialized" warnings
1697     if (e2.Eoper == OPconst)
1698     {
1699         conste2 = true;                 // e2 is a constant
1700         shiftcnt = e2.EV.Vint;         // byte ordering of host
1701         if (config.target_cpu >= TARGET_80286 &&
1702             sz <= REGSIZE &&
1703             shiftcnt != 1)
1704             v = 0xC1;                   // SHIFT xx,shiftcnt
1705         else if (shiftcnt <= 3)
1706         {
1707             loopcnt = shiftcnt;
1708             v = 0xD1;                   // SHIFT xx,1
1709         }
1710     }
1711 
1712     if (v == 0xD3)                        // if COUNT == CL
1713     {
1714         retregs = mCX;
1715         codelem(cdb,e2,&retregs,false);
1716     }
1717     else
1718         freenode(e2);
1719     getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
1720     modEA(cdb,&cs);             // check for modifying register
1721 
1722     if (*pretregs == 0 ||               // if don't return result
1723         (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
1724         sz > REGSIZE
1725        )
1726     {
1727         retregs = 0;            // value not returned in a register
1728         cs.Iop = v ^ isbyte;
1729         while (loopcnt--)
1730         {
1731             NEWREG(cs.Irm,op1);           // make sure op1 is first
1732             if (sz <= REGSIZE)
1733             {
1734                 if (conste2)
1735                 {
1736                     cs.IFL2 = FLconst;
1737                     cs.IEV2.Vint = shiftcnt;
1738                 }
1739                 cdb.gen(&cs);             // SHIFT EA,[CL|1]
1740                 if (*pretregs & mPSW && !loopcnt && conste2)
1741                   code_orflag(cdb.last(),CFpsw);
1742             }
1743             else // TYlong
1744             {
1745                 cs.Iop = 0xD1;            // plain shift
1746                 code *ce = gennop(null);                  // ce: NOP
1747                 if (v == 0xD3)
1748                 {
1749                     getregs(cdb,mCX);
1750                     if (!conste2)
1751                     {
1752                         assert(loopcnt == 0);
1753                         genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
1754                     }
1755                 }
1756                 code *cg;
1757                 if (oper == OPshlass)
1758                 {
1759                     cdb.gen(&cs);               // cg: SHIFT EA
1760                     cg = cdb.last();
1761                     code_orflag(cg,CFpsw);
1762                     getlvalue_msw(&cs);
1763                     NEWREG(cs.Irm,op2);
1764                     cdb.gen(&cs);               // SHIFT EA
1765                     getlvalue_lsw(&cs);
1766                 }
1767                 else
1768                 {
1769                     getlvalue_msw(&cs);
1770                     cdb.gen(&cs);
1771                     cg = cdb.last();
1772                     code_orflag(cg,CFpsw);
1773                     NEWREG(cs.Irm,op2);
1774                     getlvalue_lsw(&cs);
1775                     cdb.gen(&cs);
1776                 }
1777                 if (v == 0xD3)                    // if building a loop
1778                 {
1779                     genjmp(cdb,LOOP,FLcode,cast(block *) cg); // LOOP cg
1780                     regimmed_set(CX,0);           // note that now CX == 0
1781                 }
1782                 cdb.append(ce);
1783             }
1784         }
1785 
1786         // If we want the result, we must load it from the EA
1787         // into a register.
1788 
1789         if (sz == 2 * REGSIZE && *pretregs)
1790         {
1791             retregs = *pretregs & (ALLREGS | mBP);
1792             if (retregs)
1793             {
1794                 retregs &= ~idxregm(&cs);
1795                 allocreg(cdb,&retregs,&reg,tym);
1796                 cs.Iop = 0x8B;
1797 
1798                 // be careful not to trash any index regs
1799                 // do MSW first (which can't be an index reg)
1800                 getlvalue_msw(&cs);
1801                 NEWREG(cs.Irm,reg);
1802                 cdb.gen(&cs);
1803                 getlvalue_lsw(&cs);
1804                 reg = findreglsw(retregs);
1805                 NEWREG(cs.Irm,reg);
1806                 cdb.gen(&cs);
1807                 if (*pretregs & mPSW)
1808                     tstresult(cdb,retregs,tyml,true);
1809             }
1810             else        // flags only
1811             {
1812                 retregs = ALLREGS & ~idxregm(&cs);
1813                 allocreg(cdb,&retregs,&reg,TYint);
1814                 cs.Iop = 0x8B;
1815                 NEWREG(cs.Irm,reg);
1816                 cdb.gen(&cs);           // MOV reg,EA
1817                 cs.Iop = 0x0B;          // OR reg,EA+2
1818                 cs.Iflags |= CFpsw;
1819                 getlvalue_msw(&cs);
1820                 cdb.gen(&cs);
1821             }
1822         }
1823     }
1824     else                                // else must evaluate in register
1825     {
1826         if (sz <= REGSIZE)
1827         {
1828             regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
1829             if (isbyte)
1830                 possregs &= BYTEREGS;
1831             retregs = *pretregs & possregs;
1832             if (retregs == 0)
1833                 retregs = possregs;
1834             allocreg(cdb,&retregs,&reg,tym);
1835             cs.Iop = 0x8B ^ isbyte;
1836             code_newreg(&cs, reg);
1837             if (isbyte && I64 && (reg >= 4))
1838                 cs.Irex |= REX;
1839             cdb.gen(&cs);                     // MOV reg,EA
1840             if (!I16)
1841             {
1842                 assert(!isbyte || (mask(reg) & BYTEREGS));
1843                 cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
1844                 if (isbyte && I64 && (reg >= 4))
1845                     cdb.last().Irex |= REX;
1846                 code_orrex(cdb.last(), rex);
1847                 // We can do a 32 bit shift on a 16 bit operand if
1848                 // it's a left shift and we're not concerned about
1849                 // the flags. Remember that flags are not set if
1850                 // a shift of 0 occurs.
1851                 if (_tysize[tym] == SHORTSIZE &&
1852                     (oper == OPshrass || oper == OPashrass ||
1853                      (*pretregs & mPSW && conste2)))
1854                      cdb.last().Iflags |= CFopsize;            // 16 bit operand
1855             }
1856             else
1857             {
1858                 while (loopcnt--)
1859                 {   // Generate shift instructions.
1860                     cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
1861                 }
1862             }
1863             if (*pretregs & mPSW && conste2)
1864             {
1865                 assert(shiftcnt);
1866                 *pretregs &= ~mPSW;     // result is already in flags
1867                 code_orflag(cdb.last(),CFpsw);
1868             }
1869 
1870             cs.Iop = 0x89 ^ isbyte;
1871             if (isbyte && I64 && (reg >= 4))
1872                 cs.Irex |= REX;
1873             cdb.gen(&cs);                                // MOV EA,reg
1874 
1875             // If result is not in correct register
1876             fixresult(cdb,e,retregs,pretregs);
1877             retregs = *pretregs;
1878         }
1879         else
1880             assert(0);
1881     }
1882     if (e1.Ecount && !(retregs & regcon.mvar))   // if lvalue is a CSE
1883         cssave(e1,retregs,!OTleaf(e1.Eoper));
1884     freenode(e1);
1885     *pretregs = retregs;
1886 }
1887 
1888 
1889 /**********************************
1890  * Generate code for compares.
1891  * Handles lt,gt,le,ge,eqeq,ne for all data types.
1892  */
1893 
1894 void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
1895 {
1896     regm_t retregs,rretregs;
1897     reg_t reg,rreg;
1898     int fl;
1899 
1900     //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
1901     // Collect extra parameter. This is pretty ugly...
1902     int flag = cdcmp_flag;
1903     cdcmp_flag = 0;
1904 
1905     elem *e1 = e.EV.E1;
1906     elem *e2 = e.EV.E2;
1907     if (*pretregs == 0)                 // if don't want result
1908     {
1909         codelem(cdb,e1,pretregs,false);
1910         *pretregs = 0;                  // in case e1 changed it
1911         codelem(cdb,e2,pretregs,false);
1912         return;
1913     }
1914 
1915     uint jop = jmpopcode(e);        // must be computed before
1916                                         // leaves are free'd
1917     uint reverse = 0;
1918 
1919     OPER op = e.Eoper;
1920     assert(OTrel(op));
1921     bool eqorne = (op == OPeqeq) || (op == OPne);
1922 
1923     tym_t tym = tybasic(e1.Ety);
1924     uint sz = _tysize[tym];
1925     uint isbyte = sz == 1;
1926 
1927     uint rex = (I64 && sz == 8) ? REX_W : 0;
1928     uint grex = rex << 16;          // 64 bit operands
1929 
1930     code cs;
1931     code *ce;
1932     if (tyfloating(tym))                  // if floating operation
1933     {
1934         if (config.fpxmmregs)
1935         {
1936             retregs = mPSW;
1937             if (tyxmmreg(tym))
1938                 orthxmm(cdb,e,&retregs);
1939             else
1940                 orth87(cdb,e,&retregs);
1941         }
1942         else if (config.inline8087)
1943         {   retregs = mPSW;
1944             orth87(cdb,e,&retregs);
1945         }
1946         else
1947         {
1948             static if (TARGET_WINDOS)
1949             {
1950                 int clib;
1951 
1952                 retregs = 0;                /* skip result for now          */
1953                 if (iffalse(e2))            /* second operand is constant 0 */
1954                 {
1955                     assert(!eqorne);        /* should be OPbool or OPnot    */
1956                     if (tym == TYfloat)
1957                     {
1958                         retregs = FLOATREGS;
1959                         clib = CLIB.ftst0;
1960                     }
1961                     else
1962                     {
1963                         retregs = DOUBLEREGS;
1964                         clib = CLIB.dtst0;
1965                     }
1966                     if (rel_exception(op))
1967                         clib += CLIB.dtst0exc - CLIB.dtst0;
1968                     codelem(cdb,e1,&retregs,false);
1969                     retregs = 0;
1970                     callclib(cdb,e,clib,&retregs,0);
1971                     freenode(e2);
1972                 }
1973                 else
1974                 {
1975                     clib = CLIB.dcmp;
1976                     if (rel_exception(op))
1977                         clib += CLIB.dcmpexc - CLIB.dcmp;
1978                     opdouble(cdb,e,&retregs,clib);
1979                 }
1980             }
1981             else
1982             {
1983                 assert(0);
1984             }
1985         }
1986         goto L3;
1987     }
1988 
1989     /* If it's a signed comparison of longs, we have to call a library    */
1990     /* routine, because we don't know the target of the signed branch     */
1991     /* (have to set up flags so that jmpopcode() will do it right)        */
1992     if (!eqorne &&
1993         (I16 && tym == TYlong  && tybasic(e2.Ety) == TYlong ||
1994          I32 && tym == TYllong && tybasic(e2.Ety) == TYllong)
1995        )
1996     {
1997         assert(jop != JC && jop != JNC);
1998         retregs = mDX | mAX;
1999         codelem(cdb,e1,&retregs,false);
2000         retregs = mCX | mBX;
2001         scodelem(cdb,e2,&retregs,mDX | mAX,false);
2002 
2003         if (I16)
2004         {
2005             retregs = 0;
2006             callclib(cdb,e,CLIB.lcmp,&retregs,0);    // gross, but it works
2007         }
2008         else
2009         {
2010             /* Generate:
2011              *      CMP  EDX,ECX
2012              *      JNE  C1
2013              *      XOR  EDX,EDX
2014              *      CMP  EAX,EBX
2015              *      JZ   C1
2016              *      JA   C3
2017              *      DEC  EDX
2018              *      JMP  C1
2019              * C3:  INC  EDX
2020              * C1:
2021              */
2022              getregs(cdb,mDX);
2023              genregs(cdb,0x39,CX,DX);             // CMP EDX,ECX
2024              code *c1 = gennop(null);
2025              genjmp(cdb,JNE,FLcode,cast(block *)c1);  // JNE C1
2026              movregconst(cdb,DX,0,0);             // XOR EDX,EDX
2027              genregs(cdb,0x39,BX,AX);             // CMP EAX,EBX
2028              genjmp(cdb,JE,FLcode,cast(block *)c1);   // JZ C1
2029              code *c3 = gen1(null,0x40 + DX);                  // INC EDX
2030              genjmp(cdb,JA,FLcode,cast(block *)c3);   // JA C3
2031              cdb.gen1(0x48 + DX);                              // DEC EDX
2032              genjmp(cdb,JMPS,FLcode,cast(block *)c1); // JMP C1
2033              cdb.append(c3);
2034              cdb.append(c1);
2035              getregs(cdb,mDX);
2036              retregs = mPSW;
2037         }
2038         goto L3;
2039     }
2040 
2041     /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC
2042      * (This is already reflected in the jop)
2043      */
2044     if ((jop == JC || jop == JNC) &&
2045         (op == OPgt || op == OPle) &&
2046         (tyuns(tym) || tyuns(e2.Ety))
2047        )
2048     {   // jmpopcode() sez comparison should be reversed
2049         assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst);
2050         reverse ^= 2;
2051     }
2052 
2053     /* See if we should swap operands     */
2054     if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
2055     {
2056         e1 = e.EV.E2;
2057         e2 = e.EV.E1;
2058         reverse ^= 2;
2059     }
2060 
2061     retregs = allregs;
2062     if (isbyte)
2063         retregs = BYTEREGS;
2064 
2065     ce = null;
2066     cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
2067     cs.Irex = cast(ubyte)rex;
2068     if (sz > REGSIZE)
2069         ce = gennop(ce);
2070 
2071     switch (e2.Eoper)
2072     {
2073         default:
2074         L2:
2075             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
2076             rretregs = allregs & ~retregs;
2077             if (isbyte)
2078                 rretregs &= BYTEREGS;
2079             scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
2080             if (sz <= REGSIZE)                              // CMP reg,rreg
2081             {
2082                 reg = findreg(retregs);             // get reg that e1 is in
2083                 rreg = findreg(rretregs);
2084                 genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg);
2085                 code_orrex(cdb.last(), rex);
2086                 if (!I16 && sz == SHORTSIZE)
2087                     cdb.last().Iflags |= CFopsize;          // compare only 16 bits
2088                 if (I64 && isbyte && (reg >= 4 || rreg >= 4))
2089                     cdb.last().Irex |= REX;                 // address byte registers
2090             }
2091             else
2092             {
2093                 assert(sz <= 2 * REGSIZE);
2094 
2095                 // Compare MSW, if they're equal then compare the LSW
2096                 reg = findregmsw(retregs);
2097                 rreg = findregmsw(rretregs);
2098                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2099                 if (I32 && sz == 6)
2100                     cdb.last().Iflags |= CFopsize;         // seg is only 16 bits
2101                 else if (I64)
2102                     code_orrex(cdb.last(), REX_W);
2103                 genjmp(cdb,JNE,FLcode,cast(block *) ce);   // JNE nop
2104 
2105                 reg = findreglsw(retregs);
2106                 rreg = findreglsw(rretregs);
2107                 genregs(cdb,0x3B ^ reverse,reg,rreg);  // CMP reg,rreg
2108                 if (I64)
2109                     code_orrex(cdb.last(), REX_W);
2110             }
2111             break;
2112 
2113         case OPrelconst:
2114             if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
2115                 goto L2;
2116             fl = el_fl(e2);
2117             switch (fl)
2118             {
2119                 case FLfunc:
2120                     fl = FLextern;          // so it won't be self-relative
2121                     break;
2122 
2123                 case FLdata:
2124                 case FLudata:
2125                 case FLextern:
2126                     if (sz > REGSIZE)       // compare against DS, not DGROUP
2127                         goto L2;
2128                     break;
2129 
2130                 case FLfardata:
2131                     break;
2132 
2133                 default:
2134                     goto L2;
2135             }
2136             cs.IFL2 = cast(ubyte)fl;
2137             cs.IEV2.Vsym = e2.EV.Vsym;
2138             if (sz > REGSIZE)
2139             {
2140                 cs.Iflags |= CFseg;
2141                 cs.IEV2.Voffset = 0;
2142             }
2143             else
2144             {
2145                 cs.Iflags |= CFoff;
2146                 cs.IEV2.Voffset = e2.EV.Voffset;
2147             }
2148             goto L4;
2149 
2150         case OPconst:
2151             // If compare against 0
2152             if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) &&
2153                 isregvar(e1,&retregs,&reg)
2154                )
2155             {   // Just do a TEST instruction
2156                 genregs(cdb,0x85 ^ isbyte,reg,reg);      // TEST reg,reg
2157                 cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw;
2158                 code_orrex(cdb.last(), rex);
2159                 if (I64 && isbyte && reg >= 4)
2160                     cdb.last().Irex |= REX;                 // address byte registers
2161                 retregs = mPSW;
2162                 break;
2163             }
2164 
2165             if (!tyuns(tym) && !tyuns(e2.Ety) &&
2166                 !boolres(e2) && !(*pretregs & mPSW) &&
2167                 (sz == REGSIZE || (I64 && sz == 4)) &&
2168                 (!I16 || op == OPlt || op == OPge))
2169             {
2170                 assert(*pretregs & (allregs));
2171                 codelem(cdb,e1,pretregs,false);
2172                 reg = findreg(*pretregs);
2173                 getregs(cdb,mask(reg));
2174                 switch (op)
2175                 {
2176                     case OPle:
2177                         cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1);   // ADD reg,-1
2178                         code_orflag(cdb.last(), CFpsw);
2179                         cdb.genc2(0x81,grex | modregrmx(3,2,reg),0);          // ADC reg,0
2180                         goto oplt;
2181 
2182                     case OPgt:
2183                         cdb.gen2(0xF7,grex | modregrmx(3,3,reg));         // NEG reg
2184                             /* Flips the sign bit unless the value is 0 or int.min.
2185                             Also sets the carry bit when the value is not 0. */
2186                         code_orflag(cdb.last(), CFpsw);
2187                         cdb.genc2(0x81,grex | modregrmx(3,3,reg),0);  // SBB reg,0
2188                             /* Subtracts the carry bit. This turns int.min into
2189                             int.max, flipping the sign bit.
2190                             For other negative and positive values, subtracting 1
2191                             doesn't affect the sign bit.
2192                             For 0, the carry bit is not set, so this does nothing
2193                             and the sign bit is not affected. */
2194                         goto oplt;
2195 
2196                     case OPlt:
2197                     oplt:
2198                         // Get the sign bit, i.e. 1 if the value is negative.
2199                         if (!I16)
2200                             cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31
2201                         else
2202                         {   /* 8088-286 do not have a barrel shifter, so use this
2203                                faster sequence
2204                              */
2205                             genregs(cdb,0xD1,0,reg);   // ROL reg,1
2206                             reg_t regi;
2207                             if (reghasvalue(allregs,1,&regi))
2208                                 genregs(cdb,0x23,reg,regi);  // AND reg,regi
2209                             else
2210                                 cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1
2211                         }
2212                         break;
2213 
2214                     case OPge:
2215                         genregs(cdb,0xD1,4,reg);        // SHL reg,1
2216                         code_orrex(cdb.last(),rex);
2217                         code_orflag(cdb.last(), CFpsw);
2218                         genregs(cdb,0x19,reg,reg);      // SBB reg,reg
2219                         code_orrex(cdb.last(),rex);
2220                         if (I64)
2221                         {
2222                             cdb.gen2(0xFF,modregrmx(3,0,reg));       // INC reg
2223                             code_orrex(cdb.last(), rex);
2224                         }
2225                         else
2226                             cdb.gen1(0x40 + reg);                    // INC reg
2227                         break;
2228 
2229                     default:
2230                         assert(0);
2231                 }
2232                 freenode(e2);
2233                 goto ret;
2234             }
2235 
2236             cs.IFL2 = FLconst;
2237             if (sz == 16)
2238                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.msw;
2239             else if (sz > REGSIZE)
2240                 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);
2241             else
2242                 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong;
2243 
2244             // The cmp immediate relies on sign extension of the 32 bit immediate value
2245             if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint)
2246                 goto L2;
2247           L4:
2248             cs.Iop = 0x81 ^ isbyte;
2249 
2250             /* if ((e1 is data or a '*' reference) and it's not a
2251              * common subexpression
2252              */
2253 
2254             if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
2255                  e1.Eoper == OPind) &&
2256                 !evalinregister(e1))
2257             {
2258                 getlvalue(cdb,&cs,e1,RMload);
2259                 freenode(e1);
2260                 if (evalinregister(e2))
2261                 {
2262                     retregs = idxregm(&cs);
2263                     if ((cs.Iflags & CFSEG) == CFes)
2264                         retregs |= mES;             // take no chances
2265                     rretregs = allregs & ~retregs;
2266                     if (isbyte)
2267                         rretregs &= BYTEREGS;
2268                     scodelem(cdb,e2,&rretregs,retregs,true);
2269                     cs.Iop = 0x39 ^ isbyte ^ reverse;
2270                     if (sz > REGSIZE)
2271                     {
2272                         rreg = findregmsw(rretregs);
2273                         cs.Irm |= modregrm(0,rreg,0);
2274                         getlvalue_msw(&cs);
2275                         cdb.gen(&cs);              // CMP EA+2,rreg
2276                         if (I32 && sz == 6)
2277                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
2278                         if (I64 && isbyte && rreg >= 4)
2279                             cdb.last().Irex |= REX;
2280                         genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop
2281                         rreg = findreglsw(rretregs);
2282                         NEWREG(cs.Irm,rreg);
2283                         getlvalue_lsw(&cs);
2284                     }
2285                     else
2286                     {
2287                         rreg = findreg(rretregs);
2288                         code_newreg(&cs, rreg);
2289                         if (I64 && isbyte && rreg >= 4)
2290                             cs.Irex |= REX;
2291                     }
2292                 }
2293                 else
2294                 {
2295                     cs.Irm |= modregrm(0,7,0);
2296                     if (sz > REGSIZE)
2297                     {
2298                         if (sz == 6)
2299                             assert(0);
2300                         if (e2.Eoper == OPrelconst)
2301                         {   cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg;
2302                             cs.IEV2.Voffset = 0;
2303                         }
2304                         getlvalue_msw(&cs);
2305                         cdb.gen(&cs);              // CMP EA+2,const
2306                         if (!I16 && sz == 6)
2307                             cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
2308                         genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop
2309                         if (e2.Eoper == OPconst)
2310                             cs.IEV2.Vint = cast(int)e2.EV.Vllong;
2311                         else if (e2.Eoper == OPrelconst)
2312                         {   // Turn off CFseg, on CFoff
2313                             cs.Iflags ^= CFseg | CFoff;
2314                             cs.IEV2.Voffset = e2.EV.Voffset;
2315                         }
2316                         else
2317                             assert(0);
2318                         getlvalue_lsw(&cs);
2319                     }
2320                     freenode(e2);
2321                 }
2322                 cdb.gen(&cs);
2323                 break;
2324             }
2325 
2326             if (evalinregister(e2) && !OTassign(e1.Eoper) &&
2327                 !isregvar(e1,null,null))
2328             {
2329                 regm_t m;
2330 
2331                 m = allregs & ~regcon.mvar;
2332                 if (isbyte)
2333                     m &= BYTEREGS;
2334                 if (m & (m - 1))    // if more than one free register
2335                     goto L2;
2336             }
2337             if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) &&
2338                 !boolres(e2) && !evalinregister(e1))
2339             {
2340                 retregs = mPSW;
2341                 scodelem(cdb,e1,&retregs,0,false);
2342                 freenode(e2);
2343                 break;
2344             }
2345             if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW)
2346             {
2347                 retregs |= mPSW;
2348                 scodelem(cdb,e1,&retregs,0,false);
2349                 freenode(e2);
2350                 break;
2351             }
2352             scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
2353             if (sz == 1)
2354             {
2355                 reg = findreg(retregs & allregs);   // get reg that e1 is in
2356                 cs.Irm = modregrm(3,7,reg & 7);
2357                 if (reg & 8)
2358                     cs.Irex |= REX_B;
2359                 if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg)
2360                 {   assert(reg < 4);
2361                     cs.Irm |= 4;                    // use upper register half
2362                 }
2363                 if (I64 && reg >= 4)
2364                     cs.Irex |= REX;                 // address byte registers
2365             }
2366             else if (sz <= REGSIZE)
2367             {   // CMP reg,const
2368                 reg = findreg(retregs & allregs);   // get reg that e1 is in
2369                 rretregs = allregs & ~retregs;
2370                 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg))
2371                 {
2372                     genregs(cdb,0x3B,reg,rreg);
2373                     code_orrex(cdb.last(), rex);
2374                     if (!I16)
2375                         cdb.last().Iflags |= cs.Iflags & CFopsize;
2376                     freenode(e2);
2377                     break;
2378                 }
2379                 cs.Irm = modregrm(3,7,reg & 7);
2380                 if (reg & 8)
2381                     cs.Irex |= REX_B;
2382             }
2383             else if (sz <= 2 * REGSIZE)
2384             {
2385                 reg = findregmsw(retregs);          // get reg that e1 is in
2386                 cs.Irm = modregrm(3,7,reg);
2387                 cdb.gen(&cs);                       // CMP reg,MSW
2388                 if (I32 && sz == 6)
2389                     cdb.last().Iflags |= CFopsize;  // seg is only 16 bits
2390                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
2391 
2392                 reg = findreglsw(retregs);
2393                 cs.Irm = modregrm(3,7,reg);
2394                 if (e2.Eoper == OPconst)
2395                     cs.IEV2.Vint = e2.EV.Vlong;
2396                 else if (e2.Eoper == OPrelconst)
2397                 {   // Turn off CFseg, on CFoff
2398                     cs.Iflags ^= CFseg | CFoff;
2399                     cs.IEV2.Voffset = e2.EV.Voffset;
2400                 }
2401                 else
2402                     assert(0);
2403             }
2404             else
2405                 assert(0);
2406             cdb.gen(&cs);                         // CMP sucreg,LSW
2407             freenode(e2);
2408             break;
2409 
2410         case OPind:
2411             if (e2.Ecount)
2412                 goto L2;
2413             goto L5;
2414 
2415         case OPvar:
2416             static if (TARGET_OSX)
2417             {
2418                 if (movOnly(e2))
2419                     goto L2;
2420             }
2421             if ((e1.Eoper == OPvar &&
2422                  isregvar(e2,&rretregs,&reg) &&
2423                  sz <= REGSIZE
2424                 ) ||
2425                 (e1.Eoper == OPind &&
2426                  isregvar(e2,&rretregs,&reg) &&
2427                  !evalinregister(e1) &&
2428                  sz <= REGSIZE
2429                 )
2430                )
2431             {
2432                 // CMP EA,e2
2433                 getlvalue(cdb,&cs,e1,RMload);
2434                 freenode(e1);
2435                 cs.Iop = 0x39 ^ isbyte ^ reverse;
2436                 code_newreg(&cs,reg);
2437                 if (I64 && isbyte && reg >= 4)
2438                     cs.Irex |= REX;                 // address byte registers
2439                 cdb.gen(&cs);
2440                 freenode(e2);
2441                 break;
2442             }
2443           L5:
2444             scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
2445             if (sz <= REGSIZE)                      // CMP reg,EA
2446             {
2447                 reg = findreg(retregs & allregs);   // get reg that e1 is in
2448                 uint opsize = cs.Iflags & CFopsize;
2449                 loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0);
2450                 code_orflag(cdb.last(),opsize);
2451             }
2452             else if (sz <= 2 * REGSIZE)
2453             {
2454                 reg = findregmsw(retregs);   // get reg that e1 is in
2455                 // CMP reg,EA
2456                 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0);
2457                 if (I32 && sz == 6)
2458                     cdb.last().Iflags |= CFopsize;        // seg is only 16 bits
2459                 genjmp(cdb,JNE,FLcode, cast(block *) ce);  // JNE ce
2460                 reg = findreglsw(retregs);
2461                 if (e2.Eoper == OPind)
2462                 {
2463                     NEWREG(cs.Irm,reg);
2464                     getlvalue_lsw(&cs);
2465                     cdb.gen(&cs);
2466                 }
2467                 else
2468                     loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0);
2469             }
2470             else
2471                 assert(0);
2472             freenode(e2);
2473             break;
2474     }
2475     cdb.append(ce);
2476 
2477 L3:
2478     if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
2479     {
2480         if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00))
2481         {
2482             regm_t resregs = retregs;
2483             if (!I64)
2484             {
2485                 resregs &= BYTEREGS;
2486                 if (!resregs)
2487                     resregs = BYTEREGS;
2488             }
2489             allocreg(cdb,&resregs,&reg,TYint);
2490             cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg
2491             if (I64 && reg >= 4)
2492                 code_orrex(cdb.last(),REX);
2493             if (tysize(e.Ety) > 1)
2494             {
2495                 genregs(cdb,0x0FB6,reg,reg);       // MOVZX reg,reg
2496                 if (I64 && sz == 8)
2497                     code_orrex(cdb.last(),REX_W);
2498                 if (I64 && reg >= 4)
2499                     code_orrex(cdb.last(),REX);
2500             }
2501             *pretregs &= ~mPSW;
2502             fixresult(cdb,e,resregs,pretregs);
2503         }
2504         else
2505         {
2506             code *nop = null;
2507             regm_t save = regcon.immed.mval;
2508             allocreg(cdb,&retregs,&reg,TYint);
2509             regcon.immed.mval = save;
2510             if ((*pretregs & mPSW) == 0 &&
2511                 (jop == JC || jop == JNC))
2512             {
2513                 getregs(cdb,retregs);
2514                 genregs(cdb,0x19,reg,reg);     // SBB reg,reg
2515                 if (rex)
2516                     code_orrex(cdb.last(), rex);
2517                 if (flag)
2518                 { }                                         // cdcond() will handle it
2519                 else if (jop == JNC)
2520                 {
2521                     if (I64)
2522                     {
2523                         cdb.gen2(0xFF,modregrmx(3,0,reg));  // INC reg
2524                         code_orrex(cdb.last(), rex);
2525                     }
2526                     else
2527                         cdb.gen1(0x40 + reg);               // INC reg
2528                 }
2529                 else
2530                 {
2531                     cdb.gen2(0xF7,modregrmx(3,3,reg));      // NEG reg
2532                     code_orrex(cdb.last(), rex);
2533                 }
2534             }
2535             else if (I64 && sz == 8)
2536             {
2537                 assert(!flag);
2538                 movregconst(cdb,reg,1,64|8);   // MOV reg,1
2539                 nop = gennop(nop);
2540                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
2541                                                             // MOV reg,0
2542                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64);
2543                 regcon.immed.mval &= ~mask(reg);
2544             }
2545             else
2546             {
2547                 assert(!flag);
2548                 movregconst(cdb,reg,1,8);      // MOV reg,1
2549                 nop = gennop(nop);
2550                 genjmp(cdb,jop,FLcode,cast(block *) nop);  // Jtrue nop
2551                                                             // MOV reg,0
2552                 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0);
2553                 regcon.immed.mval &= ~mask(reg);
2554             }
2555             *pretregs = retregs;
2556             cdb.append(nop);
2557         }
2558     }
2559 ret:
2560     { }
2561 }
2562 
2563 
/**********************************
 * Generate code for signed compare of longs.
 *
 * The operands are compared in two register-sized halves. The most
 * significant words (MSW) are compared first using signed jumps; only when
 * they are equal does control fall through to an unsigned compare of the
 * least significant words (LSW).
 *
 * Params:
 *      cdb    = sink for the generated code
 *      e      = relational elem; its operator indexes the jump tables below
 *               as (op - OPle), in the order <=, >, <, >=. e is freed on exit.
 *      jcond  = if bit 0 is set, the jump to targ is taken when the relation
 *               holds; if clear, the jump conditions are toggled
 *      fltarg = FL value describing targ, passed through to genjmp()
 *      targ   = block* or code*
 */

void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ)
{
    // Jump opcodes indexed by (op - OPle): signed ones for the MSW compare,
    // unsigned ones for the LSW compare.
                                         // <=  >   <   >=
    static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ];
    static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];

    //printf("longcmp(e = %p)\n", e);
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;

    // See if we should swap operands
    if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
    {
        e1 = e.EV.E2;
        e2 = e.EV.E1;
        op = swaprel(op);               // keep the relation equivalent after the swap
    }

    code cs;
    cs.Iflags = 0;
    cs.Irex = 0;

    // ce is the landing point for "result already decided by the MSW, and it
    // is not a jump to targ"; it is appended at the very end.
    code *ce = gennop(null);
    regm_t retregs = ALLREGS;
    regm_t rretregs;
    reg_t reg,rreg;

    uint jop = jopmsw[op - OPle];
    if (!(jcond & 1)) jop ^= (JL ^ JG);                   // toggle jump condition
    // cdbjmp holds the pair of jumps that follows every MSW compare. It is
    // built once here and appended after the MSW CMP in whichever case runs.
    CodeBuilder cdbjmp;
    cdbjmp.ctor();
    genjmp(cdbjmp,jop,fltarg, cast(block *) targ);             // Jx targ
    genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce);   // Jy nop

    switch (e2.Eoper)
    {
        default:
        L2:
            // General case: both operands in register pairs
            scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
            rretregs = ALLREGS & ~retregs;
            scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
            cse_flush(cdb,1);
            // Compare MSW, if they're equal then compare the LSW
            reg = findregmsw(retregs);
            rreg = findregmsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
            cdb.append(cdbjmp);

            reg = findreglsw(retregs);
            rreg = findreglsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);        // CMP reg,rreg
            break;

        case OPconst:
            // Set up cs as a compare against an immediate; the /7 reg field
            // that selects CMP is filled in below.
            cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);            // MSW first
            cs.IFL2 = FLconst;
            cs.Iop = 0x81;

            /* if ((e1 is data or a '*' reference) and it's not a
             * common subexpression
             */

            if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
                 e1.Eoper == OPind) &&
                !evalinregister(e1))
            {
                // Compare directly against e1 in memory
                getlvalue(cdb,&cs,e1,0);
                freenode(e1);
                if (evalinregister(e2))
                {
                    // The constant is wanted in registers: compare EA with
                    // those registers instead of with an immediate. The
                    // registers used to address e1 must be preserved.
                    retregs = idxregm(&cs);
                    if ((cs.Iflags & CFSEG) == CFes)
                            retregs |= mES;         // take no chances
                    rretregs = ALLREGS & ~retregs;
                    scodelem(cdb,e2,&rretregs,retregs,true);
                    cse_flush(cdb,1);
                    rreg = findregmsw(rretregs);
                    cs.Iop = 0x39;
                    cs.Irm |= modregrm(0,rreg,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);           // CMP EA+2,rreg
                    cdb.append(cdbjmp);
                    rreg = findreglsw(rretregs);
                    NEWREG(cs.Irm,rreg);    // switch to the LSW register for the second CMP
                }
                else
                {
                    cse_flush(cdb,1);
                    cs.Irm |= modregrm(0,7,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);           // CMP EA+2,const
                    cdb.append(cdbjmp);
                    cs.IEV2.Vint = e2.EV.Vlong;     // now the LSW of the constant
                    freenode(e2);
                }
                // Shared by both branches above: cs now describes the LSW compare
                getlvalue_lsw(&cs);
                cdb.gen(&cs);                   // CMP EA,rreg/const
                break;
            }
            if (evalinregister(e2))
                goto L2;

            // e1 in registers, e2 as an immediate
            scodelem(cdb,e1,&retregs,0,true);    // compute left leaf
            cse_flush(cdb,1);
            reg = findregmsw(retregs);              // get reg that e1 is in
            cs.Irm = modregrm(3,7,reg);

            cdb.gen(&cs);                           // CMP reg,MSW
            cdb.append(cdbjmp);
            reg = findreglsw(retregs);
            cs.Irm = modregrm(3,7,reg);
            cs.IEV2.Vint = e2.EV.Vlong;
            cdb.gen(&cs);                           // CMP reg,LSW
            freenode(e2);
            break;

        case OPvar:
            if (!e1.Ecount && e1.Eoper == OPs32_64)
            {
                // e1 is a sign extension that is not a common subexpression:
                // evaluate only its operand and build the MSW here by
                // copying it and shifting the sign bit across.
                reg_t msreg;

                retregs = allregs;
                scodelem(cdb,e1.EV.E1,&retregs,0,true);
                freenode(e1);
                reg = findreg(retregs);
                retregs = allregs & ~retregs;
                allocreg(cdb,&retregs,&msreg,TYint);
                genmovreg(cdb,msreg,reg);                  // MOV msreg,reg
                cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
                cse_flush(cdb,1);
                loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0);      // CMP msreg,MSW of e2
                cdb.append(cdbjmp);
                loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0);              // CMP reg,LSW of e2
                freenode(e2);
            }
            else
            {
                scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
                cse_flush(cdb,1);
                reg = findregmsw(retregs);   // get reg that e1 is in
                loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0);          // CMP reg,MSW of e2
                cdb.append(cdbjmp);
                reg = findreglsw(retregs);
                loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0);                // CMP reg,LSW of e2
                freenode(e2);
            }
            break;
    }

    // The MSWs were equal, so the LSW compare decides; it uses the unsigned
    // jump from joplsw.
    jop = joplsw[op - OPle];
    if (!(jcond & 1)) jop ^= 1;                           // toggle jump condition
    genjmp(cdb,jop,fltarg,cast(block *) targ);   // Jcond targ

    cdb.append(ce);
    freenode(e);
}
2727 
/*****************************
 * Do conversions.
 *
 * When no result is wanted (*pretregs is 0) only the operand is evaluated.
 * When config.inline8087 is set, most conversions are dispatched to the
 * x87/XMM helpers (xmmcnvt, load87, cnvt87, cdd_u32, cdd_u64, ...) and return
 * directly. Anything that leaves the switch via break or `goto L1` is handled
 * by evaluating the operand and calling the runtime library routine found in
 * the clib table.
 *
 * NOTE(review): this comment used to say "Depends on OPd_s32 and CLIB.dbllng
 * being in sequence." The lookup at the end of the function searches the clib
 * table by operator, so that ordering does not appear to be relied on here -
 * confirm before removing the remark for good.
 *
 * Params:
 *      cdb      = sink for the generated code
 *      e        = conversion elem
 *      pretregs = where the result is wanted
 */

void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs));
    //elem_print(e);

    // Maps a conversion operator to the library routine implementing it
    static immutable ubyte[2][16] clib =
    [
        [ OPd_s32,        CLIB.dbllng   ],
        [ OPs32_d,        CLIB.lngdbl   ],
        [ OPd_s16,        CLIB.dblint   ],
        [ OPs16_d,        CLIB.intdbl   ],
        [ OPd_u16,        CLIB.dbluns   ],
        [ OPu16_d,        CLIB.unsdbl   ],
        [ OPd_u32,        CLIB.dblulng  ],
        [ OPu32_d,        CLIB.ulngdbl  ],
        [ OPd_s64,        CLIB.dblllng  ],
        [ OPs64_d,        CLIB.llngdbl  ],
        [ OPd_u64,        CLIB.dblullng ],
        [ OPu64_d,        CLIB.ullngdbl ],
        [ OPd_f,          CLIB.dblflt   ],
        [ OPf_d,          CLIB.fltdbl   ],
        [ OPvp_fp,        CLIB.vptrfptr ],
        [ OPcvp_fp,       CLIB.cvptrfptr]
    ];

    // Result not needed: evaluate the operand only
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    regm_t retregs;
    if (config.inline8087)
    {
        switch (e.Eoper)
        {
            case OPld_d:
            case OPd_ld:
            {
                if (tycomplex(e.EV.E1.Ety))
                {
            Lcomplex:
                    // Complex operand: evaluate into ST0/ST1 (also reached
                    // from the OPf_d/OPd_f case below)
                    regm_t retregsx = mST01 | (*pretregs & mPSW);
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    fixresult_complex87(cdb, e, retregsx, pretregs);
                    return;
                }
                // Evaluate the operand into ST0 and let fixresult87() place it
                regm_t retregsx = mST0 | (*pretregs & mPSW);
                codelem(cdb,e.EV.E1, &retregsx, false);
                fixresult87(cdb, e, retregsx, pretregs);
                return;
            }

            case OPf_d:
            case OPd_f:
                if (tycomplex(e.EV.E1.Ety))
                    goto Lcomplex;
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }

                /* It won't do us much good to transfer back and        */
                /* forth between 8088 registers and 8087 registers      */
                if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs))
                {
                    // Operand is a function call: see where it returns its value
                    retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety);
                    if (retregs & (mXMM1 | mXMM0 |mST01 | mST0))       // if return in ST0
                    {
                        codelem(cdb,e.EV.E1,pretregs,false);
                        if (*pretregs & mST0)
                            note87(e, 0, 0);
                        return;
                    }
                    else
                        break;          // fall back to the library call
                }
                goto Lload87;

            case OPs64_d:
                if (!I64)
                    goto Lload87;
                goto case OPs32_d;

            case OPs32_d:
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }
                goto Lload87;

            case OPs16_d:
            case OPu16_d:
            Lload87:
                load87(cdb,e,0,pretregs,null,-1);
                return;

            case OPu32_d:
                if (I64 && config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                else if (!I16)
                {
                    // Store the 32 bit value at offset 0 of the float
                    // register scratch area, a register holding 0 at offset
                    // 4, then load the 64 bit result with FILD.
                    regm_t retregsx = ALLREGS;
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    reg_t reg = findreg(retregsx);
                    cdb.genfltreg(0x89, reg, 0);
                    regwithvalue(cdb,ALLREGS,0,&reg,0);
                    cdb.genfltreg(0x89, reg, 4);

                    push87(cdb);
                    cdb.genfltreg(0xDF,5,0);     // FILD m64int

                    regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/;
                    fixresult87(cdb, e, retregsy, pretregs);
                    return;
                }
                break;                  // I16: use the library call

            case OPd_s64:
                if (!I64)
                    goto Lcnvt87;
                goto case OPd_s32;

            case OPd_s32:
                if (config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                goto Lcnvt87;

            case OPd_s16:
            case OPd_u16:
            Lcnvt87:
                cnvt87(cdb,e,pretregs);
                return;

            case OPd_u32:               // use subroutine, not 8087
                if (I64 && config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                if (I32 || I64)
                {
                    cdd_u32(cdb,e,pretregs);
                    return;
                }
                // Otherwise call the library routine; the register the
                // operand is passed in depends on the target
                static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD ||
                           TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
                {
                    retregs = mST0;
                }
                else
                {
                    retregs = DOUBLEREGS;
                }
                goto L1;

            case OPd_u64:
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                retregs = DOUBLEREGS;
                goto L1;

            case OPu64_d:
                if (*pretregs & mST0)
                {
                    // Operand goes in AX (I64) or DX:AX, then the
                    // CLIB.u64_ldbl routine is called
                    regm_t retregsx = I64 ? mAX : mAX|mDX;
                    codelem(cdb,e.EV.E1,&retregsx,false);
                    callclib(cdb,e,CLIB.u64_ldbl,pretregs,0);
                    return;
                }
                break;

            case OPld_u64:
            {
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                regm_t retregsx = mST0;
                codelem(cdb,e.EV.E1,&retregsx,false);
                callclib(cdb,e,CLIB.ld_u64,pretregs,0);
                return;
            }

            default:
                break;
        }
    }
    // Library call path: evaluate the operand into the registers regmask()
    // gives for its type (or those chosen above when arriving via L1)
    retregs = regmask(e.EV.E1.Ety, TYnfunc);
L1:
    codelem(cdb,e.EV.E1,&retregs,false);
    // Linear search of clib for this operator; the assert fires if the
    // operator has no entry
    for (int i = 0; 1; i++)
    {
        assert(i < clib.length);
        if (clib[i][0] == e.Eoper)
        {
            callclib(cdb,e,clib[i][1],pretregs,0);
            break;
        }
    }
}
2946 
2947 
/***************************
 * Convert short to long.
 * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64,
 * OPu64_128, OPs64_128
 *
 * The strategy is selected from the operator and the target mode, in this
 * order:
 *  1. result not wanted in a general register: evaluate the operand only
 *  2. OPnp_fp, OPu16_32 on I16, OPu32_64 on I32: the operand goes into the LSW
 *     register and a separate MSW register receives a segment register value
 *     (OPnp_fp) or 0
 *  3. OPu32_64 on I64: a 32 bit MOV
 *  4. OPs32_64 on I64 where the operand is a relational operator that is not
 *     a common subexpression: the operand's result is used as is
 *  5. OPs16_32/OPu16_32 when not I16, OPs32_64 on I64: MOVZX/MOVSX/MOVSXD,
 *     CWDE/CDQE, or AND
 *  6. remaining sign extensions into a register pair: CWD/CDQ, or MOV + SAR
 *
 * Params:
 *      cdb      = sink for the generated code
 *      e        = conversion elem
 *      pretregs = where the result is wanted
 */

void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t reg;
    regm_t retregs;

    //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    // Remember whether the operand is a common subexpression before it is
    // evaluated; used below to decide whether getregs() is needed
    int e1comsub = e.EV.E1.Ecount;
    ubyte op = e.Eoper;
    if ((*pretregs & (ALLREGS | mBP)) == 0)    // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // then conversion isn't necessary
        return;
    }
    else if (
             op == OPnp_fp ||
             (I16 && op == OPu16_32) ||
             (I32 && op == OPu32_64)
            )
    {
        /* Result goes into a register pair.
         * Zero extend by putting a zero into most significant reg.
         */

        regm_t retregsx = *pretregs & mLSW;
        assert(retregsx);
        tym_t tym1 = tybasic(e.EV.E1.Ety);
        codelem(cdb,e.EV.E1,&retregsx,false);

        regm_t regm = *pretregs & (mMSW & ALLREGS);
        if (regm == 0)                  // *pretregs could be mES
            regm = mMSW & ALLREGS;
        allocreg(cdb,&regm,&reg,TYint);
        if (e1comsub)
            getregs(cdb,retregsx);
        if (op == OPnp_fp)
        {
            int segreg;

            // Pick the segment register from the operand's pointer type
            // BUG: what about pointers to functions?
            switch (tym1)
            {
                case TYimmutPtr:
                case TYnptr:    segreg = SEG_DS;        break;
                case TYcptr:    segreg = SEG_CS;        break;
                case TYsptr:    segreg = SEG_SS;        break;
                default:        assert(0);
            }
            cdb.gen2(0x8C,modregrm(3,segreg,reg));  // MOV reg,segreg
        }
        else
            movregconst(cdb,reg,0,0);  // 0 extend

        fixresult(cdb,e,retregsx | regm,pretregs);
        return;
    }
    else if (I64 && op == OPu32_64)
    {
        elem *e1 = e.EV.E1;
        retregs = *pretregs;
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            // Operand can be addressed directly: load it with a MOV
            code cs;

            allocreg(cdb,&retregs,&reg,TYint);
            loadea(cdb,e1,&cs,0x8B,reg,0,retregs,retregs);  //  MOV Ereg,EA
            freenode(e1);
        }
        else
        {
            *pretregs &= ~mPSW;                 // flags are set by eval of e1
            codelem(cdb,e1,&retregs,false);
            /* Determine if high 32 bits are already 0
             */
            if (e1.Eoper == OPu16_32 && !e1.Ecount)
            {
                // nothing to do
            }
            else
            {
                // Zero high 32 bits
                getregs(cdb,retregs);
                reg = findreg(retregs);
                // Don't use x89 because that will get optimized away
                genregs(cdb,0x8B,reg,reg);  // MOV Ereg,Ereg
            }
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount)
    {
        /* Due to how e1 is calculated, the high 32 bits of the register
         * are already 0.
         */
        retregs = *pretregs;
        codelem(cdb,e.EV.E1,&retregs,false);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (!I16 && (op == OPs16_32 || op == OPu16_32) ||
              I64 && op == OPs32_64)
    {
        elem *e11;
        elem *e1 = e.EV.E1;

        if (e1.Eoper == OPu8_16 && !e1.Ecount &&
            ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount))
           )
        {
            // Operand is itself a zero extension of an addressable byte:
            // clear a byte-addressable register and load the byte into it,
            // which does both conversions at once.
            code cs;

            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
            allocreg(cdb,&retregs,&reg,TYint);
            movregconst(cdb,reg,0,0);                   //  XOR reg,reg
            loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs);  //  MOV regL,EA
            freenode(e11);
            freenode(e1);
        }
        else if (e1.Eoper == OPvar ||
            (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs = void;

            // For OPu16_32 on I32 when optimizing for speed, use the
            // register path at L2 instead of MOVZX reg,EA
            if (I32 && op == OPu16_32 && config.flags4 & CFG4speed)
                goto L2;
            retregs = *pretregs;
            allocreg(cdb,&retregs,&reg,TYint);
            const opcode = (op == OPu16_32) ? 0x0FB7 : 0x0FBF; // MOVZX/MOVSX reg,EA
            if (op == OPs32_64)
            {
                assert(I64);
                // MOVSXD reg,e1
                loadea(cdb,e1,&cs,0x63,reg,0,0,retregs);
                code_orrex(cdb.last(), REX_W);
            }
            else
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregs);
            freenode(e1);
        }
        else
        {
        L2:
            retregs = *pretregs;
            if (op == OPs32_64)
                retregs = mAX | (*pretregs & mPSW);     // so that CDQE can be used
            *pretregs &= ~mPSW;             // flags are already set
            // Generate the operand into its own builder so the last
            // instruction it produced can be inspected (and patched) below
            CodeBuilder cdbx;
            cdbx.ctor();
            codelem(cdbx,e1,&retregs,false);
            code *cx = cdbx.finish();
            cdb.append(cdbx);
            getregs(cdb,retregs);
            if (op == OPu16_32 && cx)
            {
                cx = code_last(cx);
                // Is the last instruction an AND reg,imm on the result register?
                if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) &&
                    mask(cx.Irm & 7) == retregs)
                {
                    // Convert AND of a word to AND of a dword, zeroing upper word
                    if (cx.Irex & REX_B)
                        retregs = mask(8 | (cx.Irm & 7));
                    cx.Iflags &= ~CFopsize;
                    cx.IEV2.Vint &= 0xFFFF;
                    goto L1;
                }
            }
            if (op == OPs16_32 && retregs == mAX)
                cdb.gen1(0x98);         // CWDE
            else if (op == OPs32_64 && retregs == mAX)
            {
                cdb.gen1(0x98);         // CDQE
                code_orrex(cdb.last(), REX_W);
            }
            else
            {
                reg = findreg(retregs);
                if (config.flags4 & CFG4speed && op == OPu16_32)
                {   // AND reg,0xFFFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu);
                }
                else
                {
                    uint iop = (op == OPu16_32) ? 0x0FB7 : 0x0FBF; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                }
            }
         L1:
            if (e1comsub)
                getregs(cdb,retregs);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (*pretregs & mPSW || config.target_cpu < TARGET_80286)
    {
        // OPs16_32, OPs32_64
        // CWD doesn't affect flags, so we can depend on the integer
        // math to provide the flags.
        retregs = mAX | mPSW;               // want integer result in AX
        *pretregs &= ~mPSW;                 // flags are already set
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,mDX);           // sign extend into DX
        cdb.gen1(0x99);                     // CWD/CDQ
        if (e1comsub)
            getregs(cdb,retregs);
        fixresult(cdb,e,mDX | retregs,pretregs);
        return;
    }
    else
    {
        // OPs16_32, OPs32_64
        // Sign extend into an arbitrary register pair: copy the LSW into the
        // MSW register and arithmetic-shift the sign bit across it
        uint msreg,lsreg;

        retregs = *pretregs & mLSW;
        assert(retregs);
        codelem(cdb,e.EV.E1,&retregs,false);
        retregs |= *pretregs & mMSW;
        allocreg(cdb,&retregs,&reg,e.Ety);
        msreg = findregmsw(retregs);
        lsreg = findreglsw(retregs);
        genmovreg(cdb,msreg,lsreg);                // MOV msreg,lsreg
        assert(config.target_cpu >= TARGET_80286);              // 8088 can't handle SAR reg,imm8
        cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
}
3182 
3183 
/***************************
 * Convert byte to int.
 * For OPu8_16 and OPs8_16.
 *
 * If the result is not wanted in a general register, only the operand is
 * evaluated. When not I16, an operand that can be addressed directly is
 * loaded with MOVZX/MOVSX (or XOR + byte MOV). Otherwise the operand is
 * evaluated into a register and then extended in place; if the operand's code
 * ended with an AND of that byte register with a suitable constant, that
 * instruction is widened instead of emitting a separate extension.
 *
 * Params:
 *      cdb      = sink for the generated code
 *      e        = conversion elem
 *      pretregs = where the result is wanted
 */

void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    char size;          // only assigned, and only read, on the !I16 paths below

    if ((*pretregs & (ALLREGS | mBP)) == 0)     // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);      // then conversion isn't necessary
        return;
    }

    //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
    char op = e.Eoper;
    elem *e1 = e.EV.E1;
    if (e1.Eoper == OPcomma)
        docommas(cdb,&e1);
    if (!I16)
    {
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            // Operand can be addressed directly: load and extend in one go
            code cs;

            regm_t retregsx = *pretregs;
            reg_t reg;
            allocreg(cdb,&retregsx,&reg,TYint);
            if (config.flags4 & CFG4speed &&
                op == OPu8_16 && mask(reg) & BYTEREGS &&
                config.target_cpu < TARGET_PentiumPro)
            {
                movregconst(cdb,reg,0,0);                 //  XOR reg,reg
                loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx); //  MOV regL,EA
            }
            else
            {
                const opcode = (op == OPu8_16) ? 0x0FB6 : 0x0FBE; // MOVZX/MOVSX reg,EA
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx);
            }
            freenode(e1);
            fixresult(cdb,e,retregsx,pretregs);
            return;
        }
        size = tysize(e.Ety);
        // The operand has to be evaluated into a byte-addressable register
        retregs = *pretregs & BYTEREGS;
        if (retregs == 0)
            retregs = BYTEREGS;
        // Ask the operand's evaluation for the flags and drop the request
        // from *pretregs
        retregs |= *pretregs & mPSW;
        *pretregs &= ~mPSW;
    }
    else
    {
        if (op == OPu8_16)              // if uint conversion
        {
            retregs = *pretregs & BYTEREGS;
            if (retregs == 0)
                retregs = BYTEREGS;
        }
        else
        {
            // CBW doesn't affect flags, so we can depend on the integer
            // math to provide the flags.
            retregs = mAX | (*pretregs & mPSW); // want integer result in AX
        }
    }

    // Generate the operand into its own builder so we can tell whether it
    // produced any code at all (c1) before looking at the last instruction
    CodeBuilder cdb1;
    cdb1.ctor();
    codelem(cdb1,e1,&retregs,false);
    code *c1 = cdb1.finish();
    cdb.append(cdb1);
    reg_t reg = findreg(retregs);
    code *c;
    if (!c1)
        goto L1;        // no operand code: nothing to patch, extend explicitly

    // If previous instruction is an AND bytereg,value
    // (for the signed conversion the constant's bit 7 must be clear)
    c = cdb.last();
    if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) &&
        (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0))
    {
        if (*pretregs & mPSW)
            c.Iflags |= CFpsw;
        c.Iop |= 1;                    // convert to word operation
        c.IEV2.Vuns &= 0xFF;           // dump any high order bits
        *pretregs &= ~mPSW;             // flags already set
    }
    else
    {
     L1:
        if (!I16)
        {
            if (op == OPs8_16 && reg == AX && size == 2)
            {
                cdb.gen1(0x98);                  // CBW
                cdb.last().Iflags |= CFopsize;  // don't do a CWDE
            }
            else
            {
                // We could do better by not forcing the src and dst
                // registers to be the same.

                if (config.flags4 & CFG4speed && op == OPu8_16)
                {   // AND reg,0xFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFF);
                }
                else
                {
                    uint iop = (op == OPu8_16) ? 0x0FB6 : 0x0FBE; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                    if (I64 && reg >= 4)
                        code_orrex(cdb.last(), REX);    // address byte registers
                }
            }
        }
        else
        {
            if (op == OPu8_16)
                genregs(cdb,0x30,reg+4,reg+4);  // XOR regH,regH
            else
            {
                cdb.gen1(0x98);                 // CBW
                *pretregs &= ~mPSW;             // flags already set
            }
        }
    }
    getregs(cdb,retregs);
    fixresult(cdb,e,retregs,pretregs);
}
3316 
3317 
/***************************
 * Narrowing conversions that keep the low-order part of the operand:
 *      OP32_16     long to short
 *      OPoffset    offset of a far pointer
 *      OP16_8      int to byte
 *      OP64_32     long long to long
 *      OP128_64
 *
 * The operand is evaluated into registers and, where the target mode means
 * the source occupies a register pair, only the least significant register
 * is kept before the result is handed to fixresult().
 *
 * Params:
 *      cdb      = sink for the generated code
 *      e        = conversion elem
 *      pretregs = where the result is wanted; 0 if no result is needed
 */

void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        // Sanity check: only these operators are handled here
        switch (e.Eoper)
        {
            case OP32_16, OPoffset, OP16_8, OP64_32, OP128_64:
                break;

            default:
                assert(0);
        }
    }

    elem *operand = e.EV.E1;
    regm_t retregs;

    if (e.Eoper == OP16_8)
    {
        // Byte result: evaluate into a byte-addressable register
        retregs = *pretregs ? BYTEREGS : 0;
        codelem(cdb,operand,&retregs,false);
    }
    else if (operand.Eoper == OPrelconst)
    {
        // offsetinreg() fills in retregs
        offsetinreg(cdb,operand,&retregs);
    }
    else
    {
        retregs = *pretregs ? ALLREGS : 0;
        codelem(cdb,operand,&retregs,false);

        const bool isOff = e.Eoper == OPoffset;
        const bool keepLswOnly =
            I16 ||
            (I32 && (isOff || e.Eoper == OP64_32)) ||
            (I64 && (isOff || e.Eoper == OP128_64));
        if (keepLswOnly)
            retregs &= mLSW;            // want LSW only
    }

    /* Assigning the result of a new elem to a register counts as
     * "destroying" it, even though the value is unchanged: a limitation of
     * the CSE strategy is that a register can hold the contents of only one
     * elem at a time.
     */
    if (!e.Ecount)
        useregs(retregs);
    else
        getregs(cdb,retregs);

    debug
    {
        // Dump some context before the assert below fires
        if (*pretregs && !retregs)
        {
            WROP(e.Eoper);
            printf(" *pretregs = %s, retregs = %s, e = %p\n",regm_str(*pretregs),regm_str(retregs),e);
        }
    }

    assert(retregs || !*pretregs);
    fixresult(cdb,e,retregs,pretregs);  // lsw only
}
3385 
/**********************************************
 * Generate code for OPmsw, which extracts the most significant half:
 *      top 32 bits of 64 bit value (I32)
 *      top 16 bits of 32 bit value (I16)
 *      top 64 bits of 128 bit value (I64)
 * Params:
 *      cdb = receives the generated code
 *      e = the OPmsw elem, operand is e.EV.E1
 *      pretregs = registers the result is wanted in, 0 if no result is wanted
 */

void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPmsw);

    // Evaluate the whole operand, then keep only the high-half register
    regm_t msw = *pretregs ? ALLREGS : 0;
    codelem(cdb,e.EV.E1,&msw,false);
    msw &= mMSW;

    /* The CSE bookkeeping lets a register hold the value of only one elem
     * at a time, so when e itself is a common subexpression the register
     * is "destroyed" and re-assigned to e although its contents are unchanged.
     */
    if (e.Ecount)
        getregs(cdb,msw);
    else
        useregs(msw);

    debug
    if (*pretregs && !msw)
    {
        // About to trip the assert below; dump what we know first
        WROP(e.Eoper);
        printf(" *pretregs = %s, retregs = %s\n",regm_str(*pretregs),regm_str(msw));
        elem_print(e);
    }

    assert(!*pretregs || msw);
    fixresult(cdb,e,msw,pretregs);      // msw only
}
3420 
3421 
3422 
/******************************
 * Generate code for the port I/O operators OPinp and OPoutp.
 * Params:
 *      cdb = receives the generated code
 *      e = the elem; e.EV.E1 is the port number and, for OPoutp,
 *          e.EV.E2 is the value to write
 *      pretregs = registers the result is wanted in
 */

void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdport\n");
    elem *eport = e.EV.E1;
    ubyte opcode = 0xE4;                // root of all IN/OUT opcodes
    ubyte port = 0;                     // immediate port number, stays 0 for the DX form

    // The immediate form only encodes ports 0..255
    const bool immediate =
        eport.Eoper == OPconst && eport.EV.Vuns <= 255 &&
        (!evalinregister(eport) || regcon.mvar & mDX);

    if (immediate)
    {
        port = cast(ubyte)eport.EV.Vuns;
        freenode(eport);
    }
    else
    {
        regm_t dx = mDX;                // port number is always DX
        codelem(cdb,eport,&dx,false);
        opcode |= 0x08;                 // DX version of opcode
    }

    uint sz;
    if (e.Eoper == OPoutp)
    {
        elem *evalue = e.EV.E2;
        sz = tysize(evalue.Ety);
        regm_t ax = mAX;                // byte/word to output is in AL/AX
        const regm_t keep = immediate ? 0 : mDX;    // don't lose the port while loading AX
        scodelem(cdb,evalue,&ax,keep,true);
        opcode |= 0x02;                 // OUT opcode
    }
    else // OPinp
    {
        getregs(cdb,mAX);               // the value read lands in AL/AX
        sz = tysize(e.Ety);
    }

    const bool wide = sz != 1;
    if (wide)
        opcode |= 1;                    // word operation
    cdb.genc2(opcode,0,port);           // IN/OUT AL/AX,DX/port
    if (wide && sz != REGSIZE)          // if need size override
        cdb.last().Iflags |= CFopsize;

    fixresult(cdb,e,mAX,pretregs);
}
3472 
/************************
 * Generate code for an asm elem.
 * The bytes held in e.EV.Vstring (length e.EV.Vstrlen) are handed to the
 * code builder as is.
 * Params:
 *      cdb = receives the generated code
 *      e = the asm elem
 *      pretregs = registers the result is wanted in
 */

void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Assume only regs normally destroyed by a function are destroyed
    const regm_t clobbered = (ALLREGS | mES) & ~fregsaved;
    getregs(cdb,clobbered);

    char *text = cast(char *)e.EV.Vstring;
    const uint length = cast(uint)e.EV.Vstrlen;
    cdb.genasm(text, length);

    // Result is taken from AX, or DX:AX for 16 bit code
    regm_t result = mAX;
    if (I16)
        result |= mDX;
    fixresult(cdb,e,result,pretregs);
}
3484 
/************************
 * Generate code for OPnp_f16p and OPf16p_np.
 *
 * Both operators work on the upper 16 bits of the 32 bit register:
 * the halves are swapped with ROR ereg,16, the (now low) 16 bits are
 * shifted by 3 with a 16 bit operation, and the halves are swapped
 * back with ROL ereg,16.
 *
 * Params:
 *      cdb = receives the generated code
 *      e = the conversion elem, operand is e.EV.E1
 *      pretregs = registers for the operand, which is converted in place
 */

void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    code *cnop = null;                  // label L1, only created for OPnp_f16p
    code cs;

    assert(I32);
    codelem(cdb,e.EV.E1,pretregs,false);
    reg_t reg = findreg(*pretregs);
    getregs(cdb,*pretregs);             // we will destroy the regs

    // Set up ROR ereg,16; cs is patched below into the shift and the ROL
    cs.Iop = 0xC1;
    cs.Irm = modregrm(3,1,reg);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 16;

    if (e.Eoper == OPnp_f16p)
    {
        /*      OR  ereg,ereg
                JE  L1
                ROR ereg,16
                SHL reg,3
                MOV rx,SS
                AND rx,3                ;mask off CPL bits
                OR  rl,4                ;run on LDT bit
                OR  regl,rl
                ROL ereg,16
            L1: NOP
         */
        reg_t rx;

        regm_t retregs = BYTEREGS & ~*pretregs;
        allocreg(cdb,&retregs,&rx,TYint);
        cnop = gennop(null);
        int jop = JCXZ;
        if (reg != CX)
        {
            gentstreg(cdb,reg);
            jop = JE;
        }
        genjmp(cdb,jop,FLcode, cast(block *)cnop);      // Jop L1, a zero value is left untouched
        cdb.gen(&cs);                                   // ROR ereg,16

        cs.IEV2.Vuns = 3;
        cs.Iflags |= CFopsize;                          // 16 bit operation
        NEWREG(cs.Irm,4);
        cdb.gen(&cs);                                   // SHL reg,3
        genregs(cdb,0x8C,2,rx);                         // MOV rx,SS
        int isbyte = (mask(reg) & BYTEREGS) == 0;
        cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3);    // AND rl,3
        cdb.genc2(0x80,modregrm(3,1,rx),4);             // OR  rl,4
        genregs(cdb,0x0A | isbyte,reg,rx);              // OR  regl,rl
    }
    else // OPf16p_np
    {
        /*      ROR ereg,16
                SHR reg,3
                ROL ereg,16
         */
        cdb.gen(&cs);                                   // ROR ereg,16

        cs.IEV2.Vuns = 3;
        cs.Iflags |= CFopsize;                          // 16 bit operation
        NEWREG(cs.Irm,5);
        cdb.gen(&cs);                                   // SHR reg,3
    }

    // Swap the halves back. This used to be emitted *before* the ROR,
    // which made the pair a no-op and left the shift working on the wrong half.
    cs.Irm = modregrm(3,0,reg);
    cs.Iflags = 0;
    cs.IEV2.Vuns = 16;
    cdb.gen(&cs);                                       // ROL ereg,16

    // The jump target must be part of the instruction stream
    if (cnop)
        cdb.append(cnop);                               // L1: NOP
}
3557 
/*************************
 * Generate code for OPbtst.
 * Emits a BT instruction testing bit e.EV.E2 of e.EV.E1; the tested bit
 * ends up in the carry flag and is optionally materialized in a register.
 * Params:
 *      cdb = receives the generated code
 *      e = the OPbtst elem
 *      pretregs = registers the result is wanted in, 0 if no result is wanted
 */

void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));

    enum opcode_t opBTreg = 0x0FA3;     // BT rm,reg
    enum opcode_t opBTimm = 0x0FBA;     // BT rm,imm8
    enum int digitBT = 4;               // /4 selects BT in the 0FBA group

    elem *ebits = e.EV.E1;
    elem *eindex = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    if (*pretregs == 0)                 // if don't want result
    {
        // Operands are still evaluated, for their side effects
        codelem(cdb,ebits,pretregs,false);
        *pretregs = 0;                  // in case they got set
        codelem(cdb,eindex,pretregs,false);
        return;
    }

    // Form the r/m operand: directly from memory if possible, else a register
    regm_t idxregs;
    const bool addressable = (ebits.Eoper == OPind && !ebits.Ecount) || ebits.Eoper == OPvar;
    if (addressable)
    {
        getlvalue(cdb, &cs, ebits, RMload);     // get addressing mode
        idxregs = idxregm(&cs);                 // mask if index regs used
    }
    else
    {
        regm_t opregs = tysize(ebits.Ety) == 1 ? BYTEREGS : allregs;
        codelem(cdb,ebits, &opregs, false);
        const reg_t rop = findreg(opregs);
        cs.Irm = modregrm(3,0,rop & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (rop & 8)
            cs.Irex |= REX_B;
        idxregs = opregs;
    }

    tym_t ty1 = tybasic(ebits.Ety);
    const size1 = _tysize[ty1];
    ubyte word = (!I16 && size1 == SHORTSIZE) ? CFopsize : 0;

//    if (eindex.Eoper == OPconst && eindex.EV.Vuns < 0x100)  // should do this instead?
    if (eindex.Eoper == OPconst)
    {
        cs.Iop = opBTimm;
        cs.Irm |= modregrm(0,digitBT,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;

        // Reduce the bit number modulo the operand width
        int bitmask;
        if (size1 == SHORTSIZE)
            bitmask = 15;
        else if (size1 == 4)
            bitmask = 31;
        else
        {
            bitmask = 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cs.IEV2.Vint = eindex.EV.Vint & bitmask;
        cdb.gen(&cs);
    }
    else
    {
        // Bit number goes in a register that does not disturb the operand
        regm_t bitregs = ALLREGS & ~idxregs;
        scodelem(cdb,eindex,&bitregs,idxregs,true);
        const reg_t rbit = findreg(bitregs);

        cs.Iop = opBTreg;
        code_newreg(&cs,rbit);
        cs.Iflags |= CFpsw | word;
        if (I64 && size1 == 8)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    // The tested bit is now in the carry flag
    regm_t retregs = *pretregs & (ALLREGS | mBP);
    if (retregs == 0)                   // result not wanted in a register
        return;

    reg_t rres;
    if (tysize(e.Ety) == 1)
    {
        assert(I64 || retregs & BYTEREGS);
        allocreg(cdb,&retregs,&rres,TYint);
        cdb.gen2(0x0F92,modregrmx(3,0,rres));           // SETC reg
        if (I64 && rres >= 4)
            code_orrex(cdb.last(), REX);
        *pretregs = retregs;
        return;
    }

    code *cnop = null;
    regm_t save = regcon.immed.mval;
    allocreg(cdb,&retregs,&rres,TYint);
    regcon.immed.mval = save;
    if (*pretregs & mPSW)
    {
        // Flags must survive, so build 0/1 with moves and a jump
        movregconst(cdb,rres,1,8);                      // MOV reg,1
        cnop = gennop(null);
        genjmp(cdb,JC,FLcode, cast(block *) cnop);      // Jtrue nop
        movregconst(cdb,rres,0,8);                      // MOV reg,0
        regcon.immed.mval &= ~mask(rres);
    }
    else
    {
        getregs(cdb,retregs);
        genregs(cdb,0x19,rres,rres);                    // SBB reg,reg
        cdb.gen2(0xF7,modregrmx(3,3,rres));             // NEG reg
    }
    *pretregs = retregs;
    cdb.append(cnop);
}
3681 
/*************************
 * Generate code for OPbt, OPbtc, OPbtr, OPbts.
 * Emits the matching BT/BTC/BTR/BTS instruction on the memory operand
 * e.EV.E1 with bit number e.EV.E2; the tested bit ends up in the carry
 * flag and is optionally materialized in a register.
 * Params:
 *      cdb = receives the generated code
 *      e = the bit test elem
 *      pretregs = registers the result is wanted in, 0 if no result is wanted
 */

void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs));
    regm_t retregs;
    reg_t reg;
    opcode_t op;        // second opcode byte of the "rm,reg" form
    int mode;           // /digit selecting the operation in the 0FBA "rm,imm8" group

    switch (e.Eoper)
    {
        case OPbt:      op = 0xA3; mode = 4; break;
        case OPbtc:     op = 0xBB; mode = 7; break;
        case OPbtr:     op = 0xB3; mode = 6; break;
        case OPbts:     op = 0xAB; mode = 5; break;

        default:
            assert(0);
    }

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    getlvalue(cdb, &cs, e, RMload);      // get addressing mode
    if (e.Eoper == OPbt && *pretregs == 0)
    {
        // A plain test whose result is unused: only evaluate the bit number
        codelem(cdb,e2,pretregs,false);
        return;
    }

    tym_t ty1 = tybasic(e1.Ety);
    tym_t ty2 = tybasic(e2.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
    regm_t idxregs = idxregm(&cs);         // mask if index regs used

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Split the bit number: whole operand-sized units are folded into
        // the address offset, the remainder becomes the immediate.
        if (_tysize[ty1] == SHORTSIZE)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (_tysize[ty1] == 4)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        // Bit number goes in a register that does not disturb the address
        retregs = ALLREGS & ~idxregs;
        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (_tysize[ty2] == 8 && I64)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    // The tested bit is now in the carry flag
    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        // Strip type modifiers before indexing the size table, as is done
        // for ty1 and ty2 above.
        if (_tysize[tybasic(e.Ety)] == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                // Flags must survive, so build 0/1 with moves and a jump
                movregconst(cdb,reg,1,8);      // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);    // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}
3799 
/*************************************
 * Generate code for OPbsf and OPbsr.
 * Emits BSF or BSR on the operand e.EV.E1, which must be 2, 4 or 8 bytes.
 * Params:
 *      cdb = receives the generated code
 *      e = the bit scan elem
 *      pretregs = registers the result is wanted in, 0 if no result is wanted
 */

void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbscan()\n");
    //elem_print(e);
    elem *e1 = e.EV.E1;

    if (!*pretregs)
    {
        // No result wanted: just evaluate the operand
        codelem(cdb,e1,pretregs,false);
        return;
    }

    const tym_t tyml = tybasic(e1.Ety);
    const int sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || sz == 8);

    // Form the source operand: directly from memory if possible, else a register
    code cs;
    const bool addressable = (e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar;
    if (addressable)
    {
        getlvalue(cdb, &cs, e1, RMload);        // get addressing mode
    }
    else
    {
        regm_t srcregs = allregs;
        codelem(cdb,e1, &srcregs, false);
        const reg_t src = findreg(srcregs);
        cs.Irm = modregrm(3,0,src & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (src & 8)
            cs.Irex |= REX_B;
    }

    // Pick the destination register, honoring the caller's preference if any
    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t dst;
    allocreg(cdb,&retregs, &dst, e.Ety);

    if (e.Eoper == OPbsf)
        cs.Iop = 0x0FBC;                        // BSF reg,EA
    else
        cs.Iop = 0x0FBD;                        // BSR reg,EA
    code_newreg(&cs, dst);
    if (!I16 && sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);

    fixresult(cdb,e,retregs,pretregs);
}
3852 
/************************
 * Generate code for the OPpopcnt operator.
 * Emits POPCNT on the operand e.EV.E1, which must be 2 or 4 bytes
 * (or 8 bytes for 64 bit code). Not available for 16 bit code.
 * Params:
 *      cdb = receives the generated code
 *      e = the OPpopcnt elem
 *      pretregs = registers the result is wanted in, 0 if no result is wanted
 */

void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpopcnt()\n");
    //elem_print(e);
    assert(!I16);

    elem *e1 = e.EV.E1;
    if (!*pretregs)
    {
        // No result wanted: just evaluate the operand
        codelem(cdb,e1,pretregs,false);
        return;
    }

    const tym_t tyml = tybasic(e1.Ety);
    const int sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || (sz == 8 && I64));     // no byte op

    // Form the source operand: directly from memory if possible, else a register
    code cs;
    const bool addressable = (e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar;
    if (addressable)
    {
        getlvalue(cdb, &cs, e1, RMload);        // get addressing mode
    }
    else
    {
        regm_t srcregs = allregs;
        codelem(cdb,e1, &srcregs, false);
        const reg_t src = cast(ubyte)findreg(srcregs);
        cs.Irm = modregrm(3,0,src & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (src & 8)
            cs.Irex |= REX_B;
    }

    // Pick the destination register, honoring the caller's preference if any
    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t dst;
    allocreg(cdb,&retregs, &dst, e.Ety);

    cs.Iop = POPCNT;                            // POPCNT reg,EA
    code_newreg(&cs, dst);
    if (sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    if (*pretregs & mPSW)
        cs.Iflags |= CFpsw;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);

    // Flags are already set by the instruction, so only ask
    // fixresult() for the register part
    *pretregs &= mBP | ALLREGS;

    fixresult(cdb,e,retregs,pretregs);
}
3909 
3910 
/*******************************************
 * Generate code for OPpair, OPrpair.
 * Evaluates e.EV.E1 and e.EV.E2 into the two halves of a register pair.
 * For OPrpair the two halves are exchanged.
 * Params:
 *      cdb = receives the generated code
 *      e = the pair elem
 *      pretregs = registers the result is wanted in, 0 if no result is wanted
 */

void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    if (*pretregs == 0)                         // if don't want result
    {
        // Operands are still evaluated, for their side effects
        codelem(cdb,e1,pretregs,false);         // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //printf("Ecount = %d\n", e.Ecount);

    regm_t wanted = *pretregs;

    // A complex value wanted only in the flags still needs somewhere to live
    if (wanted == mPSW && tycomplex(e.Ety) && config.inline8087)
        wanted |= config.fpxmmregs ? (mXMM0 | mXMM1) : mST01;

    if (wanted & mST01)
    {
        loadPair87(cdb, e, pretregs);
        return;
    }

    // Split the wanted registers into one set per operand
    regm_t regs1;
    regm_t regs2;
    if (wanted & XMMREGS)
    {
        wanted &= XMMREGS;
        const first = findreg(wanted);
        regs1 = mask(first);
        regs2 = mask(findreg(wanted & ~regs1));
    }
    else
    {
        wanted &= allregs;
        if (!wanted)
            wanted = allregs;
        regs1 = wanted & mLSW;
        regs2 = wanted & mMSW;
    }

    if (e.Eoper == OPrpair)
    {
        // Operands are in reversed order: exchange the two sets
        const regm_t tmp = regs1;
        regs1 = regs2;
        regs2 = tmp;
    }
    //printf("1: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    codelem(cdb,e1, &regs1, false);
    scodelem(cdb,e2, &regs2, regs1, false);     // keep regs1 intact while loading e2
    //printf("2: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    // Registers holding a common subexpression get re-assigned to the pair
    if (e1.Ecount)
        getregs(cdb,regs1);
    if (e2.Ecount)
        getregs(cdb,regs2);

    fixresult(cdb,e,regs1 | regs2,pretregs);
}
3980 
/*************************
 * Generate code for OPcmpxchg.
 *
 * The form is:
 *     OPcmpxchg
 *    /     \
 * lvalue   OPparam
 *          /     \
 *        old     new
 *
 * An 8 byte operand in 32 bit code uses CMPXCHG8B with old in DX:AX and
 * new in CX:BX; everything else uses CMPXCHG with old in AX.
 * A LOCK prefix is emitted when the lvalue is volatile.
 * If a register result is wanted, it is one byte and is produced with SETZ.
 *
 * Params:
 *      cdb = receives the generated code
 *      e = the OPcmpxchg elem
 *      pretregs = registers the result is wanted in
 */

void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdcmpxchg(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *elvalue = e.EV.E1;
    elem *eparam = e.EV.E2;
    assert(eparam.Eoper == OPparam);
    assert(!eparam.Ecount);
    elem *eold = eparam.EV.E1;
    elem *enew = eparam.EV.E2;

    const tym_t tyml = tybasic(elvalue.Ety);    // type of lvalue
    const uint sz = _tysize[tyml];

    code cs;
    if (I32 && sz == 8)
    {
        enum regm_t oldregs = mDX|mAX;
        enum regm_t newregs = mCX|mBX;

        regm_t r = oldregs;
        codelem(cdb,eold,&r,false);                     // [DX,AX] = old

        r = newregs;
        scodelem(cdb,enew,&r,oldregs,false);            // [CX,BX] = new

        getlvalue(cdb,&cs,elvalue,mCX|mBX|mAX|mDX);     // get EA

        getregs(cdb,oldregs);                           // CMPXCHG destroys these regs

        if (elvalue.Ety & mTYvolatile)
            cdb.gen1(LOCK);                             // LOCK prefix
        cs.Iop = 0x0FC7;                                // CMPXCHG8B EA
        cs.Iflags |= CFpsw;
        code_newreg(&cs,1);
        cdb.gen(&cs);
    }
    else
    {
        regm_t r = mAX;
        codelem(cdb,eold,&r,false);                     // AX = old

        r = (ALLREGS | mBP) & ~mAX;
        scodelem(cdb,enew,&r,mAX,false);                // load new value in reg

        getlvalue(cdb,&cs,elvalue,mAX | r);             // get EA

        getregs(cdb,mAX);                               // CMPXCHG destroys AX

        if (elvalue.Ety & mTYvolatile)
            cdb.gen1(LOCK);                             // LOCK prefix
        cs.Iop = (sz == 1) ? 0x0FB0 : 0x0FB1;           // CMPXCHG EA,reg
        cs.Iflags |= CFpsw;
        if (!I16 && sz == SHORTSIZE)
            cs.Iflags |= CFopsize;
        if (I64 && sz == 8)
            cs.Irex |= REX_W;
        code_newreg(&cs,findreg(r));
        cdb.gen(&cs);
    }

    assert(!elvalue.Ecount);
    freenode(elvalue);

    // Success is reported in the zero flag
    regm_t retregs = *pretregs & (ALLREGS | mBP);
    if (retregs == 0)                   // result not wanted in a register
        return;

    assert(tysize(e.Ety) == 1);
    assert(I64 || retregs & BYTEREGS);
    reg_t rres;
    allocreg(cdb,&retregs,&rres,TYint);
    uint ea = modregrmx(3,0,rres);
    if (I64 && rres >= 4)
        ea |= REX << 16;
    cdb.gen2(0x0F94,ea);                // SETZ reg
    *pretregs = retregs;
}
4071 
/*************************
 * Generate code for OPprefetch.
 *
 * e.EV.E1 is the address to prefetch; e.EV.E2 must be a constant that
 * selects the instruction:
 *    0: prefetch0
 *    1: prefetch1
 *    2: prefetch2
 *    3: prefetchnta
 *    4: prefetchw
 *    5: prefetchwt1
 *
 * Params:
 *      cdb = receives the generated code
 *      e = the OPprefetch elem
 *      pretregs = must be 0, there is no result
 */

void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdprefetch\n");
    elem *eaddr = e.EV.E1;
    elem *ehint = e.EV.E2;

    assert(*pretregs == 0);
    assert(ehint.Eoper == OPconst);

    // Translate the hint into opcode + /digit
    //   hint   opcode     /digit
    //    0     PREFETCH     1
    //    1     PREFETCH     2
    //    2     PREFETCH     3
    //    3     PREFETCH     0
    //    4     0x0F0D       1
    //    5     0x0F0D       2
    const hint = ehint.EV.Vuns;
    opcode_t op;
    reg_t digit;
    if (hint <= 3)
    {
        op = PREFETCH;
        digit = cast(reg_t)((hint + 1) & 3);
    }
    else if (hint <= 5)
    {
        op = 0x0F0D;
        digit = cast(reg_t)(hint - 3);
    }
    else
        assert(0);

    freenode(ehint);

    code cs;
    getlvalue(cdb,&cs,eaddr,0);
    cs.Iop = op;
    cs.Irm |= modregrm(0,digit,0);
    cs.Iflags |= CFvolatile;            // do not schedule
    cdb.gen(&cs);
}
4113 
4114 }