1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1987-1995 by Symantec
6  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cg87.d, backend/cg87.d)
10  */
11 
12 module dmd.backend.cg87;
13 
// Either the SCPP or the MARS version identifier turns on COMPILE,
// which guards the `version (COMPILE)` block that follows.
version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;
18 
19 version (COMPILE)
20 {
21 
22 import core.stdc.stdio;
23 import core.stdc.stdlib;
24 import core.stdc.string;
25 
26 import dmd.backend.barray;
27 import dmd.backend.cc;
28 import dmd.backend.cdef;
29 import dmd.backend.code;
30 import dmd.backend.code_x86;
31 import dmd.backend.codebuilder;
32 import dmd.backend.mem;
33 import dmd.backend.el;
34 import dmd.backend.global;
35 import dmd.backend.oper;
36 import dmd.backend.ty;
37 import dmd.backend.evalu8 : el_toldoubled;
38 
39 extern (C++):
40 
41 nothrow:
42 
// The code generator's model of the 8087 state. The code in this module uses
// three members: `stack` (what is noted as living in each ST(i)), `stackused`
// (how many stack entries are in use) and `save` (memory slots holding values
// spilled from the 8087).
// NOTE: this could be a TLS global which would allow this variable to be used in
//       a multi-threaded version of the backend
__gshared Globals87 global87;
46 
47 private:
48 
// Declaration only; the body is not in this part of the file.
// fixresult87() compares it with operand sizes and uses it as a byte offset
// into the float register temporary.
// NOTE(review): presumably the size in bytes of a general purpose register - confirm
int REGSIZE();
50 
/* Return a value with only bit number m set */
private extern (D) uint mask(uint m)
{
    const int bit = 1 << m;
    return bit;
}
// Declaration only; the body and the callers are not in this part of the file.
void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op);
53 
54 
// Constants that the 8087 supports directly
// (loadconst() matches a constant against these and returns the opcode of
// the dedicated load instruction named in each comment below)
// BUG: rewrite for 80 bit long doubles
enum PI            = 3.14159265358979323846;      // pi, FLDPI
enum LOG2          = 0.30102999566398119521;      // log10(2), FLDLG2
enum LN2           = 0.6931471805599453094172321; // ln(2), FLDLN2
enum LOG2T         = 3.32192809488736234787;      // log2(10), FLDL2T
enum LOG2E         = 1.4426950408889634074;   // 1/LN2 = log2(e), FLDL2E

enum FWAIT = 0x9B;            // FWAIT opcode, emitted by genfwait() when ADDFWAIT() is true
64 
/* Mark variable referenced by e as not a register candidate.
 * Clears GTregcand in the symbol's Sflags and returns the updated flags.
 */
uint notreg(elem* e)
{
    e.EV.Vsym.Sflags &= ~GTregcand;
    return e.EV.Vsym.Sflags;
}
67 
/* Generate the appropriate ESC instruction
 * Params:
 *      MF = one of the MFxxxx values; it selects bits 1..2 of the opcode
 *      b  = added to the opcode as is
 * Returns:
 *      0xD8 + MF * 2 + b, truncated to a byte
 */
ubyte ESC(uint MF, uint b)
{
    const uint opcode = 0xD8 + (MF << 1) + b;
    return cast(ubyte) opcode;
}
enum
{   // Values for MF
    // (the first argument of ESC(), which shifts it left by one bit and adds it to 0xD8)
    MFfloat         = 0,    // picked by fixresult87() when the operand size is FLOATSIZE
    MFlong          = 1,    // NOTE(review): presumably a 32 bit integer operand - not used in the visible code
    MFdouble        = 2,    // picked by fixresult87() when the operand size is not FLOATSIZE
    MFword          = 3     // NOTE(review): presumably a 16 bit integer operand - not used in the visible code
}
77 
/*********************************
 * NOTE(review): neither Dconst nor `oldd` is used in the part of the file
 * visible here. Judging by the field names this presumably records a
 * rounding mode together with symbols for the "round to 0" and
 * "round to nearest" control words - confirm against the code that uses it.
 */

struct Dconst
{
    int round;                  // NOTE(review): meaning not visible here
    Symbol *roundto0;           // presumably the symbol holding the CW_roundto0 value - TODO confirm
    Symbol *roundtonearest;     // presumably the symbol holding the CW_roundtonearest value - TODO confirm
}

// Module-wide instance; zero/null initialized by default
private __gshared Dconst oldd;
89 
// Set to non-zero to enable the printf() tracing guarded by `if (NDPP)` in this module
enum NDPP = 0;       // print out debugging info
/* Returns true if the SAHF instruction can't be used:
 * when generating 64 bit code, or when config.fpxmmregs is set.
 */
bool NOSAHF()
{
    if (I64)
        return true;
    return config.fpxmmregs ? true : false;
}
92 
// 8087 control word values. The two differ only in bits 10..11 (mask 0xC00),
// which is the rounding control field of the x87 control word.
// NOTE(review): not referenced in the part of the file visible here.
enum CW_roundto0 = 0xFBF;           // bits 10..11 = 11
enum CW_roundtonearest = 0x3BF;     // bits 10..11 = 00
95 
96 
/**********************************
 * When we need to temporarily save 8087 registers, we record information
 * about the save into an array of NDP structs (global87.save[]).
 */
101 
/**********************************
 * Like getlvalue(), but for an operand of an 8087 instruction.
 * Params:
 *      cdb = code builder handed on to getlvalue()
 *      pcs = instruction whose addressing mode is filled in by getlvalue()
 *            and whose flags are then adjusted
 *      e = the operand
 *      keepmsk = handed on to getlvalue()
 */
private void getlvalue87(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk)
{
    // the x87 instructions cannot read XMM registers, so the variable
    // must not be a register candidate
    switch (e.Eoper)
    {
        case OPvar:
        case OPrelconst:
            e.EV.Vsym.Sflags &= ~GTregcand;
            break;

        default:
            break;
    }

    getlvalue(cdb, pcs, e, keepmsk);

    if (ADDFWAIT())
        pcs.Iflags |= CFwait;           // instruction needs an FWAIT

    // Remove the operand size indication getlvalue() may have set
    if (I32)
        pcs.Iflags &= ~CFopsize;
    else if (I64)
        pcs.Irex &= ~REX_W;
}
116 
/****************************************
 * Store to ndp save location i.
 * Generates an FSTP to i[BP] whose operand size is selected by ty.
 * Params:
 *      cdb = where the instruction is appended
 *      i = save location, passed on as the FLndp operand value
 *      ty = type of the value; real, imaginary and complex variants of
 *           the same precision share one encoding
 */

private void ndp_fstp(ref CodeBuilder cdb, int i, tym_t ty)
{
    uint esc;           // opcode byte
    uint digit;         // reg field of the mod/rm byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            esc = 0xD9;                 // FSTP m32real i[BP]
            digit = 3;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            esc = 0xDD;                 // FSTP m64real i[BP]
            digit = 3;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            esc = 0xDB;                 // FSTP m80real i[BP]
            digit = 7;
            break;

        default:
            assert(0);
    }

    cdb.genc1(esc,modregrm(2,digit,BPRM),FLndp,i);
}
148 
/****************************************
 * Load from ndp save location i.
 * Generates an FLD from i[BP] whose operand size is selected by ty.
 * Params:
 *      cdb = where the instruction is appended
 *      i = save location, passed on as the FLndp operand value
 *      ty = type of the value; real, imaginary and complex variants of
 *           the same precision share one encoding
 */
private void ndp_fld(ref CodeBuilder cdb, int i, tym_t ty)
{
    uint esc;           // opcode byte
    uint digit;         // reg field of the mod/rm byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            esc = 0xD9;                 // FLD m32real i[BP]
            digit = 0;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            esc = 0xDD;                 // FLD m64real i[BP]
            digit = 0;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            esc = 0xDB;                 // FLD m80real i[BP]
            digit = 5;
            break;

        default:
            assert(0);
    }

    cdb.genc1(esc,modregrm(2,digit,BPRM),FLndp,i);
}
176 
/**************************
 * Return index of empty slot in global87.save[].
 * A slot is empty when its `e` is null. If every existing slot is
 * occupied, a fresh empty slot is appended and its index is returned.
 */

private int getemptyslot()
{
    int slot = 0;

    // skip over the slots that are in use
    while (slot < global87.save.length && global87.save[slot].e != null)
        ++slot;

    if (slot == global87.save.length)
        global87.save.push(NDP());      // none free, grow the array
    return slot;
}
192 
/*********************************
 * Pop 8087 stack.
 * Forwards to pop87(line, file), passing the source location of this
 * wrapper itself.
 */

void pop87()
{
    pop87(__LINE__, __FILE__);
}
198 
/*********************************
 * Pop the model of the 8087 stack: one entry less is in use, and every
 * entry moves one position towards ST(0). No code is generated.
 * Params:
 *      line = source line number, only used for the NDPP trace
 *      file = source file name, only used for the NDPP trace
 */
void pop87(int line, const(char)* file)
{
    if (NDPP)
        printf("pop87(%s(%d): stackused=%d)\n", file, line, global87.stackused);

    global87.stackused -= 1;
    assert(global87.stackused >= 0);

    // old ST(k + 1) is now ST(k)
    foreach (k; 0 .. global87.stack.length - 1)
        global87.stack[k] = global87.stack[k + 1];

    // end of stack is nothing
    global87.stack[$ - 1] = NDP();
}
213 
214 
/*******************************
 * Push 8087 stack. Appends to cdb any code necessary to preserve
 * anything that might run off the end of the stack.
 * Forwards to push87(cdb, line, file), passing the source location of
 * this wrapper itself.
 */

void push87(ref CodeBuilder cdb)
{
    push87(cdb,__LINE__,__FILE__);
}
221 
/*******************************
 * Make room for a new ST(0) in the model of the 8087 stack.
 * If ST(7) holds a noted value, it is first stored into a save slot
 * (FDECSTP followed by FSTP), otherwise the number of entries in use is
 * incremented. Then every entry moves one position away from ST(0) and
 * ST(0) is cleared.
 * Params:
 *      cdb = where the spill code, if any, is appended
 *      line = source line number, only used for the NDPP trace
 *      file = source file name, only used for the NDPP trace
 */
void push87(ref CodeBuilder cdb, int line, const(char)* file)
{
    if (global87.stack[7].e == null)
    {
        // there is room
        if (NDPP) printf("push87(%s(%d): %d)\n", file, line, global87.stackused);
        ++global87.stackused;
        assert(global87.stackused <= 8);
    }
    else
    {
        // we would lose the top register off of the stack, so save it
        const int slot = getemptyslot();
        global87.save[slot] = global87.stack[7];
        cdb.genf2(0xD9,0xF6);                               // FDECSTP
        genfwait(cdb);
        ndp_fstp(cdb, slot, global87.stack[7].e.Ety);       // FSTP slot[BP]
        assert(global87.stackused == 8);
        if (NDPP) printf("push87() : overflow\n");
    }

    // Shift the stack up
    foreach_reverse (k; 1 .. 8)
        global87.stack[k] = global87.stack[k - 1];
    global87.stack[0] = NDP();
}
246 
/*****************************
 * Note elem e as being in ST(i) as being a value we want to keep.
 * Forwards to note87(e, offset, i, linnum), passing a source line inside
 * this wrapper.
 */

void note87(elem *e, uint offset, int i)
{
    enum int here = __LINE__;
    note87(e, offset, i, here);
}
255 
/*****************************
 * Record in the model of the 8087 stack that ST(i) holds (e, offset).
 * If e is a comma expression, its rightmost operand is recorded instead.
 * Params:
 *      e = the elem whose value is in ST(i)
 *      offset = stored alongside e (comsub87() uses 0 for the first half of a
 *               complex value and half its size for the second)
 *      i = 8087 stack position, must be less than global87.stackused
 *      linnum = source line number, only used for diagnostics
 */
void note87(elem *e, uint offset, int i, int linnum)
{
    if (NDPP)
        printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);

    static if (0)
    {
        if (global87.stack[i].e)
            printf("global87.stack[%d].e = %p\n",i,global87.stack[i].e);
    }

    debug
    {
        // say what is wrong before the assert below fires
        if (i >= global87.stackused)
        {
            printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);
            elem_print(e);
        }
    }
    assert(i < global87.stackused);

    // the value of a comma expression is the value of its right operand
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
    }

    NDP* entry = &global87.stack[i];
    entry.e = e;
    entry.offset = offset;
}
279 
/****************************************************
 * Exchange two entries in 8087 stack.
 * Only the model in global87.stack[] is updated.
 * Params:
 *      i = index of one entry
 *      j = index of the other entry
 */

void xchg87(int i, int j)
{
    NDP held = global87.stack[j];

    global87.stack[j] = global87.stack[i];
    global87.stack[i] = held;
}
292 
/****************************
 * Make sure that elem e is in register ST(i). Reload it if necessary.
 * Forwards to the overload taking a line number, passing a source line
 * inside this wrapper.
 * Input:
 *      i       0..3    8087 register number
 *      flag    1       don't bother with FXCH
 */

private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag)
{
    enum int here = __LINE__;
    makesure87(cdb,e,offset,i,flag,here);
}
304 
/****************************
 * Make sure that (e, offset) is in register ST(i). If the model of the
 * 8087 stack does not show it there, it must be in one of the
 * global87.save[] slots; it is then reloaded from that slot and the slot
 * is released.
 * Params:
 *      cdb = where the reload code is appended
 *      e = the elem; a comma expression stands for its rightmost operand
 *      offset = identifies which part of e is wanted; it must match the
 *               offset recorded with e
 *      i = 8087 register number, must be less than 4
 *      flag = if bit 0 is set, don't bother with FXCH after the reload
 *      linnum = source line number, only used for the debug trace
 */
private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag,int linnum)
{
    debug if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum);

    assert(e);                          // check before e is dereferenced
    while (e.Eoper == OPcomma)
        e = e.EV.E2;
    assert(e && i < 4);

    // From here on e is never an OPcomma

    if (global87.stack[i].e == e && global87.stack[i].offset == offset)
        return;                         // already where it should be

    debug if (global87.stack[i].e)
        printf("global87.stack[%d].e = %p, .offset = %d\n",i,global87.stack[i].e,global87.stack[i].offset);

    // ST(i) must not hold some other value we want to keep
    assert(global87.stack[i].e == null);

    // Find the slot the value was saved in; it is an error if there is none
    int j;
    for (j = 0; 1; j++)
    {
        debug if (j >= global87.save.length)
            printf("e = %p, global87.save.length = %llu\n",e, cast(ulong) global87.save.length);

        assert(j < global87.save.length);
        //printf("\tglobal87.save[%d] = %p, .offset = %d\n", j, global87.save[j].e, global87.save[j].offset);
        if (e == global87.save[j].e && offset == global87.save[j].offset)
            break;
    }

    push87(cdb);
    genfwait(cdb);
    ndp_fld(cdb, j, e.Ety);             // FLD j[BP]
    if (!(flag & 1))
    {
        // the FLD put the value into ST(0), exchange it down to ST(i)
        while (i != 0)
        {
            cdb.genf2(0xD9,0xC8 + i);   // FXCH ST(i)
            i--;
        }
    }
    global87.save[j] = NDP();           // back in 8087
    //global87.stack[i].e = null;
}
351 
/****************************
 * Save in memory any values in the 8087 that we want to keep.
 * As long as ST(0) holds a noted value and the stack is in use, ST(0) is
 * stored into a free global87.save[] slot and popped.
 * Params:
 *      cdb = where the store instructions are appended
 */

void save87(ref CodeBuilder cdb)
{
    uint nstores = 0;

    for (; global87.stack[0].e && global87.stackused; ++nstores)
    {
        // Save it
        const int slot = getemptyslot();
        if (NDPP) printf("saving %p in temporary global87.save[%d]\n",global87.stack[0].e,slot);
        global87.save[slot] = global87.stack[0];

        genfwait(cdb);
        ndp_fstp(cdb,slot,global87.stack[0].e.Ety); // FSTP slot[BP]
        pop87();
    }

    if (nstores != 0)           // if any stores
        genfwait(cdb);          // wait for last one to finish
}
374 
/******************************************
 * Save any noted values that would be destroyed by n pushes.
 * Nothing is done if no more than 8 - n stack entries are in use.
 * Otherwise the entries beyond the first 8 - n are stored into
 * global87.save[] slots and global87.stackused becomes 8 - n.
 * Params:
 *      cdb = where the code is appended
 *      n = number of pushes to make room for, at most 7
 */

void save87regs(ref CodeBuilder cdb, uint n)
{
    assert(n <= 7);
    const uint keep = 8 - n;            // entries that may stay in the 8087
    if (global87.stackused <= keep)
        return;                         // there is room already

    // Walk down from ST(7), storing every entry that is in use
    for (uint k = 8; k > keep; k--)
    {
        cdb.genf2(0xD9,0xF6);           // FDECSTP
        genfwait(cdb);
        if (k > global87.stackused)
            continue;                   // not in use, nothing to store

        const int slot = getemptyslot();
        ndp_fstp(cdb, slot, global87.stack[k - 1].e.Ety);   // FSTP slot[BP]
        global87.save[slot] = global87.stack[k - 1];
        global87.stack[k - 1] = NDP();
    }

    // One FINCSTP for each FDECSTP that was not followed by a store
    for (uint k = 8; k > keep; k--)
    {
        if (k <= global87.stackused)
            continue;
        cdb.genf2(0xD9,0xF7);           // FINCSTP
        genfwait(cdb);
    }

    global87.stackused = keep;
}
408 
/*****************************************************
 * Save/restore ST0 or ST01
 * Params:
 *      regm = mST0 or mST01
 *      cdbsave = receives the stores, one 80 bit FSTP per register
 *      cdbrestore = receives the loads, in the reverse order of the stores
 * The save slots used are marked as occupied with a newly allocated elem
 * and are not released here.
 */

void gensaverestore87(regm_t regm, ref CodeBuilder cdbsave, ref CodeBuilder cdbrestore)
{
    //printf("gensaverestore87(%s)\n", regm_str(regm));
    assert(regm == mST0 || regm == mST01);

    const int first = getemptyslot();
    global87.save[first].e = el_calloc();   // this blocks slot [first] for the life of this function
    ndp_fstp(cdbsave, first, TYldouble);

    // The reload of the first slot has to come last, so build it on the side
    CodeBuilder reloadFirst;
    reloadFirst.ctor();
    ndp_fld(reloadFirst, first, TYldouble);

    if (regm == mST01)
    {
        const int second = getemptyslot();
        global87.save[second].e = el_calloc();
        ndp_fstp(cdbsave, second, TYldouble);
        ndp_fld(cdbrestore, second, TYldouble);
    }

    cdbrestore.append(reloadFirst);
}
436 
/*************************************
 * Find which, if any, slot on stack holds elem e.
 * Params:
 *      e = elem to look for
 *      offset = must match the offset noted with e
 * Returns:
 *      index into global87.stack[] of the first match among the entries
 *      in use, -1 if there is none
 */

private int cse_get(elem *e, uint offset)
{
    for (int i = 0; i != global87.stackused; i++)
    {
        if (global87.stack[i].e == e &&
            global87.stack[i].offset == offset)
        {
            //printf("cse found %d\n",i);
            //elem_print(e);
            return i;
        }
    }
    //printf("cse not found\n");
    //elem_print(e);
    return -1;
}
463 
/*************************************
 * Reload common subexpression.
 * If the value is still noted on the 8087 stack (for a complex value:
 * both of its halves), it is duplicated with FLD ST(i). Otherwise it is
 * reloaded with loaddata().
 * Params:
 *      cdb = where the code is appended
 *      e = the common subexpression
 *      pretregs = where the result is wanted
 */

void comsub87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("comsub87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    // Look on 8087 stack
    const int first = cse_get(e, 0);

    if (tycomplex(e.Ety))
    {
        const uint sz = tysize(e.Ety);
        const int second = cse_get(e, sz / 2);
        if (first < 0 || second < 0)
        {
            // Reload
            loaddata(cdb,e,pretregs);
            return;
        }

        push87(cdb);
        push87(cdb);
        cdb.genf2(0xD9,0xC0 + first);           // FLD ST(first)
        cdb.genf2(0xD9,0xC0 + second + 1);      // FLD ST(second + 1)
        fixresult_complex87(cdb,e,mST01,pretregs);
        return;
    }

    if (first < 0)
    {
        // Reload
        loaddata(cdb,e,pretregs);
        return;
    }

    push87(cdb);
    cdb.genf2(0xD9,0xC0 + first);               // FLD ST(first)
    if (*pretregs & XMMREGS)
        fixresult87(cdb,e,mST0,pretregs);
    else
        fixresult(cdb,e,mST0,pretregs);
}
506 
507 
/*******************************
 * Decide if we need to gen an FWAIT.
 * Appends an FWAIT to cdb if ADDFWAIT() says so, else does nothing.
 */

void genfwait(ref CodeBuilder cdb)
{
    if (!ADDFWAIT())
        return;
    cdb.gen1(FWAIT);
}
517 
518 
/***************************
 * Put the 8087 flags into the CPU flags.
 * The 8087 status word is brought into AH and SAHF copies it to the flags.
 * Must not be called when NOSAHF is true. AX is used.
 */

private void cg87_87topsw(ref CodeBuilder cdb)
{
    /* Note that SAHF is not available on some early I64 processors
     * and will cause a seg fault
     */
    assert(!NOSAHF);
    getregs(cdb,mAX);

    if (config.target_cpu < TARGET_80286)
    {
        // no FSTSW AX, go through memory
        cdb.genfltreg(0xD8+5,7,0);        // FSTSW floatreg[BP]
        genfwait(cdb);                    // FWAIT
        cdb.genfltreg(0x8A,4,1);          // MOV AH,floatreg+1[BP]
    }
    else
        cdb.genf2(0xDF,0xE0);             // FSTSW AX

    cdb.gen1(0x9E);                       // SAHF
    code_orflag(cdb.last(),CFpsw);
}
541 
/*****************************************
 * Jump to ctarget if condition code C2 is set.
 * With SAHF available, the 8087 flags are copied to the CPU flags and the
 * jump is a JP. Otherwise the status word is stored in AX and bit 2 of AH
 * is tested.
 * Params:
 *      cdb = where the code is appended
 *      ctarget = target of the jump
 */

private void genjmpifC2(ref CodeBuilder cdb, code *ctarget)
{
    if (!NOSAHF)
    {
        cg87_87topsw(cdb);
        genjmp(cdb, JP, FLcode, cast(block *)ctarget);  // JP ctarget
        return;
    }

    getregs(cdb,mAX);
    cdb.genf2(0xDF,0xE0);                                    // FSTSW AX
    cdb.genc2(0xF6,modregrm(3,0,4),4);                       // TEST AH,4
    genjmp(cdb, JNE, FLcode, cast(block *)ctarget); // JNE ctarget
}
561 
/***************************
 * Set the PSW based on the state of ST0.
 * How ST0 is tested depends on the target:
 *      - SAHF not usable:      FLDZ, FUCOMIP
 *      - fast floating point:  FTST
 *      - 80386 or later:       FLDZ, FUCOMP or FUCOMPP
 *      - otherwise:            call to the CLIB.ftest library function
 * Params:
 *      cdb = where the code is appended
 *      e = passed on to callclib() in the library function case
 *      pop = if stack should be popped after test
 */

private void genftst(ref CodeBuilder cdb,elem *e,int pop)
{
    bool fpopLast = false;      // true if ST0 still has to be popped at the end

    if (NOSAHF)
    {
        push87(cdb);
        cdb.gen2(0xD9,0xEE);          // FLDZ
        cdb.gen2(0xDF,0xE9);          // FUCOMIP ST1
        pop87();
        fpopLast = pop != 0;
    }
    else if (config.flags4 & CFG4fastfloat)  // if fast floating point
    {
        cdb.genf2(0xD9,0xE4);                // FTST
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
        fpopLast = pop != 0;
    }
    else if (config.target_cpu >= TARGET_80386)
    {
        // FUCOMP doesn't raise exceptions on QNANs, unlike FTST
        push87(cdb);
        cdb.gen2(0xD9,0xEE);                 // FLDZ
        cdb.gen2(pop ? 0xDA : 0xDD,0xE9);    // FUCOMPP / FUCOMP
        pop87();
        if (pop)
            pop87();                         // FUCOMPP popped ST0 as well
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
    }
    else
    {
        // Call library function which does not raise exceptions
        regm_t regm = 0;

        callclib(cdb,e,CLIB.ftest,&regm,0);
        fpopLast = pop != 0;
    }

    if (fpopLast)
    {
        cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP
        pop87();
    }
}
618 
/*************************************
 * Determine if there is a special 8087 instruction to load
 * constant e.
 * The constant is compared at its own precision with +0, 1, pi, log2(10),
 * log2(e), log10(2) and ln(2).
 * Input:
 *      im      0       load real part
 *              1       load imaginary part (e must be of complex type)
 * Returns:
 *      opcode if found
 *      0 if not, or if the special instruction is not wanted for the target
 */

ubyte loadconst(elem *e, int im)
{
    elem_debug(e);
    assert(im == 0 || im == 1);

    // The candidate values at each precision, in the order of opcode[]
    immutable float[7] fval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];
    immutable double[7] dval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];

    static if (real.sizeof < 10)
    {
        import dmd.root.longdouble;
        immutable targ_ldouble[7] ldval =
        [ld_zero,ld_one,ld_pi,ld_log2t,ld_log2e,ld_log2,ld_ln2];
    }
    else
    {
        enum M_PI_L        = 0x1.921fb54442d1846ap+1L;       // 3.14159 fldpi
        enum M_LOG2T_L     = 0x1.a934f0979a3715fcp+1L;       // 3.32193 fldl2t
        enum M_LOG2E_L     = 0x1.71547652b82fe178p+0L;       // 1.4427 fldl2e
        enum M_LOG2_L      = 0x1.34413509f79fef32p-2L;       // 0.30103 fldlg2
        enum M_LN2_L       = 0x1.62e42fefa39ef358p-1L;       // 0.693147 fldln2
        immutable targ_ldouble[7] ldval =
        [0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L];
    }

    // The extra 0 at the end is what is returned when nothing matches
    immutable ubyte[7 + 1] opcode =
        /* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */
        [0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0];

    immutable ubyte[16] zeros;

    targ_float f;
    targ_double d;
    targ_ldouble ld;
    int sz;             // number of significant bytes of the value
    void *bits;         // points to whichever of f, d, ld holds the value

    if (im)
    {
        // imaginary part of a complex constant
        switch (tybasic(e.Ety))
        {
            case TYcfloat:
                f = e.EV.Vcfloat.im;
                sz = 4;
                bits = &f;
                break;

            case TYcdouble:
                d = e.EV.Vcdouble.im;
                sz = 8;
                bits = &d;
                break;

            case TYcldouble:
                ld = e.EV.Vcldouble.im;
                sz = 10;
                bits = &ld;
                break;

            default:
                assert(0);
        }
    }
    else
    {
        switch (tybasic(e.Ety))
        {
            case TYfloat:
            case TYifloat:
            case TYcfloat:
                f = e.EV.Vfloat;
                sz = 4;
                bits = &f;
                break;

            case TYdouble:
            case TYdouble_alias:
            case TYidouble:
            case TYcdouble:
                d = e.EV.Vdouble;
                sz = 8;
                bits = &d;
                break;

            case TYldouble:
            case TYildouble:
            case TYcldouble:
                ld = e.EV.Vldouble;
                sz = 10;
                bits = &ld;
                break;

            default:
                assert(0);
        }
    }

    // Note that for this purpose, -0 is not regarded as +0,
    // since FLDZ loads a +0
    assert(sz <= zeros.length);
    const bool isPlusZero = memcmp(bits, zeros.ptr, sz) == 0;
    if (isPlusZero && config.target_cpu >= TARGET_PentiumPro)
        return 0xEE;            // FLDZ is the only one with 1 micro-op

    // For some reason, these instructions take more clocks
    if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
        return 0;

    if (isPlusZero)
        return 0xEE;

    // Entry 0 (zero) has been dealt with, look at the others
    int idx = 1;
    while (idx < fval.length)
    {
        bool same;
        switch (sz)
        {
            case 4:
                same = (fval[idx] == f);
                break;
            case 8:
                same = (dval[idx] == d);
                break;
            case 10:
                same = (ldval[idx] == ld);
                break;
            default:
                assert(0);
        }
        if (same)
            break;
        ++idx;
    }
    return opcode[idx];         // idx is 7 if there was no match
}
767 
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 * Params:
 *      cdb = where the generated code is appended
 *      e = the expression; its type selects the operand size
 *      retregs = registers the result is in now
 *      pretregs = registers (and possibly mPSW) the result is wanted in
 * The cases handled are:
 *      - anything involving ST01 is passed on to fixresult_complex87()
 *      - general purpose registers to ST0
 *      - ST0 to general purpose registers
 *      - ST0 not wanted at all: it is popped
 *      - XMM register to ST0, and ST0 to XMM register
 * Transfers between the 8087 and other registers go through memory
 * (the genfltreg() / genxmmreg() temporary).
 */


void fixresult87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs)
{
    //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs);
    //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs));
    assert(!*pretregs || retregs);

    // Complex results (both ST0 and ST1 involved) are handled elsewhere
    if ((*pretregs | retregs) & mST01)
    {
        fixresult_complex87(cdb, e, retregs, pretregs);
        return;
    }

    tym_t tym = tybasic(e.Ety);
    uint sz = _tysize[tym];
    //printf("tym = x%x, sz = %d\n", tym, sz);

    /* if retregs needs to be transferred into the 8087 */
    if (*pretregs & mST0 && retregs & (mBP | ALLREGS))
    {
        debug if (sz > DOUBLESIZE)
        {
            elem_print(e);
            printf("retregs = %s\n", regm_str(retregs));
        }
        assert(sz <= DOUBLESIZE);
        if (!I16)
        {

            if (*pretregs & mPSW)
            {   // Set flags
                regm_t r = retregs | mPSW;
                fixresult(cdb,e,retregs,&r);
            }
            push87(cdb);
            if (sz == REGSIZE || (I64 && sz == 4))
            {
                // value is in a single register
                const reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);           // MOV fltreg,reg
                cdb.genfltreg(0xD9,0,0);            // FLD float ptr fltreg
            }
            else
            {
                // value is in a register pair
                const msreg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                cdb.genfltreg(STO,lsreg,0);         // MOV fltreg,lsreg
                cdb.genfltreg(STO,msreg,4);         // MOV fltreg+4,msreg
                cdb.genfltreg(0xDD,0,0);            // FLD double ptr fltreg
            }
        }
        else
        {
            // 16 bit code: move the value into FLOATREGS / DOUBLEREGS and
            // call a library function
            regm_t regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS;
            regm |= *pretregs & mPSW;
            fixresult(cdb,e,retregs,&regm);
            regm = 0;           // don't worry about result from CLIB.xxx
            callclib(cdb,e,
                    ((sz == FLOATSIZE) ? CLIB.fltto87 : CLIB.dblto87),
                    &regm,0);
        }
    }
    /* if ST0 needs to be transferred into general purpose registers */
    else if (*pretregs & (mBP | ALLREGS) && retregs & mST0)
    {
        assert(sz <= DOUBLESIZE);
        uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
        // test ST0 before it is stored and popped, if the flags are wanted too
        if (*pretregs & mPSW && !(retregs & mPSW))
            genftst(cdb,e,0);
        // FSTP floatreg
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);
        genfwait(cdb);
        reg_t reg;
        allocreg(cdb,pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
        if (sz == FLOATSIZE)
        {
            if (!I16)
                cdb.genfltreg(LOD,reg,0);
            else
            {
                // 16 bit code: a float takes a register pair
                cdb.genfltreg(LOD,reg,REGSIZE);
                cdb.genfltreg(LOD,findreglsw(*pretregs),0);
            }
        }
        else
        {   assert(sz == DOUBLESIZE);
            if (I16)
            {
                // 16 bit code: a double takes AX,BX,CX,DX, most significant word in AX
                cdb.genfltreg(LOD,AX,6);
                cdb.genfltreg(LOD,BX,4);
                cdb.genfltreg(LOD,CX,2);
                cdb.genfltreg(LOD,DX,0);
            }
            else if (I32)
            {
                // 32 bit code: a double takes a register pair
                cdb.genfltreg(LOD,reg,REGSIZE);
                cdb.genfltreg(LOD,findreglsw(*pretregs),0);
            }
            else // I64
            {
                cdb.genfltreg(LOD,reg,0);
                code_orrex(cdb.last(), REX_W);      // make it a 64 bit load
            }
        }
    }
    /* if the value in ST0 is not wanted */
    else if (*pretregs == 0 && retregs == mST0)
    {
        cdb.genf2(0xDD,modregrm(3,3,0));    // FPOP
        pop87();
    }
    else
    {
        if (*pretregs & mPSW)
        {
            if (!(retregs & mPSW))
            {
                // pop ST0 after the test unless the value is wanted in ST0 or an XMM register
                genftst(cdb,e,!(*pretregs & (mST0 | XMMREGS))); // FTST
            }
        }
        if (*pretregs & mST0 && retregs & XMMREGS)
        {
            // XMM register to ST0, through memory
            assert(sz <= DOUBLESIZE);
            uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
            // MOVD floatreg,XMM?
            const reg = findreg(retregs);
            cdb.genxmmreg(xmmstore(tym),reg,0,tym);
            push87(cdb);
            cdb.genfltreg(ESC(mf,1),0,0);                 // FLD float/double ptr fltreg
        }
        else if (retregs & mST0 && *pretregs & XMMREGS)
        {
            // ST0 to XMM register, through memory
            assert(sz <= DOUBLESIZE);
            uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
            // FSTP floatreg
            pop87();
            cdb.genfltreg(ESC(mf,1),3,0);
            genfwait(cdb);
            // MOVD XMM?,floatreg
            reg_t reg;
            allocreg(cdb,pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
            cdb.genxmmreg(xmmload(tym),reg,0,tym);
        }
        else
            // nothing to move: if ST0 is wanted, the value must be there already
            assert(!(*pretregs & mST0) || (retregs & mST0));
    }
    // The result in ST0 is a value we want to keep track of
    if (*pretregs & mST0)
        note87(e,0,0);
}
920 
921 /********************************
922  * Generate in-line 8087 code for the following operators:
923  *      add
924  *      min
925  *      mul
926  *      div
927  *      cmp
928  */
929 
// Reverse the order that the op is done in
// Element 0 is 0xFF, elements 1..4 hold 0..3 and elements 5..8 hold 5,4,7,6.
// NOTE(review): presumably indexed with op + 1, where op is the value picked in
// orth87() (-1, 0, 1, 4 or 6), so that 4 <-> 5 and 6 <-> 7 are exchanged and the
// other values map to themselves - confirm at the uses, which are not visible here.
__gshared const ubyte[9] oprev = [ cast(ubyte)-1,0,1,2,3,5,4,7,6 ];
932 
933 void orth87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
934 {
935     //printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
936     // we could be evaluating / for side effects only
937     assert(*pretregs != 0);
938 
939     elem *e1 = e.EV.E1;
940     elem *e2 = e.EV.E2;
941     uint sz2 = tysize(e1.Ety);
942     if (tycomplex(e1.Ety))
943         sz2 /= 2;
944 
945     OPER eoper = e.Eoper;
946     if (eoper == OPmul && e2.Eoper == OPconst && el_toldoubled(e.EV.E2) == 2.0L)
947     {
948         // Perform "mul 2.0" as fadd ST(0), ST
949         regm_t retregs = mST0;
950         codelem(cdb,e1,&retregs,false);
951         cdb.genf2(0xDC, 0xC0);                    // fadd ST(0), ST;
952         fixresult87(cdb,e,mST0,pretregs);         // result is in ST(0).
953         freenode(e2);
954         return;
955     }
956 
957     uint op;
958     if (OTrel(eoper))
959         eoper = OPeqeq;
960     bool imaginary;
961     static uint X(OPER op, uint ty1, uint ty2) { return (op << 16) + ty1 * 256 + ty2; }
962     switch (X(eoper, tybasic(e1.Ety), tybasic(e2.Ety)))
963     {
964         case X(OPadd, TYfloat, TYfloat):
965         case X(OPadd, TYdouble, TYdouble):
966         case X(OPadd, TYdouble_alias, TYdouble_alias):
967         case X(OPadd, TYldouble, TYldouble):
968         case X(OPadd, TYldouble, TYdouble):
969         case X(OPadd, TYdouble, TYldouble):
970         case X(OPadd, TYifloat, TYifloat):
971         case X(OPadd, TYidouble, TYidouble):
972         case X(OPadd, TYildouble, TYildouble):
973             op = 0;                             // FADDP
974             break;
975 
976         case X(OPmin, TYfloat, TYfloat):
977         case X(OPmin, TYdouble, TYdouble):
978         case X(OPmin, TYdouble_alias, TYdouble_alias):
979         case X(OPmin, TYldouble, TYldouble):
980         case X(OPmin, TYldouble, TYdouble):
981         case X(OPmin, TYdouble, TYldouble):
982         case X(OPmin, TYifloat, TYifloat):
983         case X(OPmin, TYidouble, TYidouble):
984         case X(OPmin, TYildouble, TYildouble):
985             op = 4;                             // FSUBP
986             break;
987 
988         case X(OPmul, TYfloat, TYfloat):
989         case X(OPmul, TYdouble, TYdouble):
990         case X(OPmul, TYdouble_alias, TYdouble_alias):
991         case X(OPmul, TYldouble, TYldouble):
992         case X(OPmul, TYldouble, TYdouble):
993         case X(OPmul, TYdouble, TYldouble):
994         case X(OPmul, TYifloat, TYifloat):
995         case X(OPmul, TYidouble, TYidouble):
996         case X(OPmul, TYildouble, TYildouble):
997         case X(OPmul, TYfloat, TYifloat):
998         case X(OPmul, TYdouble, TYidouble):
999         case X(OPmul, TYldouble, TYildouble):
1000         case X(OPmul, TYifloat, TYfloat):
1001         case X(OPmul, TYidouble, TYdouble):
1002         case X(OPmul, TYildouble, TYldouble):
1003             op = 1;                             // FMULP
1004             break;
1005 
1006         case X(OPdiv, TYfloat, TYfloat):
1007         case X(OPdiv, TYdouble, TYdouble):
1008         case X(OPdiv, TYdouble_alias, TYdouble_alias):
1009         case X(OPdiv, TYldouble, TYldouble):
1010         case X(OPdiv, TYldouble, TYdouble):
1011         case X(OPdiv, TYdouble, TYldouble):
1012         case X(OPdiv, TYifloat, TYifloat):
1013         case X(OPdiv, TYidouble, TYidouble):
1014         case X(OPdiv, TYildouble, TYildouble):
1015             op = 6;                             // FDIVP
1016             break;
1017 
1018         case X(OPmod, TYfloat, TYfloat):
1019         case X(OPmod, TYdouble, TYdouble):
1020         case X(OPmod, TYdouble_alias, TYdouble_alias):
1021         case X(OPmod, TYldouble, TYldouble):
1022         case X(OPmod, TYfloat, TYifloat):
1023         case X(OPmod, TYdouble, TYidouble):
1024         case X(OPmod, TYldouble, TYildouble):
1025         case X(OPmod, TYifloat, TYifloat):
1026         case X(OPmod, TYidouble, TYidouble):
1027         case X(OPmod, TYildouble, TYildouble):
1028         case X(OPmod, TYifloat, TYfloat):
1029         case X(OPmod, TYidouble, TYdouble):
1030         case X(OPmod, TYildouble, TYldouble):
1031             op = cast(uint) -1;
1032             break;
1033 
1034         case X(OPeqeq, TYfloat, TYfloat):
1035         case X(OPeqeq, TYdouble, TYdouble):
1036         case X(OPeqeq, TYdouble_alias, TYdouble_alias):
1037         case X(OPeqeq, TYldouble, TYldouble):
1038         case X(OPeqeq, TYifloat, TYifloat):
1039         case X(OPeqeq, TYidouble, TYidouble):
1040         case X(OPeqeq, TYildouble, TYildouble):
1041         {
1042             assert(OTrel(e.Eoper));
1043             assert((*pretregs & mST0) == 0);
1044             regm_t retregs = mST0;
1045             codelem(cdb,e1,&retregs,false);
1046             note87(e1,0,0);
1047             regm_t resregm = mPSW;
1048 
1049             if (rel_exception(e.Eoper) || config.flags4 & CFG4fastfloat)
1050             {
1051                 if (e2.Eoper == OPconst && !boolres(e2))
1052                 {
1053                     if (NOSAHF)
1054                     {
1055                         push87(cdb);
1056                         cdb.gen2(0xD9,0xEE);             // FLDZ
1057                         cdb.gen2(0xDF,0xF1);             // FCOMIP ST1
1058                         pop87();
1059                     }
1060                     else
1061                     {
1062                         cdb.genf2(0xD9,0xE4);            // FTST
1063                         cg87_87topsw(cdb);
1064                     }
1065                     cdb.genf2(0xDD,modregrm(3,3,0));     // FPOP
1066                     pop87();
1067                 }
1068                 else if (NOSAHF)
1069                 {
1070                     note87(e1,0,0);
1071                     load87(cdb,e2,0,&retregs,e1,-1);
1072                     makesure87(cdb,e1,0,1,0);
1073                     resregm = 0;
1074                     //cdb.genf2(0xD9,0xC8 + 1);          // FXCH ST1
1075                     cdb.gen2(0xDF,0xF1);                 // FCOMIP ST1
1076                     pop87();
1077                     cdb.genf2(0xDD,modregrm(3,3,0));     // FPOP
1078                     pop87();
1079                 }
1080                 else
1081                 {
1082                     load87(cdb,e2, 0, pretregs, e1, 3);  // FCOMPP
1083                 }
1084             }
1085             else
1086             {
1087                 if (e2.Eoper == OPconst && !boolres(e2) &&
1088                     config.target_cpu < TARGET_80386)
1089                 {
1090                     regm_t regm = 0;
1091 
1092                     callclib(cdb,e,CLIB.ftest0,&regm,0);
1093                     pop87();
1094                 }
1095                 else
1096                 {
1097                     note87(e1,0,0);
1098                     load87(cdb,e2,0,&retregs,e1,-1);
1099                     makesure87(cdb,e1,0,1,0);
1100                     resregm = 0;
1101                     if (NOSAHF)
1102                     {
1103                         cdb.gen2(0xDF,0xE9);              // FUCOMIP ST1
1104                         pop87();
1105                         cdb.genf2(0xDD,modregrm(3,3,0));  // FPOP
1106                         pop87();
1107                     }
1108                     else if (config.target_cpu >= TARGET_80386)
1109                     {
1110                         cdb.gen2(0xDA,0xE9);      // FUCOMPP
1111                         cg87_87topsw(cdb);
1112                         pop87();
1113                         pop87();
1114                     }
1115                     else
1116                         // Call a function instead so that exceptions
1117                         // are not generated.
1118                         callclib(cdb,e,CLIB.fcompp,&resregm,0);
1119                 }
1120             }
1121 
1122             freenode(e2);
1123             return;
1124         }
1125 
1126         case X(OPadd, TYcfloat, TYcfloat):
1127         case X(OPadd, TYcdouble, TYcdouble):
1128         case X(OPadd, TYcldouble, TYcldouble):
1129         case X(OPadd, TYcfloat, TYfloat):
1130         case X(OPadd, TYcdouble, TYdouble):
1131         case X(OPadd, TYcldouble, TYldouble):
1132         case X(OPadd, TYfloat, TYcfloat):
1133         case X(OPadd, TYdouble, TYcdouble):
1134         case X(OPadd, TYldouble, TYcldouble):
1135             goto Lcomplex;
1136 
1137         case X(OPadd, TYifloat, TYcfloat):
1138         case X(OPadd, TYidouble, TYcdouble):
1139         case X(OPadd, TYildouble, TYcldouble):
1140             goto Lcomplex2;
1141 
1142         case X(OPmin, TYcfloat, TYcfloat):
1143         case X(OPmin, TYcdouble, TYcdouble):
1144         case X(OPmin, TYcldouble, TYcldouble):
1145         case X(OPmin, TYcfloat, TYfloat):
1146         case X(OPmin, TYcdouble, TYdouble):
1147         case X(OPmin, TYcldouble, TYldouble):
1148         case X(OPmin, TYfloat, TYcfloat):
1149         case X(OPmin, TYdouble, TYcdouble):
1150         case X(OPmin, TYldouble, TYcldouble):
1151             goto Lcomplex;
1152 
1153         case X(OPmin, TYifloat, TYcfloat):
1154         case X(OPmin, TYidouble, TYcdouble):
1155         case X(OPmin, TYildouble, TYcldouble):
1156             goto Lcomplex2;
1157 
1158         case X(OPmul, TYcfloat, TYcfloat):
1159         case X(OPmul, TYcdouble, TYcdouble):
1160         case X(OPmul, TYcldouble, TYcldouble):
1161             goto Lcomplex;
1162 
1163         case X(OPdiv, TYcfloat, TYcfloat):
1164         case X(OPdiv, TYcdouble, TYcdouble):
1165         case X(OPdiv, TYcldouble, TYcldouble):
1166         case X(OPdiv, TYfloat, TYcfloat):
1167         case X(OPdiv, TYdouble, TYcdouble):
1168         case X(OPdiv, TYldouble, TYcldouble):
1169         case X(OPdiv, TYifloat, TYcfloat):
1170         case X(OPdiv, TYidouble, TYcdouble):
1171         case X(OPdiv, TYildouble, TYcldouble):
1172             goto Lcomplex;
1173 
1174         case X(OPdiv, TYifloat,   TYfloat):
1175         case X(OPdiv, TYidouble,  TYdouble):
1176         case X(OPdiv, TYildouble, TYldouble):
1177             op = 6;                             // FDIVP
1178             break;
1179 
1180         Lcomplex:
1181         {
1182             loadComplex(cdb,e1);
1183             loadComplex(cdb,e2);
1184             makesure87(cdb, e1, sz2, 2, 0);
1185             makesure87(cdb, e1, 0, 3, 0);
1186             regm_t retregs = mST01;
1187             if (eoper == OPadd)
1188             {
1189                 cdb.genf2(0xDE, 0xC0+2);    // FADDP ST(2),ST
1190                 cdb.genf2(0xDE, 0xC0+2);    // FADDP ST(2),ST
1191                 pop87();
1192                 pop87();
1193             }
1194             else if (eoper == OPmin)
1195             {
1196                 cdb.genf2(0xDE, 0xE8+2);    // FSUBP ST(2),ST
1197                 cdb.genf2(0xDE, 0xE8+2);    // FSUBP ST(2),ST
1198                 pop87();
1199                 pop87();
1200             }
1201             else
1202             {
1203                 int clib = eoper == OPmul ? CLIB.cmul : CLIB.cdiv;
1204                 callclib(cdb, e, clib, &retregs, 0);
1205             }
1206             fixresult_complex87(cdb, e, retregs, pretregs);
1207             return;
1208         }
1209 
1210         Lcomplex2:
1211         {
1212             regm_t retregs = mST0;
1213             codelem(cdb,e1, &retregs, false);
1214             note87(e1, 0, 0);
1215             loadComplex(cdb,e2);
1216             makesure87(cdb, e1, 0, 2, 0);
1217             retregs = mST01;
1218             if (eoper == OPadd)
1219             {
1220                 cdb.genf2(0xDE, 0xC0+2);   // FADDP ST(2),ST
1221             }
1222             else if (eoper == OPmin)
1223             {
1224                 cdb.genf2(0xDE, 0xE8+2);   // FSUBP ST(2),ST
1225                 cdb.genf2(0xD9, 0xE0);     // FCHS
1226             }
1227             else
1228                 assert(0);
1229             pop87();
1230             cdb.genf2(0xD9, 0xC8 + 1);     // FXCH ST(1)
1231             fixresult_complex87(cdb, e, retregs, pretregs);
1232             return;
1233         }
1234 
1235         case X(OPeqeq, TYcfloat, TYcfloat):
1236         case X(OPeqeq, TYcdouble, TYcdouble):
1237         case X(OPeqeq, TYcldouble, TYcldouble):
1238         case X(OPeqeq, TYcfloat, TYifloat):
1239         case X(OPeqeq, TYcdouble, TYidouble):
1240         case X(OPeqeq, TYcldouble, TYildouble):
1241         case X(OPeqeq, TYcfloat, TYfloat):
1242         case X(OPeqeq, TYcdouble, TYdouble):
1243         case X(OPeqeq, TYcldouble, TYldouble):
1244         case X(OPeqeq, TYifloat, TYcfloat):
1245         case X(OPeqeq, TYidouble, TYcdouble):
1246         case X(OPeqeq, TYildouble, TYcldouble):
1247         case X(OPeqeq, TYfloat, TYcfloat):
1248         case X(OPeqeq, TYdouble, TYcdouble):
1249         case X(OPeqeq, TYldouble, TYcldouble):
1250         case X(OPeqeq, TYfloat, TYifloat):
1251         case X(OPeqeq, TYdouble, TYidouble):
1252         case X(OPeqeq, TYldouble, TYildouble):
1253         case X(OPeqeq, TYifloat, TYfloat):
1254         case X(OPeqeq, TYidouble, TYdouble):
1255         case X(OPeqeq, TYildouble, TYldouble):
1256         {
1257             loadComplex(cdb,e1);
1258             loadComplex(cdb,e2);
1259             makesure87(cdb, e1, sz2, 2, 0);
1260             makesure87(cdb, e1, 0, 3, 0);
1261             regm_t retregs = 0;
1262             callclib(cdb, e, CLIB.ccmp, &retregs, 0);
1263             return;
1264         }
1265 
1266         case X(OPadd, TYfloat, TYifloat):
1267         case X(OPadd, TYdouble, TYidouble):
1268         case X(OPadd, TYldouble, TYildouble):
1269         case X(OPadd, TYifloat, TYfloat):
1270         case X(OPadd, TYidouble, TYdouble):
1271         case X(OPadd, TYildouble, TYldouble):
1272 
1273         case X(OPmin, TYfloat, TYifloat):
1274         case X(OPmin, TYdouble, TYidouble):
1275         case X(OPmin, TYldouble, TYildouble):
1276         case X(OPmin, TYifloat, TYfloat):
1277         case X(OPmin, TYidouble, TYdouble):
1278         case X(OPmin, TYildouble, TYldouble):
1279         {
1280             regm_t retregs = mST0;
1281             codelem(cdb,e1, &retregs, false);
1282             note87(e1, 0, 0);
1283             codelem(cdb,e2, &retregs, false);
1284             makesure87(cdb, e1, 0, 1, 0);
1285             if (eoper == OPmin)
1286                 cdb.genf2(0xD9, 0xE0);     // FCHS
1287             if (tyimaginary(e1.Ety))
1288                 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1)
1289             retregs = mST01;
1290             fixresult_complex87(cdb, e, retregs, pretregs);
1291             return;
1292         }
1293 
1294         case X(OPadd, TYcfloat, TYifloat):
1295         case X(OPadd, TYcdouble, TYidouble):
1296         case X(OPadd, TYcldouble, TYildouble):
1297             op = 0;
1298             goto Lci;
1299 
1300         case X(OPmin, TYcfloat, TYifloat):
1301         case X(OPmin, TYcdouble, TYidouble):
1302         case X(OPmin, TYcldouble, TYildouble):
1303             op = 4;
1304             goto Lci;
1305 
1306         Lci:
1307         {
1308             loadComplex(cdb,e1);
1309             regm_t retregs = mST0;
1310             load87(cdb,e2,sz2,&retregs,e1,op);
1311             freenode(e2);
1312             retregs = mST01;
1313             makesure87(cdb, e1,0,1,0);
1314             fixresult_complex87(cdb,e, retregs, pretregs);
1315             return;
1316         }
1317 
1318         case X(OPmul, TYcfloat, TYfloat):
1319         case X(OPmul, TYcdouble, TYdouble):
1320         case X(OPmul, TYcldouble, TYldouble):
1321             imaginary = false;
1322             goto Lcmul;
1323 
1324         case X(OPmul, TYcfloat, TYifloat):
1325         case X(OPmul, TYcdouble, TYidouble):
1326         case X(OPmul, TYcldouble, TYildouble):
1327             imaginary = true;
1328         Lcmul:
1329         {
1330             loadComplex(cdb,e1);
1331             if (imaginary)
1332             {
1333                 cdb.genf2(0xD9, 0xE0);          // FCHS
1334                 cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
1335                 if (elemisone(e2))
1336                 {
1337                     freenode(e2);
1338                     fixresult_complex87(cdb, e, mST01, pretregs);
1339                     return;
1340                 }
1341             }
1342             regm_t retregs = mST0;
1343             codelem(cdb,e2, &retregs, false);
1344             makesure87(cdb, e1, sz2, 1, 0);
1345             makesure87(cdb, e1, 0, 2, 0);
1346             cdb.genf2(0xDC,0xC8 + 2);           // FMUL ST(2), ST
1347             cdb.genf2(0xDE,0xC8 + 1);           // FMULP ST(1), ST
1348             pop87();
1349             fixresult_complex87(cdb, e, mST01, pretregs);
1350             return;
1351         }
1352 
1353         case X(OPmul, TYfloat, TYcfloat):
1354         case X(OPmul, TYdouble, TYcdouble):
1355         case X(OPmul, TYldouble, TYcldouble):
1356             imaginary = false;
1357             goto Lcmul2;
1358 
1359         case X(OPmul, TYifloat, TYcfloat):
1360         case X(OPmul, TYidouble, TYcdouble):
1361         case X(OPmul, TYildouble, TYcldouble):
1362             imaginary = true;
1363         Lcmul2:
1364         {
1365             regm_t retregs = mST0;
1366             codelem(cdb,e1, &retregs, false);
1367             note87(e1, 0, 0);
1368             loadComplex(cdb,e2);
1369             makesure87(cdb, e1, 0, 2, 0);
1370             cdb.genf2(0xD9, imaginary ? 0xE0 : 0xC8 + 1); // FCHS / FXCH ST(1)
1371             cdb.genf2(0xD9,0xC8 + 2);        // FXCH ST(2)
1372             cdb.genf2(0xDC,0xC8 + 2);        // FMUL ST(2), ST
1373             cdb.genf2(0xDE,0xC8 + 1);        // FMULP ST(1), ST
1374             pop87();
1375             fixresult_complex87(cdb, e, mST01, pretregs);
1376             return;
1377         }
1378 
1379         case X(OPdiv, TYcfloat, TYfloat):
1380         case X(OPdiv, TYcdouble, TYdouble):
1381         case X(OPdiv, TYcldouble, TYldouble):
1382         {
1383             loadComplex(cdb,e1);
1384             regm_t retregs = mST0;
1385             codelem(cdb,e2, &retregs, false);
1386             makesure87(cdb, e1, sz2, 1, 0);
1387             makesure87(cdb, e1, 0, 2, 0);
1388             cdb.genf2(0xDC,0xF8 + 2);            // FDIV ST(2), ST
1389             cdb.genf2(0xDE,0xF8 + 1);            // FDIVP ST(1), ST
1390             pop87();
1391             fixresult_complex87(cdb, e, mST01, pretregs);
1392             return;
1393         }
1394 
1395         case X(OPdiv, TYcfloat, TYifloat):
1396         case X(OPdiv, TYcdouble, TYidouble):
1397         case X(OPdiv, TYcldouble, TYildouble):
1398         {
1399             loadComplex(cdb,e1);
1400             cdb.genf2(0xD9,0xC8 + 1);        // FXCH ST(1)
1401             xchg87(0, 1);
1402             cdb.genf2(0xD9, 0xE0);               // FCHS
1403             regm_t retregs = mST0;
1404             codelem(cdb,e2, &retregs, false);
1405             makesure87(cdb, e1, 0, 1, 0);
1406             makesure87(cdb, e1, sz2, 2, 0);
1407             cdb.genf2(0xDC,0xF8 + 2);        // FDIV ST(2), ST
1408             cdb.genf2(0xDE,0xF8 + 1);             // FDIVP ST(1), ST
1409             pop87();
1410             fixresult_complex87(cdb, e, mST01, pretregs);
1411             return;
1412         }
1413 
1414         case X(OPmod, TYcfloat, TYfloat):
1415         case X(OPmod, TYcdouble, TYdouble):
1416         case X(OPmod, TYcldouble, TYldouble):
1417         case X(OPmod, TYcfloat, TYifloat):
1418         case X(OPmod, TYcdouble, TYidouble):
1419         case X(OPmod, TYcldouble, TYildouble):
1420         {
1421             /*
1422                         fld     E1.re
1423                         fld     E1.im
1424                         fld     E2
1425                         fxch    ST(1)
1426                 FM1:    fprem
1427                         fstsw   word ptr sw
1428                         fwait
1429                         mov     AH, byte ptr sw+1
1430                         jp      FM1
1431                         fxch    ST(2)
1432                 FM2:    fprem
1433                         fstsw   word ptr sw
1434                         fwait
1435                         mov     AH, byte ptr sw+1
1436                         jp      FM2
1437                         fstp    ST(1)
1438                         fxch    ST(1)
1439              */
1440             loadComplex(cdb,e1);
1441             regm_t retregs = mST0;
1442             codelem(cdb,e2, &retregs, false);
1443             makesure87(cdb, e1, sz2, 1, 0);
1444             makesure87(cdb, e1, 0, 2, 0);
1445             cdb.genf2(0xD9, 0xC8 + 1);             // FXCH ST(1)
1446 
1447             cdb.gen2(0xD9, 0xF8);                  // FPREM
1448             code *cfm1 = cdb.last();
1449             genjmpifC2(cdb, cfm1);                 // JC2 FM1
1450             cdb.genf2(0xD9, 0xC8 + 2);             // FXCH ST(2)
1451 
1452             cdb.gen2(0xD9, 0xF8);                  // FPREM
1453             code *cfm2 = cdb.last();
1454 
1455             genjmpifC2(cdb, cfm2);                 // JC2 FM2
1456             cdb.genf2(0xDD,0xD8 + 1);              // FSTP ST(1)
1457             cdb.genf2(0xD9, 0xC8 + 1);             // FXCH ST(1)
1458 
1459             pop87();
1460             fixresult_complex87(cdb, e, mST01, pretregs);
1461             return;
1462         }
1463 
1464         default:
1465 
1466             debug
1467             elem_print(e);
1468 
1469             assert(0);
1470     }
1471 
1472     int reverse = 0;
1473     int e2oper = e2.Eoper;
1474 
1475     /* Move double-sized operand into the second position if there's a chance
1476      * it will allow combining a load with an operation (DMD Bugzilla 2905)
1477      */
1478     if ( ((tybasic(e1.Ety) == TYdouble)
1479           && ((e1.Eoper == OPvar) || (e1.Eoper == OPconst))
1480           && (tybasic(e2.Ety) != TYdouble)) ||
1481         (e1.Eoper == OPconst) ||
1482         (e1.Eoper == OPvar &&
1483          ((e1.Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) ||
1484           (e2oper == OPd_f &&
1485             (e2.EV.E1.Eoper == OPs32_d || e2.EV.E1.Eoper == OPs64_d || e2.EV.E1.Eoper == OPs16_d) &&
1486             e2.EV.E1.EV.E1.Eoper == OPvar
1487           ) ||
1488           ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) &&
1489             e2.EV.E1.Eoper == OPvar
1490           )
1491          )
1492         )
1493        )
1494     {   // Reverse order of evaluation
1495         e1 = e.EV.E2;
1496         e2 = e.EV.E1;
1497         op = oprev[op + 1];
1498         reverse ^= 1;
1499     }
1500 
1501     regm_t retregs1 = mST0;
1502     codelem(cdb,e1,&retregs1,false);
1503     note87(e1,0,0);
1504 
1505     if (config.flags4 & CFG4fdivcall && e.Eoper == OPdiv)
1506     {
1507         regm_t retregs = mST0;
1508         load87(cdb,e2,0,&retregs,e1,-1);
1509         makesure87(cdb, e1,0,1,0);
1510         if (op == 7)                    // if reverse divide
1511             cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
1512         callclib(cdb,e,CLIB.fdiv87,&retregs,0);
1513         pop87();
1514         regm_t resregm = mST0;
1515         freenode(e2);
1516         fixresult87(cdb,e,resregm,pretregs);
1517     }
1518     else if (e.Eoper == OPmod)
1519     {
1520         /*
1521          *              fld     tbyte ptr y
1522          *              fld     tbyte ptr x             // ST = x, ST1 = y
1523          *      FM1:    // We don't use fprem1 because for some inexplicable
1524          *              // reason we get -5 when we do _modulo(15, 10)
1525          *              fprem                           // ST = ST % ST1
1526          *              fstsw   word ptr sw
1527          *              fwait
1528          *              mov     AH,byte ptr sw+1        // get msb of status word in AH
1529          *              sahf                            // transfer to flags
1530          *              jp      FM1                     // continue till ST < ST1
1531          *              fstp    ST(1)                   // leave remainder on stack
1532          */
1533         regm_t retregs = mST0;
1534         load87(cdb,e2,0,&retregs,e1,-1);
1535         makesure87(cdb,e1,0,1,0);       // now have x,y on stack; need y,x
1536         if (!reverse)                           // if not reverse modulo
1537             cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
1538 
1539         cdb.gen2(0xD9, 0xF8);                   // FM1: FPREM
1540         code *cfm1 = cdb.last();
1541         genjmpifC2(cdb, cfm1);                  // JC2 FM1
1542         cdb.genf2(0xDD,0xD8 + 1);               // FSTP ST(1)
1543 
1544         pop87();
1545         freenode(e2);
1546         fixresult87(cdb,e,mST0,pretregs);
1547     }
1548     else
1549     {
1550         load87(cdb,e2,0,pretregs,e1,op);
1551         freenode(e2);
1552     }
1553     if (*pretregs & mST0)
1554         note87(e,0,0);
1555     //printf("orth87(-e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
1556 }
1557 
1558 /*****************************
1559  * Load e into ST01.
1560  */
1561 
1562 private void loadComplex(ref CodeBuilder cdb,elem *e)
1563 {
1564     regm_t retregs;
1565 
1566     int sz = tysize(e.Ety);
1567     switch (tybasic(e.Ety))
1568     {
1569         case TYfloat:
1570         case TYdouble:
1571         case TYldouble:
1572             retregs = mST0;
1573             codelem(cdb,e,&retregs,false);
1574             // Convert to complex with a 0 for the imaginary part
1575             push87(cdb);
1576             cdb.gen2(0xD9,0xEE);              // FLDZ
1577             break;
1578 
1579         case TYifloat:
1580         case TYidouble:
1581         case TYildouble:
1582             // Convert to complex with a 0 for the real part
1583             push87(cdb);
1584             cdb.gen2(0xD9,0xEE);              // FLDZ
1585             retregs = mST0;
1586             codelem(cdb,e,&retregs,false);
1587             break;
1588 
1589         case TYcfloat:
1590         case TYcdouble:
1591         case TYcldouble:
1592             sz /= 2;
1593             retregs = mST01;
1594             codelem(cdb,e,&retregs,false);
1595             break;
1596 
1597         default:
1598             assert(0);
1599     }
1600     note87(e, 0, 1);
1601     note87(e, sz, 0);
1602 }
1603 
1604 /*************************
1605  * If op == -1, load expression e into ST0.
1606  * else compute (eleft op e), eleft is in ST0.
1607  * Must follow same logic as cmporder87();
1608  */
1609 
1610 void load87(ref CodeBuilder cdb,elem *e,uint eoffset,regm_t *pretregs,elem *eleft,OPER op)
1611 {
1612     code cs;
1613     regm_t retregs;
1614     reg_t reg;
1615     uint mf1;
1616     ubyte ldop;
1617     int i;
1618 
1619     if (NDPP)
1620         printf("+load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);
1621 
1622     assert(!(NOSAHF && op == 3));
1623     elem_debug(e);
1624     if (ADDFWAIT())
1625         cs.Iflags = CFwait;
1626     else
1627         cs.Iflags = 0;
1628     cs.Irex = 0;
1629     OPER opr = oprev[op + 1];
1630     tym_t ty = tybasic(e.Ety);
1631     uint mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble;
1632     if ((ty == TYldouble || ty == TYildouble) &&
1633         op != -1 && e.Eoper != OPd_ld)
1634         goto Ldefault;
1635 L5:
1636     switch (e.Eoper)
1637     {
1638         case OPcomma:
1639             docommas(cdb,&e);
1640             goto L5;
1641 
1642         case OPvar:
1643             notreg(e);
1644             goto L2;
1645 
1646         case OPind:
1647         L2:
1648             if (op != -1)
1649             {
1650                 if (e.Ecount && e.Ecount != e.Ecomsub &&
1651                     (i = cse_get(e, 0)) >= 0)
1652                 {
1653                     immutable ubyte[8] b2 = [0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8];
1654 
1655                     cdb.genf2(0xD8,b2[op] + i);        // Fop ST(i)
1656                 }
1657                 else
1658                 {
1659                     getlvalue87(cdb,&cs,e,0);
1660                     makesure87(cdb,eleft,eoffset,0,0);
1661                     cs.Iop = ESC(mf,0);
1662                     cs.Irm |= modregrm(0,op,0);
1663                     cdb.gen(&cs);
1664                 }
1665             }
1666             else
1667             {
1668                 push87(cdb);
1669                 switch (ty)
1670                 {
1671                     case TYfloat:
1672                     case TYdouble:
1673                     case TYifloat:
1674                     case TYidouble:
1675                     case TYcfloat:
1676                     case TYcdouble:
1677                     case TYdouble_alias:
1678                         loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0); // FLD var
1679                         break;
1680                     case TYldouble:
1681                     case TYildouble:
1682                     case TYcldouble:
1683                         loadea(cdb,e,&cs,0xDB,5,0,0,0);      // FLD var
1684                         break;
1685                     default:
1686                         printf("ty = x%x\n", ty);
1687                         assert(0);
1688                 }
1689                 note87(e,0,0);
1690             }
1691             break;
1692 
1693         case OPd_f:
1694         case OPf_d:
1695         case OPd_ld:
1696             mf1 = (tybasic(e.EV.E1.Ety) == TYfloat || tybasic(e.EV.E1.Ety) == TYifloat)
1697                     ? MFfloat : MFdouble;
1698             if (op != -1 && global87.stackused)
1699                 note87(eleft,eoffset,0);    // don't trash this value
1700             if (e.EV.E1.Eoper == OPvar || e.EV.E1.Eoper == OPind)
1701             {
1702                 static if (1)
1703                 {
1704                   L4:
1705                     getlvalue87(cdb,&cs,e.EV.E1,0);
1706                     cs.Iop = ESC(mf1,0);
1707                     if (op != -1)
1708                     {
1709                         cs.Irm |= modregrm(0,op,0);
1710                         makesure87(cdb,eleft,eoffset,0,0);
1711                     }
1712                     else
1713                     {
1714                         cs.Iop |= 1;
1715                         push87(cdb);
1716                     }
1717                     cdb.gen(&cs);                     // FLD / Fop
1718                 }
1719                 else
1720                 {
1721                     loadea(cdb,e.EV.E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e.EV.E1 */
1722                 }
1723 
1724                 // Variable cannot be put into a register anymore
1725                 if (e.EV.E1.Eoper == OPvar)
1726                     notreg(e.EV.E1);
1727                 freenode(e.EV.E1);
1728             }
1729             else
1730             {
1731                 retregs = mST0;
1732                 codelem(cdb,e.EV.E1,&retregs,false);
1733                 if (op != -1)
1734                 {
1735                     makesure87(cdb,eleft,eoffset,1,0);
1736                     cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
1737                     pop87();
1738                 }
1739             }
1740             break;
1741 
1742         case OPs64_d:
1743             if (e.EV.E1.Eoper == OPvar ||
1744                 (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
1745             {
1746                 getlvalue87(cdb,&cs,e.EV.E1,0);
1747                 cs.Iop = 0xDF;
1748                 push87(cdb);
1749                 cs.Irm |= modregrm(0,5,0);
1750                 cdb.gen(&cs);                     // FILD m64
1751                 // Variable cannot be put into a register anymore
1752                 if (e.EV.E1.Eoper == OPvar)
1753                     notreg(e.EV.E1);
1754                 freenode(e.EV.E1);
1755             }
1756             else if (I64)
1757             {
1758                 retregs = ALLREGS;
1759                 codelem(cdb,e.EV.E1,&retregs,false);
1760                 reg = findreg(retregs);
1761                 cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
1762                 code_orrex(cdb.last(), REX_W);
1763                 push87(cdb);
1764                 cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
1765             }
1766             else
1767             {
1768                 retregs = ALLREGS;
1769                 codelem(cdb,e.EV.E1,&retregs,false);
1770                 reg = findreglsw(retregs);
1771                 cdb.genfltreg(STO,reg,0);         // MOV floatreg,reglsw
1772                 reg = findregmsw(retregs);
1773                 cdb.genfltreg(STO,reg,4);         // MOV floatreg+4,regmsw
1774                 push87(cdb);
1775                 cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
1776             }
1777             if (op != -1)
1778             {
1779                 makesure87(cdb,eleft,eoffset,1,0);
1780                 cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
1781                 pop87();
1782             }
1783             break;
1784 
1785         case OPconst:
1786             ldop = loadconst(e, 0);
1787             if (ldop)
1788             {
1789                 push87(cdb);
1790                 cdb.genf2(0xD9,ldop);          // FLDx
1791                 if (op != -1)
1792                 {
1793                     cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
1794                     pop87();
1795                 }
1796             }
1797             else
1798             {
1799                 assert(0);
1800             }
1801             break;
1802 
1803         case OPu16_d:
1804         {
1805             /* This opcode should never be generated        */
1806             /* (probably shouldn't be for 16 bit code too)  */
1807             assert(!I32);
1808 
1809             if (op != -1)
1810                 note87(eleft,eoffset,0);    // don't trash this value
1811             retregs = ALLREGS & mLSW;
1812             codelem(cdb,e.EV.E1,&retregs,false);
1813             regwithvalue(cdb,ALLREGS & mMSW,0,&reg,0);  // 0-extend
1814             retregs |= mask(reg);
1815             mf1 = MFlong;
1816             goto L3;
1817         }
1818 
1819         case OPs16_d:       mf1 = MFword;   goto L6;
1820         case OPs32_d:       mf1 = MFlong;   goto L6;
1821         L6:
1822             if (e.Ecount)
1823                 goto Ldefault;
1824             if (op != -1)
1825                 note87(eleft,eoffset,0);    // don't trash this value
1826             if (e.EV.E1.Eoper == OPvar ||
1827                 (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
1828             {
1829                 goto L4;
1830             }
1831             else
1832             {
1833                 retregs = ALLREGS;
1834                 codelem(cdb,e.EV.E1,&retregs,false);
1835             L3:
1836                 if (I16 && e.Eoper != OPs16_d)
1837                 {
1838                     /* MOV floatreg+2,reg   */
1839                     reg = findregmsw(retregs);
1840                     cdb.genfltreg(STO,reg,REGSIZE);
1841                     retregs &= mLSW;
1842                 }
1843                 reg = findreg(retregs);
1844                 cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
1845                 if (op != -1)
1846                 {
1847                     makesure87(cdb,eleft,eoffset,0,0);
1848                     cdb.genfltreg(ESC(mf1,0),op,0);   // Fop floatreg
1849                 }
1850                 else
1851                 {
1852                     /* FLD long ptr floatreg        */
1853                     push87(cdb);
1854                     cdb.genfltreg(ESC(mf1,1),0,0);
1855                 }
1856             }
1857             break;
1858         default:
1859         Ldefault:
1860             retregs = mST0;
1861             codelem(cdb,e,&retregs,2);
1862 
1863             if (op != -1)
1864             {
1865                 makesure87(cdb,eleft,eoffset,1,(op == 0 || op == 1));
1866                 pop87();
1867                 if (op == 4 || op == 6)     // sub or div
1868                 {
1869                     code *cl = cdb.last();
1870                     if (cl && cl.Iop == 0xD9 && cl.Irm == 0xC9)   // FXCH ST(1)
1871                     {   cl.Iop = NOP;
1872                         opr = op;           // reverse operands
1873                     }
1874                 }
1875                 cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
1876             }
1877             break;
1878     }
1879     if (op == 3)                    // FCOMP
1880     {   pop87();                    // extra pop was done
1881         cg87_87topsw(cdb);
1882     }
1883     fixresult87(cdb,e,((op == 3) ? mPSW : mST0),pretregs);
1884     if (NDPP)
1885         printf("-load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);
1886 }
1887 
/********************************
 * Decide the operand order of an x87 compare.
 * The decision has to mirror the way load87() treats the operand.
 * Params:
 *      e = operand being examined; for a comma expression only its
 *          right-most operand is considered
 * Returns:
 *      0 if the compare is to be done forwards,
 *      1 if it is to be done backwards
 */

int cmporder87(elem *e)
{
    //printf("cmporder87(%p)\n",e);

    // The value of (a, b) is b, so step over any comma operators
    while (e.Eoper == OPcomma)
        e = e.EV.E2;

    int backwards;
    switch (e.Eoper)
    {
        case OPd_f:
        case OPf_d:
        case OPd_ld:
        {
            // Forwards only if the converted operand is a variable or an indirection
            elem *e1 = e.EV.E1;
            backwards = (e1.Eoper == OPvar || e1.Eoper == OPind) ? 0 : 1;
            break;
        }

        case OPconst:
        {
            // Backwards for constants accepted by loadconst() and for long doubles
            if (loadconst(e, 0))
            {
                //printf("ret 1, loadconst(e) = %d\n", loadconst(e));
                backwards = 1;
            }
            else
            {
                const tym_t ty = tybasic(e.Ety);
                backwards = (ty == TYldouble || ty == TYildouble) ? 1 : 0;
            }
            break;
        }

        case OPvar:
        case OPind:
        {
            // Long double operands are compared backwards, others forwards
            const tym_t ty = tybasic(e.Ety);
            backwards = (ty == TYldouble || ty == TYildouble) ? 1 : 0;
            break;
        }

        case OPu16_d:
        case OPs16_d:
        case OPs32_d:
            backwards = 0;
            break;

        case OPs64_d:
        default:
            backwards = 1;
            break;
    }
    return backwards;
}
1946 
/*******************************
 * Generate code for an assignment (OPeq) to a float, double or
 * long double, real or imaginary.
 * Params:
 *      cdb = sink for the generated code
 *      e = the OPeq node; e.EV.E1 is the destination, e.EV.E2 the value
 *      pretregs = where the caller wants the result of the assignment
 */

void eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("+eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);

    // Get the right hand side into ST0 (and the flags, if asked for)
    regm_t retregs = mST0 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);

    // Opcode and reg field of the store, selected by the destination type
    opcode_t op1;
    uint op2;
    const tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYfloat:
        case TYifloat:
            op1 = ESC(MFfloat,1);
            op2 = 3;
            break;

        case TYdouble:
        case TYidouble:
        case TYdouble_alias:
            op1 = ESC(MFdouble,1);
            op2 = 3;
            break;

        case TYldouble:
        case TYildouble:
            op1 = 0xDB;
            op2 = 7;
            break;

        default:
            assert(0);
    }

    // Does the caller want the value to remain available after the store?
    const bool keepValue = (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) != 0;
    if (!keepValue)
    {
        pop87();                            // FSTP e.EV.E1
    }
    else if (ty1 == TYldouble || ty1 == TYildouble)
    {
        // The long double store pops, so give it a copy of ST0 to consume
        push87(cdb);
        cdb.genf2(0xD9,0xC0);               // FLD ST(0)
        pop87();
    }
    else
    {
        op2 = 2;                            // FST e.EV.E1
    }

    // loadea() is not used to build the store: it doesn't work if ST(0)
    // gets saved to the stack by getlvalue()
    code cs;
    cs.Irex = 0;
    cs.Iflags = 0;
    cs.Iop = op1;
    if (keepValue)
    {
        // Make sure the value is still in ST0 once the address is computed
        elem *e2 = e.EV.E2;
        while (e2.Eoper == OPcomma)
            e2 = e2.EV.E2;
        note87(e2,0,0);
        getlvalue87(cdb, &cs, e.EV.E1, 0);
        makesure87(cdb,e2,0,0,1);
    }
    else
    {
        getlvalue87(cdb, &cs, e.EV.E1, 0);
    }
    cs.Irm |= modregrm(0,op2,0);            // OR in reg field
    cdb.gen(&cs);

    /* 10 byte reals really occupy 12 or 16 bytes,
     * so zero the extra 2 or 6 bytes.
     */
    if (op1 == 0xDB)
    {
        const ldsize = tysize(TYldouble);
        if (ldsize == 12 || ldsize == 16)
        {
            if (ldsize == 16)
                cs.Irex &= ~REX_W;
            cs.Iop = 0xC7;                  // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            if (ldsize == 16)
            {
                cs.IEV1.Voffset += 2;       // MOV EA+12,0
                cs.Iflags &= ~CFopsize;
                cdb.gen(&cs);
            }
        }
    }

    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}
2060 
/*******************************
 * Generate code for an assignment (OPeq) to a complex float,
 * complex double or complex long double.
 * The value is evaluated into mST01 and written as two halves of
 * tysize/2 bytes each: ST0 goes to the half at the higher address,
 * then the other stack entry goes to the half at offset 0.
 * Params:
 *      cdb = sink for the generated code
 *      e = the OPeq node; e.EV.E1 is the destination, e.EV.E2 the value
 *      pretregs = where the caller wants the result of the assignment
 */

void complex_eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);

    code cs;
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    cs.Irex = 0;

    // Get the right hand side into ST0 and ST1 (and the flags, if asked for)
    regm_t retregs = mST01 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);

    // Opcode and reg field of the store, selected by the destination type
    opcode_t op1;
    uint op2;
    const tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYcfloat:
            op1 = ESC(MFfloat,1);
            op2 = 3;
            break;

        case TYcdouble:
            op1 = ESC(MFdouble,1);
            op2 = 3;
            break;

        case TYcldouble:
            op1 = 0xDB;
            op2 = 7;
            break;

        default:
            assert(0);
    }

    // Does the caller want the value to remain available after the store?
    const bool keepValue = (*pretregs & (mST01 | mXMM0 | mXMM1)) != 0;
    bool swapHalves = false;        // true: bracket the second store with FXCH
    if (!keepValue)
    {
        // FSTP e.EV.E1, for both halves
        pop87();
        pop87();
    }
    else if (ty1 == TYcldouble)
    {
        // The long double store pops, so give it copies of both halves
        push87(cdb);
        push87(cdb);
        cdb.genf2(0xD9,0xC0 + 1);           // FLD ST(1)
        cdb.genf2(0xD9,0xC0 + 1);           // FLD ST(1)
        pop87();
        pop87();
    }
    else
    {
        op2 = 2;                            // FST e.EV.E1
        swapHalves = true;
    }

    // Store ST0 into the half at the higher address
    const uint halfSize = tysize(ty1) / 2;
    if (keepValue)
    {
        cs.Iflags = 0;
        cs.Irex = 0;
        cs.Iop = op1;
        getlvalue87(cdb, &cs, e.EV.E1, 0);
        cs.IEV1.Voffset += halfSize;
        cs.Irm |= modregrm(0, op2, 0);
        makesure87(cdb,e.EV.E2, halfSize, 0, 0);
        cdb.gen(&cs);
        genfwait(cdb);
        makesure87(cdb,e.EV.E2,  0, 1, 0);
    }
    else
    {
        loadea(cdb,e.EV.E1,&cs,op1,op2,halfSize,0,0);
        genfwait(cdb);
    }

    // Store the other half at offset 0. A non-popping store always writes
    // ST0, so swap the halves around it.
    if (swapHalves)
        cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
    cs.IEV1.Voffset -= halfSize;
    cdb.gen(&cs);
    if (swapHalves)
        cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)

    // 10 byte reals really occupy 12 or 16 bytes; zero the extra bytes
    // of both halves
    if (op1 == 0xDB)
    {
        const ldsize = tysize(TYldouble);
        if (ldsize == 12 || ldsize == 16)
        {
            cs.Iop = 0xC7;                  // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            if (ldsize == 12)
            {
                cs.IEV1.Voffset += 12;
                cdb.gen(&cs);               // MOV EA+22,0
            }
            else
            {
                cs.IEV1.Voffset += 2;       // MOV EA+12,0
                cs.Iflags &= ~CFopsize;
                cdb.gen(&cs);

                cs.IEV1.Voffset += 14;      // MOV EA+26,0
                cs.Iflags |= CFopsize;
                cdb.gen(&cs);

                cs.IEV1.Voffset += 2;       // MOV EA+28,0
                cs.Iflags &= ~CFopsize;
                cdb.gen(&cs);
            }
        }
    }

    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult_complex87(cdb, e,mST01 | mPSW,pretregs);
}
2178 
/*******************************
 * Generate code for an assignment combined with a conversion to an
 * integral type, i.e. handle (e1 = (int) e2).
 * The store is bracketed by a switch of the rounding mode to
 * CW_roundto0 and back to CW_roundtonearest.
 * Params:
 *      cdb = sink for the generated code
 *      e = the OPeq node; e.EV.E2 must be an OPd_s16, OPd_s32, OPd_u16
 *          or OPd_s64 conversion
 *      pretregs = must be 0, no result is produced
 */

private void cnvteq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPeq);
    assert(!*pretregs);

    // Evaluate the operand of the conversion into ST0
    regm_t retregs = mST0;
    elem *conv = e.EV.E2;
    elem_debug(conv);
    codelem(cdb,conv.EV.E1,&retregs,false);

    // Opcode and reg field of the store, selected by the conversion
    opcode_t op1;
    uint op2;
    switch (conv.Eoper)
    {
        case OPd_s16:
            op1 = ESC(MFword,1);
            op2 = 3;
            break;

        case OPd_u16:
        case OPd_s32:
            op1 = ESC(MFlong,1);
            op2 = 3;
            break;

        case OPd_s64:
            op1 = 0xDF;
            op2 = 7;
            break;

        default:
            assert(0);
    }
    freenode(conv);

    genfwait(cdb);
    genrnd(cdb, CW_roundto0);               // FLDCW roundto0

    // Store into the destination; ST0 is no longer tracked afterwards
    pop87();
    code cs;
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    elem *dst = e.EV.E1;
    if (dst.Eoper == OPvar)
        notreg(dst);                        // cannot be put in register anymore
    loadea(cdb,dst,&cs,op1,op2,0,0,0);

    genfwait(cdb);
    genrnd(cdb, CW_roundtonearest);         // FLDCW roundtonearest

    freenode(dst);
}
2229 
/**********************************
 * Generate code for the x87 op-assignment operators +=, -=, *=, /= and %=.
 * OPpostinc and OPpostdec are encoded the same way as += and -=.
 * Float, double and long double destinations (real or imaginary) are
 * handled here; complex destinations are forwarded to opmod_complex87()
 * for %= and to opass_complex87() for everything else.
 * Params:
 *      cdb = sink for the generated code
 *      e = the op-assignment node; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = where the caller wants the result
 */

void opass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    uint op;
    opcode_t opld;
    opcode_t op1;
    uint op2;
    tym_t ty1 = tybasic(e.EV.E1.Ety);

    // op1/op2: opcode and reg field used to store the result back into E1
    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:      op1 = ESC(MFdouble,1);  op2 = 3; break;
        case TYifloat:
        case TYfloat:       op1 = ESC(MFfloat,1);   op2 = 3; break;
        case TYildouble:
        case TYldouble:     op1 = 0xDB;             op2 = 7; break;

        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            if (e.Eoper == OPmodass)
               opmod_complex87(cdb, e, pretregs);
            else
               opass_complex87(cdb, e, pretregs);
            return;

        default:
            assert(0);
    }
    const bool isLongDouble = (ty1 == TYldouble || ty1 == TYildouble);

    // op: reg field (pre-shifted) for the memory operand form,
    // opld: second byte of the 0xDE "FopP ST(1)" form used for long doubles
    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:      op = 0 << 3;    opld = 0xC1;    break;  // FADD
        case OPpostdec:
        case OPminass:      op = 5 << 3;    opld = 0xE1; /*0xE9;*/  break;  // FSUBR
        case OPmulass:      op = 1 << 3;    opld = 0xC9;    break;  // FMUL
        case OPdivass:      op = 7 << 3;    opld = 0xF1;    break;  // FDIVR
        case OPmodass:      break;          // op and opld are not used for %=
        default:            assert(0);
    }

    // Evaluate the rvalue into ST0, then compute the address of the lvalue,
    // making sure the rvalue is back in ST0 afterwards
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);     // evaluate rvalue
    note87(e.EV.E2,0,0);
    getlvalue87(cdb,&cs,e.EV.E1,e.Eoper==OPmodass?mAX:0);
    makesure87(cdb,e.EV.E2,0,0,0);

    if (config.flags4 & CFG4fdivcall && e.Eoper == OPdivass)
    {
        // Division is done by a library routine rather than by FDIV
        push87(cdb);
        cs.Iop = op1;
        if (isLongDouble)
            cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
        cdb.gen(&cs);
        cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
        callclib(cdb,e,CLIB.fdiv87,&retregs,0);
        pop87();
    }
    else if (e.Eoper == OPmodass)
    {
        /*
         *          fld     tbyte ptr y
         *          fld     tbyte ptr x             // ST = x, ST1 = y
         *  FM1:    // We don't use fprem1 because for some inexplicable
         *          // reason we get -5 when we do _modulo(15, 10)
         *          fprem                           // ST = ST % ST1
         *          fstsw   word ptr sw
         *          fwait
         *          mov     AH,byte ptr sw+1        // get msb of status word in AH
         *          sahf                            // transfer to flags
         *          jp      FM1                     // continue till ST < ST1
         *          fstp    ST(1)                   // leave remainder on stack
         */
        push87(cdb);
        cs.Iop = op1;
        if (isLongDouble)
            cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
        cdb.gen(&cs);                       // FLD   e.EV.E1

        cdb.gen2(0xD9, 0xF8);               // FPREM
        code *cfm1 = cdb.last();
        genjmpifC2(cdb, cfm1);              // JC2 FM1
        cdb.genf2(0xDD,0xD8 + 1);           // FSTP ST(1)

        pop87();
    }
    else if (isLongDouble)
    {
        // Load the lvalue and combine it with the rvalue on the stack
        push87(cdb);
        cs.Iop = op1;
        cs.Irm |= modregrm(0, 5, 0);        // FLD tbyte ptr ...
        cdb.gen(&cs);                       // FLD   e.EV.E1
        cdb.genf2(0xDE,opld);               // FopP  ST(1)
        pop87();
    }
    else
    {
        // float/double: operate directly with the lvalue as memory operand
        cs.Iop = op1 & ~1;
        cs.Irm |= op;
        cdb.gen(&cs);                       // Fop e.EV.E1
    }

    if (*pretregs & mPSW)
        genftst(cdb,e,0);                   // FTST ST0

    // if want result in registers
    if (*pretregs & (mST0 | ALLREGS | mBP))
    {
        if (isLongDouble)
        {
            // the long double store pops, so give it a copy of ST0
            push87(cdb);
            cdb.genf2(0xD9,0xC0);           // FLD ST(0)
            pop87();
        }
        else
            op2 = 2;                        // FST e.EV.E1
    }
    else
    {   // FSTP
        pop87();
    }

    // Write the result back; NEWREG replaces whatever reg field was OR'ed
    // into cs.Irm above
    cs.Iop = op1;
    NEWREG(cs.Irm,op2);                     // FSTx e.EV.E1
    freenode(e.EV.E1);
    cdb.gen(&cs);
    genfwait(cdb);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}
2362 
/***********************************
 * Perform %= where E1 is complex and E2 is real or imaginary.
 * Both halves of E1 are reduced modulo E2 with FPREM and written back.
 * When the result is also wanted (mST01 or mPSW set in *pretregs) it is
 * left on the x87 stack with the half stored at the higher address in ST0
 * and the half stored at offset 0 in ST1.
 * Params:
 *      cdb = sink for the generated code
 *      e = the OPmodass node; e.EV.E1 is the complex lvalue, e.EV.E2 the divisor
 *      pretregs = where the caller wants the result
 */

private void opmod_complex87(ref CodeBuilder cdb, elem *e,regm_t *pretregs)
{

    /*          fld     E2
                fld     E1.re
        FM1:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                jp      FM1
                fxch    ST(1)
                fld     E1.im
        FM2:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                jp      FM2
                fstp    ST(1)
     */

    code cs;

    tym_t ty1 = tybasic(e.EV.E1.Ety);
    uint sz2 = _tysize[ty1] / 2;            // size of one half of E1

    // Evaluate the divisor, then the address of E1, keeping the divisor in ST0
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);         // FLD E2
    note87(e.EV.E2,0,0);
    getlvalue87(cdb,&cs,e.EV.E1,0);
    makesure87(cdb,e.EV.E2,0,0,0);

    push87(cdb);
    switch (ty1)
    {
        case TYcdouble:  cs.Iop = ESC(MFdouble,1);      break;
        case TYcfloat:   cs.Iop = ESC(MFfloat,1);       break;
        case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break;
        default:
            assert(0);
    }
    cdb.gen(&cs);                               // FLD E1.re

    cdb.gen2(0xD9, 0xF8);                       // FPREM
    code *cfm1 = cdb.last();
    genjmpifC2(cdb, cfm1);                      // JC2 FM1
    cdb.genf2(0xD9, 0xC8 + 1);                  // FXCH ST(1)

    push87(cdb);
    cs.IEV1.Voffset += sz2;
    cdb.gen(&cs);                               // FLD E1.im

    cdb.gen2(0xD9, 0xF8);                       // FPREM
    code *cfm2 = cdb.last();
    genjmpifC2(cdb, cfm2);                      // JC2 FM2
    cdb.genf2(0xDD,0xD8 + 1);                   // FSTP ST(1)

    pop87();

    // Now ST0 = E1.im % E2, ST1 = E1.re % E2, and cs addresses E1.im
    if (*pretregs & (mST01 | mPSW))
    {
        if (ty1 == TYcldouble)
        {
            /* The reg field of cs.Irm already holds 5 (FLD tbyte), so the
             * only store that can be formed from it is the popping 0xDB /7.
             * Store copies of both halves so the result stays on the stack.
             */
            push87(cdb);
            push87(cdb);
            cdb.genf2(0xD9,0xC1);       // FLD ST(1)
            cdb.genf2(0xD9,0xC1);       // FLD ST(1)
            cs.Irm |= modregrm(0, 7, 0);
            cdb.gen(&cs);               // FSTP mreal.im
            cs.IEV1.Voffset -= sz2;
            cdb.gen(&cs);               // FSTP mreal.re
            pop87();
            pop87();
        }
        else
        {
            // FST always stores ST0, so bring the real half to the top
            // for its store and then restore the order
            cs.Irm |= modregrm(0, 2, 0);
            cdb.gen(&cs);               // FST mreal.im
            cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
            cs.IEV1.Voffset -= sz2;
            cdb.gen(&cs);               // FST mreal.re
            cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
        }
        retregs = mST01;
    }
    else
    {
        cs.Irm |= modregrm(0, 3, 0);
        cdb.gen(&cs);            // FSTP mreal.im
        cs.IEV1.Voffset -= sz2;
        cdb.gen(&cs);            // FSTP mreal.re
        pop87();
        pop87();
        retregs = 0;
    }
    freenode(e.EV.E1);
    genfwait(cdb);
    fixresult_complex87(cdb,e,retregs,pretregs);
}
2447 
/**********************************
 * Perform +=, -=, *= and /= for the lvalue being complex.
 * OPpostinc and OPpostdec are encoded the same way as += and -=.
 * On the x87 stack a complex value has the half stored at the higher
 * address (im) in ST0 and the half stored at offset 0 (re) in ST1.
 * Multiplication and division by a complex E2 are done by the
 * CLIB.cmul and CLIB.cdiv library routines.
 * Params:
 *      cdb = sink for the generated code
 *      e = the op-assignment node; e.EV.E1 is the complex lvalue
 *      pretregs = where the caller wants the result
 */

private void opass_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    regm_t idxregs;
    code cs;
    uint op;
    opcode_t op2;

    tym_t ty1 = tybasic(e.EV.E1.Ety);
    uint sz2 = _tysize[ty1] / 2;                // size of one half of E1

    // op: reg field (pre-shifted) for the memory operand form,
    // op2: base of the second byte of the 0xDE "FopP ST(i),ST" form
    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:  op = 0 << 3;            // FADD
                        op2 = 0xC0;             // FADDP ST(i),ST
                        break;

        case OPpostdec:
        case OPminass:  op = 5 << 3;            // FSUBR
                        op2 = 0xE0;             // FSUBRP ST(i),ST
                        break;

        case OPmulass:  op = 1 << 3;            // FMUL
                        op2 = 0xC8;             // FMULP ST(i),ST
                        break;

        case OPdivass:  op = 7 << 3;            // FDIVR
                        op2 = 0xF0;             // FDIVRP ST(i),ST
                        break;

        default:        assert(0);
    }

    if (!tycomplex(e.EV.E2.Ety) &&
        (e.Eoper == OPmulass || e.Eoper == OPdivass))
    {
        // Scaling by a real or imaginary E2: duplicate E2 so that the
        // element-wise code at L1 can apply it to both halves
        retregs = mST0;
        codelem(cdb,e.EV.E2, &retregs, false);
        note87(e.EV.E2, 0, 0);
        getlvalue87(cdb,&cs, e.EV.E1, 0);
        makesure87(cdb,e.EV.E2,0,0,0);
        push87(cdb);
        cdb.genf2(0xD9,0xC0);                   // FLD ST(0)
        goto L1;
    }
    else
    {
        loadComplex(cdb,e.EV.E2);
        getlvalue87(cdb,&cs,e.EV.E1,0);
        makesure87(cdb,e.EV.E2,sz2,0,0);
        makesure87(cdb,e.EV.E2,0,1,0);
    }

    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:
        case OPpostdec:
        case OPminass:
        L1:
            // Element-wise operation: each half of E1 is combined with the
            // corresponding stack entry
            if (ty1 == TYcldouble)
            {
                push87(cdb);
                push87(cdb);
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xDE, op2 + 2);       // FopP ST(2),ST
                cdb.genf2(0xDE, op2 + 2);       // FopP ST(2),ST
                pop87();
                pop87();
                if (tyimaginary(e.EV.E2.Ety))
                {
                    // E2 is imaginary: the halves trade places and one
                    // of them changes sign
                    if (e.Eoper == OPmulass)
                    {
                        cdb.genf2(0xD9, 0xE0);   // FCHS
                        cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1)
                    }
                    else if (e.Eoper == OPdivass)
                    {
                        cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1)
                        cdb.genf2(0xD9, 0xE0);   // FCHS
                    }
                }
            L2:
                // Store a complex long double result; the tbyte store pops,
                // so copies are stored when the result is also wanted
                if (*pretregs & (mST01 | mPSW))
                {
                    push87(cdb);
                    push87(cdb);
                    cdb.genf2(0xD9,0xC1);       // FLD ST(1)
                    cdb.genf2(0xD9,0xC1);       // FLD ST(1)
                    retregs = mST01;
                }
                else
                    retregs = 0;
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0,7,0);
                cdb.gen(&cs);                   // FSTP e.EV.E1.im
                cs.IEV1.Voffset -= sz2;
                cdb.gen(&cs);                   // FSTP e.EV.E1.re
                pop87();
                pop87();

            }
            else
            {
                // float/double halves: operate with E1 as the memory operand
                ubyte rmop = cast(ubyte)(cs.Irm | op);
                ubyte rmfst = cs.Irm | modregrm(0,2,0);
                ubyte rmfstp = cs.Irm | modregrm(0,3,0);
                ubyte iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                opcode_t iop = (ty1 == TYcfloat) ? 0xD8 : 0xDC;

                cs.Iop = iop;
                cs.Irm = rmop;
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                           // Fop mreal.im
                if (tyimaginary(e.EV.E2.Ety) && (e.Eoper == OPmulass || e.Eoper == OPdivass))
                {
                    // E2 is imaginary: the halves trade places and one
                    // of them changes sign
                    if (e.Eoper == OPmulass)
                        cdb.genf2(0xD9, 0xE0);          // FCHS
                    cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);                       // Fop mreal.re
                    if (e.Eoper == OPdivass)
                        cdb.genf2(0xD9, 0xE0);          // FCHS
                    if (*pretregs & (mST01 | mPSW))
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfst;
                        cs.IEV1.Voffset += sz2;
                        cdb.gen(&cs);                   // FST mreal.im
                        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
                        cs.IEV1.Voffset -= sz2;
                        cdb.gen(&cs);                   // FST mreal.re
                        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
                        retregs = mST01;
                    }
                    else
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfstp;
                        cs.IEV1.Voffset += sz2;
                        cdb.gen(&cs);                   // FSTP mreal.im
                        pop87();
                        cs.IEV1.Voffset -= sz2;
                        cdb.gen(&cs);                   // FSTP mreal.re
                        pop87();
                        retregs = 0;
                    }
                    goto L3;
                }

                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    cdb.gen(&cs);               // FST mreal.im
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // Fop mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    cdb.gen(&cs);               // FSTP mreal.im
                    pop87();
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // Fop mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    retregs = 0;
                }
            }
        L3:
            freenode(e.EV.E1);
            genfwait(cdb);
            fixresult_complex87(cdb,e,retregs,pretregs);
            return;

        case OPmulass:
            // complex *= complex, done by the CLIB.cmul library routine
            push87(cdb);
            push87(cdb);
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                retregs = mST01;
                callclib(cdb, e, CLIB.cmul, &retregs, 0);
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD dword/qword ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                retregs = mST01;
                callclib(cdb, e, CLIB.cmul, &retregs, 0);
                if (*pretregs & (mST01 | mPSW))
                {
                    // FST always stores ST0, so bring the real half to the
                    // top for its store and then restore the order
                    cs.Irm |= modregrm(0, 2, 0);
                    cdb.gen(&cs);               // FST mreal.im
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    cdb.gen(&cs);               // FSTP mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        case OPdivass:
            // complex /= complex, done by the CLIB.cdiv library routine
            push87(cdb);
            push87(cdb);
            idxregs = idxregm(&cs);             // mask of index regs used
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                retregs = mST01;
                callclib(cdb, e, CLIB.cdiv, &retregs, idxregs);
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD dword/qword ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                retregs = mST01;
                callclib(cdb, e, CLIB.cdiv, &retregs, idxregs);
                if (*pretregs & (mST01 | mPSW))
                {
                    // FST always stores ST0, so bring the real half to the
                    // top for its store and then restore the order
                    cs.Irm |= modregrm(0, 2, 0);
                    cdb.gen(&cs);               // FST mreal.im
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    cdb.gen(&cs);               // FSTP mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        default:
            assert(0);
    }
}
2742 
/**************************
 * OPnegass
 *
 * Negate a floating point lvalue in place by XORing 0x80 into the byte that
 * holds its sign bit (one XOR per component for a complex lvalue). If the
 * caller wants the value, the negated lvalue is then loaded into ST0.
 *
 * Only real and imaginary types can be loaded as a result; for any other
 * type (including complex) the load switch asserts when *pretregs != 0.
 *
 * Params:
 *      cdb = code sink
 *      e = the OPnegass elem; e.EV.E1 is the lvalue being negated
 *      pretregs = where the result is wanted; 0 if the result is unused
 */

void cdnegass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    uint op;

    //printf("cdnegass87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    tym_t tyml = tybasic(e1.Ety);            // type of lvalue
    int sz = _tysize[tyml];

    code cs;
    getlvalue87(cdb,&cs,e1,0);

    /* If the EA is really an XMM register, modEA() will fail.
     * So disallow putting e1 into a register.
     * A better way would be to negate the XMM register in place.
     */
    if (e1.Eoper == OPvar)
        e1.EV.Vsym.Sflags &= ~GTregcand;

    modEA(cdb,&cs);
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;

    /* Offset from the start of the lvalue to the byte containing the sign bit
     * (of the imaginary part for complex types). Normally that is the last
     * byte, but when long doubles occupy more than 10 bytes of storage the
     * sign is in byte 9 of the 10 significant bytes, not in the padding.
     */
    int signoff = sz - 1;
    if (tysize(TYldouble) > 10)
    {
        if (tyml == TYldouble || tyml == TYildouble)
            signoff = 10 - 1;
        else if (tyml == TYcldouble)
            signoff = tysize(TYldouble) + 10 - 1;
    }
    cs.IEV1.Voffset += signoff;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR signoff[EA],0x80
    if (tycomplex(tyml))
    {
        cs.IEV1.Voffset -= sz / 2;      // step back to the sign byte of the real part
        cdb.gen(&cs);                   // XOR signoff-sz/2[EA],0x80
    }

    if (*pretregs)
    {
        switch (tyml)
        {
            case TYifloat:
            case TYfloat:               cs.Iop = 0xD9;  op = 0; break;
            case TYidouble:
            case TYdouble:
            case TYdouble_alias:        cs.Iop = 0xDD;  op = 0; break;
            case TYildouble:
            case TYldouble:             cs.Iop = 0xDB;  op = 5; break;
            default:
                assert(0);
        }
        NEWREG(cs.Irm,op);
        /* Undo exactly the adjustment applied above. Subtracting sz - 1 here
         * would leave the EA below the lvalue for padded long doubles, where
         * only 10 - 1 was added.
         */
        cs.IEV1.Voffset -= signoff;
        push87(cdb);
        cdb.gen(&cs);                   // FLD EA
        retregs = mST0;
    }
    else
        retregs = 0;

    freenode(e1);
    fixresult87(cdb,e,retregs,pretregs);
}
2816 
/************************
 * Take care of OPpostinc and OPpostdec.
 *
 * Loads the lvalue e.EV.E1 onto the x87 stack, combines it with the rvalue
 * e.EV.E2 (add for OPpostinc, reverse-subtract for OPpostdec) and stores the
 * updated value back into the lvalue with a popping store.
 * For complex lvalues both parts are loaded, but only the real part is
 * updated and stored back.
 *
 * Params:
 *      cdb = code sink
 *      e = the OPpostinc / OPpostdec elem
 *      pretregs = where the result is wanted; must not be 0
 */

void post87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    uint op;
    opcode_t op1;
    reg_t reg;

    //printf("post87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    code cs;
    assert(*pretregs);
    getlvalue87(cdb,&cs,e.EV.E1,0);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    // Pick the escape opcode and /reg field of the FLD matching the operand size
    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:
        case TYcdouble:     op1 = ESC(MFdouble,1);  reg = 0;        break;
        case TYifloat:
        case TYfloat:
        case TYcfloat:      op1 = ESC(MFfloat,1);   reg = 0;        break;
        case TYildouble:
        case TYldouble:
        case TYcldouble:    op1 = 0xDB;             reg = 5;        break;
        default:
            assert(0);
    }
    NEWREG(cs.Irm, reg);
    // From here on reg is the /reg field used for the FSTP back to the lvalue
    if (reg == 5)
        reg = 7;
    else
        reg = 3;
    cs.Iop = op1;
    push87(cdb);
    cdb.gen(&cs);                   // FLD e.EV.E1
    if (tycomplex(ty1))
    {
        uint sz = _tysize[ty1] / 2; // size of one component

        push87(cdb);
        cs.IEV1.Voffset += sz;      // address the second component
        cdb.gen(&cs);               // FLD e.EV.E1
        regm_t retregs = mST0;      // note kludge to only load real part
        codelem(cdb,e.EV.E2,&retregs,false); // load rvalue
        cdb.genf2(0xD8,             // FADD/FSUBR ST,ST2
            (e.Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2);
        NEWREG(cs.Irm,reg);
        pop87();                    // bookkeeping for the FSTP below
        cs.IEV1.Voffset -= sz;      // back to the first component
        cdb.gen(&cs);               // FSTP e.EV.E1
        genfwait(cdb);
        freenode(e.EV.E1);
        fixresult_complex87(cdb, e, mST01, pretregs);
        return;
    }

    if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS))
    {   // Want the result in a register
        push87(cdb);
        cdb.genf2(0xD9,0xC0);       // FLD ST0
    }
    if (*pretregs & mPSW)           // if result in flags
        genftst(cdb,e,0);           // FTST ST0
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);    // load rvalue
    pop87();                        // bookkeeping for the popping FADDP/FSUBRP
    op = (e.Eoper == OPpostinc) ? modregrm(3,0,1) : modregrm(3,5,1);
    cdb.genf2(0xDE,op);             // FADDP/FSUBRP ST1
    NEWREG(cs.Irm,reg);
    pop87();                        // bookkeeping for the FSTP below
    cdb.gen(&cs);                   // FSTP e.EV.E1
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mPSW | mST0,pretregs);
}
2895 
/************************
 * Do the following opcodes:
 *      OPd_u64
 *      OPld_u64
 *
 * Dispatches to the 32 bit or the 64 bit code generation sequence.
 * 16 bit targets are not supported, and the result must be wanted.
 *
 * Params:
 *      cdb = code sink
 *      e = the conversion elem
 *      pretregs = where the result is wanted; must not be 0
 */
void cdd_u64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);
    assert(*pretregs);

    if (I32)
    {
        cdd_u64_I32(cdb, e, pretregs);
        return;
    }
    cdd_u64_I64(cdb, e, pretregs);
}
2910 
/************************
 * 32 bit code generation for OPd_u64 / OPld_u64 (see cdd_u64).
 *
 * The operand is evaluated into ST0 and compared against an 80 bit constant
 * ("adjust") built in floatreg. Depending on the comparison the value is
 * either stored directly with a 64 bit FISTP, or adjust is subtracted first
 * and 0x8000_0000 is added back into the most significant register afterwards.
 * The x87 control word is saved at floatreg+12, replaced by the word stored
 * at floatreg+10 for the conversion, and restored on both paths.
 *
 * Params:
 *      cdb = code sink
 *      e = the conversion elem
 *      pretregs = where the result is wanted
 */
private void cdd_u64_I32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e       // (roundTo0<<16) | adjust
            fld         real ptr floatreg           // adjust (= 1/real.epsilon)
            fcomp
            fstsw       AX
            fstcw       floatreg+12
            fldcw       floatreg+10                 // roundTo0
            test        AH,1
            jz          L1                          // jae L1

            fld         real ptr floatreg           // adjust
            fsubp       ST(1), ST
            fistp       floatreg
            mov         EAX,floatreg
            add         EDX,floatreg+4
            fldcw       floatreg+12
            jmp         L2

    L1:
            fistp       floatreg
            mov         EAX,floatreg
            mov         EDX,floatreg+4
            fldcw       floatreg+12
    L2:
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);      // operand into ST0
    tym_t tym = e.Ety;
    retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg, reg2;
    allocreg(cdb,&retregs,&reg,tym);
    reg  = findreglsw(retregs);                 // least significant half of the result
    reg2 = findregmsw(retregs);                 // most significant half of the result
    movregconst(cdb,reg2,0x80000000,0);
    getregs(cdb,mask(reg2) | mAX);              // AX is also needed, for FSTSW AX

    // floatreg+0..+9 becomes the 80 bit constant, floatreg+10 the control word
    cdb.genfltreg(0xC7,0,0);
    code *cf1 = cdb.last();
    cf1.IFL2 = FLconst;
    cf1.IEV2.Vint = 0;                             // MOV floatreg+0,0
    cdb.genfltreg(STO,reg2,4);                      // MOV floatreg+4,reg2
    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0xFBF403E;                     // MOV floatreg+8,(roundTo0<<16)|adjust

    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                            // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                            // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                       // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                       // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);              // TEST AH,1
    code *cnop1 = gennop(null);                     // label L1
    genjmp(cdb,JE,FLcode,cast(block *)cnop1);       // JZ L1

    // Fall through: subtract adjust before converting, add 0x8000_0000 to the high half after
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                         // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    cdb.genfltreg(0x03,reg2,4);                     // ADD reg2,floatreg+4
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    code *cnop2 = gennop(null);                     // label L2
    genjmp(cdb,JMP,FLcode,cast(block *)cnop2);      // JMP L2

    // L1: convert directly
    cdb.append(cnop1);
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    cdb.genfltreg(LOD,reg2,4);                      // MOV reg2,floatreg+4
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    cdb.append(cnop2);

    pop87();                                        // the operand was popped by FISTP
    fixresult(cdb,e,retregs,pretregs);
}
2994 
/************************
 * 64 bit code generation for OPd_u64 / OPld_u64 (see cdd_u64).
 *
 * The operand is evaluated into ST0 and compared against an 80 bit constant
 * ("adjust") built in floatreg. Depending on the comparison the value is
 * either stored directly with a 64 bit FISTP, or adjust is subtracted first
 * and (0x8000_0000 << 32) is added to the loaded result afterwards.
 * The x87 control word is saved at floatreg+12, replaced by the word stored
 * at floatreg+10 for the conversion, and restored on both paths.
 *
 * Params:
 *      cdb = code sink
 *      e = the conversion elem
 *      pretregs = where the result is wanted
 */
private void cdd_u64_I64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e       // (roundTo0<<16) | adjust
            fld         real ptr floatreg           // adjust
            fcomp
            fstsw       AX
            fstcw       floatreg+12
            fldcw       floatreg+10                 // roundTo0
            test        AH,1
            jz          L1                          // jae L1

            fld         real ptr floatreg           // adjust
            fsubp       ST(1), ST
            fistp       floatreg
            mov         RAX,floatreg
            shl         RDX,32
            add         RAX,RDX
            fldcw       floatreg+12
            jmp         L2

    L1:
            fistp       floatreg
            mov         RAX,floatreg
            fldcw       floatreg+12
    L2:
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);      // operand into ST0
    tym_t tym = e.Ety;
    retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);            // reg receives the result
    regm_t regm2 = ALLREGS & ~retregs & ~mAX;   // scratch: not the result, not AX
    reg_t reg2;
    allocreg(cdb,&regm2,&reg2,tym);
    movregconst(cdb,reg2,0x80000000,0);
    getregs(cdb,mask(reg2) | mAX);              // AX is also needed, for FSTSW AX

    // floatreg+0..+9 becomes the 80 bit constant, floatreg+10 the control word
    cdb.genfltreg(0xC7,0,0);
    code *cf1 = cdb.last();
    cf1.IFL2 = FLconst;
    cf1.IEV2.Vint = 0;                             // MOV floatreg+0,0
    cdb.genfltreg(STO,reg2,4);                      // MOV floatreg+4,reg2
    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0xFBF403E;                     // MOV floatreg+8,(roundTo0<<16)|adjust

    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                            // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                            // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                       // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                       // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);              // TEST AH,1
    code *cnop1 = gennop(null);                     // label L1
    genjmp(cdb,JE,FLcode,cast(block *)cnop1);       // JZ L1

    // Fall through: subtract adjust before converting, add reg2 << 32 after
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                         // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    code_orrex(cdb.last(), REX_W);                  // make it a 64 bit load
    cdb.genc2(0xC1,(REX_W << 16) | modregrmx(3,4,reg2),32); // SHL reg2,32
    cdb.gen2(0x03,(REX_W << 16) | modregxrmx(3,reg,reg2));  // ADD reg,reg2
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    code *cnop2 = gennop(null);                     // label L2
    genjmp(cdb,JMP,FLcode,cast(block *)cnop2);      // JMP L2

    // L1: convert directly
    cdb.append(cnop1);
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    code_orrex(cdb.last(), REX_W);                  // make it a 64 bit load
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    cdb.append(cnop2);

    pop87();                                        // the operand was popped by FISTP
    fixresult(cdb,e,retregs,pretregs);
}
3081 
/************************
 * Do the following opcodes:
 *      OPd_u32
 *
 * The operand is evaluated into ST0 and stored to floatreg with the 64 bit
 * form of FISTP while the control word placed at floatreg+10 is in effect;
 * the result register is then loaded from the start of floatreg.
 * The caller's control word is saved at floatreg+12 and restored afterwards.
 *
 * Params:
 *      cdb = code sink
 *      e = the OPd_u32 elem
 *      pretregs = where the result is wanted
 */
void cdd_u32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);

    /* Generate:
            mov         floatreg+8,0x0FBF0000   // (roundTo0<<16)
            fstcw       floatreg+12
            fldcw       floatreg+10             // roundTo0
            fistp       floatreg
            fldcw       floatreg+12
            mov         EAX,floatreg
     */

    // Operand onto the x87 stack
    regm_t stregs = mST0;
    codelem(cdb, e.EV.E1, &stregs, false);

    // Choose the general purpose register that receives the result
    regm_t retregs = *pretregs & ALLREGS;
    if (retregs == 0)
        retregs = ALLREGS;
    reg_t dst;
    allocreg(cdb, &retregs, &dst, e.Ety);

    // Place the conversion control word in the upper half of the dword at floatreg+8
    cdb.genfltreg(0xC7, 0, 8);
    code *cmov = cdb.last();
    cmov.IFL2 = FLconst;
    cmov.IEV2.Vint = 0x0FBF0000;        // MOV floatreg+8,(roundTo0<<16)

    cdb.genfltreg(0xD9, 7, 12);         // FSTCW floatreg+12
    cdb.genfltreg(0xD9, 5, 10);         // FLDCW floatreg+10

    cdb.genfltreg(0xDF, 7, 0);          // FISTP qword ptr floatreg
    cdb.genfltreg(0xD9, 5, 12);         // FLDCW floatreg+12
    cdb.genfltreg(LOD, dst, 0);         // MOV dst,floatreg

    pop87();                            // FISTP popped the operand
    fixresult(cdb, e, retregs, pretregs);
}
3122 
/************************
 * Do the following opcodes:
 *      OPd_s16
 *      OPd_s32
 *      OPd_u16
 *      OPd_s64
 *
 * For 16 bit code the conversion is done by a runtime library call.
 * Otherwise the operand is evaluated into ST0, stack space is reserved for
 * the integer result and the saved control word, the value is stored with
 * FISTP while the CW_roundto0 control word is loaded, the saved control word
 * is restored, and the result is popped into registers.
 *
 * Params:
 *      cdb = code sink
 *      e = the conversion elem
 *      pretregs = where the result is wanted; must not be 0
 */

void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    uint mf,rf;         // escape opcode and /reg field of the FISTP to use
    reg_t reg;
    int clib;           // library routine used by the 16 bit path

    //printf("cnvt87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(*pretregs);
    tym_t tym = e.Ety;
    int sz = tysize(tym);
    int szoff = sz;     // offset on the stack of the saved control word, above the result

    switch (e.Eoper)
    {
        case OPd_s16:
            clib = CLIB.dblint87;
            mf = ESC(MFword,1);
            rf = 3;
            break;

        case OPd_u16:
            szoff = 4;                  // converted with the 32 bit store of OPd_s32
            goto case OPd_s32;

        case OPd_s32:
            clib = CLIB.dbllng87;
            mf = ESC(MFlong,1);
            rf = 3;
            break;

        case OPd_s64:
            clib = CLIB.dblllng;
            mf = 0xDF;
            rf = 7;
            break;

        default:
            assert(0);
    }

    if (I16)                       // C may change the default control word
    {
        if (clib == CLIB.dblllng)
        {   retregs = I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            codelem(cdb,e.EV.E1,&retregs,false);
            callclib(cdb,e,clib,pretregs,0);
        }
        else
        {   retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            codelem(cdb,e.EV.E1,&retregs,false);
            callclib(cdb,e,clib,pretregs,0);
            pop87();
        }
    }
    else if (1)         // always taken; the final else below is never reached
    {   //  Generate:
        //  sub     ESP,12
        //  fstcw   8[ESP]
        //  fldcw   roundto0
        //  fistp   long64 ptr [ESP]
        //  fldcw   8[ESP]
        //  pop     lsw
        //  pop     msw
        //  add     ESP,4

        // Bytes to reserve: the result slot plus 2 for the saved control word,
        // plus 2 more for a CW_roundto0 word when generating PIC code,
        // rounded up to a multiple of REGSIZE
        uint szpush = szoff + 2;
        if (config.flags3 & CFG3pic)
            szpush += 2;
        szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1);

        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        if (szpush == REGSIZE)
            cdb.gen1(0x50 + AX);                // PUSH EAX
        else
            cod3_stackadj(cdb, szpush);
        genfwait(cdb);
        cdb.genc1(0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]

        genfwait(cdb);

        if (config.flags3 & CFG3pic)
        {
            // Build the control word on the stack instead of referencing a data symbol
            cdb.genc(0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0
            code_orflag(cdb.last(), CFopsize);
            cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]
        }
        else
            genrnd(cdb, CW_roundto0);   // FLDCW roundto0

        pop87();                        // bookkeeping for the FISTP below

        genfwait(cdb);
        cdb.gen2sib(mf,modregrm(0,rf,4),modregrm(0,4,SP));                   // FISTP [ESP]

        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
                retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);                                           // FWAIT
        cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]

        // Pop the result; szpush tracks how many reserved bytes remain on the stack
        if (szoff > REGSIZE)
        {   szpush -= REGSIZE;
            genpop(cdb,findreglsw(retregs));       // POP lsw
        }
        szpush -= REGSIZE;
        genpop(cdb,reg);                           // POP reg

        if (szpush)
            cod3_stackadj(cdb, -szpush);           // release what is left
        fixresult(cdb,e,retregs,pretregs);
    }
    else
    {
        // This is incorrect. For -inf and nan, the 8087 returns the largest
        // negative int (0x80000....). For -inf, 0x7FFFF... should be returned,
        // and for nan, 0 should be returned.
        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        genfwait(cdb);
        genrnd(cdb, CW_roundto0);                  // FLDCW roundto0

        pop87();
        cdb.genfltreg(mf,rf,0);                    // FISTP floatreg
        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
                retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);

        if (sz > REGSIZE)
        {
            cdb.genfltreg(LOD,reg,REGSIZE);          // MOV reg,floatreg + REGSIZE
                                                     // MOV lsreg,floatreg
            cdb.genfltreg(LOD,findreglsw(retregs),0);
        }
        else
            cdb.genfltreg(LOD,reg,0);                // MOV reg,floatreg
        genrnd(cdb, CW_roundtonearest);              // FLDCW roundtonearest
        fixresult(cdb,e,retregs,pretregs);
    }
}
3279 
/************************
 * Do OPrndtol.
 *
 * Evaluates the operand into ST0, stores it to floatreg as a 2, 4 or 8 byte
 * integer with FISTP (no control word change is made here), and loads the
 * result into general purpose register(s). If no result is wanted the
 * operand is merely evaluated.
 *
 * Params:
 *      cdb = code sink
 *      e = the OPrndtol elem
 *      pretregs = where the result is wanted; 0 if the result is unused
 */

void cdrndtol(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (*pretregs == 0)
    {
        // Result unused: evaluate the operand only
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    regm_t stregs = mST0;
    codelem(cdb,e.EV.E1,&stregs,false);

    tym_t tym = e.Ety;
    uint sz = tysize(tym);

    // Escape opcode and /reg field of the FISTP for this integer size
    ubyte esc, digit;
    if (sz == 2)
    {
        esc = 0xDF;
        digit = 3;
    }
    else if (sz == 4)
    {
        esc = 0xDB;
        digit = 3;
    }
    else if (sz == 8)
    {
        esc = 0xDF;
        digit = 7;
    }
    else
        assert(0);

    pop87();
    cdb.genfltreg(esc,digit,0);         // FISTP floatreg

    regm_t retregs = *pretregs & (ALLREGS | mBP);
    if (retregs == 0)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);
    genfwait(cdb);                      // FWAIT

    if (tysize(tym) <= REGSIZE)
    {
        // Result fits in a single register
        cdb.genfltreg(LOD,reg,0);       // MOV reg,floatreg
        if (tysize(tym) == 8 && I64)
            code_orrex(cdb.last(), REX_W);
    }
    else
    {
        // Result needs a register pair
        cdb.genfltreg(LOD,reg,REGSIZE);             // MOV reg,floatreg + REGSIZE
        cdb.genfltreg(LOD,findreglsw(retregs),0);   // MOV lsreg,floatreg
    }
    fixresult(cdb,e,retregs,pretregs);
}
3336 
/*************************
 * Do OPscale, OPyl2x, OPyl2xp1.
 *
 * Both operands are evaluated onto the x87 stack, the first operand is
 * brought to ST(1), and the instruction for the operator is emitted.
 * One stack entry is consumed, leaving the result in ST0.
 *
 * Params:
 *      cdb = code sink
 *      e = the binary elem
 *      pretregs = where the result is wanted; must not be 0
 */

void cdscale(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs != 0);

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    regm_t stregs = mST0;
    codelem(cdb,e1,&stregs,false);
    note87(e1,0,0);
    codelem(cdb,e2,&stregs,false);
    makesure87(cdb,e1,0,1,0);           // now have x,y on stack; need y,x

    const oper = e.Eoper;
    if (oper == OPscale)
    {
        cdb.genf2(0xD9,0xFD);           // FSCALE
        cdb.genf2(0xDD,0xD8 + 1);       // FSTP ST(1)
    }
    else if (oper == OPyl2x)
        cdb.genf2(0xD9,0xF1);           // FYL2X
    else if (oper == OPyl2xp1)
        cdb.genf2(0xD9,0xF9);           // FYL2XP1
    else
        assert(0);

    pop87();
    fixresult87(cdb,e,mST0,pretregs);
}
3371 
3372 
/**********************************
 * Unary -, absolute value, square root, sine, cosine, round to integer
 *
 * Evaluates the operand into ST0 and applies the single x87 instruction
 * (escape byte 0xD9) that corresponds to the operator.
 *
 * Params:
 *      cdb = code sink
 *      e = the unary elem (OPneg, OPabs, OPsqrt, OPsin, OPcos or OPrint)
 *      pretregs = where the result is wanted; must not be 0
 */

void neg87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("neg87()\n");

    assert(*pretregs);

    // Second opcode byte of the instruction
    opcode_t op2;
    switch (e.Eoper)
    {
        case OPneg:
            op2 = 0xE0;                 // FCHS
            break;

        case OPabs:
            op2 = 0xE1;                 // FABS
            break;

        case OPsqrt:
            op2 = 0xFA;                 // FSQRT
            break;

        case OPsin:
            op2 = 0xFE;                 // FSIN
            break;

        case OPcos:
            op2 = 0xFF;                 // FCOS
            break;

        case OPrint:
            op2 = 0xFC;                 // FRNDINT
            break;

        default:
            assert(0);
    }

    regm_t stregs = mST0;
    codelem(cdb,e.EV.E1,&stregs,false);
    cdb.genf2(0xD9,op2);
    fixresult87(cdb,e,mST0,pretregs);
}
3398 
/**********************************
 * Unary - for complex operands
 *
 * Evaluates the operand into ST0/ST1 and changes the sign of both stack
 * entries, exchanging them after each sign change so that they finish in
 * their original order.
 *
 * Params:
 *      cdb = code sink
 *      e = the OPneg elem
 *      pretregs = where the result is wanted
 */

void neg_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPneg);

    regm_t stregs = mST01;
    codelem(cdb,e.EV.E1,&stregs,false);

    // Once for each of the two components
    foreach (_; 0 .. 2)
    {
        cdb.genf2(0xD9,0xE0);           // FCHS
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    }

    fixresult_complex87(cdb,e,mST01,pretregs);
}
3414 
/*********************************
 * Load a real or imaginary floating point value through an lvalue into ST0.
 *
 * The addressing mode is always computed; the FLD itself is generated only
 * when a result is wanted.
 *
 * Params:
 *      cdb = code sink
 *      e = elem whose lvalue is loaded
 *      pretregs = where the result is wanted; 0 if the result is unused
 */

void cdind87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdind87(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    code cs;

    getlvalue87(cdb,&cs,e,0);           // get addressing mode
    if (!*pretregs)
        return;

    // Select the FLD encoding for the operand size
    const tym_t ty = tybasic(e.Ety);
    if (ty == TYfloat || ty == TYifloat)
        cs.Iop = 0xD9;
    else if (ty == TYidouble || ty == TYdouble || ty == TYdouble_alias)
        cs.Iop = 0xDD;
    else if (ty == TYildouble || ty == TYldouble)
    {
        cs.Iop = 0xDB;
        cs.Irm |= modregrm(0,5,0);
    }
    else
        assert(0);

    push87(cdb);
    cdb.gen(&cs);                       // FLD EA
    fixresult87(cdb,e,mST0,pretregs);
}
3452 
/************************************
 * Reset statics for another .obj file.
 *
 * Zero-fills the file-level `oldd` state.
 */

void cg87_reset()
{
    memset(&oldd, 0, typeof(oldd).sizeof);
}
3461 
3462 
/*****************************************
 * Generate an FLDCW that loads the control word cw.
 *
 * For PIC code the control word is first written to floatreg and loaded
 * from there. Otherwise it is loaded from a read-only data symbol; the
 * symbols for CW_roundto0 and CW_roundtonearest are created on first use
 * and cached in `oldd`.
 *
 * Params:
 *      cdb = code sink
 *      cw = control word to load; any value other than CW_roundto0 selects
 *           the CW_roundtonearest symbol in the non-PIC case
 */

private void genrnd(ref CodeBuilder cdb, short cw)
{
    if (config.flags3 & CFG3pic)
    {
        cdb.genfltreg(0xC7, 0, 0);      // MOV floatreg, cw
        code *cmov = cdb.last();
        cmov.IFL2 = FLconst;
        cmov.IEV2.Vuns = cw;

        cdb.genfltreg(0xD9, 5, 0);      // FLDCW floatreg
        return;
    }

    if (!oldd.round)                    // control word symbols not emitted yet
    {
        oldd.round = 1;

        short cwi = CW_roundto0;        // round to 0
        oldd.roundto0 = out_readonly_sym(TYshort,&cwi,2);
        cwi = CW_roundtonearest;        // round to nearest
        oldd.roundtonearest = out_readonly_sym(TYshort,&cwi,2);
    }

    Symbol *cwsym;
    if (cw == CW_roundto0)
        cwsym = oldd.roundto0;
    else
        cwsym = oldd.roundtonearest;

    // FLDCW cwsym
    code cs;
    cs.Iop = 0xD9;
    cs.Iflags = CFoff;
    cs.Irex = 0;
    cs.IEV1.Vsym = cwsym;
    cs.IFL1 = cwsym.Sfl;
    cs.IEV1.Voffset = 0;
    cs.Irm = modregrm(0,5,BPRM);
    cdb.gen(&cs);
}
3503 
3504 /************************* Complex Numbers *********************/
3505 
/***************************
 * Set the PSW based on the state of ST01.
 *
 * Both stack entries are compared against 0. The second comparison is
 * skipped (JNE / JP to L1) when the first one already found a non-zero
 * value or a NAN.
 *
 * Params:
 *      cdb = code sink
 *      e = not referenced by this function
 *      pop = 1 if ST0 and ST1 should be popped after the test, 0 to keep them
 */

private void genctst(ref CodeBuilder cdb,elem *e,int pop)
{
    assert(pop == 0 || pop == 1);

    // Generate:
    //  if (NOSAHF)
    //          FLDZ
    //          FUCOMIP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FLDZ
    //          FUCOMIP ST(2)
    //      L1:
    //        if (pop)
    //          FPOP
    //          FPOP
    //  else if (pop)
    //          FLDZ
    //          FUCOMPP
    //          FSTSW   AX
    //          SAHF
    //          FLDZ
    //          FUCOMPP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    //  else
    //          FLDZ
    //          FUCOM
    //          FSTSW   AX
    //          SAHF
    //          FUCOMP  ST(2)
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    // FUCOMP doesn't raise exceptions on QNANs, unlike FTST

    // cdbnop holds the label L1 and whatever follows it; it is appended at the end
    CodeBuilder cdbnop;
    cdbnop.ctor();
    cdbnop.gennop();
    code *cnop = cdbnop.peek();         // L1
    push87(cdb);
    cdb.gen2(0xD9,0xEE);                       // FLDZ
    if (NOSAHF)
    {
        cdb.gen2(0xDF,0xE9);                   // FUCOMIP
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cdb.gen2(0xD9,0xEE);                   // FLDZ
        cdb.gen2(0xDF,0xEA);                   // FUCOMIP ST(2)
        if (pop)
        {
            // the pops go after L1 so that they are executed on every path
            cdbnop.genf2(0xDD,modregrm(3,3,0));  // FPOP
            cdbnop.genf2(0xDD,modregrm(3,3,0));  // FPOP
            pop87();
            pop87();
        }
    }
    else if (pop)
    {
        cdb.gen2(0xDA,0xE9);                   // FUCOMPP
        pop87();
        pop87();
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
        cdb.gen2(0xD9,0xEE);                   // FLDZ
        cdb.gen2(0xDA,0xE9);                   // FUCOMPP
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
    }
    else
    {
        cdb.gen2(0xDD,0xE1);                   // FUCOM
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
        cdb.gen2(0xDD,0xEA);                   // FUCOMP ST(2)
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
    }
    cdb.append(cdbnop);
}
3600 
/******************************
 * Given that the result of complex expression `e` is in `retregs`,
 * generate the code necessary to deliver it in `*pretregs`.
 *
 * Handles the transfers between the x87 register pair (mST01) and
 * the general purpose pair DX:AX, a single 64 bit general register,
 * or the XMM0/XMM1 pair, going through the `floatreg` scratch memory slot.
 *
 * Params:
 *      cdb = code builder that the generated instructions are appended to
 *      e = expression whose result is being moved; its type selects the transfer
 *      retregs = mask of the registers currently holding the result
 *      pretregs = pointer to the mask of registers (optionally with mPSW)
 *                 in which the result is wanted; a mask of 0 means the
 *                 result is discarded
 */

void fixresult_complex87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs)
{
    static if (0)
    {
        printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n",
            e,regm_str(retregs),regm_str(*pretregs));
    }

    // A result can only be delivered if it actually lives somewhere
    assert(!*pretregs || retregs);
    tym_t tym = tybasic(e.Ety);
    uint sz = _tysize[tym];

    if (*pretregs == 0 && retregs == mST01)
    {
        // Result is not wanted: discard both halves from the x87 stack
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
    }
    else if (tym == TYllong)
    {
        // passing cfloat through register for I64
        assert(retregs & mST01, "this float expression is not implemented");
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,4);     // FSTP floatreg+4
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,0);     // FSTP floatreg
        genfwait(cdb);
        const reg = findreg(*pretregs);
        // getregs() takes a register mask (see the other calls below),
        // while findreg() yields a register index, so convert first
        getregs(cdb,mask(reg));
        cdb.genfltreg(LOD, reg, 0);             // MOV reg,floatreg
        code_orrex(cdb.last(), REX_W);          // make it a 64 bit load, picking up both floats
    }
    else if (tym == TYcfloat && *pretregs & (mAX|mDX) && retregs & mST01)
    {
        // x87 pair -> DX:AX; ST0 goes to DX, the following stack entry to AX
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mDX|mAX);
        cdb.genfltreg(LOD, DX, 0);              // MOV EDX,floatreg

        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        cdb.genfltreg(LOD, AX, 0);              // MOV EAX,floatreg
    }
    else if (tym == TYcfloat && retregs & (mAX|mDX) && *pretregs & mST01)
    {
        // DX:AX -> x87 pair; AX is pushed first so DX ends up in ST0
        push87(cdb);
        cdb.genfltreg(STO, AX, 0);              // MOV floatreg, EAX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        push87(cdb);
        cdb.genfltreg(STO, DX, 0);              // MOV floatreg, EDX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             *pretregs & (mXMM0|mXMM1) && retregs & mST01)
    {
        // x87 pair -> XMM1:XMM0; ST0 goes to XMM1, the following stack entry to XMM0
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;
        uint xop = xmmload(tyf);
        uint mf = tyf == TYfloat ? MFfloat : MFdouble;
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mXMM0|mXMM1);
        cdb.genxmmreg(xop,XMM1,0,tyf);          // load XMM1 from floatreg

        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        cdb.genxmmreg(xop, XMM0, 0, tyf);       // load XMM0 from floatreg
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             retregs & (mXMM0|mXMM1) && *pretregs & mST01)
    {
        // XMM1:XMM0 -> x87 pair; XMM0 is pushed first so XMM1 ends up in ST0
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;
        uint xop = xmmstore(tyf);
        uint fop = tym == TYcfloat ? 0xD9 : 0xDD;
        push87(cdb);
        cdb.genfltreg(xop, XMM0-XMM0, 0);       // store XMM0 to floatreg
        checkSetVex(cdb.last(),tyf);
        cdb.genfltreg(fop, 0, 0);               // FLD float/double ptr floatreg

        push87(cdb);
        cdb.genxmmreg(xop, XMM1, 0, tyf);       // store XMM1 to floatreg
        cdb.genfltreg(fop, 0, 0);               // FLD float/double ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else
    {   // No register transfer needed; at most the flags have to be set
        if (*pretregs & mPSW)
        {   if (!(retregs & mPSW))
            {   assert(retregs & mST01);
                genctst(cdb,e,!(*pretregs & mST01));        // FTST
            }
        }
        assert(!(*pretregs & mST01) || (retregs & mST01));
    }
    if (*pretregs & mST01)
    {   // Record both halves of e on the simulated x87 stack
        // (arguments are presumably elem, byte offset, stack index - confirm in note87)
        note87(e,0,1);
        note87(e,sz/2,0);
    }
}
3719 
/*****************************************
 * Generate code for operators OPc_r and OPc_i.
 *
 * The complex operand is evaluated into the ST0/ST1 pair, then one of the
 * two x87 stack entries is dropped so that a single value remains in ST0:
 * OPc_r pops ST0, OPc_i stores ST0 over ST(1) and pops.
 *
 * Params:
 *      cdb = code builder that the generated instructions are appended to
 *      e = the OPc_r or OPc_i expression
 *      pretregs = pointer to the mask of registers the result is wanted in
 */

void cdconvt87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    // Get the complex operand onto the x87 stack
    regm_t retregs = mST01;
    codelem(cdb, e.EV.E1, &retregs, false);

    const oper = e.Eoper;
    if (oper != OPc_r && oper != OPc_i)
        assert(0);                      // no other operator is handled here

    if (oper == OPc_r)
        cdb.genf2(0xDD,0xD8 + 0);       // FPOP
    else
        cdb.genf2(0xDD,0xD8 + 1);       // FSTP ST(1)
    pop87();

    // Only one value is left, in ST0
    retregs = mST0;
    fixresult87(cdb, e, retregs, pretregs);
}
3746 
/**************************************
 * Load complex operand into ST01 or flags or both.
 *
 * Variables and indirections are loaded with two FLDs, the second one
 * from half the type's size further on; constants are loaded with the
 * dedicated x87 load-constant instructions; precision conversions simply
 * load their operand.
 *
 * Params:
 *      cdb = code builder that the generated instructions are appended to
 *      e = complex expression to load (TYcfloat, TYcdouble or TYcldouble)
 *      pretregs = pointer to the mask of where the result is wanted
 */

void cload87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cload87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //elem_print(e);
    assert(!I16);
    debug
    {
        if (I32)
        {
            assert(config.inline8087);
            elem_debug(e);
            assert(*pretregs & (mST01 | mPSW));
            assert(!(*pretregs & ~(mST01 | mPSW)));
        }
    }

    const tym_t ty = tybasic(e.Ety);
    const uint halfsz = _tysize[ty] / 2;        // size of one of the two parts

    // Instruction template, zeroed because `code` is declared void-initialized
    code cs = void;
    memset(&cs, 0, cs.sizeof);
    if (ADDFWAIT())
        cs.Iflags = CFwait;

    // Memory format for the float and double flavors; unused for TYcldouble
    uint mf;
    if (ty == TYcfloat)
        mf = MFfloat;
    else if (ty == TYcdouble)
        mf = MFdouble;
    else if (ty != TYcldouble)
        assert(0);

    const oper = e.Eoper;

    if (oper == OPd_ld || oper == OPld_d || oper == OPf_d || oper == OPd_f)
    {
        // Conversion between precisions: just load the operand
        cload87(cdb, e.EV.E1, pretregs);
        freenode(e.EV.E1);
        return;
    }

    if (oper == OPvar || oper == OPind)
    {
        if (oper == OPvar)
            notreg(e);                  // never enregister this variable

        // Two x87 stack entries are about to be occupied
        push87(cdb);
        push87(cdb);

        if (ty == TYcldouble)
            loadea(cdb,e,&cs,0xDB,5,0,0,0);             // FLD var
        else
            loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0);        // FLD var

        // Same instruction again, addressing the second part
        cs.IEV1.Voffset += halfsz;
        cdb.gen(&cs);
    }
    else if (oper == OPconst)
    {
        push87(cdb);
        push87(cdb);
        foreach (part; 0 .. 2)
        {
            ubyte ldop = loadconst(e, part);
            if (!ldop)
                assert(0);              // only directly loadable constants are handled
            cdb.genf2(0xD9,ldop);       // FLDx
        }
    }
    else
    {
        debug elem_print(e);
        assert(0);
    }

    // Both parts are now on the x87 stack
    fixresult_complex87(cdb, e, mST01, pretregs);
}
3847 
/**********************************************
 * Load OPpair or OPrpair into mST01.
 *
 * Both operands are evaluated into ST0, first E1 and then E2; for OPrpair
 * the two resulting x87 stack entries are exchanged afterwards.
 *
 * Params:
 *      cdb = code builder that the generated instructions are appended to
 *      e = the OPpair or OPrpair expression
 *      pretregs = pointer to the mask of where the result is wanted
 */
void loadPair87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    const bool swapped = e.Eoper == OPrpair;
    assert(swapped || e.Eoper == OPpair);

    elem* first = e.EV.E1;
    regm_t st0 = mST0;

    codelem(cdb, first, &st0, false);
    note87(first, 0, 0);                // remember that E1 occupies the stack top

    codelem(cdb, e.EV.E2, &st0, false);
    makesure87(cdb, first, 0, 1, 0);    // E1 must now be one entry below the top

    if (swapped)
        cdb.genf2(0xD9, 0xC8 + 1);   // FXCH ST(1)

    fixresult_complex87(cdb, e, mST01, pretregs);
}
3864 
/**********************************************
 * Round 80 bit precision to 32 or 64 bits.
 * OPtoprec
 *
 * The rounding is done by storing ST0 to the `floatreg` scratch slot in
 * the target format and loading it straight back.
 *
 * Params:
 *      cdb = code builder that the generated instructions are appended to
 *      e = the OPtoprec expression; its type selects float or double format
 *      pretregs = pointer to the mask of where the result is wanted
 */
void cdtoprec(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    //printf("cdtoprec: *pretregs = %s\n", regm_str(*pretregs));
    if (*pretregs == 0)
    {
        // Result is unused: evaluate the operand only
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    assert(config.inline8087);

    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);

    if (*pretregs & mST0)
    {
        // Anything that is not float sized is rounded to double
        const uint mf = (_tysize[tybasic(e.Ety)] == FLOATSIZE) ? MFfloat : MFdouble;

        cdb.genfltreg(ESC(mf,1),3,0);   // FSTP float/double ptr fltreg
        genfwait(cdb);
        cdb.genfltreg(ESC(mf,1),0,0);   // FLD float/double ptr fltreg
    }

    fixresult87(cdb, e, retregs, pretregs);
}
3892 
3893 }